|  | // Copyright 2013 The Chromium Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style license that can be | 
|  | // found in the LICENSE file. | 
|  |  | 
|  | #include "components/url_matcher/url_matcher.h" | 
|  |  | 
|  | #include <algorithm> | 
|  | #include <iterator> | 
|  | #include <utility> | 
|  |  | 
|  | #include "base/logging.h" | 
|  | #include "base/stl_util.h" | 
|  | #include "url/gurl.h" | 
|  | #include "url/url_canon.h" | 
|  |  | 
|  | namespace url_matcher { | 
|  |  | 
|  | // This set of classes implement a mapping of URL Component Patterns, such as | 
|  | // host_prefix, host_suffix, host_equals, ..., etc., to StringPatterns | 
|  | // for use in substring comparisons. | 
|  | // | 
|  | // The idea of this mapping is to reduce the problem of comparing many | 
|  | // URL Component Patterns against one URL to the problem of searching many | 
|  | // substrings in one string: | 
|  | // | 
|  | // ----------------------                    ----------------- | 
|  | // | URL Query operator | ----translate----> | StringPattern | | 
|  | // ----------------------                    ----------------- | 
|  | //                                                   ^ | 
|  | //                                                   | | 
|  | //                                                compare | 
|  | //                                                   | | 
|  | //                                                   v | 
|  | // ----------------------                    ----------------- | 
|  | // | URL to compare     |                    |               | | 
|  | // | to all URL Query   | ----translate----> | String        | | 
|  | // | operators          |                    |               | | 
|  | // ----------------------                    ----------------- | 
|  | // | 
|  | // The reason for this problem reduction is that there are efficient algorithms | 
|  | // for searching many substrings in one string (see Aho-Corasick algorithm). | 
|  | // | 
|  | // Additionally, some of the same pieces are reused to implement regular | 
|  | // expression comparisons. The FilteredRE2 implementation for matching many | 
|  | // regular expressions against one string uses prefiltering, in which a set | 
|  | // of substrings (derived from the regexes) are first searched for, to reduce | 
|  | // the number of regular expressions to test; the prefiltering step also | 
|  | // uses Aho-Corasick. | 
|  | // | 
|  | // Case 1: {host,path,query}_{prefix,suffix,equals} searches. | 
|  | // ========================================================== | 
|  | // | 
|  | // For searches in this class, we normalize URLs as follows: | 
|  | // | 
|  | // Step 1: | 
|  | // Remove scheme, port and segment from URL: | 
|  | // -> http://www.example.com:8080/index.html?search=foo#first_match becomes | 
|  | //    www.example.com/index.html?search=foo | 
|  | // | 
|  | // We remove the scheme and port number because they can be checked later | 
|  | // in a secondary filter step. We remove the segment (the #... part) because | 
|  | // this is not guaranteed to be ASCII-7 encoded. | 
|  | // | 
|  | // Step 2: | 
|  | // Translate URL to String and add the following position markers: | 
|  | // - BU = Beginning of URL | 
|  | // - ED = End of Domain | 
|  | // - EP = End of Path | 
|  | // - EU = End of URL | 
|  | // Furthermore, the hostname is canonicalized to start with a ".". | 
|  | // | 
|  | // Position markers are represented as characters >127, which are therefore | 
|  | // guaranteed not to be part of the ASCII-7 encoded URL character set. | 
|  | // | 
|  | // -> www.example.com/index.html?search=foo becomes | 
|  | // BU .www.example.com ED /index.html EP ?search=foo EU | 
|  | // | 
|  | // -> www.example.com/index.html becomes | 
|  | // BU .www.example.com ED /index.html EP EU | 
|  | // | 
|  | // Step 3: | 
|  | // Translate URL Component Patterns as follows: | 
|  | // | 
|  | // host_prefix(prefix) = BU add_missing_dot_prefix(prefix) | 
|  | // -> host_prefix("www.example") = BU .www.example | 
|  | // | 
|  | // host_suffix(suffix) = suffix ED | 
|  | // -> host_suffix("example.com") = example.com ED | 
|  | // -> host_suffix(".example.com") = .example.com ED | 
|  | // | 
|  | // host_equals(domain) = BU add_missing_dot_prefix(domain) ED | 
|  | // -> host_equals("www.example.com") = BU .www.example.com ED | 
|  | // | 
|  | // Similarly for path query parameters ({path, query}_{prefix, suffix, equals}). | 
|  | // | 
|  | // With this, we can search the StringPatterns in the normalized URL. | 
|  | // | 
|  | // | 
|  | // Case 2: url_{prefix,suffix,equals,contains} searches. | 
|  | // ===================================================== | 
|  | // | 
|  | // Step 1: as above, except that | 
|  | // - the scheme is not removed | 
|  | // - the port is not removed if it is specified and does not match the default | 
|  | //   port for the given scheme. | 
|  | // | 
|  | // Step 2: | 
|  | // Translate URL to String and add the following position markers: | 
|  | // - BU = Beginning of URL | 
|  | // - EU = End of URL | 
|  | // | 
|  | // -> http://www.example.com:8080/index.html?search=foo#first_match becomes | 
|  | // BU http://www.example.com:8080/index.html?search=foo EU | 
|  | // -> http://www.example.com:80/index.html?search=foo#first_match becomes | 
|  | // BU http://www.example.com/index.html?search=foo EU | 
|  | // | 
|  | // url_prefix(prefix) = BU prefix | 
|  | // -> url_prefix("http://www.example") = BU http://www.example | 
|  | // | 
|  | // url_contains(substring) = substring | 
|  | // -> url_contains("index") = index | 
|  | // | 
|  | // | 
|  | // Case 3: {host,path,query}_contains searches. | 
|  | // ============================================ | 
|  | // | 
|  | // These kinds of searches are not supported directly but can be derived | 
|  | // by a combination of a url_contains() query followed by an explicit test: | 
|  | // | 
|  | // host_contains(str) = url_contains(str) followed by test whether str occurs | 
|  | //   in host component of original URL. | 
|  | // -> host_contains("example.co") = example.co | 
|  | //    followed by gurl.host().find("example.co"); | 
|  | // | 
|  | // [similarly for path_contains and query_contains]. | 
|  | // | 
|  | // | 
|  | // Regular expression matching (url_matches searches) | 
|  | // ================================================== | 
|  | // | 
|  | // This class also supports matching regular expressions (RE2 syntax) | 
|  | // against full URLs, which are transformed as in case 2. | 
|  |  | 
|  | namespace { | 
|  |  | 
|  | bool IsRegexCriterion(URLMatcherCondition::Criterion criterion) { | 
|  | return criterion == URLMatcherCondition::URL_MATCHES; | 
|  | } | 
|  |  | 
|  | bool IsOriginAndPathRegexCriterion(URLMatcherCondition::Criterion criterion) { | 
|  | return criterion == URLMatcherCondition::ORIGIN_AND_PATH_MATCHES; | 
|  | } | 
|  |  | 
|  | }  // namespace | 
|  |  | 
|  | // | 
|  | // URLMatcherCondition | 
|  | // | 
|  |  | 
|  | URLMatcherCondition::URLMatcherCondition() | 
|  | : criterion_(HOST_PREFIX), | 
|  | string_pattern_(NULL) {} | 
|  |  | 
|  | URLMatcherCondition::~URLMatcherCondition() {} | 
|  |  | 
|  | URLMatcherCondition::URLMatcherCondition( | 
|  | Criterion criterion, | 
|  | const StringPattern* string_pattern) | 
|  | : criterion_(criterion), | 
|  | string_pattern_(string_pattern) {} | 
|  |  | 
|  | URLMatcherCondition::URLMatcherCondition(const URLMatcherCondition& rhs) | 
|  | : criterion_(rhs.criterion_), | 
|  | string_pattern_(rhs.string_pattern_) {} | 
|  |  | 
|  | URLMatcherCondition& URLMatcherCondition::operator=( | 
|  | const URLMatcherCondition& rhs) { | 
|  | criterion_ = rhs.criterion_; | 
|  | string_pattern_ = rhs.string_pattern_; | 
|  | return *this; | 
|  | } | 
|  |  | 
|  | bool URLMatcherCondition::operator<(const URLMatcherCondition& rhs) const { | 
|  | if (criterion_ < rhs.criterion_) return true; | 
|  | if (criterion_ > rhs.criterion_) return false; | 
|  | if (string_pattern_ != NULL && rhs.string_pattern_ != NULL) | 
|  | return *string_pattern_ < *rhs.string_pattern_; | 
|  | if (string_pattern_ == NULL && rhs.string_pattern_ != NULL) return true; | 
|  | // Either string_pattern_ != NULL && rhs.string_pattern_ == NULL, | 
|  | // or both are NULL. | 
|  | return false; | 
|  | } | 
|  |  | 
|  | bool URLMatcherCondition::IsFullURLCondition() const { | 
|  | // For these criteria the SubstringMatcher needs to be executed on the | 
|  | // GURL that is canonicalized with | 
|  | // URLMatcherConditionFactory::CanonicalizeURLForFullSearches. | 
|  | switch (criterion_) { | 
|  | case HOST_CONTAINS: | 
|  | case PATH_CONTAINS: | 
|  | case QUERY_CONTAINS: | 
|  | case URL_PREFIX: | 
|  | case URL_SUFFIX: | 
|  | case URL_CONTAINS: | 
|  | case URL_EQUALS: | 
|  | return true; | 
|  | default: | 
|  | break; | 
|  | } | 
|  | return false; | 
|  | } | 
|  |  | 
|  | bool URLMatcherCondition::IsRegexCondition() const { | 
|  | return IsRegexCriterion(criterion_); | 
|  | } | 
|  |  | 
|  | bool URLMatcherCondition::IsOriginAndPathRegexCondition() const { | 
|  | return IsOriginAndPathRegexCriterion(criterion_); | 
|  | } | 
|  |  | 
|  | bool URLMatcherCondition::IsMatch( | 
|  | const std::set<StringPattern::ID>& matching_patterns, | 
|  | const GURL& url) const { | 
|  | DCHECK(string_pattern_); | 
|  | if (!ContainsKey(matching_patterns, string_pattern_->id())) | 
|  | return false; | 
|  | // The criteria HOST_CONTAINS, PATH_CONTAINS, QUERY_CONTAINS are based on | 
|  | // a substring match on the raw URL. In case of a match, we need to verify | 
|  | // that the match was found in the correct component of the URL. | 
|  | switch (criterion_) { | 
|  | case HOST_CONTAINS: | 
|  | return url.host().find(string_pattern_->pattern()) != | 
|  | std::string::npos; | 
|  | case PATH_CONTAINS: | 
|  | return url.path().find(string_pattern_->pattern()) != | 
|  | std::string::npos; | 
|  | case QUERY_CONTAINS: | 
|  | return url.query().find(string_pattern_->pattern()) != | 
|  | std::string::npos; | 
|  | default: | 
|  | break; | 
|  | } | 
|  | return true; | 
|  | } | 
|  |  | 
|  | // | 
|  | // URLMatcherConditionFactory | 
|  | // | 
|  |  | 
|  | namespace { | 
|  | // These are symbols that are not contained in 7-bit ASCII used in GURLs. | 
|  | const char kBeginningOfURL[] = {static_cast<char>(-1), 0}; | 
|  | const char kEndOfDomain[] = {static_cast<char>(-2), 0}; | 
|  | const char kEndOfPath[] = {static_cast<char>(-3), 0}; | 
|  | const char kQueryComponentDelimiter[] = {static_cast<char>(-4), 0}; | 
|  | const char kEndOfURL[] = {static_cast<char>(-5), 0}; | 
|  |  | 
|  | // The delimiter for query parameters | 
|  | const char kQuerySeparator = '&'; | 
|  | }  // namespace | 
|  |  | 
|  | URLMatcherConditionFactory::URLMatcherConditionFactory() : id_counter_(0) {} | 
|  |  | 
|  | URLMatcherConditionFactory::~URLMatcherConditionFactory() { | 
|  | STLDeleteElements(&substring_pattern_singletons_); | 
|  | STLDeleteElements(®ex_pattern_singletons_); | 
|  | STLDeleteElements(&origin_and_path_regex_pattern_singletons_); | 
|  | } | 
|  |  | 
|  | std::string URLMatcherConditionFactory::CanonicalizeURLForComponentSearches( | 
|  | const GURL& url) const { | 
|  | return kBeginningOfURL + CanonicalizeHostname(url.host()) + kEndOfDomain + | 
|  | url.path() + kEndOfPath + | 
|  | (url.has_query() ? CanonicalizeQuery(url.query(), true, true) | 
|  | : std::string()) + | 
|  | kEndOfURL; | 
|  | } | 
|  |  | 
|  | URLMatcherCondition URLMatcherConditionFactory::CreateHostPrefixCondition( | 
|  | const std::string& prefix) { | 
|  | return CreateCondition(URLMatcherCondition::HOST_PREFIX, | 
|  | kBeginningOfURL + CanonicalizeHostPrefix(prefix)); | 
|  | } | 
|  |  | 
|  | URLMatcherCondition URLMatcherConditionFactory::CreateHostSuffixCondition( | 
|  | const std::string& suffix) { | 
|  | return CreateCondition(URLMatcherCondition::HOST_SUFFIX, | 
|  | CanonicalizeHostSuffix(suffix) + kEndOfDomain); | 
|  | } | 
|  |  | 
|  | URLMatcherCondition URLMatcherConditionFactory::CreateHostContainsCondition( | 
|  | const std::string& str) { | 
|  | return CreateCondition(URLMatcherCondition::HOST_CONTAINS, str); | 
|  | } | 
|  |  | 
|  | URLMatcherCondition URLMatcherConditionFactory::CreateHostEqualsCondition( | 
|  | const std::string& str) { | 
|  | return CreateCondition(URLMatcherCondition::HOST_EQUALS, | 
|  | kBeginningOfURL + CanonicalizeHostname(str) + kEndOfDomain); | 
|  | } | 
|  |  | 
|  | URLMatcherCondition URLMatcherConditionFactory::CreatePathPrefixCondition( | 
|  | const std::string& prefix) { | 
|  | return CreateCondition(URLMatcherCondition::PATH_PREFIX, | 
|  | kEndOfDomain + prefix); | 
|  | } | 
|  |  | 
|  | URLMatcherCondition URLMatcherConditionFactory::CreatePathSuffixCondition( | 
|  | const std::string& suffix) { | 
|  | return CreateCondition(URLMatcherCondition::PATH_SUFFIX, suffix + kEndOfPath); | 
|  | } | 
|  |  | 
|  | URLMatcherCondition URLMatcherConditionFactory::CreatePathContainsCondition( | 
|  | const std::string& str) { | 
|  | return CreateCondition(URLMatcherCondition::PATH_CONTAINS, str); | 
|  | } | 
|  |  | 
|  | URLMatcherCondition URLMatcherConditionFactory::CreatePathEqualsCondition( | 
|  | const std::string& str) { | 
|  | return CreateCondition(URLMatcherCondition::PATH_EQUALS, | 
|  | kEndOfDomain + str + kEndOfPath); | 
|  | } | 
|  |  | 
|  | URLMatcherCondition URLMatcherConditionFactory::CreateQueryPrefixCondition( | 
|  | const std::string& prefix) { | 
|  | std::string pattern; | 
|  | if (!prefix.empty() && prefix[0] == '?') | 
|  | pattern = kEndOfPath + CanonicalizeQuery(prefix.substr(1), true, false); | 
|  | else | 
|  | pattern = kEndOfPath + CanonicalizeQuery(prefix, true, false); | 
|  |  | 
|  | return CreateCondition(URLMatcherCondition::QUERY_PREFIX, pattern); | 
|  | } | 
|  |  | 
|  | URLMatcherCondition URLMatcherConditionFactory::CreateQuerySuffixCondition( | 
|  | const std::string& suffix) { | 
|  | if (!suffix.empty() && suffix[0] == '?') { | 
|  | return CreateQueryEqualsCondition(suffix); | 
|  | } else { | 
|  | return CreateCondition(URLMatcherCondition::QUERY_SUFFIX, | 
|  | CanonicalizeQuery(suffix, false, true) + kEndOfURL); | 
|  | } | 
|  | } | 
|  |  | 
|  | URLMatcherCondition URLMatcherConditionFactory::CreateQueryContainsCondition( | 
|  | const std::string& str) { | 
|  | if (!str.empty() && str[0] == '?') | 
|  | return CreateQueryPrefixCondition(str); | 
|  | else | 
|  | return CreateCondition(URLMatcherCondition::QUERY_CONTAINS, str); | 
|  | } | 
|  |  | 
|  | URLMatcherCondition URLMatcherConditionFactory::CreateQueryEqualsCondition( | 
|  | const std::string& str) { | 
|  | std::string pattern; | 
|  | if (!str.empty() && str[0] == '?') | 
|  | pattern = | 
|  | kEndOfPath + CanonicalizeQuery(str.substr(1), true, true) + kEndOfURL; | 
|  | else | 
|  | pattern = kEndOfPath + CanonicalizeQuery(str, true, true) + kEndOfURL; | 
|  |  | 
|  | return CreateCondition(URLMatcherCondition::QUERY_EQUALS, pattern); | 
|  | } | 
|  |  | 
|  | URLMatcherCondition | 
|  | URLMatcherConditionFactory::CreateHostSuffixPathPrefixCondition( | 
|  | const std::string& host_suffix, | 
|  | const std::string& path_prefix) { | 
|  | return CreateCondition(URLMatcherCondition::HOST_SUFFIX_PATH_PREFIX, | 
|  | CanonicalizeHostSuffix(host_suffix) + kEndOfDomain + path_prefix); | 
|  | } | 
|  |  | 
|  | URLMatcherCondition | 
|  | URLMatcherConditionFactory::CreateHostEqualsPathPrefixCondition( | 
|  | const std::string& host, | 
|  | const std::string& path_prefix) { | 
|  | return CreateCondition(URLMatcherCondition::HOST_EQUALS_PATH_PREFIX, | 
|  | kBeginningOfURL + CanonicalizeHostname(host) + kEndOfDomain + | 
|  | path_prefix); | 
|  | } | 
|  |  | 
|  | std::string URLMatcherConditionFactory::CanonicalizeURLForFullSearches( | 
|  | const GURL& url) const { | 
|  | GURL::Replacements replacements; | 
|  | replacements.ClearPassword(); | 
|  | replacements.ClearUsername(); | 
|  | replacements.ClearRef(); | 
|  | // Clear port if it is implicit from scheme. | 
|  | if (url.has_port()) { | 
|  | const std::string& port = url.scheme(); | 
|  | if (url::DefaultPortForScheme(port.c_str(), port.size()) == | 
|  | url.EffectiveIntPort()) { | 
|  | replacements.ClearPort(); | 
|  | } | 
|  | } | 
|  | return kBeginningOfURL + url.ReplaceComponents(replacements).spec() + | 
|  | kEndOfURL; | 
|  | } | 
|  |  | 
|  | static std::string CanonicalizeURLForRegexSearchesHelper( | 
|  | const GURL& url, | 
|  | bool clear_query) { | 
|  | GURL::Replacements replacements; | 
|  | replacements.ClearPassword(); | 
|  | replacements.ClearUsername(); | 
|  | replacements.ClearRef(); | 
|  | if (clear_query) | 
|  | replacements.ClearQuery(); | 
|  | // Clear port if it is implicit from scheme. | 
|  | if (url.has_port()) { | 
|  | const std::string& port = url.scheme(); | 
|  | if (url::DefaultPortForScheme(port.c_str(), port.size()) == | 
|  | url.EffectiveIntPort()) { | 
|  | replacements.ClearPort(); | 
|  | } | 
|  | } | 
|  | return url.ReplaceComponents(replacements).spec(); | 
|  | } | 
|  |  | 
|  | std::string URLMatcherConditionFactory::CanonicalizeURLForRegexSearches( | 
|  | const GURL& url) const { | 
|  | return CanonicalizeURLForRegexSearchesHelper(url, false); | 
|  | } | 
|  |  | 
|  | std::string | 
|  | URLMatcherConditionFactory::CanonicalizeURLForOriginAndPathRegexSearches( | 
|  | const GURL& url) const { | 
|  | return CanonicalizeURLForRegexSearchesHelper(url, true); | 
|  | } | 
|  |  | 
|  | URLMatcherCondition URLMatcherConditionFactory::CreateURLPrefixCondition( | 
|  | const std::string& prefix) { | 
|  | return CreateCondition(URLMatcherCondition::URL_PREFIX, | 
|  | kBeginningOfURL + prefix); | 
|  | } | 
|  |  | 
|  | URLMatcherCondition URLMatcherConditionFactory::CreateURLSuffixCondition( | 
|  | const std::string& suffix) { | 
|  | return CreateCondition(URLMatcherCondition::URL_SUFFIX, suffix + kEndOfURL); | 
|  | } | 
|  |  | 
|  | URLMatcherCondition URLMatcherConditionFactory::CreateURLContainsCondition( | 
|  | const std::string& str) { | 
|  | return CreateCondition(URLMatcherCondition::URL_CONTAINS, str); | 
|  | } | 
|  |  | 
|  | URLMatcherCondition URLMatcherConditionFactory::CreateURLEqualsCondition( | 
|  | const std::string& str) { | 
|  | return CreateCondition(URLMatcherCondition::URL_EQUALS, | 
|  | kBeginningOfURL + str + kEndOfURL); | 
|  | } | 
|  |  | 
|  | URLMatcherCondition URLMatcherConditionFactory::CreateURLMatchesCondition( | 
|  | const std::string& regex) { | 
|  | return CreateCondition(URLMatcherCondition::URL_MATCHES, regex); | 
|  | } | 
|  |  | 
|  | URLMatcherCondition | 
|  | URLMatcherConditionFactory::CreateOriginAndPathMatchesCondition( | 
|  | const std::string& regex) { | 
|  | return CreateCondition(URLMatcherCondition::ORIGIN_AND_PATH_MATCHES, regex); | 
|  | } | 
|  |  | 
|  | void URLMatcherConditionFactory::ForgetUnusedPatterns( | 
|  | const std::set<StringPattern::ID>& used_patterns) { | 
|  | PatternSingletons::iterator i = substring_pattern_singletons_.begin(); | 
|  | while (i != substring_pattern_singletons_.end()) { | 
|  | if (ContainsKey(used_patterns, (*i)->id())) { | 
|  | ++i; | 
|  | } else { | 
|  | delete *i; | 
|  | substring_pattern_singletons_.erase(i++); | 
|  | } | 
|  | } | 
|  | i = regex_pattern_singletons_.begin(); | 
|  | while (i != regex_pattern_singletons_.end()) { | 
|  | if (ContainsKey(used_patterns, (*i)->id())) { | 
|  | ++i; | 
|  | } else { | 
|  | delete *i; | 
|  | regex_pattern_singletons_.erase(i++); | 
|  | } | 
|  | } | 
|  | i = origin_and_path_regex_pattern_singletons_.begin(); | 
|  | while (i != origin_and_path_regex_pattern_singletons_.end()) { | 
|  | if (ContainsKey(used_patterns, (*i)->id())) { | 
|  | ++i; | 
|  | } else { | 
|  | delete *i; | 
|  | origin_and_path_regex_pattern_singletons_.erase(i++); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | bool URLMatcherConditionFactory::IsEmpty() const { | 
|  | return substring_pattern_singletons_.empty() && | 
|  | regex_pattern_singletons_.empty() && | 
|  | origin_and_path_regex_pattern_singletons_.empty(); | 
|  | } | 
|  |  | 
|  | URLMatcherCondition URLMatcherConditionFactory::CreateCondition( | 
|  | URLMatcherCondition::Criterion criterion, | 
|  | const std::string& pattern) { | 
|  | StringPattern search_pattern(pattern, 0); | 
|  | PatternSingletons* pattern_singletons = NULL; | 
|  | if (IsRegexCriterion(criterion)) | 
|  | pattern_singletons = ®ex_pattern_singletons_; | 
|  | else if (IsOriginAndPathRegexCriterion(criterion)) | 
|  | pattern_singletons = &origin_and_path_regex_pattern_singletons_; | 
|  | else | 
|  | pattern_singletons = &substring_pattern_singletons_; | 
|  |  | 
|  | PatternSingletons::const_iterator iter = | 
|  | pattern_singletons->find(&search_pattern); | 
|  |  | 
|  | if (iter != pattern_singletons->end()) { | 
|  | return URLMatcherCondition(criterion, *iter); | 
|  | } else { | 
|  | StringPattern* new_pattern = | 
|  | new StringPattern(pattern, id_counter_++); | 
|  | pattern_singletons->insert(new_pattern); | 
|  | return URLMatcherCondition(criterion, new_pattern); | 
|  | } | 
|  | } | 
|  |  | 
|  | std::string URLMatcherConditionFactory::CanonicalizeHostSuffix( | 
|  | const std::string& suffix) const { | 
|  | if (!suffix.empty() && suffix[suffix.size() - 1] == '.') | 
|  | return suffix; | 
|  | else | 
|  | return suffix + "."; | 
|  | } | 
|  |  | 
|  | std::string URLMatcherConditionFactory::CanonicalizeHostPrefix( | 
|  | const std::string& prefix) const { | 
|  | if (!prefix.empty() && prefix[0] == '.') | 
|  | return prefix; | 
|  | else | 
|  | return "." + prefix; | 
|  | } | 
|  |  | 
|  | std::string URLMatcherConditionFactory::CanonicalizeHostname( | 
|  | const std::string& hostname) const { | 
|  | return CanonicalizeHostPrefix(CanonicalizeHostSuffix(hostname)); | 
|  | } | 
|  |  | 
|  | // This function prepares the query string by replacing query separator with a | 
|  | // magic value (|kQueryComponentDelimiter|). When the boolean | 
|  | // |prepend_beginning_of_query_component| is true the function prepends the | 
|  | // query with the same magic. This is done to locate the start of a key value | 
|  | // pair in the query string. The parameter |query| is passed by value | 
|  | // intentionally, since it is locally modified. | 
|  | std::string URLMatcherConditionFactory::CanonicalizeQuery( | 
|  | std::string query, | 
|  | bool prepend_beginning_of_query_component, | 
|  | bool append_end_of_query_component) const { | 
|  | for (std::string::iterator it = query.begin(); it != query.end(); ++it) { | 
|  | if (*it == kQuerySeparator) | 
|  | *it = kQueryComponentDelimiter[0]; | 
|  | } | 
|  | if (prepend_beginning_of_query_component) | 
|  | query = kQueryComponentDelimiter + query; | 
|  | if (append_end_of_query_component) | 
|  | query += kQueryComponentDelimiter; | 
|  | return query; | 
|  | } | 
|  |  | 
|  | bool URLMatcherConditionFactory::StringPatternPointerCompare::operator()( | 
|  | StringPattern* lhs, | 
|  | StringPattern* rhs) const { | 
|  | if (lhs == NULL && rhs != NULL) return true; | 
|  | if (lhs != NULL && rhs != NULL) | 
|  | return lhs->pattern() < rhs->pattern(); | 
|  | // Either both are NULL or only rhs is NULL. | 
|  | return false; | 
|  | } | 
|  |  | 
|  | // | 
|  | // URLQueryElementMatcherCondition | 
|  | // | 
|  |  | 
|  | URLQueryElementMatcherCondition::URLQueryElementMatcherCondition( | 
|  | const std::string& key, | 
|  | const std::string& value, | 
|  | QueryValueMatchType query_value_match_type, | 
|  | QueryElementType query_element_type, | 
|  | Type match_type, | 
|  | URLMatcherConditionFactory* factory) { | 
|  | match_type_ = match_type; | 
|  |  | 
|  | if (query_element_type == ELEMENT_TYPE_KEY_VALUE) { | 
|  | key_ = kQueryComponentDelimiter + key + "="; | 
|  | value_ = value; | 
|  | } else { | 
|  | key_ = kQueryComponentDelimiter + key; | 
|  | value_ = std::string(); | 
|  | } | 
|  |  | 
|  | if (query_value_match_type == QUERY_VALUE_MATCH_EXACT) | 
|  | value_ += kQueryComponentDelimiter; | 
|  |  | 
|  | // If |value_| is empty no need to find the |key_| and verify if the value | 
|  | // matches. Simply checking the presence of key is sufficient, which is done | 
|  | // by MATCH_ANY | 
|  | if (value_.empty()) | 
|  | match_type_ = MATCH_ANY; | 
|  |  | 
|  | URLMatcherCondition condition; | 
|  | // If |match_type_| is MATCH_ANY, then we could simply look for the | 
|  | // combination of |key_| + |value_|, which can be efficiently done by | 
|  | // SubstringMatcher | 
|  | if (match_type_ == MATCH_ANY) | 
|  | condition = factory->CreateQueryContainsCondition(key_ + value_); | 
|  | else | 
|  | condition = factory->CreateQueryContainsCondition(key_); | 
|  | string_pattern_ = condition.string_pattern(); | 
|  |  | 
|  | key_length_ = key_.length(); | 
|  | value_length_ = value_.length(); | 
|  | } | 
|  |  | 
|  | URLQueryElementMatcherCondition::~URLQueryElementMatcherCondition() {} | 
|  |  | 
|  | bool URLQueryElementMatcherCondition::operator<( | 
|  | const URLQueryElementMatcherCondition& rhs) const { | 
|  | if (match_type_ != rhs.match_type_) | 
|  | return match_type_ < rhs.match_type_; | 
|  | if (string_pattern_ != NULL && rhs.string_pattern_ != NULL) | 
|  | return *string_pattern_ < *rhs.string_pattern_; | 
|  | if (string_pattern_ == NULL && rhs.string_pattern_ != NULL) | 
|  | return true; | 
|  | // Either string_pattern_ != NULL && rhs.string_pattern_ == NULL, | 
|  | // or both are NULL. | 
|  | return false; | 
|  | } | 
|  |  | 
|  | bool URLQueryElementMatcherCondition::IsMatch( | 
|  | const std::string& url_for_component_searches) const { | 
|  | switch (match_type_) { | 
|  | case MATCH_ANY: { | 
|  | // For MATCH_ANY, no additional verification step is needed. We can trust | 
|  | // the SubstringMatcher to do the verification. | 
|  | return true; | 
|  | } | 
|  | case MATCH_ALL: { | 
|  | size_t start = 0; | 
|  | int found = 0; | 
|  | size_t offset; | 
|  | while ((offset = url_for_component_searches.find(key_, start)) != | 
|  | std::string::npos) { | 
|  | if (url_for_component_searches.compare( | 
|  | offset + key_length_, value_length_, value_) != 0) { | 
|  | return false; | 
|  | } else { | 
|  | ++found; | 
|  | } | 
|  | start = offset + key_length_ + value_length_ - 1; | 
|  | } | 
|  | return !!found; | 
|  | } | 
|  | case MATCH_FIRST: { | 
|  | size_t offset = url_for_component_searches.find(key_); | 
|  | return url_for_component_searches.compare( | 
|  | offset + key_length_, value_length_, value_) == 0; | 
|  | } | 
|  | case MATCH_LAST: { | 
|  | size_t offset = url_for_component_searches.rfind(key_); | 
|  | return url_for_component_searches.compare( | 
|  | offset + key_length_, value_length_, value_) == 0; | 
|  | } | 
|  | } | 
|  | NOTREACHED(); | 
|  | return false; | 
|  | } | 
|  |  | 
|  | // | 
|  | // URLMatcherSchemeFilter | 
|  | // | 
|  |  | 
|  | URLMatcherSchemeFilter::URLMatcherSchemeFilter(const std::string& filter) | 
|  | : filters_(1) { | 
|  | filters_.push_back(filter); | 
|  | } | 
|  |  | 
|  | URLMatcherSchemeFilter::URLMatcherSchemeFilter( | 
|  | const std::vector<std::string>& filters) | 
|  | : filters_(filters) {} | 
|  |  | 
|  | URLMatcherSchemeFilter::~URLMatcherSchemeFilter() {} | 
|  |  | 
|  | bool URLMatcherSchemeFilter::IsMatch(const GURL& url) const { | 
|  | return std::find(filters_.begin(), filters_.end(), url.scheme()) != | 
|  | filters_.end(); | 
|  | } | 
|  |  | 
|  | // | 
|  | // URLMatcherPortFilter | 
|  | // | 
|  |  | 
|  | URLMatcherPortFilter::URLMatcherPortFilter( | 
|  | const std::vector<URLMatcherPortFilter::Range>& ranges) | 
|  | : ranges_(ranges) {} | 
|  |  | 
|  | URLMatcherPortFilter::~URLMatcherPortFilter() {} | 
|  |  | 
|  | bool URLMatcherPortFilter::IsMatch(const GURL& url) const { | 
|  | int port = url.EffectiveIntPort(); | 
|  | for (std::vector<Range>::const_iterator i = ranges_.begin(); | 
|  | i != ranges_.end(); ++i) { | 
|  | if (i->first <= port && port <= i->second) | 
|  | return true; | 
|  | } | 
|  | return false; | 
|  | } | 
|  |  | 
|  | // static | 
|  | URLMatcherPortFilter::Range URLMatcherPortFilter::CreateRange(int from, | 
|  | int to) { | 
|  | return Range(from, to); | 
|  | } | 
|  |  | 
|  | // static | 
|  | URLMatcherPortFilter::Range URLMatcherPortFilter::CreateRange(int port) { | 
|  | return Range(port, port); | 
|  | } | 
|  |  | 
|  | // | 
|  | // URLMatcherConditionSet | 
|  | // | 
|  |  | 
|  | URLMatcherConditionSet::~URLMatcherConditionSet() {} | 
|  |  | 
|  | URLMatcherConditionSet::URLMatcherConditionSet( | 
|  | ID id, | 
|  | const Conditions& conditions) | 
|  | : id_(id), | 
|  | conditions_(conditions) {} | 
|  |  | 
|  | URLMatcherConditionSet::URLMatcherConditionSet( | 
|  | ID id, | 
|  | const Conditions& conditions, | 
|  | scoped_ptr<URLMatcherSchemeFilter> scheme_filter, | 
|  | scoped_ptr<URLMatcherPortFilter> port_filter) | 
|  | : id_(id), | 
|  | conditions_(conditions), | 
|  | scheme_filter_(std::move(scheme_filter)), | 
|  | port_filter_(std::move(port_filter)) {} | 
|  |  | 
|  | URLMatcherConditionSet::URLMatcherConditionSet( | 
|  | ID id, | 
|  | const Conditions& conditions, | 
|  | const QueryConditions& query_conditions, | 
|  | scoped_ptr<URLMatcherSchemeFilter> scheme_filter, | 
|  | scoped_ptr<URLMatcherPortFilter> port_filter) | 
|  | : id_(id), | 
|  | conditions_(conditions), | 
|  | query_conditions_(query_conditions), | 
|  | scheme_filter_(std::move(scheme_filter)), | 
|  | port_filter_(std::move(port_filter)) {} | 
|  |  | 
|  | bool URLMatcherConditionSet::IsMatch( | 
|  | const std::set<StringPattern::ID>& matching_patterns, | 
|  | const GURL& url) const { | 
|  | return IsMatch(matching_patterns, url, std::string()); | 
|  | } | 
|  |  | 
|  | bool URLMatcherConditionSet::IsMatch( | 
|  | const std::set<StringPattern::ID>& matching_patterns, | 
|  | const GURL& url, | 
|  | const std::string& url_for_component_searches) const { | 
|  | for (Conditions::const_iterator i = conditions_.begin(); | 
|  | i != conditions_.end(); ++i) { | 
|  | if (!i->IsMatch(matching_patterns, url)) | 
|  | return false; | 
|  | } | 
|  | if (scheme_filter_.get() && !scheme_filter_->IsMatch(url)) | 
|  | return false; | 
|  | if (port_filter_.get() && !port_filter_->IsMatch(url)) | 
|  | return false; | 
|  | if (query_conditions_.empty()) | 
|  | return true; | 
|  | // The loop is duplicated below for performance reasons. If not all query | 
|  | // elements are found, no need to verify match that is expected to take more | 
|  | // cycles. | 
|  | for (QueryConditions::const_iterator i = query_conditions_.begin(); | 
|  | i != query_conditions_.end(); | 
|  | ++i) { | 
|  | if (!ContainsKey(matching_patterns, i->string_pattern()->id())) | 
|  | return false; | 
|  | } | 
|  | for (QueryConditions::const_iterator i = query_conditions_.begin(); | 
|  | i != query_conditions_.end(); | 
|  | ++i) { | 
|  | if (!i->IsMatch(url_for_component_searches)) | 
|  | return false; | 
|  | } | 
|  | return true; | 
|  | } | 
|  |  | 
|  | // | 
|  | // URLMatcher | 
|  | // | 
|  |  | 
|  | URLMatcher::URLMatcher() {} | 
|  |  | 
|  | URLMatcher::~URLMatcher() {} | 
|  |  | 
|  | void URLMatcher::AddConditionSets( | 
|  | const URLMatcherConditionSet::Vector& condition_sets) { | 
|  | for (URLMatcherConditionSet::Vector::const_iterator i = | 
|  | condition_sets.begin(); i != condition_sets.end(); ++i) { | 
|  | DCHECK(url_matcher_condition_sets_.find((*i)->id()) == | 
|  | url_matcher_condition_sets_.end()); | 
|  | url_matcher_condition_sets_[(*i)->id()] = *i; | 
|  | } | 
|  | UpdateInternalDatastructures(); | 
|  | } | 
|  |  | 
|  | void URLMatcher::RemoveConditionSets( | 
|  | const std::vector<URLMatcherConditionSet::ID>& condition_set_ids) { | 
|  | for (std::vector<URLMatcherConditionSet::ID>::const_iterator i = | 
|  | condition_set_ids.begin(); i != condition_set_ids.end(); ++i) { | 
|  | DCHECK(url_matcher_condition_sets_.find(*i) != | 
|  | url_matcher_condition_sets_.end()); | 
|  | url_matcher_condition_sets_.erase(*i); | 
|  | } | 
|  | UpdateInternalDatastructures(); | 
|  | } | 
|  |  | 
|  | void URLMatcher::ClearUnusedConditionSets() { | 
|  | UpdateConditionFactory(); | 
|  | } | 
|  |  | 
|  | std::set<URLMatcherConditionSet::ID> URLMatcher::MatchURL( | 
|  | const GURL& url) const { | 
|  | // Find all IDs of StringPatterns that match |url|. | 
|  | // See URLMatcherConditionFactory for the canonicalization of URLs and the | 
|  | // distinction between full url searches and url component searches. | 
|  | std::set<StringPattern::ID> matches; | 
|  | std::string url_for_component_searches; | 
|  |  | 
|  | if (!full_url_matcher_.IsEmpty()) { | 
|  | full_url_matcher_.Match( | 
|  | condition_factory_.CanonicalizeURLForFullSearches(url), &matches); | 
|  | } | 
|  | if (!url_component_matcher_.IsEmpty()) { | 
|  | url_for_component_searches = | 
|  | condition_factory_.CanonicalizeURLForComponentSearches(url); | 
|  | url_component_matcher_.Match(url_for_component_searches, &matches); | 
|  | } | 
|  | if (!regex_set_matcher_.IsEmpty()) { | 
|  | regex_set_matcher_.Match( | 
|  | condition_factory_.CanonicalizeURLForRegexSearches(url), &matches); | 
|  | } | 
|  | if (!origin_and_path_regex_set_matcher_.IsEmpty()) { | 
|  | origin_and_path_regex_set_matcher_.Match( | 
|  | condition_factory_.CanonicalizeURLForOriginAndPathRegexSearches(url), | 
|  | &matches); | 
|  | } | 
|  |  | 
|  | // Calculate all URLMatcherConditionSets for which all URLMatcherConditions | 
|  | // were fulfilled. | 
|  | std::set<URLMatcherConditionSet::ID> result; | 
|  | for (std::set<StringPattern::ID>::const_iterator i = matches.begin(); | 
|  | i != matches.end(); ++i) { | 
|  | // For each URLMatcherConditionSet there is exactly one condition | 
|  | // registered in substring_match_triggers_. This means that the following | 
|  | // logic tests each URLMatcherConditionSet exactly once if it can be | 
|  | // completely fulfilled. | 
|  | StringPatternTriggers::const_iterator triggered_condition_sets_iter = | 
|  | substring_match_triggers_.find(*i); | 
|  | if (triggered_condition_sets_iter == substring_match_triggers_.end()) | 
|  | continue;  // Not all substring matches are triggers for a condition set. | 
|  | const std::set<URLMatcherConditionSet::ID>& condition_sets = | 
|  | triggered_condition_sets_iter->second; | 
|  | for (std::set<URLMatcherConditionSet::ID>::const_iterator j = | 
|  | condition_sets.begin(); j != condition_sets.end(); ++j) { | 
|  | URLMatcherConditionSets::const_iterator condition_set_iter = | 
|  | url_matcher_condition_sets_.find(*j); | 
|  | DCHECK(condition_set_iter != url_matcher_condition_sets_.end()); | 
|  | if (condition_set_iter->second->IsMatch( | 
|  | matches, url, url_for_component_searches)) | 
|  | result.insert(*j); | 
|  | } | 
|  | } | 
|  |  | 
|  | return result; | 
|  | } | 
|  |  | 
|  | bool URLMatcher::IsEmpty() const { | 
|  | return condition_factory_.IsEmpty() && | 
|  | url_matcher_condition_sets_.empty() && | 
|  | substring_match_triggers_.empty() && | 
|  | full_url_matcher_.IsEmpty() && | 
|  | url_component_matcher_.IsEmpty() && | 
|  | regex_set_matcher_.IsEmpty() && | 
|  | origin_and_path_regex_set_matcher_.IsEmpty() && | 
|  | registered_full_url_patterns_.empty() && | 
|  | registered_url_component_patterns_.empty(); | 
|  | } | 
|  |  | 
|  | void URLMatcher::UpdateSubstringSetMatcher(bool full_url_conditions) { | 
|  | // The purpose of |full_url_conditions| is just that we need to execute | 
|  | // the same logic once for Full URL searches and once for URL Component | 
|  | // searches (see URLMatcherConditionFactory). | 
|  |  | 
|  | // Determine which patterns need to be registered when this function | 
|  | // terminates. | 
|  | std::set<const StringPattern*> new_patterns; | 
|  | for (URLMatcherConditionSets::const_iterator condition_set_iter = | 
|  | url_matcher_condition_sets_.begin(); | 
|  | condition_set_iter != url_matcher_condition_sets_.end(); | 
|  | ++condition_set_iter) { | 
|  | const URLMatcherConditionSet::Conditions& conditions = | 
|  | condition_set_iter->second->conditions(); | 
|  | for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = | 
|  | conditions.begin(); condition_iter != conditions.end(); | 
|  | ++condition_iter) { | 
|  | // If we are called to process Full URL searches, ignore others, and | 
|  | // vice versa. (Regex conditions are updated in UpdateRegexSetMatcher.) | 
|  | if (!condition_iter->IsRegexCondition() && | 
|  | !condition_iter->IsOriginAndPathRegexCondition() && | 
|  | full_url_conditions == condition_iter->IsFullURLCondition()) | 
|  | new_patterns.insert(condition_iter->string_pattern()); | 
|  | } | 
|  |  | 
|  | if (full_url_conditions) | 
|  | continue; | 
|  |  | 
|  | const URLMatcherConditionSet::QueryConditions& query_conditions = | 
|  | condition_set_iter->second->query_conditions(); | 
|  | for (URLMatcherConditionSet::QueryConditions::const_iterator | 
|  | query_condition_iter = query_conditions.begin(); | 
|  | query_condition_iter != query_conditions.end(); | 
|  | ++query_condition_iter) { | 
|  | new_patterns.insert(query_condition_iter->string_pattern()); | 
|  | } | 
|  | } | 
|  |  | 
|  | // This is the set of patterns that were registered before this function | 
|  | // is called. | 
|  | std::set<const StringPattern*>& registered_patterns = | 
|  | full_url_conditions ? registered_full_url_patterns_ | 
|  | : registered_url_component_patterns_; | 
|  |  | 
|  | // Add all patterns that are in new_patterns but not in registered_patterns. | 
|  | std::vector<const StringPattern*> patterns_to_register = | 
|  | base::STLSetDifference<std::vector<const StringPattern*> >( | 
|  | new_patterns, registered_patterns); | 
|  |  | 
|  | // Remove all patterns that are in registered_patterns but not in | 
|  | // new_patterns. | 
|  | std::vector<const StringPattern*> patterns_to_unregister = | 
|  | base::STLSetDifference<std::vector<const StringPattern*> >( | 
|  | registered_patterns, new_patterns); | 
|  |  | 
|  | // Update the SubstringSetMatcher. | 
|  | SubstringSetMatcher& url_matcher = | 
|  | full_url_conditions ? full_url_matcher_ : url_component_matcher_; | 
|  | url_matcher.RegisterAndUnregisterPatterns(patterns_to_register, | 
|  | patterns_to_unregister); | 
|  |  | 
|  | // Update the set of registered_patterns for the next time this function | 
|  | // is being called. | 
|  | registered_patterns.swap(new_patterns); | 
|  | } | 
|  |  | 
|  | void URLMatcher::UpdateRegexSetMatcher() { | 
|  | std::vector<const StringPattern*> new_patterns; | 
|  | std::vector<const StringPattern*> new_origin_and_path_patterns; | 
|  |  | 
|  | for (URLMatcherConditionSets::const_iterator condition_set_iter = | 
|  | url_matcher_condition_sets_.begin(); | 
|  | condition_set_iter != url_matcher_condition_sets_.end(); | 
|  | ++condition_set_iter) { | 
|  | const URLMatcherConditionSet::Conditions& conditions = | 
|  | condition_set_iter->second->conditions(); | 
|  | for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = | 
|  | conditions.begin(); condition_iter != conditions.end(); | 
|  | ++condition_iter) { | 
|  | if (condition_iter->IsRegexCondition()) { | 
|  | new_patterns.push_back(condition_iter->string_pattern()); | 
|  | } else if (condition_iter->IsOriginAndPathRegexCondition()) { | 
|  | new_origin_and_path_patterns.push_back( | 
|  | condition_iter->string_pattern()); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | // Start over from scratch. We can't really do better than this, since the | 
|  | // FilteredRE2 backend doesn't support incremental updates. | 
|  | regex_set_matcher_.ClearPatterns(); | 
|  | regex_set_matcher_.AddPatterns(new_patterns); | 
|  | origin_and_path_regex_set_matcher_.ClearPatterns(); | 
|  | origin_and_path_regex_set_matcher_.AddPatterns(new_origin_and_path_patterns); | 
|  | } | 
|  |  | 
|  | void URLMatcher::UpdateTriggers() { | 
|  | // Count substring pattern frequencies. | 
|  | std::map<StringPattern::ID, size_t> substring_pattern_frequencies; | 
|  | for (URLMatcherConditionSets::const_iterator condition_set_iter = | 
|  | url_matcher_condition_sets_.begin(); | 
|  | condition_set_iter != url_matcher_condition_sets_.end(); | 
|  | ++condition_set_iter) { | 
|  | const URLMatcherConditionSet::Conditions& conditions = | 
|  | condition_set_iter->second->conditions(); | 
|  | for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = | 
|  | conditions.begin(); condition_iter != conditions.end(); | 
|  | ++condition_iter) { | 
|  | const StringPattern* pattern = condition_iter->string_pattern(); | 
|  | substring_pattern_frequencies[pattern->id()]++; | 
|  | } | 
|  |  | 
|  | const URLMatcherConditionSet::QueryConditions& query_conditions = | 
|  | condition_set_iter->second->query_conditions(); | 
|  | for (URLMatcherConditionSet::QueryConditions::const_iterator | 
|  | query_condition_iter = query_conditions.begin(); | 
|  | query_condition_iter != query_conditions.end(); | 
|  | ++query_condition_iter) { | 
|  | const StringPattern* pattern = query_condition_iter->string_pattern(); | 
|  | substring_pattern_frequencies[pattern->id()]++; | 
|  | } | 
|  | } | 
|  |  | 
|  | // Update trigger conditions: Determine for each URLMatcherConditionSet which | 
|  | // URLMatcherCondition contains a StringPattern that occurs least | 
|  | // frequently in this URLMatcher. We assume that this condition is very | 
|  | // specific and occurs rarely in URLs. If a match occurs for this | 
|  | // URLMatcherCondition, we want to test all other URLMatcherCondition in the | 
|  | // respective URLMatcherConditionSet as well to see whether the entire | 
|  | // URLMatcherConditionSet is considered matching. | 
|  | substring_match_triggers_.clear(); | 
|  | for (URLMatcherConditionSets::const_iterator condition_set_iter = | 
|  | url_matcher_condition_sets_.begin(); | 
|  | condition_set_iter != url_matcher_condition_sets_.end(); | 
|  | ++condition_set_iter) { | 
|  | const URLMatcherConditionSet::Conditions& conditions = | 
|  | condition_set_iter->second->conditions(); | 
|  | if (conditions.empty()) | 
|  | continue; | 
|  | URLMatcherConditionSet::Conditions::const_iterator condition_iter = | 
|  | conditions.begin(); | 
|  | StringPattern::ID trigger = condition_iter->string_pattern()->id(); | 
|  | // We skip the first element in the following loop. | 
|  | ++condition_iter; | 
|  | for (; condition_iter != conditions.end(); ++condition_iter) { | 
|  | StringPattern::ID current_id = | 
|  | condition_iter->string_pattern()->id(); | 
|  | if (substring_pattern_frequencies[trigger] > | 
|  | substring_pattern_frequencies[current_id]) { | 
|  | trigger = current_id; | 
|  | } | 
|  | } | 
|  |  | 
|  | const URLMatcherConditionSet::QueryConditions& query_conditions = | 
|  | condition_set_iter->second->query_conditions(); | 
|  | for (URLMatcherConditionSet::QueryConditions::const_iterator | 
|  | query_condition_iter = query_conditions.begin(); | 
|  | query_condition_iter != query_conditions.end(); | 
|  | ++query_condition_iter) { | 
|  | StringPattern::ID current_id = | 
|  | query_condition_iter->string_pattern()->id(); | 
|  | if (substring_pattern_frequencies[trigger] > | 
|  | substring_pattern_frequencies[current_id]) { | 
|  | trigger = current_id; | 
|  | } | 
|  | } | 
|  |  | 
|  | substring_match_triggers_[trigger].insert(condition_set_iter->second->id()); | 
|  | } | 
|  | } | 
|  |  | 
|  | void URLMatcher::UpdateConditionFactory() { | 
|  | std::set<StringPattern::ID> used_patterns; | 
|  | for (URLMatcherConditionSets::const_iterator condition_set_iter = | 
|  | url_matcher_condition_sets_.begin(); | 
|  | condition_set_iter != url_matcher_condition_sets_.end(); | 
|  | ++condition_set_iter) { | 
|  | const URLMatcherConditionSet::Conditions& conditions = | 
|  | condition_set_iter->second->conditions(); | 
|  | for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = | 
|  | conditions.begin(); condition_iter != conditions.end(); | 
|  | ++condition_iter) { | 
|  | used_patterns.insert(condition_iter->string_pattern()->id()); | 
|  | } | 
|  | const URLMatcherConditionSet::QueryConditions& query_conditions = | 
|  | condition_set_iter->second->query_conditions(); | 
|  | for (URLMatcherConditionSet::QueryConditions::const_iterator | 
|  | query_condition_iter = query_conditions.begin(); | 
|  | query_condition_iter != query_conditions.end(); | 
|  | ++query_condition_iter) { | 
|  | used_patterns.insert(query_condition_iter->string_pattern()->id()); | 
|  | } | 
|  | } | 
|  | condition_factory_.ForgetUnusedPatterns(used_patterns); | 
|  | } | 
|  |  | 
|  | void URLMatcher::UpdateInternalDatastructures() { | 
|  | UpdateSubstringSetMatcher(false); | 
|  | UpdateSubstringSetMatcher(true); | 
|  | UpdateRegexSetMatcher(); | 
|  | UpdateTriggers(); | 
|  | UpdateConditionFactory(); | 
|  | } | 
|  |  | 
|  | }  // namespace url_matcher |