components/url_pattern_index/url_pattern.cc - chromium/src - Git at Google

 // Copyright 2016 The Chromium Authors
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 // The matching logic distinguishes between the terms URL pattern and
 // subpattern. A URL pattern usually stands for the full thing, e.g.
 // "example.com^*path*par=val^", whereas subpattern denotes a maximal substring
 // of a pattern not containing the wildcard '*' character. For the example above
 // the subpatterns are: "example.com^", "path" and "par=val^".
 //
 // The separator placeholder '^' symbol is used in subpatterns to match any
 // separator character, which is any ASCII symbol except letters, digits, and
 // the following: '_', '-', '.', '%'. Note that the separator placeholder
 // character '^' is itself a separator, as well as '\0'.

 #include "components/url_pattern_index/url_pattern.h"

 #include <stddef.h>

 #include <algorithm>
 #include <ostream>
 #include <string_view>

 #include "base/check_op.h"
 #include "base/notreached.h"
 #include "base/numerics/checked_math.h"
 #include "base/strings/string_util.h"
 #include "components/url_pattern_index/flat/url_pattern_index_generated.h"
 #include "components/url_pattern_index/fuzzy_pattern_matching.h"
 #include "components/url_pattern_index/string_splitter.h"
 #include "url/gurl.h"
 #include "url/third_party/mozilla/url_parse.h"

 namespace url_pattern_index {

 namespace {

 constexpr char kWildcard = '*';

 class IsWildcard {
  public:
   bool operator()(char c) const { return c == kWildcard; }
 };

 proto::UrlPatternType ConvertUrlPatternType(flat::UrlPatternType type) {
   switch (type) {
     case flat::UrlPatternType_SUBSTRING:
       return proto::URL_PATTERN_TYPE_SUBSTRING;
     case flat::UrlPatternType_WILDCARDED:
       return proto::URL_PATTERN_TYPE_WILDCARDED;
     case flat::UrlPatternType_REGEXP:
       return proto::URL_PATTERN_TYPE_REGEXP;
     default:
       return proto::URL_PATTERN_TYPE_UNSPECIFIED;
   }
 }

 proto::AnchorType ConvertAnchorType(flat::AnchorType type) {
   switch (type) {
     case flat::AnchorType_NONE:
       return proto::ANCHOR_TYPE_NONE;
     case flat::AnchorType_BOUNDARY:
       return proto::ANCHOR_TYPE_BOUNDARY;
     case flat::AnchorType_SUBDOMAIN:
       return proto::ANCHOR_TYPE_SUBDOMAIN;
     default:
       return proto::ANCHOR_TYPE_UNSPECIFIED;
   }
 }

 std::string_view ConvertString(const flatbuffers::String* string) {
   return string ? std::string_view(string->data(), string->size())
                 : std::string_view();
 }

 bool HasAnyUpperAscii(std::string_view string) {
   return std::ranges::any_of(string, base::IsAsciiUpper<char>);
 }

 // Returns whether |position| within the |url| belongs to its |host| component
 // and corresponds to the beginning of a (sub-)domain.
 inline bool IsSubdomainAnchored(std::string_view url,
                                 url::Component host,
                                 size_t position) {
   DCHECK_LE(position, url.size());
   const size_t host_begin = static_cast<size_t>(host.begin);
   const size_t host_end = static_cast<size_t>(host.end());
   DCHECK_LE(host_end, url.size());

   return position == host_begin ||
          (position > host_begin && position <= host_end &&
           url[position - 1] == '.');
 }

 // Returns the position of the leftmost occurrence of a |subpattern| in the
 // |text| starting no earlier than |from| the specified position. If the
 // |subpattern| has separator placeholders, searches for a fuzzy occurrence.
 size_t FindSubpattern(std::string_view text,
                       std::string_view subpattern,
                       size_t from = 0) {
   const bool is_fuzzy =
       (subpattern.find(kSeparatorPlaceholder) != std::string_view::npos);
   return is_fuzzy ? FindFuzzy(text, subpattern, from)
                   : text.find(subpattern, from);
 }

 // Same as FindSubpattern(url, subpattern), but searches for an occurrence that
 // starts at the beginning of a (sub-)domain within the url's |host| component.
 size_t FindSubdomainAnchoredSubpattern(std::string_view url,
                                        url::Component host,
                                        std::string_view subpattern) {
   const bool is_fuzzy =
       (subpattern.find(kSeparatorPlaceholder) != std::string_view::npos);

   // Any match found after the end of the host will be discarded, so just
   // avoid searching there for the subpattern to begin with.
   //
   // Check for overflow.
   size_t max_match_end = 0;
   if (!base::CheckAdd(host.end(), subpattern.length())
            .AssignIfValid(&max_match_end)) {
     return std::string_view::npos;
   }
   const std::string_view url_match_candidate = url.substr(0, max_match_end);
   const std::string_view url_host = url.substr(0, host.end());

   for (size_t position = static_cast<size_t>(host.begin);
        position <= static_cast<size_t>(host.end()); ++position) {
     // Enforce as a loop precondition that we are always anchored at a
     // sub-domain before calling find. This is to reduce the number of potential
     // searches for |subpattern|.
     DCHECK(IsSubdomainAnchored(url, host, position));

     position = is_fuzzy ? FindFuzzy(url_match_candidate, subpattern, position)
                         : url_match_candidate.find(subpattern, position);
     if (position == std::string_view::npos ||
         IsSubdomainAnchored(url, host, position)) {
       return position;
     }

     // Enforce the loop precondition. This skips |position| to the next '.',
     // within the host, which the loop itself increments to the anchored
     // sub-domain.
     position = url_host.find('.', position);
     if (position == std::string_view::npos) {
       break;
     }
   }
   return std::string_view::npos;
 }

 // Helper for DoesTextMatchLastSubpattern. Treats kSeparatorPlaceholder as not
 // matching the end of the text.
 bool DoesTextMatchLastSubpatternInternal(proto::AnchorType anchor_left,
                                          proto::AnchorType anchor_right,
                                          std::string_view text,
                                          url::Component url_host,
                                          std::string_view subpattern) {
   // Enumerate all possible combinations of |anchor_left| and |anchor_right|.
   if (anchor_left == proto::ANCHOR_TYPE_NONE &&
       anchor_right == proto::ANCHOR_TYPE_NONE) {
     return FindSubpattern(text, subpattern) != std::string_view::npos;
   }

   if (anchor_left == proto::ANCHOR_TYPE_NONE &&
       anchor_right == proto::ANCHOR_TYPE_BOUNDARY) {
     return EndsWithFuzzy(text, subpattern);
   }

   if (anchor_left == proto::ANCHOR_TYPE_BOUNDARY &&
       anchor_right == proto::ANCHOR_TYPE_NONE) {
     return StartsWithFuzzy(text, subpattern);
   }

   if (anchor_left == proto::ANCHOR_TYPE_BOUNDARY &&
       anchor_right == proto::ANCHOR_TYPE_BOUNDARY) {
     return text.size() == subpattern.size() &&
            StartsWithFuzzy(text, subpattern);
   }

   if (anchor_left == proto::ANCHOR_TYPE_SUBDOMAIN &&
       anchor_right == proto::ANCHOR_TYPE_NONE) {
     return url_host.is_nonempty() &&
            FindSubdomainAnchoredSubpattern(text, url_host, subpattern) !=
                std::string_view::npos;
   }

   if (anchor_left == proto::ANCHOR_TYPE_SUBDOMAIN &&
       anchor_right == proto::ANCHOR_TYPE_BOUNDARY) {
     return url_host.is_nonempty() && text.size() >= subpattern.size() &&
            IsSubdomainAnchored(text, url_host,
                                text.size() - subpattern.size()) &&
            EndsWithFuzzy(text, subpattern);
   }

   NOTREACHED();
 }

 // Matches the last |subpattern| against |text|. Special treatment is required
 // for the last subpattern since |kSeparatorPlaceholder| can also match the end
 // of the text.
 bool DoesTextMatchLastSubpattern(proto::AnchorType anchor_left,
                                  proto::AnchorType anchor_right,
                                  std::string_view text,
                                  url::Component url_host,
                                  std::string_view subpattern) {
   DCHECK(!subpattern.empty());

   if (DoesTextMatchLastSubpatternInternal(anchor_left, anchor_right, text,
                                           url_host, subpattern)) {
     return true;
   }

   // If the last |subpattern| ends with kSeparatorPlaceholder, then it can also
   // match the end of text.
   if (subpattern.back() == kSeparatorPlaceholder) {
     subpattern.remove_suffix(1);
     return DoesTextMatchLastSubpatternInternal(
         anchor_left, proto::ANCHOR_TYPE_BOUNDARY, text, url_host, subpattern);
   }

   return false;
 }

 // Returns whether the given |url_pattern| matches the given |url_spec|.
 // Compares the pattern the the url in a case-sensitive manner.
 bool IsCaseSensitiveMatch(std::string_view url_pattern,
                           proto::AnchorType anchor_left,
                           proto::AnchorType anchor_right,
                           std::string_view url_spec,
                           url::Component url_host) {
   DCHECK(!url_spec.empty());

   StringSplitter<IsWildcard> subpatterns(url_pattern);
   auto subpattern_it = subpatterns.begin();
   auto subpattern_end = subpatterns.end();

   // No subpatterns.
   if (subpattern_it == subpattern_end) {
     return anchor_left == proto::ANCHOR_TYPE_NONE ||
            anchor_right == proto::ANCHOR_TYPE_NONE;
   }

   std::string_view subpattern = *subpattern_it;
   ++subpattern_it;

   // There is only one |subpattern|.
   if (subpattern_it == subpattern_end) {
     return DoesTextMatchLastSubpattern(anchor_left, anchor_right, url_spec,
                                        url_host, subpattern);
   }

   // Otherwise, the first |subpattern| does not have to be a suffix. But it
   // still can have a left anchor. Check and handle that.
   std::string_view text = url_spec;
   if (anchor_left == proto::ANCHOR_TYPE_BOUNDARY) {
     if (!StartsWithFuzzy(url_spec, subpattern))
       return false;
     text.remove_prefix(subpattern.size());
   } else if (anchor_left == proto::ANCHOR_TYPE_SUBDOMAIN) {
     if (url_host.is_empty())
       return false;
     const size_t match_begin =
         FindSubdomainAnchoredSubpattern(url_spec, url_host, subpattern);
     if (match_begin == std::string_view::npos) {
       return false;
     }
     text.remove_prefix(match_begin + subpattern.size());
   } else {
     DCHECK_EQ(anchor_left, proto::ANCHOR_TYPE_NONE);
     // Get back to the initial |subpattern|, process it in the loop below.
     subpattern_it = subpatterns.begin();
   }

   DCHECK(subpattern_it != subpattern_end);
   subpattern = *subpattern_it;

   // Consecutively find all the remaining subpatterns in the |text|. Handle the
   // last subpattern outside the loop.
   while (++subpattern_it != subpattern_end) {
     DCHECK(!subpattern.empty());

     const size_t match_position = FindSubpattern(text, subpattern);
     if (match_position == std::string_view::npos) {
       return false;
     }
     text.remove_prefix(match_position + subpattern.size());

     subpattern = *subpattern_it;
   }

   return DoesTextMatchLastSubpattern(proto::ANCHOR_TYPE_NONE, anchor_right,
                                      text, url::Component(), subpattern);
 }

 }  // namespace

 UrlPattern::UrlInfo::UrlInfo(const GURL& url)
     : spec_(url.possibly_invalid_spec()),
       host_(url.parsed_for_possibly_invalid_spec().host) {
   DCHECK(url.is_valid());
 }

 std::string_view UrlPattern::UrlInfo::GetLowerCaseSpec() const {
   if (lower_case_spec_cached_)
     return *lower_case_spec_cached_;

   if (!HasAnyUpperAscii(spec_)) {
     lower_case_spec_cached_ = spec_;
   } else {
     lower_case_spec_owner_ = base::ToLowerASCII(spec_);
     lower_case_spec_cached_ = lower_case_spec_owner_;
   }
   return *lower_case_spec_cached_;
 }

 std::string_view UrlPattern::UrlInfo::GetStringHost() const {
   if (host().is_empty())
     return std::string_view();
   return std::string_view(&spec_[host().begin], host().len);
 }

 UrlPattern::UrlInfo::~UrlInfo() = default;

 UrlPattern::UrlPattern() = default;

 UrlPattern::UrlPattern(std::string_view url_pattern,
                        proto::UrlPatternType type,
                        MatchCase match_case)
     : type_(type), url_pattern_(url_pattern), match_case_(match_case) {}

 UrlPattern::UrlPattern(std::string_view url_pattern,
                        proto::AnchorType anchor_left,
                        proto::AnchorType anchor_right)
     : type_(proto::URL_PATTERN_TYPE_WILDCARDED),
       url_pattern_(url_pattern),
       anchor_left_(anchor_left),
       anchor_right_(anchor_right) {}

 UrlPattern::UrlPattern(const flat::UrlRule& rule)
     : type_(ConvertUrlPatternType(rule.url_pattern_type())),
       url_pattern_(ConvertString(rule.url_pattern())),
       anchor_left_(ConvertAnchorType(rule.anchor_left())),
       anchor_right_(ConvertAnchorType(rule.anchor_right())),
       match_case_(rule.options() & flat::OptionFlag_IS_MATCH_CASE
                       ? MatchCase::kTrue
                       : MatchCase::kFalse) {}

 UrlPattern::~UrlPattern() = default;

 bool UrlPattern::MatchesUrl(const UrlInfo& url) const {
   DCHECK(type_ == proto::URL_PATTERN_TYPE_SUBSTRING ||
          type_ == proto::URL_PATTERN_TYPE_WILDCARDED);
   DCHECK(base::IsStringASCII(url_pattern_));
   DCHECK(base::IsStringASCII(url.spec()));
   DCHECK(base::IsStringASCII(url.GetLowerCaseSpec()));

   // Pre-process patterns to ensure left anchored and right anchored patterns
   // don't begin and end with a wildcard respectively i.e. change "|*xyz" to
   // "*xyz" and "xyz*|" to "xyz*".
   proto::AnchorType anchor_left = anchor_left_;
   proto::AnchorType anchor_right = anchor_right_;
   if (!url_pattern_.empty()) {
     if (url_pattern_.front() == kWildcard) {
       // Note: We don't handle "||*" and expect clients to disallow it.
       DCHECK_NE(proto::ANCHOR_TYPE_SUBDOMAIN, anchor_left_);
       anchor_left = proto::ANCHOR_TYPE_NONE;
     }
     if (url_pattern_.back() == kWildcard)
       anchor_right = proto::ANCHOR_TYPE_NONE;
   }

   if (match_case()) {
     return IsCaseSensitiveMatch(url_pattern_, anchor_left, anchor_right,
                                 url.spec(), url.host());
   }

   // Use the lower-cased url for case-insensitive comparison. Case-insensitive
   // patterns should already be lower-cased.
   DCHECK(!HasAnyUpperAscii(url_pattern_));
   return IsCaseSensitiveMatch(url_pattern_, anchor_left, anchor_right,
                               url.GetLowerCaseSpec(), url.host());
 }

 std::ostream& operator<<(std::ostream& out, const UrlPattern& pattern) {
   switch (pattern.anchor_left()) {
     case proto::ANCHOR_TYPE_SUBDOMAIN:
       out << '|';
       [[fallthrough]];
     case proto::ANCHOR_TYPE_BOUNDARY:
       out << '|';
       [[fallthrough]];
     default:
       break;
   }
   out << pattern.url_pattern();
   if (pattern.anchor_right() == proto::ANCHOR_TYPE_BOUNDARY)
     out << '|';
   if (pattern.match_case())
     out << "$match-case";

   return out;
 }

 }  // namespace url_pattern_index
	// Copyright 2016 The Chromium Authors
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	// The matching logic distinguishes between the terms URL pattern and
	// subpattern. A URL pattern usually stands for the full thing, e.g.
	// "example.com^pathpar=val^", whereas subpattern denotes a maximal substring
	// of a pattern not containing the wildcard '*' character. For the example above
	// the subpatterns are: "example.com^", "path" and "par=val^".
	//
	// The separator placeholder '^' symbol is used in subpatterns to match any
	// separator character, which is any ASCII symbol except letters, digits, and
	// the following: '_', '-', '.', '%'. Note that the separator placeholder
	// character '^' is itself a separator, as well as '\0'.

	#include "components/url_pattern_index/url_pattern.h"

	#include <stddef.h>

	#include <algorithm>
	#include <ostream>
	#include <string_view>

	#include "base/check_op.h"
	#include "base/notreached.h"
	#include "base/numerics/checked_math.h"
	#include "base/strings/string_util.h"
	#include "components/url_pattern_index/flat/url_pattern_index_generated.h"
	#include "components/url_pattern_index/fuzzy_pattern_matching.h"
	#include "components/url_pattern_index/string_splitter.h"
	#include "url/gurl.h"
	#include "url/third_party/mozilla/url_parse.h"

	namespace url_pattern_index {

	namespace {

	constexpr char kWildcard = '*';

	class IsWildcard {
	public:
	bool operator()(char c) const { return c == kWildcard; }
	};

	proto::UrlPatternType ConvertUrlPatternType(flat::UrlPatternType type) {
	switch (type) {
	case flat::UrlPatternType_SUBSTRING:
	return proto::URL_PATTERN_TYPE_SUBSTRING;
	case flat::UrlPatternType_WILDCARDED:
	return proto::URL_PATTERN_TYPE_WILDCARDED;
	case flat::UrlPatternType_REGEXP:
	return proto::URL_PATTERN_TYPE_REGEXP;
	default:
	return proto::URL_PATTERN_TYPE_UNSPECIFIED;
	}
	}

	proto::AnchorType ConvertAnchorType(flat::AnchorType type) {
	switch (type) {
	case flat::AnchorType_NONE:
	return proto::ANCHOR_TYPE_NONE;
	case flat::AnchorType_BOUNDARY:
	return proto::ANCHOR_TYPE_BOUNDARY;
	case flat::AnchorType_SUBDOMAIN:
	return proto::ANCHOR_TYPE_SUBDOMAIN;
	default:
	return proto::ANCHOR_TYPE_UNSPECIFIED;
	}
	}

	std::string_view ConvertString(const flatbuffers::String* string) {
	return string ? std::string_view(string->data(), string->size())
	: std::string_view();
	}

	bool HasAnyUpperAscii(std::string_view string) {
	return std::ranges::any_of(string, base::IsAsciiUpper<char>);
	}

	// Returns whether \|position\| within the \|url\| belongs to its \|host\| component
	// and corresponds to the beginning of a (sub-)domain.
	inline bool IsSubdomainAnchored(std::string_view url,
	url::Component host,
	size_t position) {
	DCHECK_LE(position, url.size());
	const size_t host_begin = static_cast<size_t>(host.begin);
	const size_t host_end = static_cast<size_t>(host.end());
	DCHECK_LE(host_end, url.size());

	return position == host_begin \|\|
	(position > host_begin && position <= host_end &&
	url[position - 1] == '.');
	}

	// Returns the position of the leftmost occurrence of a \|subpattern\| in the
	// \|text\| starting no earlier than \|from\| the specified position. If the
	// \|subpattern\| has separator placeholders, searches for a fuzzy occurrence.
	size_t FindSubpattern(std::string_view text,
	std::string_view subpattern,
	size_t from = 0) {
	const bool is_fuzzy =
	(subpattern.find(kSeparatorPlaceholder) != std::string_view::npos);
	return is_fuzzy ? FindFuzzy(text, subpattern, from)
	: text.find(subpattern, from);
	}

	// Same as FindSubpattern(url, subpattern), but searches for an occurrence that
	// starts at the beginning of a (sub-)domain within the url's \|host\| component.
	size_t FindSubdomainAnchoredSubpattern(std::string_view url,
	url::Component host,
	std::string_view subpattern) {
	const bool is_fuzzy =
	(subpattern.find(kSeparatorPlaceholder) != std::string_view::npos);

	// Any match found after the end of the host will be discarded, so just
	// avoid searching there for the subpattern to begin with.
	//
	// Check for overflow.
	size_t max_match_end = 0;
	if (!base::CheckAdd(host.end(), subpattern.length())
	.AssignIfValid(&max_match_end)) {
	return std::string_view::npos;
	}
	const std::string_view url_match_candidate = url.substr(0, max_match_end);
	const std::string_view url_host = url.substr(0, host.end());

	for (size_t position = static_cast<size_t>(host.begin);
	position <= static_cast<size_t>(host.end()); ++position) {
	// Enforce as a loop precondition that we are always anchored at a
	// sub-domain before calling find. This is to reduce the number of potential
	// searches for \|subpattern\|.
	DCHECK(IsSubdomainAnchored(url, host, position));

	position = is_fuzzy ? FindFuzzy(url_match_candidate, subpattern, position)
	: url_match_candidate.find(subpattern, position);
	if (position == std::string_view::npos \|\|
	IsSubdomainAnchored(url, host, position)) {
	return position;
	}

	// Enforce the loop precondition. This skips \|position\| to the next '.',
	// within the host, which the loop itself increments to the anchored
	// sub-domain.
	position = url_host.find('.', position);
	if (position == std::string_view::npos) {
	break;
	}
	}
	return std::string_view::npos;
	}

	// Helper for DoesTextMatchLastSubpattern. Treats kSeparatorPlaceholder as not
	// matching the end of the text.
	bool DoesTextMatchLastSubpatternInternal(proto::AnchorType anchor_left,
	proto::AnchorType anchor_right,
	std::string_view text,
	url::Component url_host,
	std::string_view subpattern) {
	// Enumerate all possible combinations of \|anchor_left\| and \|anchor_right\|.
	if (anchor_left == proto::ANCHOR_TYPE_NONE &&
	anchor_right == proto::ANCHOR_TYPE_NONE) {
	return FindSubpattern(text, subpattern) != std::string_view::npos;
	}

	if (anchor_left == proto::ANCHOR_TYPE_NONE &&
	anchor_right == proto::ANCHOR_TYPE_BOUNDARY) {
	return EndsWithFuzzy(text, subpattern);
	}

	if (anchor_left == proto::ANCHOR_TYPE_BOUNDARY &&
	anchor_right == proto::ANCHOR_TYPE_NONE) {
	return StartsWithFuzzy(text, subpattern);
	}

	if (anchor_left == proto::ANCHOR_TYPE_BOUNDARY &&
	anchor_right == proto::ANCHOR_TYPE_BOUNDARY) {
	return text.size() == subpattern.size() &&
	StartsWithFuzzy(text, subpattern);
	}

	if (anchor_left == proto::ANCHOR_TYPE_SUBDOMAIN &&
	anchor_right == proto::ANCHOR_TYPE_NONE) {
	return url_host.is_nonempty() &&
	FindSubdomainAnchoredSubpattern(text, url_host, subpattern) !=
	std::string_view::npos;
	}

	if (anchor_left == proto::ANCHOR_TYPE_SUBDOMAIN &&
	anchor_right == proto::ANCHOR_TYPE_BOUNDARY) {
	return url_host.is_nonempty() && text.size() >= subpattern.size() &&
	IsSubdomainAnchored(text, url_host,
	text.size() - subpattern.size()) &&
	EndsWithFuzzy(text, subpattern);
	}

	NOTREACHED();
	}

	// Matches the last \|subpattern\| against \|text\|. Special treatment is required
	// for the last subpattern since \|kSeparatorPlaceholder\| can also match the end
	// of the text.
	bool DoesTextMatchLastSubpattern(proto::AnchorType anchor_left,
	proto::AnchorType anchor_right,
	std::string_view text,
	url::Component url_host,
	std::string_view subpattern) {
	DCHECK(!subpattern.empty());

	if (DoesTextMatchLastSubpatternInternal(anchor_left, anchor_right, text,
	url_host, subpattern)) {
	return true;
	}

	// If the last \|subpattern\| ends with kSeparatorPlaceholder, then it can also
	// match the end of text.
	if (subpattern.back() == kSeparatorPlaceholder) {
	subpattern.remove_suffix(1);
	return DoesTextMatchLastSubpatternInternal(
	anchor_left, proto::ANCHOR_TYPE_BOUNDARY, text, url_host, subpattern);
	}

	return false;
	}

	// Returns whether the given \|url_pattern\| matches the given \|url_spec\|.
	// Compares the pattern the the url in a case-sensitive manner.
	bool IsCaseSensitiveMatch(std::string_view url_pattern,
	proto::AnchorType anchor_left,
	proto::AnchorType anchor_right,
	std::string_view url_spec,
	url::Component url_host) {
	DCHECK(!url_spec.empty());

	StringSplitter<IsWildcard> subpatterns(url_pattern);
	auto subpattern_it = subpatterns.begin();
	auto subpattern_end = subpatterns.end();

	// No subpatterns.
	if (subpattern_it == subpattern_end) {
	return anchor_left == proto::ANCHOR_TYPE_NONE \|\|
	anchor_right == proto::ANCHOR_TYPE_NONE;
	}

	std::string_view subpattern = *subpattern_it;
	++subpattern_it;

	// There is only one \|subpattern\|.
	if (subpattern_it == subpattern_end) {
	return DoesTextMatchLastSubpattern(anchor_left, anchor_right, url_spec,
	url_host, subpattern);
	}

	// Otherwise, the first \|subpattern\| does not have to be a suffix. But it
	// still can have a left anchor. Check and handle that.
	std::string_view text = url_spec;
	if (anchor_left == proto::ANCHOR_TYPE_BOUNDARY) {
	if (!StartsWithFuzzy(url_spec, subpattern))
	return false;
	text.remove_prefix(subpattern.size());
	} else if (anchor_left == proto::ANCHOR_TYPE_SUBDOMAIN) {
	if (url_host.is_empty())
	return false;
	const size_t match_begin =
	FindSubdomainAnchoredSubpattern(url_spec, url_host, subpattern);
	if (match_begin == std::string_view::npos) {
	return false;
	}
	text.remove_prefix(match_begin + subpattern.size());
	} else {
	DCHECK_EQ(anchor_left, proto::ANCHOR_TYPE_NONE);
	// Get back to the initial \|subpattern\|, process it in the loop below.
	subpattern_it = subpatterns.begin();
	}

	DCHECK(subpattern_it != subpattern_end);
	subpattern = *subpattern_it;

	// Consecutively find all the remaining subpatterns in the \|text\|. Handle the
	// last subpattern outside the loop.
	while (++subpattern_it != subpattern_end) {
	DCHECK(!subpattern.empty());

	const size_t match_position = FindSubpattern(text, subpattern);
	if (match_position == std::string_view::npos) {
	return false;
	}
	text.remove_prefix(match_position + subpattern.size());

	subpattern = *subpattern_it;
	}

	return DoesTextMatchLastSubpattern(proto::ANCHOR_TYPE_NONE, anchor_right,
	text, url::Component(), subpattern);
	}

	} // namespace

	UrlPattern::UrlInfo::UrlInfo(const GURL& url)
	: spec_(url.possibly_invalid_spec()),
	host_(url.parsed_for_possibly_invalid_spec().host) {
	DCHECK(url.is_valid());
	}

	std::string_view UrlPattern::UrlInfo::GetLowerCaseSpec() const {
	if (lower_case_spec_cached_)
	return *lower_case_spec_cached_;

	if (!HasAnyUpperAscii(spec_)) {
	lower_case_spec_cached_ = spec_;
	} else {
	lower_case_spec_owner_ = base::ToLowerASCII(spec_);
	lower_case_spec_cached_ = lower_case_spec_owner_;
	}
	return *lower_case_spec_cached_;
	}

	std::string_view UrlPattern::UrlInfo::GetStringHost() const {
	if (host().is_empty())
	return std::string_view();
	return std::string_view(&spec_[host().begin], host().len);
	}

	UrlPattern::UrlInfo::~UrlInfo() = default;

	UrlPattern::UrlPattern() = default;

	UrlPattern::UrlPattern(std::string_view url_pattern,
	proto::UrlPatternType type,
	MatchCase match_case)
	: type_(type), url_pattern_(url_pattern), match_case_(match_case) {}

	UrlPattern::UrlPattern(std::string_view url_pattern,
	proto::AnchorType anchor_left,
	proto::AnchorType anchor_right)
	: type_(proto::URL_PATTERN_TYPE_WILDCARDED),
	url_pattern_(url_pattern),
	anchor_left_(anchor_left),
	anchor_right_(anchor_right) {}

	UrlPattern::UrlPattern(const flat::UrlRule& rule)
	: type_(ConvertUrlPatternType(rule.url_pattern_type())),
	url_pattern_(ConvertString(rule.url_pattern())),
	anchor_left_(ConvertAnchorType(rule.anchor_left())),
	anchor_right_(ConvertAnchorType(rule.anchor_right())),
	match_case_(rule.options() & flat::OptionFlag_IS_MATCH_CASE
	? MatchCase::kTrue
	: MatchCase::kFalse) {}

	UrlPattern::~UrlPattern() = default;

	bool UrlPattern::MatchesUrl(const UrlInfo& url) const {
	DCHECK(type_ == proto::URL_PATTERN_TYPE_SUBSTRING \|\|
	type_ == proto::URL_PATTERN_TYPE_WILDCARDED);
	DCHECK(base::IsStringASCII(url_pattern_));
	DCHECK(base::IsStringASCII(url.spec()));
	DCHECK(base::IsStringASCII(url.GetLowerCaseSpec()));

	// Pre-process patterns to ensure left anchored and right anchored patterns
	// don't begin and end with a wildcard respectively i.e. change "\|*xyz" to
	// "xyz" and "xyz\|" to "xyz*".
	proto::AnchorType anchor_left = anchor_left_;
	proto::AnchorType anchor_right = anchor_right_;
	if (!url_pattern_.empty()) {
	if (url_pattern_.front() == kWildcard) {
	// Note: We don't handle "\|\|*" and expect clients to disallow it.
	DCHECK_NE(proto::ANCHOR_TYPE_SUBDOMAIN, anchor_left_);
	anchor_left = proto::ANCHOR_TYPE_NONE;
	}
	if (url_pattern_.back() == kWildcard)
	anchor_right = proto::ANCHOR_TYPE_NONE;
	}

	if (match_case()) {
	return IsCaseSensitiveMatch(url_pattern_, anchor_left, anchor_right,
	url.spec(), url.host());
	}

	// Use the lower-cased url for case-insensitive comparison. Case-insensitive
	// patterns should already be lower-cased.
	DCHECK(!HasAnyUpperAscii(url_pattern_));
	return IsCaseSensitiveMatch(url_pattern_, anchor_left, anchor_right,
	url.GetLowerCaseSpec(), url.host());
	}

	std::ostream& operator<<(std::ostream& out, const UrlPattern& pattern) {
	switch (pattern.anchor_left()) {
	case proto::ANCHOR_TYPE_SUBDOMAIN:
	out << '\|';
	[[fallthrough]];
	case proto::ANCHOR_TYPE_BOUNDARY:
	out << '\|';
	[[fallthrough]];
	default:
	break;
	}
	out << pattern.url_pattern();
	if (pattern.anchor_right() == proto::ANCHOR_TYPE_BOUNDARY)
	out << '\|';
	if (pattern.match_case())
	out << "$match-case";

	return out;
	}

	} // namespace url_pattern_index