chrome/browser/browser_switcher/browser_switcher_sitelist.cc - chromium/src - Git at Google

 // Copyright 2018 The Chromium Authors
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "chrome/browser/browser_switcher/browser_switcher_sitelist.h"

 #include <string.h>

 #include <algorithm>
 #include <optional>
 #include <sstream>
 #include <string>
 #include <string_view>
 #include <utility>
 #include <vector>

 #include "base/functional/bind.h"
 #include "base/metrics/histogram_macros.h"
 #include "base/strings/strcat.h"
 #include "base/strings/string_util.h"
 #include "base/values.h"
 #include "chrome/browser/browser_switcher/browser_switcher_prefs.h"
 #include "chrome/browser/browser_switcher/ieem_sitelist_parser.h"
 #include "components/prefs/pref_service.h"
 #include "components/url_formatter/url_fixer.h"
 #include "third_party/re2/src/re2/re2.h"
 #include "url/gurl.h"
 #include "url/url_util.h"

 namespace browser_switcher {

 namespace {

 // Searches |input| for the first occurrence of |token|, comparing characters
 // without regard to ASCII case.
 //
 // The result is whatever std::ranges::search() yields: a subrange covering the
 // occurrence inside |input|. When |token| does not occur, that subrange is
 // empty and positioned at the end of |input|, so callers test `.begin()`
 // against an iterator of the view they passed in.
 auto StringFindInsensitiveASCII(std::string_view input,
                                 std::string_view token) {
   const auto same_ignoring_case = [](char lhs, char rhs) {
     return base::ToLowerASCII(lhs) == base::ToLowerASCII(rhs);
   };
   return std::ranges::search(input, token, same_ignoring_case);
 }

 // Checks if the omitted prefix for a non-fully specific prefix is one of the
 // expected parts that are allowed to be omitted (e.g. "https://").
 bool IsValidPrefix(std::string_view prefix) {
   static re2::LazyRE2 re = {"(https?|file):(//)?"};
   return prefix.empty() || re2::RE2::FullMatch(prefix, *re);
 }

 // Returns the rule in |rules| with the highest priority() among those that
 // match |url|, or nullptr when nothing matches. A later rule replaces an
 // earlier match only when its priority is strictly greater, so on ties the
 // first matching rule wins.
 //
 // When |contains_inverted_matches| is false, rules whose inverted() is true
 // are skipped entirely.
 const Rule* MatchUrlToList(const NoCopyUrl& url,
                            const std::vector<std::unique_ptr<Rule>>& rules,
                            bool contains_inverted_matches) {
   const Rule* best = nullptr;
   for (const auto& candidate : rules) {
     DCHECK(candidate);
     // Checks are ordered cheapest first; Matches() only runs for rules that
     // could actually replace the current best.
     if ((!best || candidate->priority() > best->priority()) &&
         (contains_inverted_matches || !candidate->inverted()) &&
         candidate->Matches(url)) {
       best = candidate.get();
     }
   }
   return best;
 }

 // Rule for the bare "*" pattern. It is always valid and matches every URL, so
 // the ParsingMode has no influence on it.
 class WildcardRule : public Rule {
  public:
   WildcardRule() : Rule("*") {}
   ~WildcardRule() override = default;

   bool IsValid() const override { return true; }

   bool Matches(const NoCopyUrl& /*url*/) const override { return true; }

   std::string ToString() const override { return std::string("*"); }
 };

 // Rules with ParsingMode::kDefault. They treat rules with/without a '/'
 // separately. They do some pre-processing to come up with a |canonical_| rule
 // string, then some simple string searches.
 class DefaultModeRule : public Rule {
  public:
   explicit DefaultModeRule(std::string_view original_rule)
       : Rule(original_rule) {
     canonical_ = std::string(original_rule);

     // Drop the leading "!", if present.
     if (inverted())
       canonical_ = canonical_.substr(1);

     if (canonical_.find("/") == std::string::npos) {
       // No "/" in the string. It's a hostnmae or wildcard, so just convert to
       // lowercase.
       canonical_ = base::ToLowerASCII(canonical_);
       return;
     }

     // The string has a "/" in it. It could be:
     // - "//example.com/abc", convert hostname to lowercase
     // - "example.com/abc", treat same as "//example.com/abc"
     // - "http://example.com/abc", convert hostname and scheme to lowercase
     // - "/abc", keep capitalization

     if (base::StartsWith(canonical_, "/") &&
         !base::StartsWith(canonical_, "//")) {
       // Rule starts with a single slash, e.g. "/abc". Don't change case.
       return;
     }

     if (canonical_.find("/") != 0 &&
         canonical_.find("://") == std::string::npos) {
       // Transform "example.com/abc" => "//example.com/abc".
       canonical_.insert(0, "//");
     }

     // For patterns that include a "/": parse the URL to get the proper
     // capitalization (for scheme/hostname).
     //
     // To properly parse URLs with no scheme, we need a valid base URL. We use
     // "ftp://XXX/", which is a valid URL with an unsupported scheme. That
     // way, parsing still succeeds, and we can easily know when the scheme
     // isn't part of the original pattern (and omit it from the output).
     const char* placeholder_scheme = "ftp:";
     std::string placeholder = base::StrCat({placeholder_scheme, "//XXX/"});
     GURL base_url(placeholder);

     GURL relative_url = base_url.Resolve(canonical_);
     std::string_view spec = relative_url.possibly_invalid_spec();

     // The parsed URL might start with "ftp://XXX/" or "ftp://". Remove that
     // prefix.
     auto remainder = base::RemovePrefix(spec, placeholder,
                                         base::CompareCase::INSENSITIVE_ASCII);
     if (remainder) {
       spec = *remainder;
     }
     remainder = base::RemovePrefix(spec, placeholder_scheme,
                                    base::CompareCase::INSENSITIVE_ASCII);
     if (remainder) {
       spec = *remainder;
     }
     canonical_ = std::string(spec);
   }

   ~DefaultModeRule() override = default;

   bool Matches(const NoCopyUrl& url) const override {
     std::string_view pattern = canonical_;

     if (pattern.find('/') != std::string_view::npos) {
       // Check that the prefix is valid. The URL's hostname/scheme have
       // already been case-normalized, so that part of the URL is always
       // case-insensitive.
       size_t pos = url.spec().find(pattern);
       if (pos != std::string_view::npos &&
           IsValidPrefix(std::string_view(url.spec().data(), pos))) {
         return true;
       }
       if (!url.spec_without_port().empty()) {
         pos = url.spec_without_port().find(pattern);
         return pos != std::string_view::npos &&
                IsValidPrefix(
                    std::string_view(url.spec_without_port().data(), pos));
       }
       return false;
     }

     // Compare hosts and ports, case-insensitive.
     auto result = StringFindInsensitiveASCII(url.host_and_port(), pattern);
     return result.begin() != url.host_and_port().end();
   }

   bool IsValid() const override { return true; }

   std::string ToString() const override {
     if (inverted())
       return "!" + canonical_;
     return canonical_;
   }

  private:
   // The canonical version of the rule, with the leading "!" removed if it's
   // inverted.
   std::string canonical_;
 };

 // Rule used with ParsingMode::kIESiteListMode.
 //
 // The rule text is parsed as a URL (whether or not it contains a '/') and
 // split into scheme, host, port and path; Matches() compares each stored part
 // with the corresponding part of the candidate URL.
 class IESiteListModeRule : public Rule {
  public:
   explicit IESiteListModeRule(std::string_view original_rule)
       : Rule(original_rule) {
     // Parse the string as a URL and extract its parts.
     //
     // Some parts of the URL are dropped, to match IE/Edge behavior:
     //   - username
     //   - password
     //   - query
     //   - fragment

     // Drop the leading "!", if present.
     if (inverted())
       original_rule = original_rule.substr(1);

     // Rules with leading slashes are interpreted as file:// URLs on POSIX
     // systems. To be more consistent with Windows, remove the leading slash.
     //
     // Only the first slash is removed, to be consistent with Edge (which
     // *does* parse the rule as a file:// URL if there are 2 slashes).
     if (base::StartsWith(original_rule, "/"))
       original_rule = original_rule.substr(1);

     // Parse as a URL. This is more relaxed than GURL's constructor, e.g. it
     // adds http:// if the scheme is missing, which lets strings such as
     // "example.com" (no scheme, no path) parse.
     const GURL url = url_formatter::FixupURL(std::string(original_rule), "");

     if (!url.is_valid() ||
         (!url.SchemeIsHTTPOrHTTPS() && !url.SchemeIsFile())) {
       // The rule is invalid, so it won't match anything. Keep parsing it
       // anyway, in case it gets printed for debugging/troubleshooting.
       valid_ = false;
     }

     // Keep the scheme only when the rule spelled it out (or it is file:);
     // otherwise |scheme_| stays empty and acts as a wildcard ("*").
     //
     // "http://" may have been added by FixupURL(), so the check is made on
     // the original text. A direct prefix test is used instead of a substring
     // search: it inspects only the first few characters and cannot
     // mistake "not found in an empty string" for "found at the start".
     const bool explicit_http_scheme =
         base::StartsWith(original_rule, "http://",
                          base::CompareCase::INSENSITIVE_ASCII) ||
         base::StartsWith(original_rule, "https://",
                          base::CompareCase::INSENSITIVE_ASCII);
     if (valid_ && (explicit_http_scheme || url.SchemeIsFile()))
       scheme_ = url.scheme();

     if (url.has_host())
       host_ = url.host();

     if (url.has_port())
       port_ = url.IntPort();

     // Make sure |path_| always has at least the leading slash.
     if (url.has_path() && !url.path_piece().empty())
       path_ = base::ToLowerASCII(url.path());
     else
       path_ = "/";
   }

   ~IESiteListModeRule() override = default;

   // Must only be called on valid rules (see IsValid()).
   bool Matches(const NoCopyUrl& no_copy_url) const override {
     DCHECK(valid_);

     const GURL& url = no_copy_url.original();

     // Compare schemes, if present in the rule.
     if (scheme_ && url.scheme_piece() != *scheme_)
       return false;

     // Compare hosts.
     if (!url::DomainIs(url.host_piece(), host_))
       return false;

     // Compare ports, if present in the rule.
     if (port_ && url.IntPort() != *port_)
       return false;

     // The URL's path must begin with the rule's path, ignoring ASCII case.
     return base::StartsWith(url.path_piece(), path_,
                             base::CompareCase::INSENSITIVE_ASCII);
   }

   bool IsValid() const override { return valid_; }

   // Typical return value looks like "*://example.com:8000/path".
   // Must only be called on valid rules.
   std::string ToString() const override {
     DCHECK(valid_);

     std::ostringstream out;

     if (inverted())
       out << "!";

     // <scheme>://
     if (scheme_)
       out << *scheme_;
     else
       out << "*";
     out << "://";

     // <host>:<port>
     out << host_;
     if (port_)
       out << ":" << *port_;

     // <path>
     out << path_;

     return out.str();
   }

  private:
   // Unset means "any scheme".
   std::optional<std::string> scheme_;
   std::string host_;
   // Unset means "any port".
   std::optional<int> port_;
   // Lowercased; always at least a "/".
   std::string path_;

   bool valid_ = true;
 };

 }  // namespace

 // Builds the Rule object for |original_rule|. A bare "*" always becomes a
 // WildcardRule; anything else is parsed according to |parsing_mode|. Returns
 // nullptr when the resulting rule reports itself as invalid.
 std::unique_ptr<Rule> CanonicalizeRule(std::string_view original_rule,
                                        ParsingMode parsing_mode) {
   std::unique_ptr<Rule> parsed;
   if (original_rule == "*") {
     parsed = std::make_unique<WildcardRule>();
   } else if (parsing_mode == ParsingMode::kDefault) {
     parsed = std::make_unique<DefaultModeRule>(original_rule);
   } else if (parsing_mode == ParsingMode::kIESiteListMode) {
     parsed = std::make_unique<IESiteListModeRule>(original_rule);
   } else {
     NOTREACHED();
   }

   // Invalid rules are never handed out.
   if (parsed && !parsed->IsValid())
     parsed.reset();
   return parsed;
 }

 Decision::Decision(Action action_, Reason reason_, const Rule* matching_rule_)
     : action(action_), reason(reason_), matching_rule(matching_rule_) {}

 // Defaulted default/copy/move constructors (presumably declared in
 // browser_switcher_sitelist.h).
 // NOTE(review): the copy constructor takes a non-const `Decision&`, so it
 // cannot copy from a const Decision -- confirm that this is intentional.
 Decision::Decision() = default;
 Decision::Decision(Decision&) = default;
 Decision::Decision(Decision&&) = default;

 // Two decisions are equal when action and reason agree and their matching
 // rules are either the same pointer (including both null) or both non-null
 // with identical ToString() output.
 bool Decision::operator==(const Decision& that) const {
   const bool same_verdict = action == that.action && reason == that.reason;
   if (!same_verdict)
     return false;

   // Same rule object, or no rule on either side.
   if (matching_rule == that.matching_rule)
     return true;

   // Different pointers: both must exist and print identically.
   return matching_rule && that.matching_rule &&
          matching_rule->ToString() == that.matching_rule->ToString();
 }

 // Out-of-line defaulted destructor of the sitelist interface.
 BrowserSwitcherSitelist::~BrowserSwitcherSitelist() = default;

 // Convenience wrapper: true exactly when GetDecision() for |url| yields the
 // kGo action.
 bool BrowserSwitcherSitelist::ShouldSwitch(const GURL& url) const {
   const auto action = GetDecision(url).action;
   return action == kGo;
 }

 // Stores |prefs| and subscribes to its change notifications so that
 // OnPrefsChanged() runs whenever prefs change.
 BrowserSwitcherSitelistImpl::BrowserSwitcherSitelistImpl(
     BrowserSwitcherPrefs* prefs)
     : prefs_(prefs) {
   // NOTE(review): base::Unretained(this) is only safe if the subscription
   // stops delivering callbacks once |prefs_changed_subscription_| is
   // destroyed -- presumably the case; confirm against BrowserSwitcherPrefs.
   auto on_prefs_changed = base::BindRepeating(
       &BrowserSwitcherSitelistImpl::OnPrefsChanged, base::Unretained(this));
   prefs_changed_subscription_ =
       prefs_->RegisterPrefsChangedCallback(std::move(on_prefs_changed));
 }

 // Out-of-line defaulted destructor; members clean up after themselves.
 BrowserSwitcherSitelistImpl::~BrowserSwitcherSitelistImpl() = default;

 // Computes the decision for |url| and records whether it was "go" in the
 // "BrowserSwitcher.Decision" histogram. When the feature is not active the
 // result is {kStay, kDisabled} and nothing is recorded.
 Decision BrowserSwitcherSitelistImpl::GetDecision(const GURL& url) const {
   if (IsActive()) {
     Decision decision = GetDecisionImpl(url);
     const bool should_go = decision.action == kGo;
     UMA_HISTOGRAM_BOOLEAN("BrowserSwitcher.Decision", should_go);
     return decision;
   }

   // Inactive: skip the metric so that non-users don't show up in it.
   return {kStay, kDisabled, nullptr};
 }

 // Core decision logic, timed by the "BrowserSwitcher.DecisionTime" histogram.
 //
 //   1. Only http, https and file URLs can switch; anything else stays
 //      (kProtocol).
 //   2. The highest-priority matching sitelist rule across all rulesets is the
 //      reason to go. No match => stay (kDefault); an inverted match => stay
 //      (kSitelist).
 //   3. The highest-priority matching greylist rule (inverted rules ignored)
 //      is the reason to stay. It wins when the sitelist rule has
 //      priority() <= 1 or a strictly lower priority than the greylist rule.
 Decision BrowserSwitcherSitelistImpl::GetDecisionImpl(const GURL& url) const {
   SCOPED_UMA_HISTOGRAM_TIMER("BrowserSwitcher.DecisionTime");

   const bool switchable_scheme =
       url.SchemeIsHTTPOrHTTPS() || url.SchemeIsFile();
   if (!switchable_scheme)
     return {kStay, kProtocol, nullptr};

   NoCopyUrl no_copy_url(url);
   const RuleSet* rulesets[] = {&prefs_->GetRules(), &ieem_sitelist_,
                                &external_sitelist_, &external_greylist_};

   // Finds the highest-priority match over one list (selected through a
   // pointer to member) of every ruleset. Earlier rulesets win ties.
   const auto strongest_match = [&](auto list_member, bool allow_inverted) {
     const Rule* strongest = nullptr;
     for (const RuleSet* rules : rulesets) {
       const Rule* match =
           MatchUrlToList(no_copy_url, rules->*list_member, allow_inverted);
       if (match && (!strongest || match->priority() > strongest->priority()))
         strongest = match;
     }
     return strongest;
   };

   const Rule* reason_to_go =
       strongest_match(&RuleSet::sitelist, /*allow_inverted=*/true);

   // Without a sitelist match the greylists are irrelevant.
   if (!reason_to_go)
     return {kStay, kDefault, nullptr};
   if (reason_to_go->inverted())
     return {kStay, kSitelist, reason_to_go};

   const Rule* reason_to_stay =
       strongest_match(&RuleSet::greylist, /*allow_inverted=*/false);
   if (!reason_to_stay)
     return {kGo, kSitelist, reason_to_go};

   // NOTE(review): priority() <= 1 presumably identifies the weakest possible
   // rules (e.g. a bare wildcard) -- confirm against Rule::priority(). Such a
   // sitelist rule loses to any greylist match.
   const bool go_is_weak = reason_to_go->priority() <= 1;
   const bool go_outranks_stay =
       reason_to_go->priority() >= reason_to_stay->priority();
   if (!go_is_weak && go_outranks_stay)
     return {kGo, kSitelist, reason_to_go};
   return {kStay, kGreylist, reason_to_stay};
 }

 // Installs |rules| as the IEEM sitelist: records the number of sitelist
 // entries in a histogram, rebuilds the parsed rules, and keeps the raw rules
 // so they can be parsed again later.
 void BrowserSwitcherSitelistImpl::SetIeemSitelist(RawRuleSet&& rules) {
   const auto sitelist_size = rules.sitelist.size();
   UMA_HISTOGRAM_COUNTS_100000("BrowserSwitcher.IeemSitelistSize",
                               sitelist_size);

   // Parse first: the move below leaves |rules| in a moved-from state.
   StoreRules(ieem_sitelist_, rules);
   original_ieem_sitelist_ = std::move(rules);
 }

 // Installs |rules| as the external sitelist: records the number of sitelist
 // entries in a histogram, rebuilds the parsed rules, and keeps the raw rules
 // so they can be parsed again later.
 void BrowserSwitcherSitelistImpl::SetExternalSitelist(RawRuleSet&& rules) {
   const auto sitelist_size = rules.sitelist.size();
   UMA_HISTOGRAM_COUNTS_100000("BrowserSwitcher.ExternalSitelistSize",
                               sitelist_size);

   // Parse first: the move below leaves |rules| in a moved-from state.
   StoreRules(external_sitelist_, rules);
   original_external_sitelist_ = std::move(rules);
 }

 // Installs |rules| as the external greylist: records the number of greylist
 // entries in a histogram, rebuilds the parsed rules, and keeps the raw rules
 // so they can be parsed again later.
 //
 // |rules| must carry its entries in `greylist`; `sitelist` has to be empty.
 void BrowserSwitcherSitelistImpl::SetExternalGreylist(RawRuleSet&& rules) {
   DCHECK(rules.sitelist.empty());

   // Sample the greylist size. Sampling |rules.sitelist| here would always
   // record 0, because that list is required to be empty (DCHECK above).
   UMA_HISTOGRAM_COUNTS_100000("BrowserSwitcher.ExternalGreylistSize",
                               rules.greylist.size());

   // Parse first: the move below leaves |rules| in a moved-from state.
   StoreRules(external_greylist_, rules);
   original_external_greylist_ = std::move(rules);
 }

 // Returns the parsed IEEM sitelist rules. The pointer refers to a member of
 // this object and is never null.
 const RuleSet* BrowserSwitcherSitelistImpl::GetIeemSitelist() const {
   return &ieem_sitelist_;
 }

 // Returns the parsed external sitelist rules. The pointer refers to a member
 // of this object and is never null.
 const RuleSet* BrowserSwitcherSitelistImpl::GetExternalSitelist() const {
   return &external_sitelist_;
 }

 // Returns the parsed external greylist rules. The pointer refers to a member
 // of this object and is never null.
 const RuleSet* BrowserSwitcherSitelistImpl::GetExternalGreylist() const {
   return &external_greylist_;
 }

 // Replaces the contents of |dst| with the rules of |src|, parsed with the
 // parsing mode currently set in prefs. Raw rules that do not produce a valid
 // Rule are dropped.
 void BrowserSwitcherSitelistImpl::StoreRules(RuleSet& dst,
                                              const RawRuleSet& src) {
   dst.sitelist.clear();
   dst.greylist.clear();

   const ParsingMode parsing_mode = prefs_->GetParsingMode();

   // Appends the parsed form of every usable entry of |raw_rules|.
   const auto parse_into = [parsing_mode](const auto& raw_rules,
                                          auto& parsed_rules) {
     for (const std::string& text : raw_rules) {
       if (auto rule = CanonicalizeRule(text, parsing_mode))
         parsed_rules.push_back(std::move(rule));
     }
   };

   parse_into(src.sitelist, dst.sitelist);
   parse_into(src.greylist, dst.greylist);
 }

 // Prefs-changed callback. Only a change of the parsing-mode pref matters
 // here: the parsed form of a rule depends on it, so the three rulesets owned
 // by this object are rebuilt from their stored raw rules.
 void BrowserSwitcherSitelistImpl::OnPrefsChanged(
     BrowserSwitcherPrefs* prefs,
     const std::vector<std::string>& changed_prefs) {
   const bool parsing_mode_changed =
       std::ranges::find(changed_prefs, prefs::kParsingMode) !=
       changed_prefs.end();
   if (!parsing_mode_changed)
     return;

   StoreRules(ieem_sitelist_, original_ieem_sitelist_);
   StoreRules(external_sitelist_, original_external_sitelist_);
   StoreRules(external_greylist_, original_external_greylist_);
 }

 bool BrowserSwitcherSitelistImpl::IsActive() const {
   if (!prefs_->IsEnabled())
     return false;

   const RuleSet* rulesets[] = {&prefs_->GetRules(), &ieem_sitelist_,
                                &external_sitelist_, &external_greylist_};
   for (const RuleSet* rules : rulesets) {
     if (!rules->sitelist.empty() || !rules->greylist.empty())
       return true;
   }
   return false;
 }

 }  // namespace browser_switcher
	// Copyright 2018 The Chromium Authors
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "chrome/browser/browser_switcher/browser_switcher_sitelist.h"

	#include <string.h>

	#include <algorithm>
	#include <optional>
	#include <sstream>
	#include <string>
	#include <string_view>
	#include <utility>
	#include <vector>

	#include "base/functional/bind.h"
	#include "base/metrics/histogram_macros.h"
	#include "base/strings/strcat.h"
	#include "base/strings/string_util.h"
	#include "base/values.h"
	#include "chrome/browser/browser_switcher/browser_switcher_prefs.h"
	#include "chrome/browser/browser_switcher/ieem_sitelist_parser.h"
	#include "components/prefs/pref_service.h"
	#include "components/url_formatter/url_fixer.h"
	#include "third_party/re2/src/re2/re2.h"
	#include "url/gurl.h"
	#include "url/url_util.h"

	namespace browser_switcher {

	namespace {

	// Searches |input| for the first occurrence of |token|, comparing characters
	// without regard to ASCII case.
	//
	// The result is whatever std::ranges::search() yields: a subrange covering the
	// occurrence inside |input|. When |token| does not occur, that subrange is
	// empty and positioned at the end of |input|, so callers test `.begin()`
	// against an iterator of the view they passed in.
	auto StringFindInsensitiveASCII(std::string_view input,
	                                std::string_view token) {
	  const auto same_ignoring_case = [](char lhs, char rhs) {
	    return base::ToLowerASCII(lhs) == base::ToLowerASCII(rhs);
	  };
	  return std::ranges::search(input, token, same_ignoring_case);
	}

	// Checks if the omitted prefix for a non-fully specific prefix is one of the
	// expected parts that are allowed to be omitted (e.g. "https://").
	bool IsValidPrefix(std::string_view prefix) {
	static re2::LazyRE2 re = {"(https?\|file):(//)?"};
	return prefix.empty() \|\| re2::RE2::FullMatch(prefix, *re);
	}

	// Returns the rule in |rules| with the highest priority() among those that
	// match |url|, or nullptr when nothing matches. A later rule replaces an
	// earlier match only when its priority is strictly greater, so on ties the
	// first matching rule wins.
	//
	// When |contains_inverted_matches| is false, rules whose inverted() is true
	// are skipped entirely.
	const Rule* MatchUrlToList(const NoCopyUrl& url,
	                           const std::vector<std::unique_ptr<Rule>>& rules,
	                           bool contains_inverted_matches) {
	  const Rule* best = nullptr;
	  for (const auto& candidate : rules) {
	    DCHECK(candidate);
	    // Checks are ordered cheapest first; Matches() only runs for rules that
	    // could actually replace the current best.
	    if ((!best || candidate->priority() > best->priority()) &&
	        (contains_inverted_matches || !candidate->inverted()) &&
	        candidate->Matches(url)) {
	      best = candidate.get();
	    }
	  }
	  return best;
	}

	// Rule for the bare "*" pattern. It is always valid and matches every URL, so
	// the ParsingMode has no influence on it.
	class WildcardRule : public Rule {
	 public:
	  WildcardRule() : Rule("*") {}
	  ~WildcardRule() override = default;

	  bool IsValid() const override { return true; }

	  bool Matches(const NoCopyUrl& /*url*/) const override { return true; }

	  std::string ToString() const override { return std::string("*"); }
	};

	// Rules with ParsingMode::kDefault. They treat rules with/without a '/'
	// separately. They do some pre-processing to come up with a \|canonical_\| rule
	// string, then some simple string searches.
	class DefaultModeRule : public Rule {
	public:
	explicit DefaultModeRule(std::string_view original_rule)
	: Rule(original_rule) {
	canonical_ = std::string(original_rule);

	// Drop the leading "!", if present.
	if (inverted())
	canonical_ = canonical_.substr(1);

	if (canonical_.find("/") == std::string::npos) {
	// No "/" in the string. It's a hostnmae or wildcard, so just convert to
	// lowercase.
	canonical_ = base::ToLowerASCII(canonical_);
	return;
	}

	// The string has a "/" in it. It could be:
	// - "//example.com/abc", convert hostname to lowercase
	// - "example.com/abc", treat same as "//example.com/abc"
	// - "http://example.com/abc", convert hostname and scheme to lowercase
	// - "/abc", keep capitalization

	if (base::StartsWith(canonical_, "/") &&
	!base::StartsWith(canonical_, "//")) {
	// Rule starts with a single slash, e.g. "/abc". Don't change case.
	return;
	}

	if (canonical_.find("/") != 0 &&
	canonical_.find("://") == std::string::npos) {
	// Transform "example.com/abc" => "//example.com/abc".
	canonical_.insert(0, "//");
	}

	// For patterns that include a "/": parse the URL to get the proper
	// capitalization (for scheme/hostname).
	//
	// To properly parse URLs with no scheme, we need a valid base URL. We use
	// "ftp://XXX/", which is a valid URL with an unsupported scheme. That
	// way, parsing still succeeds, and we can easily know when the scheme
	// isn't part of the original pattern (and omit it from the output).
	const char* placeholder_scheme = "ftp:";
	std::string placeholder = base::StrCat({placeholder_scheme, "//XXX/"});
	GURL base_url(placeholder);

	GURL relative_url = base_url.Resolve(canonical_);
	std::string_view spec = relative_url.possibly_invalid_spec();

	// The parsed URL might start with "ftp://XXX/" or "ftp://". Remove that
	// prefix.
	auto remainder = base::RemovePrefix(spec, placeholder,
	base::CompareCase::INSENSITIVE_ASCII);
	if (remainder) {
	spec = *remainder;
	}
	remainder = base::RemovePrefix(spec, placeholder_scheme,
	base::CompareCase::INSENSITIVE_ASCII);
	if (remainder) {
	spec = *remainder;
	}
	canonical_ = std::string(spec);
	}

	~DefaultModeRule() override = default;

	bool Matches(const NoCopyUrl& url) const override {
	std::string_view pattern = canonical_;

	if (pattern.find('/') != std::string_view::npos) {
	// Check that the prefix is valid. The URL's hostname/scheme have
	// already been case-normalized, so that part of the URL is always
	// case-insensitive.
	size_t pos = url.spec().find(pattern);
	if (pos != std::string_view::npos &&
	IsValidPrefix(std::string_view(url.spec().data(), pos))) {
	return true;
	}
	if (!url.spec_without_port().empty()) {
	pos = url.spec_without_port().find(pattern);
	return pos != std::string_view::npos &&
	IsValidPrefix(
	std::string_view(url.spec_without_port().data(), pos));
	}
	return false;
	}

	// Compare hosts and ports, case-insensitive.
	auto result = StringFindInsensitiveASCII(url.host_and_port(), pattern);
	return result.begin() != url.host_and_port().end();
	}

	bool IsValid() const override { return true; }

	std::string ToString() const override {
	if (inverted())
	return "!" + canonical_;
	return canonical_;
	}

	private:
	// The canonical version of the rule, with the leading "!" removed if it's
	// inverted.
	std::string canonical_;
	};

	// Rule used with ParsingMode::kIESiteListMode.
	//
	// The rule text is parsed as a URL (whether or not it contains a '/') and
	// split into scheme, host, port and path; Matches() compares each stored part
	// with the corresponding part of the candidate URL.
	class IESiteListModeRule : public Rule {
	 public:
	  explicit IESiteListModeRule(std::string_view original_rule)
	      : Rule(original_rule) {
	    // Parse the string as a URL and extract its parts.
	    //
	    // Some parts of the URL are dropped, to match IE/Edge behavior:
	    //   - username
	    //   - password
	    //   - query
	    //   - fragment

	    // Drop the leading "!", if present.
	    if (inverted())
	      original_rule = original_rule.substr(1);

	    // Rules with leading slashes are interpreted as file:// URLs on POSIX
	    // systems. To be more consistent with Windows, remove the leading slash.
	    //
	    // Only the first slash is removed, to be consistent with Edge (which
	    // *does* parse the rule as a file:// URL if there are 2 slashes).
	    if (base::StartsWith(original_rule, "/"))
	      original_rule = original_rule.substr(1);

	    // Parse as a URL. This is more relaxed than GURL's constructor, e.g. it
	    // adds http:// if the scheme is missing, which lets strings such as
	    // "example.com" (no scheme, no path) parse.
	    const GURL url = url_formatter::FixupURL(std::string(original_rule), "");

	    if (!url.is_valid() ||
	        (!url.SchemeIsHTTPOrHTTPS() && !url.SchemeIsFile())) {
	      // The rule is invalid, so it won't match anything. Keep parsing it
	      // anyway, in case it gets printed for debugging/troubleshooting.
	      valid_ = false;
	    }

	    // Keep the scheme only when the rule spelled it out (or it is file:);
	    // otherwise |scheme_| stays empty and acts as a wildcard ("*").
	    //
	    // "http://" may have been added by FixupURL(), so the check is made on
	    // the original text. A direct prefix test is used instead of a substring
	    // search: it inspects only the first few characters and cannot
	    // mistake "not found in an empty string" for "found at the start".
	    const bool explicit_http_scheme =
	        base::StartsWith(original_rule, "http://",
	                         base::CompareCase::INSENSITIVE_ASCII) ||
	        base::StartsWith(original_rule, "https://",
	                         base::CompareCase::INSENSITIVE_ASCII);
	    if (valid_ && (explicit_http_scheme || url.SchemeIsFile()))
	      scheme_ = url.scheme();

	    if (url.has_host())
	      host_ = url.host();

	    if (url.has_port())
	      port_ = url.IntPort();

	    // Make sure |path_| always has at least the leading slash.
	    if (url.has_path() && !url.path_piece().empty())
	      path_ = base::ToLowerASCII(url.path());
	    else
	      path_ = "/";
	  }

	  ~IESiteListModeRule() override = default;

	  // Must only be called on valid rules (see IsValid()).
	  bool Matches(const NoCopyUrl& no_copy_url) const override {
	    DCHECK(valid_);

	    const GURL& url = no_copy_url.original();

	    // Compare schemes, if present in the rule.
	    if (scheme_ && url.scheme_piece() != *scheme_)
	      return false;

	    // Compare hosts.
	    if (!url::DomainIs(url.host_piece(), host_))
	      return false;

	    // Compare ports, if present in the rule.
	    if (port_ && url.IntPort() != *port_)
	      return false;

	    // The URL's path must begin with the rule's path, ignoring ASCII case.
	    return base::StartsWith(url.path_piece(), path_,
	                            base::CompareCase::INSENSITIVE_ASCII);
	  }

	  bool IsValid() const override { return valid_; }

	  // Typical return value looks like "*://example.com:8000/path".
	  // Must only be called on valid rules.
	  std::string ToString() const override {
	    DCHECK(valid_);

	    std::ostringstream out;

	    if (inverted())
	      out << "!";

	    // <scheme>://
	    if (scheme_)
	      out << *scheme_;
	    else
	      out << "*";
	    out << "://";

	    // <host>:<port>
	    out << host_;
	    if (port_)
	      out << ":" << *port_;

	    // <path>
	    out << path_;

	    return out.str();
	  }

	 private:
	  // Unset means "any scheme".
	  std::optional<std::string> scheme_;
	  std::string host_;
	  // Unset means "any port".
	  std::optional<int> port_;
	  // Lowercased; always at least a "/".
	  std::string path_;

	  bool valid_ = true;
	};

	} // namespace

	// Builds the Rule object for |original_rule|. A bare "*" always becomes a
	// WildcardRule; anything else is parsed according to |parsing_mode|. Returns
	// nullptr when the resulting rule reports itself as invalid.
	std::unique_ptr<Rule> CanonicalizeRule(std::string_view original_rule,
	                                       ParsingMode parsing_mode) {
	  std::unique_ptr<Rule> parsed;
	  if (original_rule == "*") {
	    parsed = std::make_unique<WildcardRule>();
	  } else if (parsing_mode == ParsingMode::kDefault) {
	    parsed = std::make_unique<DefaultModeRule>(original_rule);
	  } else if (parsing_mode == ParsingMode::kIESiteListMode) {
	    parsed = std::make_unique<IESiteListModeRule>(original_rule);
	  } else {
	    NOTREACHED();
	  }

	  // Invalid rules are never handed out. (Written without the
	  // backslash-escaped "||" that made the original validity check
	  // uncompilable.)
	  if (parsed && !parsed->IsValid())
	    parsed.reset();
	  return parsed;
	}

	Decision::Decision(Action action_, Reason reason_, const Rule* matching_rule_)
	: action(action_), reason(reason_), matching_rule(matching_rule_) {}

	// Defaulted default/copy/move constructors (presumably declared in
	// browser_switcher_sitelist.h).
	// NOTE(review): the copy constructor takes a non-const `Decision&`, so it
	// cannot copy from a const Decision -- confirm that this is intentional.
	Decision::Decision() = default;
	Decision::Decision(Decision&) = default;
	Decision::Decision(Decision&&) = default;

	// Two decisions are equal when action and reason agree and their matching
	// rules are either the same pointer (including both null) or both non-null
	// with identical ToString() output.
	//
	// Logical operators are spelled plainly here; the backslash-escaped "||" of
	// the original text is markdown residue and does not compile.
	bool Decision::operator==(const Decision& that) const {
	  const bool same_verdict = action == that.action && reason == that.reason;
	  if (!same_verdict)
	    return false;

	  // Same rule object, or no rule on either side.
	  if (matching_rule == that.matching_rule)
	    return true;

	  // Different pointers: both must exist and print identically.
	  return matching_rule && that.matching_rule &&
	         matching_rule->ToString() == that.matching_rule->ToString();
	}

	// Out-of-line defaulted destructor of the sitelist interface.
	BrowserSwitcherSitelist::~BrowserSwitcherSitelist() = default;

	// Convenience wrapper: true exactly when GetDecision() for |url| yields the
	// kGo action.
	bool BrowserSwitcherSitelist::ShouldSwitch(const GURL& url) const {
	  const auto action = GetDecision(url).action;
	  return action == kGo;
	}

	// Stores |prefs| and subscribes to its change notifications so that
	// OnPrefsChanged() runs whenever prefs change.
	BrowserSwitcherSitelistImpl::BrowserSwitcherSitelistImpl(
	    BrowserSwitcherPrefs* prefs)
	    : prefs_(prefs) {
	  // NOTE(review): base::Unretained(this) is only safe if the subscription
	  // stops delivering callbacks once |prefs_changed_subscription_| is
	  // destroyed -- presumably the case; confirm against BrowserSwitcherPrefs.
	  auto on_prefs_changed = base::BindRepeating(
	      &BrowserSwitcherSitelistImpl::OnPrefsChanged, base::Unretained(this));
	  prefs_changed_subscription_ =
	      prefs_->RegisterPrefsChangedCallback(std::move(on_prefs_changed));
	}

	// Out-of-line defaulted destructor; members clean up after themselves.
	BrowserSwitcherSitelistImpl::~BrowserSwitcherSitelistImpl() = default;

	// Computes the decision for |url| and records whether it was "go" in the
	// "BrowserSwitcher.Decision" histogram. When the feature is not active the
	// result is {kStay, kDisabled} and nothing is recorded.
	Decision BrowserSwitcherSitelistImpl::GetDecision(const GURL& url) const {
	  if (IsActive()) {
	    Decision decision = GetDecisionImpl(url);
	    const bool should_go = decision.action == kGo;
	    UMA_HISTOGRAM_BOOLEAN("BrowserSwitcher.Decision", should_go);
	    return decision;
	  }

	  // Inactive: skip the metric so that non-users don't show up in it.
	  return {kStay, kDisabled, nullptr};
	}

	// Core decision logic, timed by the "BrowserSwitcher.DecisionTime" histogram.
	//
	//   1. Only http, https and file URLs can switch; anything else stays
	//      (kProtocol).
	//   2. The highest-priority matching sitelist rule across all rulesets is the
	//      reason to go. No match => stay (kDefault); an inverted match => stay
	//      (kSitelist).
	//   3. The highest-priority matching greylist rule (inverted rules ignored)
	//      is the reason to stay. It wins when the sitelist rule has
	//      priority() <= 1 or a strictly lower priority than the greylist rule.
	//
	// Operators and inline argument comments are restored to valid C++ here; the
	// original text carried backslash-escaped "||" and "/name=/" remnants of
	// block comments, neither of which compiles.
	Decision BrowserSwitcherSitelistImpl::GetDecisionImpl(const GURL& url) const {
	  SCOPED_UMA_HISTOGRAM_TIMER("BrowserSwitcher.DecisionTime");

	  const bool switchable_scheme =
	      url.SchemeIsHTTPOrHTTPS() || url.SchemeIsFile();
	  if (!switchable_scheme)
	    return {kStay, kProtocol, nullptr};

	  NoCopyUrl no_copy_url(url);
	  const RuleSet* rulesets[] = {&prefs_->GetRules(), &ieem_sitelist_,
	                               &external_sitelist_, &external_greylist_};

	  // Finds the highest-priority match over one list (selected through a
	  // pointer to member) of every ruleset. Earlier rulesets win ties.
	  const auto strongest_match = [&](auto list_member, bool allow_inverted) {
	    const Rule* strongest = nullptr;
	    for (const RuleSet* rules : rulesets) {
	      const Rule* match =
	          MatchUrlToList(no_copy_url, rules->*list_member, allow_inverted);
	      if (match && (!strongest || match->priority() > strongest->priority()))
	        strongest = match;
	    }
	    return strongest;
	  };

	  const Rule* reason_to_go =
	      strongest_match(&RuleSet::sitelist, /*allow_inverted=*/true);

	  // Without a sitelist match the greylists are irrelevant.
	  if (!reason_to_go)
	    return {kStay, kDefault, nullptr};
	  if (reason_to_go->inverted())
	    return {kStay, kSitelist, reason_to_go};

	  const Rule* reason_to_stay =
	      strongest_match(&RuleSet::greylist, /*allow_inverted=*/false);
	  if (!reason_to_stay)
	    return {kGo, kSitelist, reason_to_go};

	  // NOTE(review): priority() <= 1 presumably identifies the weakest possible
	  // rules (e.g. a bare wildcard) -- confirm against Rule::priority(). Such a
	  // sitelist rule loses to any greylist match.
	  const bool go_is_weak = reason_to_go->priority() <= 1;
	  const bool go_outranks_stay =
	      reason_to_go->priority() >= reason_to_stay->priority();
	  if (!go_is_weak && go_outranks_stay)
	    return {kGo, kSitelist, reason_to_go};
	  return {kStay, kGreylist, reason_to_stay};
	}

	// Installs |rules| as the IEEM sitelist: records the number of sitelist
	// entries in a histogram, rebuilds the parsed rules, and keeps the raw rules
	// so they can be parsed again later.
	void BrowserSwitcherSitelistImpl::SetIeemSitelist(RawRuleSet&& rules) {
	  const auto sitelist_size = rules.sitelist.size();
	  UMA_HISTOGRAM_COUNTS_100000("BrowserSwitcher.IeemSitelistSize",
	                              sitelist_size);

	  // Parse first: the move below leaves |rules| in a moved-from state.
	  StoreRules(ieem_sitelist_, rules);
	  original_ieem_sitelist_ = std::move(rules);
	}

	// Installs |rules| as the external sitelist: records the number of sitelist
	// entries in a histogram, rebuilds the parsed rules, and keeps the raw rules
	// so they can be parsed again later.
	void BrowserSwitcherSitelistImpl::SetExternalSitelist(RawRuleSet&& rules) {
	  const auto sitelist_size = rules.sitelist.size();
	  UMA_HISTOGRAM_COUNTS_100000("BrowserSwitcher.ExternalSitelistSize",
	                              sitelist_size);

	  // Parse first: the move below leaves |rules| in a moved-from state.
	  StoreRules(external_sitelist_, rules);
	  original_external_sitelist_ = std::move(rules);
	}

	// Installs |rules| as the external greylist: records the number of greylist
	// entries in a histogram, rebuilds the parsed rules, and keeps the raw rules
	// so they can be parsed again later.
	//
	// |rules| must carry its entries in `greylist`; `sitelist` has to be empty.
	void BrowserSwitcherSitelistImpl::SetExternalGreylist(RawRuleSet&& rules) {
	  DCHECK(rules.sitelist.empty());

	  // Sample the greylist size. Sampling |rules.sitelist| here would always
	  // record 0, because that list is required to be empty (DCHECK above).
	  UMA_HISTOGRAM_COUNTS_100000("BrowserSwitcher.ExternalGreylistSize",
	                              rules.greylist.size());

	  // Parse first: the move below leaves |rules| in a moved-from state.
	  StoreRules(external_greylist_, rules);
	  original_external_greylist_ = std::move(rules);
	}

	// Returns the parsed IEEM sitelist rules. The pointer refers to a member of
	// this object and is never null.
	const RuleSet* BrowserSwitcherSitelistImpl::GetIeemSitelist() const {
	return &ieem_sitelist_;
	}

	// Returns the parsed external sitelist rules. The pointer refers to a member
	// of this object and is never null.
	const RuleSet* BrowserSwitcherSitelistImpl::GetExternalSitelist() const {
	return &external_sitelist_;
	}

	// Returns the parsed external greylist rules. The pointer refers to a member
	// of this object and is never null.
	const RuleSet* BrowserSwitcherSitelistImpl::GetExternalGreylist() const {
	return &external_greylist_;
	}

	// Replaces the contents of |dst| with the rules of |src|, parsed with the
	// parsing mode currently set in prefs. Raw rules that do not produce a valid
	// Rule are dropped.
	void BrowserSwitcherSitelistImpl::StoreRules(RuleSet& dst,
	                                             const RawRuleSet& src) {
	  dst.sitelist.clear();
	  dst.greylist.clear();

	  const ParsingMode parsing_mode = prefs_->GetParsingMode();

	  // Appends the parsed form of every usable entry of |raw_rules|.
	  const auto parse_into = [parsing_mode](const auto& raw_rules,
	                                         auto& parsed_rules) {
	    for (const std::string& text : raw_rules) {
	      if (auto rule = CanonicalizeRule(text, parsing_mode))
	        parsed_rules.push_back(std::move(rule));
	    }
	  };

	  parse_into(src.sitelist, dst.sitelist);
	  parse_into(src.greylist, dst.greylist);
	}

	// Prefs-changed callback. Only a change of the parsing-mode pref matters
	// here: the parsed form of a rule depends on it, so the three rulesets owned
	// by this object are rebuilt from their stored raw rules.
	void BrowserSwitcherSitelistImpl::OnPrefsChanged(
	    BrowserSwitcherPrefs* prefs,
	    const std::vector<std::string>& changed_prefs) {
	  const bool parsing_mode_changed =
	      std::ranges::find(changed_prefs, prefs::kParsingMode) !=
	      changed_prefs.end();
	  if (!parsing_mode_changed)
	    return;

	  StoreRules(ieem_sitelist_, original_ieem_sitelist_);
	  StoreRules(external_sitelist_, original_external_sitelist_);
	  StoreRules(external_greylist_, original_external_greylist_);
	}

	bool BrowserSwitcherSitelistImpl::IsActive() const {
	if (!prefs_->IsEnabled())
	return false;

	const RuleSet* rulesets[] = {&prefs_->GetRules(), &ieem_sitelist_,
	&external_sitelist_, &external_greylist_};
	for (const RuleSet* rules : rulesets) {
	if (!rules->sitelist.empty() \|\| !rules->greylist.empty())
	return true;
	}
	return false;
	}

	} // namespace browser_switcher