| // Copyright 2024 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "net/device_bound_sessions/session_inclusion_rules.h" |
| |
| #include <string_view> |
| |
| #include "base/check.h" |
| #include "base/containers/adapters.h" |
| #include "base/logging.h" |
| #include "base/strings/string_util.h" |
| #include "net/base/ip_address.h" |
| #include "net/base/registry_controlled_domains/registry_controlled_domain.h" |
| #include "net/base/scheme_host_port_matcher_result.h" |
| #include "net/base/scheme_host_port_matcher_rule.h" |
| #include "net/base/url_util.h" |
| #include "net/device_bound_sessions/proto/storage.pb.h" |
| #include "net/device_bound_sessions/session.h" |
| |
| namespace net::device_bound_sessions { |
| |
| namespace { |
| |
| bool IsIncludeSiteAllowed(const url::Origin& origin) { |
| // This is eTLD+1 |
| const std::string domain_and_registry = |
| registry_controlled_domains::GetDomainAndRegistry( |
| origin, registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES); |
| return !domain_and_registry.empty() && origin.host() == domain_and_registry; |
| } |
| |
| SessionInclusionRules::InclusionResult AsInclusionResult(bool should_include) { |
| return should_include ? SessionInclusionRules::kInclude |
| : SessionInclusionRules::kExclude; |
| } |
| |
| // Types of characters valid in IPv6 addresses. |
| // Derived from logic in url::DoIPv6AddressToNumber() and url::DoParseIPv6(). |
| bool IsValidIPv6Char(char c) { |
| return c == ':' || base::IsHexDigit(c) || c == '.' || |
| // 'x' or 'X' is used in IPv4 to denote hex values, and can be used in |
| // parts of IPv6 addresses. |
| c == 'x' || c == 'X'; |
| } |
| |
| proto::RuleType GetRuleTypeProto( |
| SessionInclusionRules::InclusionResult result) { |
| return result == SessionInclusionRules::InclusionResult::kInclude |
| ? proto::RuleType::INCLUDE |
| : proto::RuleType::EXCLUDE; |
| } |
| |
| std::optional<SessionInclusionRules::InclusionResult> GetInclusionResult( |
| proto::RuleType proto) { |
| if (proto == proto::RuleType::INCLUDE) { |
| return SessionInclusionRules::InclusionResult::kInclude; |
| } else if (proto == proto::RuleType::EXCLUDE) { |
| return SessionInclusionRules::InclusionResult::kExclude; |
| } |
| |
| // proto = RULE_TYPE_UNSPECIFIED |
| return std::nullopt; |
| } |
| |
| std::string RuleTypeToString(SessionInclusionRules::InclusionResult rule_type) { |
| switch (rule_type) { |
| case SessionInclusionRules::InclusionResult::kExclude: |
| return "exclude"; |
| case SessionInclusionRules::InclusionResult::kInclude: |
| return "include"; |
| } |
| } |
| |
| } // namespace |
| |
| // Encapsulates a single rule which applies to the request URL. |
| struct SessionInclusionRules::UrlRule { |
| // URLs that match the rule will be subject to inclusion or exclusion as |
| // specified by the type. |
| InclusionResult rule_type; |
| |
| // Domain or pattern that the URL must match. This must either be a |
| // full domain (host piece) or a pattern containing a wildcard in the |
| // most-specific (leftmost) label position followed by a dot and a non-eTLD. |
| // The matched strings follow SchemeHostPortMatcherRule's logic, but with |
| // some extra requirements for validity: |
| // - A leading wildcard * must be followed by a dot, so "*ple.com" is not |
| // acceptable. |
| // - "*.com" is not accepted because com is an eTLD. Same with "*.co.uk" and |
| // similar. |
| // - Multiple wildcards are not allowed. |
| // - Internal wildcards are not allowed, so "sub.*.example.com" does not |
| // work because the wildcard is not the leftmost component. |
| // - IP addresses also work if specified as the exact host, as described in |
| // SchemeHostPortMatcherRule. |
| std::unique_ptr<SchemeHostPortMatcherRule> host_matcher_rule; |
| |
| // Prefix consisting of path components that the URL must match. Must begin |
| // with '/'. Wildcards are not allowed. Simply use "/" to match all paths. |
| std::string path_prefix; |
| |
| friend bool operator==(const UrlRule& lhs, const UrlRule& rhs) { |
| return lhs.rule_type == rhs.rule_type && |
| lhs.path_prefix == rhs.path_prefix && |
| lhs.host_matcher_rule->ToString() == |
| rhs.host_matcher_rule->ToString(); |
| } |
| |
| // Returns whether the given `url` matches this rule. Note that this |
| // function does not check the scheme and port portions of the URL/origin. |
| bool MatchesHostAndPath(const GURL& url) const; |
| }; |
| |
| SessionInclusionRules::SessionInclusionRules(const url::Origin& origin) |
| : origin_(origin), may_include_site_(IsIncludeSiteAllowed(origin)) {} |
| |
| SessionInclusionRules::SessionInclusionRules() = default; |
| |
| SessionInclusionRules::~SessionInclusionRules() = default; |
| |
| SessionInclusionRules::SessionInclusionRules(SessionInclusionRules&& other) = |
| default; |
| |
| SessionInclusionRules& SessionInclusionRules::operator=( |
| SessionInclusionRules&& other) = default; |
| |
| bool SessionInclusionRules::operator==( |
| const SessionInclusionRules& other) const = default; |
| |
| void SessionInclusionRules::SetIncludeSite(bool include_site) { |
| if (!may_include_site_) { |
| return; |
| } |
| |
| if (!include_site) { |
| include_site_.reset(); |
| return; |
| } |
| |
| include_site_ = SchemefulSite(origin_); |
| } |
| |
| bool SessionInclusionRules::AddUrlRuleIfValid(InclusionResult rule_type, |
| const std::string& host_pattern, |
| const std::string& path_prefix) { |
| if (path_prefix.empty() || path_prefix.front() != '/') { |
| return false; |
| } |
| if (host_pattern.empty()) { |
| return false; |
| } |
| |
| // If only the origin is allowed, the host_pattern must be precisely its host. |
| bool host_pattern_is_host = host_pattern == origin_.host(); |
| if (!may_include_site_ && !host_pattern_is_host) { |
| return false; |
| } |
| |
| // Don't allow '*' anywhere besides the first character of the pattern. |
| size_t star_pos = host_pattern.rfind('*'); |
| if (star_pos != std::string::npos && star_pos != 0) { |
| return false; |
| } |
| // Only allow wildcard if immediately followed by a dot. |
| bool has_initial_wildcard_label = host_pattern.starts_with("*."); |
| if (star_pos != std::string::npos && !has_initial_wildcard_label) { |
| return false; |
| } |
| |
| std::string_view hostlike_part{host_pattern}; |
| if (has_initial_wildcard_label) { |
| hostlike_part = hostlike_part.substr(2); |
| } |
| |
| bool presumed_ipv6 = host_pattern.front() == '['; |
| if (presumed_ipv6 && host_pattern.back() != ']') { |
| return false; |
| } |
| |
| // Allow only specific characters into SchemeHostPortMatcherRule parsing. |
| if (presumed_ipv6) { |
| // Leave out the brackets, but everything else must be a valid char. |
| std::string_view ipv6_address{host_pattern.begin() + 1, |
| host_pattern.end() - 1}; |
| if (std::find_if_not(ipv6_address.begin(), ipv6_address.end(), |
| &IsValidIPv6Char) != ipv6_address.end()) { |
| return false; |
| } |
| } else { |
| // Note that this excludes a ':' character specifying a port number, even |
| // though SchemeHostPortMatcherRule supports it. Same for '/' (for the |
| // scheme or an IP block). |
| // TODO(chlily): Consider supporting port numbers. |
| if (!IsCanonicalizedHostCompliant(hostlike_part)) { |
| return false; |
| } |
| } |
| |
| // Delegate the rest of the parsing to SchemeHostPortMatcherRule. |
| std::unique_ptr<SchemeHostPortMatcherRule> host_matcher_rule = |
| SchemeHostPortMatcherRule::FromUntrimmedRawString(host_pattern); |
| if (!host_matcher_rule) { |
| return false; |
| } |
| |
| // Now that we know the host_pattern is at least the right shape, validate the |
| // remaining restrictions. |
| |
| // Skip the eTLD lookups if the host pattern is an exact match. |
| if (host_pattern_is_host) { |
| url_rules_.emplace_back(rule_type, std::move(host_matcher_rule), |
| path_prefix); |
| return true; |
| } |
| |
| std::string hostlike_part_domain = |
| registry_controlled_domains::GetDomainAndRegistry( |
| hostlike_part, |
| registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES); |
| // If there is a wildcard, we require the pattern to be a normal domain and |
| // not an eTLD. |
| if (has_initial_wildcard_label && hostlike_part_domain.empty()) { |
| return false; |
| } |
| |
| // Validate that the host pattern is on the right origin/site. |
| // TODO(chlily): Perhaps we should use a cached value, but surely URL rule |
| // parsing only happens a small number of times. |
| std::string domain_and_registry = |
| registry_controlled_domains::GetDomainAndRegistry( |
| origin_, registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES); |
| // The origin_ must have an eTLD+1, because if it didn't, then we'd know that |
| // !may_include_site_, and that would mean we'd have already returned early |
| // and would never get here. |
| CHECK(!domain_and_registry.empty()); |
| if (hostlike_part_domain != domain_and_registry) { |
| return false; |
| } |
| |
| url_rules_.emplace_back(rule_type, std::move(host_matcher_rule), path_prefix); |
| return true; |
| } |
| |
| SessionInclusionRules::InclusionResult |
| SessionInclusionRules::EvaluateRequestUrl(const GURL& url) const { |
| bool same_origin = origin_.IsSameOriginWith(url); |
| if (!may_include_site_ && !same_origin) { |
| return SessionInclusionRules::kExclude; |
| } |
| |
| // Evaluate against specific rules, most-recently-added first. |
| for (const UrlRule& rule : base::Reversed(url_rules_)) { |
| // The rule covers host and path, and scheme is checked too. We don't check |
| // port here, because in the !may_include_site_ case that's already covered |
| // by being same-origin, and in the may_include_site_ case it's ok for the |
| // port to differ. |
| if (rule.MatchesHostAndPath(url) && |
| url.scheme_piece() == origin_.scheme()) { |
| return rule.rule_type; |
| } |
| } |
| |
| // None of the specific rules apply. Evaluate against the basic include rule. |
| if (include_site_) { |
| return AsInclusionResult(SchemefulSite(url) == *include_site_); |
| } |
| return AsInclusionResult(same_origin); |
| } |
| |
| bool SessionInclusionRules::UrlRule::MatchesHostAndPath(const GURL& url) const { |
| if (host_matcher_rule->Evaluate(url) == |
| SchemeHostPortMatcherResult::kNoMatch) { |
| return false; |
| } |
| |
| std::string_view url_path = url.path_piece(); |
| if (!url_path.starts_with(path_prefix)) { |
| return false; |
| } |
| // We must check the following to prevent a path prefix like "/foo" from |
| // erroneously matching a URL path like "/foobar/baz". There are 2 possible |
| // cases: `url_path` may be the same length as `path_prefix`, or `url_path` |
| // may be longer than `path_prefix`. In the first case, the two paths are |
| // equal and a match has been found. In the second case, we want to know |
| // whether the end of the `path_prefix` represents a full label in the path. |
| // Either the path_prefix string ends in '/' and is explicitly the end of a |
| // label, or the next character of `url_path` beyond the identical portion is |
| // '/'. Otherwise, reject the path as a false (incomplete label) prefix match. |
| CHECK(url_path.length() >= path_prefix.length()); |
| if (url_path.length() > path_prefix.length() && path_prefix.back() != '/' && |
| url_path[path_prefix.length()] != '/') { |
| return false; |
| } |
| |
| return true; |
| } |
| |
| size_t SessionInclusionRules::num_url_rules_for_testing() const { |
| return url_rules_.size(); |
| } |
| |
| proto::SessionInclusionRules SessionInclusionRules::ToProto() const { |
| proto::SessionInclusionRules proto; |
| proto.set_origin(origin_.Serialize()); |
| proto.set_do_include_site(include_site_.has_value()); |
| |
| // Note that the ordering of the rules (in terms of when they were added to |
| // the session) is preserved in the proto. Preserving the ordering is |
| // important to handle rules overlap - the latest rule wins. |
| for (auto& rule : url_rules_) { |
| proto::UrlRule rule_proto; |
| rule_proto.set_rule_type(GetRuleTypeProto(rule.rule_type)); |
| rule_proto.set_host_matcher_rule(rule.host_matcher_rule->ToString()); |
| rule_proto.set_path_prefix(rule.path_prefix); |
| proto.mutable_url_rules()->Add(std::move(rule_proto)); |
| } |
| |
| return proto; |
| } |
| |
| // static: |
| std::unique_ptr<SessionInclusionRules> SessionInclusionRules::CreateFromProto( |
| const proto::SessionInclusionRules& proto) { |
| if (!proto.has_origin() || !proto.has_do_include_site()) { |
| return nullptr; |
| } |
| url::Origin origin = url::Origin::Create(GURL(proto.origin())); |
| if (origin.opaque()) { |
| DLOG(ERROR) << "proto origin parse error: " << origin.GetDebugString(); |
| return nullptr; |
| } |
| |
| auto result = std::make_unique<SessionInclusionRules>(origin); |
| result->SetIncludeSite(proto.do_include_site()); |
| for (const auto& rule_proto : proto.url_rules()) { |
| std::optional<InclusionResult> rule_type = |
| GetInclusionResult(rule_proto.rule_type()); |
| if (!rule_type.has_value() || |
| !result->AddUrlRuleIfValid(*rule_type, rule_proto.host_matcher_rule(), |
| rule_proto.path_prefix())) { |
| DLOG(ERROR) << "proto rule parse error: " << "type:" |
| << proto::RuleType_Name(rule_proto.rule_type()) << " " |
| << "matcher:" << rule_proto.host_matcher_rule() << " " |
| << "prefix:" << rule_proto.path_prefix(); |
| return nullptr; |
| } |
| } |
| |
| return result; |
| } |
| |
| std::string SessionInclusionRules::DebugString() const { |
| std::string result; |
| for (const UrlRule& rule : url_rules_) { |
| base::StrAppend(&result, {"Type=", RuleTypeToString(rule.rule_type), |
| "; Domain=", rule.host_matcher_rule->ToString(), |
| "; Path=", rule.path_prefix, "\n"}); |
| } |
| return result; |
| } |
| |
| } // namespace net::device_bound_sessions |