blob: d334a36c1873f0833d2692660ce4cead013d591e [file] [log] [blame]
// Copyright 2019 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "extensions/browser/api/declarative_net_request/request_params.h"
#include <algorithm>
#include <optional>
#include <string_view>
#include "base/check.h"
#include "base/containers/flat_map.h"
#include "base/dcheck_is_on.h"
#include "base/functional/bind.h"
#include "base/no_destructor.h"
#include "base/strings/pattern.h"
#include "base/strings/string_util.h"
#include "content/public/browser/render_frame_host.h"
#include "content/public/browser/render_process_host.h"
#include "content/public/browser/web_contents.h"
#include "extensions/browser/api/declarative_net_request/constants.h"
#include "extensions/browser/api/declarative_net_request/flat/extension_ruleset_generated.h"
#include "extensions/browser/api/declarative_net_request/utils.h"
#include "extensions/browser/api/web_request/web_request_info.h"
#include "extensions/browser/api/web_request/web_request_resource_type.h"
#include "extensions/browser/extensions_browser_client.h"
#include "extensions/common/constants.h"
#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
#include "net/http/http_response_headers.h"
#include "third_party/blink/public/mojom/loader/resource_load_info.mojom-shared.h"
#include "url/gurl.h"
namespace extensions::declarative_net_request {
namespace {
namespace flat_rule = url_pattern_index::flat;
// Reports whether a request for `url` is third-party with respect to
// `document_origin`. An opaque `document_origin` is always considered
// third-party; otherwise the answer is the negation of SameDomainOrHost()
// evaluated with the INCLUDE_PRIVATE_REGISTRIES filter.
// TODO(crbug.com/40508457): Look into caching this.
bool IsThirdPartyRequest(const GURL& url, const url::Origin& document_origin) {
  namespace rcd = net::registry_controlled_domains;
  return document_origin.opaque() ||
         !rcd::SameDomainOrHost(url, document_origin,
                                rcd::INCLUDE_PRIVATE_REGISTRIES);
}
// Origin-based overload: reports whether `origin` is third-party with respect
// to `document_origin`. An opaque `document_origin` is always considered
// third-party; otherwise the answer is the negation of SameDomainOrHost()
// evaluated with the INCLUDE_PRIVATE_REGISTRIES filter.
bool IsThirdPartyRequest(const url::Origin& origin,
                         const url::Origin& document_origin) {
  namespace rcd = net::registry_controlled_domains;
  return document_origin.opaque() ||
         !rcd::SameDomainOrHost(origin, document_origin,
                                rcd::INCLUDE_PRIVATE_REGISTRIES);
}
// Returns the global id of `host`, or a default-constructed
// GlobalRenderFrameHostId when `host` is null.
content::GlobalRenderFrameHostId GetFrameRoutingId(
    content::RenderFrameHost* host) {
  return host ? host->GetGlobalId() : content::GlobalRenderFrameHostId();
}
// Returns true when at least one value of `header` in `response_headers`
// matches the wildcard pattern stored in `flat_pattern`.
// Note: Matches are case-insensitive, and supports * (0 or more characters) and
// ? (0 or 1 characters) matching. Each header value is lower-cased (ASCII)
// before it is compared against the pattern.
bool HasHeaderValue(const net::HttpResponseHeaders& response_headers,
                    std::string_view header,
                    const flatbuffers::String* flat_pattern) {
  const auto value_pattern = CreateString<std::string_view>(*flat_pattern);

  // `cursor` carries the enumeration state between EnumerateHeader() calls;
  // the loop ends once no further value is returned for `header`.
  size_t cursor = 0;
  for (std::optional<std::string_view> value =
           response_headers.EnumerateHeader(&cursor, header);
       value.has_value();
       value = response_headers.EnumerateHeader(&cursor, header)) {
    const auto lowered_value = base::ToLowerASCII(*value);
    if (base::MatchPattern(lowered_value, value_pattern)) {
      return true;
    }
  }
  return false;
}
// Returns true if `response_headers` satisfies at least one entry of
// `header_conditions`. An entry is satisfied when all of the following hold:
//  - the named header is present in the response,
//  - none of the header's values matches a pattern in
//    condition->excluded_values() (when that list is specified), and
//  - some value of the header matches a pattern in condition->values() (when
//    that list is specified).
// An entry with neither list therefore matches on header presence alone.
bool MatchesHeaderConditions(
    const net::HttpResponseHeaders& response_headers,
    const flatbuffers::Vector<flatbuffers::Offset<flat::HeaderCondition>>&
        header_conditions) {
  for (const flat::HeaderCondition* condition : header_conditions) {
    const std::string_view header_name =
        CreateString<std::string_view>(*condition->header());
    if (!response_headers.HasHeader(header_name)) {
      continue;
    }

    const auto* wanted_values = condition->values();
    const auto* unwanted_values = condition->excluded_values();

    // Predicate: does any value of `header_name` match `pattern`?
    const auto value_matches = [&response_headers, header_name](
                                   const flatbuffers::String* pattern) {
      return HasHeaderValue(response_headers, header_name, pattern);
    };

    // An excluded value disqualifies this condition; move on to the next one.
    const bool hits_excluded_value =
        unwanted_values && std::ranges::any_of(*unwanted_values, value_matches);
    if (hits_excluded_value) {
      continue;
    }

    // With no `values()` list the header's presence is enough; otherwise at
    // least one listed pattern has to match.
    if (!wanted_values || std::ranges::any_of(*wanted_values, value_matches)) {
      return true;
    }
  }
  return false;
}
// Evaluates the `flat::EmbedderConditions` serialized in `conditions_buffer`
// against a request. Returns false when any of the following holds:
//  - `tab_ids_included` is set and does not contain `tab_id`,
//  - `tab_ids_excluded` is set and contains `tab_id`,
//  - `top_level_frame_or_initiator_host` fails the included/excluded top
//    domain lists,
//  - `response_headers` is non-null and either an excluded header condition
//    matches or a non-empty list of required header conditions has no match.
// Returns true otherwise. Header conditions are skipped entirely when
// `response_headers` is null.
bool DoEmbedderConditionsMatch(
    int tab_id,
    const std::string& top_level_frame_or_initiator_host,
    scoped_refptr<const net::HttpResponseHeaders> response_headers,
    const flatbuffers::Vector<uint8_t>& conditions_buffer) {
#if DCHECK_IS_ON()
  // Sanity check (not a security check) that `conditions_buffer` holds a valid
  // Flatbuffer rooted at `flat::EmbedderConditions`:
  // - File backed rulesets have their checksum verified on ruleset load, so
  //   the nested flatbuffer shouldn't be corrupted. On-disk modification of
  //   stored artifacts is outside Chrome's security model anyway.
  // - Non-file backed (session-scoped) rulesets only live in memory, so there
  //   shouldn't be a corruption risk.
  flatbuffers::Verifier buffer_verifier(conditions_buffer.Data(),
                                        conditions_buffer.size());
  CHECK(buffer_verifier.VerifyBuffer<flat::EmbedderConditions>(
      kEmbedderConditionsBufferIdentifier));
#endif  // DCHECK_IS_ON()

  const auto* conditions =
      flatbuffers::GetRoot<flat::EmbedderConditions>(conditions_buffer.Data());
  DCHECK(conditions);

  // Tab id matching. The id lists are expected to be sorted, which allows a
  // binary search.
  const auto contains_tab_id =
      [tab_id](const flatbuffers::Vector<int32_t>& sorted_ids) {
        DCHECK(std::is_sorted(sorted_ids.begin(), sorted_ids.end()));
        return std::binary_search(sorted_ids.begin(), sorted_ids.end(), tab_id);
      };

  const auto* included_tab_ids = conditions->tab_ids_included();
  if (included_tab_ids && !contains_tab_id(*included_tab_ids)) {
    return false;
  }

  const auto* excluded_tab_ids = conditions->tab_ids_excluded();
  if (excluded_tab_ids && contains_tab_id(*excluded_tab_ids)) {
    return false;
  }

  // Top-level frame domain matching.
  const auto* included_top_domains = conditions->top_domains_included();
  const auto* excluded_top_domains = conditions->top_domains_excluded();

#if DCHECK_IS_ON()
  // Both domain lists are expected to be ordered by CompareDomains().
  const auto domain_less = [](const flatbuffers::String* lhs,
                              const flatbuffers::String* rhs) {
    return url_pattern_index::CompareDomains(
               std::string_view(lhs->c_str(), lhs->size()),
               std::string_view(rhs->c_str(), rhs->size())) < 0;
  };
  CHECK(!included_top_domains ||
        std::is_sorted(included_top_domains->begin(),
                       included_top_domains->end(), domain_less));
  CHECK(!excluded_top_domains ||
        std::is_sorted(excluded_top_domains->begin(),
                       excluded_top_domains->end(), domain_less));
#endif  // DCHECK_IS_ON()

  if (!url_pattern_index::DoesHostMatchDomainLists(
          top_level_frame_or_initiator_host, included_top_domains,
          excluded_top_domains)) {
    return false;
  }

  // Header conditions can only be evaluated when response headers exist.
  if (!response_headers) {
    return true;
  }

  // Do not match the rule if any conditions in `excluded_response_headers()`
  // match.
  const auto* excluded_header_conditions =
      conditions->excluded_response_headers();
  if (excluded_header_conditions &&
      MatchesHeaderConditions(*response_headers, *excluded_header_conditions)) {
    return false;
  }

  // Do not match the rule if a non-empty `response_headers()` list has no
  // matching condition.
  const auto* required_header_conditions = conditions->response_headers();
  const bool has_required_header_conditions =
      required_header_conditions && required_header_conditions->size();
  if (has_required_header_conditions &&
      !MatchesHeaderConditions(*response_headers,
                               *required_header_conditions)) {
    return false;
  }

  return true;
}
} // namespace
// Builds the rule-matching parameters for the network request described by
// `info`.
//
// `url` stores the address of `info.url` rather than a copy, so `info` is
// presumably required to outlive this object (confirm against callers).
// `response_headers` may be null; it is handed to the embedder conditions
// matcher, which skips header conditions in that case.
RequestParams::RequestParams(
    const WebRequestInfo& info,
    scoped_refptr<const net::HttpResponseHeaders> response_headers)
    : url(&info.url),
      first_party_origin(info.initiator.value_or(url::Origin())),
      element_type(GetElementType(info.web_request_type)),
      method(GetRequestMethod(info.url.SchemeIsHTTPOrHTTPS(), info.method)),
      parent_routing_id(info.parent_routing_id) {
  // Allow/allowAllRequest rules matched in earlier rule matching stages can
  // influence rule matches for later matching stages. Hence this information
  // is needed from `info`. Each present action is cloned into this object.
  for (const auto& [key, allow_action] : info.max_priority_allow_action) {
    max_priority_allow_action.emplace(
        key, allow_action.has_value()
                 ? std::make_optional(allow_action->Clone())
                 : std::nullopt);
  }

  is_third_party = IsThirdPartyRequest(*url, first_party_origin);

  // Determine the top-level frame or initiator host. This is the request host
  // for main-frame requests, the host of the outer-most frame of the request
  // initiator if available, otherwise the host of the request initiator. When
  // none of those are available, fall back to an opaque origin.
  std::string top_level_frame_or_initiator_host;
  if (info.web_request_type == WebRequestResourceType::MAIN_FRAME) {
    top_level_frame_or_initiator_host = info.url.host();
  } else {
    url::Origin top_level_frame_or_initiator_origin = first_party_origin;
    if (content::RenderFrameHost* initiator_host =
            content::RenderFrameHost::FromID(info.parent_routing_id)) {
      url::Origin top_origin =
          initiator_host->GetOutermostMainFrame()->GetLastCommittedOrigin();
      // Only a non-opaque outermost origin overrides the initiator origin.
      if (!top_origin.opaque()) {
        top_level_frame_or_initiator_origin = std::move(top_origin);
      }
    }
    top_level_frame_or_initiator_host =
        top_level_frame_or_initiator_origin.host();
  }

  // `response_headers` is a by-value parameter at its last use: move it into
  // the callback instead of copying to avoid an extra ref-count round trip.
  embedder_conditions_matcher = base::BindRepeating(
      DoEmbedderConditionsMatch, info.frame_data.tab_id,
      std::move(top_level_frame_or_initiator_host),
      std::move(response_headers));
}
// Builds the rule-matching parameters for the document currently committed in
// `host`. `host` is dereferenced unconditionally and must be non-null.
//
// `is_post_navigation` selects POST over GET as the request method.
// `response_headers` may be null; it is handed to the embedder conditions
// matcher, which skips header conditions in that case.
RequestParams::RequestParams(
    content::RenderFrameHost* host,
    bool is_post_navigation,
    scoped_refptr<const net::HttpResponseHeaders> response_headers)
    : url(&host->GetLastCommittedURL()),
      method(is_post_navigation ? flat_rule::RequestMethod_POST
                                : flat_rule::RequestMethod_GET),
      parent_routing_id(GetFrameRoutingId(host->GetParentOrOuterDocument())) {
  content::RenderFrameHost* const parent = host->GetParentOrOuterDocument();
  if (parent) {
    // Note the discrepancy with the WebRequestInfo constructor. For a
    // navigation request, we'd use the request initiator as the
    // `first_party_origin`. But here we use the origin of the parent frame.
    // This is the same as crbug.com/996998.
    first_party_origin = parent->GetLastCommittedOrigin();
    element_type = flat_rule::ElementType_SUBDOCUMENT;
  } else {
    // No parent or outer document: treat this as a main-frame request with an
    // opaque first-party origin.
    first_party_origin = url::Origin();
    element_type = flat_rule::ElementType_MAIN_FRAME;
  }

  is_third_party =
      IsThirdPartyRequest(host->GetLastCommittedOrigin(), first_party_origin);

  // Prefer the outermost main frame's origin for top-domain matching and fall
  // back to `first_party_origin` when that origin is missing or opaque.
  content::RenderFrameHost* const outermost_main_frame =
      host->GetOutermostMainFrame();
  url::Origin top_origin = outermost_main_frame
                               ? outermost_main_frame->GetLastCommittedOrigin()
                               : url::Origin();
  std::string top_level_frame_or_initiator_host =
      top_origin.opaque() ? first_party_origin.host() : top_origin.host();

  // Only the tab id is needed; the window id is a required out-parameter.
  int window_id_unused = extension_misc::kUnknownWindowId;
  int tab_id = extension_misc::kUnknownTabId;
  ExtensionsBrowserClient::Get()->GetTabAndWindowIdForWebContents(
      content::WebContents::FromRenderFrameHost(host), &tab_id,
      &window_id_unused);

  // `response_headers` is a by-value parameter at its last use: move it into
  // the callback instead of copying to avoid an extra ref-count round trip.
  embedder_conditions_matcher = base::BindRepeating(
      DoEmbedderConditionsMatch, tab_id,
      std::move(top_level_frame_or_initiator_host),
      std::move(response_headers));
}
// Builds the rule-matching parameters from explicitly supplied request
// attributes instead of a live request or frame.
//
// `url` is stored by address, so the referenced GURL is presumably required to
// outlive this object (confirm against callers). The top-domain host used for
// embedder conditions is `top_origin`'s host, or `initiator`'s host when
// `top_origin` is opaque. `response_headers` may be null.
RequestParams::RequestParams(
    const GURL& url,
    const url::Origin& initiator,
    const url::Origin& top_origin,
    const api::declarative_net_request::ResourceType request_type,
    const api::declarative_net_request::RequestMethod request_method,
    int tab_id,
    scoped_refptr<const net::HttpResponseHeaders> response_headers)
    : url(&url),
      first_party_origin(initiator),
      element_type(GetElementType(request_type)),
      // Use the `initiator` parameter rather than the `first_party_origin`
      // member so the result does not depend on member declaration order.
      is_third_party(IsThirdPartyRequest(url, initiator)),
      method(GetRequestMethod(url.SchemeIsHTTPOrHTTPS(), request_method)),
      // `response_headers` is a by-value parameter at its only use: move it
      // into the callback instead of copying.
      embedder_conditions_matcher(base::BindRepeating(
          DoEmbedderConditionsMatch,
          tab_id,
          top_origin.opaque() ? initiator.host() : top_origin.host(),
          std::move(response_headers))) {}
// Defaulted default constructor: members receive only their default
// initialization.
RequestParams::RequestParams() = default;
// Defaulted destructor, defined out of line in this file.
RequestParams::~RequestParams() = default;
} // namespace extensions::declarative_net_request