blob: e11759a0950492d8af5765da23292f741391bc8e [file] [log] [blame]
// Copyright 2019 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "extensions/browser/api/declarative_net_request/ruleset_matcher_base.h"
#include <algorithm>
#include <string_view>
#include <tuple>
#include "base/strings/strcat.h"
#include "base/strings/string_util.h"
#include "components/url_pattern_index/flat/url_pattern_index_generated.h"
#include "content/public/browser/navigation_handle.h"
#include "content/public/browser/render_frame_host.h"
#include "content/public/browser/render_process_host.h"
#include "extensions/browser/api/declarative_net_request/constants.h"
#include "extensions/browser/api/declarative_net_request/request_action.h"
#include "extensions/browser/api/declarative_net_request/request_params.h"
#include "extensions/browser/api/declarative_net_request/utils.h"
#include "extensions/common/api/declarative_net_request.h"
#include "net/base/url_util.h"
#include "url/gurl.h"
namespace extensions::declarative_net_request {
namespace flat_rule = url_pattern_index::flat;
namespace dnr_api = api::declarative_net_request;
namespace {
// Tells whether an element of the given |type| is one of the element types
// (images and sub-documents) that are eligible for collapsing.
bool ShouldCollapseResourceType(flat_rule::ElementType type) {
  // TODO(crbug.com/40578984): Add support for other element types like
  // OBJECT.
  switch (type) {
    case flat_rule::ElementType_IMAGE:
    case flat_rule::ElementType_SUBDOCUMENT:
      return true;
    default:
      return false;
  }
}
// Returns true when |url| uses the http or ftp scheme, i.e. one of the schemes
// GetUpgradedUrl() accepts.
bool IsUpgradeableUrl(const GURL& url) {
  if (url.SchemeIs(url::kHttpScheme)) {
    return true;
  }
  return url.SchemeIs(url::kFtpScheme);
}
// Returns a copy of |url| whose scheme has been replaced with HTTPS. Callers
// must only pass URLs for which IsUpgradeableUrl() returns true.
GURL GetUpgradedUrl(const GURL& url) {
  DCHECK(IsUpgradeableUrl(url));

  GURL::Replacements https_scheme;
  https_scheme.SetSchemeStr(url::kHttpsScheme);
  return url.ReplaceComponents(https_scheme);
}
// Reports whether |vec| holds no elements. A null |vec| counts as empty.
template <typename T>
bool IsEmpty(const flatbuffers::Vector<T>* vec) {
  if (vec == nullptr) {
    return true;
  }
  return vec->size() == 0;
}
// Applies the query-parameter edits described by |transform| (key removals and
// add-or-replace pairs) to the query of |url|. Returns true and writes the
// rewritten query to |modified_query| only when the edits actually change the
// query; otherwise returns false and leaves |modified_query| untouched.
bool GetModifiedQuery(const GURL& url,
                      const flat::UrlTransform& transform,
                      std::string* modified_query) {
  DCHECK(modified_query);

  // The keys to remove must be sorted: they are binary searched below.
  DCHECK(
      IsEmpty(transform.remove_query_params()) ||
      std::is_sorted(transform.remove_query_params()->begin(),
                     transform.remove_query_params()->end(),
                     [](const flatbuffers::String* lhs,
                        const flatbuffers::String* rhs) { return *lhs < *rhs; }));

  const bool has_removals = !IsEmpty(transform.remove_query_params());
  const bool has_upserts = !IsEmpty(transform.add_or_replace_query_params());

  // Bail out when the transform carries no query edits at all.
  if (!has_removals && !has_upserts) {
    return false;
  }

  std::vector<std::string_view> keys_to_remove;
  if (has_removals) {
    keys_to_remove.reserve(transform.remove_query_params()->size());
    for (const ::flatbuffers::String* removed_key :
         *transform.remove_query_params()) {
      keys_to_remove.push_back(removed_key->string_view());
    }
  }

  // The add-or-replace pairs are kept as a sequence rather than a key -> values
  // map so that the relative order chosen by the extension is preserved. A
  // std::list is used because entries are erased from the middle once they
  // have been consumed. The keys and values should already be escaped.
  struct PendingParam {
    std::string_view key;
    std::string_view value;
    bool replace_only = false;
  };
  std::list<PendingParam> pending_params;
  if (has_upserts) {
    for (const flat::QueryKeyValue* entry :
         *transform.add_or_replace_query_params()) {
      DCHECK(entry->key());
      DCHECK(entry->value());
      pending_params.push_back(PendingParam{entry->key()->string_view(),
                                            entry->value()->string_view(),
                                            entry->replace_only()});
    }
  }

  // Formats a single "key=value" component of the query.
  auto make_part = [](std::string_view name,
                      std::string_view content) -> std::string {
    return base::StrCat({name, "=", content});
  };

  std::vector<std::string> parts;
  bool changed = false;

  // Walk the existing query in order, dropping, replacing or copying each
  // parameter.
  for (net::QueryIterator query_it(url); !query_it.IsAtEnd();
       query_it.Advance()) {
    const std::string_view name = query_it.GetKey();

    // Parameters listed for removal are simply not emitted.
    if (std::binary_search(keys_to_remove.begin(), keys_to_remove.end(),
                           name)) {
      changed = true;
      continue;
    }

    const auto pending =
        std::ranges::find(pending_params, name, &PendingParam::key);
    if (pending != pending_params.end()) {
      // Substitute the value and consume the pending entry so that it is
      // applied to at most one occurrence of the key.
      changed = true;
      parts.push_back(make_part(name, pending->value));
      pending_params.erase(pending);
    } else {
      // Untouched parameter: copy it through as is.
      parts.push_back(make_part(name, query_it.GetValue()));
    }
  }

  // Whatever is still pending did not match an existing key. Such entries are
  // appended, except those flagged as replace-only.
  for (const PendingParam& leftover : pending_params) {
    if (leftover.replace_only) {
      continue;
    }
    parts.push_back(make_part(leftover.key, leftover.value));
    changed = true;
  }

  if (!changed) {
    return false;
  }

  *modified_query = base::JoinString(parts, "&");
  return true;
}
// Builds the URL obtained by applying |transform| to the request URL held by
// |params|. Every URL component is replaced, cleared or left untouched
// according to the matching fields of |transform|.
GURL GetTransformedURL(const RequestParams& params,
                       const flat::UrlTransform& transform) {
  GURL::Replacements components;

  if (const auto* new_scheme = transform.scheme()) {
    components.SetSchemeStr(new_scheme->string_view());
  }

  if (const auto* new_host = transform.host()) {
    components.SetHostStr(new_host->string_view());
  }

  // Clearing a component and setting it are mutually exclusive.
  DCHECK(!transform.clear_port() || !transform.port());
  if (transform.clear_port()) {
    components.ClearPort();
  } else if (const auto* new_port = transform.port()) {
    components.SetPortStr(new_port->string_view());
  }

  DCHECK(!transform.clear_path() || !transform.path());
  if (transform.clear_path()) {
    components.ClearPath();
  } else if (const auto* new_path = transform.path()) {
    components.SetPathStr(new_path->string_view());
  }

  // url::Replacements does not own the strings handed to it, so the rebuilt
  // query has to live in this scope until ReplaceComponents() has run.
  std::string rebuilt_query;
  if (transform.clear_query()) {
    components.ClearQuery();
  } else if (const auto* fixed_query = transform.query()) {
    components.SetQueryStr(fixed_query->string_view());
  } else if (!GetModifiedQuery(*params.url, transform, &rebuilt_query)) {
    // No query edits apply: the original query is kept.
  } else if (rebuilt_query.empty()) {
    components.ClearQuery();
  } else {
    components.SetQueryStr(rebuilt_query);
  }

  DCHECK(!transform.clear_fragment() || !transform.fragment());
  if (transform.clear_fragment()) {
    components.ClearRef();
  } else if (const auto* new_fragment = transform.fragment()) {
    components.SetRefStr(new_fragment->string_view());
  }

  if (const auto* new_password = transform.password()) {
    components.SetPasswordStr(new_password->string_view());
  }

  if (const auto* new_username = transform.username()) {
    components.SetUsernameStr(new_username->string_view());
  }

  return params.url->ReplaceComponents(components);
}
} // namespace
// Records |extension_id| and |ruleset_id| in the corresponding members; no
// other work is done at construction time.
RulesetMatcherBase::RulesetMatcherBase(const ExtensionId& extension_id,
RulesetID ruleset_id)
: extension_id_(extension_id), ruleset_id_(ruleset_id) {}
// Defaulted out of line: nothing beyond member destruction is performed.
RulesetMatcherBase::~RulesetMatcherBase() = default;
// Returns the highest priority action for the request described by |params| at
// the given matching |stage|, considering both the rules matched by the
// request itself and any allowlist entry recorded for its parent frame.
std::optional<RequestAction> RulesetMatcherBase::GetAction(
    const RequestParams& params,
    RulesetMatchingStage stage) const {
  // Action obtained for the request on its own, ancestors not considered.
  std::optional<RequestAction> own_action =
      GetActionIgnoringAncestors(params, stage);

  // Action recorded for the parent frame, if that frame is allowlisted.
  std::optional<RequestAction> inherited_action =
      GetAllowlistedFrameAction(params.parent_routing_id);

  return GetMaxPriorityAction(std::move(own_action),
                              std::move(inherited_action));
}
// Propagates the allowlist entry of the parent (or outer document) of |host|,
// when there is one, to the newly created frame |host|.
void RulesetMatcherBase::OnRenderFrameCreated(content::RenderFrameHost* host) {
  DCHECK(host);

  content::RenderFrameHost* const ancestor = host->GetParentOrOuterDocument();
  if (ancestor == nullptr) {
    return;
  }

  // Some frames like srcdoc frames inherit URLLoaderFactories from their
  // parents and can make network requests before a corresponding navigation
  // commit for the frame is received in the browser (via DidFinishNavigation).
  // An allowlisted parent therefore allowlists the new frame right away.
  std::optional<RequestAction> inherited_action =
      GetAllowlistedFrameAction(ancestor->GetGlobalId());
  if (!inherited_action.has_value()) {
    return;
  }

  // A frame that has just been created is not expected to be tracked yet.
  const bool inserted =
      allowlisted_frames_
          .insert(std::make_pair(host->GetGlobalId(),
                                 std::move(*inherited_action)))
          .second;
  DCHECK(inserted);
}
// Drops the allowlist entry, if any, recorded for the frame |host|.
void RulesetMatcherBase::OnRenderFrameDeleted(content::RenderFrameHost* host) {
  DCHECK(host);

  const content::GlobalRenderFrameHostId deleted_frame_id =
      host->GetGlobalId();
  allowlisted_frames_.erase(deleted_frame_id);
}
// Recomputes the allowlist entry of the frame targeted by |navigation_handle|:
// any previous entry for the frame is dropped and a new one is stored when an
// allowAllRequests action applies to the navigation or to its parent frame.
void RulesetMatcherBase::OnDidFinishNavigation(
    content::NavigationHandle* navigation_handle) {
  content::RenderFrameHost* const frame =
      navigation_handle->GetRenderFrameHost();

  // The entry for a frame is refreshed on each navigation handled here. Frames
  // created under an allowlisted parent are additionally seeded in
  // OnRenderFrameCreated, since they may issue requests before their own
  // navigation commit is observed.
  DCHECK(frame);

  const bool is_post = navigation_handle->IsPost();
  RequestParams params(frame, is_post, /*response_headers=*/nullptr);

  // Highest priority allowAllRequests action for this frame among the rules
  // matched in the onBeforeRequest stage.
  std::optional<RequestAction> frame_action =
      GetAllowAllRequestsAction(params, RulesetMatchingStage::kOnBeforeRequest);

  // The only navigation requests that match DNR rules in the OnHeadersReceived
  // request phase are HTTP/HTTPS and will have response headers. Therefore a
  // navigation request:
  //  - with response headers is matched against the rules of both the
  //    `kOnBeforeRequest` and `kOnHeadersReceived` stages;
  //  - without response headers is only matched against the rules of the
  //    `kOnBeforeRequest` stage.
  // TODO(crbug.com/331846139): Add filtering logic to limit which requests can
  // be matched here, similar to what's done in the webrequest event router for
  // OnBeforeRequest and OnHeadersReceived.
  if (navigation_handle->GetResponseHeaders()) {
    // The allow rule cache from `params` does not need to be copied into
    // `params_with_headers` since it won't have an effect on the final value of
    // `frame_action`.
    RequestParams params_with_headers(frame, is_post,
                                      navigation_handle->GetResponseHeaders());

    // Keep whichever allowAllRequests action has the highest priority across
    // the matching stages this navigation is eligible for.
    std::optional<RequestAction> headers_stage_action =
        GetAllowAllRequestsAction(params_with_headers,
                                  RulesetMatchingStage::kOnHeadersReceived);
    frame_action = GetMaxPriorityAction(std::move(frame_action),
                                        std::move(headers_stage_action));
  }

  // An allowlisted parent frame also contributes its action.
  std::optional<RequestAction> parent_action =
      GetAllowlistedFrameAction(params.parent_routing_id);
  std::optional<RequestAction> winning_action =
      GetMaxPriorityAction(std::move(parent_action), std::move(frame_action));

  // Replace whatever was previously tracked for this frame.
  const content::GlobalRenderFrameHostId frame_id = frame->GetGlobalId();
  allowlisted_frames_.erase(frame_id);
  if (winning_action.has_value()) {
    allowlisted_frames_.insert(
        std::make_pair(frame_id, std::move(*winning_action)));
  }
}
// Test-only accessor: returns the allowlist action recorded for the frame
// |host|, or std::nullopt when the frame is not allowlisted.
std::optional<RequestAction>
RulesetMatcherBase::GetAllowlistedFrameActionForTesting(
    content::RenderFrameHost* host) const {
  DCHECK(host);

  const content::GlobalRenderFrameHostId queried_frame_id =
      host->GetGlobalId();
  return GetAllowlistedFrameAction(queried_frame_id);
}
// Builds the action for a request blocked by |rule|: a COLLAPSE action when
// the element type of the request is collapsible, a BLOCK action otherwise.
RequestAction RulesetMatcherBase::CreateBlockOrCollapseRequestAction(
    const RequestParams& params,
    const flat_rule::UrlRule& rule) const {
  if (ShouldCollapseResourceType(params.element_type)) {
    return CreateRequestAction(RequestAction::Type::COLLAPSE, rule);
  }
  return CreateRequestAction(RequestAction::Type::BLOCK, rule);
}
// Builds an ALLOW action attributed to |rule|. |params| is not consulted by
// this implementation.
RequestAction RulesetMatcherBase::CreateAllowAction(
const RequestParams& params,
const flat_rule::UrlRule& rule) const {
return CreateRequestAction(RequestAction::Type::ALLOW, rule);
}
// Builds an ALLOW_ALL_REQUESTS action attributed to |rule|. |params| is not
// consulted by this implementation.
RequestAction RulesetMatcherBase::CreateAllowAllRequestsAction(
const RequestParams& params,
const url_pattern_index::flat::UrlRule& rule) const {
return CreateRequestAction(RequestAction::Type::ALLOW_ALL_REQUESTS, rule);
}
// Builds an UPGRADE action attributed to |rule| whose redirect URL is the
// request URL with its scheme switched to HTTPS. Returns std::nullopt when the
// request URL is not upgradeable.
std::optional<RequestAction> RulesetMatcherBase::CreateUpgradeAction(
    const RequestParams& params,
    const url_pattern_index::flat::UrlRule& rule) const {
  const GURL& request_url = *params.url;

  const bool upgradeable = IsUpgradeableUrl(request_url);
  if (!upgradeable) {
    // TODO(crbug.com/40111509): this results in counterintuitive behavior.
    return std::nullopt;
  }

  RequestAction action =
      CreateRequestAction(RequestAction::Type::UPGRADE, rule);
  action.redirect_url = GetUpgradedUrl(request_url);
  return action;
}
// Resolves the redirect target of |rule| from its entry in |metadata_list|
// (either a fixed redirect URL or a transform of the request URL) and
// delegates to CreateRedirectAction() to build the action.
std::optional<RequestAction>
RulesetMatcherBase::CreateRedirectActionFromMetadata(
    const RequestParams& params,
    const url_pattern_index::flat::UrlRule& rule,
    const ExtensionMetadataList& metadata_list) const {
  // |metadata_list| is sorted by rule id, so LookupByKey can binary search for
  // the UrlRuleMetadata that belongs to |rule|.
  const flat::UrlRuleMetadata* const rule_metadata =
      metadata_list.LookupByKey(rule.id());

  // Every such rule is expected to have a metadata entry carrying either a
  // redirect URL or a transform.
  DCHECK(rule_metadata);
  DCHECK_EQ(rule_metadata->id(), rule.id());
  DCHECK(rule_metadata->redirect_url() || rule_metadata->transform());

  GURL target_url =
      rule_metadata->redirect_url()
          ? GURL(rule_metadata->redirect_url()->string_view())
          : GetTransformedURL(params, *rule_metadata->transform());

  // Sanity check that we don't redirect to a javascript url. Specifying
  // redirect to a javascript url and specifying javascript as a transform
  // scheme is prohibited. In addition extensions can't intercept requests to
  // javascript urls. Hence we should never end up with a javascript url here.
  DCHECK(!target_url.SchemeIs(url::kJavaScriptScheme));

  return CreateRedirectAction(params, rule, std::move(target_url));
}
// Builds a REDIRECT action attributed to |rule| that targets |redirect_url|.
// Returns std::nullopt when the request is a WebSocket handshake, when
// |redirect_url| is invalid, or when it equals the request URL.
std::optional<RequestAction> RulesetMatcherBase::CreateRedirectAction(
    const RequestParams& params,
    const url_pattern_index::flat::UrlRule& rule,
    GURL redirect_url) const {
  // Redirecting WebSocket handshake request is prohibited.
  // TODO(crbug.com/40111509): this results in counterintuitive behavior.
  const bool is_websocket =
      params.element_type == flat_rule::ElementType_WEBSOCKET;
  if (is_websocket) {
    return std::nullopt;
  }

  // An invalid target cannot be redirected to.
  if (!redirect_url.is_valid()) {
    return std::nullopt;
  }

  // Prevent a redirect loop where a URL continuously redirects to itself.
  if (*params.url == redirect_url) {
    return std::nullopt;
  }

  RequestAction action =
      CreateRequestAction(RequestAction::Type::REDIRECT, rule);
  action.redirect_url = std::move(redirect_url);
  return action;
}
// Builds one MODIFY_HEADERS action per entry of |rules|, in the same order.
// The request and response header modifications of each action are copied
// from the UrlRuleMetadata entry of |metadata_list| that has the rule's id.
// |params| is not consulted by this implementation.
std::vector<RequestAction>
RulesetMatcherBase::GetModifyHeadersActionsFromMetadata(
    const RequestParams& params,
    const std::vector<const url_pattern_index::flat::UrlRule*>& rules,
    const ExtensionMetadataList& metadata_list) const {
  using FlatHeaderList = flatbuffers::Vector<flatbuffers::Offset<
      extensions::declarative_net_request::flat::ModifyHeaderInfo>>;

  // Helper method to convert a list of headers from a rule's metadata to a list
  // of RequestAction::HeaderInfo.
  auto get_headers_for_action = [](const FlatHeaderList& headers_for_rule) {
    std::vector<RequestAction::HeaderInfo> headers_for_action;
    // The final size is known up front, so allocate once.
    headers_for_action.reserve(headers_for_rule.size());
    for (const auto* flat_header_info : headers_for_rule) {
      headers_for_action.emplace_back(*flat_header_info);
    }
    return headers_for_action;
  };

  std::vector<RequestAction> actions;
  // Exactly one action is produced per rule.
  actions.reserve(rules.size());
  for (const auto* rule : rules) {
    const flat::UrlRuleMetadata* metadata =
        metadata_list.LookupByKey(rule->id());
    DCHECK(metadata);
    DCHECK_EQ(metadata->id(), rule->id());

    RequestAction action =
        CreateRequestAction(RequestAction::Type::MODIFY_HEADERS, *rule);

    // NOTE(review): both header vectors are dereferenced unconditionally;
    // presumably they are always present in the metadata of such rules -
    // confirm against the code that writes the metadata.
    action.request_headers_to_modify =
        get_headers_for_action(*metadata->request_headers());
    action.response_headers_to_modify =
        get_headers_for_action(*metadata->response_headers());
    actions.push_back(std::move(action));
  }
  return actions;
}
// Builds a RequestAction of the given |type| from the id and priority of
// |rule| together with this matcher's ruleset_id() and extension_id().
RequestAction RulesetMatcherBase::CreateRequestAction(
RequestAction::Type type,
const flat_rule::UrlRule& rule) const {
return RequestAction(type, rule.id(), rule.priority(), ruleset_id(),
extension_id());
}
// Returns a clone of the action recorded for the frame |frame_id| in
// |allowlisted_frames_|, or std::nullopt when the frame has no entry.
std::optional<RequestAction> RulesetMatcherBase::GetAllowlistedFrameAction(
    content::GlobalRenderFrameHostId frame_id) const {
  if (const auto entry = allowlisted_frames_.find(frame_id);
      entry != allowlisted_frames_.end()) {
    return entry->second.Clone();
  }
  return std::nullopt;
}
} // namespace extensions::declarative_net_request