// blob: 64c99e5e89639f6549f7722e4ed4288a92e451f0
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "extensions/browser/api/declarative_net_request/ruleset_manager.h"
#include <algorithm>
#include <tuple>
#include <utility>
#include "base/logging.h"
#include "base/metrics/histogram_macros.h"
#include "base/optional.h"
#include "base/stl_util.h"
#include "components/web_cache/browser/web_cache_manager.h"
#include "content/public/browser/browser_thread.h"
#include "content/public/browser/resource_request_info.h"
#include "extensions/browser/api/declarative_net_request/ruleset_matcher.h"
#include "extensions/browser/api/web_request/web_request_info.h"
#include "extensions/browser/api/web_request/web_request_permissions.h"
#include "extensions/browser/info_map.h"
#include "extensions/common/api/declarative_net_request/utils.h"
#include "extensions/common/constants.h"
#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
#include "url/origin.h"
namespace extensions {
namespace declarative_net_request {
namespace {
namespace flat_rule = url_pattern_index::flat;
// Describes the different cases pertaining to initiator checks to find the main
// frame url for a main frame subresource. Each value is recorded to the
// "Extensions.DeclarativeNetRequest.PageWhitelistingInitiatorCheck" histogram
// by IsRequestPageAllowed().
// These values are persisted to logs. Entries should not be renumbered and
// numeric values should never be reused.
enum class PageAllowingInitiatorCheck {
// The request carried no initiator, so no origin comparison was possible.
kInitiatorAbsent = 0,
// The initiator matched the origin of neither the pending nor the last
// committed main frame url.
kNeitherCandidateMatchesInitiator = 1,
// Only the last committed main frame url's origin matched the initiator.
kCommittedCandidateMatchesInitiator = 2,
// Only the pending main frame url's origin matched the initiator.
kPendingCandidateMatchesInitiator = 3,
// Both candidate urls' origins matched the initiator, so the check could not
// discriminate between them.
kBothCandidatesMatchInitiator = 4,
// Alias of the highest-valued enumerator.
// NOTE(review): presumably consumed by the two-argument
// UMA_HISTOGRAM_ENUMERATION form as the histogram bound - confirm against
// base/metrics/histogram_macros.h. Must be updated when a value is appended.
kMaxValue = kBothCandidatesMatchInitiator,
};
// Translates a content::ResourceType into the flat_rule::ElementType used when
// matching rules. Several resource types share one element type (e.g. all
// worker flavours are treated as scripts); types without a dedicated element
// type map to ElementType_OTHER.
flat_rule::ElementType GetElementType(content::ResourceType type) {
  // Deliberately no default label: every enumerator is listed explicitly.
  switch (type) {
    // Frame loads.
    case content::RESOURCE_TYPE_MAIN_FRAME:
      return flat_rule::ElementType_MAIN_FRAME;
    case content::RESOURCE_TYPE_SUB_FRAME:
      return flat_rule::ElementType_SUBDOCUMENT;

    // Scripts, including every kind of worker.
    case content::RESOURCE_TYPE_SCRIPT:
    case content::RESOURCE_TYPE_WORKER:
    case content::RESOURCE_TYPE_SHARED_WORKER:
    case content::RESOURCE_TYPE_SERVICE_WORKER:
      return flat_rule::ElementType_SCRIPT;

    // Presentation resources.
    case content::RESOURCE_TYPE_STYLESHEET:
      return flat_rule::ElementType_STYLESHEET;
    case content::RESOURCE_TYPE_IMAGE:
    case content::RESOURCE_TYPE_FAVICON:
      return flat_rule::ElementType_IMAGE;
    case content::RESOURCE_TYPE_FONT_RESOURCE:
      return flat_rule::ElementType_FONT;
    case content::RESOURCE_TYPE_MEDIA:
      return flat_rule::ElementType_MEDIA;

    // Plugins and embedded objects.
    case content::RESOURCE_TYPE_OBJECT:
    case content::RESOURCE_TYPE_PLUGIN_RESOURCE:
      return flat_rule::ElementType_OBJECT;

    // Script-initiated and reporting traffic.
    case content::RESOURCE_TYPE_XHR:
      return flat_rule::ElementType_XMLHTTPREQUEST;
    case content::RESOURCE_TYPE_PING:
      return flat_rule::ElementType_PING;
    case content::RESOURCE_TYPE_CSP_REPORT:
      return flat_rule::ElementType_CSP_REPORT;

    // Everything that has no dedicated element type.
    case content::RESOURCE_TYPE_SUB_RESOURCE:
    case content::RESOURCE_TYPE_PREFETCH:
    case content::RESOURCE_TYPE_LAST_TYPE:
      return flat_rule::ElementType_OTHER;
  }

  // Only reachable if |type| holds a value outside the enumeration.
  NOTREACHED();
  return flat_rule::ElementType_OTHER;
}
// Determines the flat_rule::ElementType to use when matching |request|.
// WebSocket requests are recognised by their url scheme; all other requests
// are classified by |request.type|, falling back to ElementType_OTHER when the
// type is not set.
flat_rule::ElementType GetElementType(const WebRequestInfo& request) {
  // The scheme check takes precedence over |request.type|.
  if (request.url.SchemeIsWSOrWSS())
    return flat_rule::ElementType_WEBSOCKET;

  if (!request.type.has_value())
    return flat_rule::ElementType_OTHER;

  return GetElementType(request.type.value());
}
// Reports whether a request to |url| is third party relative to
// |document_origin|. A unique origin is always treated as third party;
// otherwise the request is first party only if |url| and |document_origin|
// share the same domain or host (private registries included).
// TODO(crbug.com/696822): Look into caching this.
bool IsThirdPartyRequest(const GURL& url, const url::Origin& document_origin) {
  // The domain comparison is skipped entirely for unique origins.
  const bool is_same_party =
      !document_origin.unique() &&
      net::registry_controlled_domains::SameDomainOrHost(
          url, document_origin,
          net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
  return !is_same_party;
}
// Asks the WebCacheManager singleton to clear caches on navigation. As the
// name indicates, this is meant to run on the UI thread;
// ClearRendererCacheOnNavigation() takes care of dispatching it there.
void ClearRendererCacheOnUI() {
  auto* cache_manager = web_cache::WebCacheManager::GetInstance();
  cache_manager->ClearCacheOnNavigation();
}
// Arranges for each renderer's in-memory cache to be cleared the next time it
// navigates. May be called from any browser thread: the work is executed
// synchronously when already on the UI thread and posted to it otherwise.
void ClearRendererCacheOnNavigation() {
  if (!content::BrowserThread::CurrentlyOn(content::BrowserThread::UI)) {
    // Not on the UI thread: hop over to it asynchronously.
    content::BrowserThread::PostTask(content::BrowserThread::UI, FROM_HERE,
                                     base::BindOnce(&ClearRendererCacheOnUI));
    return;
  }

  ClearRendererCacheOnUI();
}
// Returns true if |request| came from a page from the set of
// |allowed_pages|. This necessitates finding the main frame url
// corresponding to |request|. The logic behind how this is done is subtle and
// as follows:
// - Requests made by the browser (not including navigation/frame requests) or
// service worker: These requests don't correspond to a render frame and
// hence they are not considered for allowing using the page
// allowing API.
// - Requests that correspond to a page: These include:
// - Main frame request: To check if it is allowed, check the request
// url against the set of allowed pages.
// - Main frame subresource request: We might not be able to
// deterministically map a main frame subresource to the main frame url.
// This is because when a main frame subresource request reaches the
// browser, the main frame navigation would have been committed in the
// renderer, but the browser may not have been notified of the commit.
// Hence the FrameData for the request may not have the correct value for
// the |last_committed_main_frame_url|. To get around this we use
// FrameData's |pending_main_frame_url| which is populated in
// WebContentsObserver::ReadyToCommitNavigation. This happens before the
// renderer is asked to commit the navigation.
// - Subframe subresources: When a subframe subresource request reaches the
// browser, it is assured that the browser knows about its parent frame
// commit. For these requests, use the |last_committed_main_frame_url| and
// match it against the set of allowed pages.
bool IsRequestPageAllowed(const WebRequestInfo& request,
const URLPatternSet& allowed_pages) {
if (allowed_pages.is_empty())
return false;
// If this is a main frame request, |request.url| will be the main frame url.
if (request.type == content::RESOURCE_TYPE_MAIN_FRAME)
return allowed_pages.MatchesURL(request.url);
// This should happen for:
// - Requests not corresponding to a render frame e.g. non-navigation
// browser requests or service worker requests.
// - Requests made by a render frame but when we don't have cached FrameData
// for the request. This should occur rarely and is tracked by the
// "Extensions.ExtensionFrameMapCacheHit" histogram
if (!request.frame_data)
return false;
const bool evaluate_pending_main_frame_url =
request.frame_data->pending_main_frame_url &&
*request.frame_data->pending_main_frame_url !=
request.frame_data->last_committed_main_frame_url;
if (!evaluate_pending_main_frame_url) {
return allowed_pages.MatchesURL(
request.frame_data->last_committed_main_frame_url);
}
// |pending_main_frame_url| should only be set for main-frame subresource
// loads.
DCHECK_EQ(ExtensionApiFrameIdMap::kTopFrameId, request.frame_data->frame_id);
auto log_uma = [](PageAllowingInitiatorCheck value) {
UMA_HISTOGRAM_ENUMERATION(
"Extensions.DeclarativeNetRequest.PageWhitelistingInitiatorCheck",
value);
};
// At this point, we are evaluating a main-frame subresource. There are two
// candidate main frame urls - |pending_main_frame_url| and
// |last_committed_main_frame_url|. To predict the correct main frame url,
// compare the request initiator (origin of the requesting frame i.e. origin
// of the main frame in this case) with the candidate urls' origins. If only
// one of the candidate url's origin matches the request initiator, we can be
// reasonably sure that it is the correct main frame url.
if (!request.initiator) {
log_uma(PageAllowingInitiatorCheck::kInitiatorAbsent);
} else {
const bool initiator_matches_pending_url =
url::Origin::Create(*request.frame_data->pending_main_frame_url) ==
*request.initiator;
const bool initiator_matches_committed_url =
url::Origin::Create(
request.frame_data->last_committed_main_frame_url) ==
*request.initiator;
if (initiator_matches_pending_url && !initiator_matches_committed_url) {
// We predict that |pending_main_frame_url| is the actual main frame url.
log_uma(PageAllowingInitiatorCheck::kPendingCandidateMatchesInitiator);
return allowed_pages.MatchesURL(
*request.frame_data->pending_main_frame_url);
}
if (initiator_matches_committed_url && !initiator_matches_pending_url) {
// We predict that |last_committed_main_frame_url| is the actual main
// frame url.
log_uma(PageAllowingInitiatorCheck::kCommittedCandidateMatchesInitiator);
return allowed_pages.MatchesURL(
request.frame_data->last_committed_main_frame_url);
}
if (initiator_matches_pending_url && initiator_matches_committed_url) {
log_uma(PageAllowingInitiatorCheck::kBothCandidatesMatchInitiator);
} else {
DCHECK(!initiator_matches_pending_url);
DCHECK(!initiator_matches_committed_url);
log_uma(PageAllowingInitiatorCheck::kNeitherCandidateMatchesInitiator);
}
}
// If we are not able to correctly predict the main frame url, simply test
// against both the possible URLs. This means a small proportion of main frame
// subresource requests might be incorrectly allowed by the page
// allowing API.
return allowed_pages.MatchesURL(
request.frame_data->last_committed_main_frame_url) ||
allowed_pages.MatchesURL(*request.frame_data->pending_main_frame_url);
}
// Returns true if a blocked request of element type |type| should be reported
// as collapsed rather than merely blocked. Currently that is the case for
// images and subdocuments only.
bool ShouldCollapseResourceType(flat_rule::ElementType type) {
  // TODO(crbug.com/848842): Add support for other element types like
  // OBJECT.
  switch (type) {
    case flat_rule::ElementType_IMAGE:
    case flat_rule::ElementType_SUBDOCUMENT:
      return true;
    default:
      return false;
  }
}
} // namespace
// Creates a manager that consults |info_map| for extension state.
// |info_map| is stored as-is (not owned) and must be non-null.
RulesetManager::RulesetManager(const InfoMap* info_map) : info_map_(info_map) {
  // Construction may happen on any sequence, so the checker is detached here
  // rather than being tied to the constructing sequence.
  DETACH_FROM_SEQUENCE(sequence_checker_);

  DCHECK(info_map_);
}
// Destroys the manager. Asserts via |sequence_checker_| that destruction
// happens on the sequence the other member functions are checked against.
RulesetManager::~RulesetManager() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
}
// Registers the ruleset of the extension identified by |extension_id|.
// Ownership of |ruleset_matcher| and |allowed_pages| is transferred to the
// newly created entry, which is also tagged with the extension's install time
// as reported by |info_map_|. Adding a ruleset for an extension that already
// has one is a programming error (DCHECK). Notifies the test observer, if any,
// of the new ruleset count and schedules a renderer cache clear.
void RulesetManager::AddRuleset(const ExtensionId& extension_id,
                                std::unique_ptr<RulesetMatcher> ruleset_matcher,
                                URLPatternSet allowed_pages) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  DCHECK(IsAPIAvailable());

  // emplace() reports through the pair's second member whether a new entry was
  // actually created.
  const bool inserted =
      rulesets_
          .emplace(extension_id, info_map_->GetInstallTime(extension_id),
                   std::move(ruleset_matcher), std::move(allowed_pages))
          .second;
  DCHECK(inserted) << "AddRuleset called twice in succession for "
                   << extension_id;

  if (test_observer_)
    test_observer_->OnRulesetCountChanged(rulesets_.size());

  // Renderers must drop their caches so that the new rules are taken into
  // account.
  ClearRendererCacheOnNavigation();
}
// Unregisters the ruleset belonging to |extension_id|. Calling this for an
// extension without a registered ruleset is a programming error (DCHECK).
// Notifies the test observer, if any, of the new ruleset count and schedules a
// renderer cache clear.
void RulesetManager::RemoveRuleset(const ExtensionId& extension_id) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  DCHECK(IsAPIAvailable());

  // Selects the entry (if any) owned by |extension_id|.
  const auto belongs_to_extension =
      [&extension_id](const ExtensionRulesetData& candidate) {
        return candidate.extension_id == extension_id;
      };

  DCHECK(std::any_of(rulesets_.begin(), rulesets_.end(), belongs_to_extension))
      << "RemoveRuleset called without a corresponding AddRuleset for "
      << extension_id;

  base::EraseIf(rulesets_, belongs_to_extension);

  if (test_observer_)
    test_observer_->OnRulesetCountChanged(rulesets_.size());

  // Renderers must drop their caches so that the removal of these rules is
  // taken into account.
  ClearRendererCacheOnNavigation();
}
// Replaces the set of allowed pages stored for |extension_id| with
// |allowed_pages|. A ruleset for |extension_id| must already have been added
// (DCHECK). Schedules a renderer cache clear afterwards.
void RulesetManager::UpdateAllowedPages(const ExtensionId& extension_id,
                                        URLPatternSet allowed_pages) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  DCHECK(IsAPIAvailable());

  const auto belongs_to_extension =
      [&extension_id](const ExtensionRulesetData& candidate) {
        return candidate.extension_id == extension_id;
      };

  // A linear scan is acceptable: the number of extensions is expected to be
  // small, and the rulesets have to stay sorted in decreasing order of
  // installation time, which rules out lookup by extension id.
  auto entry =
      std::find_if(rulesets_.begin(), rulesets_.end(), belongs_to_extension);

  // Callers guarantee that ExtensionRulesetData exists for |extension_id|.
  DCHECK(entry != rulesets_.end());
  entry->allowed_pages = std::move(allowed_pages);

  // Renderers must drop their caches so that the updated allowed pages are
  // taken into account.
  ClearRendererCacheOnNavigation();
}
// Decides what the registered rulesets want to happen to |request|.
//   - Returns Action::NONE if the request is exempt from evaluation or no rule
//     applies.
//   - Returns Action::COLLAPSE or Action::BLOCK if any eligible ruleset blocks
//     the request (COLLAPSE for collapsible element types).
//   - Returns Action::REDIRECT if no ruleset blocks the request and an eligible
//     ruleset redirects it; |redirect_url| is then filled in by the matcher.
// Blocking always takes precedence over redirecting. |is_incognito_context|
// states whether the request belongs to an incognito context. |redirect_url|
// must be non-null.
RulesetManager::Action RulesetManager::EvaluateRequest(
    const WebRequestInfo& request,
    bool is_incognito_context,
    GURL* redirect_url) const {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  DCHECK(redirect_url);

  if (!ShouldEvaluateRequest(request))
    return Action::NONE;

  // Times everything below, across all extensions.
  SCOPED_UMA_HISTOGRAM_TIMER(
      "Extensions.DeclarativeNetRequest.EvaluateRequestTime.AllExtensions");

  if (test_observer_)
    test_observer_->OnEvaluateRequest(request, is_incognito_context);

  // Request properties shared by every matcher invocation.
  const GURL& url = request.url;
  const url::Origin first_party_origin =
      request.initiator.value_or(url::Origin());
  const flat_rule::ElementType element_type = GetElementType(request);
  const bool is_third_party = IsThirdPartyRequest(url, first_party_origin);

  // Remembers, per ruleset (in iteration order), the outcome of
  // ShouldEvaluateRulesetForRequest() so that the redirect pass does not have
  // to recompute it.
  std::vector<bool> ruleset_is_eligible(rulesets_.size());

  // Pass 1: does any extension want the request blocked?
  size_t position = 0;
  for (const auto& ruleset : rulesets_) {
    const bool eligible = ShouldEvaluateRulesetForRequest(
        ruleset, request, is_incognito_context);
    ruleset_is_eligible[position++] = eligible;
    if (!eligible)
      continue;

    if (!ruleset.matcher->ShouldBlockRequest(url, first_party_origin,
                                             element_type, is_third_party)) {
      continue;
    }

    if (ShouldCollapseResourceType(element_type))
      return Action::COLLAPSE;
    return Action::BLOCK;
  }

  // The request is not blocked. Redirecting a WebSocket handshake request is
  // prohibited, so such requests are done here.
  if (element_type == flat_rule::ElementType_WEBSOCKET)
    return Action::NONE;

  // Pass 2: does any extension want the request redirected? Rulesets are
  // visited in decreasing order of extension installation time, so more
  // recently installed extensions get priority in choosing the redirect url.
  position = 0;
  for (const auto& ruleset : rulesets_) {
    if (!ruleset_is_eligible[position++])
      continue;

    if (ruleset.matcher->ShouldRedirectRequest(url, first_party_origin,
                                               element_type, is_third_party,
                                               redirect_url)) {
      return Action::REDIRECT;
    }
  }

  return Action::NONE;
}
// Installs |observer| as the test observer that is notified from AddRuleset(),
// RemoveRuleset() and EvaluateRequest(). The pointer is stored as-is; passing
// null stops notifications since every call site checks |test_observer_|
// before use.
void RulesetManager::SetObserverForTest(TestObserver* observer) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
test_observer_ = observer;
}
// Bundles everything the manager tracks for one extension's ruleset.
// |extension_id| and |extension_install_time| are copied; |matcher| and
// |allowed_pages| are moved into the new object, which takes ownership of
// them.
RulesetManager::ExtensionRulesetData::ExtensionRulesetData(
const ExtensionId& extension_id,
const base::Time& extension_install_time,
std::unique_ptr<RulesetMatcher> matcher,
URLPatternSet allowed_pages)
: extension_id(extension_id),
extension_install_time(extension_install_time),
matcher(std::move(matcher)),
allowed_pages(std::move(allowed_pages)) {}
// Compiler-generated destructor, defined out of line in this file.
RulesetManager::ExtensionRulesetData::~ExtensionRulesetData() = default;
// Compiler-generated member-wise move constructor, defined out of line in this
// file.
RulesetManager::ExtensionRulesetData::ExtensionRulesetData(
ExtensionRulesetData&& other) = default;
// Compiler-generated member-wise move assignment, defined out of line in this
// file.
RulesetManager::ExtensionRulesetData& RulesetManager::ExtensionRulesetData::
operator=(ExtensionRulesetData&& other) = default;
// Ordering used to keep rulesets sorted: entries with a later installation
// time come first (descending install time); entries installed at the same
// time are ordered by ascending extension id.
bool RulesetManager::ExtensionRulesetData::operator<(
    const ExtensionRulesetData& other) const {
  if (extension_install_time != other.extension_install_time) {
    // Note the inverted comparison: more recently installed sorts earlier.
    return extension_install_time > other.extension_install_time;
  }

  // Same install time: fall back to the extension id as a tie breaker.
  return extension_id < other.extension_id;
}
// Returns whether |request| is subject to ruleset evaluation at all,
// independent of any particular extension. Evaluation is skipped when the API
// is unavailable and for requests to chrome-extension:// urls.
bool RulesetManager::ShouldEvaluateRequest(
    const WebRequestInfo& request) const {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);

  // Callers are expected to have filtered out sensitive requests already.
  DCHECK(!WebRequestPermissions::HideRequest(info_map_, request));

  if (!IsAPIAvailable()) {
    // Without the API no ruleset can have been registered.
    DCHECK(rulesets_.empty());
    return false;
  }

  // Extensions must not modify resources served from the chrome-extension
  // scheme. In practice this stops an extension from modifying its own
  // resources (it would not have permission to other extension origins
  // anyway).
  return !request.url.SchemeIs(kExtensionScheme);
}
bool RulesetManager::ShouldEvaluateRulesetForRequest(
const ExtensionRulesetData& ruleset,
const WebRequestInfo& request,
bool is_incognito_context) const {
// Only extensions enabled in incognito should have access to requests in an
// incognito context.
if (is_incognito_context &&
!info_map_->IsIncognitoEnabled(ruleset.extension_id)) {
return false;
}
if (IsRequestPageAllowed(request, ruleset.allowed_pages))
return false;
const int tab_id = request.frame_data ? request.frame_data->tab_id
: extension_misc::kUnknownTabId;
// We have already checked that the extension has access to the request as far
// as the browser context is concerned. Since there is nothing special that we
// have to do for split mode incognito extensions, pass false for
// |crosses_incognito|.
const bool crosses_incognito = false;
PermissionsData::PageAccess result =
WebRequestPermissions::CanExtensionAccessURL(
info_map_, ruleset.extension_id, request.url, tab_id,
crosses_incognito,
WebRequestPermissions::REQUIRE_HOST_PERMISSION_FOR_URL_AND_INITIATOR,
request.initiator);
// TODO(crbug.com/809680): Handle ACCESS_WITHHELD.
return result == PermissionsData::PageAccess::kAllowed;
}
} // namespace declarative_net_request
} // namespace extensions