| // Copyright 2017 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "extensions/browser/api/declarative_net_request/ruleset_matcher.h" |
| |
| #include <algorithm> |
| #include <limits> |
| #include <utility> |
| |
| #include "base/containers/span.h" |
| #include "base/files/file_util.h" |
| #include "base/logging.h" |
| #include "base/memory/ptr_util.h" |
| #include "base/metrics/histogram_macros.h" |
| #include "base/strings/strcat.h" |
| #include "base/timer/elapsed_timer.h" |
| #include "content/public/common/resource_type.h" |
| #include "extensions/browser/api/declarative_net_request/constants.h" |
| #include "extensions/browser/api/declarative_net_request/ruleset_source.h" |
| #include "extensions/browser/api/declarative_net_request/utils.h" |
| #include "extensions/browser/api/web_request/web_request_info.h" |
| #include "extensions/common/api/declarative_net_request.h" |
| #include "extensions/common/api/declarative_net_request/utils.h" |
| #include "net/base/registry_controlled_domains/registry_controlled_domain.h" |
| #include "net/base/url_util.h" |
| #include "url/url_constants.h" |
| |
| namespace extensions { |
| namespace declarative_net_request { |
| namespace flat_rule = url_pattern_index::flat; |
| namespace dnr_api = api::declarative_net_request; |
| |
| namespace { |
| |
| using FindRuleStrategy = |
| url_pattern_index::UrlPatternIndexMatcher::FindRuleStrategy; |
| |
| // Maps content::ResourceType to flat_rule::ElementType. |
| flat_rule::ElementType GetElementType(content::ResourceType type) { |
| switch (type) { |
| case content::ResourceType::kPrefetch: |
| case content::ResourceType::kSubResource: |
| return flat_rule::ElementType_OTHER; |
| case content::ResourceType::kMainFrame: |
| case content::ResourceType::kNavigationPreloadMainFrame: |
| return flat_rule::ElementType_MAIN_FRAME; |
| case content::ResourceType::kCspReport: |
| return flat_rule::ElementType_CSP_REPORT; |
| case content::ResourceType::kScript: |
| case content::ResourceType::kWorker: |
| case content::ResourceType::kSharedWorker: |
| case content::ResourceType::kServiceWorker: |
| return flat_rule::ElementType_SCRIPT; |
| case content::ResourceType::kImage: |
| case content::ResourceType::kFavicon: |
| return flat_rule::ElementType_IMAGE; |
| case content::ResourceType::kStylesheet: |
| return flat_rule::ElementType_STYLESHEET; |
| case content::ResourceType::kObject: |
| case content::ResourceType::kPluginResource: |
| return flat_rule::ElementType_OBJECT; |
| case content::ResourceType::kXhr: |
| return flat_rule::ElementType_XMLHTTPREQUEST; |
| case content::ResourceType::kSubFrame: |
| case content::ResourceType::kNavigationPreloadSubFrame: |
| return flat_rule::ElementType_SUBDOCUMENT; |
| case content::ResourceType::kPing: |
| return flat_rule::ElementType_PING; |
| case content::ResourceType::kMedia: |
| return flat_rule::ElementType_MEDIA; |
| case content::ResourceType::kFontResource: |
| return flat_rule::ElementType_FONT; |
| } |
| NOTREACHED(); |
| return flat_rule::ElementType_OTHER; |
| } |
| |
| // Returns the flat_rule::ElementType for the given |request|. |
| flat_rule::ElementType GetElementType(const WebRequestInfo& request) { |
| if (request.url.SchemeIsWSOrWSS()) |
| return flat_rule::ElementType_WEBSOCKET; |
| |
| return GetElementType(request.type); |
| } |
| |
| // Returns whether the request to |url| is third party to its |document_origin|. |
| // TODO(crbug.com/696822): Look into caching this. |
| bool IsThirdPartyRequest(const GURL& url, const url::Origin& document_origin) { |
| if (document_origin.opaque()) |
| return true; |
| |
| return !net::registry_controlled_domains::SameDomainOrHost( |
| url, document_origin, |
| net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES); |
| } |
| |
| std::vector<url_pattern_index::UrlPatternIndexMatcher> GetMatchers( |
| const flat::ExtensionIndexedRuleset* root) { |
| DCHECK(root); |
| DCHECK(root->index_list()); |
| DCHECK_EQ(flat::ActionIndex_count, root->index_list()->size()); |
| |
| std::vector<url_pattern_index::UrlPatternIndexMatcher> matchers; |
| matchers.reserve(flat::ActionIndex_count); |
| for (const flat_rule::UrlPatternIndex* index : *(root->index_list())) |
| matchers.emplace_back(index); |
| return matchers; |
| } |
| |
| bool HasAnyRules(const url_pattern_index::flat::UrlPatternIndex* index) { |
| DCHECK(index); |
| |
| if (index->fallback_rules()->size() > 0) |
| return true; |
| |
| // Iterate over all ngrams and check their corresponding rules. |
| for (auto* ngram_to_rules : *index->ngram_index()) { |
| if (ngram_to_rules == index->ngram_index_empty_slot()) |
| continue; |
| |
| if (ngram_to_rules->rule_list()->size() > 0) |
| return true; |
| } |
| |
| return false; |
| } |
| |
| bool IsExtraHeadersMatcherInternal( |
| const flat::ExtensionIndexedRuleset& ruleset) { |
| // We only support removing a subset of extra headers currently. If that |
| // changes, the implementation here should change as well. |
| static_assert(flat::ActionIndex_count == 7, |
| "Modify this method to ensure IsExtraHeadersMatcherInternal is " |
| "updated as new actions are added."); |
| static const flat::ActionIndex extra_header_indices[] = { |
| flat::ActionIndex_remove_cookie_header, |
| flat::ActionIndex_remove_referer_header, |
| flat::ActionIndex_remove_set_cookie_header, |
| }; |
| |
| for (flat::ActionIndex index : extra_header_indices) { |
| if (HasAnyRules(ruleset.index_list()->Get(index))) |
| return true; |
| } |
| |
| return false; |
| } |
| |
| base::StringPiece CreateStringPiece(const ::flatbuffers::String& str) { |
| return base::StringPiece(str.c_str(), str.size()); |
| } |
| |
| // Returns true if the given |vec| is nullptr or empty. |
| template <typename T> |
| bool IsEmpty(const flatbuffers::Vector<T>* vec) { |
| return !vec || vec->size() == 0; |
| } |
| |
| // Performs any required query transformations on the |url|. Returns true if the |
| // query should be modified and populates |modified_query|. |
| bool GetModifiedQuery(const GURL& url, |
| const flat::UrlTransform& transform, |
| std::string* modified_query) { |
| DCHECK(modified_query); |
| |
| // |remove_query_params| should always be sorted. |
| DCHECK( |
| IsEmpty(transform.remove_query_params()) || |
| std::is_sorted(transform.remove_query_params()->begin(), |
| transform.remove_query_params()->end(), |
| [](const flatbuffers::String* x1, |
| const flatbuffers::String* x2) { return *x1 < *x2; })); |
| |
| // Return early if there's nothing to modify. |
| if (IsEmpty(transform.remove_query_params()) && |
| IsEmpty(transform.add_or_replace_query_params())) { |
| return false; |
| } |
| |
| std::vector<base::StringPiece> remove_query_params; |
| if (!IsEmpty(transform.remove_query_params())) { |
| remove_query_params.reserve(transform.remove_query_params()->size()); |
| for (const ::flatbuffers::String* str : *transform.remove_query_params()) |
| remove_query_params.push_back(CreateStringPiece(*str)); |
| } |
| |
| // We don't use a map from keys to vector of values to ensure the relative |
| // order of different params specified by the extension is respected. We use a |
| // std::list to support fast removal from middle of the list. Note that the |
| // key value pairs should already be escaped. |
| std::list<std::pair<base::StringPiece, base::StringPiece>> |
| add_or_replace_query_params; |
| if (!IsEmpty(transform.add_or_replace_query_params())) { |
| for (const flat::QueryKeyValue* query_pair : |
| *transform.add_or_replace_query_params()) { |
| DCHECK(query_pair->key()); |
| DCHECK(query_pair->value()); |
| add_or_replace_query_params.emplace_back( |
| CreateStringPiece(*query_pair->key()), |
| CreateStringPiece(*query_pair->value())); |
| } |
| } |
| |
| std::vector<std::string> query_parts; |
| |
| auto create_query_part = [](base::StringPiece key, base::StringPiece value) { |
| return base::StrCat({key, "=", value}); |
| }; |
| |
| bool query_changed = false; |
| for (net::QueryIterator it(url); !it.IsAtEnd(); it.Advance()) { |
| std::string key = it.GetKey(); |
| // Remove query param. |
| if (std::binary_search(remove_query_params.begin(), |
| remove_query_params.end(), key)) { |
| query_changed = true; |
| continue; |
| } |
| |
| auto replace_iterator = std::find_if( |
| add_or_replace_query_params.begin(), add_or_replace_query_params.end(), |
| [&key](const std::pair<base::StringPiece, base::StringPiece>& param) { |
| return param.first == key; |
| }); |
| |
| // Nothing to do. |
| if (replace_iterator == add_or_replace_query_params.end()) { |
| query_parts.push_back(create_query_part(key, it.GetValue())); |
| continue; |
| } |
| |
| // Replace query param. |
| query_changed = true; |
| query_parts.push_back(create_query_part(key, replace_iterator->second)); |
| add_or_replace_query_params.erase(replace_iterator); |
| } |
| |
| // Append any remaining query params. |
| for (const auto& params : add_or_replace_query_params) { |
| query_changed = true; |
| query_parts.push_back(create_query_part(params.first, params.second)); |
| } |
| |
| if (!query_changed) |
| return false; |
| |
| *modified_query = base::JoinString(query_parts, "&"); |
| return true; |
| } |
| |
| GURL GetTransformedURL(const RequestParams& params, |
| const flat::UrlTransform& transform) { |
| GURL::Replacements replacements; |
| |
| if (transform.scheme()) |
| replacements.SetSchemeStr(CreateStringPiece(*transform.scheme())); |
| |
| if (transform.host()) |
| replacements.SetHostStr(CreateStringPiece(*transform.host())); |
| |
| DCHECK(!(transform.clear_port() && transform.port())); |
| if (transform.clear_port()) |
| replacements.ClearPort(); |
| else if (transform.port()) |
| replacements.SetPortStr(CreateStringPiece(*transform.port())); |
| |
| DCHECK(!(transform.clear_path() && transform.path())); |
| if (transform.clear_path()) |
| replacements.ClearPath(); |
| else if (transform.path()) |
| replacements.SetPathStr(CreateStringPiece(*transform.path())); |
| |
| // |query| is defined outside the if conditions since url::Replacements does |
| // not own the strings it uses. |
| std::string query; |
| if (transform.clear_query()) { |
| replacements.ClearQuery(); |
| } else if (transform.query()) { |
| replacements.SetQueryStr(CreateStringPiece(*transform.query())); |
| } else if (GetModifiedQuery(*params.url, transform, &query)) { |
| replacements.SetQueryStr(query); |
| } |
| |
| DCHECK(!(transform.clear_fragment() && transform.fragment())); |
| if (transform.clear_fragment()) |
| replacements.ClearRef(); |
| else if (transform.fragment()) |
| replacements.SetRefStr(CreateStringPiece(*transform.fragment())); |
| |
| if (transform.password()) |
| replacements.SetPasswordStr(CreateStringPiece(*transform.password())); |
| |
| if (transform.username()) |
| replacements.SetUsernameStr(CreateStringPiece(*transform.username())); |
| |
| return params.url->ReplaceComponents(replacements); |
| } |
| |
| } // namespace |
| |
| RequestParams::RequestParams(const WebRequestInfo& info) |
| : url(&info.url), |
| first_party_origin(info.initiator.value_or(url::Origin())), |
| element_type(GetElementType(info)) { |
| is_third_party = IsThirdPartyRequest(*url, first_party_origin); |
| } |
| |
| RequestParams::RequestParams() = default; |
| RequestParams::~RequestParams() = default; |
| |
| // static |
| RulesetMatcher::LoadRulesetResult RulesetMatcher::CreateVerifiedMatcher( |
| const RulesetSource& source, |
| int expected_ruleset_checksum, |
| std::unique_ptr<RulesetMatcher>* matcher) { |
| DCHECK(matcher); |
| DCHECK(IsAPIAvailable()); |
| |
| base::ElapsedTimer timer; |
| |
| if (!base::PathExists(source.indexed_path())) |
| return kLoadErrorInvalidPath; |
| |
| std::string ruleset_data; |
| if (!base::ReadFileToString(source.indexed_path(), &ruleset_data)) |
| return kLoadErrorFileRead; |
| |
| if (!StripVersionHeaderAndParseVersion(&ruleset_data)) |
| return kLoadErrorVersionMismatch; |
| |
| // This guarantees that no memory access will end up outside the buffer. |
| if (!IsValidRulesetData( |
| base::make_span(reinterpret_cast<const uint8_t*>(ruleset_data.data()), |
| ruleset_data.size()), |
| expected_ruleset_checksum)) { |
| return kLoadErrorChecksumMismatch; |
| } |
| |
| UMA_HISTOGRAM_TIMES( |
| "Extensions.DeclarativeNetRequest.CreateVerifiedMatcherTime", |
| timer.Elapsed()); |
| |
| // Using WrapUnique instead of make_unique since this class has a private |
| // constructor. |
| *matcher = base::WrapUnique(new RulesetMatcher( |
| std::move(ruleset_data), source.id(), source.priority())); |
| return kLoadSuccess; |
| } |
| |
| RulesetMatcher::~RulesetMatcher() = default; |
| |
| uint8_t RulesetMatcher::GetRemoveHeadersMask(const RequestParams& params, |
| uint8_t current_mask) const { |
| uint8_t mask = current_mask; |
| |
| static_assert(kRemoveHeadersMask_Max <= std::numeric_limits<uint8_t>::max(), |
| "RemoveHeadersMask can't fit in a uint8_t"); |
| |
| // Iterate over each RemoveHeaderType value. |
| uint8_t bit = 0; |
| for (int i = 0; i <= dnr_api::REMOVE_HEADER_TYPE_LAST; ++i) { |
| switch (i) { |
| case dnr_api::REMOVE_HEADER_TYPE_NONE: |
| break; |
| case dnr_api::REMOVE_HEADER_TYPE_COOKIE: |
| bit = kRemoveHeadersMask_Cookie; |
| if (mask & bit) |
| break; |
| if (GetMatchingRule(params, flat::ActionIndex_remove_cookie_header)) |
| mask |= bit; |
| break; |
| case dnr_api::REMOVE_HEADER_TYPE_REFERER: |
| bit = kRemoveHeadersMask_Referer; |
| if (mask & bit) |
| break; |
| if (GetMatchingRule(params, flat::ActionIndex_remove_referer_header)) |
| mask |= bit; |
| break; |
| case dnr_api::REMOVE_HEADER_TYPE_SETCOOKIE: |
| bit = kRemoveHeadersMask_SetCookie; |
| if (mask & bit) |
| break; |
| if (GetMatchingRule(params, flat::ActionIndex_remove_set_cookie_header)) |
| mask |= bit; |
| break; |
| } |
| } |
| |
| return mask; |
| } |
| |
| const flat_rule::UrlRule* RulesetMatcher::GetRedirectRule( |
| const RequestParams& params, |
| GURL* redirect_url) const { |
| DCHECK(redirect_url); |
| DCHECK_NE(flat_rule::ElementType_WEBSOCKET, params.element_type); |
| |
| const flat_rule::UrlRule* rule = GetMatchingRule( |
| params, flat::ActionIndex_redirect, FindRuleStrategy::kHighestPriority); |
| if (!rule) |
| return nullptr; |
| |
| // Find the UrlRuleMetadata corresponding to |rule|. Since |metadata_list_| is |
| // sorted by rule id, use LookupByKey which binary searches for fast lookup. |
| const flat::UrlRuleMetadata* metadata = |
| metadata_list_->LookupByKey(rule->id()); |
| |
| // There must be a UrlRuleMetadata object corresponding to each redirect rule. |
| DCHECK(metadata); |
| DCHECK_EQ(metadata->id(), rule->id()); |
| DCHECK(metadata->redirect_url() || metadata->transform()); |
| |
| if (metadata->redirect_url()) |
| *redirect_url = GURL(CreateStringPiece(*metadata->redirect_url())); |
| else |
| *redirect_url = GetTransformedURL(params, *metadata->transform()); |
| |
| // Sanity check that we don't redirect to a javascript url. Specifying |
| // redirect to a javascript url and specifying javascript as a transform |
| // scheme is prohibited. In addition extensions can't intercept requests to |
| // javascript urls. Hence we should never end up with a javascript url here. |
| DCHECK(!redirect_url->SchemeIs(url::kJavaScriptScheme)); |
| |
| // Prevent a redirect loop where a URL continuously redirects to itself. |
| return (redirect_url->is_valid() && *params.url != *redirect_url) ? rule |
| : nullptr; |
| } |
| |
| const flat_rule::UrlRule* RulesetMatcher::GetUpgradeRule( |
| const RequestParams& params) const { |
| const bool is_upgradeable = params.url->SchemeIs(url::kHttpScheme) || |
| params.url->SchemeIs(url::kFtpScheme); |
| return is_upgradeable |
| ? GetMatchingRule(params, flat::ActionIndex_upgrade_scheme, |
| FindRuleStrategy::kHighestPriority) |
| : nullptr; |
| } |
| |
| RulesetMatcher::RulesetMatcher(std::string ruleset_data, |
| size_t id, |
| size_t priority) |
| : ruleset_data_(std::move(ruleset_data)), |
| root_(flat::GetExtensionIndexedRuleset(ruleset_data_.data())), |
| matchers_(GetMatchers(root_)), |
| metadata_list_(root_->extension_metadata()), |
| id_(id), |
| priority_(priority), |
| is_extra_headers_matcher_(IsExtraHeadersMatcherInternal(*root_)) {} |
| |
| const flat_rule::UrlRule* RulesetMatcher::GetMatchingRule( |
| const RequestParams& params, |
| flat::ActionIndex index, |
| FindRuleStrategy strategy) const { |
| DCHECK_LT(index, flat::ActionIndex_count); |
| DCHECK_GE(index, 0); |
| DCHECK(params.url); |
| |
| // Don't exclude generic rules from being matched. A generic rule is one with |
| // an empty included domains list. |
| const bool kDisableGenericRules = false; |
| |
| return matchers_[index].FindMatch( |
| *params.url, params.first_party_origin, params.element_type, |
| flat_rule::ActivationType_NONE, params.is_third_party, |
| kDisableGenericRules, strategy); |
| } |
| |
| } // namespace declarative_net_request |
| } // namespace extensions |