// blob: 75f3ffcbcd78d6aa1ca0703b6ea5f02f7be7f91e
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "extensions/browser/api/declarative_net_request/filter_list_converter/converter.h"
#include <fstream>
#include <sstream>
#include <string>
#include <utility>
#include "base/json/json_file_value_serializer.h"
#include "base/logging.h"
#include "base/strings/string_util.h"
#include "base/strings/stringprintf.h"
#include "base/values.h"
#include "components/subresource_filter/tools/ruleset_converter/rule_stream.h"
#include "extensions/browser/api/declarative_net_request/constants.h"
#include "extensions/browser/api/declarative_net_request/indexed_rule.h"
#include "extensions/common/api/declarative_net_request.h"
#include "extensions/common/api/declarative_net_request/constants.h"
#include "extensions/common/api/declarative_net_request/test_utils.h"
#include "url/gurl.h"
namespace extensions {
namespace declarative_net_request {
namespace {
// Shorthand for the url_pattern_index protobuf namespace.
namespace proto = ::url_pattern_index::proto;
// Shorthand for the extensions::api::declarative_net_request namespace.
namespace dnr_api = extensions::api::declarative_net_request;
// Map from a proto element type to a Declarative Net Request resource type.
// NOTE(review): nothing in the visible part of this file references this
// alias, and no flat_map header is included here - confirm whether it is still
// needed.
using ElementTypeMap =
base::flat_map<proto::ElementType, dnr_api::ResourceType>;
// Utility class to convert the proto::UrlRule format to the JSON format
// supported by Declarative Net Request.
class ProtoToJSONRuleConverter {
public:
// Converts |rule| into the JSON dictionary form of a Declarative Net Request
// rule, using |rule_id| as its id. When the rule cannot be converted, a none
// value is returned and the reason is written to |error|, which must not be
// null.
static base::Value Convert(const proto::UrlRule& rule,
                           int rule_id,
                           std::string* error) {
  CHECK(error);
  // A converter instance only lives for the duration of a single conversion.
  return ProtoToJSONRuleConverter(rule, rule_id).Convert(error);
}
private:
// Stores a copy of |rule| together with |rule_id|, and starts |json_rule_| as
// an empty dictionary that the Populate* steps fill in.
ProtoToJSONRuleConverter(const proto::UrlRule& rule, int rule_id)
: input_rule_(rule),
rule_id_(rule_id),
json_rule_(base::Value::Type::DICTIONARY) {}
// Builds the JSON rule from |input_rule_| and validates it.
//
// The Populate* steps run in order and the chain stops at the first step that
// returns false; that step's message (left in |error_|) is handed back through
// |error|. A successfully built rule is then parsed with
// dnr_api::Rule::Populate and IndexedRule::CreateIndexedRule. The three
// non-ascii parse results are reported as ordinary conversion errors; any
// other parse failure is fatal (CHECK).
//
// Returns the rule dictionary on success, or a none value on error. |error|
// must be non-null.
base::Value Convert(std::string* error) {
  CHECK(error);

  // Populate all the keys.
  const bool success = CheckActivationType() && PopulateID() &&
                       PopulatePriorirty() && PopulateURLFilter() &&
                       PopulateIsURLFilterCaseSensitive() &&
                       PopulateDomains() && PopulateExcludedDomains() &&
                       PopulateResourceTypes() &&
                       PopulateExcludedResourceTypes() &&
                       PopulateDomainType() && PopulateRuleActionType() &&
                       PopulateRedirectURL() && PopulateRemoveHeadersList();
  if (!success) {
    // A failing step must have recorded why it failed.
    CHECK(!error_.empty());
    *error = std::move(error_);
    return base::Value();
  }

  // Sanity check that we can parse this rule.
  base::string16 err;
  dnr_api::Rule rule;
  CHECK(dnr_api::Rule::Populate(json_rule_, &rule, &err) && err.empty())
      << "Converted rule can't be parsed " << json_rule_;

  IndexedRule indexed_rule;
  const ParseResult result = IndexedRule::CreateIndexedRule(
      std::move(rule), GURL() /* base_url */, &indexed_rule);

  // Non-ascii characters in rules are not supported. Work out which part of
  // the rule the parser objected to, if any.
  const char* non_ascii_context = nullptr;
  if (result == ParseResult::ERROR_NON_ASCII_URL_FILTER)
    non_ascii_context = "url filter";
  else if (result == ParseResult::ERROR_NON_ASCII_DOMAIN)
    non_ascii_context = "domains";
  else if (result == ParseResult::ERROR_NON_ASCII_EXCLUDED_DOMAIN)
    non_ascii_context = "excluded domains";

  if (non_ascii_context) {
    *error = base::StringPrintf(
        "Rule with filter '%s' ignored due to non ascii characters in %s.",
        input_rule_.url_pattern().c_str(), non_ascii_context);
    return base::Value();
  }

  // The original literal contained a stray "<< " inside the string, which was
  // printed verbatim in the failure message.
  CHECK_EQ(ParseResult::SUCCESS, result)
      << "Unexpected parse error " << static_cast<int>(result)
      << " for rule " << json_rule_;

  // |json_rule_| is a member, so the move is needed to avoid copying it.
  return std::move(json_rule_);
}
bool CheckActivationType() {
if (input_rule_.activation_types() == proto::ACTIVATION_TYPE_UNSPECIFIED)
return true;
if (input_rule_.activation_types() == proto::ACTIVATION_TYPE_DOCUMENT) {
is_allow_all_requests_rule_ = true;
return true;
}
std::vector<std::string> activation_types;
for (int activation_type = 1; activation_type <= proto::ACTIVATION_TYPE_MAX;
activation_type <<= 1) {
CHECK(proto::ActivationType_IsValid(activation_type));
if (!(input_rule_.activation_types() & activation_type))
continue;
switch (static_cast<proto::ActivationType>(activation_type)) {
case proto::ACTIVATION_TYPE_UNSPECIFIED:
CHECK(false);
break;
case proto::ACTIVATION_TYPE_DOCUMENT:
activation_types.emplace_back("document");
break;
case proto::ACTIVATION_TYPE_ELEMHIDE:
activation_types.emplace_back("elemhide");
break;
case proto::ACTIVATION_TYPE_GENERICHIDE:
activation_types.emplace_back("generichide");
break;
case proto::ACTIVATION_TYPE_GENERICBLOCK:
activation_types.emplace_back("genericblock");
break;
case proto::ACTIVATION_TYPE_ALL:
CHECK(false);
break;
}
}
// We don't support any activation types.
error_ = base::StringPrintf(
"Rule with filter '%s' ignored due to invalid activation types-[%s].",
input_rule_.url_pattern().c_str(),
base::JoinString(activation_types, "," /* separator */).c_str());
return false;
}
bool PopulateID() {
CHECK_GE(rule_id_, kMinValidID);
CHECK(json_rule_.SetKey(kIDKey, base::Value(rule_id_)));
return true;
}
bool PopulatePriorirty() {
CHECK(json_rule_.SetKey(kPriorityKey, base::Value(kMinValidPriority)));
return true;
}
// Rebuilds the filter string from the rule's pattern and its left/right
// anchors and stores it as the condition's url filter. Regex rules are
// rejected with an error; an empty filter is not written at all.
bool PopulateURLFilter() {
  // Pattern type validation.
  const auto pattern_type = input_rule_.url_pattern_type();
  CHECK_NE(proto::URL_PATTERN_TYPE_UNSPECIFIED, pattern_type);

  // TODO(karandeepb): It would be nice to print the actual filter-list string
  // in cases where rule conversion fails.
  if (pattern_type == proto::URL_PATTERN_TYPE_REGEXP) {
    error_ = base::StringPrintf(
        "Rule with filter %s ignored since regex rules are not supported.",
        input_rule_.url_pattern().c_str());
    return false;
  }

  std::string filter;

  // Left anchor: "|" for a boundary anchor, "||" for a subdomain anchor.
  switch (input_rule_.anchor_left()) {
    case proto::ANCHOR_TYPE_UNSPECIFIED:
      CHECK(false);
      break;
    case proto::ANCHOR_TYPE_NONE:
      break;
    case proto::ANCHOR_TYPE_BOUNDARY:
      filter.push_back('|');
      break;
    case proto::ANCHOR_TYPE_SUBDOMAIN:
      filter.append("||");
      break;
  }

  filter.append(input_rule_.url_pattern());

  // Right anchor: only "none" and "boundary" are accepted here.
  switch (input_rule_.anchor_right()) {
    case proto::ANCHOR_TYPE_SUBDOMAIN:
    case proto::ANCHOR_TYPE_UNSPECIFIED:
      CHECK(false);
      break;
    case proto::ANCHOR_TYPE_NONE:
      break;
    case proto::ANCHOR_TYPE_BOUNDARY:
      filter.push_back('|');
      break;
  }

  // If |filter| is empty, omit persisting the url pattern. In that case, it
  // will match all urls.
  if (filter.empty())
    return true;

  CHECK(json_rule_.SetPath({kRuleConditionKey, kUrlFilterKey},
                           base::Value(filter)));
  return true;
}
// Records case-insensitive matching on the condition when the input rule does
// not match case. Case-sensitive rules write nothing, since that is the
// default. Always returns true.
bool PopulateIsURLFilterCaseSensitive() {
  if (!input_rule_.match_case()) {
    CHECK(json_rule_.SetPath(
        {kRuleConditionKey, kIsUrlFilterCaseSensitiveKey},
        base::Value(false)));
  }
  return true;
}
// Writes the rule's non-excluded domains under the domains key.
bool PopulateDomains() {
  constexpr bool kWantExcluded = false;
  return PopulateDomainsInternal(kDomainsKey, kWantExcluded);
}
// Writes the rule's excluded domains under the excluded-domains key.
bool PopulateExcludedDomains() {
  constexpr bool kWantExcluded = true;
  return PopulateDomainsInternal(kExcludedDomainsKey, kWantExcluded);
}
// Collects every domain of |input_rule_| whose exclude flag equals
// |exclude_value| and stores the list under |sub_key| of the rule condition.
// Nothing is written when no domain matches. Always returns true.
bool PopulateDomainsInternal(base::StringPiece sub_key, bool exclude_value) {
  base::Value selected(base::Value::Type::LIST);
  for (const proto::DomainListItem& entry : input_rule_.domains()) {
    if (entry.exclude() != exclude_value)
      continue;
    selected.Append(entry.domain());
  }

  // Omit empty domain list.
  if (selected.GetList().empty())
    return true;

  CHECK(json_rule_.SetPath({kRuleConditionKey, sub_key}, std::move(selected)));
  return true;
}
// Expands the bits set in |element_mask| into a list value holding the
// matching Declarative Net Request resource type names, in ascending bit
// order. The mask must not contain the object-subrequest or popup bits; those
// have no resource type here and hit a CHECK.
base::Value GetResourceTypeList(int element_mask) {
  base::Value names(base::Value::Type::LIST);

  for (int bit = 1; bit <= proto::ElementType_MAX; bit <<= 1) {
    CHECK(proto::ElementType_IsValid(bit));
    if ((bit & element_mask) == 0)
      continue;

    dnr_api::ResourceType mapped = dnr_api::RESOURCE_TYPE_NONE;
    switch (static_cast<proto::ElementType>(bit)) {
      // None of these can be translated into a single resource type.
      case proto::ELEMENT_TYPE_UNSPECIFIED:
      case proto::ELEMENT_TYPE_OBJECT_SUBREQUEST:
      case proto::ELEMENT_TYPE_POPUP:
      case proto::ELEMENT_TYPE_ALL:
        CHECK(false);
        break;
      case proto::ELEMENT_TYPE_OTHER:
        mapped = dnr_api::RESOURCE_TYPE_OTHER;
        break;
      case proto::ELEMENT_TYPE_SCRIPT:
        mapped = dnr_api::RESOURCE_TYPE_SCRIPT;
        break;
      case proto::ELEMENT_TYPE_IMAGE:
        mapped = dnr_api::RESOURCE_TYPE_IMAGE;
        break;
      case proto::ELEMENT_TYPE_STYLESHEET:
        mapped = dnr_api::RESOURCE_TYPE_STYLESHEET;
        break;
      case proto::ELEMENT_TYPE_OBJECT:
        mapped = dnr_api::RESOURCE_TYPE_OBJECT;
        break;
      case proto::ELEMENT_TYPE_XMLHTTPREQUEST:
        mapped = dnr_api::RESOURCE_TYPE_XMLHTTPREQUEST;
        break;
      case proto::ELEMENT_TYPE_SUBDOCUMENT:
        mapped = dnr_api::RESOURCE_TYPE_SUB_FRAME;
        break;
      case proto::ELEMENT_TYPE_PING:
        mapped = dnr_api::RESOURCE_TYPE_PING;
        break;
      case proto::ELEMENT_TYPE_MEDIA:
        mapped = dnr_api::RESOURCE_TYPE_MEDIA;
        break;
      case proto::ELEMENT_TYPE_FONT:
        mapped = dnr_api::RESOURCE_TYPE_FONT;
        break;
      case proto::ELEMENT_TYPE_WEBSOCKET:
        mapped = dnr_api::RESOURCE_TYPE_WEBSOCKET;
        break;
    }
    names.Append(dnr_api::ToString(mapped));
  }

  return names;
}
bool PopulateResourceTypes() {
// Ensure that |element_types()| is a subset of proto::ElementType_ALL.
CHECK_EQ(proto::ELEMENT_TYPE_ALL,
proto::ELEMENT_TYPE_ALL | input_rule_.element_types());
int kMaskUnsupported =
proto::ELEMENT_TYPE_POPUP | proto::ELEMENT_TYPE_OBJECT_SUBREQUEST;
int element_mask = input_rule_.element_types() & (~kMaskUnsupported);
// We don't support object-subrequest. Instead let these be treated as rules
// matching object requests.
if (input_rule_.element_types() & proto::ELEMENT_TYPE_OBJECT_SUBREQUEST)
element_mask |= proto::ELEMENT_TYPE_OBJECT;
if (is_allow_all_requests_rule_) {
// Any subresource types specified with ACTIVATION_TYPE_DOCUMENT are
// invalid.
if (element_mask && element_mask != proto::ELEMENT_TYPE_SUBDOCUMENT) {
std::stringstream error_stream;
error_stream << "$document rule with filter "
<< input_rule_.url_pattern()
<< " ignored. Invalid resource types: "
<< GetResourceTypeList(element_mask);
error_ = error_stream.str();
return false;
}
} else if (!element_mask) { // No supported element types.
const char* ignored_types =
input_rule_.element_types() & proto::ELEMENT_TYPE_POPUP ? "popup"
: "";
error_ = base::StringPrintf(
"Rule with filter %s and resource types [%s] ignored: No applicable "
"resource types",
input_rule_.url_pattern().c_str(), ignored_types);
return false;
}
// Omit resource types to block all subresources by default.
if (element_mask == (proto::ELEMENT_TYPE_ALL & ~kMaskUnsupported))
return true;
base::Value resource_types = GetResourceTypeList(element_mask);
if (is_allow_all_requests_rule_) {
resource_types.Append(
dnr_api::ToString(dnr_api::RESOURCE_TYPE_MAIN_FRAME));
}
CHECK(json_rule_.SetPath({kRuleConditionKey, kResourceTypesKey},
std::move(resource_types)));
return true;
}
// Intentionally writes nothing to the JSON rule and always returns true.
bool PopulateExcludedResourceTypes() {
// We don't populate the "excludedResourceTypes" since that information has
// been processed away by conversion to a proto::UrlRule.
return true;
}
bool PopulateDomainType() {
dnr_api::DomainType domain_type = dnr_api::DOMAIN_TYPE_NONE;
switch (input_rule_.source_type()) {
case proto::SOURCE_TYPE_ANY:
// This is the default domain type and can be omitted.
return true;
case proto::SOURCE_TYPE_FIRST_PARTY:
domain_type = dnr_api::DOMAIN_TYPE_FIRSTPARTY;
break;
case proto::SOURCE_TYPE_THIRD_PARTY:
domain_type = dnr_api::DOMAIN_TYPE_THIRDPARTY;
break;
case proto::SOURCE_TYPE_UNSPECIFIED:
CHECK(false);
break;
}
CHECK_NE(dnr_api::DOMAIN_TYPE_NONE, domain_type);
CHECK(json_rule_.SetPath({kRuleConditionKey, kDomainTypeKey},
base::Value(dnr_api::ToString(domain_type))));
return true;
}
bool PopulateRuleActionType() {
dnr_api::RuleActionType action_type = dnr_api::RULE_ACTION_TYPE_NONE;
CHECK(!is_allow_all_requests_rule_ ||
input_rule_.semantics() == proto::RULE_SEMANTICS_WHITELIST);
switch (input_rule_.semantics()) {
case proto::RULE_SEMANTICS_BLACKLIST:
action_type = dnr_api::RULE_ACTION_TYPE_BLOCK;
break;
case proto::RULE_SEMANTICS_WHITELIST:
if (is_allow_all_requests_rule_)
action_type = dnr_api::RULE_ACTION_TYPE_ALLOWALLREQUESTS;
else
action_type = dnr_api::RULE_ACTION_TYPE_ALLOW;
break;
case proto::RULE_SEMANTICS_UNSPECIFIED:
CHECK(false);
break;
}
CHECK_NE(dnr_api::RULE_ACTION_TYPE_NONE, action_type);
CHECK(json_rule_.SetPath({kRuleActionKey, kRuleActionTypeKey},
base::Value(dnr_api::ToString(action_type))));
return true;
}
// Placeholder step in the Populate* chain: writes nothing and always returns
// true.
bool PopulateRedirectURL() {
// Do nothing. The tool only supports allow and block rules.
return true;
}
// Placeholder step in the Populate* chain: writes nothing and always returns
// true.
bool PopulateRemoveHeadersList() {
// Do nothing. The tool only supports allow and block rules.
return true;
}
// Set by CheckActivationType() when the rule's activation type is exactly
// ACTIVATION_TYPE_DOCUMENT; later steps use it to pick the resource types and
// the action.
bool is_allow_all_requests_rule_ = false;
// Copy of the rule being converted.
proto::UrlRule input_rule_;
// Id written into the JSON rule.
int rule_id_;
// Reason for failure, set by whichever Populate*/Check* step returned false.
std::string error_;
// The JSON rule dictionary under construction.
base::Value json_rule_;
DISALLOW_COPY_AND_ASSIGN(ProtoToJSONRuleConverter);
};
// Writes rules/extension to |output_path| in the format supported by
// Declarative Net Request.
//
// Each URL rule passed to PutUrlRule() is converted to JSON and buffered in
// memory; output is only produced by Finish(). Rule ids are assigned
// sequentially starting at kMinValidID, and only rules that convert
// successfully consume an id.
class DNRJsonRuleOutputStream : public subresource_filter::RuleOutputStream {
 public:
  // |output_path| is the location Finish() writes to, |type| selects between
  // writing an extension and a bare JSON ruleset, and |noisy| turns on logging
  // of rules that could not be converted.
  DNRJsonRuleOutputStream(const base::FilePath& output_path,
                          filter_list_converter::WriteType type,
                          bool noisy)
      : output_rules_list_(base::Value::Type::LIST),
        output_path_(output_path),
        write_type_(type),
        noisy_(noisy) {}

  // Converts |rule| and appends it to the buffered rule list. Returns false
  // (after logging the reason when |noisy_| is set) if the rule cannot be
  // converted; the current id is then reused for the next rule.
  bool PutUrlRule(const proto::UrlRule& rule) override {
    std::string error;
    base::Value json_rule_value =
        ProtoToJSONRuleConverter::Convert(rule, rule_id_, &error);
    if (json_rule_value.is_none()) {
      if (noisy_) {
        LOG(ERROR) << base::StringPrintf("Error for id %d: %s", rule_id_,
                                         error.c_str());
      }
      return false;
    }

    CHECK(error.empty());
    CHECK(json_rule_value.is_dict());
    output_rules_list_.Append(std::move(json_rule_value));
    ++rule_id_;
    return true;
  }

  // CSS rules are not converted; they are accepted and dropped.
  bool PutCssRule(const proto::CssRule& rule) override {
    // Ignore CSS rules.
    return true;
  }

  // Writes the buffered rules according to |write_type_|. For a JSON ruleset
  // the result of serializing to |output_path_| is returned, so a failed write
  // is reported to the caller instead of being reported as success.
  bool Finish() override {
    constexpr char kJSONRulesFilename[] = "rules.json";
    constexpr char kRulesetID[] = "filter_list";

    switch (write_type_) {
      case filter_list_converter::kExtension: {
        TestRulesetInfo info(kRulesetID, kJSONRulesFilename,
                             output_rules_list_);
        // No result from this call is inspected here, so this path is treated
        // as success.
        WriteManifestAndRuleset(output_path_, info, {} /* hosts */);
        return true;
      }
      case filter_list_converter::kJSONRuleset:
        return JSONFileValueSerializer(output_path_)
            .Serialize(output_rules_list_);
    }
    return true;
  }

 private:
  // Id to assign to the next successfully converted rule.
  int rule_id_ = kMinValidID;
  // List value holding every rule converted so far.
  base::Value output_rules_list_;
  const base::FilePath output_path_;
  const filter_list_converter::WriteType write_type_;
  // Whether conversion failures are logged.
  const bool noisy_;

  DISALLOW_COPY_AND_ASSIGN(DNRJsonRuleOutputStream);
};
} // namespace
namespace filter_list_converter {
// Reads every filter list in |filter_list_inputs|, converts its rules, and
// writes the combined result to |output_path| in the form selected by |type|.
// |noisy| enables logging of rules that could not be converted. An input that
// cannot be turned into a rule stream, or a failed rule transfer, is fatal
// (CHECK). Returns the result of finishing the output stream.
bool ConvertRuleset(const std::vector<base::FilePath>& filter_list_inputs,
                    const base::FilePath& output_path,
                    WriteType type,
                    bool noisy) {
  DNRJsonRuleOutputStream json_sink(output_path, type, noisy);

  for (const base::FilePath& list_path : filter_list_inputs) {
    auto list_file = std::make_unique<std::ifstream>(
        list_path.AsUTF8Unsafe(), std::ios::binary | std::ios::in);
    auto list_rules = subresource_filter::RuleInputStream::Create(
        std::move(list_file), subresource_filter::RulesetFormat::kFilterList);
    CHECK(list_rules);

    // All lists feed the same sink, so rule ids keep counting up across
    // inputs.
    CHECK(subresource_filter::TransferRules(list_rules.get(), &json_sink,
                                            nullptr /* css_rule_output */));
  }

  return json_sink.Finish();
}
} // namespace filter_list_converter
} // namespace declarative_net_request
} // namespace extensions