blob: 69e1b85c67c388b1a43acf8a31cc84aa32cd8b31 [file] [log] [blame]
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/subresource_filter/tools/filter_tool.h"
#include <algorithm>
#include <istream>
#include <ostream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "base/json/json_reader.h"
#include "base/logging.h"
#include "base/values.h"
#include "components/subresource_filter/core/common/document_subresource_filter.h"
#include "components/subresource_filter/core/common/load_policy.h"
#include "components/subresource_filter/core/mojom/subresource_filter.mojom.h"
#include "components/url_pattern_index/flat/url_pattern_index_generated.h"
#include "components/url_pattern_index/proto/rules.pb.h"
#include "components/url_pattern_index/url_rule_util.h"
#include "url/gurl.h"
#include "url/origin.h"
namespace subresource_filter {
namespace {
url::Origin ParseOrigin(base::StringPiece arg) {
GURL origin_url(arg);
LOG_IF(FATAL, !origin_url.is_valid()) << "Invalid origin";
return url::Origin::Create(origin_url);
}
GURL ParseRequestUrl(base::StringPiece arg) {
GURL request_url(arg);
LOG_IF(FATAL, !request_url.is_valid());
return request_url;
}
url_pattern_index::proto::ElementType ParseType(base::StringPiece type) {
// If the user provided a resource type, use it. Else if it's the empty string
// it will default to ELEMENT_TYPE_OTHER.
if (type == "other")
return url_pattern_index::proto::ELEMENT_TYPE_OTHER;
if (type == "script")
return url_pattern_index::proto::ELEMENT_TYPE_SCRIPT;
if (type == "image")
return url_pattern_index::proto::ELEMENT_TYPE_IMAGE;
if (type == "stylesheet")
return url_pattern_index::proto::ELEMENT_TYPE_STYLESHEET;
if (type == "object")
return url_pattern_index::proto::ELEMENT_TYPE_OBJECT;
if (type == "xmlhttprequest")
return url_pattern_index::proto::ELEMENT_TYPE_XMLHTTPREQUEST;
if (type == "object_subrequest")
return url_pattern_index::proto::ELEMENT_TYPE_OBJECT_SUBREQUEST;
if (type == "subdocument")
return url_pattern_index::proto::ELEMENT_TYPE_SUBDOCUMENT;
if (type == "ping")
return url_pattern_index::proto::ELEMENT_TYPE_PING;
if (type == "media")
return url_pattern_index::proto::ELEMENT_TYPE_MEDIA;
if (type == "font")
return url_pattern_index::proto::ELEMENT_TYPE_FONT;
if (type == "popup")
return url_pattern_index::proto::ELEMENT_TYPE_POPUP;
if (type == "websocket")
return url_pattern_index::proto::ELEMENT_TYPE_WEBSOCKET;
return url_pattern_index::proto::ELEMENT_TYPE_OTHER;
}
const url_pattern_index::flat::UrlRule* FindMatchingUrlRule(
const subresource_filter::MemoryMappedRuleset* ruleset,
const url::Origin& document_origin,
const GURL& request_url,
url_pattern_index::proto::ElementType type) {
subresource_filter::mojom::ActivationState state;
state.activation_level = subresource_filter::mojom::ActivationLevel::kEnabled;
subresource_filter::DocumentSubresourceFilter filter(document_origin, state,
ruleset);
return filter.FindMatchingUrlRule(request_url, type);
}
const std::string& ExtractStringFromDictionary(base::Value* dictionary,
const std::string& key) {
DCHECK(dictionary->is_dict());
const base::Value* found = dictionary->FindKey(key);
CHECK(found);
return found->GetString();
}
} // namespace
FilterTool::FilterTool(
scoped_refptr<const subresource_filter::MemoryMappedRuleset> ruleset,
std::ostream* output)
: ruleset_(std::move(ruleset)), output_(output) {}
FilterTool::~FilterTool() = default;
void FilterTool::Match(const std::string& document_origin,
const std::string& url,
const std::string& type) {
bool blocked;
const url_pattern_index::flat::UrlRule* rule =
MatchImpl(document_origin, url, type, &blocked);
PrintResult(blocked, rule, document_origin, url, type);
}
void FilterTool::MatchBatch(std::istream* request_stream) {
MatchBatchImpl(request_stream, true /* print each request */, 1);
}
void FilterTool::MatchRules(std::istream* request_stream, int min_match_count) {
MatchBatchImpl(request_stream, false /* print each request */,
min_match_count);
}
void FilterTool::PrintResult(bool blocked,
const url_pattern_index::flat::UrlRule* rule,
base::StringPiece document_origin,
base::StringPiece url,
base::StringPiece type) {
*output_ << (blocked ? "BLOCKED " : "ALLOWED ");
if (rule) {
*output_ << url_pattern_index::FlatUrlRuleToFilterlistString(rule) << " ";
}
*output_ << document_origin << " " << url << " " << type << std::endl;
}
const url_pattern_index::flat::UrlRule* FilterTool::MatchImpl(
base::StringPiece document_origin,
base::StringPiece url,
base::StringPiece type,
bool* blocked) {
const url_pattern_index::flat::UrlRule* rule =
FindMatchingUrlRule(ruleset_.get(), ParseOrigin(document_origin),
ParseRequestUrl(url), ParseType(type));
*blocked = rule && !(rule->options() &
url_pattern_index::flat::OptionFlag_IS_WHITELIST);
return rule;
}
// If |print_each_request| is true, then the result of each match is written
// to |output_|, just as in Match. Otherwise, the set of matching rules is
// written to |output_|.
void FilterTool::MatchBatchImpl(std::istream* request_stream,
bool print_each_request,
int min_match_count) {
std::unordered_map<const url_pattern_index::flat::UrlRule*, int>
matched_rules;
std::string line;
while (std::getline(*request_stream, line)) {
if (line.empty())
continue;
std::unique_ptr<base::Value> dictionary = base::JSONReader::Read(line);
CHECK(dictionary);
const std::string& origin =
ExtractStringFromDictionary(dictionary.get(), "origin");
const std::string& request_url =
ExtractStringFromDictionary(dictionary.get(), "request_url");
const std::string& request_type =
ExtractStringFromDictionary(dictionary.get(), "request_type");
bool blocked;
const url_pattern_index::flat::UrlRule* rule =
MatchImpl(origin, request_url, request_type, &blocked);
if (rule)
matched_rules[rule] += 1;
if (print_each_request)
PrintResult(blocked, rule, origin, request_url, request_type);
}
if (print_each_request)
return;
// Sort the rules in descending order by match count.
std::vector<std::pair<std::string, int>> vector_rules;
for (auto rule_and_count : matched_rules) {
if (rule_and_count.second < min_match_count)
continue;
vector_rules.push_back(std::make_pair(
url_pattern_index::FlatUrlRuleToFilterlistString(rule_and_count.first)
.c_str(),
rule_and_count.second));
}
std::sort(vector_rules.begin(), vector_rules.end(),
[](const std::pair<std::string, int>& left,
const std::pair<std::string, int>& right) {
return left.second > right.second;
});
for (auto rule_and_count : vector_rules) {
*output_ << rule_and_count.second << " " << rule_and_count.first
<< std::endl;
}
}
} // namespace subresource_filter