blob: 24bfa09248372b6b525e1b077c988dbf033b580f [file] [log] [blame]
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "android_webview/browser/aw_safe_browsing_whitelist_manager.h"
#include <map>
#include <memory>
#include "base/bind.h"
#include "base/logging.h"
#include "base/sequenced_task_runner.h"
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
#include "base/task_runner_util.h"
#include "base/threading/thread_task_runner_handle.h"
#include "net/base/url_util.h"
#include "url/url_util.h"
namespace android_webview {
// This is a simple trie structure designed for handling host/domain matches
// for Safebrowsing whitelisting. For the match rules, see the class header.
//
// It is easy to visualize the trie edges as hostname components of a url in
// reverse order. For example a whitelist of google.com will have a tree
// tree structure as below.
// root
// | com
// Node1
// google/ \ example
// Node2 Node3
//
// Normally, a search in the tree should end in a leaf node for a positive
// match. For example in the tree above com.google and com.example are matches.
// However, the whitelisting also allows matching subdomains if there is a
// leading dot, for example, see ."google.com" and a.google.com below:
// root
// | com
// Node1
// | google
// Node2
// | a
// Node3
// Here, both Node2 and Node3 are valid terminal nodes to terminate a match.
// The boolean is_terminal indicates whether a node can successfully terminate
// a search (aka. whether this rule was entered to the whitelist) and
// match_prefix indicate if this should match exactly, or just do a prefix
// match.
// The structure is optimized such that if a node already allows a prefix
// match, then there is no need for it to have children, or if it has children
// these children are removed.
struct TrieNode {
std::map<std::string, std::unique_ptr<TrieNode>> children;
bool match_prefix = false;
bool is_terminal = false;
};
namespace {
void InsertRuleToTrie(const std::vector<base::StringPiece>& components,
TrieNode* root,
bool match_prefix) {
TrieNode* node = root;
for (auto hostcomp = components.rbegin(); hostcomp != components.rend();
++hostcomp) {
DCHECK(!node->match_prefix);
std::string component = hostcomp->as_string();
auto child_node = node->children.find(component);
if (child_node == node->children.end()) {
std::unique_ptr<TrieNode> temp = std::make_unique<TrieNode>();
TrieNode* current = temp.get();
node->children.emplace(component, std::move(temp));
node = current;
} else {
node = child_node->second.get();
// Optimization. No need to add new nodes as this node matches prefixes.
DCHECK(node);
if (node->match_prefix)
return;
}
}
DCHECK(node != root);
// Optimization. If match_prefix is true, remove all nodes originating
// from that node.
if (match_prefix) {
node->match_prefix = true;
node->children.clear();
}
node->is_terminal = true;
}
std::vector<base::StringPiece> SplitHost(const GURL& url) {
std::vector<base::StringPiece> components;
if (url.HostIsIPAddress()) {
components.push_back(url.host_piece());
} else {
components =
base::SplitStringPiece(url.host_piece(), ".", base::KEEP_WHITESPACE,
base::SPLIT_WANT_NONEMPTY);
}
DCHECK(components.size() > 0);
return components;
}
// Rule is a UTF-8 wide string.
bool AddRuleToWhitelist(base::StringPiece rule, TrieNode* root) {
if (rule.empty()) {
return false;
}
// Leading dot means to do an exact match.
bool started_with_dot = false;
if (rule.front() == '.') {
rule.remove_prefix(1); // Strip it for the rest of the handling.
started_with_dot = true;
}
// With the dot removed |rule| should look like a hostname.
GURL test_url("http://" + rule.as_string());
if (!test_url.is_valid()) {
return false;
}
bool has_path = test_url.has_path() && test_url.path() != "/";
// Verify that it is a hostname.
if (!test_url.has_host() || has_path || test_url.has_port() ||
test_url.has_query() || test_url.has_password() ||
test_url.has_username() || test_url.has_ref()) {
return false;
}
bool match_prefix = false;
if (test_url.HostIsIPAddress()) {
// leading dots are not allowed for IP addresses. IP addresses are always
// exact match.
if (started_with_dot) {
return false;
}
match_prefix = false;
} else {
match_prefix = !started_with_dot;
}
InsertRuleToTrie(SplitHost(test_url), root, match_prefix);
return true;
}
bool AddRules(const std::vector<std::string>& rules, TrieNode* root) {
for (auto rule : rules) {
if (!AddRuleToWhitelist(rule, root)) {
LOG(ERROR) << " invalid whitelist rule " << rule;
return false;
}
}
return true;
}
bool IsWhitelisted(const GURL& url, const TrieNode* node) {
std::vector<base::StringPiece> components = SplitHost(url);
for (auto component = components.rbegin(); component != components.rend();
++component) {
if (node->match_prefix) {
return true;
}
auto child_node = node->children.find(component->as_string());
if (child_node == node->children.end()) {
return false;
} else {
node = child_node->second.get();
DCHECK(node);
}
}
// DCHECK optimization. A match_prefix node should have no children.
DCHECK(!node->match_prefix || node->children.empty());
// If trie search finished in a terminal node, host is found in trie. The
// root node is not a terminal node.
return node->is_terminal;
}
} // namespace
AwSafeBrowsingWhitelistManager::AwSafeBrowsingWhitelistManager(
const scoped_refptr<base::SequencedTaskRunner>& background_task_runner,
const scoped_refptr<base::SequencedTaskRunner>& io_task_runner)
: background_task_runner_(background_task_runner),
io_task_runner_(io_task_runner),
ui_task_runner_(base::ThreadTaskRunnerHandle::Get()),
whitelist_(std::make_unique<TrieNode>()) {}
AwSafeBrowsingWhitelistManager::~AwSafeBrowsingWhitelistManager() {}
void AwSafeBrowsingWhitelistManager::SetWhitelist(
std::unique_ptr<TrieNode> whitelist) {
DCHECK(io_task_runner_->RunsTasksInCurrentSequence());
whitelist_ = std::move(whitelist);
}
// A task that builds the whitelist on a background thread.
void AwSafeBrowsingWhitelistManager::BuildWhitelist(
const std::vector<std::string>& rules,
base::OnceCallback<void(bool)> callback) {
DCHECK(background_task_runner_->RunsTasksInCurrentSequence());
std::unique_ptr<TrieNode> whitelist(std::make_unique<TrieNode>());
bool success = AddRules(rules, whitelist.get());
DCHECK(!whitelist->is_terminal);
DCHECK(!whitelist->match_prefix);
ui_task_runner_->PostTask(FROM_HERE,
base::BindOnce(std::move(callback), success));
if (success) {
// use base::Unretained as AwSafeBrowsingWhitelistManager is a singleton and
// not cleaned.
io_task_runner_->PostTask(
FROM_HERE,
base::BindOnce(&AwSafeBrowsingWhitelistManager::SetWhitelist,
base::Unretained(this), std::move(whitelist)));
}
}
void AwSafeBrowsingWhitelistManager::SetWhitelistOnUIThread(
std::vector<std::string>&& rules,
base::OnceCallback<void(bool)> callback) {
DCHECK(ui_task_runner_->RunsTasksInCurrentSequence());
// use base::Unretained as AwSafeBrowsingWhitelistManager is a singleton and
// not cleaned.
background_task_runner_->PostTask(
FROM_HERE, base::BindOnce(&AwSafeBrowsingWhitelistManager::BuildWhitelist,
base::Unretained(this), std::move(rules),
std::move(callback)));
}
bool AwSafeBrowsingWhitelistManager::IsURLWhitelisted(const GURL& url) const {
DCHECK(io_task_runner_->RunsTasksInCurrentSequence());
if (!url.has_host()) {
return false;
}
return IsWhitelisted(url, whitelist_.get());
}
} // namespace android_webview