| // Copyright 2014 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "chrome/browser/supervised_user/supervised_user_url_filter.h" |
| |
| #include <stddef.h> |
| #include <stdint.h> |
| |
| #include <set> |
| #include <utility> |
| |
| #include "base/containers/hash_tables.h" |
| #include "base/files/file_path.h" |
| #include "base/json/json_file_value_serializer.h" |
| #include "base/macros.h" |
| #include "base/sha1.h" |
| #include "base/strings/string_number_conversions.h" |
| #include "base/strings/string_util.h" |
| #include "base/task_runner_util.h" |
| #include "base/threading/sequenced_worker_pool.h" |
| #include "chrome/browser/supervised_user/experimental/supervised_user_async_url_checker.h" |
| #include "chrome/browser/supervised_user/experimental/supervised_user_blacklist.h" |
| #include "chrome/grit/generated_resources.h" |
| #include "components/policy/core/browser/url_blacklist_manager.h" |
| #include "components/url_formatter/url_fixer.h" |
| #include "components/url_matcher/url_matcher.h" |
| #include "content/public/browser/browser_thread.h" |
| #include "net/base/registry_controlled_domains/registry_controlled_domain.h" |
| #include "url/gurl.h" |
| |
| using content::BrowserThread; |
| using net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES; |
| using net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES; |
| using net::registry_controlled_domains::GetRegistryLength; |
| using policy::URLBlacklist; |
| using url_matcher::URLMatcher; |
| using url_matcher::URLMatcherConditionSet; |
| |
| using HostnameHash = SupervisedUserSiteList::HostnameHash; |
| |
| namespace { |
| |
| struct HashHostnameHash { |
| size_t operator()(const HostnameHash& value) const { |
| return value.hash(); |
| } |
| }; |
| |
| } // namespace |
| |
| struct SupervisedUserURLFilter::Contents { |
| URLMatcher url_matcher; |
| base::hash_multimap<HostnameHash, |
| scoped_refptr<SupervisedUserSiteList>, |
| HashHostnameHash> hostname_hashes; |
| // This only tracks pattern lists. |
| std::map<URLMatcherConditionSet::ID, scoped_refptr<SupervisedUserSiteList>> |
| site_lists_by_matcher_id; |
| }; |
| |
| namespace { |
| |
// The URL schemes that are subject to filtering. URLs whose scheme is not in
// this list (e.g., file:// and chrome://) will always be allowed.
// Both the characters and the pointers themselves are const so that the table
// cannot be modified at runtime.
const char* const kFilteredSchemes[] = {
  "http",
  "https",
  "ftp",
  "gopher",
  "ws",
  "wss"
};
| |
| // This class encapsulates all the state that is required during construction of |
| // a new SupervisedUserURLFilter::Contents. |
| class FilterBuilder { |
| public: |
| FilterBuilder(); |
| ~FilterBuilder(); |
| |
| // Adds a single URL pattern and returns the id of its matcher. |
| URLMatcherConditionSet::ID AddPattern(const std::string& pattern); |
| |
| // Adds all the sites in |site_list|, with URL patterns and hostname hashes. |
| void AddSiteList(const scoped_refptr<SupervisedUserSiteList>& site_list); |
| |
| // Finalizes construction of the SupervisedUserURLFilter::Contents and returns |
| // them. This method should be called before this object is destroyed. |
| scoped_ptr<SupervisedUserURLFilter::Contents> Build(); |
| |
| private: |
| scoped_ptr<SupervisedUserURLFilter::Contents> contents_; |
| URLMatcherConditionSet::Vector all_conditions_; |
| URLMatcherConditionSet::ID matcher_id_; |
| std::map<URLMatcherConditionSet::ID, scoped_refptr<SupervisedUserSiteList>> |
| site_lists_by_matcher_id_; |
| }; |
| |
| FilterBuilder::FilterBuilder() |
| : contents_(new SupervisedUserURLFilter::Contents()), |
| matcher_id_(0) {} |
| |
| FilterBuilder::~FilterBuilder() { |
| DCHECK(!contents_.get()); |
| } |
| |
| URLMatcherConditionSet::ID FilterBuilder::AddPattern( |
| const std::string& pattern) { |
| std::string scheme; |
| std::string host; |
| uint16_t port = 0; |
| std::string path; |
| std::string query; |
| bool match_subdomains = true; |
| URLBlacklist::SegmentURLCallback callback = |
| static_cast<URLBlacklist::SegmentURLCallback>(url_formatter::SegmentURL); |
| if (!URLBlacklist::FilterToComponents( |
| callback, pattern, |
| &scheme, &host, &match_subdomains, &port, &path, &query)) { |
| LOG(ERROR) << "Invalid pattern " << pattern; |
| return -1; |
| } |
| |
| scoped_refptr<URLMatcherConditionSet> condition_set = |
| URLBlacklist::CreateConditionSet( |
| &contents_->url_matcher, ++matcher_id_, |
| scheme, host, match_subdomains, port, path, query, true); |
| all_conditions_.push_back(std::move(condition_set)); |
| return matcher_id_; |
| } |
| |
| void FilterBuilder::AddSiteList( |
| const scoped_refptr<SupervisedUserSiteList>& site_list) { |
| for (const std::string& pattern : site_list->patterns()) { |
| URLMatcherConditionSet::ID id = AddPattern(pattern); |
| if (id >= 0) { |
| site_lists_by_matcher_id_[id] = site_list; |
| } |
| } |
| |
| for (const HostnameHash& hash : site_list->hostname_hashes()) |
| contents_->hostname_hashes.insert(std::make_pair(hash, site_list)); |
| } |
| |
| scoped_ptr<SupervisedUserURLFilter::Contents> FilterBuilder::Build() { |
| contents_->url_matcher.AddConditionSets(all_conditions_); |
| contents_->site_lists_by_matcher_id.insert(site_lists_by_matcher_id_.begin(), |
| site_lists_by_matcher_id_.end()); |
| return std::move(contents_); |
| } |
| |
| scoped_ptr<SupervisedUserURLFilter::Contents> |
| CreateWhitelistFromPatternsForTesting( |
| const std::vector<std::string>& patterns) { |
| FilterBuilder builder; |
| for (const std::string& pattern : patterns) |
| builder.AddPattern(pattern); |
| |
| return builder.Build(); |
| } |
| |
| scoped_ptr<SupervisedUserURLFilter::Contents> |
| CreateWhitelistsFromSiteListsForTesting( |
| const std::vector<scoped_refptr<SupervisedUserSiteList>>& site_lists) { |
| FilterBuilder builder; |
| for (const scoped_refptr<SupervisedUserSiteList>& site_list : site_lists) |
| builder.AddSiteList(site_list); |
| return builder.Build(); |
| } |
| |
| scoped_ptr<SupervisedUserURLFilter::Contents> |
| LoadWhitelistsOnBlockingPoolThread( |
| const std::vector<scoped_refptr<SupervisedUserSiteList>>& site_lists) { |
| FilterBuilder builder; |
| for (const scoped_refptr<SupervisedUserSiteList>& site_list : site_lists) |
| builder.AddSiteList(site_list); |
| |
| return builder.Build(); |
| } |
| |
| } // namespace |
| |
| SupervisedUserURLFilter::SupervisedUserURLFilter() |
| : default_behavior_(ALLOW), |
| contents_(new Contents()), |
| blacklist_(nullptr), |
| blocking_task_runner_( |
| BrowserThread::GetBlockingPool() |
| ->GetTaskRunnerWithShutdownBehavior( |
| base::SequencedWorkerPool::CONTINUE_ON_SHUTDOWN).get()) { |
| // Detach from the current thread so we can be constructed on a different |
| // thread than the one where we're used. |
| DetachFromThread(); |
| } |
| |
| SupervisedUserURLFilter::~SupervisedUserURLFilter() { |
| DCHECK(CalledOnValidThread()); |
| } |
| |
| // static |
| SupervisedUserURLFilter::FilteringBehavior |
| SupervisedUserURLFilter::BehaviorFromInt(int behavior_value) { |
| DCHECK_GE(behavior_value, ALLOW); |
| DCHECK_LE(behavior_value, BLOCK); |
| return static_cast<FilteringBehavior>(behavior_value); |
| } |
| |
| // static |
| int SupervisedUserURLFilter::GetBlockMessageID( |
| FilteringBehaviorReason reason, bool is_child_account, bool single_parent) { |
| switch (reason) { |
| case DEFAULT: |
| return is_child_account ? |
| (single_parent ? |
| IDS_CHILD_BLOCK_MESSAGE_DEFAULT_SINGLE_PARENT : |
| IDS_CHILD_BLOCK_MESSAGE_DEFAULT_MULTI_PARENT) : |
| IDS_SUPERVISED_USER_BLOCK_MESSAGE_DEFAULT; |
| case BLACKLIST: |
| case ASYNC_CHECKER: |
| return IDS_SUPERVISED_USER_BLOCK_MESSAGE_SAFE_SITES; |
| case WHITELIST: |
| NOTREACHED(); |
| break; |
| case MANUAL: |
| return is_child_account ? |
| (single_parent ? |
| IDS_CHILD_BLOCK_MESSAGE_MANUAL_SINGLE_PARENT : |
| IDS_CHILD_BLOCK_MESSAGE_MANUAL_MULTI_PARENT) : |
| IDS_SUPERVISED_USER_BLOCK_MESSAGE_MANUAL; |
| } |
| NOTREACHED(); |
| return 0; |
| } |
| |
| // static |
| int SupervisedUserURLFilter::GetBlockHeaderID(FilteringBehaviorReason reason) { |
| switch (reason) { |
| case DEFAULT: |
| return IDS_SUPERVISED_USER_BLOCK_HEADER_DEFAULT; |
| case BLACKLIST: |
| case ASYNC_CHECKER: |
| return IDS_SUPERVISED_USER_BLOCK_HEADER_SAFE_SITES; |
| case WHITELIST: |
| NOTREACHED(); |
| break; |
| case MANUAL: |
| return IDS_SUPERVISED_USER_BLOCK_HEADER_MANUAL; |
| } |
| NOTREACHED(); |
| return 0; |
| } |
| |
| // static |
| bool SupervisedUserURLFilter::ReasonIsAutomatic( |
| FilteringBehaviorReason reason) { |
| return reason == ASYNC_CHECKER || reason == BLACKLIST; |
| } |
| |
| // static |
| GURL SupervisedUserURLFilter::Normalize(const GURL& url) { |
| GURL normalized_url = url; |
| GURL::Replacements replacements; |
| // Strip username, password, query, and ref. |
| replacements.ClearUsername(); |
| replacements.ClearPassword(); |
| replacements.ClearQuery(); |
| replacements.ClearRef(); |
| return url.ReplaceComponents(replacements); |
| } |
| |
| // static |
| bool SupervisedUserURLFilter::HasFilteredScheme(const GURL& url) { |
| for (size_t i = 0; i < arraysize(kFilteredSchemes); ++i) { |
| if (url.scheme() == kFilteredSchemes[i]) |
| return true; |
| } |
| return false; |
| } |
| |
| // static |
| bool SupervisedUserURLFilter::HostMatchesPattern(const std::string& host, |
| const std::string& pattern) { |
| std::string trimmed_pattern = pattern; |
| std::string trimmed_host = host; |
| if (base::EndsWith(pattern, ".*", base::CompareCase::SENSITIVE)) { |
| size_t registry_length = GetRegistryLength( |
| trimmed_host, EXCLUDE_UNKNOWN_REGISTRIES, EXCLUDE_PRIVATE_REGISTRIES); |
| // A host without a known registry part does not match. |
| if (registry_length == 0) |
| return false; |
| |
| trimmed_pattern.erase(trimmed_pattern.length() - 2); |
| trimmed_host.erase(trimmed_host.length() - (registry_length + 1)); |
| } |
| |
| if (base::StartsWith(trimmed_pattern, "*.", base::CompareCase::SENSITIVE)) { |
| trimmed_pattern.erase(0, 2); |
| |
| // The remaining pattern should be non-empty, and it should not contain |
| // further stars. Also the trimmed host needs to end with the trimmed |
| // pattern. |
| if (trimmed_pattern.empty() || |
| trimmed_pattern.find('*') != std::string::npos || |
| !base::EndsWith(trimmed_host, trimmed_pattern, |
| base::CompareCase::SENSITIVE)) { |
| return false; |
| } |
| |
| // The trimmed host needs to have a dot separating the subdomain from the |
| // matched pattern piece, unless there is no subdomain. |
| int pos = trimmed_host.length() - trimmed_pattern.length(); |
| DCHECK_GE(pos, 0); |
| return (pos == 0) || (trimmed_host[pos - 1] == '.'); |
| } |
| |
| return trimmed_host == trimmed_pattern; |
| } |
| |
| SupervisedUserURLFilter::FilteringBehavior |
| SupervisedUserURLFilter::GetFilteringBehaviorForURL(const GURL& url) const { |
| FilteringBehaviorReason reason; |
| return GetFilteringBehaviorForURL(url, false, &reason); |
| } |
| |
| bool SupervisedUserURLFilter::GetManualFilteringBehaviorForURL( |
| const GURL& url, FilteringBehavior* behavior) const { |
| FilteringBehaviorReason reason; |
| *behavior = GetFilteringBehaviorForURL(url, true, &reason); |
| return reason == MANUAL; |
| } |
| |
| SupervisedUserURLFilter::FilteringBehavior |
| SupervisedUserURLFilter::GetFilteringBehaviorForURL( |
| const GURL& url, |
| bool manual_only, |
| FilteringBehaviorReason* reason) const { |
| DCHECK(CalledOnValidThread()); |
| |
| *reason = MANUAL; |
| |
| // URLs with a non-standard scheme (e.g. chrome://) are always allowed. |
| if (!HasFilteredScheme(url)) |
| return ALLOW; |
| |
| // Check manual overrides for the exact URL. |
| std::map<GURL, bool>::const_iterator url_it = url_map_.find(Normalize(url)); |
| if (url_it != url_map_.end()) |
| return url_it->second ? ALLOW : BLOCK; |
| |
| // Check manual overrides for the hostname. |
| std::string host = url.host(); |
| std::map<std::string, bool>::const_iterator host_it = host_map_.find(host); |
| if (host_it != host_map_.end()) |
| return host_it->second ? ALLOW : BLOCK; |
| |
| // Look for patterns matching the hostname, with a value that is different |
| // from the default (a value of true in the map meaning allowed). |
| for (const auto& host_entry : host_map_) { |
| if ((host_entry.second == (default_behavior_ == BLOCK)) && |
| HostMatchesPattern(host, host_entry.first)) { |
| return host_entry.second ? ALLOW : BLOCK; |
| } |
| } |
| |
| // Check the list of URL patterns. |
| std::set<URLMatcherConditionSet::ID> matching_ids = |
| contents_->url_matcher.MatchURL(url); |
| |
| if (!matching_ids.empty()) { |
| *reason = WHITELIST; |
| return ALLOW; |
| } |
| |
| // Check the list of hostname hashes. |
| if (contents_->hostname_hashes.count(HostnameHash(url.host()))) { |
| *reason = WHITELIST; |
| return ALLOW; |
| } |
| |
| // Check the static blacklist, unless the default is to block anyway. |
| if (!manual_only && default_behavior_ != BLOCK && |
| blacklist_ && blacklist_->HasURL(url)) { |
| *reason = BLACKLIST; |
| return BLOCK; |
| } |
| |
| // Fall back to the default behavior. |
| *reason = DEFAULT; |
| return default_behavior_; |
| } |
| |
| bool SupervisedUserURLFilter::GetFilteringBehaviorForURLWithAsyncChecks( |
| const GURL& url, |
| const FilteringBehaviorCallback& callback) const { |
| FilteringBehaviorReason reason = DEFAULT; |
| FilteringBehavior behavior = GetFilteringBehaviorForURL(url, false, &reason); |
| // Any non-default reason trumps the async checker. |
| // Also, if we're blocking anyway, then there's no need to check it. |
| if (reason != DEFAULT || behavior == BLOCK || !async_url_checker_) { |
| callback.Run(behavior, reason, false); |
| FOR_EACH_OBSERVER(Observer, observers_, |
| OnURLChecked(url, behavior, reason, false)); |
| return true; |
| } |
| |
| return async_url_checker_->CheckURL( |
| Normalize(url), |
| base::Bind(&SupervisedUserURLFilter::CheckCallback, |
| base::Unretained(this), |
| callback)); |
| } |
| |
| std::map<std::string, base::string16> |
| SupervisedUserURLFilter::GetMatchingWhitelistTitles(const GURL& url) const { |
| std::map<std::string, base::string16> whitelists; |
| |
| std::set<URLMatcherConditionSet::ID> matching_ids = |
| contents_->url_matcher.MatchURL(url); |
| |
| for (const auto& matching_id : matching_ids) { |
| const scoped_refptr<SupervisedUserSiteList>& site_list = |
| contents_->site_lists_by_matcher_id[matching_id]; |
| whitelists[site_list->id()] = site_list->title(); |
| } |
| |
| // Add the site lists that match the URL hostname hash to the map of |
| // whitelists (IDs -> titles). |
| const auto& range = |
| contents_->hostname_hashes.equal_range(HostnameHash(url.host())); |
| for (auto it = range.first; it != range.second; ++it) |
| whitelists[it->second->id()] = it->second->title(); |
| |
| return whitelists; |
| } |
| |
| void SupervisedUserURLFilter::SetDefaultFilteringBehavior( |
| FilteringBehavior behavior) { |
| DCHECK(CalledOnValidThread()); |
| default_behavior_ = behavior; |
| } |
| |
| SupervisedUserURLFilter::FilteringBehavior |
| SupervisedUserURLFilter::GetDefaultFilteringBehavior() const { |
| return default_behavior_; |
| } |
| |
| void SupervisedUserURLFilter::LoadWhitelists( |
| const std::vector<scoped_refptr<SupervisedUserSiteList>>& site_lists) { |
| DCHECK(CalledOnValidThread()); |
| |
| base::PostTaskAndReplyWithResult( |
| blocking_task_runner_.get(), |
| FROM_HERE, |
| base::Bind(&LoadWhitelistsOnBlockingPoolThread, site_lists), |
| base::Bind(&SupervisedUserURLFilter::SetContents, this)); |
| } |
| |
| void SupervisedUserURLFilter::SetBlacklist( |
| const SupervisedUserBlacklist* blacklist) { |
| blacklist_ = blacklist; |
| } |
| |
| bool SupervisedUserURLFilter::HasBlacklist() const { |
| return !!blacklist_; |
| } |
| |
| void SupervisedUserURLFilter::SetFromPatternsForTesting( |
| const std::vector<std::string>& patterns) { |
| DCHECK(CalledOnValidThread()); |
| |
| base::PostTaskAndReplyWithResult( |
| blocking_task_runner_.get(), |
| FROM_HERE, |
| base::Bind(&CreateWhitelistFromPatternsForTesting, patterns), |
| base::Bind(&SupervisedUserURLFilter::SetContents, this)); |
| } |
| |
| void SupervisedUserURLFilter::SetFromSiteListsForTesting( |
| const std::vector<scoped_refptr<SupervisedUserSiteList>>& site_lists) { |
| DCHECK(CalledOnValidThread()); |
| |
| base::PostTaskAndReplyWithResult( |
| blocking_task_runner_.get(), FROM_HERE, |
| base::Bind(&CreateWhitelistsFromSiteListsForTesting, site_lists), |
| base::Bind(&SupervisedUserURLFilter::SetContents, this)); |
| } |
| |
| void SupervisedUserURLFilter::SetManualHosts( |
| const std::map<std::string, bool>* host_map) { |
| DCHECK(CalledOnValidThread()); |
| host_map_ = *host_map; |
| } |
| |
| void SupervisedUserURLFilter::SetManualURLs( |
| const std::map<GURL, bool>* url_map) { |
| DCHECK(CalledOnValidThread()); |
| url_map_ = *url_map; |
| } |
| |
| void SupervisedUserURLFilter::InitAsyncURLChecker( |
| net::URLRequestContextGetter* context) { |
| async_url_checker_.reset(new SupervisedUserAsyncURLChecker(context)); |
| } |
| |
| void SupervisedUserURLFilter::ClearAsyncURLChecker() { |
| async_url_checker_.reset(); |
| } |
| |
| bool SupervisedUserURLFilter::HasAsyncURLChecker() const { |
| return !!async_url_checker_; |
| } |
| |
| void SupervisedUserURLFilter::Clear() { |
| default_behavior_ = ALLOW; |
| SetContents(make_scoped_ptr(new Contents())); |
| url_map_.clear(); |
| host_map_.clear(); |
| blacklist_ = nullptr; |
| async_url_checker_.reset(); |
| } |
| |
| void SupervisedUserURLFilter::AddObserver(Observer* observer) const { |
| observers_.AddObserver(observer); |
| } |
| |
| void SupervisedUserURLFilter::RemoveObserver(Observer* observer) const { |
| observers_.RemoveObserver(observer); |
| } |
| |
| void SupervisedUserURLFilter::SetBlockingTaskRunnerForTesting( |
| const scoped_refptr<base::TaskRunner>& task_runner) { |
| blocking_task_runner_ = task_runner; |
| } |
| |
| void SupervisedUserURLFilter::SetContents(scoped_ptr<Contents> contents) { |
| DCHECK(CalledOnValidThread()); |
| contents_ = std::move(contents); |
| FOR_EACH_OBSERVER(Observer, observers_, OnSiteListUpdated()); |
| } |
| |
| void SupervisedUserURLFilter::CheckCallback( |
| const FilteringBehaviorCallback& callback, |
| const GURL& url, |
| FilteringBehavior behavior, |
| bool uncertain) const { |
| DCHECK(default_behavior_ != BLOCK); |
| |
| callback.Run(behavior, ASYNC_CHECKER, uncertain); |
| FOR_EACH_OBSERVER(Observer, observers_, |
| OnURLChecked(url, behavior, ASYNC_CHECKER, uncertain)); |
| } |