blob: 801e1e2494e068a3e2c758166693d1215c15bfc6 [file] [log] [blame]
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/optimization_guide/optimization_filter.h"
#include <string>
#include "base/strings/string_util.h"
namespace optimization_guide {
// Upper bound on the number of host lookups made per URL. The full host
// counts as the first lookup, so ContainsHostSuffix() probes at most
// kMaxSuffixCount - 1 shorter suffixes.
const int kMaxSuffixCount = 5;
// Realistic minimum length, in characters, of a host suffix. Suffixes shorter
// than this are skipped rather than looked up.
const int kMinHostSuffix = 6; // e.g., abc.tv
OptimizationFilter::OptimizationFilter(
std::unique_ptr<BloomFilter> bloom_filter,
std::unique_ptr<RegexpList> regexps)
: bloom_filter_(std::move(bloom_filter)), regexps_(std::move(regexps)) {
// May be created on one thread but used on another. The first call to
// CalledOnValidSequence() will re-bind it.
DETACH_FROM_SEQUENCE(sequence_checker_);
}
// Checks, via |sequence_checker_|, that the filter is destroyed on a valid
// sequence.
OptimizationFilter::~OptimizationFilter() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
}
// Returns true when |url| is matched by either the host-suffix bloom filter
// or one of the regular expressions. The host check runs first and the
// regexps are only consulted if it fails.
bool OptimizationFilter::Matches(const GURL& url) const {
  if (ContainsHostSuffix(url))
    return true;
  return MatchesRegexp(url);
}
// Returns true if the bloom filter contains the full host of |url| or one of
// its dot-delimited suffixes. Returns false when no bloom filter was
// provided.
//
// The full host is looked up first. Suffixes are then tried from shortest to
// longest, skipping the root domain (eg, skipping "com", "org", "in", "uk")
// and any suffix shorter than kMinHostSuffix characters. At most
// kMaxSuffixCount lookups are made in total, the full host included.
bool OptimizationFilter::ContainsHostSuffix(const GURL& url) const {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  if (!bloom_filter_)
    return false;

  // First check full host name.
  const std::string full_host(url.host());
  if (bloom_filter_->Contains(full_host))
    return true;

  // The full host above counts as the first lookup.
  int suffix_count = 1;

  // Start at the '.' that precedes the root domain and walk leftwards one
  // label at a time. The walk must stop once a '.' at index 0 has been
  // handled: searching from |dot_pos - 1| would wrap around to npos and
  // restart from the end of the host, which re-probes the same suffixes and,
  // for short hosts where no suffix is ever long enough to be counted, never
  // terminates.
  std::string::size_type dot_pos = full_host.find_last_of('.');
  while (dot_pos != std::string::npos && dot_pos > 0 &&
         suffix_count < kMaxSuffixCount) {
    dot_pos = full_host.find_last_of('.', dot_pos - 1);
    if (dot_pos == std::string::npos)
      break;

    // |length - dot_pos| includes the '.', so this keeps only suffixes of at
    // least kMinHostSuffix characters. Skipped suffixes do not count toward
    // the lookup limit.
    if (full_host.length() - dot_pos > kMinHostSuffix) {
      const std::string suffix = full_host.substr(dot_pos + 1);
      suffix_count++;
      if (bloom_filter_->Contains(suffix))
        return true;
    }
  }
  return false;
}
// Returns true if any usable regular expression partially matches |url|.
// Matching is done against the lower-cased spec of the URL's referrer form.
// Returns false when there is no regexp list or |url| is invalid; regexps
// that report !ok() are ignored.
bool OptimizationFilter::MatchesRegexp(const GURL& url) const {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  if (!regexps_ || !url.is_valid())
    return false;

  const std::string lowered_spec =
      base::ToLowerASCII(url.GetAsReferrer().spec());
  for (const auto& pattern : *regexps_) {
    if (pattern->ok() && re2::RE2::PartialMatch(lowered_spec, *pattern))
      return true;
  }
  return false;
}
} // namespace optimization_guide