blob: 1e15f8ff13a58193ea2a86fd8ac6b217274d60c4 [file] [log] [blame]
// Copyright 2018 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/url_matcher/url_util.h"
#include <memory>
#include "base/values.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/gurl.h"
namespace url_matcher {
namespace util {
namespace {
// Test-local convenience overload: builds a GURL from |url| and forwards it to
// url_matcher::util::GetEmbeddedURL(), so test cases can pass string literals.
GURL GetEmbeddedURL(const std::string& url) {
  const GURL parsed_url(url);
  return url_matcher::util::GetEmbeddedURL(parsed_url);
}
// Parameters for the FilterToComponents test: a filter string together with
// the scheme, host, subdomain flag, port and path that the test expects to be
// extracted from it.
//
// This is a plain value type whose members all manage themselves, so it
// follows the Rule of Zero: copy and move operations are left to the compiler.
// (Hand-written copy operations would only repeat the memberwise default and
// would suppress the implicit move operations.)
struct FilterTestParams {
  // Stores a copy of every argument.
  //   filter:           the filter pattern under test.
  //   scheme:           expected scheme (empty when none is expected).
  //   host:             expected host.
  //   match_subdomains: expected value of the subdomain-matching flag.
  //   port:             expected port.
  //   path:             expected path (empty when none is expected).
  FilterTestParams(const std::string& filter,
                   const std::string& scheme,
                   const std::string& host,
                   bool match_subdomains,
                   uint16_t port,
                   const std::string& path)
      : filter_(filter),
        scheme_(scheme),
        host_(host),
        match_subdomains_(match_subdomains),
        port_(port),
        path_(path) {}

  // Read-only accessors for the stored values.
  const std::string& filter() const { return filter_; }
  const std::string& scheme() const { return scheme_; }
  const std::string& host() const { return host_; }
  bool match_subdomains() const { return match_subdomains_; }
  uint16_t port() const { return port_; }
  const std::string& path() const { return path_; }

 private:
  std::string filter_;
  std::string scheme_;
  std::string host_;
  bool match_subdomains_;
  uint16_t port_;
  std::string path_;
};
// Custom printer for FilterTestParams that emits only the filter string.
// Per the original rationale: this gives better debug output under Valgrind,
// because a generic printer would dump the raw bytes of FilterTestParams and,
// due to likely padding, would read uninitialized memory.
void PrintTo(const FilterTestParams& params, std::ostream* os) {
  const std::string& filter_text = params.filter();
  (*os) << filter_text;
}
bool MatchFilters(const std::vector<std::string>& patterns,
const std::string& url) {
// Add the pattern to the matcher.
URLMatcher matcher;
base::Value::List list;
for (const auto& pattern : patterns)
list.Append(pattern);
AddAllowFilters(&matcher, list);
return !matcher.MatchURL(GURL(url)).empty();
}
// Value-parameterized fixture whose parameter is one FilterTestParams. The
// fixture carries no state of its own and cannot be copied or assigned.
class FilterToComponentsTest
    : public testing::TestWithParam<FilterTestParams> {
 public:
  FilterToComponentsTest() = default;

  // Not copyable, not assignable.
  FilterToComponentsTest& operator=(const FilterToComponentsTest&) = delete;
  FilterToComponentsTest(const FilterToComponentsTest&) = delete;
};
// Value-parameterized fixture whose parameter is a tuple of five strings.
// Tuple layout, by index:
//   0: scheme
//   1: opt_host
//   2: port
//   3: path
//   4: query
// The fixture carries no state of its own and cannot be copied or assigned.
class OnlyWildcardTest
    : public testing::TestWithParam<std::tuple<std::string,
                                               std::string,
                                               std::string,
                                               std::string,
                                               std::string>> {
 public:
  OnlyWildcardTest() = default;

  // Not copyable, not assignable.
  OnlyWildcardTest& operator=(const OnlyWildcardTest&) = delete;
  OnlyWildcardTest(const OnlyWildcardTest&) = delete;
};
} // namespace
// Normalize() is expected to drop the username, password, query and ref of a
// URL while keeping the scheme, host, port and path.
TEST(URLUtilTest, Normalize) {
  struct NormalizeCase {
    const char* input;     // URL handed to Normalize().
    const char* expected;  // URL that Normalize() should produce.
  };

  const NormalizeCase kCases[] = {
      // Username is cleared.
      {"http://dino@example/foo", "http://example/foo"},
      // Username and password are cleared.
      {"http://dino:hunter2@example/", "http://example/"},
      // Query string is cleared.
      {"http://example.com/foo?widgetId=42", "http://example.com/foo"},
      {"https://example.com/?widgetId=42&frobinate=true",
       "https://example.com/"},
      // Ref is cleared.
      {"http://example.com/foo#widgetSection", "http://example.com/foo"},
      // Port is NOT cleared.
      {"http://example.com:443/", "http://example.com:443/"},
      // All together now.
      {"https://dino:hunter2@example.com:443/foo?widgetId=42",
       "https://example.com:443/foo"},
  };

  for (const NormalizeCase& test_case : kCases) {
    EXPECT_EQ(Normalize(GURL(test_case.input)), GURL(test_case.expected));
  }
}
// Checks GetEmbeddedURL() on AMP cache URLs, i.e. URLs shaped like
// https://[<subdomain>.]cdn.ampproject.org/c/[s/]<embedded url>.
// Supported shapes must yield the embedded URL; every unsupported shape is
// expected to yield an empty GURL.
TEST(URLUtilTest, GetEmbeddedURLAmpCache) {
// Base case.
EXPECT_EQ(GURL("http://example.com"),
GetEmbeddedURL("https://cdn.ampproject.org/c/example.com"));
// "s/" means "use https".
EXPECT_EQ(GURL("https://example.com"),
GetEmbeddedURL("https://cdn.ampproject.org/c/s/example.com"));
// With path and query. Fragment is not extracted.
EXPECT_EQ(GURL("https://example.com/path/to/file.html?q=asdf"),
GetEmbeddedURL("https://cdn.ampproject.org/c/s/example.com/path/to/"
"file.html?q=asdf#baz"));
// A publisher subdomain can be included but doesn't affect the embedded URL
// (it does not even have to correspond to the embedded host, see the second
// case).
EXPECT_EQ(
GURL("http://example.com"),
GetEmbeddedURL("https://example-com.cdn.ampproject.org/c/example.com"));
EXPECT_EQ(
GURL("http://example.com"),
GetEmbeddedURL("https://example-org.cdn.ampproject.org/c/example.com"));
// Different host is not supported.
EXPECT_EQ(GURL(), GetEmbeddedURL("https://www.ampproject.org/c/example.com"));
// Different TLD is not supported.
EXPECT_EQ(GURL(), GetEmbeddedURL("https://cdn.ampproject.com/c/example.com"));
// Content type ("c/") is missing.
EXPECT_EQ(GURL(), GetEmbeddedURL("https://cdn.ampproject.org/example.com"));
// Content type is mis-formatted, must be a single character.
EXPECT_EQ(GURL(),
GetEmbeddedURL("https://cdn.ampproject.org/cd/example.com"));
}
// Checks GetEmbeddedURL() on Google AMP viewer URLs, i.e. URLs shaped like
// https://www.google.<tld>/amp/[s/]<embedded url>.
// Supported shapes must yield the embedded URL; every unsupported shape is
// expected to yield an empty GURL.
TEST(URLUtilTest, GetEmbeddedURLGoogleAmpViewer) {
// Base case.
EXPECT_EQ(GURL("http://example.com"),
GetEmbeddedURL("https://www.google.com/amp/example.com"));
// "s/" means "use https".
EXPECT_EQ(GURL("https://example.com"),
GetEmbeddedURL("https://www.google.com/amp/s/example.com"));
// Different Google TLDs are supported.
EXPECT_EQ(GURL("http://example.com"),
GetEmbeddedURL("https://www.google.de/amp/example.com"));
EXPECT_EQ(GURL("http://example.com"),
GetEmbeddedURL("https://www.google.co.uk/amp/example.com"));
// With path.
EXPECT_EQ(GURL("http://example.com/path"),
GetEmbeddedURL("https://www.google.com/amp/example.com/path"));
// Query is *not* part of the embedded URL.
EXPECT_EQ(
GURL("http://example.com/path"),
GetEmbeddedURL("https://www.google.com/amp/example.com/path?q=baz"));
// Query and fragment in percent-encoded form *are* part of the embedded URL.
EXPECT_EQ(
GURL("http://example.com/path?q=foo#bar"),
GetEmbeddedURL(
"https://www.google.com/amp/example.com/path%3fq=foo%23bar?q=baz"));
// "/" may also be percent-encoded.
EXPECT_EQ(GURL("http://example.com/path?q=foo#bar"),
GetEmbeddedURL("https://www.google.com/amp/"
"example.com%2fpath%3fq=foo%23bar?q=baz"));
// Missing "amp/".
EXPECT_EQ(GURL(), GetEmbeddedURL("https://www.google.com/example.com"));
// Path component before the "amp/".
EXPECT_EQ(GURL(),
GetEmbeddedURL("https://www.google.com/foo/amp/example.com"));
// Different host.
EXPECT_EQ(GURL(), GetEmbeddedURL("https://www.other.com/amp/example.com"));
// Different subdomain.
EXPECT_EQ(GURL(), GetEmbeddedURL("https://mail.google.com/amp/example.com"));
// Invalid TLD.
EXPECT_EQ(GURL(), GetEmbeddedURL("https://www.google.nope/amp/example.com"));
// Valid TLD that is not considered safe to display to the user by
// UnescapeURLComponent(). Note that when UTF-8 characters appear in a domain
// name, as is the case here, they're replaced by equivalent punycode by the
// GURL constructor.
EXPECT_EQ(GURL("http://www.xn--iv8h.com/"),
GetEmbeddedURL("https://www.google.com/amp/www.%F0%9F%94%8F.com/"));
// Invalid UTF-8 characters: the percent-escapes are expected to be preserved
// as-is in the embedded URL.
EXPECT_EQ(GURL("http://example.com/%81%82%83"),
GetEmbeddedURL("https://www.google.com/amp/example.com/%81%82%83"));
}
// Checks GetEmbeddedURL() on Google web cache URLs, i.e. URLs shaped like
// https://webcache.googleusercontent.com/search?q=cache:[<fingerprint>:]<url>
// optionally followed by "+<search query>".
// Supported shapes must yield the embedded URL; every unsupported shape is
// expected to yield an empty GURL.
TEST(URLUtilTest, GetEmbeddedURLGoogleWebCache) {
// Base case.
EXPECT_EQ(GURL("http://example.com"),
GetEmbeddedURL("https://webcache.googleusercontent.com/"
"search?q=cache:ABCDEFGHI-JK:example.com/"));
// With search query.
EXPECT_EQ(
GURL("http://example.com"),
GetEmbeddedURL("https://webcache.googleusercontent.com/"
"search?q=cache:ABCDEFGHI-JK:example.com/+search_query"));
// Without fingerprint.
EXPECT_EQ(GURL("http://example.com"),
GetEmbeddedURL("https://webcache.googleusercontent.com/"
"search?q=cache:example.com/"));
// With search query, without fingerprint.
EXPECT_EQ(GURL("http://example.com"),
GetEmbeddedURL("https://webcache.googleusercontent.com/"
"search?q=cache:example.com/+search_query"));
// Query params other than "q=" don't matter.
EXPECT_EQ(GURL("http://example.com"),
GetEmbeddedURL("https://webcache.googleusercontent.com/"
"search?a=b&q=cache:example.com/&c=d"));
// With scheme.
EXPECT_EQ(GURL("http://example.com"),
GetEmbeddedURL("https://webcache.googleusercontent.com/"
"search?q=cache:http://example.com/"));
// Preserve https.
EXPECT_EQ(GURL("https://example.com"),
GetEmbeddedURL("https://webcache.googleusercontent.com/"
"search?q=cache:https://example.com/"));
// Wrong host.
EXPECT_EQ(GURL(), GetEmbeddedURL("https://www.googleusercontent.com/"
"search?q=cache:example.com/"));
// Wrong path.
EXPECT_EQ(GURL(), GetEmbeddedURL("https://webcache.googleusercontent.com/"
"path?q=cache:example.com/"));
EXPECT_EQ(GURL(), GetEmbeddedURL("https://webcache.googleusercontent.com/"
"path/search?q=cache:example.com/"));
// Missing "cache:".
EXPECT_EQ(GURL(), GetEmbeddedURL("https://webcache.googleusercontent.com/"
"search?q=example.com"));
// Wrong fingerprint.
EXPECT_EQ(GURL(), GetEmbeddedURL("https://webcache.googleusercontent.com/"
"search?q=cache:123:example.com/"));
// Wrong query param.
EXPECT_EQ(GURL(), GetEmbeddedURL("https://webcache.googleusercontent.com/"
"search?a=cache:example.com/"));
// Invalid scheme.
EXPECT_EQ(GURL(), GetEmbeddedURL("https://webcache.googleusercontent.com/"
"search?q=cache:abc://example.com/"));
}
// Checks GetEmbeddedURL() on Google Translate URLs, where the embedded URL is
// carried in the "u=" query parameter of a translate.google.<tld> or
// translate.googleusercontent.com URL.
// Supported shapes must yield the embedded URL; every unsupported shape is
// expected to yield an empty GURL.
TEST(URLUtilTest, GetEmbeddedURLTranslate) {
// Base case.
EXPECT_EQ(GURL("http://example.com"),
GetEmbeddedURL("https://translate.google.com/path?u=example.com"));
// Different TLD.
EXPECT_EQ(GURL("http://example.com"),
GetEmbeddedURL("https://translate.google.de/path?u=example.com"));
// Alternate base URL.
EXPECT_EQ(GURL("http://example.com"),
GetEmbeddedURL(
"https://translate.googleusercontent.com/path?u=example.com"));
// With scheme.
EXPECT_EQ(
GURL("http://example.com"),
GetEmbeddedURL("https://translate.google.com/path?u=http://example.com"));
// With https scheme.
EXPECT_EQ(GURL("https://example.com"),
GetEmbeddedURL(
"https://translate.google.com/path?u=https://example.com"));
// With other parameters.
EXPECT_EQ(
GURL("http://example.com"),
GetEmbeddedURL(
"https://translate.google.com/path?a=asdf&u=example.com&b=fdsa"));
// Different subdomain is not supported. This covers extra labels on either
// side of "translate" as well as look-alike labels such as "translate2".
EXPECT_EQ(GURL(), GetEmbeddedURL(
"https://translate.foo.google.com/path?u=example.com"));
EXPECT_EQ(GURL(), GetEmbeddedURL(
"https://translate.www.google.com/path?u=example.com"));
EXPECT_EQ(
GURL(),
GetEmbeddedURL("https://translate.google.google.com/path?u=example.com"));
EXPECT_EQ(GURL(), GetEmbeddedURL(
"https://foo.translate.google.com/path?u=example.com"));
EXPECT_EQ(GURL(),
GetEmbeddedURL("https://translate2.google.com/path?u=example.com"));
EXPECT_EQ(GURL(),
GetEmbeddedURL(
"https://translate2.googleusercontent.com/path?u=example.com"));
// Different TLD is not supported for googleusercontent.
EXPECT_EQ(GURL(),
GetEmbeddedURL(
"https://translate.googleusercontent.de/path?u=example.com"));
// Query parameter ("u=...") is missing.
EXPECT_EQ(GURL(),
GetEmbeddedURL("https://translate.google.com/path?t=example.com"));
}
// Instantiates OnlyWildcardTest over the cartesian product of five two-valued
// generators (each URL piece is either empty or present), i.e. 2^5 = 32
// parameter tuples. The generators are listed in tuple order: scheme, host
// prefix, port, path, query.
INSTANTIATE_TEST_SUITE_P(URLUtilTest,
OnlyWildcardTest,
testing::Combine(testing::Values("", "https://"),
testing::Values("", "dev."),
testing::Values("", ":1234"),
testing::Values("", "/path"),
testing::Values("", "?query")));
// The lone wildcard filter "*" must match a URL assembled from any combination
// of the optional pieces supplied by the test parameter.
TEST_P(OnlyWildcardTest, OnlyWildcard) {
  // Check wildcard filter works on any permutations of format
  // [scheme://][.]host[:port][/path][@query]
  const auto& pieces = GetParam();

  // Assemble the URL piece by piece around the fixed "google.com" host.
  std::string url = std::get<0>(pieces);  // scheme
  url += std::get<1>(pieces);             // opt_host
  url += "google.com";
  url += std::get<2>(pieces);             // port
  url += std::get<3>(pieces);             // path
  url += std::get<4>(pieces);             // query

  EXPECT_TRUE(MatchFilters({"*"}, url));
}
// Exercises MatchFilters() with exactly one filter pattern per call, covering
// host/subdomain matching, scheme restrictions, path prefixes, IP addresses
// and the leading-dot "exact host" form.
TEST(URLUtilTest, SingleFilter) {
// Match domain and all subdomains, for any filtered scheme.
EXPECT_TRUE(MatchFilters({"google.com"}, "http://google.com"));
EXPECT_TRUE(MatchFilters({"google.com"}, "http://google.com/"));
EXPECT_TRUE(MatchFilters({"google.com"}, "http://google.com/whatever"));
EXPECT_TRUE(MatchFilters({"google.com"}, "https://google.com/"));
EXPECT_FALSE(MatchFilters({"google.com"}, "bogus://google.com/"));
EXPECT_FALSE(MatchFilters({"google.com"}, "http://notgoogle.com/"));
EXPECT_TRUE(MatchFilters({"google.com"}, "http://mail.google.com"));
EXPECT_TRUE(MatchFilters({"google.com"}, "http://x.mail.google.com"));
EXPECT_TRUE(MatchFilters({"google.com"}, "https://x.mail.google.com/"));
EXPECT_TRUE(MatchFilters({"google.com"}, "http://x.y.google.com/a/b"));
EXPECT_FALSE(MatchFilters({"google.com"}, "http://youtube.com/"));
// Filter only http, ftp and ws schemes: a filter with an explicit scheme must
// not match the same host under a different scheme (e.g. https or wss).
EXPECT_TRUE(MatchFilters({"http://secure.com"}, "http://secure.com"));
EXPECT_TRUE(
MatchFilters({"http://secure.com"}, "http://secure.com/whatever"));
EXPECT_TRUE(MatchFilters({"ftp://secure.com"}, "ftp://secure.com/"));
EXPECT_TRUE(MatchFilters({"ws://secure.com"}, "ws://secure.com"));
EXPECT_FALSE(MatchFilters({"http://secure.com"}, "https://secure.com/"));
EXPECT_FALSE(MatchFilters({"ws://secure.com"}, "wss://secure.com"));
EXPECT_TRUE(MatchFilters({"http://secure.com"}, "http://www.secure.com"));
EXPECT_FALSE(MatchFilters({"http://secure.com"}, "https://www.secure.com"));
EXPECT_FALSE(MatchFilters({"ws://secure.com"}, "wss://www.secure.com"));
// Filter only a certain path prefix. Note the prefix is textual: "/ruin" also
// matches "/ruins".
EXPECT_TRUE(MatchFilters({"path.to/ruin"}, "http://path.to/ruin"));
EXPECT_TRUE(MatchFilters({"path.to/ruin"}, "https://path.to/ruin"));
EXPECT_TRUE(MatchFilters({"path.to/ruin"}, "http://path.to/ruins"));
EXPECT_TRUE(MatchFilters({"path.to/ruin"}, "http://path.to/ruin/signup"));
EXPECT_TRUE(MatchFilters({"path.to/ruin"}, "http://www.path.to/ruin"));
EXPECT_FALSE(MatchFilters({"path.to/ruin"}, "http://path.to/fortune"));
// Filter only a certain path prefix and scheme.
EXPECT_TRUE(
MatchFilters({"https://s.aaa.com/path"}, "https://s.aaa.com/path"));
EXPECT_TRUE(
MatchFilters({"https://s.aaa.com/path"}, "https://s.aaa.com/path/bbb"));
EXPECT_FALSE(
MatchFilters({"https://s.aaa.com/path"}, "http://s.aaa.com/path"));
EXPECT_FALSE(
MatchFilters({"https://s.aaa.com/path"}, "https://aaa.com/path"));
EXPECT_FALSE(
MatchFilters({"https://s.aaa.com/path"}, "https://x.aaa.com/path"));
EXPECT_FALSE(
MatchFilters({"https://s.aaa.com/path"}, "https://s.aaa.com/bbb"));
EXPECT_FALSE(MatchFilters({"https://s.aaa.com/path"}, "https://s.aaa.com/"));
// Filter only ws and wss schemes.
EXPECT_TRUE(MatchFilters({"ws://ws.aaa.com"}, "ws://ws.aaa.com"));
EXPECT_TRUE(MatchFilters({"wss://ws.aaa.com"}, "wss://ws.aaa.com"));
EXPECT_FALSE(MatchFilters({"ws://ws.aaa.com"}, "http://ws.aaa.com"));
EXPECT_FALSE(MatchFilters({"ws://ws.aaa.com"}, "https://ws.aaa.com"));
EXPECT_FALSE(MatchFilters({"ws://ws.aaa.com"}, "ftp://ws.aaa.com"));
// Match an ip address.
EXPECT_TRUE(MatchFilters({"123.123.123.123"}, "http://123.123.123.123/"));
EXPECT_FALSE(MatchFilters({"123.123.123.123"}, "http://123.123.123.124/"));
// Open an exception: a filter for one subdomain must not match the parent
// domain or a sibling subdomain.
EXPECT_FALSE(MatchFilters({"plus.google.com"}, "http://google.com/"));
EXPECT_FALSE(MatchFilters({"plus.google.com"}, "http://www.google.com/"));
EXPECT_TRUE(MatchFilters({"plus.google.com"}, "http://plus.google.com/"));
// Match exactly "google.com", only for http.
EXPECT_TRUE(MatchFilters({"http://.google.com"}, "http://google.com/"));
EXPECT_FALSE(MatchFilters({"http://.google.com"}, "https://google.com/"));
EXPECT_FALSE(MatchFilters({"http://.google.com"}, "http://www.google.com/"));
}
// Exercises MatchFilters() with several filter patterns registered at once.
// The first half uses overlapping path-prefix filters; the second half uses
// query-based filters.
TEST(URLUtilTest, MultipleFilters) {
// Test exceptions to path prefixes, and most specific matches.
std::vector<std::string> patterns = {"s.xxx.com/a/b",
"https://s.xxx.com/a/b/c/d"};
EXPECT_FALSE(MatchFilters(patterns, "http://s.xxx.com/a"));
EXPECT_FALSE(MatchFilters(patterns, "http://s.xxx.com/a/x"));
EXPECT_FALSE(MatchFilters(patterns, "https://s.xxx.com/a/x"));
EXPECT_TRUE(MatchFilters(patterns, "http://s.xxx.com/a/b"));
EXPECT_TRUE(MatchFilters(patterns, "https://s.xxx.com/a/b"));
EXPECT_TRUE(MatchFilters(patterns, "http://s.xxx.com/a/b/x"));
EXPECT_TRUE(MatchFilters(patterns, "http://s.xxx.com/a/b/c"));
EXPECT_TRUE(MatchFilters(patterns, "https://s.xxx.com/a/b/c"));
EXPECT_TRUE(MatchFilters(patterns, "https://s.xxx.com/a/b/c/x"));
EXPECT_TRUE(MatchFilters(patterns, "https://s.xxx.com/a/b/c/d"));
EXPECT_TRUE(MatchFilters(patterns, "http://s.xxx.com/a/b/c/d"));
EXPECT_TRUE(MatchFilters(patterns, "https://s.xxx.com/a/b/c/d/x"));
EXPECT_TRUE(MatchFilters(patterns, "http://s.xxx.com/a/b/c/d/x"));
EXPECT_FALSE(MatchFilters(patterns, "http://xxx.com/a"));
EXPECT_FALSE(MatchFilters(patterns, "http://xxx.com/a/b"));
// Match queries. The expectations below cover token order being irrelevant,
// extra tokens in the URL being allowed, and a trailing '*' turning a token
// into a prefix match.
std::vector<std::string> queries = {"*?q=1234", "*?q=5678", "*?a=1&b=2",
"youtube.com?foo=baz",
"youtube.com?foo=bar*"};
EXPECT_TRUE(MatchFilters(queries, "http://google.com?q=1234"));
EXPECT_TRUE(MatchFilters(queries, "http://google.com?q=5678"));
EXPECT_TRUE(MatchFilters(queries, "http://google.com?a=1&b=2"));
EXPECT_TRUE(MatchFilters(queries, "http://google.com?b=2&a=1"));
EXPECT_TRUE(MatchFilters(queries, "http://google.com?a=1&b=4&q=1234"));
EXPECT_TRUE(MatchFilters(queries, "http://youtube.com?foo=baz"));
EXPECT_TRUE(MatchFilters(queries, "http://youtube.com?foo=barbaz"));
EXPECT_TRUE(MatchFilters(queries, "http://youtube.com?a=1&foo=barbaz"));
EXPECT_FALSE(MatchFilters(queries, "http://google.com?r=1234"));
EXPECT_FALSE(MatchFilters(queries, "http://google.com?r=5678"));
EXPECT_FALSE(MatchFilters(queries, "http://google.com?a=2&b=1"));
EXPECT_FALSE(MatchFilters(queries, "http://google.com?b=1&a=2"));
EXPECT_FALSE(MatchFilters(queries, "http://google.com?a=1&b=3"));
EXPECT_FALSE(MatchFilters(queries, "http://youtube.com?foo=meh"));
EXPECT_FALSE(MatchFilters(queries, "http://youtube.com?foo=bazbar"));
EXPECT_FALSE(MatchFilters(queries, "http://youtube.com?foo=ba"));
}
// Walks through the documented filter format one field at a time (scheme,
// leading dot, host, port, path, query, user:pass, case sensitivity) and pins
// the expected MatchFilters() result for each statement.
TEST(URLUtilTest, BasicCoverage) {
// Tests to cover the documentation from
// http://www.chromium.org/administrators/url-blocklist-filter-format
// [scheme://][.]host[:port][/path][@query]
// Scheme can be http, https, ftp, chrome, etc. This field is optional, and
// must be followed by '://'.
EXPECT_TRUE(MatchFilters({"file://*"}, "file:///abc.txt"));
EXPECT_TRUE(MatchFilters({"file:*"}, "file:///usr/local/boot.txt"));
EXPECT_TRUE(MatchFilters({"https://*"}, "https:///abc.txt"));
EXPECT_TRUE(MatchFilters({"ftp://*"}, "ftp://ftp.txt"));
EXPECT_TRUE(MatchFilters({"chrome://*"}, "chrome:policy"));
EXPECT_TRUE(MatchFilters({"noscheme"}, "http://noscheme"));
// Filter custom schemes.
EXPECT_TRUE(MatchFilters({"custom://*"}, "custom://example_app"));
EXPECT_TRUE(MatchFilters({"custom:*"}, "custom:example2_app"));
EXPECT_FALSE(MatchFilters({"custom://*"}, "customs://example_apps"));
EXPECT_FALSE(MatchFilters({"custom://*"}, "cust*://example_ap"));
EXPECT_FALSE(MatchFilters({"custom://*"}, "ecustom:example_app"));
EXPECT_TRUE(MatchFilters({"custom://*"}, "custom:///abc.txt"));
// Tests for custom scheme patterns that are not supported.
EXPECT_FALSE(MatchFilters({"wrong://app"}, "wrong://app"));
EXPECT_FALSE(MatchFilters({"wrong ://*"}, "wrong ://app"));
EXPECT_FALSE(MatchFilters({" wrong:*"}, " wrong://app"));
// Omitting the scheme matches most standard schemes.
EXPECT_TRUE(MatchFilters({"example.com"}, "chrome:example.com"));
EXPECT_TRUE(MatchFilters({"example.com"}, "chrome://example.com"));
EXPECT_TRUE(MatchFilters({"example.com"}, "file://example.com/"));
EXPECT_TRUE(MatchFilters({"example.com"}, "ftp://example.com"));
EXPECT_TRUE(MatchFilters({"example.com"}, "http://example.com"));
EXPECT_TRUE(MatchFilters({"example.com"}, "https://example.com"));
EXPECT_TRUE(MatchFilters({"example.com"}, "ws://example.com"));
EXPECT_TRUE(MatchFilters({"example.com"}, "wss://example.com"));
// Some schemes are not matched when the scheme is omitted.
EXPECT_FALSE(MatchFilters({"example.com"}, "about://example.com"));
EXPECT_FALSE(MatchFilters({"example.com"}, "about:example.com"));
EXPECT_FALSE(MatchFilters({"example.com/*"}, "filesystem:///something"));
EXPECT_FALSE(MatchFilters({"example.com"}, "custom://example.com"));
EXPECT_FALSE(MatchFilters({"example"}, "custom://example"));
// An optional '.' (dot) can prefix the host field to disable subdomain
// matching, see below for details.
EXPECT_TRUE(MatchFilters({".example.com"}, "http://example.com/path"));
EXPECT_FALSE(MatchFilters({".example.com"}, "http://mail.example.com/path"));
EXPECT_TRUE(MatchFilters({"example.com"}, "http://mail.example.com/path"));
EXPECT_TRUE(MatchFilters({"ftp://.ftp.file"}, "ftp://ftp.file"));
EXPECT_FALSE(MatchFilters({"ftp://.ftp.file"}, "ftp://sub.ftp.file"));
// The host field is required, and is a valid hostname or an IP address. It
// can also take the special '*' value, see below for details.
EXPECT_TRUE(MatchFilters({"*"}, "http://anything"));
EXPECT_TRUE(MatchFilters({"*"}, "ftp://anything"));
EXPECT_TRUE(MatchFilters({"*"}, "custom://anything"));
EXPECT_TRUE(MatchFilters({"host"}, "http://host:8080"));
EXPECT_FALSE(MatchFilters({"host"}, "file:///host"));
EXPECT_TRUE(MatchFilters({"10.1.2.3"}, "http://10.1.2.3:8080/path"));
// No host, will match nothing.
EXPECT_FALSE(MatchFilters({":8080"}, "http://host:8080"));
EXPECT_FALSE(MatchFilters({":8080"}, "http://:8080"));
// An optional port can come after the host. It must be a valid port value
// from 1 to 65535.
EXPECT_TRUE(MatchFilters({"host:8080"}, "http://host:8080/path"));
EXPECT_TRUE(MatchFilters({"host:1"}, "http://host:1/path"));
// Out of range port.
EXPECT_FALSE(MatchFilters({"host:65536"}, "http://host:65536/path"));
// Star is not allowed in port numbers.
EXPECT_FALSE(MatchFilters({"example.com:*"}, "http://example.com"));
EXPECT_FALSE(MatchFilters({"example.com:*"}, "http://example.com:8888"));
// An optional path can come after port. A filter without a port matches the
// host on any port (first case below).
EXPECT_TRUE(MatchFilters({"host/path"}, "http://host:8080/path"));
EXPECT_TRUE(MatchFilters({"host/path/path2"}, "http://host/path/path2"));
EXPECT_TRUE(MatchFilters({"host/path"}, "http://host/path/path2"));
// An optional query can come in the end, which is a set of key-value and
// key-only tokens delimited by '&'. The key-value tokens are separated
// by '='. A query token can optionally end with a '*' to indicate prefix
// match. Token order is ignored during matching.
EXPECT_TRUE(MatchFilters({"host?q1=1&q2=2"}, "http://host?q2=2&q1=1"));
EXPECT_FALSE(MatchFilters({"host?q1=1&q2=2"}, "http://host?q2=1&q1=2"));
EXPECT_FALSE(MatchFilters({"host?q1=1&q2=2"}, "http://host?Q2=2&Q1=1"));
EXPECT_TRUE(MatchFilters({"host?q1=1&q2=2"}, "http://host?q2=2&q1=1&q3=3"));
EXPECT_TRUE(MatchFilters({"host?q1=1&q2=2*"}, "http://host?q2=21&q1=1&q3=3"));
// user:pass fields can be included but will be ignored
// (e.g. http://user:pass@ftp.example.com/pub/bigfile.iso).
EXPECT_TRUE(
MatchFilters({"host.com/path"}, "http://user:pass@host.com:8080/path"));
EXPECT_TRUE(MatchFilters({"ftp://host.com/path"},
"ftp://user:pass@host.com:8080/path"));
// Case sensitivity.
// Scheme is case insensitive.
EXPECT_TRUE(MatchFilters({"suPPort://*"}, "support:example"));
EXPECT_TRUE(MatchFilters({"FILE://*"}, "file:example"));
EXPECT_TRUE(MatchFilters({"FILE://*"}, "FILE://example"));
EXPECT_TRUE(MatchFilters({"FtP:*"}, "ftp://example"));
EXPECT_TRUE(MatchFilters({"http://example.com"}, "HTTP://example.com"));
EXPECT_TRUE(MatchFilters({"HTTP://example.com"}, "http://example.com"));
// Host is case insensitive.
EXPECT_TRUE(MatchFilters({"http://EXAMPLE.COM"}, "http://example.com"));
EXPECT_TRUE(MatchFilters({"Example.com"}, "http://examplE.com/Path?Query=1"));
// Path is case sensitive.
EXPECT_FALSE(MatchFilters({"example.com/Path"}, "http://example.com/path"));
EXPECT_TRUE(MatchFilters({"http://example.com/aB"}, "http://example.com/aB"));
EXPECT_FALSE(
MatchFilters({"http://example.com/aB"}, "http://example.com/Ab"));
EXPECT_FALSE(
MatchFilters({"http://example.com/aB"}, "http://example.com/ab"));
EXPECT_FALSE(
MatchFilters({"http://example.com/aB"}, "http://example.com/AB"));
// Query is case sensitive.
EXPECT_FALSE(MatchFilters({"host/path?Query=1"}, "http://host/path?query=1"));
}
// Parameter sets for FilterToComponentsTest. Each entry reads, in order:
//   filter, expected scheme, expected host, expected match_subdomains,
//   expected port, expected path.
// An empty string means the component is expected to be empty, and port 0 is
// expected whenever the filter names no port.
INSTANTIATE_TEST_SUITE_P(
    URLUtilTest,
    FilterToComponentsTest,
    testing::Values(
        // Host names: without a leading dot the expected host gains one and
        // subdomains are matched; with a leading dot it is the reverse.
        FilterTestParams("google.com", "", ".google.com", true, 0u, ""),
        FilterTestParams(".google.com", "", "google.com", false, 0u, ""),
        FilterTestParams("http://google.com", "http", ".google.com", true, 0u,
                         ""),
        FilterTestParams("google.com/", "", ".google.com", true, 0u, "/"),
        FilterTestParams("http://google.com:8080/whatever", "http",
                         ".google.com", true, 8080u, "/whatever"),
        // Credentials in the filter do not show up in any expected component.
        FilterTestParams("http://user:pass@google.com:8080/whatever", "http",
                         ".google.com", true, 8080u, "/whatever"),
        // IP addresses: host is kept verbatim and subdomains are not matched.
        FilterTestParams("123.123.123.123", "", "123.123.123.123", false, 0u,
                         ""),
        FilterTestParams("https://123.123.123.123", "https", "123.123.123.123",
                         false, 0u, ""),
        FilterTestParams("123.123.123.123/", "", "123.123.123.123", false, 0u,
                         "/"),
        FilterTestParams("http://123.123.123.123:123/whatever", "http",
                         "123.123.123.123", false, 123u, "/whatever"),
        // Wildcard host: expected host is empty and subdomains are matched.
        FilterTestParams("*", "", "", true, 0u, ""),
        FilterTestParams("ftp://*", "ftp", "", true, 0u, ""),
        FilterTestParams("http://*/whatever", "http", "", true, 0u,
                         "/whatever")));
// Splits the parameter's filter with FilterToComponents() and compares the
// resulting scheme, host, subdomain flag, port and path with the expectations
// stored in the parameter.
TEST_P(FilterToComponentsTest, FilterToComponents) {
  const FilterTestParams& expected = GetParam();

  // Output slots. The port starts at 42, a value none of the parameter sets
  // expects, so a port left unwritten would show up as a mismatch.
  std::string actual_scheme;
  std::string actual_host;
  bool actual_match_subdomains = true;
  uint16_t actual_port = 42;
  std::string actual_path;
  std::string actual_query;  // Filled in by the call but not checked below.

  FilterToComponents(expected.filter(), &actual_scheme, &actual_host,
                     &actual_match_subdomains, &actual_port, &actual_path,
                     &actual_query);

  EXPECT_EQ(expected.scheme(), actual_scheme);
  EXPECT_EQ(expected.host(), actual_host);
  EXPECT_EQ(expected.match_subdomains(), actual_match_subdomains);
  EXPECT_EQ(expected.port(), actual_port);
  EXPECT_EQ(expected.path(), actual_path);
}
} // namespace util
} // namespace url_matcher