// blob: 4bb2faa0c1742cfcd022567f56781162620c485b [file] [log] [blame]
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/feedback/anonymizer_tool.h"
#include <gtest/gtest.h>
#include "base/strings/string_util.h"
namespace feedback {
class AnonymizerToolTest : public testing::Test {
std::string AnonymizeMACAddresses(const std::string& input) {
return anonymizer_.AnonymizeMACAddresses(input);
std::string AnonymizeCustomPatterns(const std::string& input) {
return anonymizer_.AnonymizeCustomPatterns(input);
std::string AnonymizeCustomPatternWithContext(
const std::string& input,
const std::string& pattern,
std::map<std::string, std::string>* space) {
return anonymizer_.AnonymizeCustomPatternWithContext(input, pattern, space);
std::string AnonymizeCustomPatternWithoutContext(
const std::string& input,
const CustomPatternWithoutContext& pattern,
std::map<std::string, std::string>* space) {
return anonymizer_.AnonymizeCustomPatternWithoutContext(input, pattern,
AnonymizerTool anonymizer_;
// Exercises the top-level AnonymizerTool::Anonymize() entry point: inputs
// without sensitive data must come back unchanged, while MAC addresses and
// UUIDs must be replaced.
TEST_F(AnonymizerToolTest, Anonymize) {
  EXPECT_EQ("", anonymizer_.Anonymize(""));
  EXPECT_EQ("foo\nbar\n", anonymizer_.Anonymize("foo\nbar\n"));

  // Make sure MAC address anonymization is invoked.
  EXPECT_EQ("02:46:8a:00:00:01", anonymizer_.Anonymize("02:46:8a:ce:13:57"));

  // Make sure custom pattern anonymization is invoked.
  // NOTE(review): this expectation goes through the fixture's
  // AnonymizeCustomPatterns() helper rather than Anonymize(); left unchanged
  // here -- confirm whether that is intentional.
  EXPECT_EQ("Cell ID: '1'", AnonymizeCustomPatterns("Cell ID: 'A1B2'"));

  // Make sure UUIDs are anonymized. Two distinct UUIDs are expected to get
  // two distinct replacement ids.
  EXPECT_EQ(
      "REQUEST localhost - - \"POST /printers/<UUID: 1> HTTP/1.1\" 200 291 "
      "Create-Job successful-ok",
      anonymizer_.Anonymize(
          "REQUEST localhost - - \"POST /printers/"
          "cb738a9f-6433-4d95-a81e-94e4ae0ed30b HTTP/1.1\" 200 291 Create-Job "
          "successful-ok"));
  EXPECT_EQ(
      "REQUEST localhost - - \"POST /printers/<UUID: 2> HTTP/1.1\" 200 286 "
      "Create-Job successful-ok",
      anonymizer_.Anonymize(
          "REQUEST localhost - - \"POST /printers/"
          "d17188da-9cd3-44f4-b148-3e1d748a3b0f HTTP/1.1\" 200 286 Create-Job "
          "successful-ok"));
}
// Checks the MAC address pass in isolation. Per the expectations below, the
// first three octets of a MAC address are kept and the last three are replaced
// by a per-address counter.
TEST_F(AnonymizerToolTest, AnonymizeMACAddresses) {
  // Inputs without a full MAC address are returned unchanged.
  EXPECT_EQ("", AnonymizeMACAddresses(""));
  EXPECT_EQ("foo\nbar\n", AnonymizeMACAddresses("foo\nbar\n"));
  // Only five octets: not a MAC address.
  EXPECT_EQ("11:22:33:44:55", AnonymizeMACAddresses("11:22:33:44:55"));

  EXPECT_EQ("aa:bb:cc:00:00:01", AnonymizeMACAddresses("aa:bb:cc:dd:ee:ff"));

  // Multi-line input: a repeated address reuses its id, new addresses get the
  // next id, and an address split across a newline is not matched.
  EXPECT_EQ(
      "BSSID: aa:bb:cc:00:00:01 in the middle\n"
      "bb:cc:dd:00:00:02 start of line\n"
      "end of line aa:bb:cc:00:00:01\n"
      "no match across lines aa:bb:cc:\n"
      "dd:ee:ff two on the same line:\n"
      "x bb:cc:dd:00:00:02 cc:dd:ee:00:00:03 x\n",
      AnonymizeMACAddresses("BSSID: aa:bb:cc:dd:ee:ff in the middle\n"
                            "bb:cc:dd:ee:ff:00 start of line\n"
                            "end of line aa:bb:cc:dd:ee:ff\n"
                            "no match across lines aa:bb:cc:\n"
                            "dd:ee:ff two on the same line:\n"
                            "x bb:cc:dd:ee:ff:00 cc:dd:ee:ff:00:11 x\n"));

  // A mixed-case spelling maps to the same id as the lower-case address seen
  // above, and the output is lower-case.
  EXPECT_EQ("Remember bb:cc:dd:00:00:02?",
            AnonymizeMACAddresses("Remember bB:Cc:DD:ee:ff:00?"));
}
// Checks the custom-pattern pass in isolation via the fixture's
// AnonymizeCustomPatterns() helper. The visible expectations cover cell ids,
// location area codes, SSIDs, serial numbers, gaia ids, e-mail addresses,
// IPv4/IPv6 addresses and URLs.
//
// NOTE(review): this copy of the test appears to have lost lines and literals
// (several EXPECT_EQ statements have no actual-value argument, the kURLs
// initializer has no entries or terminator, and the closing brace is absent).
// Restore the missing text from the upstream file before building -- TODO
// confirm against upstream.
TEST_F(AnonymizerToolTest, AnonymizeCustomPatterns) {
EXPECT_EQ("", AnonymizeCustomPatterns(""));
// Cell ids: a repeated value ('A1B2') keeps its id, a new value gets the next.
EXPECT_EQ("Cell ID: '1'", AnonymizeCustomPatterns("Cell ID: 'A1B2'"));
EXPECT_EQ("Cell ID: '2'", AnonymizeCustomPatterns("Cell ID: 'C1D2'"));
EXPECT_EQ("foo Cell ID: '1' bar",
AnonymizeCustomPatterns("foo Cell ID: 'A1B2' bar"));
EXPECT_EQ("foo Location area code: '1' bar",
AnonymizeCustomPatterns("foo Location area code: 'A1B2' bar"));
// SSIDs are numbered from 1 again, independently of the cell ids above.
EXPECT_EQ("foo\na SSID='1' b\n'",
AnonymizeCustomPatterns("foo\na SSID='Joe's' b\n'"));
EXPECT_EQ("ssid '2'", AnonymizeCustomPatterns("ssid 'My AP'"));
// "bssid" is expected to be left untouched by the SSID pattern.
EXPECT_EQ("bssid 'aa:bb'", AnonymizeCustomPatterns("bssid 'aa:bb'"));
// NOTE(review): the call that wraps the next input literal seems to be
// missing from this copy.
EXPECT_EQ("Scan SSID - hexdump(len=6): 1\nfoo",
"Scan SSID - hexdump(len=6): 47 6f 6f 67 6c 65\nfoo"));
// NOTE(review): the opening "EXPECT_EQ(" for this expectation seems to be
// missing. Per the expected string, an SSID value spanning a newline is not
// replaced.
"a\nb [SSID=1] [SSID=2] [SSID=foo\nbar] b",
AnonymizeCustomPatterns("a\nb [SSID=foo] [SSID=bar] [SSID=foo\nbar] b"));
// Serial numbers in several spellings share one id sequence (1..4).
EXPECT_EQ("SerialNumber: 1",
AnonymizeCustomPatterns("SerialNumber: 1217D7EF"));
EXPECT_EQ("serial number: 2",
AnonymizeCustomPatterns("serial number: 50C971FEE7F3x010900"));
EXPECT_EQ("SerialNumber: 3",
AnonymizeCustomPatterns("SerialNumber: EVT23-17BA01-004"));
EXPECT_EQ("serial=4", AnonymizeCustomPatterns("serial=\"1234AA5678\""));
EXPECT_EQ("gaia_id='2'", AnonymizeCustomPatterns("gaia_id='987654321'"));
EXPECT_EQ("{id: 1, email:",
AnonymizeCustomPatterns("{id: 123454321, email:"));
// NOTE(review): the e-mail and first two IPv6 expectations below have no
// input argument in this copy; the address literals were presumably stripped.
EXPECT_EQ("<email: 1>",
EXPECT_EQ("Email: <email: 1>.",
EXPECT_EQ("Email:\n<email: 2>\n",
EXPECT_EQ("[<IPv6: 1>]", AnonymizeCustomPatterns(
EXPECT_EQ("[<IPv6: 2>]",
EXPECT_EQ("[<IPv6: 3>]", AnonymizeCustomPatterns("[2001:db8::ff00:42:8329]"));
EXPECT_EQ("[<IPv6: 4>]", AnonymizeCustomPatterns("[aa::bb]"));
// NOTE(review): the inputs of the next two expectations look stripped as
// well (an empty string and "Foo Bar" cannot produce the expected markers).
EXPECT_EQ("<IPv4: 1>", AnonymizeCustomPatterns(""));
EXPECT_EQ("Foo <URL: 2> Bar",
AnonymizeCustomPatterns("Foo Bar"));
// Every entry of kURLs is expected to be replaced as a whole, i.e. the result
// starts with "<URL: " and ends with ">".
// NOTE(review): the array entries, the EXPECT_TRUE( opener for the StartsWith
// check and the loop's closing brace are not present in this copy.
const char* kURLs[] = {
for (size_t i = 0; i < base::size(kURLs); ++i) {
std::string got = AnonymizeCustomPatterns(kURLs[i]);
base::StartsWith(got, "<URL: ", base::CompareCase::INSENSITIVE_ASCII));
EXPECT_TRUE(base::EndsWith(got, ">", base::CompareCase::INSENSITIVE_ASCII));
// Test that "Android:" is not considered a schema with empty hier part.
EXPECT_EQ("The following applies to Android:",
AnonymizeCustomPatterns("The following applies to Android:"));
// Checks AnonymizeCustomPatternWithContext() with a three-group pattern
// (prefix, digits to replace, suffix) and a caller-owned |space| map that is
// passed to every call.
//
// NOTE(review): this copy appears to have lost lines: some statements lack
// their "EXPECT_EQ(" opener or expected value, one lacks the wrapping call,
// and the closing brace is absent. Restore from the upstream file before
// building -- TODO confirm against upstream.
TEST_F(AnonymizerToolTest, AnonymizeCustomPatternWithContext) {
// Matches "id '<digits>'" / "id: '<digits>'" case-insensitively.
const char kPattern[] = "(\\b(?i)id:? ')(\\d+)(')";
std::map<std::string, std::string> space;
EXPECT_EQ("", AnonymizeCustomPatternWithContext("", kPattern, &space));
// NOTE(review): the opener and expected value of this expectation are
// missing.
AnonymizeCustomPatternWithContext("foo\nbar\n", kPattern, &space));
// '2345' is seen first and gets id 1; '1234' gets id 2. Later calls with the
// same |space| reuse those ids.
EXPECT_EQ("id '1'",
AnonymizeCustomPatternWithContext("id '2345'", kPattern, &space));
EXPECT_EQ("id '2'",
AnonymizeCustomPatternWithContext("id '1234'", kPattern, &space));
EXPECT_EQ("id: '2'",
AnonymizeCustomPatternWithContext("id: '1234'", kPattern, &space));
EXPECT_EQ("ID: '1'",
AnonymizeCustomPatternWithContext("ID: '2345'", kPattern, &space));
// NOTE(review): the wrapping AnonymizeCustomPatternWithContext( call seems to
// be missing before the input literal.
EXPECT_EQ("x1 id '1' 1x id '2'\nid '1'\n",
"x1 id '2345' 1x id '1234'\nid '2345'\n", kPattern, &space));
// NOTE(review): '1234' mapped to id 2 above but is expected to map to id 1
// here, so |space| was presumably reset in a line that is missing from this
// copy -- verify.
EXPECT_EQ("id '1'",
AnonymizeCustomPatternWithContext("id '1234'", kPattern, &space));
// A pattern with empty prefix and suffix groups.
// NOTE(review): the opener and expected value of this expectation are
// missing.
AnonymizeCustomPatternWithContext("xyz", "()(y+)()", &space));
// Checks AnonymizeCustomPatternWithoutContext() with a single-group pattern
// aliased "pattern". Per the expectation, each match is replaced by
// "<pattern: N>", and a repeated match ("o") reuses its id.
TEST_F(AnonymizerToolTest, AnonymizeCustomPatternWithoutContext) {
  CustomPatternWithoutContext kPattern = {"pattern", "(o+)"};
  std::map<std::string, std::string> space;

  // Empty input stays empty.
  EXPECT_EQ("", AnonymizeCustomPatternWithoutContext("", kPattern, &space));

  // "o" -> 1, "oo" -> 2, and the second "o" maps back to 1.
  EXPECT_EQ("f<pattern: 1>\nf<pattern: 2>z\nf<pattern: 1>l\n",
            AnonymizeCustomPatternWithoutContext("fo\nfooz\nfol\n", kPattern,
                                                 &space));
}
// End-to-end check: builds one large multi-line input (|data|) mixing SSIDs,
// URLs, e-mail addresses, IPv4/IPv6 addresses and a MAC address, runs it
// through anonymizer_.Anonymize() once, and compares the output with the
// expected text (|result|).
//
// NOTE(review): in this copy most IPv4 literals in |data| are reduced to a
// bare "\n" and many markers in |result| have lost their label (e.g. "< 3>"),
// so the two strings no longer line up. The literals were presumably stripped
// during extraction; restore them from the upstream file before building --
// TODO confirm against upstream. The closing brace of the test body is also
// absent.
TEST_F(AnonymizerToolTest, AnonymizeChunk) {
// Input text; the trailing comment on each line names what that line holds.
std::string data =
"aaaaaaaa [SSID=123aaaaaa]aaaaa\n" // SSID.
"aaaaaaaahttp://tets.comaaaaaaa\n" // URL.
"aaaaaemail@example.comaaa\n" // Email address.
"example@@1234\n" // No PII, it is not valid email address.
"\n" // IP address.
"\n" // IP address.
"\n" // IPv4 loopback.
"\n" // IPv4 loopback.
"\n" // Any IPv4.
"\n" // Any IPv4.
"\n" // IPv4 private class A.
"\n" // Intentional duplicate.
"\n" // IPv4 private class A.
"\n" // IPv4 private class A.
"\n" // IPv4 private class B.
"\n" // IPv4 private class B.
"\n" // IP address.
"\n" // IP address.
"\n" // IPv4 private class C.
"\n" // IPv4 private class C.
"\n" // IP address.
"\n" // Link local.
"\n" // IP address.
"fe80::\n" // Link local.
"fe80::ffff\n" // Link local.
"febf:ffff::ffff\n" // Link local.
"fecc::1111\n" // IP address.
"\n" // Multicast.
"\n" // IP address.
"\n" // Broadcast.
"\n" // ChromeOS.
"\n" // IP address.
"\n" // DNS
"\n" // DNS
"\n" // DNS
"\n" // IP address.
"\n" // Not an IP address.
"255.300.255.255\n" // Not an IP address.
"aaaa123.123.45.4aaa\n" // IP address.
"11:11;11::11\n" // IP address.
"11::11\n" // IP address.
"11:11:abcdef:0:0:0:0:0\n" // No PII.
"::\n" // Unspecified.
"::1\n" // Local host.
"Instance::Set\n" // Ignore match, no PII.
"Instant::ff\n" // Ignore match, no PII.
"net::ERR_CONN_TIMEOUT\n" // Ignore match, no PII.
"ff01::1\n" // All nodes address (interface local).
"ff01::2\n" // All routers (interface local).
"ff01::3\n" // Multicast (interface local).
"ff02::1\n" // All nodes address (link local).
"ff02::2\n" // All routers (link local).
"ff02::3\n" // Multicast (link local).
"ff02::fb\n" // mDNSv6 (link local).
"ff08::fb\n" // mDNSv6.
"ff0f::101\n" // All NTP servers.
"::ffff:cb0c:10ea\n" // IPv4-mapped IPV6 (IP address).
"::ffff:a0a:a0a\n" // IPv4-mapped IPV6 (private class A).
"::ffff:a0a:a0a\n" // Intentional duplicate.
"::ffff:ac1e:1e1e\n" // IPv4-mapped IPV6 (private class B).
"::ffff:c0a8:640a\n" // IPv4-mapped IPV6 (private class C).
"::ffff:6473:5c01\n" // IPv4-mapped IPV6 (Chrome).
"64:ff9b::a0a:a0a\n" // IPv4-translated 6to4 IPV6 (private class A).
"64:ff9b::6473:5c01\n" // IPv4-translated 6to4 IPV6 (Chrome).
"::0101:ffff:c0a8:640a\n" // IP address.
"aa:aa:aa:aa:aa:aa\n" // MAC address (BSSID).
"chrome://resources/foo\n" // Secure chrome resource, whitelisted.
"chrome://resources/f?user=bar"; // Potentially PII in parameter.
// Expected output of Anonymize(data).
// NOTE(review): this literal has fewer lines than |data| and several markers
// are missing their label; see the note at the top of the test.
std::string result =
"aaaaaaaa [SSID=1]aaaaa\n"
"aaaaaaaa<URL: 1>\n"
"<email: 1>\n"
"<IPv4: 1>\n"
"<IPv4: 2>\n"
"< 3>\n"
"< 4>\n"
"< 5>\n"
"< 6>\n"
"< 7>\n"
"< 7>\n"
"< 8>\n"
"< 9>\n"
"< 10>\n"
"< 11>\n"
"<IPv4: 12>\n"
"<IPv4: 13>\n"
"< 14>\n"
"< 15>\n"
"<IPv4: 16>\n"
"< 17>\n"
"<IPv4: 18>\n"
"<fe80::/10: 1>\n"
"<fe80::/10: 2>\n"
"<fe80::/10: 3>\n"
"<IPv6: 4>\n"
"< 19>\n"
"<IPv4: 20>\n"
"<IPv4: 23>\n"
"<IPv4: 27>\n"
"aaaa<IPv4: 28>aaa\n"
"11:11;<IPv6: 5>\n"
"<IPv6: 5>\n"
"<ff01::/16: 13>\n"
"<ff02::/16: 16>\n"
"<ff02::/16: 17>\n"
"<IPv6: 18>\n"
"<IPv6: 19>\n"
"<IPv6: 20>\n"
"<M 21>\n"
"<M 21>\n"
"<M 22>\n"
"<M 23>\n"
"<M 24>\n"
"<T 25>\n"
"<T 26>\n"
"<IPv6: 27>\n"
"<URL: 2>";
// Single pass over the whole chunk with the fixture's tool instance.
EXPECT_EQ(result, anonymizer_.Anonymize(data));
} // namespace feedback