blob: a4e2d3a2b7604d742b9f02f984b15182c4a6945d [file] [log] [blame]
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "base/macros.h"
#include "base/memory/ref_counted.h"
#include "base/sequence_checker.h"
#include "base/sequenced_task_runner.h"
// Forward declaration of RE2, so that the declarations below can refer to
// re2::RE2 through pointers and smart pointers. No RE2 header appears in the
// include list visible above.
// NOTE(review): the closing brace of this namespace is not visible in this
// view.
namespace re2 {
class RE2;
namespace feedback {
// A custom pattern described by an alias and a regular expression; matches of
// the expression are replaced by numbered references to the alias.
// NOTE(review): the closing "};" of this struct is not visible in this view.
struct CustomPatternWithoutContext {
// A string literal used in anonymized tests. Matches to the |pattern| are
// replaced with <|alias|: 1>, <|alias|: 2>, ...
const char* alias;
// A RE2 regexp with exactly one capture group. Matches will be replaced by
// the alias reference described above.
const char* pattern;
// Replaces PII-sensitive data in strings with unique identifiers; Anonymize()
// is the entry point. The maps declared below record the original values
// together with the anonymized values that stand in for them.
// NOTE(review): access specifiers and the closing "};" of this class are not
// visible in this view.
class AnonymizerTool {
// Returns an anonymized version of |input|. PII-sensitive data (such as MAC
// addresses) in |input| is replaced with unique identifiers.
// This is an expensive operation. Make sure not to execute this on the UI
// thread.
std::string Anonymize(const std::string& input);
// Grants AnonymizerToolTest access to the non-public members of this class.
friend class AnonymizerToolTest;
// The helpers below are only declared here. Their descriptions are inferred
// from the signatures and the member comments and should be confirmed against
// the definitions.
//
// Returns the RE2 object for |pattern|. Presumably served from
// |regexp_cache_|, which would then keep ownership of the returned object --
// TODO confirm.
re2::RE2* GetRegExp(const std::string& pattern);
// Presumably returns |input| with MAC addresses replaced as described at
// |mac_addresses_| -- TODO confirm.
std::string AnonymizeMACAddresses(const std::string& input);
// Presumably applies the custom patterns to |input|. Note that |input| is
// taken by value here, unlike in the sibling helpers.
std::string AnonymizeCustomPatterns(std::string input);
// Presumably anonymizes the matches of a single |pattern| in |input|.
// |identifier_space| is a caller-provided map of original identifier to
// anonymized identifier; it is passed by pointer, so it may be updated.
std::string AnonymizeCustomPatternWithContext(
const std::string& input,
const std::string& pattern,
std::map<std::string, std::string>* identifier_space);
// Same shape as AnonymizeCustomPatternWithContext, but the pattern is given
// as a CustomPatternWithoutContext (alias plus regular expression).
std::string AnonymizeCustomPatternWithoutContext(
const std::string& input,
const CustomPatternWithoutContext& pattern,
std::map<std::string, std::string>* identifier_space);
// Map of MAC addresses discovered in anonymized strings to anonymized
// representations. 11:22:33:44:55:66 gets anonymized to 11:22:33:00:00:01,
// where the first three bytes represent the manufacturer. The last three
// bytes are used to distinguish different MAC addresses and are incremented
// for each newly discovered MAC address.
std::map<std::string, std::string> mac_addresses_;
// Like mac addresses, identifiers in custom patterns are anonymized.
// custom_patterns_with_context_[i] contains a map of original identifier to
// anonymized identifier for custom pattern number i.
std::vector<std::map<std::string, std::string>> custom_patterns_with_context_;
// NOTE(review): the declarator for the following type is missing from this
// view. It is presumably the counterpart of |custom_patterns_with_context_|
// for patterns without context -- confirm against the original header.
std::vector<std::map<std::string, std::string>>
// Cache to prevent the repeated compilation of the same regular expression
// pattern. Key is the string representation of the RegEx.
std::map<std::string, std::unique_ptr<re2::RE2>> regexp_cache_;
// A container for an AnonymizerTool that is thread-safely ref-countable.
// This is useful for a class that wants to post an async anonymization task
// to a background sequence runner and not deal with its own life-cycle ending
// while the AnonymizerTool is busy on another sequence.
// NOTE(review): access specifiers, the destructor, and the closing "};" of
// this class are not visible in this view.
class AnonymizerToolContainer
: public base::RefCountedThreadSafe<AnonymizerToolContainer> {
// |task_runner| is presumably stored in |task_runner_|, i.e. it is the
// sequence on which Get() may be called -- confirm against the definition.
explicit AnonymizerToolContainer(
scoped_refptr<base::SequencedTaskRunner> task_runner);
// Returns a pointer to the instance of this anonymizer. May only be called
// on |task_runner_|.
AnonymizerTool* Get();
// Gives the ref-counting base class access to the non-public members of this
// class.
friend class base::RefCountedThreadSafe<AnonymizerToolContainer>;
// The wrapped tool, uniquely owned by this container.
std::unique_ptr<AnonymizerTool> anonymizer_;
// The task runner referred to by the comment on Get().
scoped_refptr<base::SequencedTaskRunner> task_runner_;
} // namespace feedback