// components/subresource_filter/tools/rule_parser/rule_parser.cc - chromium/src - Git at Google

 // Copyright 2018 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "components/subresource_filter/tools/rule_parser/rule_parser.h"

 #include <map>
 #include <utility>
 #include <vector>

 #include "base/logging.h"
 #include "base/strings/string_split.h"
 #include "base/strings/string_util.h"
 #include "components/subresource_filter/tools/rule_parser/rule_options.h"
 #include "components/url_pattern_index/proto/rules.pb.h"

 namespace subresource_filter {

 namespace {

 // Encapsulates meta-information of URL rule options identified by keywords.
 class KeywordMap {
  public:
   // Types of rule options that can be represented by keywords.
   enum OptionType {
     OPTION_UNDEFINED,
     OPTION_ELEMENT_TYPE,
     OPTION_ACTIVATION_TYPE,
     OPTION_THIRD_PARTY,
     OPTION_DOMAIN,
     OPTION_SITEKEY,
     OPTION_MATCH_CASE,
     OPTION_COLLAPSE,
     OPTION_DO_NOT_TRACK,
   };

   enum OptionFlag : int {
     FLAG_NONE = 0,
     // The option requires a value, e.g. 'domain=example.org'.
     FLAG_REQUIRES_VALUE = 1,
     // The option allows invertion, e.g. 'image' and '~image'.
     FLAG_IS_TRISTATE = 2,
     // The option can be used with whitelist rules only.
     FLAG_IS_WHITELIST_ONLY = 4,
     // The option is not supposed to be used any more.
     FLAG_IS_DEPRECATED = 8,
     // The option is not supported yet.
     FLAG_IS_NOT_SUPPORTED = 16,
   };

   // Meta-information about an option represented by a certain keyword.
   struct OptionDetails {
     // Creates an option that defines a filter for the specified |element_type|.
     // In addition to the provided |flags|, FLAG_IS_TRISTATE will always be set
     // by default.
     OptionDetails(url_pattern_index::proto::ElementType element_type, int flags)
         : type(OPTION_ELEMENT_TYPE),
           flags(FLAG_IS_TRISTATE | flags),
           element_type(element_type) {}

     // Creates an ActivationType option.
     explicit OptionDetails(
         url_pattern_index::proto::ActivationType activation_type)
         : type(OPTION_ACTIVATION_TYPE),
           flags(FLAG_IS_WHITELIST_ONLY),
           activation_type(activation_type) {}

     // Creates a generic option.
     OptionDetails(OptionType type, int flags) : type(type), flags(flags) {
       DCHECK_NE(type, OPTION_ELEMENT_TYPE);
       DCHECK_NE(type, OPTION_ACTIVATION_TYPE);
     }

     bool requires_value() const { return flags & FLAG_REQUIRES_VALUE; }
     bool is_tristate() const { return flags & FLAG_IS_TRISTATE; }
     bool is_whitelist_only() const { return flags & FLAG_IS_WHITELIST_ONLY; }
     bool is_deprecated() const { return flags & FLAG_IS_DEPRECATED; }
     bool is_not_supported() const { return flags & FLAG_IS_NOT_SUPPORTED; }

     OptionType type = OPTION_UNDEFINED;

     // Stores various OptionFlag's combined using bitwise OR.
     int flags = FLAG_NONE;

     // The element type that this option defines a filter for, if any. Set to
     // ELEMENT_TYPE_UNSPECIFIED for non-ElementType options.
     url_pattern_index::proto::ElementType element_type =
         url_pattern_index::proto::ELEMENT_TYPE_UNSPECIFIED;

     // The activation type that this option includes to the rule. Set to
     // ACTIVATION_TYPE_UNSPECIFIED for non-ActivationType options.
     url_pattern_index::proto::ActivationType activation_type =
         url_pattern_index::proto::ACTIVATION_TYPE_UNSPECIFIED;
   };

   // Initializes the map with default keywords.
   KeywordMap();
   ~KeywordMap();

   // Returns detailed information associated with the provided |name| option.
   // Returns nullptr on unknown options.
   const OptionDetails* Lookup(base::StringPiece name) const;

  private:
   // Associates |details| with a specified option |name|.
   void AddOption(base::StringPiece name, const OptionDetails& details);

   std::map<std::string, OptionDetails> options_;

   DISALLOW_COPY_AND_ASSIGN(KeywordMap);
 };

 // Registers every known keyword: element types, deprecated element types,
 // activation types, and finally the remaining generic options.
 KeywordMap::KeywordMap() {
   // Supported element types carry no flags besides the implicit tristate one.
   for (const auto& entry : kElementTypes)
     AddOption(entry.name, OptionDetails(entry.type, FLAG_NONE));

   // Deprecated element type keywords are registered under the type they map
   // to and are marked as deprecated.
   for (const auto& entry : kDeprecatedElementTypes)
     AddOption(entry.name, OptionDetails(entry.maps_to_type, FLAG_IS_DEPRECATED));

   // Activation types.
   for (const auto& entry : kActivationTypes)
     AddOption(entry.name, OptionDetails(entry.type));

   // TODO(pkalinnikov): Consider moving options metadata to a header.
   struct GenericOption {
     const char* name;
     OptionType type;
     int flags;
   };
   const GenericOption generic_options[] = {
       // Tristate options.
       {"third-party", OPTION_THIRD_PARTY, FLAG_IS_TRISTATE},
       {"collapse", OPTION_COLLAPSE, FLAG_IS_TRISTATE | FLAG_IS_NOT_SUPPORTED},
       // Flag options.
       {"match-case", OPTION_MATCH_CASE, FLAG_NONE},
       {"donottrack", OPTION_DO_NOT_TRACK, FLAG_IS_NOT_SUPPORTED},
       // Value options.
       {"sitekey", OPTION_SITEKEY, FLAG_REQUIRES_VALUE | FLAG_IS_NOT_SUPPORTED},
       {"domain", OPTION_DOMAIN, FLAG_REQUIRES_VALUE},
   };

   for (const GenericOption& entry : generic_options)
     AddOption(entry.name, OptionDetails(entry.type, entry.flags));
 }

 // Defaulted out of line; the only data member the class declares is the
 // |options_| map.
 KeywordMap::~KeywordMap() = default;

 // Finds the details registered for the keyword |name|. Returns nullptr when
 // the keyword has not been registered.
 const KeywordMap::OptionDetails* KeywordMap::Lookup(
     base::StringPiece name) const {
   // TODO(pkalinnikov): Avoid std::string allocation.
   const auto entry = options_.find(std::string(name));
   if (entry == options_.end())
     return nullptr;
   return &entry->second;
 }

 // Registers |details| under the keyword |name|. Registering the same keyword
 // twice is a programming error and trips the DCHECK.
 void KeywordMap::AddOption(base::StringPiece name,
                            const OptionDetails& details) {
   const bool is_new_keyword =
       options_.emplace(std::string(name), details).second;
   DCHECK(is_new_keyword);
 }

 // Returns the process-wide KeywordMap. The map is heap-allocated on the first
 // call and is never deleted by this function.
 KeywordMap* GetKeywordsMapSingleton() {
   // TODO(melandory): Get rid of this singleton.
   static KeywordMap* const instance = new KeywordMap();
   return instance;
 }

 }  // namespace

 // RuleParser ------------------------------------------------------------------

 // ParseError and RuleParser use the compiler-generated constructors and
 // destructors; they are defaulted here, out of line.
 RuleParser::ParseError::ParseError() = default;
 RuleParser::ParseError::~ParseError() = default;

 RuleParser::RuleParser() = default;
 RuleParser::~RuleParser() = default;

 // Maps |code| to a human-readable, statically allocated description. Codes
 // without a dedicated message yield "Unknown error".
 const char* RuleParser::GetParseErrorCodeDescription(
     ParseError::ErrorCode code) {
   const char* description = "Unknown error";
   switch (code) {
     case ParseError::NONE:
       description = "Ok";
       break;
     case ParseError::EMPTY_RULE:
       description = "The rule is empty";
       break;
     case ParseError::BAD_WHITELIST_SYNTAX:
       description = "Wrong whitelist rule syntax";
       break;
     case ParseError::UNKNOWN_OPTION:
       description = "Unknown URL rule option";
       break;
     case ParseError::NOT_A_TRISTATE_OPTION:
       description = "Unexpected '~', the option is not invertable";
       break;
     case ParseError::DEPRECATED_OPTION:
       description = "The option is deprecated";
       break;
     case ParseError::WHITELIST_ONLY_OPTION:
       description = "The option can be used with whitelist rules only";
       break;
     case ParseError::NO_VALUE_PROVIDED:
       description = "Expected '=', the option requires a value";
       break;
     case ParseError::WRONG_CSS_RULE_DELIM:
       description = "Wrong CSS rule delimiter";
       break;
     case ParseError::EMPTY_CSS_SELECTOR:
       description = "Expected non-empty CSS selector";
       break;
     case ParseError::UNSUPPORTED_FEATURE:
       description = "The feature is not currently supported";
       break;
     default:
       break;
   }
   return description;
 }

 // Parses a single |line| of a filter list and returns the type of the rule
 // found. Returns RULE_TYPE_UNSPECIFIED and records a parse error if the line
 // is blank or malformed. Comment lines yield RULE_TYPE_COMMENT.
 // TODO(pkalinnikov): Refactor parsing approach to use a FSM.
 RuleType RuleParser::Parse(base::StringPiece line) {
   // Forget the outcome of any previous call.
   rule_type_ = url_pattern_index::proto::RULE_TYPE_UNSPECIFIED;
   parse_error_ = ParseError();

   // Leading and trailing whitespace is never significant.
   const base::StringPiece trimmed =
       base::TrimWhitespaceASCII(line, base::TRIM_ALL);
   if (trimmed.empty()) {
     // Note: the error position is taken from |line| rather than |trimmed|,
     // because it is flaky to rely on *which* empty StringPiece
     // TrimWhitespaceASCII returns.
     SetParseError(ParseError::EMPTY_RULE, line, line.data());
     return url_pattern_index::proto::RULE_TYPE_UNSPECIFIED;
   }

   // Lines starting with '!' or '[' are comments.
   // TODO(pkalinnikov): Handle special comments (e.g. 'Title', 'Expires' etc.).
   const char first_char = trimmed[0];
   if (first_char == '!' || first_char == '[') {
     rule_type_ = url_pattern_index::proto::RULE_TYPE_COMMENT;
     return rule_type_;
   }

   // The line is a CSS rule if it contains a '#' immediately followed by '#'
   // or '@'. Scan the '#' characters left to right; a '#' that is the last
   // character of the line ends the search without a match.
   size_t css_separator_pos = base::StringPiece::npos;
   for (size_t pos = trimmed.find('#');
        pos != base::StringPiece::npos && pos + 1 < trimmed.size();
        pos = trimmed.find('#', pos + 1)) {
     const char follower = trimmed[pos + 1];
     if (follower == '#' || follower == '@') {  // CSS rule starter.
       css_separator_pos = pos;
       break;
     }
   }

   if (css_separator_pos != base::StringPiece::npos)
     rule_type_ = ParseCssRule(line, trimmed, css_separator_pos);
   else  // Otherwise assume a URL filtering rule.
     rule_type_ = ParseUrlRule(line, trimmed);
   return rule_type_;
 }

 // Parses |part| as a URL filtering rule and stores the result in |url_rule_|.
 // |part| must be a non-empty piece of |origin|, which is the complete line
 // and is used for error reporting only. Returns RULE_TYPE_URL on success, or
 // RULE_TYPE_UNSPECIFIED after recording a parse error.
 RuleType RuleParser::ParseUrlRule(base::StringPiece origin,
                                   base::StringPiece part) {
   CHECK(!part.empty() && part.data() >= origin.data());
   url_rule_ = UrlRule();

   // A whitelist rule starts with "@@". A single '@' is a syntax error, which
   // is reported at the character following it.
   if (part[0] == '@') {
     const bool has_second_at = part.size() > 1 && part[1] == '@';
     if (!has_second_at) {
       SetParseError(ParseError::BAD_WHITELIST_SYNTAX, origin, part.data() + 1);
       return url_pattern_index::proto::RULE_TYPE_UNSPECIFIED;
     }
     part.remove_prefix(2);
     url_rule_.is_whitelist = true;
   }

   // Options follow the last '$'. If the URL pattern is a regular expression,
   // that '$' might belong to the pattern itself. This can happen for rules
   // without any options, e.g. "/.*substring$/". All such rules end with '/',
   // which is how they are told apart here.
   const size_t dollar_pos = part.rfind('$');
   const bool has_options =
       dollar_pos != base::StringPiece::npos && part.back() != '/';
   if (has_options) {
     if (!ParseUrlRuleOptions(origin, part.substr(dollar_pos + 1)))
       return url_pattern_index::proto::RULE_TYPE_UNSPECIFIED;
     // Keep only the pattern, i.e. everything before the '$'.
     part = part.substr(0, dollar_pos);
   }

   // Left anchor: "||" anchors to a (sub)domain, a single '|' to the boundary.
   if (part.size() >= 2 && part[0] == '|' && part[1] == '|') {
     part.remove_prefix(2);
     url_rule_.anchor_left = url_pattern_index::proto::ANCHOR_TYPE_SUBDOMAIN;
   } else if (!part.empty() && part[0] == '|') {
     part.remove_prefix(1);
     url_rule_.anchor_left = url_pattern_index::proto::ANCHOR_TYPE_BOUNDARY;
   }

   // Right anchor: a trailing '|'.
   if (!part.empty() && part.back() == '|') {
     part.remove_suffix(1);
     url_rule_.anchor_right = url_pattern_index::proto::ANCHOR_TYPE_BOUNDARY;
   }

   // Whatever remains is the URL pattern.
   url_rule_.url_pattern = std::string(part);
   url_rule_.Canonicalize();

   return url_pattern_index::proto::RULE_TYPE_URL;
 }

 // Parses the comma-separated |options| of a URL rule and applies them to
 // |url_rule_|. |options| must be a piece of |origin|, which is the complete
 // line and is used for error reporting only. Returns false, after recording
 // a parse error positioned at the offending keyword, as soon as an option is
 // rejected.
 bool RuleParser::ParseUrlRuleOptions(base::StringPiece origin,
                                      base::StringPiece options) {
   CHECK_GE(options.data(), origin.data());

   // Becomes true once any element type or activation type option was handled.
   bool seen_type_option = false;
   const auto tokens = base::SplitStringPiece(
       options, ",", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
   for (base::StringPiece token : tokens) {
     DCHECK(!token.empty());

     // A leading '~' inverts the option.
     const bool is_negated =
         base::StartsWith(token, "~", base::CompareCase::SENSITIVE);
     if (is_negated)
       token.remove_prefix(1);
     const TriState tri_state = is_negated ? TriState::NO : TriState::YES;

     // The keyword is everything up to the optional '='.
     const size_t equals_pos = token.find('=');
     const base::StringPiece keyword = token.substr(0, equals_pos);

     const KeywordMap::OptionDetails* const details =
         GetKeywordsMapSingleton()->Lookup(keyword);

     // Find the first violated restriction, if any. The order of the checks
     // decides which error is reported when several of them apply.
     ParseError::ErrorCode violation = ParseError::NONE;
     if (!details) {
       // TODO(pkalinnikov): Add a flag to RuleParser allowing unknown options.
       violation = ParseError::UNKNOWN_OPTION;
     } else if (is_negated && !details->is_tristate()) {
       violation = ParseError::NOT_A_TRISTATE_OPTION;
     } else if (details->requires_value() &&
                equals_pos == base::StringPiece::npos) {
       violation = ParseError::NO_VALUE_PROVIDED;
     } else if (details->is_whitelist_only() && !url_rule_.is_whitelist) {
       violation = ParseError::WHITELIST_ONLY_OPTION;
     } else if (details->is_deprecated()) {
       // TODO(pkalinnikov): Add a flag to RuleParser allowing deprecated
       // options (and issuing kind of a warning).
       violation = ParseError::DEPRECATED_OPTION;
     } else if (details->is_not_supported()) {
       // TODO(pkalinnikov): Add a flag to RuleParser allowing unsupported
       // features.
       violation = ParseError::UNSUPPORTED_FEATURE;
     }
     if (violation != ParseError::NONE) {
       SetParseError(violation, origin, keyword.data());
       return false;
     }

     switch (details->type) {
       case KeywordMap::OPTION_ELEMENT_TYPE: {
         // The sign of the first element type option encountered determines
         // whether the unspecified element types will be included (if the
         // first option is negated) or excluded (otherwise).
         const auto element_bit = type_mask_for(details->element_type);
         if (tri_state == TriState::YES) {
           // TODO(pkalinnikov): How about not resetting ActivationType options?
           if (!seen_type_option)
             url_rule_.type_mask = 0;
           url_rule_.type_mask |= element_bit;
         } else {
           DCHECK(tri_state == TriState::NO);
           url_rule_.type_mask &= ~element_bit;
         }
         seen_type_option = true;
         break;
       }
       case KeywordMap::OPTION_ACTIVATION_TYPE:
         // The first type option clears the mask before its own bit is added.
         if (!seen_type_option)
           url_rule_.type_mask = 0;
         url_rule_.type_mask |= type_mask_for(details->activation_type);
         seen_type_option = true;
         break;
       case KeywordMap::OPTION_THIRD_PARTY:
         url_rule_.is_third_party = tri_state;
         break;
       case KeywordMap::OPTION_MATCH_CASE:
         url_rule_.match_case = true;
         break;
       case KeywordMap::OPTION_DOMAIN:
         // The value is a '|'-separated domain list. |equals_pos| is valid
         // here, because a missing '=' has been rejected above.
         url_rule_.domains =
             base::SplitString(token.substr(equals_pos + 1), "|",
                               base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
         break;
       default:
         // Every remaining option type is flagged as not supported and has
         // been rejected above.
         LOG(FATAL);
     }
   }

   return true;
 }

 // Parses |part| as a CSS rule and stores the result in |css_rule_|.
 // |css_section_start| is the index in |part| of the '#' that opens the
 // delimiter; everything before it is a comma-separated list of domains.
 // |origin| is the complete line and is used for error reporting only.
 // Returns RULE_TYPE_CSS on success, or RULE_TYPE_UNSPECIFIED after recording
 // a parse error.
 RuleType RuleParser::ParseCssRule(base::StringPiece origin,
                                   base::StringPiece part,
                                   size_t css_section_start) {
   CHECK(part.data() >= origin.data());
   css_rule_ = CssRule();

   // Collect the domains preceding the delimiter, if there are any.
   if (css_section_start != 0) {
     DCHECK(css_section_start != base::StringPiece::npos);
     const base::StringPiece domain_list = part.substr(0, css_section_start);
     for (base::StringPiece domain :
          base::SplitStringPiece(domain_list, ",", base::TRIM_WHITESPACE,
                                 base::SPLIT_WANT_NONEMPTY)) {
       DCHECK(!domain.empty());
       css_rule_.domains.push_back(std::string(domain));
     }
   }

   // Skip the domains and the opening '#'. What follows must be either "#"
   // (regular rule) or "@#" (whitelist rule).
   part.remove_prefix(css_section_start + 1);
   if (!part.empty() && part[0] == '@') {
     css_rule_.is_whitelist = true;
     part.remove_prefix(1);
   }
   if (part.empty() || part[0] != '#') {
     SetParseError(ParseError::WRONG_CSS_RULE_DELIM, origin, part.data());
     return url_pattern_index::proto::RULE_TYPE_UNSPECIFIED;
   }
   part.remove_prefix(1);

   // The rest of the line is the selector, which must not be empty.
   if (part.empty()) {
     SetParseError(ParseError::EMPTY_CSS_SELECTOR, origin, part.data());
     return url_pattern_index::proto::RULE_TYPE_UNSPECIFIED;
   }

   css_rule_.css_selector = std::string(part);
   css_rule_.Canonicalize();
   return url_pattern_index::proto::RULE_TYPE_CSS;
 }

 // Records a parse error of type |code| for the line |origin|. |error_begin|
 // points at the character of |origin| where the error was detected; it is
 // stored as a zero-based index into the line.
 void RuleParser::SetParseError(ParseError::ErrorCode code,
                                base::StringPiece origin,
                                const char* error_begin) {
   DCHECK(code != ParseError::NONE);
   // |error_begin| must lie within |origin|. One past the last character is
   // allowed, since errors can be detected at the very end of the line.
   DCHECK(error_begin >= origin.data());
   DCHECK(error_begin <= origin.data() + origin.size());

   parse_error_.error_code = code;
   parse_error_.line = std::string(origin);
   parse_error_.error_index = error_begin - origin.data();
 }

 // Prints |error| as three lines: the one-based error position together with
 // the description, the offending line, and a '^' marker placed under the
 // error position. Prints nothing if |error| holds no error.
 std::ostream& operator<<(std::ostream& out,
                          const RuleParser::ParseError& error) {
   if (error.error_code == RuleParser::ParseError::NONE)
     return out;

   const std::string caret_padding(error.error_index, ' ');
   out << "(error:" << error.error_index + 1 << ") ";
   out << RuleParser::GetParseErrorCodeDescription(error.error_code) << ":\n";
   out << error.line << '\n';
   out << caret_padding << "^\n";
   return out;
 }

 }  // namespace subresource_filter
	// Copyright 2018 The Chromium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "components/subresource_filter/tools/rule_parser/rule_parser.h"

	#include <map>
	#include <utility>
	#include <vector>

	#include "base/logging.h"
	#include "base/strings/string_split.h"
	#include "base/strings/string_util.h"
	#include "components/subresource_filter/tools/rule_parser/rule_options.h"
	#include "components/url_pattern_index/proto/rules.pb.h"

	namespace subresource_filter {

	namespace {

	// Encapsulates meta-information of URL rule options identified by keywords.
	class KeywordMap {
	public:
	// Types of rule options that can be represented by keywords.
	enum OptionType {
	OPTION_UNDEFINED,
	OPTION_ELEMENT_TYPE,
	OPTION_ACTIVATION_TYPE,
	OPTION_THIRD_PARTY,
	OPTION_DOMAIN,
	OPTION_SITEKEY,
	OPTION_MATCH_CASE,
	OPTION_COLLAPSE,
	OPTION_DO_NOT_TRACK,
	};

	enum OptionFlag : int {
	FLAG_NONE = 0,
	// The option requires a value, e.g. 'domain=example.org'.
	FLAG_REQUIRES_VALUE = 1,
	// The option allows invertion, e.g. 'image' and '~image'.
	FLAG_IS_TRISTATE = 2,
	// The option can be used with whitelist rules only.
	FLAG_IS_WHITELIST_ONLY = 4,
	// The option is not supposed to be used any more.
	FLAG_IS_DEPRECATED = 8,
	// The option is not supported yet.
	FLAG_IS_NOT_SUPPORTED = 16,
	};

	// Meta-information about an option represented by a certain keyword.
	struct OptionDetails {
	// Creates an option that defines a filter for the specified \|element_type\|.
	// In addition to the provided \|flags\|, FLAG_IS_TRISTATE will always be set
	// by default.
	OptionDetails(url_pattern_index::proto::ElementType element_type, int flags)
	: type(OPTION_ELEMENT_TYPE),
	flags(FLAG_IS_TRISTATE \| flags),
	element_type(element_type) {}

	// Creates an ActivationType option.
	explicit OptionDetails(
	url_pattern_index::proto::ActivationType activation_type)
	: type(OPTION_ACTIVATION_TYPE),
	flags(FLAG_IS_WHITELIST_ONLY),
	activation_type(activation_type) {}

	// Creates a generic option.
	OptionDetails(OptionType type, int flags) : type(type), flags(flags) {
	DCHECK_NE(type, OPTION_ELEMENT_TYPE);
	DCHECK_NE(type, OPTION_ACTIVATION_TYPE);
	}

	bool requires_value() const { return flags & FLAG_REQUIRES_VALUE; }
	bool is_tristate() const { return flags & FLAG_IS_TRISTATE; }
	bool is_whitelist_only() const { return flags & FLAG_IS_WHITELIST_ONLY; }
	bool is_deprecated() const { return flags & FLAG_IS_DEPRECATED; }
	bool is_not_supported() const { return flags & FLAG_IS_NOT_SUPPORTED; }

	OptionType type = OPTION_UNDEFINED;

	// Stores various OptionFlag's combined using bitwise OR.
	int flags = FLAG_NONE;

	// The element type that this option defines a filter for, if any. Set to
	// ELEMENT_TYPE_UNSPECIFIED for non-ElementType options.
	url_pattern_index::proto::ElementType element_type =
	url_pattern_index::proto::ELEMENT_TYPE_UNSPECIFIED;

	// The activation type that this option includes to the rule. Set to
	// ACTIVATION_TYPE_UNSPECIFIED for non-ActivationType options.
	url_pattern_index::proto::ActivationType activation_type =
	url_pattern_index::proto::ACTIVATION_TYPE_UNSPECIFIED;
	};

	// Initializes the map with default keywords.
	KeywordMap();
	~KeywordMap();

	// Returns detailed information associated with the provided \|name\| option.
	// Returns nullptr on unknown options.
	const OptionDetails* Lookup(base::StringPiece name) const;

	private:
	// Associates \|details\| with a specified option \|name\|.
	void AddOption(base::StringPiece name, const OptionDetails& details);

	std::map<std::string, OptionDetails> options_;

	DISALLOW_COPY_AND_ASSIGN(KeywordMap);
	};

	// Registers every known keyword: element types, deprecated element types,
	// activation types, and finally the remaining generic options.
	KeywordMap::KeywordMap() {
	  // Supported element types carry no flags besides the implicit tristate one.
	  for (const auto& entry : kElementTypes)
	    AddOption(entry.name, OptionDetails(entry.type, FLAG_NONE));

	  // Deprecated element type keywords are registered under the type they map
	  // to and are marked as deprecated.
	  for (const auto& entry : kDeprecatedElementTypes)
	    AddOption(entry.name, OptionDetails(entry.maps_to_type, FLAG_IS_DEPRECATED));

	  // Activation types.
	  for (const auto& entry : kActivationTypes)
	    AddOption(entry.name, OptionDetails(entry.type));

	  // TODO(pkalinnikov): Consider moving options metadata to a header.
	  struct GenericOption {
	    const char* name;
	    OptionType type;
	    int flags;
	  };
	  const GenericOption generic_options[] = {
	      // Tristate options.
	      {"third-party", OPTION_THIRD_PARTY, FLAG_IS_TRISTATE},
	      {"collapse", OPTION_COLLAPSE, FLAG_IS_TRISTATE | FLAG_IS_NOT_SUPPORTED},
	      // Flag options.
	      {"match-case", OPTION_MATCH_CASE, FLAG_NONE},
	      {"donottrack", OPTION_DO_NOT_TRACK, FLAG_IS_NOT_SUPPORTED},
	      // Value options.
	      {"sitekey", OPTION_SITEKEY, FLAG_REQUIRES_VALUE | FLAG_IS_NOT_SUPPORTED},
	      {"domain", OPTION_DOMAIN, FLAG_REQUIRES_VALUE},
	  };

	  for (const GenericOption& entry : generic_options)
	    AddOption(entry.name, OptionDetails(entry.type, entry.flags));
	}

	// Defaulted out of line; the only data member the class declares is the
	// options_ map.
	KeywordMap::~KeywordMap() = default;

	// Finds the details registered for the keyword |name|. Returns nullptr when
	// the keyword has not been registered.
	const KeywordMap::OptionDetails* KeywordMap::Lookup(
	    base::StringPiece name) const {
	  // TODO(pkalinnikov): Avoid std::string allocation.
	  const auto entry = options_.find(std::string(name));
	  if (entry == options_.end())
	    return nullptr;
	  return &entry->second;
	}

	// Registers |details| under the keyword |name|. Registering the same keyword
	// twice is a programming error and trips the DCHECK.
	void KeywordMap::AddOption(base::StringPiece name,
	                           const OptionDetails& details) {
	  const bool is_new_keyword =
	      options_.emplace(std::string(name), details).second;
	  DCHECK(is_new_keyword);
	}

	// Returns the process-wide KeywordMap. The map is heap-allocated on the first
	// call and is never deleted by this function.
	KeywordMap* GetKeywordsMapSingleton() {
	  // TODO(melandory): Get rid of this singleton.
	  static KeywordMap* const instance = new KeywordMap();
	  return instance;
	}

	} // namespace

	// RuleParser ------------------------------------------------------------------

	// ParseError and RuleParser use the compiler-generated constructors and
	// destructors; they are defaulted here, out of line.
	RuleParser::ParseError::ParseError() = default;
	RuleParser::ParseError::~ParseError() = default;

	RuleParser::RuleParser() = default;
	RuleParser::~RuleParser() = default;

	// Maps |code| to a human-readable, statically allocated description. Codes
	// without a dedicated message yield "Unknown error".
	const char* RuleParser::GetParseErrorCodeDescription(
	    ParseError::ErrorCode code) {
	  const char* description = "Unknown error";
	  switch (code) {
	    case ParseError::NONE:
	      description = "Ok";
	      break;
	    case ParseError::EMPTY_RULE:
	      description = "The rule is empty";
	      break;
	    case ParseError::BAD_WHITELIST_SYNTAX:
	      description = "Wrong whitelist rule syntax";
	      break;
	    case ParseError::UNKNOWN_OPTION:
	      description = "Unknown URL rule option";
	      break;
	    case ParseError::NOT_A_TRISTATE_OPTION:
	      description = "Unexpected '~', the option is not invertable";
	      break;
	    case ParseError::DEPRECATED_OPTION:
	      description = "The option is deprecated";
	      break;
	    case ParseError::WHITELIST_ONLY_OPTION:
	      description = "The option can be used with whitelist rules only";
	      break;
	    case ParseError::NO_VALUE_PROVIDED:
	      description = "Expected '=', the option requires a value";
	      break;
	    case ParseError::WRONG_CSS_RULE_DELIM:
	      description = "Wrong CSS rule delimiter";
	      break;
	    case ParseError::EMPTY_CSS_SELECTOR:
	      description = "Expected non-empty CSS selector";
	      break;
	    case ParseError::UNSUPPORTED_FEATURE:
	      description = "The feature is not currently supported";
	      break;
	    default:
	      break;
	  }
	  return description;
	}

	// Parses a single |line| of a filter list and returns the type of the rule
	// found. Returns RULE_TYPE_UNSPECIFIED and records a parse error if the line
	// is blank or malformed. Comment lines yield RULE_TYPE_COMMENT.
	// TODO(pkalinnikov): Refactor parsing approach to use a FSM.
	RuleType RuleParser::Parse(base::StringPiece line) {
	  // Forget the outcome of any previous call.
	  rule_type_ = url_pattern_index::proto::RULE_TYPE_UNSPECIFIED;
	  parse_error_ = ParseError();

	  // Leading and trailing whitespace is never significant.
	  const base::StringPiece trimmed =
	      base::TrimWhitespaceASCII(line, base::TRIM_ALL);
	  if (trimmed.empty()) {
	    // Note: the error position is taken from |line| rather than |trimmed|,
	    // because it is flaky to rely on which empty StringPiece
	    // TrimWhitespaceASCII returns.
	    SetParseError(ParseError::EMPTY_RULE, line, line.data());
	    return url_pattern_index::proto::RULE_TYPE_UNSPECIFIED;
	  }

	  // Lines starting with '!' or '[' are comments.
	  // TODO(pkalinnikov): Handle special comments (e.g. 'Title', 'Expires' etc.).
	  const char first_char = trimmed[0];
	  if (first_char == '!' || first_char == '[') {
	    rule_type_ = url_pattern_index::proto::RULE_TYPE_COMMENT;
	    return rule_type_;
	  }

	  // The line is a CSS rule if it contains a '#' immediately followed by '#'
	  // or '@'. Scan the '#' characters left to right; a '#' that is the last
	  // character of the line ends the search without a match.
	  size_t css_separator_pos = base::StringPiece::npos;
	  for (size_t pos = trimmed.find('#');
	       pos != base::StringPiece::npos && pos + 1 < trimmed.size();
	       pos = trimmed.find('#', pos + 1)) {
	    const char follower = trimmed[pos + 1];
	    if (follower == '#' || follower == '@') {  // CSS rule starter.
	      css_separator_pos = pos;
	      break;
	    }
	  }

	  if (css_separator_pos != base::StringPiece::npos)
	    rule_type_ = ParseCssRule(line, trimmed, css_separator_pos);
	  else  // Otherwise assume a URL filtering rule.
	    rule_type_ = ParseUrlRule(line, trimmed);
	  return rule_type_;
	}

	// Parses |part| as a URL filtering rule and stores the result in |url_rule_|.
	// |part| must be a non-empty piece of |origin|, which is the complete line
	// and is used for error reporting only. Returns RULE_TYPE_URL on success, or
	// RULE_TYPE_UNSPECIFIED after recording a parse error.
	RuleType RuleParser::ParseUrlRule(base::StringPiece origin,
	                                  base::StringPiece part) {
	  CHECK(!part.empty() && part.data() >= origin.data());
	  url_rule_ = UrlRule();

	  // A whitelist rule starts with "@@". A single '@' is a syntax error, which
	  // is reported at the character following it.
	  if (part[0] == '@') {
	    const bool has_second_at = part.size() > 1 && part[1] == '@';
	    if (!has_second_at) {
	      SetParseError(ParseError::BAD_WHITELIST_SYNTAX, origin, part.data() + 1);
	      return url_pattern_index::proto::RULE_TYPE_UNSPECIFIED;
	    }
	    part.remove_prefix(2);
	    url_rule_.is_whitelist = true;
	  }

	  // Options follow the last '$'. If the URL pattern is a regular expression,
	  // that '$' might belong to the pattern itself. This can happen for rules
	  // without any options, e.g. "/.*substring$/". All such rules end with '/',
	  // which is how they are told apart here.
	  const size_t dollar_pos = part.rfind('$');
	  const bool has_options =
	      dollar_pos != base::StringPiece::npos && part.back() != '/';
	  if (has_options) {
	    if (!ParseUrlRuleOptions(origin, part.substr(dollar_pos + 1)))
	      return url_pattern_index::proto::RULE_TYPE_UNSPECIFIED;
	    // Keep only the pattern, i.e. everything before the '$'.
	    part = part.substr(0, dollar_pos);
	  }

	  // Left anchor: "||" anchors to a (sub)domain, a single '|' to the boundary.
	  if (part.size() >= 2 && part[0] == '|' && part[1] == '|') {
	    part.remove_prefix(2);
	    url_rule_.anchor_left = url_pattern_index::proto::ANCHOR_TYPE_SUBDOMAIN;
	  } else if (!part.empty() && part[0] == '|') {
	    part.remove_prefix(1);
	    url_rule_.anchor_left = url_pattern_index::proto::ANCHOR_TYPE_BOUNDARY;
	  }

	  // Right anchor: a trailing '|'.
	  if (!part.empty() && part.back() == '|') {
	    part.remove_suffix(1);
	    url_rule_.anchor_right = url_pattern_index::proto::ANCHOR_TYPE_BOUNDARY;
	  }

	  // Whatever remains is the URL pattern.
	  url_rule_.url_pattern = std::string(part);
	  url_rule_.Canonicalize();

	  return url_pattern_index::proto::RULE_TYPE_URL;
	}

	// Parses the comma-separated |options| of a URL rule and applies them to
	// |url_rule_|. |options| must be a piece of |origin|, which is the complete
	// line and is used for error reporting only. Returns false, after recording
	// a parse error positioned at the offending keyword, as soon as an option is
	// rejected.
	bool RuleParser::ParseUrlRuleOptions(base::StringPiece origin,
	                                     base::StringPiece options) {
	  CHECK_GE(options.data(), origin.data());

	  // Becomes true once any element type or activation type option was handled.
	  bool seen_type_option = false;
	  const auto tokens = base::SplitStringPiece(
	      options, ",", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
	  for (base::StringPiece token : tokens) {
	    DCHECK(!token.empty());

	    // A leading '~' inverts the option.
	    const bool is_negated =
	        base::StartsWith(token, "~", base::CompareCase::SENSITIVE);
	    if (is_negated)
	      token.remove_prefix(1);
	    const TriState tri_state = is_negated ? TriState::NO : TriState::YES;

	    // The keyword is everything up to the optional '='.
	    const size_t equals_pos = token.find('=');
	    const base::StringPiece keyword = token.substr(0, equals_pos);

	    const KeywordMap::OptionDetails* const details =
	        GetKeywordsMapSingleton()->Lookup(keyword);

	    // Find the first violated restriction, if any. The order of the checks
	    // decides which error is reported when several of them apply.
	    ParseError::ErrorCode violation = ParseError::NONE;
	    if (!details) {
	      // TODO(pkalinnikov): Add a flag to RuleParser allowing unknown options.
	      violation = ParseError::UNKNOWN_OPTION;
	    } else if (is_negated && !details->is_tristate()) {
	      violation = ParseError::NOT_A_TRISTATE_OPTION;
	    } else if (details->requires_value() &&
	               equals_pos == base::StringPiece::npos) {
	      violation = ParseError::NO_VALUE_PROVIDED;
	    } else if (details->is_whitelist_only() && !url_rule_.is_whitelist) {
	      violation = ParseError::WHITELIST_ONLY_OPTION;
	    } else if (details->is_deprecated()) {
	      // TODO(pkalinnikov): Add a flag to RuleParser allowing deprecated
	      // options (and issuing kind of a warning).
	      violation = ParseError::DEPRECATED_OPTION;
	    } else if (details->is_not_supported()) {
	      // TODO(pkalinnikov): Add a flag to RuleParser allowing unsupported
	      // features.
	      violation = ParseError::UNSUPPORTED_FEATURE;
	    }
	    if (violation != ParseError::NONE) {
	      SetParseError(violation, origin, keyword.data());
	      return false;
	    }

	    switch (details->type) {
	      case KeywordMap::OPTION_ELEMENT_TYPE: {
	        // The sign of the first element type option encountered determines
	        // whether the unspecified element types will be included (if the
	        // first option is negated) or excluded (otherwise).
	        const auto element_bit = type_mask_for(details->element_type);
	        if (tri_state == TriState::YES) {
	          // TODO(pkalinnikov): How about not resetting ActivationType options?
	          if (!seen_type_option)
	            url_rule_.type_mask = 0;
	          url_rule_.type_mask |= element_bit;
	        } else {
	          DCHECK(tri_state == TriState::NO);
	          url_rule_.type_mask &= ~element_bit;
	        }
	        seen_type_option = true;
	        break;
	      }
	      case KeywordMap::OPTION_ACTIVATION_TYPE:
	        // The first type option clears the mask before its own bit is added.
	        if (!seen_type_option)
	          url_rule_.type_mask = 0;
	        url_rule_.type_mask |= type_mask_for(details->activation_type);
	        seen_type_option = true;
	        break;
	      case KeywordMap::OPTION_THIRD_PARTY:
	        url_rule_.is_third_party = tri_state;
	        break;
	      case KeywordMap::OPTION_MATCH_CASE:
	        url_rule_.match_case = true;
	        break;
	      case KeywordMap::OPTION_DOMAIN:
	        // The value is a '|'-separated domain list. |equals_pos| is valid
	        // here, because a missing '=' has been rejected above.
	        url_rule_.domains =
	            base::SplitString(token.substr(equals_pos + 1), "|",
	                              base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
	        break;
	      default:
	        // Every remaining option type is flagged as not supported and has
	        // been rejected above.
	        LOG(FATAL);
	    }
	  }

	  return true;
	}

	// Parses |part| as a CSS rule and stores the result in |css_rule_|.
	// |css_section_start| is the index in |part| of the '#' that opens the
	// delimiter; everything before it is a comma-separated list of domains.
	// |origin| is the complete line and is used for error reporting only.
	// Returns RULE_TYPE_CSS on success, or RULE_TYPE_UNSPECIFIED after recording
	// a parse error.
	RuleType RuleParser::ParseCssRule(base::StringPiece origin,
	                                  base::StringPiece part,
	                                  size_t css_section_start) {
	  CHECK(part.data() >= origin.data());
	  css_rule_ = CssRule();

	  // Collect the domains preceding the delimiter, if there are any.
	  if (css_section_start != 0) {
	    DCHECK(css_section_start != base::StringPiece::npos);
	    const base::StringPiece domain_list = part.substr(0, css_section_start);
	    for (base::StringPiece domain :
	         base::SplitStringPiece(domain_list, ",", base::TRIM_WHITESPACE,
	                                base::SPLIT_WANT_NONEMPTY)) {
	      DCHECK(!domain.empty());
	      css_rule_.domains.push_back(std::string(domain));
	    }
	  }

	  // Skip the domains and the opening '#'. What follows must be either "#"
	  // (regular rule) or "@#" (whitelist rule).
	  part.remove_prefix(css_section_start + 1);
	  if (!part.empty() && part[0] == '@') {
	    css_rule_.is_whitelist = true;
	    part.remove_prefix(1);
	  }
	  if (part.empty() || part[0] != '#') {
	    SetParseError(ParseError::WRONG_CSS_RULE_DELIM, origin, part.data());
	    return url_pattern_index::proto::RULE_TYPE_UNSPECIFIED;
	  }
	  part.remove_prefix(1);

	  // The rest of the line is the selector, which must not be empty.
	  if (part.empty()) {
	    SetParseError(ParseError::EMPTY_CSS_SELECTOR, origin, part.data());
	    return url_pattern_index::proto::RULE_TYPE_UNSPECIFIED;
	  }

	  css_rule_.css_selector = std::string(part);
	  css_rule_.Canonicalize();
	  return url_pattern_index::proto::RULE_TYPE_CSS;
	}

	// Records a parse error of type |code| for the line |origin|. |error_begin|
	// points at the character of |origin| where the error was detected; it is
	// stored as a zero-based index into the line.
	void RuleParser::SetParseError(ParseError::ErrorCode code,
	                               base::StringPiece origin,
	                               const char* error_begin) {
	  DCHECK(code != ParseError::NONE);
	  // |error_begin| must lie within |origin|. One past the last character is
	  // allowed, since errors can be detected at the very end of the line.
	  DCHECK(error_begin >= origin.data());
	  DCHECK(error_begin <= origin.data() + origin.size());

	  parse_error_.error_code = code;
	  parse_error_.line = std::string(origin);
	  parse_error_.error_index = error_begin - origin.data();
	}

	// Prints |error| as three lines: the one-based error position together with
	// the description, the offending line, and a '^' marker placed under the
	// error position. Prints nothing if |error| holds no error.
	std::ostream& operator<<(std::ostream& out,
	                         const RuleParser::ParseError& error) {
	  if (error.error_code == RuleParser::ParseError::NONE)
	    return out;

	  const std::string caret_padding(error.error_index, ' ');
	  out << "(error:" << error.error_index + 1 << ") ";
	  out << RuleParser::GetParseErrorCodeDescription(error.error_code) << ":\n";
	  out << error.line << '\n';
	  out << caret_padding << "^\n";
	  return out;
	}

	} // namespace subresource_filter