// components/url_pattern_index/flat/url_pattern_index.fbs - chromium/src.git - Git at Google

 // Copyright 2017 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 // The declarations that follow live in the url_pattern_index.flat namespace.
 namespace url_pattern_index.flat;

 // NOTE: Increment url_pattern_index::kUrlPatternIndexFormatVersion whenever
 // making a breaking change to this schema. Enumerators and table fields below
 // carry no explicit values/ids, so their declaration order is part of the
 // format.

 // Selects the format in which UrlRule.url_pattern is written.
 // Corresponds to url_pattern_index::proto::UrlPatternType.
 //
 // Stored as one unsigned byte. No explicit values are assigned, so the
 // enumerators are numbered 0, 1, 2 in declaration order; append new values at
 // the end so that the existing ones keep their numbers.
 enum UrlPatternType : ubyte {
   SUBSTRING,  // Default for UrlRule.url_pattern_type.
   WILDCARDED,
   REGEXP,  // Always stored in UrlPatternIndex.fallback_rules.
 }

 // Anchoring mode for one end of a rule's URL pattern; used by both
 // UrlRule.anchor_left and UrlRule.anchor_right.
 // Corresponds to url_pattern_index::proto::AnchorType (see the proto
 // definition for the meaning of each value).
 //
 // Stored as one unsigned byte; enumerators are numbered 0, 1, 2 in declaration
 // order.
 enum AnchorType : ubyte {
   NONE,  // Default for both anchor fields of UrlRule.
   BOUNDARY,
   SUBDOMAIN,
 }

 // URL rule matching options. These correspond to multiple fields of
 // url_pattern_index::proto::UrlRule, but here, they are represented as flags
 // of the same bitmask to allow for compact storage.
 //
 // Because of (bit_flags), each enumerator is a distinct single bit (1, 2, 4, 8
 // in declaration order). The flags are OR-ed together and stored in
 // UrlRule.options. The underlying type is ubyte, so at most 8 flags fit.
 enum OptionFlag : ubyte (bit_flags) {
   IS_WHITELIST,
   APPLIES_TO_FIRST_PARTY,
   APPLIES_TO_THIRD_PARTY,
   IS_CASE_INSENSITIVE,  // If set, UrlRule.url_pattern must be lower-cased.
 }

 // The options controlling whether or not to activate filtering for subresources
 // of documents that match the URL pattern of the rule.
 // Corresponds to url_pattern_index::proto::ActivationType.
 //
 // Because of (bit_flags), each enumerator is a distinct single bit (1 and 2 in
 // declaration order). The flags are OR-ed together and stored in
 // UrlRule.activation_types, whose default of 0 means that no activation type
 // is set.
 enum ActivationType : ubyte (bit_flags) {
   DOCUMENT,       // Disable all rules on the page.
   GENERIC_BLOCK,  // Disable generic URL rules on the page.
 }

 // The types of subresource requests that a URL rule should be applied to.
 //
 // Because of (bit_flags), each enumerator is a distinct single bit, assigned
 // in declaration order starting from the lowest bit: OTHER is bit 0 and
 // MAIN_FRAME is bit 13. The flags are OR-ed together and stored in
 // UrlRule.element_types. The 14 enumerators leave two spare bits in the
 // ushort.
 enum ElementType : ushort (bit_flags) {
   OTHER,
   SCRIPT,
   IMAGE,
   STYLESHEET,
   OBJECT,
   XMLHTTPREQUEST,
   // TODO(crbug.com/713774): Remove OBJECT_SUBREQUEST type once
   // url_pattern_index no longer has a dependency on proto::UrlRule.
   OBJECT_SUBREQUEST,
   SUBDOCUMENT,
   PING,
   MEDIA,
   FONT,
   WEBSOCKET,
   CSP_REPORT,
   // The only type excluded from the default value of UrlRule.element_types.
   MAIN_FRAME,
   // Note: Update the default value of the |element_types| field in UrlRule
   // when adding values to or removing values from this enum.
 }

 // The flat representation of a single URL rule. For more details regarding the
 // fields please see the comments to url_pattern_index::proto::UrlRule.
 //
 // The fields carry no explicit ids, so their slots follow declaration order:
 // add new fields at the end only, and do not reorder the existing ones.
 table UrlRule {
   // Rule matching options, a bitmask consisting of OptionFlags. No default is
   // declared, so an omitted value reads as 0 (no flags set).
   options : ubyte;

   // A bitmask of ElementType. Equals ElementType_ANY & ~ElementType_MAIN_FRAME
   // by default for compactness. We expect most rules to not use
   // ElementType_MAIN_FRAME. Keep this in sync with
   // url_pattern_index::kDefaultFlatElementTypesMask.
   // 8191 == 0x1FFF, i.e. bits 0 through 12 set: every ElementType from OTHER
   // to CSP_REPORT, without MAIN_FRAME (bit 13).
   element_types : ushort = 8191;

   // A bitmask of ActivationType. Disables all activation types by default.
   activation_types : ubyte = 0;

   // Use SUBSTRING as default, since it's the most used pattern type. Same as
   // the corresponding proto::UrlRule::url_pattern_type.
   url_pattern_type : UrlPatternType = SUBSTRING;

   // How the left and right ends of |url_pattern| are anchored.
   // Use NONE as default, since most of the rules are not anchored.
   anchor_left : AnchorType = NONE;
   anchor_right : AnchorType = NONE;

   // The list of domains to be included/excluded from the filter's affected set.
   // Should either be null or have at least a single element. The domains
   // should be in lower-case and kept sorted as defined by
   // url_pattern_index::CompareDomains. The entries must consist of only ascii
   // characters. Use punycode encoding for internationalized domains.
   domains_included : [string];
   domains_excluded : [string];

   // A URL pattern in the format defined by |url_pattern_type|. This should
   // only consist of ascii characters, since it's matched against a url where
   // the host is encoded in the punycode format (in case of internationalized
   // domains) and any other non-ascii characters are percent-escaped in utf-8.
   // This should be in lower case if the rule is case-insensitive (that is, if
   // OptionFlag IS_CASE_INSENSITIVE is set in |options|).
   url_pattern : string;

   // An id which uniquely identifies the rule. Clients must ensure uniqueness if
   // they use this field. The schema itself does not enforce uniqueness.
   id : uint;

   // Priority of the rule. The larger the value, the greater the priority. Rule
   // lists in NGramToRules and UrlPatternIndex are sorted by this value in
   // descending order.
   priority : uint;
 }

 // Contains an N-gram (acting as a key in a hash table) and a list of URL rules
 // associated with that N-gram. Instances are the slots of
 // UrlPatternIndex.ngram_index.
 table NGramToRules {
   // A string consisting of N (up to 8) ascii-only non-special characters, which
   // are stored in the lowest N non-zero bytes, lower bytes corresponding to
   // later symbols. These are lower-cased to support case-insensitive matching.
   // One character takes one byte, so the 64-bit ulong is what limits N to 8.
   ngram : ulong;

   // The list of rules containing |ngram| as a substring of their URL pattern.
   // Sorted in descending order of rule priority (UrlRule.priority).
   rule_list : [UrlRule];
 }

 // A data structure used to select only a handful of URL rule candidates that
 // need to be matched against a certain resource URL. This is the root table of
 // the schema.
 table UrlPatternIndex {
   // The N of an N-gram index. Note: |n| should be between 1 and 8; the upper
   // bound comes from NGramToRules.ngram being a 64-bit value.
   n : uint;

   // A hash table with open addressing. The keys of the table are N-grams
   // (NGramToRules.ngram).
   ngram_index : [NGramToRules];

   // The slot that is pointed to by all empty slots of |ngram_index| hash table.
   // Note: This is a workaround needed because null offsets are not allowed as
   // elements of FlatBuffer arrays.
   ngram_index_empty_slot : NGramToRules;

   // A list storing the rules that don't contain any valid N-grams in their
   // URL patterns. Contains all the REGEXP rules as well. Sorted in descending
   // order of rule priority.
   // TODO(pkalinnikov): Think about better implementation for the fallback
   // index. Possibly make it a hash map and maybe merge it with the N-gram
   // index, since we can treat any sequence of characters shorter than N as an
   // N-gram with zero bytes used for padding.
   fallback_rules : [UrlRule];
 }

 // A serialized buffer of this schema has UrlPatternIndex as its root table.
 root_type UrlPatternIndex;
	// Copyright 2017 The Chromium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	// The declarations that follow live in the url_pattern_index.flat namespace.
	namespace url_pattern_index.flat;

	// NOTE: Increment url_pattern_index::kUrlPatternIndexFormatVersion whenever
	// making a breaking change to this schema. Enumerators and table fields below
	// carry no explicit values/ids, so their declaration order is part of the
	// format.

	// Selects the format in which UrlRule.url_pattern is written.
	// Corresponds to url_pattern_index::proto::UrlPatternType.
	//
	// Stored as one unsigned byte. No explicit values are assigned, so the
	// enumerators are numbered 0, 1, 2 in declaration order; append new values at
	// the end so that the existing ones keep their numbers.
	enum UrlPatternType : ubyte {
	SUBSTRING, // Default for UrlRule.url_pattern_type.
	WILDCARDED,
	REGEXP, // Always stored in UrlPatternIndex.fallback_rules.
	}

	// Anchoring mode for one end of a rule's URL pattern; used by both
	// UrlRule.anchor_left and UrlRule.anchor_right.
	// Corresponds to url_pattern_index::proto::AnchorType (see the proto
	// definition for the meaning of each value).
	//
	// Stored as one unsigned byte; enumerators are numbered 0, 1, 2 in declaration
	// order.
	enum AnchorType : ubyte {
	NONE, // Default for both anchor fields of UrlRule.
	BOUNDARY,
	SUBDOMAIN,
	}

	// URL rule matching options. These correspond to multiple fields of
	// url_pattern_index::proto::UrlRule, but here, they are represented as flags
	// of the same bitmask to allow for compact storage.
	//
	// Because of (bit_flags), each enumerator is a distinct single bit (1, 2, 4, 8
	// in declaration order). The flags are OR-ed together and stored in
	// UrlRule.options. The underlying type is ubyte, so at most 8 flags fit.
	enum OptionFlag : ubyte (bit_flags) {
	IS_WHITELIST,
	APPLIES_TO_FIRST_PARTY,
	APPLIES_TO_THIRD_PARTY,
	IS_CASE_INSENSITIVE, // If set, UrlRule.url_pattern must be lower-cased.
	}

	// The options controlling whether or not to activate filtering for subresources
	// of documents that match the URL pattern of the rule.
	// Corresponds to url_pattern_index::proto::ActivationType.
	//
	// Because of (bit_flags), each enumerator is a distinct single bit (1 and 2 in
	// declaration order). The flags are OR-ed together and stored in
	// UrlRule.activation_types, whose default of 0 means that no activation type
	// is set.
	enum ActivationType : ubyte (bit_flags) {
	DOCUMENT, // Disable all rules on the page.
	GENERIC_BLOCK, // Disable generic URL rules on the page.
	}

	// The types of subresource requests that a URL rule should be applied to.
	//
	// Because of (bit_flags), each enumerator is a distinct single bit, assigned
	// in declaration order starting from the lowest bit: OTHER is bit 0 and
	// MAIN_FRAME is bit 13. The flags are OR-ed together and stored in
	// UrlRule.element_types. The 14 enumerators leave two spare bits in the
	// ushort.
	enum ElementType : ushort (bit_flags) {
	OTHER,
	SCRIPT,
	IMAGE,
	STYLESHEET,
	OBJECT,
	XMLHTTPREQUEST,
	// TODO(crbug.com/713774): Remove OBJECT_SUBREQUEST type once
	// url_pattern_index no longer has a dependency on proto::UrlRule.
	OBJECT_SUBREQUEST,
	SUBDOCUMENT,
	PING,
	MEDIA,
	FONT,
	WEBSOCKET,
	CSP_REPORT,
	// The only type excluded from the default value of UrlRule.element_types.
	MAIN_FRAME,
	// Note: Update the default value of the |element_types| field in UrlRule
	// when adding values to or removing values from this enum.
	}

	// The flat representation of a single URL rule. For more details regarding the
	// fields please see the comments to url_pattern_index::proto::UrlRule.
	//
	// The fields carry no explicit ids, so their slots follow declaration order:
	// add new fields at the end only, and do not reorder the existing ones.
	table UrlRule {
	// Rule matching options, a bitmask consisting of OptionFlags. No default is
	// declared, so an omitted value reads as 0 (no flags set).
	options : ubyte;

	// A bitmask of ElementType. Equals ElementType_ANY & ~ElementType_MAIN_FRAME
	// by default for compactness. We expect most rules to not use
	// ElementType_MAIN_FRAME. Keep this in sync with
	// url_pattern_index::kDefaultFlatElementTypesMask.
	// 8191 == 0x1FFF, i.e. bits 0 through 12 set: every ElementType from OTHER
	// to CSP_REPORT, without MAIN_FRAME (bit 13).
	element_types : ushort = 8191;

	// A bitmask of ActivationType. Disables all activation types by default.
	activation_types : ubyte = 0;

	// Use SUBSTRING as default, since it's the most used pattern type. Same as
	// the corresponding proto::UrlRule::url_pattern_type.
	url_pattern_type : UrlPatternType = SUBSTRING;

	// How the left and right ends of |url_pattern| are anchored.
	// Use NONE as default, since most of the rules are not anchored.
	anchor_left : AnchorType = NONE;
	anchor_right : AnchorType = NONE;

	// The list of domains to be included/excluded from the filter's affected set.
	// Should either be null or have at least a single element. The domains
	// should be in lower-case and kept sorted as defined by
	// url_pattern_index::CompareDomains. The entries must consist of only ascii
	// characters. Use punycode encoding for internationalized domains.
	domains_included : [string];
	domains_excluded : [string];

	// A URL pattern in the format defined by |url_pattern_type|. This should
	// only consist of ascii characters, since it's matched against a url where
	// the host is encoded in the punycode format (in case of internationalized
	// domains) and any other non-ascii characters are percent-escaped in utf-8.
	// This should be in lower case if the rule is case-insensitive (that is, if
	// OptionFlag IS_CASE_INSENSITIVE is set in |options|).
	url_pattern : string;

	// An id which uniquely identifies the rule. Clients must ensure uniqueness if
	// they use this field. The schema itself does not enforce uniqueness.
	id : uint;

	// Priority of the rule. The larger the value, the greater the priority. Rule
	// lists in NGramToRules and UrlPatternIndex are sorted by this value in
	// descending order.
	priority : uint;
	}

	// Contains an N-gram (acting as a key in a hash table) and a list of URL rules
	// associated with that N-gram. Instances are the slots of
	// UrlPatternIndex.ngram_index.
	table NGramToRules {
	// A string consisting of N (up to 8) ascii-only non-special characters, which
	// are stored in the lowest N non-zero bytes, lower bytes corresponding to
	// later symbols. These are lower-cased to support case-insensitive matching.
	// One character takes one byte, so the 64-bit ulong is what limits N to 8.
	ngram : ulong;

	// The list of rules containing |ngram| as a substring of their URL pattern.
	// Sorted in descending order of rule priority (UrlRule.priority).
	rule_list : [UrlRule];
	}

	// A data structure used to select only a handful of URL rule candidates that
	// need to be matched against a certain resource URL. This is the root table of
	// the schema.
	table UrlPatternIndex {
	// The N of an N-gram index. Note: |n| should be between 1 and 8; the upper
	// bound comes from NGramToRules.ngram being a 64-bit value.
	n : uint;

	// A hash table with open addressing. The keys of the table are N-grams
	// (NGramToRules.ngram).
	ngram_index : [NGramToRules];

	// The slot that is pointed to by all empty slots of |ngram_index| hash table.
	// Note: This is a workaround needed because null offsets are not allowed as
	// elements of FlatBuffer arrays.
	ngram_index_empty_slot : NGramToRules;

	// A list storing the rules that don't contain any valid N-grams in their
	// URL patterns. Contains all the REGEXP rules as well. Sorted in descending
	// order of rule priority.
	// TODO(pkalinnikov): Think about better implementation for the fallback
	// index. Possibly make it a hash map and maybe merge it with the N-gram
	// index, since we can treat any sequence of characters shorter than N as an
	// N-gram with zero bytes used for padding.
	fallback_rules : [UrlRule];
	}

	// A serialized buffer of this schema has UrlPatternIndex as its root table.
	root_type UrlPatternIndex;