| // Copyright 2017 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| // Namespace that the code generated from this schema is placed in. |
| namespace url_pattern_index.flat; |
| |
| // NOTE: Increment url_pattern_index::kUrlPatternIndexFormatVersion whenever |
| // making a breaking change to this schema. |
| |
| // The format in which the |url_pattern| of a UrlRule is written. |
| // Corresponds to url_pattern_index::proto::UrlPatternType. |
| // Stored as a ubyte. No explicit values are given, so FlatBuffers numbers |
| // them from 0 in declaration order (SUBSTRING = 0, WILDCARDED = 1, |
| // REGEXP = 2); reordering or inserting values changes the serialized format. |
| enum UrlPatternType : ubyte { |
| SUBSTRING, |
| WILDCARDED, |
| REGEXP, |
| } |
| |
| // The anchoring applied to one end of a URL pattern; used by both the |
| // |anchor_left| and |anchor_right| fields of UrlRule. |
| // Corresponds to url_pattern_index::proto::AnchorType. |
| // Stored as a ubyte. No explicit values are given, so FlatBuffers numbers |
| // them from 0 in declaration order (NONE = 0, BOUNDARY = 1, SUBDOMAIN = 2). |
| enum AnchorType : ubyte { |
| NONE, |
| BOUNDARY, |
| SUBDOMAIN, |
| } |
| |
| // URL rule matching options. These correspond to multiple fields of |
| // url_pattern_index::proto::UrlRule, but here, they are represented as flags |
| // of the same bitmask to allow for compact storage. The resulting mask is |
| // stored in the |options| field of UrlRule. |
| // Because of (bit_flags), the N-th declared value is the bit 1 << N: |
| // IS_WHITELIST = 1, APPLIES_TO_FIRST_PARTY = 2, APPLIES_TO_THIRD_PARTY = 4, |
| // IS_CASE_INSENSITIVE = 8. |
| enum OptionFlag : ubyte (bit_flags) { |
| IS_WHITELIST, |
| APPLIES_TO_FIRST_PARTY, |
| APPLIES_TO_THIRD_PARTY, |
| IS_CASE_INSENSITIVE, |
| } |
| |
| // The options controlling whether or not to activate filtering for subresources |
| // of documents that match the URL pattern of the rule. |
| // Corresponds to url_pattern_index::proto::ActivationType. |
| // Because of (bit_flags), each value is a separate bit (DOCUMENT = 1, |
| // GENERIC_BLOCK = 2). The resulting mask is stored in the |activation_types| |
| // field of UrlRule. |
| enum ActivationType : ubyte (bit_flags) { |
| DOCUMENT, // Disable all rules on the page. |
| GENERIC_BLOCK, // Disable generic URL rules on the page. |
| } |
| |
| // The types of subresource requests that a URL rule should be applied to. |
| // Because of (bit_flags), the N-th declared value is the bit 1 << N, from |
| // OTHER = 1 << 0 through MAIN_FRAME = 1 << 13, so all 14 values fit into the |
| // ushort that backs the |element_types| field of UrlRule. |
| enum ElementType : ushort (bit_flags) { |
| OTHER, |
| SCRIPT, |
| IMAGE, |
| STYLESHEET, |
| OBJECT, |
| XMLHTTPREQUEST, |
| // TODO(crbug.com/713774): Remove OBJECT_SUBREQUEST type once |
| // url_pattern_index no longer has a dependency on proto::UrlRule. |
| OBJECT_SUBREQUEST, |
| SUBDOCUMENT, |
| PING, |
| MEDIA, |
| FONT, |
| WEBSOCKET, |
| CSP_REPORT, |
| MAIN_FRAME, |
| // Note: Update the default value of the |element_types| field in UrlRule |
| // when adding/removing values from this enum. |
| } |
| |
| // The flat representation of a single URL rule. For more details regarding the |
| // fields please see the comments to url_pattern_index::proto::UrlRule. |
| // FlatBuffers normally does not write scalar fields that equal their default, |
| // so the defaults below are chosen to match the most common rules. |
| table UrlRule { |
| // Rule matching options, a bitmask consisting of OptionFlags. No explicit |
| // default is declared, so the field defaults to 0 (no flags set). |
| options : ubyte; |
| |
| // A bitmask of ElementType. Equals ElementType_ANY & ~ElementType_MAIN_FRAME |
| // by default for compactness, i.e. 8191 (0x1FFF): every ElementType bit |
| // except MAIN_FRAME (1 << 13). We expect most rules to not use |
| // ElementType_MAIN_FRAME. Keep this in sync with |
| // url_pattern_index::kDefaultFlatElementTypesMask. |
| element_types : ushort = 8191; |
| |
| // A bitmask of ActivationType. Disables all activation types by default. |
| activation_types : ubyte = 0; |
| |
| // Use SUBSTRING as default, since it's the most used pattern type. Same as |
| // the corresponding proto::UrlRule::url_pattern_type. |
| url_pattern_type : UrlPatternType = SUBSTRING; |
| |
| // Use NONE as default, since most of the rules are not anchored. |
| anchor_left : AnchorType = NONE; |
| anchor_right : AnchorType = NONE; |
| |
| // The list of domains to be included/excluded from the filter's affected set. |
| // Should either be null or have at least a single element. The domains |
| // should be in lower-case and kept sorted as defined by |
| // url_pattern_index::CompareDomains. The entries must consist of only ascii |
| // characters. Use punycode encoding for internationalized domains. |
| domains_included : [string]; |
| domains_excluded : [string]; |
| |
| // A URL pattern in the format defined by |url_pattern_type|. This should |
| // only consist of ascii characters, since it's matched against a url where |
| // the host is encoded in the punycode format (in case of internationalized |
| // domains) and any other non-ascii characters are percent-escaped in utf-8. |
| // This should be in lower case if the rule is case-insensitive. |
| url_pattern : string; |
| |
| // An id which uniquely identifies the rule. Clients must ensure uniqueness if |
| // they use this field. |
| id : uint; |
| |
| // Priority of the rule. The larger the value, the greater the priority. |
| priority : uint; |
| } |
| |
| // Contains an N-gram (acting as a key in a hash table) and a list of URL rules |
| // associated with that N-gram. Instances of this table are the slots of the |
| // |ngram_index| hash table in UrlPatternIndex. |
| table NGramToRules { |
| // A string consisting of N (up to 8) ascii-only non-special characters, which |
| // are stored in the lowest N non-zero bytes, lower bytes corresponding to |
| // later symbols. These are lower-cased to support case-insensitive matching. |
| // The 8-byte ulong is what limits N to at most 8 characters. |
| ngram : ulong; |
| |
| // The list of rules containing |ngram| as a substring of their URL pattern. |
| // Sorted in descending order of rule priority. |
| rule_list : [UrlRule]; |
| } |
| |
| // A data structure used to select only a handful of URL rule candidates that |
| // need to be matched against a certain resource URL. This table is declared |
| // as the root type of the schema. |
| table UrlPatternIndex { |
| // The N of an N-gram index. Note: |n| should be between 1 and 8, since an |
| // N-gram is packed into the 8-byte |ngram| field of NGramToRules. |
| n : uint; |
| |
| // A hash table with open addressing. The keys of the table are N-grams. |
| ngram_index : [NGramToRules]; |
| |
| // The slot that is pointed to by all empty slots of |ngram_index| hash table. |
| // Note: This is a workaround needed because null offsets are not allowed as |
| // elements of FlatBuffer arrays. |
| ngram_index_empty_slot : NGramToRules; |
| |
| // A list storing the rules that don't contain any valid N-grams in their |
| // URL patterns. Contains all the REGEXP rules as well. Sorted in descending |
| // order of rule priority. |
| // TODO(pkalinnikov): Think about better implementation for the fallback |
| // index. Possibly make it a hash map and maybe merge it with the N-gram |
| // index, since we can treat any sequence of characters shorter than N as an |
| // N-gram with zero bytes used for padding. |
| fallback_rules : [UrlRule]; |
| } |
| |
| // Declares UrlPatternIndex as the root table of a serialized buffer. |
| root_type UrlPatternIndex; |