blob: 5337c0c9d491e199fe15518902501f76105101dc [file] [log] [blame]
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""This module defines the core structure of the classification rules.
This module does NOT specify how the rules filter the data: this responsibility
is of to the concrete classifiers, which have to override the Rule class herein
defined and know how to do the math.
This module, instead, defines the format of the rules and the way they are
encoded and loaded (in a python-style dictionary file).
Rules are organized in a tree, where the root is always represented by a 'Total'
node, and the leaves are arbitrarily defined by the user, according to the
following principles:
- Order of siblings rules matter: what is caught by a rule will not be caught
by the next ones, but it is propagated to its children rules if any.
- Every non-leaf node X gets an implicit extra-children named X-other. This
catch-all child catches everything (within the parent rule scope) that is
not caught by the other siblings. This is to guarantee that, when doing the
math (the aggregation), at any level, the sum of the values in the leaves
match the value of their parent.
The format of a rule dictionary is the following:
[
{
'name': 'Name of the rule',
'filter-X': 'The embedder will know how to interpret this value and will use
it to filter the data'
'filter-Y': 'Idem'
children: [
{
'name': 'Name of the sub-rule 1'
... and so on recursively ,
},
]
},
]
And a typical resulting rule tree looks like this:
+----------------------+
| Total |
|----------------------|
+------------------+ Match all. +--------------------+
| +----------+-----------+ |
| | |
+-----v-----+ +-----v-----+ +------v----+
| Foo | | Bar | |Total-other|
|-----------| |-----------| |-----------|
|File: foo* | +---+File: bar* +-----+ | Match all |
+-----------+ | +-----------+ | +-----------+
| |
+------v------+ +------v----+
| Bar::Coffee | | Bar-other |
|-------------| |-----------|
|File: bar*cof| | Match all |
+-------------+ +-----------+
"""
import ast
def Load(content, rule_builder):
"""Construct a rule tree from a python-style dict representation.
Args:
content: a string containing the dict (i.e. content of the rule file).
rule_builder: a method which takes two arguments (rule_name, filters_dict)
and returns a subclass of |Rule|. |filters_dict| is a dict of the keys
(filter-foo, filter-bar in the example above) for the rule node.
"""
rules_dict = ast.literal_eval(content)
root = Rule('Total')
_MakeRuleNodeFromDictNode(root, rules_dict, rule_builder)
return root
class Rule(object):
""" An abstract class representing a rule node in the rules tree.
Embedders must override the Match method when deriving this class.
"""
def __init__(self, name):
self.name = name
self.children = []
def Match(self, _): # pylint: disable=R0201
""" The rationale of this default implementation is modeling the root
('Total') and the catch-all (*-other) rules that every |RuleTree| must have,
regardless of the embedder-specific children rules. This is to guarantee
that the totals match at any level of the tree.
"""
return True
def AppendChild(self, child_rule):
assert(isinstance(child_rule, Rule))
duplicates = filter(lambda x: x.name == child_rule.name, self.children)
assert(not duplicates), 'Duplicate rule ' + child_rule.name
self.children.append(child_rule)
def _MakeRuleNodeFromDictNode(rule_node, dict_nodes, rule_builder):
"""Recursive rule tree builder for traversing the rule dict."""
for dict_node in dict_nodes:
assert('name' in dict_node)
# Extract the filter keys (e.g., mmap-file, mmap-prot) that will be passed
# to the |rule_builder|
filter_keys = set(dict_node.keys()) - set(('name', 'children'))
filters = dict((k, dict_node[k]) for k in filter_keys)
child_rule = rule_builder(dict_node['name'], filters)
rule_node.AppendChild(child_rule)
dict_children = dict_node.get('children', {})
_MakeRuleNodeFromDictNode(child_rule, dict_children, rule_builder)
# If the rule_node isn't a leaf, add the 'name-other' catch-all sibling to
# catch all the entries that matched this node but none of its children.
if len(rule_node.children):
rule_node.AppendChild(Rule(rule_node.name + '-other'))