blob: e424afc4845627be4f8aeae31a6a907d69c872ab [file] [log] [blame]
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""This module classifies NativeHeap objects filtering their allocations.
The only filter currently available is 'stacktrace', which works as follows:
{'name': 'rule-1', 'stacktrace': 'foo' }
{'name': 'rule-2', 'stacktrace': ['foo', r'bar\s+baz']}
{'name': 'rule-3', 'source_path': 'sk.*allocator'}
{'name': 'rule-3', 'source_path': 'sk', 'stacktrace': 'SkAllocator'}
rule-1 will match any allocation that has 'foo' in one of its stack frames.
rule-2 will match any allocation that has a stack frame matching 'foo' AND a
followed by a stack frame matching 'bar baz'. Note that order matters, so rule-2
will not match a stacktrace like ['bar baz', 'foo'].
rule-3 will match any allocation in which at least one of the source paths in
its stack frames matches the regex sk.*allocator.
rule-4 will match any allocation which satisfies both the conditions.
TODO(primiano): introduce more filters after the first prototype with UI, for
instance, filter by library file name or by allocation size.
"""
import collections
import posixpath
import re
from memory_inspector.classification import results
from memory_inspector.classification import rules
from memory_inspector.core import exceptions
from memory_inspector.core import native_heap
_RESULT_KEYS = ['bytes_allocated', 'bytes_resident']
def LoadRules(content):
"""Loads and parses a native-heap rule tree from a content (string).
Returns:
An instance of |rules.Rule|, which nodes are |_NHeapRule| instances.
"""
return rules.Load(content, _NHeapRule)
def Classify(nativeheap, rule_tree):
"""Creates aggregated results of native heaps using the provided rules.
Args:
nativeheap: the heap dump being processed (a |NativeHeap| instance).
rule_tree: the user-defined rules that define the filtering categories.
Returns:
An instance of |AggreatedResults|.
"""
assert(isinstance(nativeheap, native_heap.NativeHeap))
assert(isinstance(rule_tree, rules.Rule))
res = results.AggreatedResults(rule_tree, _RESULT_KEYS)
for allocation in nativeheap.allocations:
res.AddToMatchingNodes(allocation,
[allocation.size, allocation.resident_size])
return res
def InferHeuristicRulesFromHeap(nheap, max_depth=3, threshold=0.02):
"""Infers the rules tree from a symbolized heap snapshot.
In lack of a specific set of rules, this method can be invoked to infer a
meaningful rule tree starting from a heap snapshot. It will build a compact
radix tree from the source paths of the stack traces, which height is at most
|max_depth|, selecting only those nodes which contribute for at least
|threshold| (1.0 = 100%) w.r.t. the total allocation of the heap snapshot.
"""
assert(isinstance(nheap, native_heap.NativeHeap))
def RadixTreeInsert(node, path):
"""Inserts a string (path) into a radix tree (a python recursive dict).
e.g.: [/a/b/c, /a/b/d, /z/h] -> {'/a/b/': {'c': {}, 'd': {}}, '/z/h': {}}
"""
def GetCommonPrefix(args):
"""Returns the common prefix between two paths (no partial paths).
e.g.: /tmp/bar, /tmp/baz will return /tmp/ (and not /tmp/ba as the dumb
posixpath.commonprefix implementation would do)
"""
parts = posixpath.commonprefix(args).rpartition(posixpath.sep)[0]
return parts + posixpath.sep if parts else ''
for node_path in node.iterkeys():
pfx = GetCommonPrefix([node_path, path])
if not pfx:
continue
if len(pfx) < len(node_path):
node[pfx] = {node_path[len(pfx):] : node[node_path]}
del node[node_path]
if len(path) > len(pfx):
RadixTreeInsert(node[pfx], path[len(pfx):])
return
node[path] = {} # No common prefix, create new child in current node.
# Given an allocation of size N and its stack trace, heuristically determines
# the source directory to be blamed for the N bytes.
# The blamed_dir is the one which appears more times in the top 8 stack frames
# (excluding the first 2, which usually are just the (m|c)alloc call sites).
# At the end, this will generate a *leaderboard* (|blamed_dirs|) which
# associates, to each source path directory, the number of bytes allocated.
blamed_dirs = collections.Counter() # '/s/path' : bytes_from_this_path (int)
total_allocated = 0
for alloc in nheap.allocations:
dir_histogram = collections.Counter()
for frame in alloc.stack_trace.frames[2:10]:
# Compute a histogram (for each allocation) of the top source dirs.
if not frame.symbol or not frame.symbol.source_info:
continue
src_file = frame.symbol.source_info[0].source_file_path
src_dir = posixpath.dirname(src_file.replace('\\', '/')) + '/'
dir_histogram.update([src_dir])
if not dir_histogram:
continue
# Add the blamed dir to the leaderboard.
blamed_dir = dir_histogram.most_common()[0][0]
blamed_dirs.update({blamed_dir : alloc.size})
total_allocated += alloc.size
# Select only the top paths from the leaderboard which contribute for more
# than |threshold| and make a radix tree out of them.
radix_tree = {}
for blamed_dir, alloc_size in blamed_dirs.most_common():
if (1.0 * alloc_size / total_allocated) < threshold:
break
RadixTreeInsert(radix_tree, blamed_dir)
# The final step consists in generating a rule tree from the radix tree. This
# is a pretty straightforward tree-clone operation, they have the same shape.
def GenRulesFromRadixTree(radix_tree_node, max_depth, parent_path=''):
children = []
if max_depth > 0:
for node_path, node_children in radix_tree_node.iteritems():
child_rule = {
'name': node_path[-16:],
'source_path': '^' + re.escape(parent_path + node_path),
'children': GenRulesFromRadixTree(
node_children, max_depth - 1, parent_path + node_path)}
children += [child_rule]
return children
rules_tree = GenRulesFromRadixTree(radix_tree, max_depth)
return LoadRules(str(rules_tree))
class _NHeapRule(rules.Rule):
def __init__(self, name, filters):
super(_NHeapRule, self).__init__(name)
# The 'stacktrace' filter can be either a string (simple case, one regex) or
# a list of strings (complex case, see doc in the header of this file).
stacktrace_regexs = filters.get('stacktrace', [])
if isinstance(stacktrace_regexs, basestring):
stacktrace_regexs = [stacktrace_regexs]
self._stacktrace_regexs = []
for regex in stacktrace_regexs:
try:
self._stacktrace_regexs.append(re.compile(regex))
except re.error, descr:
raise exceptions.MemoryInspectorException(
'Stacktrace regex error "%s" : %s' % (regex, descr))
# The 'source_path' regex, instead, simply matches the source file path.
self._path_regex = None
path_regex = filters.get('source_path')
if path_regex:
try:
self._path_regex = re.compile(path_regex)
except re.error, descr:
raise exceptions.MemoryInspectorException(
'Path regex error "%s" : %s' % (path_regex, descr))
def Match(self, allocation):
# Match the source file path, if the 'source_path' filter is specified.
if self._path_regex:
path_matches = False
for frame in allocation.stack_trace.frames:
if frame.symbol and frame.symbol.source_info:
if self._path_regex.search(
frame.symbol.source_info[0].source_file_path):
path_matches = True
break
if not path_matches:
return False
# Match the stack traces symbols, if the 'stacktrace' filter is specified.
if not self._stacktrace_regexs:
return True
cur_regex_idx = 0
cur_regex = self._stacktrace_regexs[0]
for frame in allocation.stack_trace.frames:
if frame.symbol and cur_regex.search(frame.symbol.name):
# The current regex has been matched.
if cur_regex_idx == len(self._stacktrace_regexs) - 1:
return True # All the provided regexs have been matched, we're happy.
cur_regex_idx += 1
cur_regex = self._stacktrace_regexs[cur_regex_idx]
return False # Not all the provided regexs have been matched.