| #!/usr/bin/env python |
| # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """The deep heap profiler script for Chrome.""" |
| |
| from datetime import datetime |
| import json |
| import os |
| import re |
| import subprocess |
| import sys |
| import tempfile |
| |
# Word-index constants for one stacktrace line in a heap profile dump.
# A line looks like: "<virtual> <committed> <allocs> <frees> @ <bucket id> ..."
BUCKET_ID = 5
VIRTUAL = 0
COMMITTED = 1
ALLOC_COUNT = 2
FREE_COUNT = 3
# NOTE(review): NULL_REGEX appears unused in this file — confirm before removal.
NULL_REGEX = re.compile('')
# Path to the pprof script bundled in Chromium's tcmalloc checkout, resolved
# relative to this script's own location (two directories up).
PPROF_PATH = os.path.join(os.path.dirname(__file__),
                          os.pardir,
                          os.pardir,
                          'third_party',
                          'tcmalloc',
                          'chromium',
                          'src',
                          'pprof')

# Heap Profile Dump versions

# DUMP_DEEP_1 DOES NOT distinct mmap regions and malloc chunks.
# Their stacktraces DO contain mmap* or tc-* at their tops.
# They should be processed by POLICY_DEEP_1.
DUMP_DEEP_1 = 'DUMP_DEEP_1'

# DUMP_DEEP_2 DOES distinct mmap regions and malloc chunks.
# Their stacktraces still DO contain mmap* or tc-*.
# They should be processed by POLICY_DEEP_1.
DUMP_DEEP_2 = 'DUMP_DEEP_2'

# DUMP_DEEP_3 DOES distinct mmap regions and malloc chunks.
# Their stacktraces DO NOT contain mmap* or tc-*.
# They should be processed by POLICY_DEEP_2.
DUMP_DEEP_3 = 'DUMP_DEEP_3'

# DUMP_DEEP_4 adds some features to DUMP_DEEP_3:
# 1. Support comments starting with '#'
# 2. Support additional global stats: e.g. nonprofiled-*.
DUMP_DEEP_4 = 'DUMP_DEEP_4'

# Heap Profile Policy versions

# POLICY_DEEP_1 DOES NOT include allocation_type columns.
# mmap regions are distincted w/ mmap frames in the pattern column.
POLICY_DEEP_1 = 'POLICY_DEEP_1'

# POLICY_DEEP_2 DOES include allocation_type columns.
# mmap regions are distincted w/ the allocation_type column.
POLICY_DEEP_2 = 'POLICY_DEEP_2'

# TODO(dmikurube): Avoid global variables.
address_symbol_dict = {}  # Maps address strings to resolved symbol names.
appeared_addresses = set()  # Addresses seen in any valid stacktrace line.
components = []  # Component names, in the order they appear in the policy.
| |
| |
class ParsingException(Exception):
  """Raised when a heap profile dump cannot be parsed."""

  def __init__(self, value):
    self.value = value

  def __str__(self):
    return '%r' % (self.value,)
| |
| |
class Policy(object):
  """One rule from a policy file.

  Attributes:
    name: The component name this rule assigns.
    mmap: Whether the rule applies to mmap regions (vs. malloc chunks).
    condition: The compiled stacktrace pattern, anchored at end-of-string.
  """

  def __init__(self, name, mmap, pattern):
    self.name = name
    self.mmap = mmap
    # Appending r'\Z' forces the pattern to cover the entire stacktrace
    # string, not merely a prefix of it.
    self.condition = re.compile('%s\\Z' % pattern)
| |
| |
def get_component(policy_list, bucket, mmap):
  """Returns a component name which a given bucket belongs to.

  The result is cached on the bucket itself, so each bucket is matched
  against the policy list at most once.

  Args:
    policy_list: A list containing Policy objects. (Parsed policy data by
        parse_policy.)
    bucket: A Bucket object to be searched for.
    mmap: True if searching for a mmap region.

  Returns:
    A string representing a component name.
  """
  if not bucket:
    return 'no-bucket'
  if bucket.component:
    # Already classified by an earlier call; reuse the cached result.
    return bucket.component

  # Symbolize the whole stacktrace into one space-separated string.
  # (Symbol values are stored stripped, so a plain join is equivalent.)
  stacktrace = ' '.join(
      address_symbol_dict[addr] for addr in bucket.stacktrace)

  for policy in policy_list:
    if policy.mmap == mmap and policy.condition.match(stacktrace):
      bucket.component = policy.name
      return policy.name

  # Policy files are expected to contain a catch-all rule; reaching here
  # means no rule matched this stacktrace at all.
  assert False
| |
| |
class Bucket(object):
  """A profile bucket: one stacktrace plus its cached component name.

  Attributes:
    stacktrace: A list of address strings forming the stacktrace.
    component: The component name cached by get_component; '' until matched.
  """

  def __init__(self, stacktrace):
    self.component = ''
    self.stacktrace = stacktrace
| |
| |
class Log(object):

  """A class representing one dumped log data.

  Holds the raw dump lines (empty and '#'-comment lines filtered out), the
  stacktrace lines split into mmap/malloc groups, and the parsed global
  counters keyed as '<stat>_virtual' / '<stat>_committed'.
  """
  def __init__(self, log_path):
    self.log_path = log_path
    # Filter out empty lines and '#' comments (comments appear in DUMP_DEEP_4).
    # NOTE(review): the file object from open() is never explicitly closed.
    self.log_lines = [
        l for l in open(self.log_path, 'r') if l and not l.startswith('#')]
    self.log_version = ''
    sys.stderr.write('Loading a dump: %s\n' % log_path)
    self.mmap_stacktrace_lines = []
    self.malloc_stacktrace_lines = []
    self.counters = {}
    # The dump's timestamp is taken from the file's mtime (epoch seconds).
    self.log_time = os.stat(self.log_path).st_mtime

  @staticmethod
  def dump_stacktrace_lines(stacktrace_lines, buckets):
    """Prints a given stacktrace.

    Writes each line's leading numeric columns followed by the bucket's
    stacktrace with addresses replaced by symbols where resolved.

    Args:
      stacktrace_lines: A list of strings which are valid as stacktraces.
      buckets: A dict mapping bucket ids and their corresponding Bucket
          objects.
    """
    for l in stacktrace_lines:
      words = l.split()
      bucket = buckets.get(int(words[BUCKET_ID]))
      if not bucket:
        continue
      # Emit words[0..BUCKET_ID-2] (the numeric columns), skipping the '@'
      # separator at words[BUCKET_ID - 1].
      for i in range(0, BUCKET_ID - 1):
        sys.stdout.write(words[i] + ' ')
      for address in bucket.stacktrace:
        # Fall back to the raw address when no symbol is known.
        sys.stdout.write((address_symbol_dict.get(address) or address) + ' ')
      sys.stdout.write('\n')

  def dump_stacktrace(self, buckets):
    """Prints stacktraces contained in the log.

    Args:
      buckets: A dict mapping bucket ids and their corresponding Bucket
          objects.
    """
    self.dump_stacktrace_lines(self.mmap_stacktrace_lines, buckets)
    self.dump_stacktrace_lines(self.malloc_stacktrace_lines, buckets)

  @staticmethod
  def accumulate_size_for_pprof(stacktrace_lines, policy_list, buckets,
                                component_name, mmap):
    """Accumulates size of committed chunks and the number of allocated chunks.

    Args:
      stacktrace_lines: A list of strings which are valid as stacktraces.
      policy_list: A list containing Policy objects. (Parsed policy data by
          parse_policy.)
      buckets: A dict mapping bucket ids and their corresponding Bucket
          objects.
      component_name: A name of component for filtering.
      mmap: True if searching for a mmap region.

    Returns:
      Two integers which are the accumulated size of committed regions and the
      number of allocated chunks, respectively.
    """
    com_committed = 0
    com_allocs = 0
    for l in stacktrace_lines:
      words = l.split()
      bucket = buckets.get(int(words[BUCKET_ID]))
      # Skip unknown buckets; when filtering, skip non-matching components.
      if (not bucket or
          (component_name and
           component_name != get_component(policy_list, bucket, mmap))):
        continue

      com_committed += int(words[COMMITTED])
      # Live chunk count = allocations minus frees.
      com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT])

    return com_committed, com_allocs

  @staticmethod
  def dump_stacktrace_lines_for_pprof(stacktrace_lines, policy_list,
                                      buckets, component_name, mmap):
    """Prints information of stacktrace lines for pprof.

    Args:
      stacktrace_lines: A list of strings which are valid as stacktraces.
      policy_list: A list containing Policy objects. (Parsed policy data by
          parse_policy.)
      buckets: A dict mapping bucket ids and their corresponding Bucket
          objects.
      component_name: A name of component for filtering.
      mmap: True if searching for a mmap region.
    """
    for l in stacktrace_lines:
      words = l.split()
      bucket = buckets.get(int(words[BUCKET_ID]))
      if (not bucket or
          (component_name and
           component_name != get_component(policy_list, bucket, mmap))):
        continue

      # pprof heap format: "allocs: size [allocs: size] @ addr addr ..."
      sys.stdout.write('%6d: %8s [%6d: %8s] @' % (
          int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),
          words[COMMITTED],
          int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),
          words[COMMITTED]))
      for address in bucket.stacktrace:
        sys.stdout.write(' ' + address)
      sys.stdout.write('\n')

  def dump_for_pprof(self, policy_list, buckets, mapping_lines, component_name):
    """Converts the log file so it can be processed by pprof.

    Args:
      policy_list: A list containing Policy objects. (Parsed policy data by
          parse_policy.)
      buckets: A dict mapping bucket ids and their corresponding Bucket
          objects.
      mapping_lines: A list of strings containing /proc/.../maps.
      component_name: A name of component for filtering.
    """
    sys.stdout.write('heap profile: ')
    # Totals combine both the mmap and the malloc stacktrace groups.
    com_committed, com_allocs = self.accumulate_size_for_pprof(
        self.mmap_stacktrace_lines, policy_list, buckets, component_name,
        True)
    add_committed, add_allocs = self.accumulate_size_for_pprof(
        self.malloc_stacktrace_lines, policy_list, buckets, component_name,
        False)
    com_committed += add_committed
    com_allocs += add_allocs

    sys.stdout.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % (
        com_allocs, com_committed, com_allocs, com_committed))

    self.dump_stacktrace_lines_for_pprof(
        self.mmap_stacktrace_lines, policy_list, buckets, component_name,
        True)
    self.dump_stacktrace_lines_for_pprof(
        self.malloc_stacktrace_lines, policy_list, buckets, component_name,
        False)

    # pprof needs the process mappings to resolve addresses to binaries.
    sys.stdout.write('MAPPED_LIBRARIES:\n')
    for l in mapping_lines:
      sys.stdout.write(l)

  @staticmethod
  def check_stacktrace_line(stacktrace_line, buckets):
    """Checks if a given stacktrace_line is valid as stacktrace.

    As a side effect, records all addresses of a valid line's bucket in the
    global appeared_addresses set (used later for symbol resolution).

    Args:
      stacktrace_line: A string to be checked.
      buckets: A dict mapping bucket ids and their corresponding Bucket
          objects.

    Returns:
      True if the given stacktrace_line is valid.
    """
    words = stacktrace_line.split()
    if len(words) < BUCKET_ID + 1:
      return False
    # A valid line has the '@' separator just before the bucket id.
    if words[BUCKET_ID - 1] != '@':
      return False
    bucket = buckets.get(int(words[BUCKET_ID]))
    if bucket:
      for address in bucket.stacktrace:
        appeared_addresses.add(address)
    return True

  @staticmethod
  def skip_lines_while(line_number, max_line_number, skipping_condition):
    """Increments line_number until skipping_condition(line_number) is false.

    Returns:
      A pair of an integer indicating a line number after skipped, and a
      boolean value which is True if found a line which skipping_condition
      is False for.
    """
    while skipping_condition(line_number):
      line_number += 1
      if line_number >= max_line_number:
        return line_number, False
    return line_number, True

  def parse_stacktraces_while_valid(self, buckets, log_lines, line_number):
    """Parses stacktrace lines while the lines are valid.

    Args:
      buckets: A dict mapping bucket ids and their corresponding Bucket
          objects.
      log_lines: A list of lines to be parsed.
      line_number: An integer representing the starting line number in
          log_lines.

    Returns:
      A pair of a list of valid lines and an integer representing the last
      line number in log_lines.
    """
    # Skip any leading lines that do not start with a number (headers).
    # NOTE(review): a whitespace-only line would make split()[0] raise
    # IndexError here — presumably dumps never contain one; verify.
    (line_number, _) = self.skip_lines_while(
        line_number, len(log_lines),
        lambda n: not log_lines[n].split()[0].isdigit())
    stacktrace_lines_start = line_number
    # Consume consecutive valid stacktrace lines.
    (line_number, _) = self.skip_lines_while(
        line_number, len(log_lines),
        lambda n: self.check_stacktrace_line(log_lines[n], buckets))
    return (log_lines[stacktrace_lines_start:line_number], line_number)

  def parse_stacktraces(self, buckets, line_number):
    """Parses lines in self.log_lines as stacktrace.

    Valid stacktrace lines are stored into self.mmap_stacktrace_lines and
    self.malloc_stacktrace_lines.

    Args:
      buckets: A dict mapping bucket ids and their corresponding Bucket
          objects.
      line_number: An integer representing the starting line number in
          log_lines.

    Raises:
      ParsingException for invalid dump versions.
    """
    sys.stderr.write('  Version: %s\n' % self.log_version)

    if self.log_version in (DUMP_DEEP_3, DUMP_DEEP_4):
      # MMAP_STACKTRACES comes first, then MALLOC_STACKTRACES.
      (self.mmap_stacktrace_lines, line_number) = (
          self.parse_stacktraces_while_valid(
              buckets, self.log_lines, line_number))
      (line_number, _) = self.skip_lines_while(
          line_number, len(self.log_lines),
          lambda n: self.log_lines[n] != 'MALLOC_STACKTRACES:\n')
      (self.malloc_stacktrace_lines, line_number) = (
          self.parse_stacktraces_while_valid(
              buckets, self.log_lines, line_number))

    elif self.log_version == DUMP_DEEP_2:
      (self.mmap_stacktrace_lines, line_number) = (
          self.parse_stacktraces_while_valid(
              buckets, self.log_lines, line_number))
      (line_number, _) = self.skip_lines_while(
          line_number, len(self.log_lines),
          lambda n: self.log_lines[n] != 'MALLOC_STACKTRACES:\n')
      (self.malloc_stacktrace_lines, line_number) = (
          self.parse_stacktraces_while_valid(
              buckets, self.log_lines, line_number))
      # DUMP_DEEP_2 stacktraces still contain mmap*/tc-* frames (see the
      # version comments at top of file), so both groups are merged and
      # treated as malloc traces for POLICY_DEEP_1 pattern matching.
      self.malloc_stacktrace_lines.extend(self.mmap_stacktrace_lines)
      self.mmap_stacktrace_lines = []

    elif self.log_version == DUMP_DEEP_1:
      # DUMP_DEEP_1 has a single undifferentiated STACKTRACES section.
      (self.malloc_stacktrace_lines, line_number) = (
          self.parse_stacktraces_while_valid(
              buckets, self.log_lines, line_number))

    else:
      raise ParsingException('invalid heap profile dump version: %s' % (
          self.log_version))

  def parse_global_stats(self):
    """Parses lines in self.log_lines as global stats.

    Fills self.counters with '<stat>_virtual' and '<stat>_committed' keys,
    taking the last two whitespace-separated fields of each stat line.
    """
    (ln, _) = self.skip_lines_while(
        0, len(self.log_lines),
        lambda n: self.log_lines[n] != 'GLOBAL_STATS:\n')

    # DUMP_DEEP_4 reports a finer-grained set of stats (nonprofiled-*).
    if self.log_version == DUMP_DEEP_4:
      global_stat_names = [
          'total', 'file-exec', 'file-nonexec', 'anonymous', 'stack', 'other',
          'nonprofiled-absent', 'nonprofiled-anonymous',
          'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
          'nonprofiled-stack', 'nonprofiled-other',
          'profiled-mmap', 'profiled-malloc']
    else:
      global_stat_names = [
          'total', 'file', 'anonymous', 'other', 'mmap', 'tcmalloc']

    # Stats are expected in this order; scan forward to each in turn.
    for prefix in global_stat_names:
      (ln, _) = self.skip_lines_while(
          ln, len(self.log_lines),
          lambda n: self.log_lines[n].split()[0] != prefix)
      words = self.log_lines[ln].split()
      self.counters[prefix + '_virtual'] = int(words[-2])
      self.counters[prefix + '_committed'] = int(words[-1])

  def parse_version(self):
    """Parses a version string in self.log_lines.

    Returns:
      A pair of (a string representing a version of the stacktrace dump,
      and an integer indicating a line number next to the version string).

    Raises:
      ParsingException for invalid dump versions.
    """
    version = ''

    # Skip until an identifiable line.
    headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
    if not self.log_lines:
      raise ParsingException('Empty heap dump file.')
    (ln, found) = self.skip_lines_while(
        0, len(self.log_lines),
        lambda n: not self.log_lines[n].startswith(headers))
    if not found:
      raise ParsingException('Invalid heap dump file (no version header).')

    # Identify a version.
    if self.log_lines[ln].startswith('heap profile: '):
      # Slice past 'heap profile:' — the leading space is removed by strip().
      version = self.log_lines[ln][13:].strip()
      if (version == DUMP_DEEP_2 or version == DUMP_DEEP_3 or
          version == DUMP_DEEP_4):
        (ln, _) = self.skip_lines_while(
            ln, len(self.log_lines),
            lambda n: self.log_lines[n] != 'MMAP_STACKTRACES:\n')
      else:
        raise ParsingException('invalid heap profile dump version: %s'
                               % version)
    elif self.log_lines[ln] == 'STACKTRACES:\n':
      # Old dumps with a bare STACKTRACES header are DUMP_DEEP_1.
      version = DUMP_DEEP_1
    elif self.log_lines[ln] == 'MMAP_STACKTRACES:\n':
      # Dumps with MMAP_STACKTRACES but no version line are DUMP_DEEP_2.
      version = DUMP_DEEP_2

    return (version, ln)

  def parse_log(self, buckets):
    """Parses the whole dump: version, global stats, then stacktraces."""
    self.log_version, ln = self.parse_version()
    self.parse_global_stats()
    self.parse_stacktraces(buckets, ln)

  @staticmethod
  def accumulate_size_for_policy(stacktrace_lines,
                                 policy_list, buckets, sizes, mmap):
    """Adds each line's committed size to its component's total in sizes.

    Also accumulates the 'tc-total-log' / 'mmap-total-log' /
    'other-total-log' roll-up counters based on the component name prefix.
    """
    for l in stacktrace_lines:
      words = l.split()
      bucket = buckets.get(int(words[BUCKET_ID]))
      component_match = get_component(policy_list, bucket, mmap)
      sizes[component_match] += int(words[COMMITTED])

      if component_match.startswith('tc-'):
        sizes['tc-total-log'] += int(words[COMMITTED])
      elif component_match.startswith('mmap-'):
        sizes['mmap-total-log'] += int(words[COMMITTED])
      else:
        sizes['other-total-log'] += int(words[COMMITTED])

  def apply_policy(self, policy_list, buckets, first_log_time):
    """Aggregates the total memory size of each component.

    Iterate through all stacktraces and attribute them to one of the components
    based on the policy. It is important to apply policy in right order.

    Args:
      policy_list: A list containing Policy objects. (Parsed policy data by
          parse_policy.)
      buckets: A dict mapping bucket ids and their corresponding Bucket
          objects.
      first_log_time: An integer representing time when the first log is
          dumped.

    Returns:
      A dict mapping components and their corresponding sizes.
    """

    sys.stderr.write('apply policy:%s\n' % (self.log_path))
    sizes = dict((c, 0) for c in components)

    self.accumulate_size_for_policy(self.mmap_stacktrace_lines,
                                    policy_list, buckets, sizes, True)
    self.accumulate_size_for_policy(self.malloc_stacktrace_lines,
                                    policy_list, buckets, sizes, False)

    # Counter key prefixes differ between dump versions (see parse_global_stats).
    if self.log_version == DUMP_DEEP_4:
      mmap_prefix = 'profiled-mmap'
      malloc_prefix = 'profiled-malloc'
    else:
      mmap_prefix = 'mmap'
      malloc_prefix = 'tcmalloc'

    # '*-no-log' = recorded total minus what the stacktraces account for.
    sizes['mmap-no-log'] = (
        self.counters['%s_committed' % mmap_prefix] - sizes['mmap-total-log'])
    sizes['mmap-total-record'] = self.counters['%s_committed' % mmap_prefix]
    sizes['mmap-total-record-vm'] = self.counters['%s_virtual' % mmap_prefix]

    sizes['tc-no-log'] = (
        self.counters['%s_committed' % malloc_prefix] - sizes['tc-total-log'])
    sizes['tc-total-record'] = self.counters['%s_committed' % malloc_prefix]
    # Assumes the policy defines a 'mmap-tcmalloc' component; a KeyError here
    # means the policy file lacks it.
    sizes['tc-unused'] = (
        sizes['mmap-tcmalloc'] - self.counters['%s_committed' % malloc_prefix])
    sizes['tc-total'] = sizes['mmap-tcmalloc']

    # Overwrite pseudo-components with their global-stat counters, but only
    # for components the policy actually declares.
    # NOTE: iteritems is Python 2 only.
    for key, value in {
        'total': 'total_committed',
        'filemapped': 'file_committed',
        'file-exec': 'file-exec_committed',
        'file-nonexec': 'file-nonexec_committed',
        'anonymous': 'anonymous_committed',
        'stack': 'stack_committed',
        'other': 'other_committed',
        'nonprofiled-absent': 'nonprofiled-absent_committed',
        'nonprofiled-anonymous': 'nonprofiled-anonymous_committed',
        'nonprofiled-file-exec': 'nonprofiled-file-exec_committed',
        'nonprofiled-file-nonexec': 'nonprofiled-file-nonexec_committed',
        'nonprofiled-stack': 'nonprofiled-stack_committed',
        'nonprofiled-other': 'nonprofiled-other_committed',
        'total-vm': 'total_virtual',
        'filemapped-vm': 'file_virtual',
        'anonymous-vm': 'anonymous_virtual',
        'other-vm': 'other_virtual' }.iteritems():
      if key in sizes:
        sizes[key] = self.counters[value]

    if 'mustbezero' in sizes:
      # Everything committed should be covered by the removed categories;
      # any remainder indicates unaccounted memory.
      removed = (
          '%s_committed' % mmap_prefix,
          'nonprofiled-absent_committed',
          'nonprofiled-anonymous_committed',
          'nonprofiled-file-exec_committed',
          'nonprofiled-file-nonexec_committed',
          'nonprofiled-stack_committed',
          'nonprofiled-other_committed')
      sizes['mustbezero'] = (
          self.counters['total_committed'] -
          sum(self.counters[i] for i in removed))
    if 'total-exclude-profiler' in sizes:
      sizes['total-exclude-profiler'] = (
          self.counters['total_committed'] - sizes['mmap-profiler'])
    # Elapsed time since the first dump, in the unit the component asks for.
    if 'hour' in sizes:
      sizes['hour'] = (self.log_time - first_log_time) / 60.0 / 60.0
    if 'minute' in sizes:
      sizes['minute'] = (self.log_time - first_log_time) / 60.0
    if 'second' in sizes:
      sizes['second'] = self.log_time - first_log_time

    return sizes

  @staticmethod
  def accumulate_size_for_expand(stacktrace_lines, policy_list, buckets,
                                 component_name, depth, sizes, mmap):
    """Sums committed sizes per symbolized stacktrace prefix of given depth."""
    for line in stacktrace_lines:
      words = line.split()
      bucket = buckets.get(int(words[BUCKET_ID]))
      component_match = get_component(policy_list, bucket, mmap)
      if component_match == component_name:
        # Key on the first (1 + depth) frames, symbolized.
        stacktrace_sequence = ''
        for address in bucket.stacktrace[0 : min(len(bucket.stacktrace),
                                                 1 + depth)]:
          stacktrace_sequence += address_symbol_dict[address] + ' '
        if not stacktrace_sequence in sizes:
          sizes[stacktrace_sequence] = 0
        sizes[stacktrace_sequence] += int(words[COMMITTED])

  def expand(self, policy_list, buckets, component_name, depth):
    """Prints all stacktraces in a given component of given depth.

    Args:
      policy_list: A list containing Policy objects. (Parsed policy data by
          parse_policy.)
      buckets: A dict mapping bucket ids and their corresponding Bucket
          objects.
      component_name: A name of component for filtering.
      depth: An integer representing depth to be printed.
    """
    sizes = {}

    self.accumulate_size_for_expand(
        self.mmap_stacktrace_lines, policy_list, buckets, component_name,
        depth, sizes, True)
    self.accumulate_size_for_expand(
        self.malloc_stacktrace_lines, policy_list, buckets, component_name,
        depth, sizes, False)

    # Largest consumers first.  NOTE: iteritems is Python 2 only.
    sorted_sizes_list = sorted(
        sizes.iteritems(), key=(lambda x: x[1]), reverse=True)
    total = 0
    for size_pair in sorted_sizes_list:
      sys.stdout.write('%10d %s\n' % (size_pair[1], size_pair[0]))
      total += size_pair[1]
    sys.stderr.write('total: %d\n' % (total))
| |
| |
def update_symbols(symbol_path, mapping_lines, chrome_path):
  """Updates address/symbol mapping on memory and in a .symbol cache file.

  It reads cached address/symbol mapping from a .symbol file if it exists.
  Then, it resolves unresolved addresses from a Chrome binary with pprof.
  Both mappings on memory and in a .symbol cache file are updated.

  Symbol files are formatted as follows:
    <Address> <Symbol>
    <Address> <Symbol>
    <Address> <Symbol>
    ...

  Args:
    symbol_path: A string representing a path for a .symbol file.
    mapping_lines: A list of strings containing /proc/.../maps.
    chrome_path: A string representing a path for a Chrome binary.
  """
  # 'a+' creates the cache file when absent and allows both read and append.
  with open(symbol_path, mode='a+') as symbol_f:
    # Load the cached mapping first; a missing cache reads as no lines.
    for line in symbol_f.readlines():
      items = line.split(None, 1)
      address_symbol_dict[items[0]] = items[1].rstrip()

    unresolved_addresses = sorted(
        a for a in appeared_addresses if a not in address_symbol_dict)

    if unresolved_addresses:
      # pprof --symbols reads the maps followed by one address per line, and
      # prints one resolved symbol per input address, in order.
      with tempfile.NamedTemporaryFile(
          suffix='maps', prefix="dmprof", mode='w+') as pprof_in:
        with tempfile.NamedTemporaryFile(
            suffix='symbols', prefix="dmprof", mode='w+') as pprof_out:
          for line in mapping_lines:
            pprof_in.write(line)

          for address in unresolved_addresses:
            pprof_in.write(address + '\n')

          pprof_in.seek(0)

          # Pass an argument list with shell=False so that paths containing
          # spaces or shell metacharacters cannot break (or inject into) a
          # shell command line.
          p = subprocess.Popen([PPROF_PATH, '--symbols', chrome_path],
                               stdin=pprof_in, stdout=pprof_out)
          p.wait()

          pprof_out.seek(0)
          symbols = pprof_out.readlines()
          symbol_f.seek(0, 2)  # Append new entries at the end of the cache.
          for address, symbol in zip(unresolved_addresses, symbols):
            stripped_symbol = symbol.strip()
            address_symbol_dict[address] = stripped_symbol
            symbol_f.write('%s %s\n' % (address, stripped_symbol))
| |
| |
def parse_policy(policy_path):
  """Reads a policy file and builds the corresponding Policy list.

  A policy file contains component names and their stacktrace patterns
  written as regular expressions.  Patterns are matched against the
  symbolized stacktraces in the order they appear in the file, so order
  matters.  Component names are also registered into the global
  'components' list in file order.

  Args:
    policy_path: A path for a policy file.
  Returns:
    A list of Policy objects (empty for an unknown policy version).
  """
  with open(policy_path, mode='r') as policy_f:
    policy_lines = policy_f.readlines()

  # An optional first line declares the policy version; default is DEEP_1.
  policy_version = POLICY_DEEP_1
  if policy_lines[0].startswith('heap profile policy: '):
    policy_version = policy_lines[0][21:].strip()
    policy_lines.pop(0)
  policy_list = []

  if policy_version not in (POLICY_DEEP_1, POLICY_DEEP_2):
    sys.stderr.write('  invalid heap profile policy version: %s\n' % (
        policy_version))
    return policy_list

  sys.stderr.write('  heap profile policy version: %s\n' % policy_version)
  for line in policy_lines:
    if line.startswith('#'):
      continue

    if policy_version == POLICY_DEEP_2:
      # DEEP_2 lines: "<name> <allocation_type> <pattern>".
      (name, allocation_type, pattern) = line.strip().split(None, 2)
      mmap = (allocation_type == 'mmap')
    else:
      # DEEP_1 lines: "<name> <pattern>"; mmap is encoded in the pattern.
      name = line.split()[0]
      pattern = line[len(name):].strip()
      mmap = False

    # 'default' rows only terminate the file; they produce no rule.
    if pattern != 'default':
      policy_list.append(Policy(name, mmap, pattern))
      if name not in components:
        components.append(name)

  return policy_list
| |
| |
def main():
  """Entry point: parses arguments and dispatches to the requested action.

  Returns:
    None on success; exits with status 1 on a usage error.
  """
  # The usage requires 4 positional arguments after the program name
  # (<action> <chrome-binary> <policy> <profile>), i.e. len(sys.argv) >= 5.
  # Checking "< 4" here would let sys.argv[4] below raise IndexError.
  if (len(sys.argv) < 5) or (not (sys.argv[1] in ['--csv',
                                                  '--json',
                                                  '--expand',
                                                  '--list',
                                                  '--stacktrace',
                                                  '--pprof'])):
    sys.stderr.write("""Usage:
%s [options] <chrome-binary> <policy> <profile> [component-name] [depth]

Options:
   --csv        Output result in csv format
   --json       Output result in json format
   --stacktrace Convert raw address to symbol names
   --list       Lists components and their sizes
   --expand     Show all stacktraces in the specified component
                of given depth with their sizes
   --pprof      Format the profile file so it can be processed
                by pprof

Examples:
  dmprof --csv Debug/chrome dmpolicy hprof.12345.0001.heap > result.csv
  dmprof --json Debug/chrome dmpolicy hprof.12345.0001.heap > result.json
  dmprof --list Debug/chrome dmpolicy hprof.12345.0012.heap
  dmprof --expand Debug/chrome dmpolicy hprof.12345.0012.heap tc-webkit 4
  dmprof --pprof Debug/chrome dmpolicy hprof.12345.0012.heap > for_pprof.txt
""" % (sys.argv[0]))
    sys.exit(1)

  action = sys.argv[1]
  chrome_path = sys.argv[2]
  policy_path = sys.argv[3]
  log_path = sys.argv[4]

  sys.stderr.write('parsing a policy file\n')
  policy_list = parse_policy(policy_path)

  # Derive the dump-set prefix by stripping the ".NNNN.heap" suffix; the
  # .symbols, .maps and .buckets files share this prefix.
  p = re.compile(r'\.[0-9][0-9][0-9][0-9]\.heap')
  prefix = p.sub('', log_path)
  symbol_path = prefix + '.symbols'

  sys.stderr.write('parsing the maps file\n')
  maps_path = prefix + '.maps'
  with open(maps_path, 'r') as maps_f:
    maps_lines = maps_f.readlines()

  # Reading buckets.  Bucket files are numbered <prefix>.0000.buckets,
  # <prefix>.0001.buckets, ...; tolerate gaps up to index 10.
  sys.stderr.write('parsing the bucket file\n')
  buckets = {}
  bucket_count = 0
  n = 0
  while True:
    buckets_path = '%s.%04d.buckets' % (prefix, n)
    if not os.path.exists(buckets_path):
      if n > 10:
        break
      n += 1
      continue
    sys.stderr.write('reading buckets from %s\n' % (buckets_path))
    with open(buckets_path, 'r') as buckets_f:
      # Each line: "<bucket id> <address> <address> ...".
      for l in buckets_f:
        words = l.split()
        buckets[int(words[0])] = Bucket(words[1:])
    n += 1

  log_path_list = [log_path]

  if action in ('--csv', '--json'):
    # Search for the sequence of files following the given one: extract the
    # NNNN counter from "<prefix>.NNNN.heap" and walk forward until a gap.
    n = int(log_path[len(log_path) - 9 : len(log_path) - 5])
    n += 1  # skip current file
    while True:
      p = '%s.%04d.heap' % (prefix, n)
      if os.path.exists(p):
        log_path_list.append(p)
      else:
        break
      n += 1

  logs = []
  for path in log_path_list:
    new_log = Log(path)
    sys.stderr.write('Parsing a dump: %s\n' % path)
    try:
      new_log.parse_log(buckets)
    except ParsingException:
      # Skip invalid dumps instead of aborting the whole series.
      sys.stderr.write('  Ignored an invalid dump: %s\n' % path)
    else:
      logs.append(new_log)

  sys.stderr.write('getting symbols\n')
  update_symbols(symbol_path, maps_lines, chrome_path)

  # TODO(dmikurube): Many modes now.  Split them into separete functions.
  if action == '--stacktrace':
    logs[0].dump_stacktrace(buckets)

  elif action == '--csv':
    sys.stdout.write(','.join(components))
    sys.stdout.write('\n')

    for log in logs:
      component_sizes = log.apply_policy(policy_list, buckets, logs[0].log_time)
      s = []
      for c in components:
        if c in ('hour', 'minute', 'second'):
          s.append('%05.5f' % (component_sizes[c]))
        else:
          # Byte counts are reported in megabytes.
          s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0))
      sys.stdout.write(','.join(s))
      sys.stdout.write('\n')

  elif action == '--json':
    json_base = {
      'version': 'JSON_DEEP_1',
      'legends': components,
      'snapshots': [],
    }
    for log in logs:
      component_sizes = log.apply_policy(policy_list, buckets, logs[0].log_time)
      component_sizes['log_path'] = log.log_path
      component_sizes['log_time'] = datetime.fromtimestamp(
          log.log_time).strftime('%Y-%m-%d %H:%M:%S')
      json_base['snapshots'].append(component_sizes)
    json.dump(json_base, sys.stdout, indent=2, sort_keys=True)

  elif action == '--list':
    component_sizes = logs[0].apply_policy(
        policy_list, buckets, logs[0].log_time)
    for c in components:
      if c in ['hour', 'minute', 'second']:
        sys.stdout.write('%30s %10.3f\n' % (c, component_sizes[c]))
      else:
        sys.stdout.write('%30s %10.3f\n' % (
            c, component_sizes[c] / 1024.0 / 1024.0))

  elif action == '--expand':
    # NOTE(review): --expand reads sys.argv[5] and [6] without a count
    # check; running it with too few arguments raises IndexError.
    component_name = sys.argv[5]
    depth = sys.argv[6]
    logs[0].expand(policy_list, buckets, component_name, int(depth))

  elif action == '--pprof':
    if len(sys.argv) > 5:
      logs[0].dump_for_pprof(policy_list, buckets, maps_lines, sys.argv[5])
    else:
      logs[0].dump_for_pprof(policy_list, buckets, maps_lines, None)
| |
| |
if __name__ == '__main__':
  # main() returns None on success, so sys.exit(main()) exits with status 0;
  # usage errors exit with status 1 inside main() itself.
  sys.exit(main())