#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""The deep heap profiler script for Chrome."""
from datetime import datetime
import json
import os
import re
import subprocess
import sys
import tempfile
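# Word indexes in a stacktrace line of a heap profile dump.  As inferred from
# the parsing code below, such a line looks like:
#   <virtual> <committed> <allocs> <frees> @ <bucket_id>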
BUCKET_ID = 5
VIRTUAL = 0
COMMITTED = 1
ALLOC_COUNT = 2
FREE_COUNT = 3
NULL_REGEX = re.compile('')
PPROF_PATH = os.path.join(os.path.dirname(__file__),
os.pardir,
os.pardir,
'third_party',
'tcmalloc',
'chromium',
'src',
'pprof')
# Heap Profile Dump versions
# DUMP_DEEP_1 DOES NOT distinguish mmap regions from malloc chunks.
# Their stacktraces DO contain mmap* or tc-* at their tops.
# They should be processed by POLICY_DEEP_1.
DUMP_DEEP_1 = 'DUMP_DEEP_1'
# DUMP_DEEP_2 DOES distinguish mmap regions from malloc chunks.
# Their stacktraces still DO contain mmap* or tc-*.
# They should be processed by POLICY_DEEP_1.
DUMP_DEEP_2 = 'DUMP_DEEP_2'
# DUMP_DEEP_3 DOES distinguish mmap regions from malloc chunks.
# Their stacktraces DO NOT contain mmap* or tc-*.
# They should be processed by POLICY_DEEP_2.
DUMP_DEEP_3 = 'DUMP_DEEP_3'
# DUMP_DEEP_4 adds some features to DUMP_DEEP_3:
# 1. Support comments starting with '#'
# 2. Support additional global stats: e.g. nonprofiled-*.
DUMP_DEEP_4 = 'DUMP_DEEP_4'
# Heap Profile Policy versions
# POLICY_DEEP_1 DOES NOT include allocation_type columns.
# mmap regions are distinguished by mmap frames in the pattern column.
POLICY_DEEP_1 = 'POLICY_DEEP_1'
# POLICY_DEEP_2 DOES include allocation_type columns.
# mmap regions are distinguished by the allocation_type column.
POLICY_DEEP_2 = 'POLICY_DEEP_2'
# TODO(dmikurube): Avoid global variables.
address_symbol_dict = {}    # Maps a hexadecimal address string to its symbol.
appeared_addresses = set()  # Addresses seen in any parsed stacktrace.
components = []             # Component names, in the order of the policy file.
class ParsingException(Exception):
def __init__(self, value):
self.value = value
def __str__(self):
return repr(self.value)
class Policy(object):
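  """A rule from a policy file: a component name, whether it applies to mmap
  regions, and a regex matched against a whole symbolized stacktrace."""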
def __init__(self, name, mmap, pattern):
self.name = name
self.mmap = mmap
self.condition = re.compile(pattern + r'\Z')
def get_component(policy_list, bucket, mmap):
"""Returns a component name which a given bucket belongs to.
Args:
policy_list: A list containing Policy objects. (Parsed policy data by
parse_policy.)
bucket: A Bucket object to be searched for.
mmap: True if searching for a mmap region.
Returns:
A string representing a component name.
"""
if not bucket:
return 'no-bucket'
if bucket.component:
return bucket.component
stacktrace = ''.join(
address_symbol_dict[a] + ' ' for a in bucket.stacktrace).strip()
for policy in policy_list:
if mmap == policy.mmap and policy.condition.match(stacktrace):
bucket.component = policy.name
return policy.name
assert False
class Bucket(object):
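  """A bucket: a stacktrace (a list of address strings) and the name of the
  component it has been attributed to (filled in lazily by get_component)."""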
def __init__(self, stacktrace):
self.stacktrace = stacktrace
self.component = ''
class Log(object):
"""A class representing one dumped log data."""
def __init__(self, log_path):
self.log_path = log_path
self.log_lines = [
l for l in open(self.log_path, 'r') if l and not l.startswith('#')]
self.log_version = ''
sys.stderr.write('Loading a dump: %s\n' % log_path)
self.mmap_stacktrace_lines = []
self.malloc_stacktrace_lines = []
self.counters = {}
self.log_time = os.stat(self.log_path).st_mtime
@staticmethod
def dump_stacktrace_lines(stacktrace_lines, buckets):
"""Prints a given stacktrace.
Args:
stacktrace_lines: A list of strings which are valid as stacktraces.
      buckets: A dict mapping bucket ids to their corresponding Bucket
          objects.
"""
for l in stacktrace_lines:
words = l.split()
bucket = buckets.get(int(words[BUCKET_ID]))
if not bucket:
continue
for i in range(0, BUCKET_ID - 1):
sys.stdout.write(words[i] + ' ')
for address in bucket.stacktrace:
sys.stdout.write((address_symbol_dict.get(address) or address) + ' ')
sys.stdout.write('\n')
def dump_stacktrace(self, buckets):
"""Prints stacktraces contained in the log.
Args:
      buckets: A dict mapping bucket ids to their corresponding Bucket
          objects.
"""
self.dump_stacktrace_lines(self.mmap_stacktrace_lines, buckets)
self.dump_stacktrace_lines(self.malloc_stacktrace_lines, buckets)
@staticmethod
def accumulate_size_for_pprof(stacktrace_lines, policy_list, buckets,
component_name, mmap):
"""Accumulates size of committed chunks and the number of allocated chunks.
Args:
stacktrace_lines: A list of strings which are valid as stacktraces.
policy_list: A list containing Policy objects. (Parsed policy data by
parse_policy.)
      buckets: A dict mapping bucket ids to their corresponding Bucket
          objects.
      component_name: The name of the component to filter on.
mmap: True if searching for a mmap region.
Returns:
Two integers which are the accumulated size of committed regions and the
number of allocated chunks, respectively.
"""
com_committed = 0
com_allocs = 0
for l in stacktrace_lines:
words = l.split()
bucket = buckets.get(int(words[BUCKET_ID]))
if (not bucket or
(component_name and
component_name != get_component(policy_list, bucket, mmap))):
continue
com_committed += int(words[COMMITTED])
com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT])
return com_committed, com_allocs
@staticmethod
def dump_stacktrace_lines_for_pprof(stacktrace_lines, policy_list,
buckets, component_name, mmap):
"""Prints information of stacktrace lines for pprof.
Args:
stacktrace_lines: A list of strings which are valid as stacktraces.
policy_list: A list containing Policy objects. (Parsed policy data by
parse_policy.)
      buckets: A dict mapping bucket ids to their corresponding Bucket
          objects.
      component_name: The name of the component to filter on.
mmap: True if searching for a mmap region.
"""
for l in stacktrace_lines:
words = l.split()
bucket = buckets.get(int(words[BUCKET_ID]))
if (not bucket or
(component_name and
component_name != get_component(policy_list, bucket, mmap))):
continue
sys.stdout.write('%6d: %8s [%6d: %8s] @' % (
int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),
words[COMMITTED],
int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),
words[COMMITTED]))
for address in bucket.stacktrace:
sys.stdout.write(' ' + address)
sys.stdout.write('\n')
def dump_for_pprof(self, policy_list, buckets, mapping_lines, component_name):
"""Converts the log file so it can be processed by pprof.
Args:
policy_list: A list containing Policy objects. (Parsed policy data by
parse_policy.)
      buckets: A dict mapping bucket ids to their corresponding Bucket
          objects.
      mapping_lines: A list of strings containing /proc/.../maps.
      component_name: The name of the component to filter on.
"""
sys.stdout.write('heap profile: ')
com_committed, com_allocs = self.accumulate_size_for_pprof(
self.mmap_stacktrace_lines, policy_list, buckets, component_name,
True)
add_committed, add_allocs = self.accumulate_size_for_pprof(
self.malloc_stacktrace_lines, policy_list, buckets, component_name,
False)
com_committed += add_committed
com_allocs += add_allocs
sys.stdout.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % (
com_allocs, com_committed, com_allocs, com_committed))
self.dump_stacktrace_lines_for_pprof(
self.mmap_stacktrace_lines, policy_list, buckets, component_name,
True)
self.dump_stacktrace_lines_for_pprof(
self.malloc_stacktrace_lines, policy_list, buckets, component_name,
False)
sys.stdout.write('MAPPED_LIBRARIES:\n')
for l in mapping_lines:
sys.stdout.write(l)
@staticmethod
def check_stacktrace_line(stacktrace_line, buckets):
"""Checks if a given stacktrace_line is valid as stacktrace.
Args:
stacktrace_line: A string to be checked.
      buckets: A dict mapping bucket ids to their corresponding Bucket
          objects.
Returns:
True if the given stacktrace_line is valid.
"""
words = stacktrace_line.split()
if len(words) < BUCKET_ID + 1:
return False
if words[BUCKET_ID - 1] != '@':
return False
bucket = buckets.get(int(words[BUCKET_ID]))
if bucket:
for address in bucket.stacktrace:
appeared_addresses.add(address)
return True
@staticmethod
def skip_lines_while(line_number, max_line_number, skipping_condition):
"""Increments line_number until skipping_condition(line_number) is false.
Returns:
      A pair of an integer indicating the line number after skipping, and a
      boolean which is True if a line for which skipping_condition is False
      was found.
"""
while skipping_condition(line_number):
line_number += 1
if line_number >= max_line_number:
return line_number, False
return line_number, True
def parse_stacktraces_while_valid(self, buckets, log_lines, line_number):
"""Parses stacktrace lines while the lines are valid.
Args:
      buckets: A dict mapping bucket ids to their corresponding Bucket
          objects.
log_lines: A list of lines to be parsed.
line_number: An integer representing the starting line number in
log_lines.
Returns:
A pair of a list of valid lines and an integer representing the last
line number in log_lines.
"""
(line_number, _) = self.skip_lines_while(
line_number, len(log_lines),
lambda n: not log_lines[n].split()[0].isdigit())
stacktrace_lines_start = line_number
(line_number, _) = self.skip_lines_while(
line_number, len(log_lines),
lambda n: self.check_stacktrace_line(log_lines[n], buckets))
return (log_lines[stacktrace_lines_start:line_number], line_number)
def parse_stacktraces(self, buckets, line_number):
"""Parses lines in self.log_lines as stacktrace.
Valid stacktrace lines are stored into self.mmap_stacktrace_lines and
self.malloc_stacktrace_lines.
Args:
buckets: A dict mapping bucket ids and their corresponding Bucket
objects.
line_number: An integer representing the starting line number in
log_lines.
Raises:
ParsingException for invalid dump versions.
"""
sys.stderr.write(' Version: %s\n' % self.log_version)
if self.log_version in (DUMP_DEEP_3, DUMP_DEEP_4):
(self.mmap_stacktrace_lines, line_number) = (
self.parse_stacktraces_while_valid(
buckets, self.log_lines, line_number))
(line_number, _) = self.skip_lines_while(
line_number, len(self.log_lines),
lambda n: self.log_lines[n] != 'MALLOC_STACKTRACES:\n')
(self.malloc_stacktrace_lines, line_number) = (
self.parse_stacktraces_while_valid(
buckets, self.log_lines, line_number))
elif self.log_version == DUMP_DEEP_2:
(self.mmap_stacktrace_lines, line_number) = (
self.parse_stacktraces_while_valid(
buckets, self.log_lines, line_number))
(line_number, _) = self.skip_lines_while(
line_number, len(self.log_lines),
lambda n: self.log_lines[n] != 'MALLOC_STACKTRACES:\n')
(self.malloc_stacktrace_lines, line_number) = (
self.parse_stacktraces_while_valid(
buckets, self.log_lines, line_number))
self.malloc_stacktrace_lines.extend(self.mmap_stacktrace_lines)
self.mmap_stacktrace_lines = []
elif self.log_version == DUMP_DEEP_1:
(self.malloc_stacktrace_lines, line_number) = (
self.parse_stacktraces_while_valid(
buckets, self.log_lines, line_number))
else:
raise ParsingException('invalid heap profile dump version: %s' % (
self.log_version))
def parse_global_stats(self):
"""Parses lines in self.log_lines as global stats."""
(ln, _) = self.skip_lines_while(
0, len(self.log_lines),
lambda n: self.log_lines[n] != 'GLOBAL_STATS:\n')
if self.log_version == DUMP_DEEP_4:
global_stat_names = [
'total', 'file-exec', 'file-nonexec', 'anonymous', 'stack', 'other',
'nonprofiled-absent', 'nonprofiled-anonymous',
'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
'nonprofiled-stack', 'nonprofiled-other',
'profiled-mmap', 'profiled-malloc']
else:
global_stat_names = [
'total', 'file', 'anonymous', 'other', 'mmap', 'tcmalloc']
for prefix in global_stat_names:
(ln, _) = self.skip_lines_while(
ln, len(self.log_lines),
lambda n: self.log_lines[n].split()[0] != prefix)
words = self.log_lines[ln].split()
self.counters[prefix + '_virtual'] = int(words[-2])
self.counters[prefix + '_committed'] = int(words[-1])
def parse_version(self):
"""Parses a version string in self.log_lines.
Returns:
      A pair of (a string representing the version of the stacktrace dump,
      and an integer indicating the line number from which stacktrace parsing
      should continue).
Raises:
ParsingException for invalid dump versions.
"""
version = ''
# Skip until an identifiable line.
headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
if not self.log_lines:
raise ParsingException('Empty heap dump file.')
(ln, found) = self.skip_lines_while(
0, len(self.log_lines),
lambda n: not self.log_lines[n].startswith(headers))
if not found:
raise ParsingException('Invalid heap dump file (no version header).')
# Identify a version.
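    # Newer dumps start with a 'heap profile: DUMP_DEEP_N' header; older dumps
    # start directly with 'STACKTRACES:' (DUMP_DEEP_1) or 'MMAP_STACKTRACES:'
    # (DUMP_DEEP_2).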
if self.log_lines[ln].startswith('heap profile: '):
version = self.log_lines[ln][13:].strip()
if (version == DUMP_DEEP_2 or version == DUMP_DEEP_3 or
version == DUMP_DEEP_4):
(ln, _) = self.skip_lines_while(
ln, len(self.log_lines),
lambda n: self.log_lines[n] != 'MMAP_STACKTRACES:\n')
else:
raise ParsingException('invalid heap profile dump version: %s'
% version)
elif self.log_lines[ln] == 'STACKTRACES:\n':
version = DUMP_DEEP_1
elif self.log_lines[ln] == 'MMAP_STACKTRACES:\n':
version = DUMP_DEEP_2
return (version, ln)
def parse_log(self, buckets):
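    """Parses the whole log: the version, global stats, then stacktraces."""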
self.log_version, ln = self.parse_version()
self.parse_global_stats()
self.parse_stacktraces(buckets, ln)
@staticmethod
def accumulate_size_for_policy(stacktrace_lines,
policy_list, buckets, sizes, mmap):
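    """Adds the committed size of each stacktrace line to 'sizes', keyed by
    the component the line is attributed to, and to the corresponding
    'tc-total-log', 'mmap-total-log' or 'other-total-log' total."""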
for l in stacktrace_lines:
words = l.split()
bucket = buckets.get(int(words[BUCKET_ID]))
component_match = get_component(policy_list, bucket, mmap)
sizes[component_match] += int(words[COMMITTED])
if component_match.startswith('tc-'):
sizes['tc-total-log'] += int(words[COMMITTED])
elif component_match.startswith('mmap-'):
sizes['mmap-total-log'] += int(words[COMMITTED])
else:
sizes['other-total-log'] += int(words[COMMITTED])
def apply_policy(self, policy_list, buckets, first_log_time):
"""Aggregates the total memory size of each component.
    Iterates through all stacktraces and attributes each of them to one of the
    components based on the policy.  It is important to apply policies in the
    right order.
    Args:
      policy_list: A list containing Policy objects. (Parsed policy data by
          parse_policy.)
      buckets: A dict mapping bucket ids to their corresponding Bucket
          objects.
      first_log_time: An integer representing the time when the first log was
          dumped.
    Returns:
      A dict mapping component names to their accumulated sizes.
"""
sys.stderr.write('apply policy:%s\n' % (self.log_path))
sizes = dict((c, 0) for c in components)
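    # The policy is expected to define bookkeeping components such as
    # 'tc-total-log', 'mmap-total-log', 'other-total-log' and 'mmap-tcmalloc';
    # they are referenced directly when deriving the sizes below.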
self.accumulate_size_for_policy(self.mmap_stacktrace_lines,
policy_list, buckets, sizes, True)
self.accumulate_size_for_policy(self.malloc_stacktrace_lines,
policy_list, buckets, sizes, False)
if self.log_version == DUMP_DEEP_4:
mmap_prefix = 'profiled-mmap'
malloc_prefix = 'profiled-malloc'
else:
mmap_prefix = 'mmap'
malloc_prefix = 'tcmalloc'
sizes['mmap-no-log'] = (
self.counters['%s_committed' % mmap_prefix] - sizes['mmap-total-log'])
sizes['mmap-total-record'] = self.counters['%s_committed' % mmap_prefix]
sizes['mmap-total-record-vm'] = self.counters['%s_virtual' % mmap_prefix]
sizes['tc-no-log'] = (
self.counters['%s_committed' % malloc_prefix] - sizes['tc-total-log'])
sizes['tc-total-record'] = self.counters['%s_committed' % malloc_prefix]
sizes['tc-unused'] = (
sizes['mmap-tcmalloc'] - self.counters['%s_committed' % malloc_prefix])
sizes['tc-total'] = sizes['mmap-tcmalloc']
for key, value in {
'total': 'total_committed',
'filemapped': 'file_committed',
'file-exec': 'file-exec_committed',
'file-nonexec': 'file-nonexec_committed',
'anonymous': 'anonymous_committed',
'stack': 'stack_committed',
'other': 'other_committed',
'nonprofiled-absent': 'nonprofiled-absent_committed',
'nonprofiled-anonymous': 'nonprofiled-anonymous_committed',
'nonprofiled-file-exec': 'nonprofiled-file-exec_committed',
'nonprofiled-file-nonexec': 'nonprofiled-file-nonexec_committed',
'nonprofiled-stack': 'nonprofiled-stack_committed',
'nonprofiled-other': 'nonprofiled-other_committed',
'total-vm': 'total_virtual',
'filemapped-vm': 'file_virtual',
'anonymous-vm': 'anonymous_virtual',
'other-vm': 'other_virtual' }.iteritems():
if key in sizes:
sizes[key] = self.counters[value]
if 'mustbezero' in sizes:
removed = (
'%s_committed' % mmap_prefix,
'nonprofiled-absent_committed',
'nonprofiled-anonymous_committed',
'nonprofiled-file-exec_committed',
'nonprofiled-file-nonexec_committed',
'nonprofiled-stack_committed',
'nonprofiled-other_committed')
sizes['mustbezero'] = (
self.counters['total_committed'] -
sum(self.counters[i] for i in removed))
if 'total-exclude-profiler' in sizes:
sizes['total-exclude-profiler'] = (
self.counters['total_committed'] - sizes['mmap-profiler'])
if 'hour' in sizes:
sizes['hour'] = (self.log_time - first_log_time) / 60.0 / 60.0
if 'minute' in sizes:
sizes['minute'] = (self.log_time - first_log_time) / 60.0
if 'second' in sizes:
sizes['second'] = self.log_time - first_log_time
return sizes
@staticmethod
def accumulate_size_for_expand(stacktrace_lines, policy_list, buckets,
component_name, depth, sizes, mmap):
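    """Accumulates committed sizes into 'sizes', keyed by the symbolized
    prefix (the first 1 + depth frames) of each stacktrace attributed to
    component_name."""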
for line in stacktrace_lines:
words = line.split()
bucket = buckets.get(int(words[BUCKET_ID]))
component_match = get_component(policy_list, bucket, mmap)
if component_match == component_name:
stacktrace_sequence = ''
for address in bucket.stacktrace[0 : min(len(bucket.stacktrace),
1 + depth)]:
stacktrace_sequence += address_symbol_dict[address] + ' '
if not stacktrace_sequence in sizes:
sizes[stacktrace_sequence] = 0
sizes[stacktrace_sequence] += int(words[COMMITTED])
def expand(self, policy_list, buckets, component_name, depth):
"""Prints all stacktraces in a given component of given depth.
Args:
policy_list: A list containing Policy objects. (Parsed policy data by
parse_policy.)
      buckets: A dict mapping bucket ids to their corresponding Bucket
          objects.
      component_name: The name of the component to filter on.
      depth: An integer representing the stacktrace depth to be printed.
"""
sizes = {}
self.accumulate_size_for_expand(
self.mmap_stacktrace_lines, policy_list, buckets, component_name,
depth, sizes, True)
self.accumulate_size_for_expand(
self.malloc_stacktrace_lines, policy_list, buckets, component_name,
depth, sizes, False)
sorted_sizes_list = sorted(
sizes.iteritems(), key=(lambda x: x[1]), reverse=True)
total = 0
for size_pair in sorted_sizes_list:
sys.stdout.write('%10d %s\n' % (size_pair[1], size_pair[0]))
total += size_pair[1]
sys.stderr.write('total: %d\n' % (total))
def update_symbols(symbol_path, mapping_lines, chrome_path):
"""Updates address/symbol mapping on memory and in a .symbol cache file.
It reads cached address/symbol mapping from a .symbol file if it exists.
Then, it resolves unresolved addresses from a Chrome binary with pprof.
Both mappings on memory and in a .symbol cache file are updated.
Symbol files are formatted as follows:
<Address> <Symbol>
<Address> <Symbol>
<Address> <Symbol>
...
Args:
    symbol_path: A string representing a path for a .symbols file.
mapping_lines: A list of strings containing /proc/.../maps.
chrome_path: A string representing a path for a Chrome binary.
"""
with open(symbol_path, mode='a+') as symbol_f:
symbol_lines = symbol_f.readlines()
if symbol_lines:
for line in symbol_lines:
items = line.split(None, 1)
address_symbol_dict[items[0]] = items[1].rstrip()
unresolved_addresses = sorted(
a for a in appeared_addresses if a not in address_symbol_dict)
if unresolved_addresses:
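      # Feed the maps lines followed by one address per line to
      # 'pprof --symbols'; pprof is expected to write back one symbol line per
      # address, which is zipped with unresolved_addresses below.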
with tempfile.NamedTemporaryFile(
suffix='maps', prefix="dmprof", mode='w+') as pprof_in:
with tempfile.NamedTemporaryFile(
suffix='symbols', prefix="dmprof", mode='w+') as pprof_out:
for line in mapping_lines:
pprof_in.write(line)
for address in unresolved_addresses:
pprof_in.write(address + '\n')
pprof_in.seek(0)
p = subprocess.Popen(
'%s --symbols %s' % (PPROF_PATH, chrome_path),
shell=True, stdin=pprof_in, stdout=pprof_out)
p.wait()
pprof_out.seek(0)
symbols = pprof_out.readlines()
symbol_f.seek(0, 2)
for address, symbol in zip(unresolved_addresses, symbols):
stripped_symbol = symbol.strip()
address_symbol_dict[address] = stripped_symbol
symbol_f.write('%s %s\n' % (address, symbol.strip()))
def parse_policy(policy_path):
"""Parses policy file.
A policy file contains component's names and their
stacktrace pattern written in regular expression.
Those patterns are matched against each symbols of
each stacktraces in the order written in the policy file
Args:
policy_path: A path for a policy file.
Returns:
A list containing component's name and its regex object
"""
with open(policy_path, mode='r') as policy_f:
policy_lines = policy_f.readlines()
policy_version = POLICY_DEEP_1
if policy_lines[0].startswith('heap profile policy: '):
policy_version = policy_lines[0][21:].strip()
policy_lines.pop(0)
policy_list = []
if policy_version == POLICY_DEEP_2 or policy_version == POLICY_DEEP_1:
sys.stderr.write(' heap profile policy version: %s\n' % policy_version)
for line in policy_lines:
if line[0] == '#':
continue
if policy_version == POLICY_DEEP_2:
(name, allocation_type, pattern) = line.strip().split(None, 2)
mmap = False
if allocation_type == 'mmap':
mmap = True
elif policy_version == POLICY_DEEP_1:
name = line.split()[0]
pattern = line[len(name) : len(line)].strip()
mmap = False
if pattern != 'default':
policy_list.append(Policy(name, mmap, pattern))
if components.count(name) == 0:
components.append(name)
else:
sys.stderr.write(' invalid heap profile policy version: %s\n' % (
policy_version))
return policy_list
def main():
  if (len(sys.argv) < 5) or (not (sys.argv[1] in ['--csv',
'--json',
'--expand',
'--list',
'--stacktrace',
'--pprof'])):
sys.stderr.write("""Usage:
%s [options] <chrome-binary> <policy> <profile> [component-name] [depth]
Options:
--csv Output result in csv format
--json Output result in json format
--stacktrace Convert raw address to symbol names
--list Lists components and their sizes
--expand Show all stacktraces in the specified component
of given depth with their sizes
--pprof Format the profile file so it can be processed
by pprof
Examples:
dmprof --csv Debug/chrome dmpolicy hprof.12345.0001.heap > result.csv
dmprof --json Debug/chrome dmpolicy hprof.12345.0001.heap > result.json
dmprof --list Debug/chrome dmpolicy hprof.12345.0012.heap
dmprof --expand Debug/chrome dmpolicy hprof.12345.0012.heap tc-webkit 4
dmprof --pprof Debug/chrome dmpolicy hprof.12345.0012.heap > for_pprof.txt
""" % (sys.argv[0]))
sys.exit(1)
action = sys.argv[1]
chrome_path = sys.argv[2]
policy_path = sys.argv[3]
log_path = sys.argv[4]
sys.stderr.write('parsing a policy file\n')
policy_list = parse_policy(policy_path)
  # Companion files share the dump's prefix: for 'hprof.12345.0001.heap',
  # the symbol cache is 'hprof.12345.symbols' and the maps file is
  # 'hprof.12345.maps'.
  p = re.compile(r'\.[0-9][0-9][0-9][0-9]\.heap')
  prefix = p.sub('', log_path)
  symbol_path = prefix + '.symbols'
  sys.stderr.write('parsing the maps file\n')
  maps_path = prefix + '.maps'
with open(maps_path, 'r') as maps_f:
maps_lines = maps_f.readlines()
  # Read bucket files named <prefix>.NNNN.buckets.  Missing sequence numbers
  # are skipped up to 10; the scan stops at the first missing number above 10.
sys.stderr.write('parsing the bucket file\n')
buckets = {}
bucket_count = 0
n = 0
while True:
buckets_path = '%s.%04d.buckets' % (prefix, n)
if not os.path.exists(buckets_path):
if n > 10:
break
n += 1
continue
sys.stderr.write('reading buckets from %s\n' % (buckets_path))
with open(buckets_path, 'r') as buckets_f:
for l in buckets_f:
words = l.split()
buckets[int(words[0])] = Bucket(words[1:])
n += 1
log_path_list = [log_path]
if action in ('--csv', '--json'):
    # Search for succeeding dumps in the sequence.  Heap dump file names end
    # with '.NNNN.heap', where NNNN is a 4-digit sequence number.
    n = int(log_path[len(log_path) - 9 : len(log_path) - 5])
    n += 1  # Skip the current file.
while True:
p = '%s.%04d.heap' % (prefix, n)
if os.path.exists(p):
log_path_list.append(p)
else:
break
n += 1
logs = []
for path in log_path_list:
new_log = Log(path)
sys.stderr.write('Parsing a dump: %s\n' % path)
try:
new_log.parse_log(buckets)
except ParsingException:
sys.stderr.write(' Ignored an invalid dump: %s\n' % path)
else:
logs.append(new_log)
sys.stderr.write('getting symbols\n')
update_symbols(symbol_path, maps_lines, chrome_path)
  # TODO(dmikurube): Many modes now. Split them into separate functions.
if action == '--stacktrace':
logs[0].dump_stacktrace(buckets)
elif action == '--csv':
sys.stdout.write(','.join(components))
sys.stdout.write('\n')
for log in logs:
component_sizes = log.apply_policy(policy_list, buckets, logs[0].log_time)
s = []
for c in components:
if c in ('hour', 'minute', 'second'):
s.append('%05.5f' % (component_sizes[c]))
else:
s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0))
sys.stdout.write(','.join(s))
sys.stdout.write('\n')
elif action == '--json':
json_base = {
'version': 'JSON_DEEP_1',
'legends': components,
'snapshots': [],
}
for log in logs:
component_sizes = log.apply_policy(policy_list, buckets, logs[0].log_time)
component_sizes['log_path'] = log.log_path
component_sizes['log_time'] = datetime.fromtimestamp(
log.log_time).strftime('%Y-%m-%d %H:%M:%S')
json_base['snapshots'].append(component_sizes)
json.dump(json_base, sys.stdout, indent=2, sort_keys=True)
elif action == '--list':
component_sizes = logs[0].apply_policy(
policy_list, buckets, logs[0].log_time)
for c in components:
if c in ['hour', 'minute', 'second']:
sys.stdout.write('%30s %10.3f\n' % (c, component_sizes[c]))
else:
sys.stdout.write('%30s %10.3f\n' % (
c, component_sizes[c] / 1024.0 / 1024.0))
elif action == '--expand':
component_name = sys.argv[5]
depth = sys.argv[6]
logs[0].expand(policy_list, buckets, component_name, int(depth))
elif action == '--pprof':
if len(sys.argv) > 5:
logs[0].dump_for_pprof(policy_list, buckets, maps_lines, sys.argv[5])
else:
logs[0].dump_for_pprof(policy_list, buckets, maps_lines, None)
if __name__ == '__main__':
sys.exit(main())