| #!/usr/bin/env python |
| # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """The deep heap profiler script for Chrome.""" |
| |
| from datetime import datetime |
| import json |
| import os |
| import re |
| import subprocess |
| import sys |
| import tempfile |
| |
# Word-index constants for one stacktrace line in a heap profile dump.
# A line looks like: "<virtual> <committed> <allocs> <frees> @ <bucket id> ..."
BUCKET_ID = 5
VIRTUAL = 0
COMMITTED = 1
ALLOC_COUNT = 2
FREE_COUNT = 3
# NOTE(review): NULL_REGEX appears unused in this file — confirm before removal.
NULL_REGEX = re.compile('')
# Path to the pprof script bundled in Chromium's tcmalloc checkout, resolved
# relative to this script's own location (two directories up).
PPROF_PATH = os.path.join(os.path.dirname(__file__),
                          os.pardir,
                          os.pardir,
                          'third_party',
                          'tcmalloc',
                          'chromium',
                          'src',
                          'pprof')

# Heap Profile Dump versions

# DUMP_DEEP_1 DOES NOT distinct mmap regions and malloc chunks.
# Their stacktraces DO contain mmap* or tc-* at their tops.
# They should be processed by POLICY_DEEP_1.
DUMP_DEEP_1 = 'DUMP_DEEP_1'

# DUMP_DEEP_2 DOES distinct mmap regions and malloc chunks.
# Their stacktraces still DO contain mmap* or tc-*.
# They should be processed by POLICY_DEEP_1.
DUMP_DEEP_2 = 'DUMP_DEEP_2'

# DUMP_DEEP_3 DOES distinct mmap regions and malloc chunks.
# Their stacktraces DO NOT contain mmap* or tc-*.
# They should be processed by POLICY_DEEP_2.
DUMP_DEEP_3 = 'DUMP_DEEP_3'

# DUMP_DEEP_4 adds some features to DUMP_DEEP_3:
# 1. Support comments starting with '#'
# 2. Support additional global stats: e.g. nonprofiled-*.
DUMP_DEEP_4 = 'DUMP_DEEP_4'

# Heap Profile Policy versions

# POLICY_DEEP_1 DOES NOT include allocation_type columns.
# mmap regions are distincted w/ mmap frames in the pattern column.
POLICY_DEEP_1 = 'POLICY_DEEP_1'

# POLICY_DEEP_2 DOES include allocation_type columns.
# mmap regions are distincted w/ the allocation_type column.
POLICY_DEEP_2 = 'POLICY_DEEP_2'

# TODO(dmikurube): Avoid global variables.
address_symbol_dict = {}  # Maps address strings to resolved symbol names.
appeared_addresses = set()  # Addresses seen in any valid stacktrace line.
components = []  # Component names, in the order they appear in the policy.
| |
| |
class ParsingException(Exception):
  """Raised when a heap profile dump cannot be parsed."""

  def __init__(self, value):
    self.value = value

  def __str__(self):
    return '%r' % (self.value,)
| |
| |
class Policy(object):
  """One rule from a policy file.

  Attributes:
    name: The component name this rule assigns.
    mmap: Whether the rule applies to mmap regions (vs. malloc chunks).
    condition: The compiled stacktrace pattern, anchored at end-of-string.
  """

  def __init__(self, name, mmap, pattern):
    self.name = name
    self.mmap = mmap
    # Appending r'\Z' forces the pattern to cover the entire stacktrace
    # string, not merely a prefix of it.
    self.condition = re.compile('%s\\Z' % pattern)
| |
| |
def get_component(policy_list, bucket, mmap):
  """Returns a component name which a given bucket belongs to.

  The result is cached on the bucket itself, so each bucket is matched
  against the policy list at most once.

  Args:
    policy_list: A list containing Policy objects. (Parsed policy data by
        parse_policy.)
    bucket: A Bucket object to be searched for.
    mmap: True if searching for a mmap region.

  Returns:
    A string representing a component name.
  """
  if not bucket:
    return 'no-bucket'
  if bucket.component:
    # Already classified by an earlier call; reuse the cached result.
    return bucket.component

  # Symbolize the whole stacktrace into one space-separated string.
  # (Symbol values are stored stripped, so a plain join is equivalent.)
  stacktrace = ' '.join(
      address_symbol_dict[addr] for addr in bucket.stacktrace)

  for policy in policy_list:
    if policy.mmap == mmap and policy.condition.match(stacktrace):
      bucket.component = policy.name
      return policy.name

  # Policy files are expected to contain a catch-all rule; reaching here
  # means no rule matched this stacktrace at all.
  assert False
| |
| |
class Bucket(object):
  """A profile bucket: one stacktrace plus its cached component name.

  Attributes:
    stacktrace: A list of address strings forming the stacktrace.
    component: The component name cached by get_component; '' until matched.
  """

  def __init__(self, stacktrace):
    self.component = ''
    self.stacktrace = stacktrace
| |
| |
class Log(object):

  """A class representing one dumped log data.

  Holds the raw dump lines (empty and '#'-comment lines filtered out), the
  stacktrace lines split into mmap/malloc groups, and the parsed global
  counters keyed as '<stat>_virtual' / '<stat>_committed'.
  """
  def __init__(self, log_path):
    self.log_path = log_path
    # Filter out empty lines and '#' comments (comments appear in DUMP_DEEP_4).
    # NOTE(review): the file object from open() is never explicitly closed.
    self.log_lines = [
        l for l in open(self.log_path, 'r') if l and not l.startswith('#')]
    self.log_version = ''
    sys.stderr.write('Loading a dump: %s\n' % log_path)
    self.mmap_stacktrace_lines = []
    self.malloc_stacktrace_lines = []
    self.counters = {}
    # The dump's timestamp is taken from the file's mtime (epoch seconds).
    self.log_time = os.stat(self.log_path).st_mtime

  @staticmethod
  def dump_stacktrace_lines(stacktrace_lines, buckets):
    """Prints a given stacktrace.

    Writes each line's leading numeric columns followed by the bucket's
    stacktrace with addresses replaced by symbols where resolved.

    Args:
      stacktrace_lines: A list of strings which are valid as stacktraces.
      buckets: A dict mapping bucket ids and their corresponding Bucket
          objects.
    """
    for l in stacktrace_lines:
      words = l.split()
      bucket = buckets.get(int(words[BUCKET_ID]))
      if not bucket:
        continue
      # Emit words[0..BUCKET_ID-2] (the numeric columns), skipping the '@'
      # separator at words[BUCKET_ID - 1].
      for i in range(0, BUCKET_ID - 1):
        sys.stdout.write(words[i] + ' ')
      for address in bucket.stacktrace:
        # Fall back to the raw address when no symbol is known.
        sys.stdout.write((address_symbol_dict.get(address) or address) + ' ')
      sys.stdout.write('\n')

  def dump_stacktrace(self, buckets):
    """Prints stacktraces contained in the log.

    Args:
      buckets: A dict mapping bucket ids and their corresponding Bucket
          objects.
    """
    self.dump_stacktrace_lines(self.mmap_stacktrace_lines, buckets)
    self.dump_stacktrace_lines(self.malloc_stacktrace_lines, buckets)

  @staticmethod
  def accumulate_size_for_pprof(stacktrace_lines, policy_list, buckets,
                                component_name, mmap):
    """Accumulates size of committed chunks and the number of allocated chunks.

    Args:
      stacktrace_lines: A list of strings which are valid as stacktraces.
      policy_list: A list containing Policy objects. (Parsed policy data by
          parse_policy.)
      buckets: A dict mapping bucket ids and their corresponding Bucket
          objects.
      component_name: A name of component for filtering.
      mmap: True if searching for a mmap region.

    Returns:
      Two integers which are the accumulated size of committed regions and the
      number of allocated chunks, respectively.
    """
    com_committed = 0
    com_allocs = 0
    for l in stacktrace_lines:
      words = l.split()
      bucket = buckets.get(int(words[BUCKET_ID]))
      # Skip unknown buckets; when filtering, skip non-matching components.
      if (not bucket or
          (component_name and
           component_name != get_component(policy_list, bucket, mmap))):
        continue

      com_committed += int(words[COMMITTED])
      # Live chunk count = allocations minus frees.
      com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT])

    return com_committed, com_allocs

  @staticmethod
  def dump_stacktrace_lines_for_pprof(stacktrace_lines, policy_list,
                                      buckets, component_name, mmap):
    """Prints information of stacktrace lines for pprof.

    Args:
      stacktrace_lines: A list of strings which are valid as stacktraces.
      policy_list: A list containing Policy objects. (Parsed policy data by
          parse_policy.)
      buckets: A dict mapping bucket ids and their corresponding Bucket
          objects.
      component_name: A name of component for filtering.
      mmap: True if searching for a mmap region.
    """
    for l in stacktrace_lines:
      words = l.split()
      bucket = buckets.get(int(words[BUCKET_ID]))
      if (not bucket or
          (component_name and
           component_name != get_component(policy_list, bucket, mmap))):
        continue

      # pprof heap format: "allocs: size [allocs: size] @ addr addr ..."
      sys.stdout.write('%6d: %8s [%6d: %8s] @' % (
          int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),
          words[COMMITTED],
          int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),
          words[COMMITTED]))
      for address in bucket.stacktrace:
        sys.stdout.write(' ' + address)
      sys.stdout.write('\n')

  def dump_for_pprof(self, policy_list, buckets, mapping_lines, component_name):
    """Converts the log file so it can be processed by pprof.

    Args:
      policy_list: A list containing Policy objects. (Parsed policy data by
          parse_policy.)
      buckets: A dict mapping bucket ids and their corresponding Bucket
          objects.
      mapping_lines: A list of strings containing /proc/.../maps.
      component_name: A name of component for filtering.
    """
    sys.stdout.write('heap profile: ')
    # Totals combine both the mmap and the malloc stacktrace groups.
    com_committed, com_allocs = self.accumulate_size_for_pprof(
        self.mmap_stacktrace_lines, policy_list, buckets, component_name,
        True)
    add_committed, add_allocs = self.accumulate_size_for_pprof(
        self.malloc_stacktrace_lines, policy_list, buckets, component_name,
        False)
    com_committed += add_committed
    com_allocs += add_allocs

    sys.stdout.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % (
        com_allocs, com_committed, com_allocs, com_committed))

    self.dump_stacktrace_lines_for_pprof(
        self.mmap_stacktrace_lines, policy_list, buckets, component_name,
        True)
    self.dump_stacktrace_lines_for_pprof(
        self.malloc_stacktrace_lines, policy_list, buckets, component_name,
        False)

    # pprof needs the process mappings to resolve addresses to binaries.
    sys.stdout.write('MAPPED_LIBRARIES:\n')
    for l in mapping_lines:
      sys.stdout.write(l)

  @staticmethod
  def check_stacktrace_line(stacktrace_line, buckets):
    """Checks if a given stacktrace_line is valid as stacktrace.

    As a side effect, records all addresses of a valid line's bucket in the
    global appeared_addresses set (used later for symbol resolution).

    Args:
      stacktrace_line: A string to be checked.
      buckets: A dict mapping bucket ids and their corresponding Bucket
          objects.

    Returns:
      True if the given stacktrace_line is valid.
    """
    words = stacktrace_line.split()
    if len(words) < BUCKET_ID + 1:
      return False
    # A valid line has the '@' separator just before the bucket id.
    if words[BUCKET_ID - 1] != '@':
      return False
    bucket = buckets.get(int(words[BUCKET_ID]))
    if bucket:
      for address in bucket.stacktrace:
        appeared_addresses.add(address)
    return True

  @staticmethod
  def skip_lines_while(line_number, max_line_number, skipping_condition):
    """Increments line_number until skipping_condition(line_number) is false.

    Returns:
      A pair of an integer indicating a line number after skipped, and a
      boolean value which is True if found a line which skipping_condition
      is False for.
    """
    while skipping_condition(line_number):
      line_number += 1
      if line_number >= max_line_number:
        return line_number, False
    return line_number, True

  def parse_stacktraces_while_valid(self, buckets, log_lines, line_number):
    """Parses stacktrace lines while the lines are valid.

    Args:
      buckets: A dict mapping bucket ids and their corresponding Bucket
          objects.
      log_lines: A list of lines to be parsed.
      line_number: An integer representing the starting line number in
          log_lines.

    Returns:
      A pair of a list of valid lines and an integer representing the last
      line number in log_lines.
    """
    # Skip any leading lines that do not start with a number (headers).
    # NOTE(review): a whitespace-only line would make split()[0] raise
    # IndexError here — presumably dumps never contain one; verify.
    (line_number, _) = self.skip_lines_while(
        line_number, len(log_lines),
        lambda n: not log_lines[n].split()[0].isdigit())
    stacktrace_lines_start = line_number
    # Consume consecutive valid stacktrace lines.
    (line_number, _) = self.skip_lines_while(
        line_number, len(log_lines),
        lambda n: self.check_stacktrace_line(log_lines[n], buckets))
    return (log_lines[stacktrace_lines_start:line_number], line_number)

  def parse_stacktraces(self, buckets, line_number):
    """Parses lines in self.log_lines as stacktrace.

    Valid stacktrace lines are stored into self.mmap_stacktrace_lines and
    self.malloc_stacktrace_lines.

    Args:
      buckets: A dict mapping bucket ids and their corresponding Bucket
          objects.
      line_number: An integer representing the starting line number in
          log_lines.

    Raises:
      ParsingException for invalid dump versions.
    """
    sys.stderr.write('  Version: %s\n' % self.log_version)

    if self.log_version in (DUMP_DEEP_3, DUMP_DEEP_4):
      # MMAP_STACKTRACES comes first, then MALLOC_STACKTRACES.
      (self.mmap_stacktrace_lines, line_number) = (
          self.parse_stacktraces_while_valid(
              buckets, self.log_lines, line_number))
      (line_number, _) = self.skip_lines_while(
          line_number, len(self.log_lines),
          lambda n: self.log_lines[n] != 'MALLOC_STACKTRACES:\n')
      (self.malloc_stacktrace_lines, line_number) = (
          self.parse_stacktraces_while_valid(
              buckets, self.log_lines, line_number))

    elif self.log_version == DUMP_DEEP_2:
      (self.mmap_stacktrace_lines, line_number) = (
          self.parse_stacktraces_while_valid(
              buckets, self.log_lines, line_number))
      (line_number, _) = self.skip_lines_while(
          line_number, len(self.log_lines),
          lambda n: self.log_lines[n] != 'MALLOC_STACKTRACES:\n')
      (self.malloc_stacktrace_lines, line_number) = (
          self.parse_stacktraces_while_valid(
              buckets, self.log_lines, line_number))
      # DUMP_DEEP_2 stacktraces still contain mmap*/tc-* frames (see the
      # version comments at top of file), so both groups are merged and
      # treated as malloc traces for POLICY_DEEP_1 pattern matching.
      self.malloc_stacktrace_lines.extend(self.mmap_stacktrace_lines)
      self.mmap_stacktrace_lines = []

    elif self.log_version == DUMP_DEEP_1:
      # DUMP_DEEP_1 has a single undifferentiated STACKTRACES section.
      (self.malloc_stacktrace_lines, line_number) = (
          self.parse_stacktraces_while_valid(
              buckets, self.log_lines, line_number))

    else:
      raise ParsingException('invalid heap profile dump version: %s' % (
          self.log_version))

  def parse_global_stats(self):
    """Parses lines in self.log_lines as global stats.

    Fills self.counters with '<stat>_virtual' and '<stat>_committed' keys,
    taking the last two whitespace-separated fields of each stat line.
    """
    (ln, _) = self.skip_lines_while(
        0, len(self.log_lines),
        lambda n: self.log_lines[n] != 'GLOBAL_STATS:\n')

    # DUMP_DEEP_4 reports a finer-grained set of stats (nonprofiled-*).
    if self.log_version == DUMP_DEEP_4:
      global_stat_names = [
          'total', 'file-exec', 'file-nonexec', 'anonymous', 'stack', 'other',
          'nonprofiled-absent', 'nonprofiled-anonymous',
          'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
          'nonprofiled-stack', 'nonprofiled-other',
          'profiled-mmap', 'profiled-malloc']
    else:
      global_stat_names = [
          'total', 'file', 'anonymous', 'other', 'mmap', 'tcmalloc']

    # Stats are expected in this order; scan forward to each in turn.
    for prefix in global_stat_names:
      (ln, _) = self.skip_lines_while(
          ln, len(self.log_lines),
          lambda n: self.log_lines[n].split()[0] != prefix)
      words = self.log_lines[ln].split()
      self.counters[prefix + '_virtual'] = int(words[-2])
      self.counters[prefix + '_committed'] = int(words[-1])

  def parse_version(self):
    """Parses a version string in self.log_lines.

    Returns:
      A pair of (a string representing a version of the stacktrace dump,
      and an integer indicating a line number next to the version string).

    Raises:
      ParsingException for invalid dump versions.
    """
    version = ''

    # Skip until an identifiable line.
    headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
    if not self.log_lines:
      raise ParsingException('Empty heap dump file.')
    (ln, found) = self.skip_lines_while(
        0, len(self.log_lines),
        lambda n: not self.log_lines[n].startswith(headers))
    if not found:
      raise ParsingException('Invalid heap dump file (no version header).')

    # Identify a version.
    if self.log_lines[ln].startswith('heap profile: '):
      # Slice past 'heap profile:' — the leading space is removed by strip().
      version = self.log_lines[ln][13:].strip()
      if (version == DUMP_DEEP_2 or version == DUMP_DEEP_3 or
          version == DUMP_DEEP_4):
        (ln, _) = self.skip_lines_while(
            ln, len(self.log_lines),
            lambda n: self.log_lines[n] != 'MMAP_STACKTRACES:\n')
      else:
        raise ParsingException('invalid heap profile dump version: %s'
                               % version)
    elif self.log_lines[ln] == 'STACKTRACES:\n':
      # Old dumps with a bare STACKTRACES header are DUMP_DEEP_1.
      version = DUMP_DEEP_1
    elif self.log_lines[ln] == 'MMAP_STACKTRACES:\n':
      # Dumps with MMAP_STACKTRACES but no version line are DUMP_DEEP_2.
      version = DUMP_DEEP_2

    return (version, ln)

  def parse_log(self, buckets):
    """Parses the whole dump: version, global stats, then stacktraces."""
    self.log_version, ln = self.parse_version()
    self.parse_global_stats()
    self.parse_stacktraces(buckets, ln)

  @staticmethod
  def accumulate_size_for_policy(stacktrace_lines,
                                 policy_list, buckets, sizes, mmap):
    """Adds each line's committed size to its component's total in sizes.

    Also accumulates the 'tc-total-log' / 'mmap-total-log' /
    'other-total-log' roll-up counters based on the component name prefix.
    """
    for l in stacktrace_lines:
      words = l.split()
      bucket = buckets.get(int(words[BUCKET_ID]))
      component_match = get_component(policy_list, bucket, mmap)
      sizes[component_match] += int(words[COMMITTED])

      if component_match.startswith('tc-'):
        sizes['tc-total-log'] += int(words[COMMITTED])
      elif component_match.startswith('mmap-'):
        sizes['mmap-total-log'] += int(words[COMMITTED])
      else:
        sizes['other-total-log'] += int(words[COMMITTED])

  def apply_policy(self, policy_list, buckets, first_log_time):
    """Aggregates the total memory size of each component.

    Iterate through all stacktraces and attribute them to one of the components
    based on the policy. It is important to apply policy in right order.

    Args:
      policy_list: A list containing Policy objects. (Parsed policy data by
          parse_policy.)
      buckets: A dict mapping bucket ids and their corresponding Bucket
          objects.
      first_log_time: An integer representing time when the first log is
          dumped.

    Returns:
      A dict mapping components and their corresponding sizes.
    """

    sys.stderr.write('apply policy:%s\n' % (self.log_path))
    sizes = dict((c, 0) for c in components)

    self.accumulate_size_for_policy(self.mmap_stacktrace_lines,
                                    policy_list, buckets, sizes, True)
    self.accumulate_size_for_policy(self.malloc_stacktrace_lines,
                                    policy_list, buckets, sizes, False)

    # Counter key prefixes differ between dump versions (see parse_global_stats).
    if self.log_version == DUMP_DEEP_4:
      mmap_prefix = 'profiled-mmap'
      malloc_prefix = 'profiled-malloc'
    else:
      mmap_prefix = 'mmap'
      malloc_prefix = 'tcmalloc'

    # '*-no-log' = recorded total minus what the stacktraces account for.
    sizes['mmap-no-log'] = (
        self.counters['%s_committed' % mmap_prefix] - sizes['mmap-total-log'])
    sizes['mmap-total-record'] = self.counters['%s_committed' % mmap_prefix]
    sizes['mmap-total-record-vm'] = self.counters['%s_virtual' % mmap_prefix]

    sizes['tc-no-log'] = (
        self.counters['%s_committed' % malloc_prefix] - sizes['tc-total-log'])
    sizes['tc-total-record'] = self.counters['%s_committed' % malloc_prefix]
    # Assumes the policy defines a 'mmap-tcmalloc' component; a KeyError here
    # means the policy file lacks it.
    sizes['tc-unused'] = (
        sizes['mmap-tcmalloc'] - self.counters['%s_committed' % malloc_prefix])
    sizes['tc-total'] = sizes['mmap-tcmalloc']

    # Overwrite pseudo-components with their global-stat counters, but only
    # for components the policy actually declares.
    # NOTE: iteritems is Python 2 only.
    for key, value in {
        'total': 'total_committed',
        'filemapped': 'file_committed',
        'file-exec': 'file-exec_committed',
        'file-nonexec': 'file-nonexec_committed',
        'anonymous': 'anonymous_committed',
        'stack': 'stack_committed',
        'other': 'other_committed',
        'nonprofiled-absent': 'nonprofiled-absent_committed',
        'nonprofiled-anonymous': 'nonprofiled-anonymous_committed',
        'nonprofiled-file-exec': 'nonprofiled-file-exec_committed',
        'nonprofiled-file-nonexec': 'nonprofiled-file-nonexec_committed',
        'nonprofiled-stack': 'nonprofiled-stack_committed',
        'nonprofiled-other': 'nonprofiled-other_committed',
        'total-vm': 'total_virtual',
        'filemapped-vm': 'file_virtual',
        'anonymous-vm': 'anonymous_virtual',
        'other-vm': 'other_virtual' }.iteritems():
      if key in sizes:
        sizes[key] = self.counters[value]

    if 'mustbezero' in sizes:
      # Everything committed should be covered by the removed categories;
      # any remainder indicates unaccounted memory.
      removed = (
          '%s_committed' % mmap_prefix,
          'nonprofiled-absent_committed',
          'nonprofiled-anonymous_committed',
          'nonprofiled-file-exec_committed',
          'nonprofiled-file-nonexec_committed',
          'nonprofiled-stack_committed',
          'nonprofiled-other_committed')
      sizes['mustbezero'] = (
          self.counters['total_committed'] -
          sum(self.counters[i] for i in removed))
    if 'total-exclude-profiler' in sizes:
      sizes['total-exclude-profiler'] = (
          self.counters['total_committed'] - sizes['mmap-profiler'])
    # Elapsed time since the first dump, in the unit the component asks for.
    if 'hour' in sizes:
      sizes['hour'] = (self.log_time - first_log_time) / 60.0 / 60.0
    if 'minute' in sizes:
      sizes['minute'] = (self.log_time - first_log_time) / 60.0
    if 'second' in sizes:
      sizes['second'] = self.log_time - first_log_time

    return sizes

  @staticmethod
  def accumulate_size_for_expand(stacktrace_lines, policy_list, buckets,
                                 component_name, depth, sizes, mmap):
    """Sums committed sizes per symbolized stacktrace prefix of given depth."""
    for line in stacktrace_lines:
      words = line.split()
      bucket = buckets.get(int(words[BUCKET_ID]))
      component_match = get_component(policy_list, bucket, mmap)
      if component_match == component_name:
        # Key on the first (1 + depth) frames, symbolized.
        stacktrace_sequence = ''
        for address in bucket.stacktrace[0 : min(len(bucket.stacktrace),
                                                 1 + depth)]:
          stacktrace_sequence += address_symbol_dict[address] + ' '
        if not stacktrace_sequence in sizes:
          sizes[stacktrace_sequence] = 0
        sizes[stacktrace_sequence] += int(words[COMMITTED])

  def expand(self, policy_list, buckets, component_name, depth):
    """Prints all stacktraces in a given component of given depth.

    Args:
      policy_list: A list containing Policy objects. (Parsed policy data by
          parse_policy.)
      buckets: A dict mapping bucket ids and their corresponding Bucket
          objects.
      component_name: A name of component for filtering.
      depth: An integer representing depth to be printed.
    """
    sizes = {}

    self.accumulate_size_for_expand(
        self.mmap_stacktrace_lines, policy_list, buckets, component_name,
        depth, sizes, True)
    self.accumulate_size_for_expand(
        self.malloc_stacktrace_lines, policy_list, buckets, component_name,
        depth, sizes, False)

    # Largest consumers first.  NOTE: iteritems is Python 2 only.
    sorted_sizes_list = sorted(
        sizes.iteritems(), key=(lambda x: x[1]), reverse=True)
    total = 0
    for size_pair in sorted_sizes_list:
      sys.stdout.write('%10d %s\n' % (size_pair[1], size_pair[0]))
      total += size_pair[1]
    sys.stderr.write('total: %d\n' % (total))
| |
| |
def update_symbols(symbol_path, mapping_lines, chrome_path):
  """Updates address/symbol mapping on memory and in a .symbol cache file.

  It reads cached address/symbol mapping from a .symbol file if it exists.
  Then, it resolves unresolved addresses from a Chrome binary with pprof.
  Both mappings on memory and in a .symbol cache file are updated.

  Symbol files are formatted as follows:
    <Address> <Symbol>
    <Address> <Symbol>
    <Address> <Symbol>
    ...

  Args:
    symbol_path: A string representing a path for a .symbol file.
    mapping_lines: A list of strings containing /proc/.../maps.
    chrome_path: A string representing a path for a Chrome binary.
  """
  # 'a+' creates the cache file when absent and allows both read and append.
  with open(symbol_path, mode='a+') as symbol_f:
    # Load the cached mapping first; a missing cache reads as no lines.
    for line in symbol_f.readlines():
      items = line.split(None, 1)
      address_symbol_dict[items[0]] = items[1].rstrip()

    unresolved_addresses = sorted(
        a for a in appeared_addresses if a not in address_symbol_dict)

    if unresolved_addresses:
      # pprof --symbols reads the maps followed by one address per line, and
      # prints one resolved symbol per input address, in order.
      with tempfile.NamedTemporaryFile(
          suffix='maps', prefix="dmprof", mode='w+') as pprof_in:
        with tempfile.NamedTemporaryFile(
            suffix='symbols', prefix="dmprof", mode='w+') as pprof_out:
          for line in mapping_lines:
            pprof_in.write(line)

          for address in unresolved_addresses:
            pprof_in.write(address + '\n')

          pprof_in.seek(0)

          # Pass an argument list with shell=False so that paths containing
          # spaces or shell metacharacters cannot break (or inject into) a
          # shell command line.
          p = subprocess.Popen([PPROF_PATH, '--symbols', chrome_path],
                               stdin=pprof_in, stdout=pprof_out)
          p.wait()

          pprof_out.seek(0)
          symbols = pprof_out.readlines()
          symbol_f.seek(0, 2)  # Append new entries at the end of the cache.
          for address, symbol in zip(unresolved_addresses, symbols):
            stripped_symbol = symbol.strip()
            address_symbol_dict[address] = stripped_symbol
            symbol_f.write('%s %s\n' % (address, stripped_symbol))
| |
| |
def parse_policy(policy_path):
  """Reads a policy file and builds the corresponding Policy list.

  A policy file contains component names and their stacktrace patterns
  written as regular expressions.  Patterns are matched against the
  symbolized stacktraces in the order they appear in the file, so order
  matters.  Component names are also registered into the global
  'components' list in file order.

  Args:
    policy_path: A path for a policy file.
  Returns:
    A list of Policy objects (empty for an unknown policy version).
  """
  with open(policy_path, mode='r') as policy_f:
    policy_lines = policy_f.readlines()

  # An optional first line declares the policy version; default is DEEP_1.
  policy_version = POLICY_DEEP_1
  if policy_lines[0].startswith('heap profile policy: '):
    policy_version = policy_lines[0][21:].strip()
    policy_lines.pop(0)
  policy_list = []

  if policy_version not in (POLICY_DEEP_1, POLICY_DEEP_2):
    sys.stderr.write('  invalid heap profile policy version: %s\n' % (
        policy_version))
    return policy_list

  sys.stderr.write('  heap profile policy version: %s\n' % policy_version)
  for line in policy_lines:
    if line.startswith('#'):
      continue

    if policy_version == POLICY_DEEP_2:
      # DEEP_2 lines: "<name> <allocation_type> <pattern>".
      (name, allocation_type, pattern) = line.strip().split(None, 2)
      mmap = (allocation_type == 'mmap')
    else:
      # DEEP_1 lines: "<name> <pattern>"; mmap is encoded in the pattern.
      name = line.split()[0]
      pattern = line[len(name):].strip()
      mmap = False

    # 'default' rows only terminate the file; they produce no rule.
    if pattern != 'default':
      policy_list.append(Policy(name, mmap, pattern))
      if name not in components:
        components.append(name)

  return policy_list
| |
| |
def main():
  """Entry point: parses arguments and dispatches to the requested action.

  Returns:
    None on success; exits with status 1 on a usage error.
  """
  # The usage requires 4 positional arguments after the program name
  # (<action> <chrome-binary> <policy> <profile>), i.e. len(sys.argv) >= 5.
  # Checking "< 4" here would let sys.argv[4] below raise IndexError.
  if (len(sys.argv) < 5) or (not (sys.argv[1] in ['--csv',
                                                  '--json',
                                                  '--expand',
                                                  '--list',
                                                  '--stacktrace',
                                                  '--pprof'])):
    sys.stderr.write("""Usage:
%s [options] <chrome-binary> <policy> <profile> [component-name] [depth]

Options:
   --csv        Output result in csv format
   --json       Output result in json format
   --stacktrace Convert raw address to symbol names
   --list       Lists components and their sizes
   --expand     Show all stacktraces in the specified component
                of given depth with their sizes
   --pprof      Format the profile file so it can be processed
                by pprof

Examples:
  dmprof --csv Debug/chrome dmpolicy hprof.12345.0001.heap > result.csv
  dmprof --json Debug/chrome dmpolicy hprof.12345.0001.heap > result.json
  dmprof --list Debug/chrome dmpolicy hprof.12345.0012.heap
  dmprof --expand Debug/chrome dmpolicy hprof.12345.0012.heap tc-webkit 4
  dmprof --pprof Debug/chrome dmpolicy hprof.12345.0012.heap > for_pprof.txt
""" % (sys.argv[0]))
    sys.exit(1)

  action = sys.argv[1]
  chrome_path = sys.argv[2]
  policy_path = sys.argv[3]
  log_path = sys.argv[4]

  sys.stderr.write('parsing a policy file\n')
  policy_list = parse_policy(policy_path)

  # Derive the dump-set prefix by stripping the ".NNNN.heap" suffix; the
  # .symbols, .maps and .buckets files share this prefix.
  p = re.compile(r'\.[0-9][0-9][0-9][0-9]\.heap')
  prefix = p.sub('', log_path)
  symbol_path = prefix + '.symbols'

  sys.stderr.write('parsing the maps file\n')
  maps_path = prefix + '.maps'
  with open(maps_path, 'r') as maps_f:
    maps_lines = maps_f.readlines()

  # Reading buckets.  Bucket files are numbered <prefix>.0000.buckets,
  # <prefix>.0001.buckets, ...; tolerate gaps up to index 10.
  sys.stderr.write('parsing the bucket file\n')
  buckets = {}
  bucket_count = 0
  n = 0
  while True:
    buckets_path = '%s.%04d.buckets' % (prefix, n)
    if not os.path.exists(buckets_path):
      if n > 10:
        break
      n += 1
      continue
    sys.stderr.write('reading buckets from %s\n' % (buckets_path))
    with open(buckets_path, 'r') as buckets_f:
      # Each line: "<bucket id> <address> <address> ...".
      for l in buckets_f:
        words = l.split()
        buckets[int(words[0])] = Bucket(words[1:])
    n += 1

  log_path_list = [log_path]

  if action in ('--csv', '--json'):
    # Search for the sequence of files following the given one: extract the
    # NNNN counter from "<prefix>.NNNN.heap" and walk forward until a gap.
    n = int(log_path[len(log_path) - 9 : len(log_path) - 5])
    n += 1  # skip current file
    while True:
      p = '%s.%04d.heap' % (prefix, n)
      if os.path.exists(p):
        log_path_list.append(p)
      else:
        break
      n += 1

  logs = []
  for path in log_path_list:
    new_log = Log(path)
    sys.stderr.write('Parsing a dump: %s\n' % path)
    try:
      new_log.parse_log(buckets)
    except ParsingException:
      # Skip invalid dumps instead of aborting the whole series.
      sys.stderr.write('  Ignored an invalid dump: %s\n' % path)
    else:
      logs.append(new_log)

  sys.stderr.write('getting symbols\n')
  update_symbols(symbol_path, maps_lines, chrome_path)

  # TODO(dmikurube): Many modes now.  Split them into separete functions.
  if action == '--stacktrace':
    logs[0].dump_stacktrace(buckets)

  elif action == '--csv':
    sys.stdout.write(','.join(components))
    sys.stdout.write('\n')

    for log in logs:
      component_sizes = log.apply_policy(policy_list, buckets, logs[0].log_time)
      s = []
      for c in components:
        if c in ('hour', 'minute', 'second'):
          s.append('%05.5f' % (component_sizes[c]))
        else:
          # Byte counts are reported in megabytes.
          s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0))
      sys.stdout.write(','.join(s))
      sys.stdout.write('\n')

  elif action == '--json':
    json_base = {
      'version': 'JSON_DEEP_1',
      'legends': components,
      'snapshots': [],
    }
    for log in logs:
      component_sizes = log.apply_policy(policy_list, buckets, logs[0].log_time)
      component_sizes['log_path'] = log.log_path
      component_sizes['log_time'] = datetime.fromtimestamp(
          log.log_time).strftime('%Y-%m-%d %H:%M:%S')
      json_base['snapshots'].append(component_sizes)
    json.dump(json_base, sys.stdout, indent=2, sort_keys=True)

  elif action == '--list':
    component_sizes = logs[0].apply_policy(
        policy_list, buckets, logs[0].log_time)
    for c in components:
      if c in ['hour', 'minute', 'second']:
        sys.stdout.write('%30s %10.3f\n' % (c, component_sizes[c]))
      else:
        sys.stdout.write('%30s %10.3f\n' % (
            c, component_sizes[c] / 1024.0 / 1024.0))

  elif action == '--expand':
    # NOTE(review): --expand reads sys.argv[5] and [6] without a count
    # check; running it with too few arguments raises IndexError.
    component_name = sys.argv[5]
    depth = sys.argv[6]
    logs[0].expand(policy_list, buckets, component_name, int(depth))

  elif action == '--pprof':
    if len(sys.argv) > 5:
      logs[0].dump_for_pprof(policy_list, buckets, maps_lines, sys.argv[5])
    else:
      logs[0].dump_for_pprof(policy_list, buckets, maps_lines, None)
| |
| |
if __name__ == '__main__':
  # main() returns None on success, so sys.exit(main()) exits with status 0;
  # usage errors exit with status 1 inside main() itself.
  sys.exit(main())