Add namespaces, some unmangling, symbol colors, dominant symbols.
diff --git a/bloat.py b/bloat.py
index d7fc126..c5cbb31 100755
--- a/bloat.py
+++ b/bloat.py
@@ -1,10 +1,12 @@
#!/usr/bin/python
import fileinput
+import operator
import optparse
import os
import pprint
import re
+import subprocess
import sys
import json
@@ -77,66 +79,130 @@
print >>sys.stderr, 'unparsed:', repr(line)
-def filter_syms(types, symbols):
- for sym, type, size, path in symbols:
- if type in types:
- yield sym, type, size, path
+def parse_cpp_name(name):
+ # Turn prefixes into suffixes so namespacing works.
+ prefixes = [
+ ['bool ', ''],
+ ['construction vtable for ', ' [construction vtable]'],
+ ['global constructors keyed to ', ' [global constructors]'],
+ ['int ', ''],
+ ['non-virtual thunk to ', ' [non-virtual thunk]'],
+ ['typeinfo for ', ' [typeinfo]'],
+ ['typeinfo name for ', ' [typeinfo name]'],
+ ['virtual thunk to ', ' [virtual thunk]'],
+ ['void ', ''],
+ ['vtable for ', ' [vtable]'],
+ ]
+ for prefix, replacement in prefixes:
+ if name.startswith(prefix):
+ name = name[len(prefix):] + replacement
+ # Simplify parenthesis parsing.
+ replacements = [
+ ['(anonymous namespace)', '[anonymous namespace]'],
+ ]
+ for value, replacement in replacements:
+ name = name.replace(value, replacement)
+
+ def parse_one(val):
+ """Returns (leftmost-part, remaining); remaining is '' when parsing stops."""
+ if (val.startswith('operator') and
+ not (val[8:9].isalnum() or val[8:9] == '_')):
+ # Operator overload (or a bare 'operator'; slicing avoids IndexError), terminate.
+ return (val, '')
+ co = val.find('::')
+ lt = val.find('<')
+ pa = val.find('(')
+ co = len(val) if co == -1 else co
+ lt = len(val) if lt == -1 else lt
+ pa = len(val) if pa == -1 else pa
+ if co < lt and co < pa:
+ # Namespace or type name.
+ return (val[:co], val[co+2:])
+ if lt < pa:
+ # Template. Capture nested templates; stop at end of input if unbalanced.
+ open_tmpl = 1
+ gt = lt
+ while gt + 1 < len(val) and (val[gt] != '>' or open_tmpl != 0):
+ gt = gt + 1
+ if val[gt] == '<':
+ open_tmpl = open_tmpl + 1
+ if val[gt] == '>':
+ open_tmpl = open_tmpl - 1
+ ret = val[gt+1:]
+ if ret.startswith('::'):
+ ret = ret[2:]
+ if ret.startswith('('):
+ # Template function, terminate.
+ return (val, '')
+ return (val[:gt+1], ret)
+ # Terminate with any function name, identifier, or unmangled name.
+ return (val, '')
+
+ parts = []
+ while len(name) > 0:
+ (part, name) = parse_one(name)
+ assert len(part) > 0
+ parts.append(part)
+ return parts
-def treeify_syms(symbols, strip_prefix=None):
+def treeify_syms(symbols, strip_prefix=None, cppfilt=None):
dirs = {}
for sym, type, size, path in symbols:
if path:
path = os.path.normpath(path)
if strip_prefix and path.startswith(strip_prefix):
path = path[len(strip_prefix):]
- elif path.startswith('/usr/include'):
- path = path.replace('/usr/include', 'usrinclude')
elif path.startswith('/'):
path = path[1:]
+ path = ['[path]'] + path.split('/')
- parts = None
- # TODO: make segmenting by namespace work.
- if False and '::' in sym:
- if sym.startswith('vtable for '):
- sym = sym[len('vtable for '):]
- parts = sym.split('::')
- parts.append('[vtable]')
- else:
- parts = sym.split('::')
- parts[0] = '::' + parts[0]
- elif path and '/' in path:
- parts = path.split('/')
+ parts = parse_cpp_name(sym)
+ if len(parts) == 1:
+ if path:
+ # No namespaces, group with path.
+ parts = path + parts
+ else:
+ new_prefix = ['[ungrouped]']
+ regroups = [
+ ['.L.str', '[str]'],
+ ['.L__PRETTY_FUNCTION__.', '[__PRETTY_FUNCTION__]'],
+ ['.L__func__.', '[__func__]'],
+ ['.Lswitch.table', '[switch table]'],
+ ]
+ for prefix, group in regroups:
+ if parts[0].startswith(prefix):
+ parts[0] = parts[0][len(prefix):]
+ if cppfilt and parts[0].startswith('_Z'):
+ # Demangle names when possible.
+ # Mangled names all start with _Z.
+ parts[0] = subprocess.check_output(
+ [cppfilt, parts[0]]).strip()
+ new_prefix += [group]
+ break
+ parts = new_prefix + parts
- if parts:
- key = parts.pop()
- tree = dirs
- try:
- for part in parts:
- assert part != '', path
- if part not in tree:
- tree[part] = {}
- tree = tree[part]
- tree[key] = tree.get(key, 0) + size
- except:
- print >>sys.stderr, sym, parts, key
- raise
- else:
- key = 'symbols without paths'
- if key not in dirs:
- dirs[key] = {}
- tree = dirs[key]
- subkey = 'misc'
- if (sym.endswith('::__FUNCTION__') or
- sym.endswith('::__PRETTY_FUNCTION__')):
- subkey = '__FUNCTION__'
- elif sym.startswith('CSWTCH.'):
- subkey = 'CSWTCH'
- elif '::' in sym:
- subkey = sym[0:sym.find('::') + 2]
- else:
- print >>sys.stderr, 'unbucketed (no path?):', sym, type, size, path
- tree[subkey] = tree.get(subkey, 0) + size
+ key = parts.pop()
+ tree = dirs
+ try:
+ depth = 0
+ for part in parts:
+ depth = depth + 1
+ assert part != '', path
+ if part not in tree:
+ tree[part] = {'$bloat_symbols':{}}
+ if type not in tree[part]['$bloat_symbols']:
+ tree[part]['$bloat_symbols'][type] = 0
+ tree[part]['$bloat_symbols'][type] += 1
+ tree = tree[part]
+ old_size, old_symbols = tree.get(key, (0, {}))
+ if type not in old_symbols:
+ old_symbols[type] = 0
+ old_symbols[type] += 1
+ tree[key] = (old_size + size, old_symbols)
+ except:
+ print >>sys.stderr, 'sym `%s`\tparts `%s`\tkey `%s`' % (sym, parts, key)
+ raise
return dirs
@@ -146,31 +212,45 @@
files = 0
for key, val in tree.iteritems():
+ if key == '$bloat_symbols':
+ continue
if isinstance(val, dict):
subtree = jsonify_tree(val, key)
total += subtree['data']['$area']
children.append(subtree)
else:
- total += val
+ (size, symbols) = val
+ total += size
+ assert len(symbols) == 1, symbols.values()[0] == 1
+ symbol = symbol_type_to_human(symbols.keys()[0])
children.append({
- 'name': key + ' ' + format_bytes(val),
- 'data': { '$area': val }
- })
+ 'name': key + ' ' + format_bytes(size),
+ 'data': {
+ '$area': size,
+ '$symbol': symbol,
+ }
+ })
children.sort(key=lambda child: -child['data']['$area'])
-
+ dominant_symbol = ''
+ if '$bloat_symbols' in tree:
+ dominant_symbol = symbol_type_to_human(
+ max(tree['$bloat_symbols'].iteritems(),
+ key=operator.itemgetter(1))[0])
return {
'name': name + ' ' + format_bytes(total),
'data': {
'$area': total,
+ '$dominant_symbol': dominant_symbol,
},
'children': children,
}
-def dump_nm(nmfile, strip_prefix):
- dirs = treeify_syms(parse_nm(nmfile), strip_prefix)
- print 'var kTree = ' + json.dumps(jsonify_tree(dirs, '/'), indent=2)
+def dump_nm(nmfile, strip_prefix, cppfilt):
+ dirs = treeify_syms(parse_nm(nmfile), strip_prefix, cppfilt)
+ print ('var kTree = ' +
+ json.dumps(jsonify_tree(dirs, '[everything]'), indent=2))
def parse_objdump(input):
@@ -214,14 +294,14 @@
}
-def dump_sections():
- sections, debug_sections = parse_objdump(open('objdump.out'))
+def dump_sections(objdump):
+ sections, debug_sections = parse_objdump(objdump)
sections = jsonify_sections('sections', sections)
debug_sections = jsonify_sections('debug', debug_sections)
+ size = sections['data']['$area'] + debug_sections['data']['$area']
print 'var kTree = ' + json.dumps({
- 'name': 'top',
- 'data': { '$area': sections['data']['$area'] +
- debug_sections['data']['$area'] },
+ 'name': 'top ' + format_bytes(size),
+ 'data': { '$area': size },
'children': [ debug_sections, sections ]})
@@ -243,13 +323,17 @@
parser.add_option('--nm-output', action='store', dest='nmpath',
metavar='PATH', default='nm.out',
help='path to nm output [default=nm.out]')
-parser.add_option('--objdump-output', action='store', dest='objdump',
+parser.add_option('--objdump-output', action='store', dest='objdumppath',
metavar='PATH', default='objdump.out',
help='path to objdump output [default=objdump.out]')
parser.add_option('--strip-prefix', metavar='PATH', action='store',
help='strip PATH prefix from paths; e.g. /path/to/src/root')
parser.add_option('--filter', action='store',
help='include only symbols/files matching FILTER')
+parser.add_option('--c++filt', action='store', metavar='PATH', dest='cppfilt',
+ default='c++filt', help="Path to c++filt, used to demangle "
+ "symbols that weren't handled by nm. Set to an invalid path "
+ "to disable.")
opts, args = parser.parse_args()
if len(args) != 1:
@@ -259,9 +343,20 @@
mode = args[0]
if mode == 'syms':
nmfile = open(opts.nmpath, 'r')
- dump_nm(nmfile, strip_prefix=opts.strip_prefix)
+ try:  # Probe c++filt once; on any failure fall back to no demangling.
+ res = subprocess.check_output([opts.cppfilt, 'main'])
+ if res.strip() != 'main':
+ print >>sys.stderr, ("%s failed demangling, "
+ "output won't be demangled." % opts.cppfilt)
+ opts.cppfilt = None
+ except (OSError, subprocess.CalledProcessError):  # missing binary / non-zero exit
+ print >>sys.stderr, ("Could not find c++filt at %s, "
+ "output won't be demangled." % opts.cppfilt)
+ opts.cppfilt = None
+ dump_nm(nmfile, strip_prefix=opts.strip_prefix, cppfilt=opts.cppfilt)
elif mode == 'sections':
- dump_sections()
+ objdumpfile = open(opts.objdumppath, 'r')
+ dump_sections(objdumpfile)
elif mode == 'dump':
nmfile = open(opts.nmpath, 'r')
syms = list(parse_nm(nmfile))