#!/usr/bin/python
#
# Copyright 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""From a dump generated by dump_process.cc dump, prints statistics about
its content.
"""
import array
import collections
import hashlib
import logging
import os
import sys
import zlib
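
# All dumps are assumed to use 4 KiB pages; mapping offsets and sizes are
# checked against this assumption below.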
PAGE_SIZE = 1 << 12


def _ReadPage(f):
  """Reads a page of data from a file.

  Args:
    f: (file) An opened file to read from.

  Returns:
    An array.array() of unsigned int with the page content.
  """
  result = array.array('I')
  result.fromfile(f, PAGE_SIZE / result.itemsize)
  return result


def _PrettyPrintSize(x):
  """Pretty prints sizes in bytes, e.g. 123456 -> '123.46kB'.

  Args:
    x: (int) size

  Returns:
    (str) Pretty printed version, 2 decimal places.
  """
  if x < 1e3:
    return str(x)
  elif 1e3 <= x < 1e6:
    return '%.2fkB' % (x / 1e3)
  elif 1e6 <= x < 1e9:
    return '%.2fMB' % (x / 1e6)
  else:
    return '%.2fGB' % (x / 1e9)


class MappingStats(object):
  """Statistics about a mapping, from a dump.

  Slots:
    filename: (str) Dump filename.
    start: (int) Start address of the mapping.
    end: (int) End address of the mapping.
    pages: (int) Size of the mapping in pages.
    is_zero: ([bool]) For each page, whether it's a zero page.
    is_present: ([bool]) For each page, whether it's present.
    is_swapped: ([bool]) For each page, whether it has been swapped out.
    compressed_size: ([int]) If a page is not zero, its compressed size.
    hashes: ([str]) If a page is not zero, its SHA1 hash.
    freed: (int) Bytes in the mapping matching a known freed-memory pattern.
  """
  __slots__ = ('filename', 'start', 'end', 'pages', 'is_zero', 'is_present',
               'is_swapped', 'compressed_size', 'hashes', 'freed')

  def __init__(self, filename, start, end):
    """Init.

    Args:
      filename: (str) Dump filename.
      start: (int) Start address.
      end: (int) End address.
    """
    self.filename = filename
    self.start = start
    self.end = end
    self.pages = (end - start) / PAGE_SIZE
    self.is_zero = [False for i in range(self.pages)]
    self.is_present = [False for i in range(self.pages)]
    self.is_swapped = [False for i in range(self.pages)]
    self.compressed_size = [0 for i in range(self.pages)]
    self.hashes = [None for i in range(self.pages)]
    self.freed = 0
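

# Dump files are named <pid>-<start>-<end>.dump (decimal addresses), each with
# a matching .metadata file holding one '[01]{2}' line per page: the first
# character is the "present" bit, the second the "swapped" bit.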
def _GetStatsFromFileDump(filename):
  """Computes per-dump statistics.

  Args:
    filename: (str) Path to the dump.

  Returns:
    MappingStats for the mapping.
  """
  # These patterns are typically only written over freed memory when DCHECK()
  # is on.
  FREED_PATTERNS = (0xcccccccc,  # V8
                    0xcdcdcdcd,  # PartitionAlloc "zapped"
                    0xabababab,  # PartitionAlloc "uninitialized"
                    0xdeadbeef,  # V8 "zapped"
                    0x0baddeaf,  # V8 zapped handles
                    0x0baffedf,  # V8 zapped global handles
                    0x0beefdaf,  # V8 zapped from space
                    0xbeefdeef,  # V8 zapped slots
                    0xbadbaddb,  # V8 debug zapped
                    0xfeed1eaf)  # V8 zapped freelist
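  # Each matching 4-byte word counts as 4 freed bytes, so a page zapped
  # end-to-end contributes 4 * 1024 = 4096 bytes to the freed total below.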
  # Dump integrity checks.
  metadata_filename = filename + '.metadata'
  pid_start_end = os.path.basename(filename)[:-len('.dump')]
  (_, start, end) = [int(x, 10) for x in pid_start_end.split('-')]
  file_stat = os.stat(filename)
  assert start % PAGE_SIZE == 0
  assert end % PAGE_SIZE == 0
  assert file_stat.st_size == (end - start)
  metadata_file_stat = os.stat(metadata_filename)
  result = MappingStats(filename, start, end)
  # Each metadata line is [01]{2}\n, e.g. '10\n', one line per page.
  assert metadata_file_stat.st_size == 3 * result.pages
  with open(filename, 'r') as f, open(metadata_filename, 'r') as metadata_f:
    for i in range(result.pages):
      page = _ReadPage(f)
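      # array('I') items are 4 bytes wide on the platforms this script
      # targets, so a 4096-byte page holds 1024 words.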
      assert len(page) == 1024
      result.freed += 4 * sum(x in FREED_PATTERNS for x in page)
      is_zero = max(page) == 0
      present, swapped = (bool(int(x)) for x in metadata_f.readline().strip())
      # Not present, not swapped private anonymous == lazily initialized zero
      # page.
      if not present and not swapped:
        assert is_zero
      result.is_zero[i] = is_zero
      result.is_present[i] = present
      result.is_swapped[i] = swapped
      if not is_zero:
        sha1 = hashlib.sha1()
        sha1.update(page)
        page_hash = sha1.digest()
        result.hashes[i] = page_hash
        compressed = zlib.compress(page, 1)
        result.compressed_size[i] = len(compressed)
  return result


def _FindPageFromHash(mappings, page_hash):
  """Returns a page with a given hash from a list of mappings.

  Args:
    mappings: ([MappingStats]) List of mappings.
    page_hash: (str) Page hash to look for.

  Returns:
    An array.array() of unsigned int with the page content, or None if no
    page has this hash.
  """
  for mapping in mappings:
    for i in range(mapping.pages):
      if mapping.hashes[i] == page_hash:
        with open(mapping.filename, 'r') as f:
          f.seek(i * PAGE_SIZE)
          page = _ReadPage(f)
          sha1 = hashlib.sha1()
          sha1.update(page)
          assert page_hash == sha1.digest()
          return page


def _PrintPage(page):
  """Prints the content of a page, 16 32-bit words per line."""
  for i, x in enumerate(page):
    print '{:08x}'.format(x),
    if i % 16 == 15:
      print


def PrintStats(dumps):
  """Prints statistics about a set of process mapping dumps.

  Args:
    dumps: ([str]) List of dumps.
  """
  dump_stats = [_GetStatsFromFileDump(filename) for filename in dumps]
  total_pages = sum(stats.pages for stats in dump_stats)
  total_zero_pages = sum(sum(stats.is_zero) for stats in dump_stats)
  total_compressed_size = sum(sum(stats.compressed_size)
                              for stats in dump_stats)
  total_swapped_pages = sum(sum(stats.is_swapped) for stats in dump_stats)
  total_not_present_pages = sum(stats.pages - sum(stats.is_present)
                                for stats in dump_stats)
  total_present_zero_pages = sum(
      sum(x == (True, True) for x in zip(stats.is_zero, stats.is_present))
      for stats in dump_stats)
  total_freed_space = sum(stats.freed for stats in dump_stats)
  content_to_count = collections.defaultdict(int)
  for stats in dump_stats:
    for page_hash in stats.hashes:
      if page_hash:
        content_to_count[page_hash] += 1
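  # A content hash counted N times means N - 1 redundant copies; both the
  # duplicate count and the "max common pages" figure below subtract the one
  # original copy per distinct content.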
  print 'Total pages = %d (%s)' % (total_pages,
                                   _PrettyPrintSize(total_pages * PAGE_SIZE))
  print 'Total zero pages = %d (%.02f%%)' % (
      total_zero_pages, (100. * total_zero_pages) / total_pages)
  print 'Total present zero pages = %d (%s)' % (
      total_present_zero_pages,
      _PrettyPrintSize(total_present_zero_pages * PAGE_SIZE))
  total_size_non_zero_pages = (total_pages - total_zero_pages) * PAGE_SIZE
  print 'Total size of non-zero pages = %d (%s)' % (
      total_size_non_zero_pages, _PrettyPrintSize(total_size_non_zero_pages))
  print 'Total compressed size = %d (%.02f%%)' % (
      total_compressed_size,
      (100. * total_compressed_size) / total_size_non_zero_pages)
  duplicated_pages = sum(x - 1 for x in content_to_count.values())
  print 'Duplicated non-zero pages = %d' % duplicated_pages
  count_and_hashes = sorted(((v, k) for k, v in content_to_count.items()),
                            reverse=True)
  max_common_pages = count_and_hashes[0][0] - 1
  print 'Max non-zero pages with the same content = %d' % max_common_pages
  print 'Swapped pages = %d (%s)' % (
      total_swapped_pages, _PrettyPrintSize(total_swapped_pages * PAGE_SIZE))
  print 'Non-present pages = %d (%s)' % (
      total_not_present_pages,
      _PrettyPrintSize(total_not_present_pages * PAGE_SIZE))
  print 'Freed = %d (%s)' % (
      total_freed_space, _PrettyPrintSize(total_freed_space))
  print 'Top Duplicated Pages:'
  for i in range(min(10, len(count_and_hashes))):
    count, page_hash = count_and_hashes[i]
    print '%d common pages' % count
    page = _FindPageFromHash(dump_stats, page_hash)
    _PrintPage(page)
    print


def main():
  logging.basicConfig(level=logging.INFO)
  if len(sys.argv) != 2:
    logging.error('Usage: %s <dumps_directory>', sys.argv[0])
    sys.exit(1)
  directory = sys.argv[1]
  dumps = [os.path.join(directory, f) for f in os.listdir(directory)
           if f.endswith('.dump')]
  PrintStats(dumps)


if __name__ == '__main__':
  main()