| #!/usr/bin/python |
| # |
| # Copyright 2018 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
"""From a dump generated by dump_process.cc, prints statistics about
its content.
| """ |
| |
| import array |
| import collections |
| import hashlib |
| import logging |
| import os |
| import struct |
| import sys |
| import zlib |
| |
| |
| PAGE_SIZE = 1 << 12 |
| |
| |
| def _ReadPage(f): |
| """Reads a page of data from a file. |
| |
| Args: |
| f: (file) An opened file to read from. |
| |
| Returns: |
| An array.array() of unsigned int with the page content. |
| """ |
| result = array.array('I') |
| result.fromfile(f, PAGE_SIZE / result.itemsize) |
| return result |
| |
| |
| def _PrettyPrintSize(x): |
| """Pretty print sizes in bytes, e.g. 123456 -> 123.45kB. |
| |
| Args: |
| x: (int) size |
| |
| Returns: |
| (str) Pretty printed version, 2 decimal places. |
| """ |
| if x < 1e3: |
| return str(x) |
| elif 1e3 <= x < 1e6: |
| return '%.2fkB' % (x / 1e3) |
| elif 1e6 <= x < 1e9: |
| return '%.2fMB' % (x / 1e6) |
| else: |
| return '%.2fGB' % (x / 1e9) |
| |
| |
class MappingStats(object):
  """Statistics about a mapping, from a dump.

  Slots:
    filename: (str) Dump filename.
    start: (int) Start address of the mapping.
    end: (int) End address of the mapping.
    pages: (int) Size of the mapping in pages.
    is_zero: ([bool]) For each page, whether it's a zero page.
    is_present: ([bool]) For each page, whether it's present.
    is_swapped: ([bool]) For each page, whether it has been swapped out.
    compressed_size: ([int]) If a page is not zero, its compressed size.
    hashes: ([str]) If a page is not zero, its SHA1 hash.
    freed: (int) Bytes of the mapping matching known "freed memory" patterns.
  """
  __slots__ = ('filename', 'start', 'end', 'pages', 'is_zero', 'is_present',
               'is_swapped', 'compressed_size', 'hashes', 'freed')

  def __init__(self, filename, start, end):
    """Init.

    Args:
      filename: (str) Dump filename.
      start: (int) Start address. Must be page-aligned.
      end: (int) End address. Must be page-aligned.
    """
    self.filename = filename
    self.start = start
    self.end = end
    # Floor division keeps |pages| an int under both Python 2 and 3
    # ('/' would produce a float under Python 3).
    self.pages = (end - start) // PAGE_SIZE
    self.is_zero = [False] * self.pages
    self.is_present = [False] * self.pages
    self.is_swapped = [False] * self.pages
    self.compressed_size = [0] * self.pages
    self.hashes = [None] * self.pages
    self.freed = 0
| |
| |
def _GetStatsFromFileDump(filename):
  """Computes per-dump statistics.

  Args:
    filename: (str) Path to the dump, named <pid>-<start>-<end>.dump, with a
      matching <filename>.metadata file next to it.

  Returns:
    MappingStats for the mapping.
  """
  # These are typically only populated with DCHECK() on.
  FREED_PATTERNS = (0xcccccccc, # V8
                    0xcdcdcdcd, # PartitionAlloc "zapped"
                    0xabababab, # PartitionAlloc "uninitialized"
                    0xdeadbeef, # V8 "zapped"
                    0x0baddeaf, # V8 zapped handles
                    0x0baffedf, # V8 zapped global handles
                    0x0beefdaf, # V8 zapped from space
                    0xbeefdeef, # V8 zapped slots
                    0xbadbaddb, # V8 debug zapped
                    0xfeed1eaf) # V8 zapped freelist
  # Dump integrity checks.
  metadata_filename = filename + '.metadata'
  pid_start_end = os.path.basename(filename)[:-len('.dump')]
  (_, start, end) = [int(x, 10) for x in pid_start_end.split('-')]
  file_stat = os.stat(filename)
  assert start % PAGE_SIZE == 0
  assert end % PAGE_SIZE == 0
  assert file_stat.st_size == (end - start)
  metadata_file_stat = os.stat(metadata_filename)
  result = MappingStats(filename, start, end)
  # each line is [01]{2}\n, eg '10\n', 1 line per page.
  assert metadata_file_stat.st_size == 3 * result.pages

  # The dump is raw binary data: open it in 'rb' (identical to 'r' on POSIX
  # under Python 2, but correct on Windows and Python 3 as well). The
  # metadata file is text, one '[01][01]\n' line per page.
  with open(filename, 'rb') as f, open(metadata_filename, 'r') as metadata_f:
    for i in range(result.pages):
      page = _ReadPage(f)
      assert len(page) == 1024  # PAGE_SIZE / sizeof(uint32_t).
      # 4 bytes per 32 bit word matching a freed pattern.
      result.freed += 4 * sum(x in FREED_PATTERNS for x in page)
      is_zero = max(page) == 0
      present, swapped = (bool(int(x)) for x in metadata_f.readline().strip())
      # Not present, not swapped private anonymous == lazily initialized zero
      # page.
      if not present and not swapped:
        assert is_zero
      result.is_zero[i] = is_zero
      result.is_present[i] = present
      result.is_swapped[i] = swapped
      if not is_zero:
        sha1 = hashlib.sha1()
        sha1.update(page)
        result.hashes[i] = sha1.digest()
        compressed = zlib.compress(page, 1)
        result.compressed_size[i] = len(compressed)
  return result
| |
| |
def _FindPageFromHash(mappings, page_hash):
  """Returns a page with a given hash from a list of mappings.

  Args:
    mappings: ([MappingStats]) List of mappings.
    page_hash: (str) Page hash to look for.

  Returns:
    array.array(uint32_t) with the content of the first matching page, or
    None if no page has this hash.
  """
  for mapping in mappings:
    for i in range(mapping.pages):
      if mapping.hashes[i] != page_hash:
        continue
      # 'rb' since the dump is raw binary data.
      with open(mapping.filename, 'rb') as f:
        f.seek(i * PAGE_SIZE)
        page = _ReadPage(f)
        # Integrity check: the page on disk must still match its stored hash.
        sha1 = hashlib.sha1()
        sha1.update(page)
        assert page_hash == sha1.digest()
        return page
  return None
| |
| |
def _PrintPage(page):
  """Prints the content of a page.

  Args:
    page: (array.array) Page content as 32 bit words; printed as zero-padded
      hex, 16 words per output line.
  """
  for i, x in enumerate(page):
    # Trailing comma: stay on the same line (Python 2 print statement).
    print '{:08x}'.format(x),
    if i % 16 == 15:
      print
| |
| |
def PrintStats(dumps):
  """Logs statistics about a process mappings dump.

  Prints aggregate page counts (zero, present, swapped, duplicated, freed)
  across all the dumps, then the content of the 10 most duplicated pages.

  Args:
    dumps: ([str]) List of dumps.
  """
  dump_stats = [_GetStatsFromFileDump(filename) for filename in dumps]
  content_to_count = collections.defaultdict(int)
  total_pages = sum(stats.pages for stats in dump_stats)
  total_zero_pages = sum(sum(stats.is_zero) for stats in dump_stats)
  total_compressed_size = sum(sum(stats.compressed_size)
                              for stats in dump_stats)
  total_swapped_pages = sum(sum(stats.is_swapped) for stats in dump_stats)
  total_not_present_pages = sum(stats.pages - sum(stats.is_present)
                                for stats in dump_stats)
  # Pages that are both zero and present, i.e. zero pages that actually
  # consume committed memory.
  total_present_zero_pages = sum(
      sum(x == (True, True) for x in zip(stats.is_zero, stats.is_present))
      for stats in dump_stats)
  total_freed_space = sum(stats.freed for stats in dump_stats)

  # NOTE(review): this re-initializes the defaultdict created above; the
  # first assignment is redundant.
  content_to_count = collections.defaultdict(int)
  for stats in dump_stats:
    for page_hash in stats.hashes:
      # hashes[i] is None for zero pages; only non-zero pages are counted.
      if page_hash:
        content_to_count[page_hash] += 1

  print 'Total pages = %d (%s)' % (total_pages,
                                   _PrettyPrintSize(total_pages * PAGE_SIZE))
  print 'Total zero pages = %d (%.02f%%)' % (
      total_zero_pages, (100. * total_zero_pages) / total_pages)
  print 'Total present zero pages = %d (%s)' % (
      total_present_zero_pages,
      _PrettyPrintSize(total_present_zero_pages * PAGE_SIZE))
  total_size_non_zero_pages = (total_pages - total_zero_pages) * PAGE_SIZE
  print 'Total size of non-zero pages = %d (%s)' % (
      total_size_non_zero_pages, _PrettyPrintSize(total_size_non_zero_pages))
  print 'Total compressed size = %d (%.02f%%)' % (
      total_compressed_size,
      (100. * total_compressed_size) / total_size_non_zero_pages)
  # Each distinct content is counted once; the rest are duplicates.
  duplicated_pages = sum(x - 1 for x in content_to_count.values())
  print 'Duplicated non-zero pages = %d' % duplicated_pages
  # Sorted by count, most common content first.
  count_and_hashes = sorted(((v, k) for k, v in content_to_count.items()),
                            reverse=True)
  max_common_pages = count_and_hashes[0][0] - 1
  print 'Max non-zero pages with the same content = %d' % max_common_pages
  print 'Swapped pages = %d (%s)' % (
      total_swapped_pages, _PrettyPrintSize(total_swapped_pages * PAGE_SIZE))
  print 'Non-present pages = %d (%s)' % (
      total_not_present_pages,
      _PrettyPrintSize(total_not_present_pages * PAGE_SIZE))
  print 'Freed = %d (%s)' % (
      total_freed_space, _PrettyPrintSize(total_freed_space))
  print 'Top Duplicated Pages:'
  # NOTE(review): assumes at least 10 distinct non-zero page contents;
  # raises IndexError on smaller dumps — confirm whether that matters.
  for i in range(10):
    count, page_hash = count_and_hashes[i]
    print '%d common pages' % count
    page = _FindPageFromHash(dump_stats, page_hash)
    _PrintPage(page)
    print
| |
| |
def main():
  """Entry point: prints statistics for all *.dump files in a directory.

  Expects exactly one command-line argument, the dumps directory; exits with
  status 1 otherwise.
  """
  logging.basicConfig(level=logging.INFO)
  if len(sys.argv) != 2:
    logging.error('Usage: %s <dumps_directory>', sys.argv[0])
    sys.exit(1)
  dumps_directory = sys.argv[1]
  dumps = []
  for entry in os.listdir(dumps_directory):
    if entry.endswith('.dump'):
      dumps.append(os.path.join(dumps_directory, entry))
  PrintStats(dumps)
| |
| |
# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
  main()