| # Copyright 2021 The Chromium Authors |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """Parses traces into Python objects. |
| |
| Takes a trace from chrome://tracing and returns a Python dict containing the |
| result. The functions in the file are not very useful on their own, but are |
| intended as helpers for the other scripts in this directory. |
| |
| This parses an allocation profile generated by PartitionAlloc in the thread |
| cache. This will only give data on Chrome instances where the thread cache is |
| enabled, and PA_THREAD_CACHE_ALLOC_STATS is defined, that is non-official |
| builds. |
| |
| To collect a profile: |
| - Build a non-official chrome version (Should be a release build for accurate |
| reports; Debug builds have PA-E disabled) |
| - Collect a trace with the memory-infra category enabled (in chrome://tracing) |
| - Save it as json.gz, and load it here. |
| """ |
| |
| import gzip |
| import json |
| |
| from matplotlib import pylab as plt |
| from typing import Callable |
| |
| |
def LoadTrace(filename: str) -> dict:
  """Loads a JSON trace, gzipped or not.

  The trace should be taken directly from Chrome (chrome://tracing).

  Args:
    filename: Path of the trace. A name ending in '.gz' is read through gzip,
      anything else is read as a regular file.

  Returns:
    A dictionary with the trace content. This is simply the content of the
    trace from |filename|, as a Python dictionary.

  Raises:
    OSError: If the file cannot be opened or read.
    json.JSONDecodeError: If the content is not valid JSON.
  """
  opener = gzip.open if filename.endswith('.gz') else open
  # Both variants are read as bytes, leaving the decoding to the json module.
  # This keeps the result independent of the platform's default text encoding
  # and makes the gzipped and plain paths behave the same way.
  with opener(filename, 'rb') as f:
    return json.load(f)
| |
| |
| def _GetAllocatorDumps(trace: dict) -> list: |
| """Takes in a trace (as returned from |LoadTrace|), and returns the parts |
| of it related to the allocators. |
| |
| Args: |
| trace: Trace content as returned by LoadTrace. |
| |
| Returns: |
| The parts of the trace related to allocator metrics. Note that these |
| entries are taken as-is from the trace. |
| |
| Each entry of the return value has |
| the following format. The important field here is 'args', but there are |
| other unimportant ones as well (shown with ...): |
| { |
| 'args': { |
| 'dumps': { |
| 'allocators': dict[str, dict], |
| 'allocator_graph': list[dict], |
| 'level_of_detail': 'detailed' |
| } |
| }, |
| ... |
| } |
| |
| Of the fields listed above, we mainly care about 'allocators'. Each entry |
| here corresponds to an allocator or a subset of the allocations for a given |
| allocator. For example, we have an entry in 'allocators' with key |
| 'malloc/partitions/allocator/buckets/bucket_1024', which corresponds to the |
| 1024 byte bucket of PartitionAlloc's main malloc partition. |
| |
| Note that in the trace, we have entries for allocators besides just |
| PartitionAlloc, but in these scripts, we will only be looking at entries |
| for PartitionAlloc allocators. |
| |
| Each entry of 'allocators' has the |
| following format: |
| { |
| 'attrs': dict[str, dict], |
| 'guid': str |
| } |
| |
| The contents of 'attrs' varies depending on which entry we are looking at, |
| but generally contains all the relevant information we need for a |
| particular allocator. The attributes we care about here are mainly: |
| 'allocated_objects_size', 'size', and 'slot_size'. |
| |
| Finally, each entry of 'attrs' has the following format: |
| { |
| 'type': str, |
| 'units': str, |
| 'value': str |
| } |
| |
| For example, in 'malloc/partitions/allocator/buckets/bucket_1024', 'attrs' |
| has a 'allocated_objects_size' entry, which might look like so: |
| { |
| 'type': 'scalar', |
| 'units': 'bytes', |
| 'value': 'ef8000' |
| } |
| |
| The entry we care about here is 'value', which is a string representing a |
| hexadecimal number. |
| """ |
| events = trace['traceEvents'] |
| memory_infra_events = [ |
| e for e in events if e['cat'] == 'disabled-by-default-memory-infra' |
| ] |
| dumps = [ |
| e for e in memory_infra_events |
| if e['name'] == 'periodic_interval' and e['args']['dumps'] |
| ['level_of_detail'] == 'detailed' and 'allocators' in e['args']['dumps'] |
| ] |
| return dumps |
| |
| |
| def _ProcessNamesAndLabels(trace: dict) -> (dict, dict): |
| """Get mappings of pid to name and pid to label. |
| |
| Args: |
| trace: Trace content as returned by LoadTrace. |
| |
| Returns: |
| A tuple containing two dicts. The first maps pids to names, the second maps |
| pids to labels. |
| """ |
| # Process names and labels. |
| pid_to_name = {} |
| pid_to_labels = {} |
| |
| metadata_events = [ |
| e for e in trace['traceEvents'] if e['cat'] == '__metadata' |
| ] |
| |
| process_name_events = [ |
| e for e in metadata_events if e['name'] == 'process_name' |
| ] |
| for e in process_name_events: |
| pid_to_name[e['pid']] = e['args']['name'] |
| |
| process_labels_events = [ |
| e for e in metadata_events if e['name'] == 'process_labels' |
| ] |
| for e in process_labels_events: |
| pid_to_labels[e['pid']] = e['args']['labels'] |
| |
| return pid_to_name, pid_to_labels |
| |
| |
def ParseTrace(trace: dict,
               compute_result: Callable[[dict, dict], None]) -> dict[int, dict]:
  """Parses a trace, and returns thread cache stats.

  Args:
    trace: As returned by LoadTrace()
    compute_result: function taking |result_for_pid| (which it modifies
      in-place), and |allocators|. It should add any results we'd like to graph
      into |result_for_pid['data']|, as an |np.array|.

  Returns:
    {pid -> {'name': str, 'labels': str, 'data': np.array}}.
    Where the data array contains the data we would like to graph. For example,
    it may contain 'size' and 'fragmentation' columns, which is what
    |PlotProcessFragmentation| expects.
  """
  allocator_dumps = _GetAllocatorDumps(trace)
  names_by_pid, labels_by_pid = _ProcessNamesAndLabels(trace)

  stats_by_pid = {}
  for allocator_dump in allocator_dumps:
    pid = allocator_dump['pid']
    allocators = allocator_dump['args']['dumps']['allocators']

    # The browser process also emits global dumps, which we do not care about.
    # |_GetAllocatorDumps| returns them too, but |_ProcessNamesAndLabels| has
    # no matching metadata for them. They have to be special-cased
    # _somewhere_, and this seems like the best spot.
    if 'global' in allocators:
      continue

    # Only the last dump of a given pid is kept: each new dump overwrites the
    # entry created for the previous ones.
    entry = {
        'name': names_by_pid[pid],
        'labels': labels_by_pid.get(pid, ''),
    }
    stats_by_pid[pid] = entry
    compute_result(entry, allocators)

  return stats_by_pid
| |
| |
def PlotProcessFragmentation(title, data, output):
  """Saves a stem plot of fragmentation against size for a single process.

  Args:
    title: Title of the graph.
    data: Entry for one process. |data['data']| should contain 'size' and
      'fragmentation' entries.
    output: Filename to save the result to.
  """
  plt.figure(figsize=(16, 8))
  plt.title(title)

  samples = data['data']
  plt.stem(samples['size'], samples['fragmentation'])

  # Sizes on a base-2 log axis, fragmentation on a linear 0-100 axis.
  plt.xscale('log', base=2)
  plt.yscale('linear')
  plt.ylim(ymin=0, ymax=100)

  plt.xlabel('Size (log)')
  plt.ylabel('Fragmentation (%)')

  plt.savefig(output, bbox_inches='tight')
  plt.close()
| |
| |
def PlotProcessWaste(title, data, output):
  """Saves a stem plot of unused memory against size for a single process.

  Args:
    title: Title of the graph.
    data: Entry for one process. |data['data']| should contain 'size' and
      'unused' entries.
    output: Filename to save the result to.
  """
  plt.figure(figsize=(16, 8))
  plt.title(title)

  # Both axes use a base-2 log scale.
  plt.xscale('log', base=2)
  plt.yscale('log', base=2)

  # Only the entries with a non-zero 'unused' value are plotted.
  # NOTE(review): this assumes 'size' and 'unused' support element-wise
  # comparison and boolean-mask indexing (e.g. numpy arrays) - confirm with
  # the callers.
  samples = data['data']
  unused = samples['unused']
  has_waste = unused != 0
  plt.stem(samples['size'][has_waste], unused[has_waste])

  plt.ylim(ymin=1, ymax=2**20)
  plt.xlabel('Size (log)')
  plt.ylabel('Unused Size (log)')

  plt.savefig(output, bbox_inches='tight')
  plt.close()
| |
| |
def GetAllocatorAttr(attrs: dict, name: str) -> int:
  """Returns the value of an allocator attribute as an integer.

  Args:
    attrs: The 'attrs' dict of an allocator entry. Each attribute is a dict
      whose 'value' item is a string representing a hexadecimal number.
    name: Name of the attribute to read, for example 'size'.

  Returns:
    The 'value' of the attribute |name|, parsed as a base-16 integer.

  Raises:
    KeyError: If |name| is not in |attrs|, or the attribute has no 'value'.
    ValueError: If the value is not a valid hexadecimal string.
  """
  return int(attrs[name]['value'], base=16)