blob: 21caf1dee5fd8b9417ba1e739fe0a3e9d65be34f [file] [log] [blame]
# Copyright 2021 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Parses traces into Python objects.
Takes a trace from chrome://tracing and returns a Python dict containing the
result. The functions in the file are not very useful on their own, but are
intended as helpers for the other scripts in this directory.
This parses an allocation profile generated by PartitionAlloc in the thread
cache. This will only give data on Chrome instances where the thread cache is
enabled and PA_THREAD_CACHE_ALLOC_STATS is defined — that is, in non-official
builds.
To collect a profile:
- Build a non-official chrome version (Should be a release build for accurate
reports; Debug builds have PA-E disabled)
- Collect a trace with the memory-infra category enabled (in chrome://tracing)
- Save it as json.gz, and load it here.
"""
import gzip
import json
from matplotlib import pylab as plt
from typing import Callable
def LoadTrace(filename: str) -> dict:
  """Loads a JSON trace, gzipped or not. This trace should be taken directly
  from Chrome (chrome://tracing).

  Args:
    filename: Filename, gzipped or not.

  Returns:
    A dictionary with the trace content. This is simply the content of the
    trace from |filename|, as a Python dictionary.
  """
  # gzip.open() in 'r' mode yields bytes, which json.load() accepts directly.
  # A |with| block guarantees the file is closed even if json.load() raises,
  # replacing the previous manual try/finally bookkeeping.
  opener = gzip.open if filename.endswith('.gz') else open
  with opener(filename, 'r') as f:
    return json.load(f)
def _GetAllocatorDumps(trace: dict) -> list:
"""Takes in a trace (as returned from |LoadTrace|), and returns the parts
of it related to the allocators.
Args:
trace: Trace content as returned by LoadTrace.
Returns:
The parts of the trace related to allocator metrics. Note that these
entries are taken as-is from the trace.
Each entry of the return value has
the following format. The important field here is 'args', but there are
other unimportant ones as well (shown with ...):
{
'args': {
'dumps': {
'allocators': dict[str, dict],
'allocator_graph': list[dict],
'level_of_detail': 'detailed'
}
},
...
}
Of the fields listed above, we mainly care about 'allocators'. Each entry
here corresponds to an allocator or a subset of the allocations for a given
allocator. For example, we have an entry in 'allocators' with key
'malloc/partitions/allocator/buckets/bucket_1024', which corresponds to the
1024 byte bucket of PartitionAlloc's main malloc partition.
Note that in the trace, we have entries for allocators besides just
PartitionAlloc, but in these scripts, we will only be looking at entries
for PartitionAlloc allocators.
Each entry of 'allocators' has the
following format:
{
'attrs': dict[str, dict],
'guid': str
}
The contents of 'attrs' varies depending on which entry we are looking at,
but generally contains all the relevant information we need for a
particular allocator. The attributes we care about here are mainly:
'allocated_objects_size', 'size', and 'slot_size'.
Finally, each entry of 'attrs' has the following format:
{
'type': str,
'units': str,
'value': str
}
For example, in 'malloc/partitions/allocator/buckets/bucket_1024', 'attrs'
has a 'allocated_objects_size' entry, which might look like so:
{
'type': 'scalar',
'units': 'bytes',
'value': 'ef8000'
}
The entry we care about here is 'value', which is a string representing a
hexadecimal number.
"""
events = trace['traceEvents']
memory_infra_events = [
e for e in events if e['cat'] == 'disabled-by-default-memory-infra'
]
dumps = [
e for e in memory_infra_events
if e['name'] == 'periodic_interval' and e['args']['dumps']
['level_of_detail'] == 'detailed' and 'allocators' in e['args']['dumps']
]
return dumps
def _ProcessNamesAndLabels(trace: dict) -> (dict, dict):
"""Get mappings of pid to name and pid to label.
Args:
trace: Trace content as returned by LoadTrace.
Returns:
A tuple containing two dicts. The first maps pids to names, the second maps
pids to labels.
"""
# Process names and labels.
pid_to_name = {}
pid_to_labels = {}
metadata_events = [
e for e in trace['traceEvents'] if e['cat'] == '__metadata'
]
process_name_events = [
e for e in metadata_events if e['name'] == 'process_name'
]
for e in process_name_events:
pid_to_name[e['pid']] = e['args']['name']
process_labels_events = [
e for e in metadata_events if e['name'] == 'process_labels'
]
for e in process_labels_events:
pid_to_labels[e['pid']] = e['args']['labels']
return pid_to_name, pid_to_labels
def ParseTrace(trace: dict,
               compute_result: Callable[[dict, dict], None]) -> dict[int, dict]:
  """Parses a trace, and returns thread cache stats.

  Args:
    trace: As returned by LoadTrace()
    compute_result: function taking |result_for_pid| (which it modifies
      in-place), and |allocators|. It should add any results we'd like to
      graph into |result_for_pid['data']|, as an |np.array|.

  Returns:
    {pid -> {'name': str, 'labels': str, 'data': np.array}.
    Where the data array contains the data we would like to graph. For
    example, it may contain 'size' and 'fragmentation' columns, which is
    what |PlotProcessFragmentation| expects.
  """
  dumps = _GetAllocatorDumps(trace)
  pid_to_name, pid_to_labels = _ProcessNamesAndLabels(trace)
  result = {}
  for dump in dumps:
    pid = dump['pid']
    allocators = dump['args']['dumps']['allocators']
    # The browser process also has global dumps, we do not care about these.
    # These dumps are also returned from |_GetAllocatorDumps|, but the
    # corresponding metadata for them is not returned from
    # |_ProcessNamesAndLabels|. We have to special-case things _somewhere_ to
    # exclude these dumps, and this seems like the best spot.
    if 'global' in allocators:
      continue
    # We only use the last dump for a given pid, overwriting all previous
    # ones.
    result[pid] = {
        # A dump may exist for a pid with no process_name metadata; default
        # to '' rather than raising KeyError, consistent with the labels
        # lookup below.
        'name': pid_to_name.get(pid, ''),
        'labels': pid_to_labels.get(pid, '')
    }
    compute_result(result[pid], allocators)
  return result
def PlotProcessFragmentation(title, data, output):
  """Plots the Fragmentation vs size for a single process.

  Args:
    title: Title of the graph
    data: Data to plot. Should contain 'size' and 'fragmentation' entries.
    output: Filename to save the result to.
  """
  plt.figure(figsize=(16, 8))
  plt.title(title)
  plt.stem(data['data']['size'], data['data']['fragmentation'])
  plt.xscale('log', base=2)
  plt.yscale('linear')
  # Fragmentation is a percentage, so fix the y range to [0, 100]. Note that
  # ylim()'s |ymin|/|ymax| keyword arguments were removed in modern
  # matplotlib (the |base| kwarg used above requires matplotlib >= 3.3,
  # where only |bottom|/|top| are accepted).
  plt.ylim(bottom=0, top=100)
  plt.xlabel('Size (log)')
  plt.ylabel('Fragmentation (%)')
  plt.savefig(output, bbox_inches='tight')
  plt.close()
def PlotProcessWaste(title, data, output):
  """Plots the Unused memory vs size for a single process.

  Args:
    title: Title of the graph
    data: Data to plot. Should contain 'size' and 'unused' entries.
    output: Filename to save the result to.
  """
  plt.figure(figsize=(16, 8))
  plt.title(title)
  plt.xscale('log', base=2)
  plt.yscale('log', base=2)
  # Only plot buckets with non-zero waste; np.array boolean indexing keeps
  # 'size' and 'unused' aligned.
  nonzero = data['data']['unused'] != 0
  plt.stem(data['data']['size'][nonzero], data['data']['unused'][nonzero])
  # ylim()'s |ymin|/|ymax| keyword arguments were removed in modern
  # matplotlib (the |base| kwarg used above requires matplotlib >= 3.3,
  # where only |bottom|/|top| are accepted).
  plt.ylim(bottom=1, top=2**20)
  plt.xlabel('Size (log)')
  plt.ylabel('Unused Size (log)')
  plt.savefig(output, bbox_inches='tight')
  plt.close()
def GetAllocatorAttr(attrs: dict, name: str) -> int:
  """Returns the named allocator attribute as an integer.

  Args:
    attrs: The 'attrs' dict of an allocator entry, as described in
      |_GetAllocatorDumps|.
    name: Attribute name, e.g. 'size' or 'allocated_objects_size'.

  Returns:
    The attribute's 'value' field, parsed as a hexadecimal number. Note the
    return annotation fix: this function has always returned an int, not the
    raw str stored in the trace.
  """
  return int(attrs[name]['value'], base=16)