| #!/usr/bin/env python3 |
| # Copyright 2021 The Chromium Authors |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """Parses allocation profiles from a trace and computes the internal |
| fragmentation from PartitionAlloc, given a list of buckets. |
| |
We compute this as the difference in memory usage between the given trace and
the same trace with every allocation rounded up into one of the given buckets.
This provides a rough estimate of the memory usage increase caused by a
specific bucket mapping. The increase is likely underestimated, as this script
only considers surviving allocations.
| |
NOTE: Buckets (given with the --buckets flag) should be a subset of the buckets
in Chrome when the trace was recorded. To see why, say we have a request size
of 999B, and the next bucket in Chrome is 1024B. The 999B allocation will be
recorded in the trace as 1024B (since we don't record the requested size in
most cases, just the allocated size). Now say our buckets (from the --buckets
flag) include a 1000B bucket. Our 999B allocation would have gone there if we
were actually using that bucket mapping in Chrome, which would have reduced
fragmentation. However, this script rounds upwards from the recorded 1024B
size, and would report increased fragmentation instead.
| |
This script outputs two sets of graphs. The first shows, for each bucket, the
increase in memory usage (in percent) that the given bucket mapping would
cause. The second shows the increase in memory (in bytes) from using the given
bucket mapping instead of the one recorded in the trace.
| |
| See: trace_utils.py for details on collecting a trace. |
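
Example invocation (file names are illustrative):
  ./internal_fragmentation.py --trace=trace.json.gz \
      --buckets=bucket_sizes.txt --output-dir=/tmp/graphs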
| """ |
| |
| import argparse |
| import logging |
| import os |
| import re |
| |
| import numpy as np |
| |
| import trace_utils |
| |
| |
| def _LoadBuckets(filename: str) -> list[int]: |
| """Loads a list of bucket sizes from a file. The file should contain |
| integers, in Newline Separated Value format. |
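
  For example, a file containing the lines "48", "32" and "64" (in any order)
  yields [32, 48, 64].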
| |
| Args: |
| filename: Filename. |
| |
| Returns: |
| A sorted list of bucket sizes to use. |
| """ |
  with open(filename, 'r') as f:
    buckets = [int(line) for line in f]
| |
| buckets.sort() |
| return buckets |
| |
| |
| def _RoundUpToNearestBucket(buckets: list[int], size: int) -> int: |
| """Rounds a size up to the nearest bucket. |
| |
| Args: |
| buckets: Sorted list of bucket sizes. |
| size: The size to round upwards. |
| |
| Returns: |
    The smallest bucket size that is >= |size|, or the largest bucket if
    |size| exceeds every bucket.
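
  For example:
    >>> _RoundUpToNearestBucket([16, 32, 64], 20)
    32
    >>> _RoundUpToNearestBucket([16, 32, 64], 64)
    64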
| """ |
| curr = None |
| for bucket_size in buckets: |
| curr = bucket_size |
| if size <= bucket_size: |
| break |
| assert curr is not None |
| return curr |
| |
| |
| def _SummarizeStatsForAllocators(result_for_pid: dict, allocators: dict, |
| buckets: list[int]): |
| """We compute the wasted memory here by taking the 'allocated_objects_size' |
| and rounding it up to nearest bucket (in |buckets|), and comparing the two. |
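
  For example (illustrative numbers): a trace bucket with slot_size 48 and
  allocated_objects_size 480 (ten live objects), mapped to a nearest bucket of
  64, is rounded to (480 / 48) * 64 = 640 bytes, i.e. 25% internal
  fragmentation and 160 unused bytes.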
| """ |
| size_counts = [] |
| total_allocated_size = 0 |
| rounded_allocated_size = 0 |
| # We are only interested in the main malloc partition, reported in |
| # |ReportPartitionAllocStats|. |
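  # e.g. "malloc/partitions/allocator/buckets/bucket_1024" would match.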
  pattern = re.compile(r"malloc/partitions/allocator/buckets/bucket_\d+")
| for entry in allocators: |
| # |entry| can represent a subset of the allocations for a given allocator. |
    if not pattern.match(entry):
| continue |
| attrs = allocators[entry]['attrs'] |
| |
| allocated_objects_size = trace_utils.GetAllocatorAttr( |
| attrs, 'allocated_objects_size') |
| slot_size = trace_utils.GetAllocatorAttr(attrs, 'slot_size') |
| if allocated_objects_size != 0: |
| # See: |CanStoreRawSize| in |
| # base/allocator/partition_allocator/src/partition_alloc/partition_bucket.h |
| # The single slot span size below assumes we have 4 KiB pages. |
| if slot_size >= 0xE0000: # Direct Mapped, so not affected by bucket size |
| rounded_allocated_objects_size = allocated_objects_size |
| elif slot_size >= 4 << 14: # Single slot span |
        # We care about the increase in total memory here, not just the
        # increase in allocated size. To account for this, treat the full
        # slot_size as allocated before rounding it up.
        allocated_objects_size = slot_size
| rounded_allocated_objects_size = _RoundUpToNearestBucket( |
| buckets, slot_size) |
| else: |
| bucket_size = _RoundUpToNearestBucket(buckets, slot_size) |
| rounded_allocated_objects_size = (allocated_objects_size / |
| slot_size) * bucket_size |
| assert bucket_size in buckets |
| assert slot_size <= bucket_size |
| fragmentation = (1 - |
| allocated_objects_size / rounded_allocated_objects_size) |
| total_allocated_size += allocated_objects_size |
| rounded_allocated_size += rounded_allocated_objects_size |
| else: |
| bucket_size = 0 |
| rounded_allocated_objects_size = 0 |
| fragmentation = 0 |
| |
| size_counts.append( |
| (slot_size, fragmentation * 100, |
| rounded_allocated_objects_size - allocated_objects_size)) |
| size_counts.sort() |
  if total_allocated_size == 0:
    # No matching bucket entries were found for this process; leave 'data'
    # unset so that main() skips plotting it.
    return
  result_for_pid['regression'] = ((rounded_allocated_size -
                                   total_allocated_size) * 100 /
                                  total_allocated_size)
  # Note: np.int has been removed from NumPy; use an explicit width instead.
  result_for_pid['data'] = np.array(size_counts,
                                    dtype=[('size', np.int64),
                                           ('fragmentation', np.int64),
                                           ('unused', np.int64)])
| |
| |
| def _PlotProcess(all_data: dict, pid: int, output_prefix: str): |
| """Represents the allocation size distribution. |
| |
| Args: |
    all_data: As returned by trace_utils.ParseTrace().
| pid: PID to plot the data for. |
| output_prefix: Prefix of the output file. |
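
  For example, with output_prefix '/tmp/graphs/internal' and pid 1234, this
  writes /tmp/graphs/internal_1234_fragmentation.png and
  /tmp/graphs/internal_1234_unused.png.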
| """ |
| data = all_data[pid] |
  logging.info('Plotting data for PID %d', pid)
| |
| fragmentation_title = ('Internal Fragmentation (%%) vs Size - %s - %s' % |
| (data['name'], data['labels'])) |
  fragmentation_output = '%s_%d_fragmentation.png' % (output_prefix, pid)
| trace_utils.PlotProcessFragmentation(fragmentation_title, data, |
| fragmentation_output) |
| |
| unused_title = ( |
| 'Internal Unused Memory vs Size - %s - %s (%.2f%% Regression)' % |
| (data['name'], data['labels'], data['regression'])) |
  unused_output = '%s_%d_unused.png' % (output_prefix, pid)
| trace_utils.PlotProcessWaste(unused_title, data, unused_output) |
| |
| |
| def _CreateArgumentParser(): |
| parser = argparse.ArgumentParser() |
| parser.add_argument( |
| '--trace', |
| type=str, |
| required=True, |
| help='Path to a trace.json[.gz] with memory-infra enabled.') |
| parser.add_argument('--output-dir', |
| type=str, |
| required=True, |
| help='Output directory for graphs.') |
| parser.add_argument('--buckets', |
| type=str, |
| required=True, |
| help='Path to file containing bucket sizes.') |
| return parser |
| |
| |
| def main(): |
| logging.basicConfig(level=logging.INFO) |
| parser = _CreateArgumentParser() |
| args = parser.parse_args() |
| |
| logging.info('Loading the trace') |
| trace = trace_utils.LoadTrace(args.trace) |
| buckets = _LoadBuckets(args.buckets) |
| |
| logging.info('Parsing the trace') |
| stats_per_process = trace_utils.ParseTrace( |
| trace, (lambda result_for_pid, allocators: _SummarizeStatsForAllocators( |
| result_for_pid, allocators, buckets))) |
| |
| logging.info('Plotting the results') |
| for pid in stats_per_process: |
| if 'data' in stats_per_process[pid]: |
| _PlotProcess(stats_per_process, pid, |
| os.path.join(args.output_dir, 'internal')) |
| |
| |
| if __name__ == '__main__': |
| main() |