cros_utils/perf_diff.py - chromiumos/third_party/toolchain-utils - Git at Google

 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 # Copyright 2019 The ChromiumOS Authors
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """One-line documentation for perf_diff module.

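 Parses perf report text files into their "# Events:" sections and prints
 tables that compare section and function counts across several reports.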
 """


 __author__ = "asharif@google.com (Ahmad Sharif)"

 import argparse
 import functools
 import re
 import sys

 from cros_utils import misc
 from cros_utils import tabulator


 # Key in each per-section dict built by GetPerfDictFromReport: number of
 # functions in the section whose percentage is greater than 1.
 ROWS_TO_SHOW = "Rows_to_show_in_the_perf_table"
 # Key in each per-section dict built by GetPerfDictFromReport: sum of the
 # counts of all functions in the section.
 TOTAL_EVENTS = "Total_events_of_this_profile"


 def GetPerfDictFromReport(report_file):
     """Summarise a perf report file as nested dictionaries.

     Args:
         report_file: Path handed to PerfReport.

     Returns:
         A dict keyed by section name. Each value maps every function name
         of that section to its count and additionally holds ROWS_TO_SHOW
         (how many functions have a percentage above 1) and TOTAL_EVENTS
         (the sum of all function counts).
     """
     report = PerfReport(report_file)
     summary = {}
     for section_name, section in report.sections.items():
         entry = summary.setdefault(section_name, {})
         entry[ROWS_TO_SHOW] = 0
         entry[TOTAL_EVENTS] = 0
         for func in section.functions:
             entry["%s" % (func.name)] = func.count
             entry[TOTAL_EVENTS] += func.count
             if func.percent > 1:
                 entry[ROWS_TO_SHOW] += 1
     return summary


 def _SortDictionaryByValue(d):
     l = d.items()

     def GetFloat(x):
         if misc.IsFloat(x):
             return float(x)
         else:
             return x

     sorted_l = sorted(l, key=lambda x: GetFloat(x[1]))
     sorted_l.reverse()
     return [f[0] for f in sorted_l]


 class Tabulator:
     """Prints groups of dictionaries as side-by-side tables."""

     def __init__(self, all_dicts):
         """Store the table groups.

         Args:
             all_dicts: Iterable of groups. Each group is a list of dicts
                 and becomes one printed table with one column per dict.
         """
         self._all_dicts = all_dicts

     def PrintTable(self):
         """Print one table for every group given to the constructor."""
         for group in self._all_dicts:
             self.PrintTableHelper(group)

     def PrintTableHelper(self, dicts):
         """Print a single table built from a list of dictionaries.

         The header row is "name" followed by the column indices. There is
         one row per key found in any dict; rows are ordered by the largest
         value the key has in any column, and a column that lacks the key
         shows the string "0".

         Args:
             dicts: List of dicts; each one supplies a column of the table.
         """
         # Largest value seen for each key across all columns; it decides
         # the row order.
         largest = {}
         for column in dicts:
             for key, value in column.items():
                 if key in largest:
                     largest[key] = max(largest[key], value)
                 else:
                     largest[key] = value

         header = ["name"]
         header.extend(range(len(dicts)))
         table = [header]
         for key in _SortDictionaryByValue(largest):
             cells = [column[key] if key in column else "0" for column in dicts]
             table.append([key] + cells)

         print(tabulator.GetSimpleTable(table))


 class Function:
     """Plain record for one function row of a report section.

     Attributes:
         count: Count parsed from the row (0 until filled in).
         name: Text of the row after the count ("" until filled in).
         percent: Percentage parsed from the row (0 until filled in).
     """

     def __init__(self):
         # All fields start at neutral defaults; Section overwrites them
         # while parsing a row.
         self.count, self.name, self.percent = 0, "", 0


 class Section(object):
     """One "Events:" section of a perf report.

     Attributes:
         name: Text following the count on the "Events:" line ("" when the
             section has no such line).
         raw_contents: Unparsed text of the section.
         count: The count on the "Events:" line converted with
             misc.UnitToNumber (0 when the section has no such line).
         functions: Function records parsed from the data rows, in the
             order they appear in the text.
     """

     def __init__(self, contents):
         """Parse contents, the raw text of a single section."""
         self.name = ""
         self.raw_contents = contents
         # Defaults so that a section without an "Events:" line still has
         # the attributes that code reading a Section accesses.
         self.count = 0
         self.functions = []
         self._ParseSection()

     def _ParseSection(self):
         """Fill in name, count and functions from raw_contents.

         Raises:
             AssertionError: raw_contents has more than one "Events:" line.
         """
         matches = re.findall(r"Events: (\w+)\s+(.*)", self.raw_contents)
         assert len(matches) <= 1, "More than one event found in 1 section"
         if not matches:
             return
         count_text, self.name = matches[0]
         self.count = misc.UnitToNumber(count_text)

         self.functions = []
         for line in self.raw_contents.splitlines():
             # Only rows containing "%" that are not "#" comment lines are
             # data rows; blank lines contain no "%" and are skipped too.
             if "%" not in line or line.startswith("#"):
                 continue
             # Row layout: percentage, integer count, then the name, which
             # may itself contain spaces.
             fields = [f for f in line.split(" ") if f]
             function = Function()
             function.percent = float(fields[0].strip("%"))
             function.count = int(fields[1])
             function.name = " ".join(fields[2:])
             self.functions.append(function)


 class PerfReport(object):
     """Parsed form of one perf report text file.

     Attributes:
         perf_file: Path the report was read from.
         sections: Maps section name to its Section object.
         metadata: "key: value" pairs found in the text that precedes the
             first "# Events:" marker.
         event_counts: Reset to an empty dict while parsing; this class
             does not put anything into it.
     """

     def __init__(self, perf_file):
         """Read perf_file and parse its header and sections."""
         self.perf_file = perf_file
         self._ReadFile()
         self.sections = {}
         self.metadata = {}
         self._section_contents = []
         self._section_header = ""
         self._SplitSections()
         self._ParseSections()
         self._ParseSectionHeader()

     def _ParseSectionHeader(self):
         """Parse a header of a perf report file."""
         # The "captured on" field is inaccurate - this actually refers to when the
         # report was generated, not when the data was captured.
         for line in self._section_header.splitlines():
             # Drop the first two characters of every header line
             # (presumably the "# " comment prefix - confirm against a real
             # report).
             line = line[2:]
             if ":" in line:
                 key, val = line.strip().split(":", 1)
                 self.metadata[key.strip()] = val.strip()

     def _ReadFile(self):
         """Load the whole report into memory."""
         # The context manager closes the handle even when read() raises.
         with open(self.perf_file) as report:
             self._perf_contents = report.read()

     def _ParseSections(self):
         """Build a Section for every chunk found by _SplitSections."""
         self.event_counts = {}
         self.sections = {}
         for section_content in self._section_contents:
             section = Section(section_content)
             section.name = self._GetHumanReadableName(section.name)
             self.sections[section.name] = section

     # TODO(asharif): Do this better.
     def _GetHumanReadableName(self, section_name):
         """Translate a "raw" section name using the report header.

         Names without "raw" are returned unchanged. Otherwise the last
         space-separated word of the name is searched for in the header and
         the sixth space-separated word of the first suitable header line
         is returned. When no header line qualifies, section_name itself is
         returned so that callers never receive None.
         """
         if "raw" not in section_name:
             return section_name
         raw_number = section_name.strip().split(" ")[-1]
         for line in self._section_header.splitlines():
             if raw_number not in line:
                 continue
             words = line.strip().split(" ")
             # Skip lines that mention the number but are too short to
             # carry a name in the sixth position.
             if len(words) > 5:
                 return words[5]
         return section_name

     def _SplitSections(self):
         """Cut the report into the header and one chunk per section."""
         starts = [
             m.start() for m in re.finditer("# Events:", self._perf_contents)
         ]
         # The end of the text closes the last section; with no sections
         # at all it makes the whole text the header.
         bounds = starts + [len(self._perf_contents)]
         self._section_contents = [
             self._perf_contents[begin:end]
             for begin, end in zip(bounds, bounds[1:])
         ]
         self._section_header = self._perf_contents[0 : bounds[0]]


 class PerfDiffer(object):
     """Compares event sections and function counts across perf reports."""

     def __init__(self, reports, num_symbols, common_only):
         """Remember what to compare.

         Args:
             reports: Report objects exposing perf_file and sections (as
                 PerfReport does).
             num_symbols: Number of leading functions taken from each
                 section of each report.
             common_only: If true, add a "scaled" row for every selected
                 function that occurs in all reports containing the section.
         """
         self._reports = reports
         self._num_symbols = num_symbols
         self._common_only = common_only
         # Section name -> set of function names shared by every report
         # that has the section; filled in by _FindCommonFunctions.
         self._common_function_names = {}

     def DoDiff(self):
         """Build every comparison table and print them with Tabulator.

         The first table lists the report file names, the second the event
         count of each section, followed by one table per section with the
         counts of its selected functions.
         """
         section_names = self._FindAllSections()

         filename_dicts = []
         summary_dicts = []
         for report in self._reports:
             filename_dicts.append({"file": report.perf_file})
             summary_dicts.append(
                 {
                     name: report.sections[name].count
                     for name in section_names
                     if name in report.sections
                 }
             )

         all_dicts = [filename_dicts, summary_dicts]
         for section_name in section_names:
             all_dicts.append(self._BuildSectionDicts(section_name))

         mytabulator = Tabulator(all_dicts)
         mytabulator.PrintTable()

     def _BuildSectionDicts(self, section_name):
         """Return one dict of function counts per report for a section."""
         # A set gives constant-time membership tests in the loop below.
         top_names = set(
             self._GetTopFunctions(section_name, self._num_symbols)
         )
         self._FindCommonFunctions(section_name)
         dicts = []
         for report in self._reports:
             d = {}
             if section_name in report.sections:
                 section = report.sections[section_name]

                 # Get a common scaling factor for this report.
                 common_scaling_factor = self._GetCommonScalingFactor(
                     section
                 )

                 for function in section.functions:
                     if function.name not in top_names:
                         continue
                     key = "%s %s" % (section.name, function.name)
                     d[key] = function.count
                     # In common_only mode also report the count scaled by
                     # the common factor.
                     if self._common_only and (
                         function.name
                         in self._common_function_names[section.name]
                     ):
                         d[key + " scaled"] = (
                             common_scaling_factor * function.count
                         )
             dicts.append(d)
         return dicts

     def _FindAllSections(self):
         """Return all section names, largest event count first."""
         sections = {}
         for report in self._reports:
             for section in report.sections.values():
                 if section.name in sections:
                     sections[section.name] = max(
                         sections[section.name], section.count
                     )
                 else:
                     sections[section.name] = section.count
         return _SortDictionaryByValue(sections)

     def _GetCommonScalingFactor(self, section):
         """Return 100 divided by the total count of the common functions.

         Returns 0.0 when that total is zero, so a section without common
         functions (or with only zero counts) no longer raises
         ZeroDivisionError.
         """
         common_names = self._common_function_names[section.name]
         common_count = self._GetCount(
             section, lambda name: name in common_names
         )
         if not common_count:
             return 0.0
         return 100.0 / common_count

     def _GetCount(self, section, filter_fun=None):
         """Sum the counts of the functions whose name passes filter_fun.

         Without filter_fun every function of the section is counted.
         """
         return sum(
             int(function.count)
             for function in section.functions
             if not filter_fun or filter_fun(function.name)
         )

     def _FindCommonFunctions(self, section_name):
         """Record the function names shared by all reports with a section.

         Reports that lack the section are ignored. When no report has it,
         an empty set is stored.
         """
         name_sets = [
             {f.name for f in report.sections[section_name].functions}
             for report in self._reports
             if section_name in report.sections
         ]
         self._common_function_names[section_name] = (
             set.intersection(*name_sets) if name_sets else set()
         )

     def _GetTopFunctions(self, section_name, num_functions):
         """Return names of the leading functions of a section.

         The first num_functions entries of the section are taken from each
         report; the names are returned ordered by their largest count in
         any report.
         """
         all_functions = {}
         for report in self._reports:
             if section_name not in report.sections:
                 continue
             section = report.sections[section_name]
             for f in section.functions[:num_functions]:
                 if f.name in all_functions:
                     all_functions[f.name] = max(
                         all_functions[f.name], f.count
                     )
                 else:
                     all_functions[f.name] = f.count
         # FIXME(asharif): Don't really need to sort these...
         return _SortDictionaryByValue(all_functions)

     def _GetFunctionsDict(self, section, function_names):
         """Map name to count for the section's functions in function_names."""
         return {
             function.name: function.count
             for function in section.functions
             if function.name in function_names
         }


 def Main(argv):
     """Parse the command line, load the reports and print their diff.

     Args:
         argv: Full argument vector; everything argparse does not recognise
             is kept, and all of those leftovers except the first are used
             as report paths.

     Returns:
         0 once the diff has been printed.
     """
     parser = argparse.ArgumentParser()
     parser.add_argument(
         "-n",
         "--num_symbols",
         dest="num_symbols",
         default="5",
         help="The number of symbols to show.",
     )
     parser.add_argument(
         "-c",
         "--common_only",
         dest="common_only",
         action="store_true",
         default=False,
         help="Diff common symbols only.",
     )
     options, leftovers = parser.parse_known_args(argv)

     # The first leftover is skipped; with the sys.argv passed by the
     # script entry point it is expected to be the program name.
     reports = [PerfReport(path) for path in leftovers[1:]]
     differ = PerfDiffer(
         reports, int(options.num_symbols), options.common_only
     )
     differ.DoDiff()
     return 0


 if __name__ == "__main__":
     # Use Main's return value as the process exit status.
     exit_status = Main(sys.argv)
     sys.exit(exit_status)
	#!/usr/bin/env python3
	# -*- coding: utf-8 -*-
	# Copyright 2019 The ChromiumOS Authors
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	"""One-line documentation for perf_diff module.

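	Parses perf report text files into their "# Events:" sections and prints
	tables that compare section and function counts across several reports.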
	"""


	__author__ = "asharif@google.com (Ahmad Sharif)"

	import argparse
	import functools
	import re
	import sys

	from cros_utils import misc
	from cros_utils import tabulator


	# Key in each per-section dict built by GetPerfDictFromReport: number of
	# functions in the section whose percentage is greater than 1.
	ROWS_TO_SHOW = "Rows_to_show_in_the_perf_table"
	# Key in each per-section dict built by GetPerfDictFromReport: sum of the
	# counts of all functions in the section.
	TOTAL_EVENTS = "Total_events_of_this_profile"


	def GetPerfDictFromReport(report_file):
	    """Summarise a perf report file as nested dictionaries.

	    Args:
	        report_file: Path handed to PerfReport.

	    Returns:
	        A dict keyed by section name. Each value maps every function name
	        of that section to its count and additionally holds ROWS_TO_SHOW
	        (how many functions have a percentage above 1) and TOTAL_EVENTS
	        (the sum of all function counts).
	    """
	    report = PerfReport(report_file)
	    summary = {}
	    for section_name, section in report.sections.items():
	        entry = summary.setdefault(section_name, {})
	        entry[ROWS_TO_SHOW] = 0
	        entry[TOTAL_EVENTS] = 0
	        for func in section.functions:
	            entry["%s" % (func.name)] = func.count
	            entry[TOTAL_EVENTS] += func.count
	            if func.percent > 1:
	                entry[ROWS_TO_SHOW] += 1
	    return summary


	def _SortDictionaryByValue(d):
	l = d.items()

	def GetFloat(x):
	if misc.IsFloat(x):
	return float(x)
	else:
	return x

	sorted_l = sorted(l, key=lambda x: GetFloat(x[1]))
	sorted_l.reverse()
	return [f[0] for f in sorted_l]


	class Tabulator(object):
	    """Prints groups of dictionaries as side-by-side tables."""

	    def __init__(self, all_dicts):
	        """Store the table groups.

	        Args:
	            all_dicts: Iterable of groups. Each group is a list of dicts
	                and becomes one printed table with one column per dict.
	        """
	        self._all_dicts = all_dicts

	    def PrintTable(self):
	        """Print one table for every group given to the constructor."""
	        for group in self._all_dicts:
	            self.PrintTableHelper(group)

	    def PrintTableHelper(self, dicts):
	        """Print a single table built from a list of dictionaries.

	        The header row is "name" followed by the column indices. There is
	        one row per key found in any dict; rows are ordered by the largest
	        value the key has in any column, and a column that lacks the key
	        shows the string "0".

	        Args:
	            dicts: List of dicts; each one supplies a column of the table.
	        """
	        # Largest value seen for each key across all columns; it decides
	        # the row order.
	        largest = {}
	        for column in dicts:
	            for key, value in column.items():
	                if key in largest:
	                    largest[key] = max(largest[key], value)
	                else:
	                    largest[key] = value

	        header = ["name"]
	        header.extend(range(len(dicts)))
	        table = [header]
	        for key in _SortDictionaryByValue(largest):
	            cells = [column[key] if key in column else "0" for column in dicts]
	            table.append([key] + cells)

	        print(tabulator.GetSimpleTable(table))


	class Function(object):
	    """Plain record for one function row of a report section.

	    Attributes:
	        count: Count parsed from the row (0 until filled in).
	        name: Text of the row after the count ("" until filled in).
	        percent: Percentage parsed from the row (0 until filled in).
	    """

	    def __init__(self):
	        # All fields start at neutral defaults; Section overwrites them
	        # while parsing a row.
	        self.count = 0
	        self.name = ""
	        self.percent = 0


	class Section(object):
	    """One "Events:" section of a perf report.

	    Attributes:
	        name: Text following the count on the "Events:" line ("" when the
	            section has no such line).
	        raw_contents: Unparsed text of the section.
	        count: The count on the "Events:" line converted with
	            misc.UnitToNumber (0 when the section has no such line).
	        functions: Function records parsed from the data rows, in the
	            order they appear in the text.
	    """

	    def __init__(self, contents):
	        """Parse contents, the raw text of a single section."""
	        self.name = ""
	        self.raw_contents = contents
	        # Defaults so that a section without an "Events:" line still has
	        # the attributes that code reading a Section accesses.
	        self.count = 0
	        self.functions = []
	        self._ParseSection()

	    def _ParseSection(self):
	        """Fill in name, count and functions from raw_contents.

	        Raises:
	            AssertionError: raw_contents has more than one "Events:" line.
	        """
	        matches = re.findall(r"Events: (\w+)\s+(.*)", self.raw_contents)
	        assert len(matches) <= 1, "More than one event found in 1 section"
	        if not matches:
	            return
	        count_text, self.name = matches[0]
	        self.count = misc.UnitToNumber(count_text)

	        self.functions = []
	        for line in self.raw_contents.splitlines():
	            # Only rows containing "%" that are not "#" comment lines are
	            # data rows; blank lines contain no "%" and are skipped too.
	            if "%" not in line or line.startswith("#"):
	                continue
	            # Row layout: percentage, integer count, then the name, which
	            # may itself contain spaces.
	            fields = [f for f in line.split(" ") if f]
	            function = Function()
	            function.percent = float(fields[0].strip("%"))
	            function.count = int(fields[1])
	            function.name = " ".join(fields[2:])
	            self.functions.append(function)


	class PerfReport(object):
	    """Parsed form of one perf report text file.

	    Attributes:
	        perf_file: Path the report was read from.
	        sections: Maps section name to its Section object.
	        metadata: "key: value" pairs found in the text that precedes the
	            first "# Events:" marker.
	        event_counts: Reset to an empty dict while parsing; this class
	            does not put anything into it.
	    """

	    def __init__(self, perf_file):
	        """Read perf_file and parse its header and sections."""
	        self.perf_file = perf_file
	        self._ReadFile()
	        self.sections = {}
	        self.metadata = {}
	        self._section_contents = []
	        self._section_header = ""
	        self._SplitSections()
	        self._ParseSections()
	        self._ParseSectionHeader()

	    def _ParseSectionHeader(self):
	        """Parse a header of a perf report file."""
	        # The "captured on" field is inaccurate - this actually refers to when the
	        # report was generated, not when the data was captured.
	        for line in self._section_header.splitlines():
	            # Drop the first two characters of every header line
	            # (presumably the "# " comment prefix - confirm against a real
	            # report).
	            line = line[2:]
	            if ":" in line:
	                key, val = line.strip().split(":", 1)
	                self.metadata[key.strip()] = val.strip()

	    def _ReadFile(self):
	        """Load the whole report into memory."""
	        # The context manager closes the handle even when read() raises.
	        with open(self.perf_file) as report:
	            self._perf_contents = report.read()

	    def _ParseSections(self):
	        """Build a Section for every chunk found by _SplitSections."""
	        self.event_counts = {}
	        self.sections = {}
	        for section_content in self._section_contents:
	            section = Section(section_content)
	            section.name = self._GetHumanReadableName(section.name)
	            self.sections[section.name] = section

	    # TODO(asharif): Do this better.
	    def _GetHumanReadableName(self, section_name):
	        """Translate a "raw" section name using the report header.

	        Names without "raw" are returned unchanged. Otherwise the last
	        space-separated word of the name is searched for in the header and
	        the sixth space-separated word of the first suitable header line
	        is returned. When no header line qualifies, section_name itself is
	        returned so that callers never receive None.
	        """
	        if "raw" not in section_name:
	            return section_name
	        raw_number = section_name.strip().split(" ")[-1]
	        for line in self._section_header.splitlines():
	            if raw_number not in line:
	                continue
	            words = line.strip().split(" ")
	            # Skip lines that mention the number but are too short to
	            # carry a name in the sixth position.
	            if len(words) > 5:
	                return words[5]
	        return section_name

	    def _SplitSections(self):
	        """Cut the report into the header and one chunk per section."""
	        starts = [
	            m.start() for m in re.finditer("# Events:", self._perf_contents)
	        ]
	        # The end of the text closes the last section; with no sections
	        # at all it makes the whole text the header.
	        bounds = starts + [len(self._perf_contents)]
	        self._section_contents = [
	            self._perf_contents[begin:end]
	            for begin, end in zip(bounds, bounds[1:])
	        ]
	        self._section_header = self._perf_contents[0 : bounds[0]]


	class PerfDiffer(object):
	    """Compares event sections and function counts across perf reports."""

	    def __init__(self, reports, num_symbols, common_only):
	        """Remember what to compare.

	        Args:
	            reports: Report objects exposing perf_file and sections (as
	                PerfReport does).
	            num_symbols: Number of leading functions taken from each
	                section of each report.
	            common_only: If true, add a "scaled" row for every selected
	                function that occurs in all reports containing the section.
	        """
	        self._reports = reports
	        self._num_symbols = num_symbols
	        self._common_only = common_only
	        # Section name -> set of function names shared by every report
	        # that has the section; filled in by _FindCommonFunctions.
	        self._common_function_names = {}

	    def DoDiff(self):
	        """Build every comparison table and print them with Tabulator.

	        The first table lists the report file names, the second the event
	        count of each section, followed by one table per section with the
	        counts of its selected functions.
	        """
	        section_names = self._FindAllSections()

	        filename_dicts = []
	        summary_dicts = []
	        for report in self._reports:
	            filename_dicts.append({"file": report.perf_file})
	            summary_dicts.append(
	                {
	                    name: report.sections[name].count
	                    for name in section_names
	                    if name in report.sections
	                }
	            )

	        all_dicts = [filename_dicts, summary_dicts]
	        for section_name in section_names:
	            all_dicts.append(self._BuildSectionDicts(section_name))

	        mytabulator = Tabulator(all_dicts)
	        mytabulator.PrintTable()

	    def _BuildSectionDicts(self, section_name):
	        """Return one dict of function counts per report for a section."""
	        # A set gives constant-time membership tests in the loop below.
	        top_names = set(
	            self._GetTopFunctions(section_name, self._num_symbols)
	        )
	        self._FindCommonFunctions(section_name)
	        dicts = []
	        for report in self._reports:
	            d = {}
	            if section_name in report.sections:
	                section = report.sections[section_name]

	                # Get a common scaling factor for this report.
	                common_scaling_factor = self._GetCommonScalingFactor(
	                    section
	                )

	                for function in section.functions:
	                    if function.name not in top_names:
	                        continue
	                    key = "%s %s" % (section.name, function.name)
	                    d[key] = function.count
	                    # In common_only mode also report the count scaled by
	                    # the common factor.
	                    if self._common_only and (
	                        function.name
	                        in self._common_function_names[section.name]
	                    ):
	                        d[key + " scaled"] = (
	                            common_scaling_factor * function.count
	                        )
	            dicts.append(d)
	        return dicts

	    def _FindAllSections(self):
	        """Return all section names, largest event count first."""
	        sections = {}
	        for report in self._reports:
	            for section in report.sections.values():
	                if section.name in sections:
	                    sections[section.name] = max(
	                        sections[section.name], section.count
	                    )
	                else:
	                    sections[section.name] = section.count
	        return _SortDictionaryByValue(sections)

	    def _GetCommonScalingFactor(self, section):
	        """Return 100 divided by the total count of the common functions.

	        Returns 0.0 when that total is zero, so a section without common
	        functions (or with only zero counts) no longer raises
	        ZeroDivisionError.
	        """
	        common_names = self._common_function_names[section.name]
	        common_count = self._GetCount(
	            section, lambda name: name in common_names
	        )
	        if not common_count:
	            return 0.0
	        return 100.0 / common_count

	    def _GetCount(self, section, filter_fun=None):
	        """Sum the counts of the functions whose name passes filter_fun.

	        Without filter_fun every function of the section is counted.
	        """
	        return sum(
	            int(function.count)
	            for function in section.functions
	            if not filter_fun or filter_fun(function.name)
	        )

	    def _FindCommonFunctions(self, section_name):
	        """Record the function names shared by all reports with a section.

	        Reports that lack the section are ignored. When no report has it,
	        an empty set is stored.
	        """
	        name_sets = [
	            {f.name for f in report.sections[section_name].functions}
	            for report in self._reports
	            if section_name in report.sections
	        ]
	        self._common_function_names[section_name] = (
	            set.intersection(*name_sets) if name_sets else set()
	        )

	    def _GetTopFunctions(self, section_name, num_functions):
	        """Return names of the leading functions of a section.

	        The first num_functions entries of the section are taken from each
	        report; the names are returned ordered by their largest count in
	        any report.
	        """
	        all_functions = {}
	        for report in self._reports:
	            if section_name not in report.sections:
	                continue
	            section = report.sections[section_name]
	            for f in section.functions[:num_functions]:
	                if f.name in all_functions:
	                    all_functions[f.name] = max(
	                        all_functions[f.name], f.count
	                    )
	                else:
	                    all_functions[f.name] = f.count
	        # FIXME(asharif): Don't really need to sort these...
	        return _SortDictionaryByValue(all_functions)

	    def _GetFunctionsDict(self, section, function_names):
	        """Map name to count for the section's functions in function_names."""
	        return {
	            function.name: function.count
	            for function in section.functions
	            if function.name in function_names
	        }


	def Main(argv):
	    """Parse the command line, load the reports and print their diff.

	    Args:
	        argv: Full argument vector; everything argparse does not recognise
	            is kept, and all of those leftovers except the first are used
	            as report paths.

	    Returns:
	        0 once the diff has been printed.
	    """
	    parser = argparse.ArgumentParser()
	    parser.add_argument(
	        "-n",
	        "--num_symbols",
	        dest="num_symbols",
	        default="5",
	        help="The number of symbols to show.",
	    )
	    parser.add_argument(
	        "-c",
	        "--common_only",
	        dest="common_only",
	        action="store_true",
	        default=False,
	        help="Diff common symbols only.",
	    )
	    options, leftovers = parser.parse_known_args(argv)

	    # The first leftover is skipped; with the sys.argv passed by the
	    # script entry point it is expected to be the program name.
	    reports = [PerfReport(path) for path in leftovers[1:]]
	    differ = PerfDiffer(
	        reports, int(options.num_symbols), options.common_only
	    )
	    differ.DoDiff()
	    return 0


	if __name__ == "__main__":
	    # Use Main's return value as the process exit status.
	    sys.exit(Main(sys.argv))