#!/usr/bin/python2.6
# Copyright 2011 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Wrappers for the IBM Performance Inspector toolkit.
This module provides thin wrappers for the IBM Performance Inspector toolkit,
which unfortunately does not expose any interface other than the command-line.
The toolkit, as well as supporting documentation and examples, may be found
here:
http://perfinsp.sourceforge.net/index.html
The module defines the following classes:
Error: The exception type that is used by the module.
IbmPerfTool: A base class containing utility functions for running
toolkit utilities, automatically ensuring it is installed.
HardwarePerformanceCounter: Class for controlling and querying hardware
performance counters on a per-process basis.
"""
__author__ = "chrisha@chromium.org (Chris Hamilton)"
import logging
import os.path
import re
import subprocess
_LOGGER = logging.getLogger(__name__)
# The default directory where we expect the IBM Performance Inspector toolkit
# to be installed.
DEFAULT_DIR = "C:\\ibmperf\\bin"
# These are the names of the various executables we wrap.
_DDQ = "ddq"
_MPEVT = "mpevt"
_PTT = "ptt"
_TINSTALL = "tinstall.cmd"
class Error(Exception):
"""Base exception class for all exceptions raised by this module."""
class ExecutionFailed(Error):
"""Raised when a command-line tool fails."""
class InvalidMetric(Error):
"""Raised when an invalid metric is specified."""
class NotRunning(Error):
"""Raised when the toolkit is not running but should be."""
class UnexpectedOutput(Error):
"""Raised when the output of the underlying tools is not as expected."""
class IbmPerfTool(object):
"""Base class wrapper for IBM Performance Inspector tools.
Provides utility functions for accessing the toolkit, and automatically
checks if it is installed, trying to install it if necessary.
"""
def __init__(self, ibmperf_dir=DEFAULT_DIR):
"""Initializes this instance.
Checks to see if the toolkit is installed (the running kernel has been
patched), and attempts to install it if not.
Args:
ibmperf_dir: path to the IBM Performance Inspector tools. Defaults to
DEFAULT_DIR.
Raises:
ExecutionFailed: An error occurred trying to install the toolkit, or
while checking if it was installed.
OSError: The toolkit was not installed at the provided path.
"""
self._ibmperf_dir = os.path.abspath(ibmperf_dir)
try:
_LOGGER.info("Checking if driver installed.")
self._Run(_DDQ, [])
_LOGGER.info("Driver already installed.")
except Error:
# If ddq fails, it's because the driver is not installed. Try
# to install it.
_LOGGER.info("Installing IBM Performance Inspector driver.")
self._Run(_TINSTALL, [])
def _Popen(self, cmd_line):
"""Creates a subprocess.Popen object for the given command-line.
Separated for easy injection of results in unittests.
Args:
cmd_line: The command line to execute, with the executable and each
argument as a separate entry in a list.
Returns:
An instance of subprocess.Popen for the given command-line.
"""
return subprocess.Popen(cmd_line,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
cwd=self._ibmperf_dir)
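# Note: a unit test can inject canned tool output by overriding _Popen, as the
# docstring above suggests. A minimal sketch (FakePopen and FakeIbmPerfTool are
# hypothetical names, not part of this module):
#
#   class FakePopen(object):
#     returncode = 0
#     def communicate(self):
#       return ("canned stdout\n", "")
#
#   class FakeIbmPerfTool(IbmPerfTool):
#     def _Popen(self, cmd_line):
#       return FakePopen()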
def _Run(self, toolname, args, expected_returncode=0):
"""Runs the wrapped tool with the given arguments.
Runs the wrapped tool with the given arguments, and returns its output
as a string. Raises an exception if the executable is non-existent or
its return code is not as expected.
Args:
toolname: the name of the executable to be run.
args: a list of arguments to pass on the command-line.
expected_returncode: the return code the tool should return on success.
Defaults to zero.
Returns:
The standard output of the command, as a list of lines.
Raises:
ExecutionFailed: The executable returned with an unexpected code.
OSError: The executable does not exist, or some other error occurred in
subprocess.Popen or Pipe.communicate.
"""
tool_path = os.path.join(self._ibmperf_dir, toolname)
cmd_line = [tool_path] + args
_LOGGER.debug("Running command '%s'.", " ".join(cmd_line))
cmd = self._Popen(cmd_line)
stdout, stderr = cmd.communicate()
returncode = cmd.returncode
if returncode != expected_returncode:
raise ExecutionFailed("'%s' returned code '%d'.\n STDOUT: %s\n"
" STDERR: %s\n" %
(toolname, returncode, stdout, stderr))
# Pylint doesn't know the type of 'stdout', so complains about a missing
# member function. Ignore it.
# pylint: disable=E1103
return stdout.splitlines()
class PrivilegeLevel(object):
"""An enumeration of code privilege levels."""
# Only monitors code in Ring 0 (kernel code).
KERNEL = 0
# Only monitors instructions in Ring 3 (user code).
USER = 1
# Monitors instructions in Ring 0 and Ring 3 (both kernel and user code).
ANY = 2
# Maps PrivilegeLevel constants to command-line options as used by "ptt".
NAMES = {KERNEL: "priv_kernel", USER: "priv_user", ANY: "priv_any"}
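# For example, PrivilegeLevel.NAMES[PrivilegeLevel.USER] is "priv_user", which
# HardwarePerformanceCounter.Start passes verbatim to "ptt init".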
class HardwarePerformanceCounter(IbmPerfTool):
"""Wraps ptt/mpevt, allowing access to hardware performance metrics.
This class is intended to be used via a simple Start/Query/Stop interface.
Note that Query must be called while the counters are running (prior to
calling Stop), as all data is discarded upon calling Stop. Example usage:
hpc = HardwarePerformanceCounter(ibmperf_dir="C:\\ibmperf\\bin")
hpc.Start(["CYCLES", "NONHALTED_CYCLES", "TLB_MISSES"],
PrivilegeLevel.USER)
... run some benchmark here ...
chrome_results = hpc.Query("chrome")
hpc.Stop()
To get a list of available metrics and their descriptions, simply inspect
the |metrics| dict. To determine which of these are free (do not occupy
a hardware counter) and non-free (require a dedicated hardware counter),
look at the sets |free_metrics| and |non_free_metrics|.
It is possible for the utilities to stop working in the middle of a
performance run. If this happens, the likely culprit is an in-place kernel
update that overwrote the kernel patch created by the toolkit driver
installation. The only solution is to reinstall the driver and run the
tests again.
Attributes:
free_metrics: A set of metrics that may be measured without occupying
a hardware counter.
max_counters: The maximum number of hardware counters that may be used
simultaneously.
metrics: A dict of valid metrics, with metric names as keys and their
descriptions as values.
non_free_metrics: A set of metrics that require a dedicated hardware counter.
"""
# The CYCLES counter is a special case, as it is always available on any
# machine.
_CYCLES = "CYCLES"
# If 'mpevt' is unable to provide a description for a metric, we use this by
# default.
_NO_DESCRIPTION = "(no description provided)"
# We wrap the command-line tools by parsing their output. This is a
# collection of strings and regular expressions that simplifies the task.
# We parse the output of 'ptt' to get a list of supported metrics. The output
# is as follows:
#
# ***** ptt v2.0.8 for x86 ***** pid=3304/0xce8 *****
#
# ptt {-? | -??}
# ptt {ints | noints}
# ptt {autoterm | noautoterm}
# ptt numthreads [## | default | large | max]
# ptt init [metric [metric] [...]]] <PrivLevel> [-force]
# ptt term
# ptt info
# ptt dump <ProcList> <JtnmOpt> <SortOpt> <AutoOpt> [-t sec] [-f fn] [-cpi]
#
# <PrivLevel>: [priv_kernel | priv_user | priv_any]
# <ProcList>: [-pl {pid[,pid,...] | name[,name,...]}]
# <JtnmOpt>: [-jtnm | -jtnm=path[\prefix] [-allnames]]
# <SortOpt>: [-sm ## | -sp | -st | -sd | -si] [-a | -d]
# <AutoOpt>: [-s #sec] [r #sec]
#
# Valid 'metric' values are:
# - CTR0 thru CTR1
# - CYCLES
# - NONHALTED_CYCLES
# - INSTR
# - UOPS
# - ALL_UOPS
# - BRANCH
# - MISPRED_BRANCH
# - ITLB_MISS
# - L2_READ_MISS
# - L2_READ_REFS
# - BRANCH_TAKEN
# - DATA_MEM_REFS
# - MISALIGN_MEM_REF
#
# Enter "ptt -??" for more help.
# This matches a line with a metric name on it, as output by 'ptt'. The
# metric name is in the first captured group.
_PTT_METRIC = re.compile("^- ([A-Z0-9_]+)$")
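# For example, the line "- NONHALTED_CYCLES" in the usage text above matches,
# with "NONHALTED_CYCLES" captured as the metric name.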
# The output of 'ptt dump' is as follows (if there are more metrics running,
# there are more columns reported):
#
# ***** ptt v2.0.8 for x86 ***** pid=1944/0x798 *****
#
# PID 432 is chrome
# PID 3052 is chrome
#
# PTT Facility Per-Thread Information
# -----------------------------------
#
# PID TID Disp Intr INSTR MISPRED_BRANCH
# ----- ----- -------- -------- ---------------- ----------------
# 432 3848 1833 1837 677298430 2191266
# 432 480 99 63 194081825 425171
# 432 4056 244 105 57974343 404942
# 432 3156 86 48 12158721 150688
# 432 3540 38 32 7715634 122006
# 432 2324 15 10 5110476 48280
# 432 2844 18 7 2830719 41646
# 432 192 25 13 1397386 20040
# 432 2108 58 5 440747 5070
# 432 2028 2 2 123323 1666
# 432 2316 3 1 120130 1460
# 432 3972 11 1 116428 1282
# 432 3632 4 3 73104 2028
# 432 2832 2 2 62340 1302
# 432 2996 1 1 48720 959
# 432 2300 2 1 43568 1027
# 432 260 5 5 32896 1118
# 432 3628 2 1 32539 634
# ---------------- ----------------
# 959661329 3420585
#
# 3052 3304 409 514 286527847 2114053
# 3052 3136 145 18 990444 19262
# 3052 2620 331 188 398905 8614
# 3052 608 2 1 49511 875
# 3052 3984 3 4 48383 1394
# ---------------- ----------------
# 288015090 2144198
#
# Execution ended: 1 iterations.
# This matches a line of integer data in the output of 'ptt dump'. The
# individual column values are recovered by splitting the line on whitespace.
_DUMP_DATA = re.compile("""^(?:[0-9]+\s+){4} # PID/TID/Disp/Intr.
[0-9]+(?:\s+[0-9]+)*$ # Metric values.""",
re.VERBOSE)
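# For example, the line "432 3848 1833 1837 677298430 2191266" from the dump
# above matches as a data row for PID 432.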
# This matches the header prior to the start of data reported by 'ptt dump'.
# The metric names are recovered by splitting the matched line on whitespace.
_DUMP_HEADER = re.compile("""^PID\s+TID\s+Disp\s+Intr\s+ # Fixed columns.
[A-Z0-9_].*$ # Metric names.""",
re.VERBOSE)
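# For example, the header "PID TID Disp Intr INSTR MISPRED_BRANCH" in the dump
# above matches; the columns from the fifth onwards name the running metrics.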
# If there is no data collected for a given process, this will be output
# instead.
_NO_DATA = re.compile("""^([0-9]+) # PID.
\s+\*\*\s+No Data\s+\*\*$""",
re.VERBOSE)
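# For example, a line such as "3052  ** No Data **" (hypothetical; no such line
# appears in the sample dump above) would match, capturing the PID 3052.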
# The command 'mpevt -l' has output like the following:
#
# ***** mpevt v2.0.8 for x86 *****
#
# Id Name
# --- --------------------
# 101 NONHALTED_CYCLES
# 102 INSTR
# 104 UOPS
# 105 ALL_UOPS
# 106 BRANCH
# 107 MISPRED_BRANCH
# 109 ITLB_MISS
# 114 L2_READ_MISS
# 115 L2_READ_REFS
# 123 BRANCH_TAKEN
# 124 DATA_MEM_REFS
# 125 MISALIGN_MEM_REF
# Matches the divider prior to the beginning of metric names and
# descriptions.
_DIVIDER = re.compile("^-+\s+-+$")
_INT = re.compile("^[0-9]+$")
_METRIC_NAME = re.compile("^[A-Z0-9_]+$")
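# For example, in the "mpevt -l" output above "--- --------------------" is the
# divider matched by _DIVIDER, while the line "102 INSTR" yields an id matching
# _INT and a name matching _METRIC_NAME.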
def __init__(self, *args, **kwargs):
"""Initializes a HardwarePerformanceCounter object.
All arguments are passed directly to the IbmPerfTool base class, and any
errors raised there are left uncaught.
"""
super(HardwarePerformanceCounter, self).__init__(*args, **kwargs)
self.metrics = {}
self._GetAvailableMetrics()
self._running_metrics = None
# Some metrics are "free", in the sense that they can always be
# collected and don't use up a performance counter.
self.free_metrics = set([self._CYCLES])
self.non_free_metrics = set(self.metrics) - self.free_metrics
# Some CPUs can actually handle more than this, but we currently have
# no reliable way of detecting this.
self.max_counters = 2
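# For example, with max_counters == 2 a caller may start CYCLES (which is
# free) together with up to two non-free metrics such as INSTR and
# MISPRED_BRANCH; the exact metrics available depend on the CPU.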
def _GetAvailableMetrics(self):
"""Populates the internal dictionary of supported metrics.
This routine populates the internal dictionary of supported metrics,
|metrics|. The key is the metric name, the value is its description.
Raises:
ExecutionFailed: Upon failure of the underlying command-line utilities.
"""
# Get the available metrics from ptt.
# Metric names have the form: "- METRIC1_NAME".
lines = self._Run(_PTT, [])
for line in lines:
line = line.strip()
match = re.match(self._PTT_METRIC, line)
if match:
self.metrics[match.group(1)] = self._NO_DESCRIPTION
continue
# Provide a default description for CYCLES.
self.metrics[self._CYCLES] = "Number of CPU cycles."
# Get descriptions for the various metrics using "mpevt".
lines = self._Run(_MPEVT, ["-ld"], expected_returncode=-1)
seen_divider = False
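# Past the divider, each line has the form "<id> <NAME> <description>",
# e.g. "107 MISPRED_BRANCH <description>"; the id and name also appear in the
# "mpevt -l" sample above, while the description text comes from the "-ld"
# output and is not reproduced here.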
for line in lines:
line = line.strip()
if not seen_divider:
seen_divider = re.match(self._DIVIDER, line)
else:
counter = re.split("\s+", line, 2)
if (len(counter) == 3 and re.match(self._INT, counter[0]) and
re.match(self._METRIC_NAME, counter[1]) and
counter[1] in self.metrics):
desc = counter[2].strip()
if not desc.endswith("."):
desc += "."
self.metrics[counter[1]] = desc
def Start(self, metric_names, privilege_level=PrivilegeLevel.USER):
"""Starts the hardware performance counter for the given metrics.
Metrics that are free (do not require the use of a dedicated CPU counter)
may always be specified. However, metrics that require the use of a CPU
counter are limited in number to |max_counters|.
Args:
metric_names: a list of metrics to run. These must match the names of
metrics in |metrics|. No more than |max_counters| metrics may be
specified that are present in the list |non_free_metrics|.
privilege_level: the privilege level at which to monitor instructions.
This must be one of the values from the PrivilegeLevel enumeration.
Raises:
ExecutionFailed: Upon failure of any of the command-line utilities.
InvalidMetric: Upon specification of an invalid metric.
"""
for metric_name in metric_names:
if metric_name not in self.metrics:
raise InvalidMetric("Unknown metric name '%s'." % metric_name)
# Get the privilege level. If invalid, default to priv_user.
priv = PrivilegeLevel.NAMES.get(privilege_level, None)
if not priv:
priv = PrivilegeLevel.NAMES.get(PrivilegeLevel.USER)
_LOGGER.info("Starting counters for metrics: %s.", metric_names)
self._Run(_PTT, ["term"])
self._Run(_PTT, ["noautoterm"])
metric_names = list(metric_names)
args = ["init"] + metric_names + [priv, "-force"]
self._Run(_PTT, args)
self._running_metrics = metric_names
def Query(self, program_name):
"""Queries the running performance counters for the given program name.
The metrics must be running (Start has been called, but not Stop).
Args:
program_name: the name of the executable for which to gather
metrics. This will be split and only the executable name (without
path or extension) will be used. For example, passing in
"C:\Program Files\Google\Chrome\chrome.exe" is equivalent to
passing in "chrome".
Returns:
A dict mapping the metric name to a dict of values, one per running
instance of the specified executable at the time of the query. The nested
dict maps process IDs to counter values. For example:
{"CYCLES": {100: 123456, 200: 1234},
"NONHALTED_CYCLES": {100: 100000, 200: 1000}}
Raises:
ExecutionFailed: Upon failure of the underlying command-line utilities.
NotRunning: If Start has not been previously called.
UnexpectedOutput: If the output of the underlying command-line utilities
was not as expected.
"""
if not self._running_metrics:
raise NotRunning("No metrics are running.")
# Get the bare executable name.
tail = os.path.split(program_name)[1]
root = os.path.splitext(tail)[0]
_LOGGER.info("Querying performance counters for '%s': %s.",
root, self._running_metrics)
lines = self._Run(_PTT, ["dump", "-pl", root])
values = {}
metrics = None
for line in lines:
line = line.strip()
# Keep an eye out for the line containing the metric names. If
# the header pattern is matched, then we are guaranteed to have at
# least 5 items after the split.
if not metrics:
if re.match(self._DUMP_HEADER, line):
columns = re.split("\s+", line)
metrics = columns[4:]
if set(metrics) != set(self._running_metrics):
raise UnexpectedOutput("Reported metrics do not match running "
"metrics: %s." % metrics)
for metric in metrics:
values[metric] = {}
continue
# Is this a PID without data? Then store zero values for the metrics.
match = re.match(self._NO_DATA, line)
if match:
pid = int(match.group(1))
for metric in metrics:
values[metric][pid] = 0
continue
# Is this a PID/TID/Disp/Intr/Metrics line? Then tally the
# running sum for the PID. We manually summarize because
# summary lines are only produced if there is more than one
# thread for a PID.
if re.match(self._DUMP_DATA, line):
data = re.split("\s+", line)
if len(data) == len(metrics) + 4:
pid = int(data[0])
for i, metric in enumerate(metrics):
count = int(data[4+i])
values[metric][pid] = values[metric].get(pid, 0) + count
if not metrics:
raise UnexpectedOutput("No results seen for metrics: %s." %
self._running_metrics)
return values
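# A sketch of consuming the dict returned by Query: aggregate the per-PID
# counts into a single total for each metric (hpc being a started
# HardwarePerformanceCounter; Python 2 iteration idioms to match this module):
#
#   results = hpc.Query("chrome")
#   totals = dict((metric, sum(pids.itervalues()))
#                 for metric, pids in results.iteritems())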
def Stop(self):
"""Stops the hardware performance counters.
After calling this, all metric data is discarded and further calls to
Query will fail. New metrics may be gathered with another call to Start.
Raises:
ExecutionFailed: Upon failure of the underlying command-line utilities.
NotRunning: If Start has not been previously called.
"""
if not self._running_metrics:
raise NotRunning("No metrics are running.")
_LOGGER.info("Stopping metrics: %s.", self._running_metrics)
self._running_metrics = None
self._Run(_PTT, ["term"])
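# A typical end-to-end session, sketched for reference (this assumes the
# toolkit is installed at DEFAULT_DIR and that a "chrome" process is running
# while the counters are active). Wrapping the benchmark in try/finally makes
# sure the counters are torn down even if the benchmark fails:
#
#   hpc = HardwarePerformanceCounter()
#   hpc.Start(["CYCLES", "INSTR"], PrivilegeLevel.USER)
#   try:
#     # ... run the benchmark here ...
#     results = hpc.Query("chrome")
#   finally:
#     hpc.Stop()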