| # Copyright 2013 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Scans the Chromium source for histograms that are absent from histograms.xml. |
| |
| This is a heuristic scan, so a clean run of this script does not guarantee that |
| all histograms in the Chromium source are properly mapped. Notably, field |
| trials are entirely ignored by this script. |
| |
| """ |
| |
| import commands |
| import extract_histograms |
| import hashlib |
| import logging |
| import optparse |
| import os |
| import re |
| import sys |
| |
| |
# Matches a C string literal that is followed, after optional whitespace
# (including newlines), by the opening quote of another string literal.
# Group 1 captures the first literal's opening quote and contents, but not its
# closing quote, so substituting group 1 for the match joins the two literals.
ADJACENT_C_STRING_REGEX = re.compile(r"""
(" # Opening quotation mark
[^"]*) # Literal string contents
" # Closing quotation mark
\s* # Any number of spaces
" # Another opening quotation mark
""", re.VERBOSE)
# Matches an expression that consists solely of a constant-style identifier:
# an optional 'namespace::' prefix followed by 'k', an uppercase letter and any
# further word characters (e.g. 'kFooHistogram' or 'foo::kBarName').
CONSTANT_REGEX = re.compile(r"""
(\w*::)? # Optional namespace
k[A-Z] # Match a constant identifier: 'k' followed by an uppercase letter
\w* # Match the rest of the constant identifier
$ # Make sure there's only the identifier, nothing else
""", re.VERBOSE)
# Matches the start of an invocation of any macro whose name begins with
# UMA_HISTOGRAM. Group 1 captures the raw text of the first macro argument,
# i.e. everything up to the first comma or closing parenthesis.
HISTOGRAM_REGEX = re.compile(r"""
UMA_HISTOGRAM # Match the shared prefix for standard UMA histogram macros
\w* # Match the rest of the macro name, e.g. '_ENUMERATION'
\( # Match the opening parenthesis for the macro
\s* # Match any whitespace -- especially, any newlines
([^,)]*) # Capture the first parameter to the macro
[,)] # Match the comma/paren that delineates the first parameter
""", re.VERBOSE)
| |
| |
class DirectoryNotFoundException(Exception):
  """Locally defined exception, distinguishable from standard ones.

  Raised by findDefaultRoot() when no src/ directory can be found.

  Attributes:
    msg: Human-readable description of the failure; this is also what str()
        returns for the exception.
  """

  def __init__(self, msg):
    """Initializes the exception.

    Args:
      msg: Human-readable description of the failure.
    """
    # Forward the message to Exception so that e.args, repr() and pickling
    # behave the same way they do for standard exceptions.
    super(DirectoryNotFoundException, self).__init__(msg)
    self.msg = msg

  def __str__(self):
    return self.msg
| |
| |
def findDefaultRoot():
  """Locates the chromium src/ directory enclosing the working directory.

  Walks upwards from the current working directory and stops at the first
  directory whose name is exactly 'src'. This lets the script be run from
  inside the histograms dir (or anywhere else below src/).

  Returns:
    string: path to the src dir of the repo.

  Raises:
    DirectoryNotFoundException if the target directory cannot be found.
  """
  candidate = os.getcwd()
  previous = None
  # Stop once the path is exhausted or no longer changes when taking its
  # parent, which is what happens at the filesystem root.
  while candidate and candidate != previous:
    if os.path.basename(candidate) == 'src':
      return candidate
    previous, candidate = candidate, os.path.dirname(candidate)

  raise DirectoryNotFoundException('Could not find src/ dir')
| |
| |
def collapseAdjacentCStrings(string):
  """Merges adjacent C string literals into a single literal.

  Handy for re-joining strings that were broken across several source lines to
  stay within the 80-col restriction.

  Args:
    string: The text to recombine, e.g. the literal "Foo" followed by a newline,
        some indentation and the literal "bar".

  Returns:
    The text with every run of adjacent literals joined, e.g. the single
    literal "Foobar" (quotes included) for the example above.
  """
  # Join one pair per pass: after a pair is merged, the merged literal may
  # itself be adjacent to yet another literal, which a single global
  # substitution would step over.
  while True:
    string, num_joined = ADJACENT_C_STRING_REGEX.subn(r'\1', string, count=1)
    if not num_joined:
      return string
| |
| |
def logNonLiteralHistogram(filename, histogram):
  """Warns about a histogram name that is not a plain string literal.

  Known acceptable cases are filtered out and produce no log output.

  Args:
    filename: The filename for the file containing the histogram, e.g.
        'chrome/browser/memory_details.cc'
    histogram: The expression that evaluates to the name of the histogram, e.g.
        '"FakeHistogram" + variant'

  Returns:
    None
  """
  # Histogram macro definitions typically contain backslashes so that they can
  # be formatted across lines; those are not real call sites.
  spans_macro_lines = '\\' in histogram

  # Names that have been pulled out into C++ constants are acceptable as well.
  if spans_macro_lines or CONSTANT_REGEX.match(histogram):
    return

  # TODO(isherman): This is still a little noisy... needs further filtering to
  # reduce the noise.
  logging.warning(
      '%s contains non-literal histogram name <%s>', filename, histogram)
| |
| |
def readChromiumHistograms():
  """Searches the Chromium source for all histogram names.

  Also logs warnings for any invocations of the UMA_HISTOGRAM_* macros with
  names that might vary during a single run of the app.

  The search is relative to the current working directory: the files to scan
  are listed by git and then opened using the paths it reports.

  Returns:
    A set containing any found literal histogram names.
  """
  logging.info('Scanning Chromium source for histograms...')

  # Use git grep to find all invocations of the UMA_HISTOGRAM_* macros.
  # Examples:
  #   'path/to/foo.cc:420:  UMA_HISTOGRAM_COUNTS_100("FooGroup.FooName",'
  #   'path/to/bar.cc:632:  UMA_HISTOGRAM_ENUMERATION('
  # NOTE(review): 'gs' is not a built-in git command; presumably it is a
  # locally configured alias for 'git grep' -- confirm.
  locations = commands.getoutput('git gs UMA_HISTOGRAM').split('\n')
  filenames = set(location.split(':')[0] for location in locations)
  # Empty output (no matches) splits into [''], which must not reach open().
  filenames.discard('')

  histograms = set()
  for filename in filenames:
    with open(filename, 'r') as f:
      contents = f.read()

    for histogram in set(HISTOGRAM_REGEX.findall(contents)):
      # The regex skips whitespace in front of the first argument but not
      # behind it, so trim what is left before inspecting the quotes.
      histogram = collapseAdjacentCStrings(histogram).strip()

      # Must be non-empty and begin and end with a quotation mark. The regex
      # can capture an empty argument, e.g. for 'UMA_HISTOGRAM_FOO()'.
      if not histogram or histogram[0] != '"' or histogram[-1] != '"':
        logNonLiteralHistogram(filename, histogram)
        continue

      # Must not include any quotation marks other than at the beginning or end.
      histogram_stripped = histogram.strip('"')
      if '"' in histogram_stripped:
        logNonLiteralHistogram(filename, histogram)
        continue

      histograms.add(histogram_stripped)

  return histograms
| |
| |
def readXmlHistograms(histograms_file_location):
  """Parses all histogram names from histograms.xml.

  Args:
    histograms_file_location: Path to the XML file to read; it is passed
        unchanged to extract_histograms.ExtractHistograms.

  Returns:
    A set containing the parsed histogram names.
  """
  # Pass the path as a logging argument (as the rest of this file does) rather
  # than pre-formatting the message.
  logging.info('Reading histograms from %s...', histograms_file_location)
  histograms = extract_histograms.ExtractHistograms(histograms_file_location)
  return set(extract_histograms.ExtractNames(histograms))
| |
| |
def hashHistogramName(name):
  """Computes the hash of a histogram name.

  Args:
    name: The string to hash (a histogram name). Text strings are encoded as
        UTF-8 before hashing; byte strings are hashed as-is.

  Returns:
    Histogram hash as a string representing a hex number (with leading 0x).
    The hex number is the first 16 hex digits of the MD5 digest of the name.
  """
  # hashlib operates on bytes. Encoding text explicitly makes unicode names
  # hash the same as the equivalent byte string instead of failing.
  if not isinstance(name, bytes):
    name = name.encode('utf-8')
  return '0x' + hashlib.md5(name).hexdigest()[:16]
| |
| |
def main():
  """Reports histograms used in the Chromium source but missing from the XML.

  Parses the command line, changes into the root directory, scans the source
  for literal histogram names and logs every name that appears in neither the
  main histograms file nor (when it exists) the extra histograms file.

  Exits with status 1 on unexpected positional arguments, when no root
  directory is available, or when the root directory cannot be entered.
  """
  # Find default paths. Failing to locate src/ is not fatal at this point,
  # because --root-directory may be supplied explicitly.
  try:
    default_root = findDefaultRoot()
  except DirectoryNotFoundException:
    default_root = None

  # Without a default root the default file paths stay relative; they are then
  # resolved against --root-directory after the chdir below.
  path_base = default_root or ''
  default_histograms_path = os.path.join(
      path_base, 'tools/metrics/histograms/histograms.xml')
  default_extra_histograms_path = os.path.join(
      path_base, 'tools/histograms/histograms.xml')

  if default_root is None:
    root_help = ('scan within DIRECTORY for histograms [required, no enclosing '
                 'src/ directory was found]')
  else:
    root_help = ('scan within DIRECTORY for histograms [optional, defaults to '
                 '"%s"]' % default_root)

  # Parse command line options
  parser = optparse.OptionParser()
  parser.add_option(
      '--root-directory', dest='root_directory', default=default_root,
      help=root_help,
      metavar='DIRECTORY')
  parser.add_option(
      '--histograms-file', dest='histograms_file_location',
      default=default_histograms_path,
      help='read histogram definitions from FILE (relative to --root-directory) '
           '[optional, defaults to "%s"]' % default_histograms_path,
      metavar='FILE')
  # '--exrta_histograms-file' is the historical, misspelled name of this
  # option; it is kept as an alias so existing invocations keep working.
  parser.add_option(
      '--extra-histograms-file', '--exrta_histograms-file',
      dest='extra_histograms_file_location',
      default=default_extra_histograms_path,
      help='read additional histogram definitions from FILE (relative to '
           '--root-directory) [optional, defaults to "%s"]' %
           default_extra_histograms_path,
      metavar='FILE')

  (options, args) = parser.parse_args()
  if args:
    parser.print_help()
    sys.exit(1)

  logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)

  if options.root_directory is None:
    logging.error('Could not find src/ dir; use --root-directory to specify it')
    sys.exit(1)

  try:
    os.chdir(options.root_directory)
  except EnvironmentError as e:
    logging.error("Could not change to root directory: %s", e)
    sys.exit(1)
  chromium_histograms = readChromiumHistograms()
  xml_histograms = readXmlHistograms(options.histograms_file_location)
  unmapped_histograms = chromium_histograms - xml_histograms

  # The extra histograms file is optional: only warn when it is missing.
  if os.path.isfile(options.extra_histograms_file_location):
    xml_histograms2 = readXmlHistograms(options.extra_histograms_file_location)
    unmapped_histograms -= xml_histograms2
  else:
    logging.warning('No such file: %s', options.extra_histograms_file_location)

  if unmapped_histograms:
    logging.info('')
    logging.info('')
    logging.info('Histograms in Chromium but not in XML files:')
    logging.info('-------------------------------------------------')
    for histogram in sorted(unmapped_histograms):
      logging.info('  %s - %s', histogram, hashHistogramName(histogram))
  else:
    logging.info('Success!  No unmapped histograms found.')
| |
| |
# Run the scan only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
  main()