| #!/usr/bin/python |
| # Copyright 2015 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Symbolizes a log file produced by cygprofile instrumentation. |
| |
| Given a log file and the binary being profiled, creates an orderfile. |
| """ |
| |
| import logging |
| import multiprocessing |
| import optparse |
| import os |
| import re |
| import string |
| import sys |
| import tempfile |
| |
| import cygprofile_utils |
| import symbol_extractor |
| |
| |
def _ParseLogLines(log_file_lines):
  """Extracts the called addresses from a merged cyglog (see mergetraces.py).

  The first line describes the executable mapping of the profiled library, the
  next two lines are headers, and each following 4-field line is one call
  record whose last field is the callee address. A small log looks like:
    5086e000-52e92000 r-xp 00000000 b3:02 51276 libchromeview.so
    secs       usecs      pid:threadid    func
    START
    1314897086 795828 3587:1074648168 0x509e105c
    1314897086 795874 3587:1074648168 0x509e0eb4
    1314897086 796326 3587:1074648168 0x509e0e3c
    1314897086 796552 3587:1074648168 0x509e07bc
    END

  Args:
    log_file_lines: list of the lines of the log file produced by a profiled
        run.

  Returns:
    The callee addresses, in log order. Addresses strictly above the start of
    the mapping are turned into offsets from it; others are kept unchanged.
  """
  mapping_line = log_file_lines[0]
  assert 'r-xp' in mapping_line
  # The mapping line starts with "<start>-<end>", both in hexadecimal.
  vm_start = int(mapping_line[:mapping_line.find('-')], 16)

  # First pass: validate the shape of every record and keep the raw address
  # field. Anything that is not a 4-field record has to be the END marker.
  raw_addresses = []
  for record in log_file_lines[3:]:
    tokens = record.split()
    if len(tokens) != 4:
      assert tokens[0] == 'END'
    else:
      raw_addresses.append(tokens[3])

  # Second pass: convert the addresses and rebase them on the mapping start.
  offsets = []
  for raw_address in raw_addresses:
    address = int(raw_address, 16)
    offsets.append(address - vm_start if vm_start < address else address)
  return offsets
| |
| |
def _GroupLibrarySymbolInfosByOffset(lib_filename):
  """Builds the {offset: [SymbolInfo]} mapping for the symbols of a library.

  Args:
    lib_filename: path of the library handed to symbol_extractor.

  Returns:
    The result of grouping the library's SymbolInfo entries by offset.
  """
  return symbol_extractor.GroupSymbolInfosByOffset(
      symbol_extractor.SymbolInfosFromBinary(lib_filename))
| |
| |
class SymbolNotFoundException(Exception):
  """Raised when a lookup matches no symbol.

  The value given at construction (the offset that was looked up, in this
  file) is kept in `value` and is also the exception's single argument.
  """

  def __init__(self, value):
    Exception.__init__(self, value)
    self.value = value

  def __str__(self):
    # Show the repr of the stored value rather than its str.
    return '%r' % (self.value,)
| |
| |
def _FindSymbolInfosAtOffset(offset_to_symbol_infos, offset):
  """Looks up every SymbolInfo located at an offset.

  Args:
    offset_to_symbol_infos: {offset: [SymbolInfo]}
    offset: offset at which to look for symbols

  Returns:
    The list of SymbolInfo stored for the offset, or for (offset - 1) when the
    offset is odd and has no entry of its own.

  Raises:
    SymbolNotFoundException if neither lookup matches a symbol.
  """
  if offset in offset_to_symbol_infos:
    return offset_to_symbol_infos[offset]

  # On ARM, odd addresses are used to signal thumb instruction. They are
  # generated by setting the LSB to 1 (see
  # http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0471e/Babfjhia.html).
  # TODO(lizeb): Make sure this hack doesn't propagate to other archs.
  is_odd = offset % 2
  if is_odd and (offset - 1) in offset_to_symbol_infos:
    return offset_to_symbol_infos[offset - 1]

  raise SymbolNotFoundException(offset)
| |
| |
def _GetObjectFileNames(obj_dir):
  """Recursively collects the paths of the '.o' files found under obj_dir.

  Args:
    obj_dir: directory to walk.

  Returns:
    A list of paths (directory joined with file name), in os.walk() order.
  """
  return [os.path.join(current_dir, candidate)
          for (current_dir, _, candidates) in os.walk(obj_dir)
          for candidate in candidates
          if candidate.endswith('.o')]
| |
| |
def _AllSymbolInfos(object_filenames):
  """Returns a list of SymbolInfo from an iterable of filenames.

  Each file is handed to symbol_extractor.SymbolInfosFromBinary in a pool of
  worker processes, and the per-file lists are concatenated.

  Args:
    object_filenames: iterable of paths of the files to extract symbols from.

  Returns:
    A single flat list with the SymbolInfo of every file, following the order
    of object_filenames.
  """
  pool = multiprocessing.Pool()
  try:
    # Hopefully the object files are in the page cache at this step, so IO
    # should not be a problem (hence no concurrency limit on the pool).
    symbol_infos_nested = pool.map(
        symbol_extractor.SymbolInfosFromBinary, object_filenames)
  finally:
    # Always reap the worker processes, even if a worker raised. try/finally
    # is used instead of `with` so this also works on interpreters where Pool
    # is not a context manager.
    pool.terminate()
    pool.join()
  result = []
  for symbol_infos in symbol_infos_nested:
    result += symbol_infos
  return result
| |
| |
def _SameCtorOrDtorNames(symbol1, symbol2):
  """Returns True if two symbols refer to the same constructor or destructor.

  The Itanium C++ ABI specifies dual constructor and destructor
  emission (section 5.1.4.3):
  https://refspecs.linuxbase.org/cxxabi-1.83.html#mangling-special
  To avoid fully parsing all mangled symbols, a heuristic is used with c++filt.

  Note: some compilers may name generated copies differently. If this becomes
  an issue this heuristic will need to be updated.

  Args:
    symbol1: mangled symbol name; it must contain a constructor/destructor
        marker for the comparison to take place.
    symbol2: mangled symbol name to compare against.

  Returns:
    True if symbol1 looks like a constructor/destructor and both symbols
    demangle to the same name, False otherwise (always a bool).
  """
  # Check if this is the understood case of constructor/destructor
  # signatures. GCC emits up to three types of constructor/destructors:
  # complete, base, and allocating. If they're all the same they'll
  # get folded together.
  if not re.search('(C[123]|D[012])E', symbol1):
    # Return an explicit False (not the None that re.search() yields) so the
    # result matches the documented boolean contract. Demangling is skipped.
    return False
  return (symbol_extractor.DemangleSymbol(symbol1) ==
          symbol_extractor.DemangleSymbol(symbol2))
| |
| |
def GetSymbolToSectionsMapFromObjectFiles(obj_dir):
  """Scans object files to create a {symbol: linker section(s)} map.

  Symbols whose name starts with '.LTHUNK' are ignored. A symbol seen in a
  section that does not start with '.text.' is reported as a warning, as is a
  symbol found in several sections unless it looks like a folded
  constructor/destructor (see _SameCtorOrDtorNames).

  Args:
    obj_dir: The root of the output object file directory, which will be
             scanned for .o files to form the mapping.

  Returns:
    A map {symbol_name: [section_name1, section_name2...]}
  """
  text_prefix = '.text.'
  object_files = _GetObjectFileNames(obj_dir)
  symbol_to_sections_map = {}
  symbol_warnings = cygprofile_utils.WarningCollector(300)
  symbol_infos = _AllSymbolInfos(object_files)
  for symbol_info in symbol_infos:
    symbol = symbol_info.name
    if symbol.startswith('.LTHUNK'):
      continue
    section = symbol_info.section
    known_sections = symbol_to_sections_map.get(symbol)
    if known_sections is not None and section not in known_sections:
      known_sections.append(section)

      # Recover the symbol the first section was named after by removing the
      # '.text.' prefix. str.lstrip() must not be used for this: it strips any
      # leading run of the characters '.', 't', 'e' and 'x', which would also
      # eat the start of names such as 'exit' or 'text_foo'.
      first_section_symbol = known_sections[0]
      if first_section_symbol.startswith(text_prefix):
        first_section_symbol = first_section_symbol[len(text_prefix):]
      if not _SameCtorOrDtorNames(symbol, first_section_symbol):
        symbol_warnings.Write('Symbol ' + symbol +
                              ' unexpectedly in more than one section: ' +
                              ', '.join(known_sections))
    elif not section.startswith(text_prefix):
      symbol_warnings.Write('Symbol ' + symbol +
                            ' in incorrect section ' + section)
    elif known_sections is None:
      # In most cases we expect just one item in this list, and maybe 4 or so
      # in the worst case. The list is only created for a symbol's first
      # section: seeing an already recorded section again must not discard
      # the other sections accumulated so far.
      symbol_to_sections_map[symbol] = [section]
  symbol_warnings.WriteEnd('bad sections')
  return symbol_to_sections_map
| |
| |
def _WarnAboutDuplicates(offsets):
  """Logs a warning for every repeated occurrence of an offset.

  Args:
    offsets: iterable of integer offsets to check for duplicates

  Returns:
    True if every offset is unique, False if at least one is repeated.
  """
  visited = set()
  found_duplicate = False
  for current in offsets:
    if current in visited:
      found_duplicate = True
      logging.warning('Duplicate offset: ' + hex(current))
      continue
    visited.add(current)
  return not found_duplicate
| |
| |
def _OutputOrderfile(offsets, offset_to_symbol_infos, symbol_to_sections_map,
                     output_file):
  """Writes the orderfile, one section name per line, to output_file.

  Offsets are processed in order; for each symbol found at an offset, every
  section recorded for that symbol is written the first time it is reached.
  Symbols without a known section are reported as warnings, and offsets
  matching no symbol as errors.

  Args:
    offsets: Iterable of offsets to match to section names
    offset_to_symbol_infos: {offset: [SymbolInfo]}
    symbol_to_sections_map: {name: [section1, section2]}
    output_file: file-like object to write the results to

  Returns:
    True if all symbols were found in the library.
  """
  all_found = True
  no_section_warnings = cygprofile_utils.WarningCollector(300)
  not_found_errors = cygprofile_utils.WarningCollector(
      300, level=logging.ERROR)
  written_sections = set()
  for offset in offsets:
    try:
      matches = _FindSymbolInfosAtOffset(offset_to_symbol_infos, offset)
      for match in matches:
        if match.name not in symbol_to_sections_map:
          no_section_warnings.Write(
              'No known section for symbol ' + match.name)
          continue
        for section in symbol_to_sections_map[match.name]:
          if section in written_sections:
            # Each section is emitted at most once.
            continue
          output_file.write(section + '\n')
          written_sections.add(section)
    except SymbolNotFoundException:
      not_found_errors.Write(
          'Did not find function in binary. offset: ' + hex(offset))
      all_found = False
  no_section_warnings.WriteEnd('no known section for symbol.')
  not_found_errors.WriteEnd('symbol not found in the binary.')
  return all_found
| |
| |
def main():
  """Command-line entry point: turns a merged cyglog into an orderfile.

  Expects three positional arguments (merged cyglog, library, output file
  name) and an optional --target-arch; when the latter is missing the
  architecture is obtained from cygprofile_utils.DetectArchitecture().

  The orderfile is first written to a temporary file created next to the
  destination and then renamed over it. The temporary file is removed if
  anything fails before the rename.

  Returns:
    0 if the orderfile was written and every offset matched a symbol, 1 on a
    usage error or if some offsets matched no symbol (the orderfile is still
    written in the latter case).
  """
  parser = optparse.OptionParser(usage=
      'usage: %prog [options] <merged_cyglog> <library> <output_filename>')
  parser.add_option('--target-arch', action='store', dest='arch',
                    choices=['arm', 'arm64', 'x86', 'x86_64', 'x64', 'mips'],
                    help='The target architecture for libchrome.so')
  options, argv = parser.parse_args(sys.argv)
  if not options.arch:
    options.arch = cygprofile_utils.DetectArchitecture()
  if len(argv) != 4:
    parser.print_help()
    return 1
  (log_filename, lib_filename, output_filename) = argv[1:]
  symbol_extractor.SetArchitecture(options.arch)

  obj_dir = cygprofile_utils.GetObjDir(lib_filename)

  # Read the log through a context manager so the file handle is closed, and
  # strip trailing whitespace with str.rstrip(): string.rstrip() and a
  # list-returning map() only exist on Python 2.
  with open(log_filename) as log_file:
    log_file_lines = [line.rstrip() for line in log_file]
  offsets = _ParseLogLines(log_file_lines)
  # Duplicates are only logged; they do not affect the exit status.
  _WarnAboutDuplicates(offsets)

  offset_to_symbol_infos = _GroupLibrarySymbolInfosByOffset(lib_filename)
  symbol_to_sections_map = GetSymbolToSectionsMapFromObjectFiles(obj_dir)

  success = False
  temp_filename = None
  output_file = None
  try:
    # The temporary file lives in the destination directory so the final
    # os.rename() stays within one directory.
    (fd, temp_filename) = tempfile.mkstemp(dir=os.path.dirname(output_filename))
    output_file = os.fdopen(fd, 'w')
    ok = _OutputOrderfile(
        offsets, offset_to_symbol_infos, symbol_to_sections_map, output_file)
    output_file.close()
    os.rename(temp_filename, output_filename)
    # The temporary file no longer exists under that name: nothing to remove.
    temp_filename = None
    success = ok
  finally:
    if output_file:
      # Closing an already closed file object is a no-op.
      output_file.close()
    if temp_filename:
      os.remove(temp_filename)

  return 0 if success else 1
| |
| |
# Script entry point: enable INFO-level logging, then exit with main()'s
# return value as the process status.
if __name__ == '__main__':
  logging.basicConfig(level=logging.INFO)
  exit_status = main()
  sys.exit(exit_status)