| #!/usr/bin/python |
| # |
| # Copyright 2017 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Maps code pages to object files. |
| |
| For all pages from the native library .text section, extract all object files |
| the code maps to. Outputs a web-based visualization of page -> symbol mappings, |
| reached symbols and code residency. |
| """ |
| |
| import argparse |
| import collections |
| import json |
| import logging |
| import multiprocessing |
| import os |
| import shutil |
| import SimpleHTTPServer |
| import SocketServer |
| import sys |
| |
# Directory three levels above the one containing this file (presumably the
# root of the source checkout -- confirm against the repository layout).
_THIS_DIRECTORY = os.path.dirname(__file__)
_SRC_PATH = os.path.abspath(
    os.path.join(_THIS_DIRECTORY, *([os.pardir] * 3)))

# Extend the module search path with tools/cygprofile under _SRC_PATH.
_CYGPROFILE_PATH = os.path.join(_SRC_PATH, 'tools', 'cygprofile')
sys.path.append(_CYGPROFILE_PATH)
| import cyglog_to_orderfile |
| import symbol_extractor |
| |
# Pages are 4096 bytes (1 << 12). AND-ing an address with _PAGE_MASK clears
# the low 12 bits, i.e. rounds the address down to the start of its page.
_PAGE_SIZE = 4096
_PAGE_MASK = -_PAGE_SIZE  # Same value as ~(_PAGE_SIZE - 1).
| |
| |
def _GetSymbolNameToFilename(build_directory):
  """Parses object files in a directory, and maps mangled symbol names to files.

  Object files are assumed to actually be LLVM bitcode files, that is this
  assumes clang as the compiler and the use of LTO.

  Args:
    build_directory: (str) Build directory. Trailing '/' characters are
                     ignored when stripping it from the object filenames.

  Returns:
    {symbol name (str): object filename (str)}. Filenames are stripped of the
    build directory part and of the leading '/obj/' component. When the same
    symbol name is found in several object files, the last file processed wins.
  """
  symbol_extractor.CheckLlvmNmExists()
  path = os.path.join(build_directory, 'obj')
  object_filenames = cyglog_to_orderfile.GetObjectFilenames(path)
  pool = multiprocessing.Pool()
  try:
    symbol_names_per_file = pool.map(
        symbol_extractor.SymbolNamesFromLlvmBitcodeFile, object_filenames)
  finally:
    # Release the worker processes even when symbol extraction fails.
    pool.close()
    pool.join()
  # The slicing below assumes that GetObjectFilenames() returns paths starting
  # with |path|. Trailing separators are dropped from the build directory so
  # that the slice keeps the leading '/' the '/obj/' check relies on.
  prefix_length = len(build_directory.rstrip('/'))
  result = {}
  for (symbol_names, filename) in zip(symbol_names_per_file, object_filenames):
    stripped_filename = filename[prefix_length:]
    if stripped_filename.startswith('/obj/'):
      stripped_filename = stripped_filename[len('/obj/'):]
    for s in symbol_names:
      result[s] = stripped_filename
  return result
| |
| |
def CodePagesToMangledSymbols(symbol_infos, text_start_offset):
  """Groups a list of symbols per code page.

  Args:
    symbol_infos: (symbol_extractor.SymbolInfo) List of symbols. Only the
                  name, offset and size attributes are read.
    text_start_offset: (int) Offset to add to symbol offsets. This is used to
                       account for the start of .text not being at the start of
                       a page in memory.

  Returns:
    {page offset (int): set((mangled_name, size_in_page), ...)}
    A symbol spanning several pages appears once in each page it touches, with
    the number of its bytes located in that page.
  """
  # Different symbols can be at the same address, through identical code folding
  # for instance. In this case, only keep the first one. This is not ideal, as
  # file attribution will be incorrect in this case. However ICF mostly works
  # with small symbols, so it shouldn't impact numbers too much.
  result = collections.defaultdict(set)
  known_offsets = set()
  for s in symbol_infos:
    assert s.offset % 2 == 0, 'Wrong alignment'
    if s.offset in known_offsets:
      continue
    known_offsets.add(s.offset)
    start = s.offset + text_start_offset
    end = start + s.size
    start_page = start & _PAGE_MASK
    # |end| is exclusive, so the last page touched is the one containing the
    # byte at |end - 1|. Using |end| itself would add a spurious zero-sized
    # entry to the following page whenever a symbol ends on a page boundary.
    # Empty symbols are still recorded (with size 0) in their start page.
    last_page = (end - 1) & _PAGE_MASK if end > start else start_page
    page = start_page
    while page <= last_page:
      symbol_start_in_page = max(page, start)
      symbol_end_in_page = min(page + _PAGE_SIZE, end)
      size_in_page = symbol_end_in_page - symbol_start_in_page
      result[page].add((s.name, size_in_page))
      page += _PAGE_SIZE
  # Sanity check: the symbols attributed to a page cannot cover more than it.
  for page, symbols in result.items():
    total_size = sum(size_in_page for (_, size_in_page) in symbols)
    if total_size > _PAGE_SIZE:
      logging.warning('Too many symbols in page (%d * 4k)! Total size: %d',
                      page // _PAGE_SIZE, total_size)
  return result
| |
| |
def ReadReachedSymbols(filename):
  """Loads the reached symbol names stored one per line in a file.

  Args:
    filename: (str) File to read.

  Returns:
    [str] One entry per line of the file, in file order, with leading and
    trailing whitespace removed. Blank lines yield empty strings.
  """
  symbol_names = []
  with open(filename, 'r') as symbols_file:
    for line in symbols_file:
      symbol_names.append(line.strip())
  return symbol_names
| |
| |
def WriteReachedData(filename, page_to_reached_data):
  """Writes the page to reached fraction to a JSON file.

  The output format is suited for visualize.html.

  Args:
    filename: (str) Output filename.
    page_to_reached_data: (dict) As returned by CodePagesToReachedSize().

  The file contains a JSON list with one
  {'offset': ..., 'total': ..., 'reached': ...} object per page. Entries are
  sorted by increasing page offset so that the output is deterministic, like
  the one of WriteCodePageAttribution().
  """
  json_object = []
  for offset in sorted(page_to_reached_data):
    data = page_to_reached_data[offset]
    json_object.append({'offset': offset, 'total': data['total'],
                        'reached': data['reached']})
  with open(filename, 'w') as f:
    json.dump(json_object, f)
| |
| |
def CodePagesToReachedSize(reached_symbol_names, page_to_symbols):
  """Computes, for each page, the total and the reached symbol sizes.

  Args:
    reached_symbol_names: ([str]) List of reached symbol names.
    page_to_symbols: (dict) As returned by CodePagesToMangledSymbols().

  Returns:
    {page offset (int) -> {'total': int, 'reached': int}}
    'total' is the sum of the sizes of all the symbols in the page, 'reached'
    only counts the symbols whose name is in reached_symbol_names.
  """
  # Constant-time membership tests in the inner loop.
  reached = frozenset(reached_symbol_names)
  sizes_per_page = {}
  for page_offset, symbols in page_to_symbols.items():
    total = 0
    reached_total = 0
    for (name, size_in_page) in symbols:
      total += size_in_page
      if name in reached:
        reached_total += size_in_page
    sizes_per_page[page_offset] = {'total': total, 'reached': reached_total}
  return sizes_per_page
| |
| |
def CodePagesToObjectFiles(symbols_to_object_files, code_pages_to_symbols):
  """Attributes the bytes of each code page to object files.

  Symbols with no entry in symbols_to_object_files are not attributed to any
  file; their number and cumulated size are logged once all pages have been
  processed.

  Args:
    symbols_to_object_files: (dict) as returned by _GetSymbolNameToFilename()
    code_pages_to_symbols: (dict) as returned by CodePagesToMangledSymbols()

  Returns:
    {page_offset: {object_filename: size_in_page}}
  """
  unmatched_count = 0
  unmatched_size = 0
  page_to_files = {}
  for page_address, symbols in code_pages_to_symbols.items():
    sizes = {}
    page_to_files[page_address] = sizes
    for (name, size_in_page) in symbols:
      if name in symbols_to_object_files:
        object_filename = symbols_to_object_files[name]
        sizes[object_filename] = sizes.get(object_filename, 0) + size_in_page
      else:
        unmatched_count += 1
        unmatched_size += size_in_page
  logging.warning('%d unmatched symbols (total size %d).',
                  unmatched_count, unmatched_size)
  return page_to_files
| |
| |
def WriteCodePageAttribution(page_to_object_files, text_filename,
                             json_filename):
  """Dumps the code page -> object files mapping as text and as JSON.

  Pages are written by increasing offset. Within a page, object files are
  listed by decreasing size.

  Args:
    page_to_object_files: As returned by CodePagesToObjectFiles().
    text_filename: (str) Text output filename.
    json_filename: (str) JSON output filename.
  """
  json_entries = []
  with open(text_filename, 'w') as text_file:
    for page_offset in sorted(page_to_object_files):
      files_to_sizes = page_to_object_files[page_offset]
      # (size, filename) tuples, so that sorting orders by size first.
      size_and_filenames = sorted(
          ((size, name) for (name, size) in files_to_sizes.items()),
          reverse=True)
      accounted_for = sum(size for (size, _) in size_and_filenames)
      json_entries.append({'offset': page_offset,
                           'accounted_for': accounted_for,
                           'size_and_filenames': size_and_filenames})
      text_file.write('Page Offset: %d * 4k (accounted for: %d)\n' % (
          page_offset // (1 << 12), accounted_for))
      for entry in size_and_filenames:
        text_file.write(' %d\t%s\n' % entry)
  with open(json_filename, 'w') as json_file:
    json.dump(json_entries, json_file)
| |
| |
def CreateArgumentParser():
  """Creates and returns the argument parser.

  --build-directory and --output-directory are mandatory, all the other flags
  are optional.

  Returns:
    (argparse.ArgumentParser) Parser for the command line of this script.
  """
  parser = argparse.ArgumentParser(description='Map code pages to paths')
  parser.add_argument('--native-library', type=str, default='libchrome.so',
                      help=('Native Library, e.g. libchrome.so or '
                            'libmonochrome.so'))
  parser.add_argument('--reached-symbols-file', type=str,
                      help='Path to the list of reached symbols, as generated '
                      'by tools/cygprofile/process_profiles.py',
                      required=False)
  # The help text used to misspell the script name as "process_resdency.py".
  parser.add_argument('--residency', type=str,
                      help='Path to a JSON file with residency data, as written'
                      ' by process_residency.py', required=False)
  parser.add_argument('--build-directory', type=str, help='Build directory',
                      required=True)
  parser.add_argument('--output-directory', type=str, help='Output directory',
                      required=True)
  parser.add_argument('--arch', type=str, help='Architecture', default='arm')
  parser.add_argument('--start-server', action='store_true', default=False,
                      help='Run an HTTP server in the output directory')
  parser.add_argument('--port', type=int, default=8000,
                      help='Port to use for the HTTP server.')
  return parser
| |
| |
def main():
  """Maps code pages to object files and writes the visualization data.

  Writes map.txt and map.json to the output directory, along with the
  visualize.* assets located next to this script. Also writes reached.json
  when --reached-symbols-file is given and copies the residency data to
  residency.json when --residency is given. With --start-server, then serves
  the output directory over HTTP until interrupted.

  Returns:
    (int) Process exit code: 1 if the native library is missing, 0 otherwise.
  """
  parser = CreateArgumentParser()
  args = parser.parse_args()
  logging.basicConfig(level=logging.INFO)

  # Fail fast: check for the library before the costly object files parsing.
  native_lib_filename = os.path.join(
      args.build_directory, 'lib.unstripped', args.native_library)
  if not os.path.exists(native_lib_filename):
    logging.error('Native library not found. Did you build the APK?')
    return 1

  symbol_extractor.SetArchitecture(args.arch)
  logging.info('Parsing object files in %s', args.build_directory)
  object_files_symbols = _GetSymbolNameToFilename(args.build_directory)

  offset = 0
  if args.residency:
    with open(args.residency) as f:
      residency = json.load(f)
      offset = residency['offset']

  logging.info('Extracting symbols from %s', native_lib_filename)
  native_lib_symbols = symbol_extractor.SymbolInfosFromBinary(
      native_lib_filename)
  logging.info('%d Symbols found', len(native_lib_symbols))
  logging.info('Mapping symbols and object files to code pages')
  page_to_symbols = CodePagesToMangledSymbols(native_lib_symbols, offset)
  page_to_object_files = CodePagesToObjectFiles(object_files_symbols,
                                                page_to_symbols)

  # The output directory has to exist before anything is written into it,
  # including reached.json below.
  if not os.path.exists(args.output_directory):
    os.makedirs(args.output_directory)

  if args.reached_symbols_file:
    logging.info('Mapping reached symbols to code pages')
    reached_symbol_names = ReadReachedSymbols(args.reached_symbols_file)
    reached_data = CodePagesToReachedSize(reached_symbol_names, page_to_symbols)
    WriteReachedData(os.path.join(args.output_directory, 'reached.json'),
                     reached_data)

  text_output_filename = os.path.join(args.output_directory, 'map.txt')
  json_output_filename = os.path.join(args.output_directory, 'map.json')
  WriteCodePageAttribution(
      page_to_object_files, text_output_filename, json_output_filename)

  # The residency data does not depend on the asset being copied, so it is
  # copied once, outside of the loop.
  if args.residency:
    shutil.copy(args.residency,
                os.path.join(args.output_directory, 'residency.json'))
  directory = os.path.dirname(__file__)
  for filename in ['visualize.html', 'visualize.js', 'visualize.css']:
    shutil.copy(os.path.join(directory, filename),
                os.path.join(args.output_directory, filename))

  if args.start_server:
    os.chdir(args.output_directory)
    httpd = SocketServer.TCPServer(
        ('', args.port), SimpleHTTPServer.SimpleHTTPRequestHandler)
    logging.warning('Serving on port %d', args.port)
    try:
      httpd.serve_forever()
    finally:
      # Release the listening socket, e.g. when interrupted with Ctrl-C.
      httpd.server_close()

  return 0
| |
| |
if __name__ == '__main__':
  # Use the value returned by main() as the process exit status.
  exit_code = main()
  sys.exit(exit_code)