#!/usr/bin/python
#
# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Maps code pages to object files.
For all pages from the native library .text section, extract all object files
the code maps to. Outputs a web-based visualization of page -> symbol mappings,
reached symbols and code residency.
"""
import argparse
import collections
import json
import logging
import multiprocessing
import os
import shutil
import SimpleHTTPServer
import SocketServer
import sys
# Directory three levels above the one containing this file (presumably the
# root of the source checkout).
_SRC_PATH = os.path.abspath(os.path.join(
os.path.dirname(__file__), os.pardir, os.pardir, os.pardir))
# Add tools/cygprofile to the module search path; the two imports below are
# presumably resolved from there.
sys.path.append(os.path.join(_SRC_PATH, 'tools', 'cygprofile'))
import cyglog_to_orderfile
import symbol_extractor
# Size of a code page in bytes (1 << 12 == 4096).
_PAGE_SIZE = 1 << 12
# Mask clearing the low 12 bits of an address, i.e. rounding it down to a
# multiple of _PAGE_SIZE.
_PAGE_MASK = ~(_PAGE_SIZE - 1)
def GetSymbolNameToFilename(build_directory):
  """Parses object files in a directory, and maps mangled symbol names to files.

  The object files found under <build_directory>/obj are parsed in parallel by
  a pool of worker processes, which is shut down before returning.

  Args:
    build_directory: (str) Build directory.

  Returns:
    {symbol_info.name: (symbol_info, object filename)}. Filenames are stripped
    of the build_directory part, and of the leading '/obj/' when present. When
    several object files define the same symbol name, the file listed last
    overrides the previous ones.
  """
  path = os.path.join(build_directory, 'obj')
  object_filenames = cyglog_to_orderfile.GetObjectFilenames(path)
  pool = multiprocessing.Pool()
  try:
    symbol_infos_per_file = pool.map(
        symbol_extractor.SymbolInfosFromBinary, object_filenames)
  finally:
    # Always release the worker processes, including when parsing fails. The
    # pool is not used as a context manager to stay compatible with Python 2.
    pool.terminate()
    pool.join()

  result = {}
  for symbol_infos, filename in zip(symbol_infos_per_file, object_filenames):
    stripped_filename = filename[len(build_directory):]
    if stripped_filename.startswith('/obj/'):
      stripped_filename = stripped_filename[len('/obj/'):]
    for s in symbol_infos:
      result[s.name] = (s, stripped_filename)
  return result
def CodePagesToMangledSymbols(symbol_infos, text_start_offset):
  """Groups a list of symbols per code page.

  Args:
    symbol_infos: (symbol_extractor.SymbolInfo) List of symbols.
    text_start_offset: (int) Offset to add to symbol offsets. This is used to
                       account for the start of .text not being at the start of
                       a page in memory.

  Returns:
    {page offset: set((mangled_name, size_in_page), ...)}, as a defaultdict.
    A symbol spanning several pages has one entry in each page it overlaps.
  """
  # Different symbols can be at the same address, through identical code folding
  # for instance. In this case, only keep the first one. This is not ideal, as
  # file attribution will be incorrect in this case. However ICF mostly works
  # with small symbols, so it shouldn't impact numbers too much.
  result = collections.defaultdict(set)
  known_offsets = set()
  for s in symbol_infos:
    assert s.offset % 2 == 0, 'Wrong alignment'
    if s.offset in known_offsets:
      continue
    known_offsets.add(s.offset)
    start = s.offset + text_start_offset
    end = start + s.size
    # |end| is exclusive: the last byte of the symbol is at |end - 1|. Deriving
    # the last page from |end| itself would add a spurious zero-sized entry to
    # the following page whenever a symbol ends exactly on a page boundary.
    # Zero-sized symbols are still recorded in the page they start in.
    last_byte = max(start, end - 1)
    first_page = start & _PAGE_MASK
    last_page = last_byte & _PAGE_MASK
    for page in range(first_page, last_page + _PAGE_SIZE, _PAGE_SIZE):
      symbol_start_in_page = max(page, start)
      symbol_end_in_page = min(page + _PAGE_SIZE, end)
      size_in_page = symbol_end_in_page - symbol_start_in_page
      result[page].add((s.name, size_in_page))
  # Sanity check: the symbols attributed to a page should fit in it.
  for page, symbols in result.items():
    total_size = sum(size_in_page for (_, size_in_page) in symbols)
    if total_size > _PAGE_SIZE:
      logging.warning('Too many symbols in page (%d * 4k)! Total size: %d',
                      page // _PAGE_SIZE, total_size)
  return result
def ReadReachedSymbols(filename):
  """Loads the reached symbol names listed in a file, one per line.

  Args:
    filename: (str) Path of the file to read.

  Returns:
    [str] One entry per line of the file, in file order, with the surrounding
    whitespace removed.
  """
  symbol_names = []
  with open(filename, 'r') as symbols_file:
    for line in symbols_file:
      symbol_names.append(line.strip())
  return symbol_names
def WriteReachedData(filename, page_to_reached_data):
  """Writes the page to reached fraction to a JSON file.

  The output format is suited for visualize.html: a list with one
  {'offset', 'total', 'reached'} object per page. Pages are written in
  increasing offset order so that the output is deterministic.

  Args:
    filename: (str) Output filename.
    page_to_reached_data: (dict) As returned by CodePagesToReachedSize().
  """
  json_object = []
  for offset in sorted(page_to_reached_data):
    data = page_to_reached_data[offset]
    json_object.append({'offset': offset, 'total': data['total'],
                        'reached': data['reached']})
  with open(filename, 'w') as f:
    json.dump(json_object, f)
def CodePagesToReachedSize(reached_symbol_names, page_to_symbols):
  """Computes, for each code page, its total and reached symbol sizes.

  Args:
    reached_symbol_names: ([str]) List of reached symbol names.
    page_to_symbols: (dict) As returned by CodePagesToMangledSymbols().

  Returns:
    {page offset (int) -> {'total': int, 'reached': int}}, where 'total' sums
    the in-page sizes of all the symbols of the page, and 'reached' only those
    of the symbols whose name is in reached_symbol_names.
  """
  # Membership is tested once per symbol, so use a set.
  reached = frozenset(reached_symbol_names)
  page_to_reached = {}
  for offset, symbols in page_to_symbols.items():
    total_size = 0
    reached_size = 0
    for name, size_in_page in symbols:
      total_size += size_in_page
      if name in reached:
        reached_size += size_in_page
    page_to_reached[offset] = {'total': total_size, 'reached': reached_size}
  return page_to_reached
def CodePagesToObjectFiles(symbols_to_object_files, code_pages_to_symbols):
  """Attributes the content of each code page to object files.

  Symbols that are not found in symbols_to_object_files are left out of the
  result; how many were skipped is logged as a warning.

  Args:
    symbols_to_object_files: (dict) as returned by GetSymbolNameToFilename()
    code_pages_to_symbols: (dict) as returned by CodePagesToMangledSymbols()

  Returns:
    {page_offset: {object_filename: size_in_page}}
  """
  page_to_files = {}
  unmatched = 0
  for page_address, symbols in code_pages_to_symbols.items():
    file_to_size = {}
    for name, size_in_page in symbols:
      if name in symbols_to_object_files:
        # Values are (symbol_info, object filename) pairs.
        object_filename = symbols_to_object_files[name][1]
        file_to_size[object_filename] = (
            file_to_size.get(object_filename, 0) + size_in_page)
      else:
        unmatched += 1
    page_to_files[page_address] = file_to_size
  logging.warning('%d unmatched symbols.', unmatched)
  return page_to_files
def WriteCodePageAttribution(page_to_object_files, text_filename,
                             json_filename):
  """Dumps the code page -> object files mapping as text and as JSON.

  Pages are written in increasing offset order. Within a page, object files
  are listed by decreasing (size, filename).

  Args:
    page_to_object_files: As returned by CodePagesToObjectFiles().
    text_filename: (str) Text output filename.
    json_filename: (str) JSON output filename.
  """
  json_data = []
  with open(text_filename, 'w') as text_file:
    for page_offset in sorted(page_to_object_files):
      file_to_size = page_to_object_files[page_offset]
      size_and_filenames = sorted(
          ((size, name) for (name, size) in file_to_size.items()),
          reverse=True)
      total_size = sum(size for (size, _) in size_and_filenames)
      json_data.append({'offset': page_offset, 'accounted_for': total_size,
                        'size_and_filenames': size_and_filenames})
      # One header line per page, followed by one line per object file.
      text_file.write('Page Offset: %d * 4k (accounted for: %d)\n' % (
          page_offset / (1 << 12), total_size))
      for size, name in size_and_filenames:
        text_file.write(' %d\t%s\n' % (size, name))
  with open(json_filename, 'w') as json_file:
    json.dump(json_data, json_file)
def CreateArgumentParser():
  """Creates and returns the argument parser.

  --build-directory and --output-directory are mandatory; every other flag is
  optional.

  Returns:
    (argparse.ArgumentParser) Parser for the command line of this script.
  """
  parser = argparse.ArgumentParser(description='Map code pages to paths')
  parser.add_argument('--native-library', type=str, default='libchrome.so',
                      help=('Native Library, e.g. libchrome.so or '
                            'libmonochrome.so'))
  parser.add_argument('--reached-symbols-file', type=str,
                      help='Path to the list of reached symbols, as generated '
                      'by tools/cygprofile/process_profiles.py',
                      required=False)
  parser.add_argument('--residency', type=str,
                      help='Path to a JSON file with residency data, as written'
                      ' by process_residency.py', required=False)
  parser.add_argument('--build-directory', type=str, help='Build directory',
                      required=True)
  parser.add_argument('--output-directory', type=str, help='Output directory',
                      required=True)
  parser.add_argument('--arch', type=str, help='Architecture', default='arm')
  parser.add_argument('--start-server', action='store_true', default=False,
                      help='Run an HTTP server in the output directory')
  parser.add_argument('--port', type=int, default=8000,
                      help='Port to use for the HTTP server.')
  return parser
def main():
  """Writes the code page attribution files, and optionally serves them.

  Outputs map.txt and map.json in the output directory, along with
  reached.json when --reached-symbols-file is given, a copy of the residency
  data when --residency is given, and the visualization assets. With
  --start-server, then serves the output directory over HTTP until interrupted.

  Returns:
    (int) Process exit code: 1 if the native library is not found, 0 otherwise.
  """
  parser = CreateArgumentParser()
  args = parser.parse_args()
  logging.basicConfig(level=logging.INFO)

  # Fail fast: look for the native library before parsing every object file.
  native_lib_filename = os.path.join(
      args.build_directory, 'lib.unstripped', args.native_library)
  if not os.path.exists(native_lib_filename):
    logging.error('Native library not found. Did you build the APK?')
    return 1

  symbol_extractor.SetArchitecture(args.arch)
  logging.info('Parsing object files in %s', args.build_directory)
  object_files_symbols = GetSymbolNameToFilename(args.build_directory)

  offset = 0
  if args.residency:
    with open(args.residency) as f:
      residency = json.load(f)
      offset = residency['offset']

  logging.info('Extracting symbols from %s', native_lib_filename)
  native_lib_symbols = symbol_extractor.SymbolInfosFromBinary(
      native_lib_filename)
  logging.info('Mapping symbols and object files to code pages')
  page_to_symbols = CodePagesToMangledSymbols(native_lib_symbols, offset)
  page_to_object_files = CodePagesToObjectFiles(object_files_symbols,
                                                page_to_symbols)

  # The output directory has to exist before any output file is written,
  # reached.json included.
  if not os.path.exists(args.output_directory):
    os.makedirs(args.output_directory)

  if args.reached_symbols_file:
    logging.info('Mapping reached symbols to code pages')
    reached_symbol_names = ReadReachedSymbols(args.reached_symbols_file)
    reached_data = CodePagesToReachedSize(reached_symbol_names, page_to_symbols)
    WriteReachedData(os.path.join(args.output_directory, 'reached.json'),
                     reached_data)

  text_output_filename = os.path.join(args.output_directory, 'map.txt')
  json_output_filename = os.path.join(args.output_directory, 'map.json')
  WriteCodePageAttribution(
      page_to_object_files, text_output_filename, json_output_filename)

  # The residency data only needs to be copied once, not once per asset.
  if args.residency:
    shutil.copy(args.residency,
                os.path.join(args.output_directory, 'residency.json'))
  directory = os.path.dirname(__file__)
  for filename in ['visualize.html', 'visualize.js', 'visualize.css']:
    shutil.copy(os.path.join(directory, filename),
                os.path.join(args.output_directory, filename))

  if args.start_server:
    os.chdir(args.output_directory)
    httpd = SocketServer.TCPServer(
        ('', args.port), SimpleHTTPServer.SimpleHTTPRequestHandler)
    logging.warning('Serving on port %d', args.port)
    try:
      httpd.serve_forever()
    finally:
      # Release the listening socket when serving stops (e.g. on Ctrl-C).
      httpd.server_close()
  return 0
if __name__ == '__main__':
  # Use the value returned by main() as the exit status of the process.
  exit_code = main()
  sys.exit(exit_code)