| #!/usr/bin/python |
| # Copyright 2013 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Patch an orderfile. |
| |
| Starting with a list of symbols in a binary and an orderfile (ordered list of |
| symbols), matches the symbols in the orderfile and augments each symbol with the |
| symbols residing at the same address (due to having identical code). |
| |
| Note: It is possible to have: |
| - Several symbols mapping to the same offset in the binary. |
| - Several offsets for a given symbol (because we strip the ".clone." suffix) |
| |
| TODO(lizeb): Since the suffix ".clone." is only used with -O3 that we don't |
| currently use, simplify the logic by removing the suffix handling. |
| |
| The general pipeline is: |
| 1. Get the symbol infos (offset, length, name) from the binary |
| 2. Get the symbol names from the orderfile |
| 3. Find the orderfile symbol names in the symbols coming from the binary |
| 4. For each symbol found, get all the symbols at the same address |
| 5. Output them to an updated orderfile, with several different prefixes |
| """ |
| |
| import collections |
| import logging |
| import subprocess |
| import sys |
| |
# Linker section prefixes that may precede a symbol name. They are stripped
# from the symbols read from the orderfile and added back in the output file.
# '.text.' is itself a prefix of the other entries, so it is listed last to
# let the more specific prefixes match first.
_PREFIXES = (
    '.text.startup.',
    '.text.hot.',
    '.text.unlikely.',
    '.text.',
)


# One symbol parsed from nm output: its offset, its size (-1 when nm printed
# no size) and its name.
SymbolInfo = collections.namedtuple('SymbolInfo', 'offset size name')
| |
| |
def _RemoveClone(name):
  """Strip the ".clone." marker and everything after it from a symbol name.

  Args:
    name: a symbol name, possibly containing ".clone.".

  Returns:
    The part of name that precedes the first ".clone.", or name unchanged when
    the marker is absent.
  """
  base_name, _, _ = name.partition('.clone.')
  return base_name
| |
| |
def _GetSymbolInfosFromStream(nm_lines):
  """Builds the symbol lookup tables from lines of nm output.

  Two shapes of line are recognized, based on their number of
  whitespace-separated fields:
    00210d59 00000002 t _ZN34BrowserPluginHostMsg_Attach_ParamsD2Ev
      -> offset, size, symbol type, symbol name
    0070ee8c T WebRtcSpl_ComplexBitReverse
      -> offset, symbol type, symbol name (size recorded as -1)
  Lines with any other number of fields are ignored.

  Args:
    nm_lines: An iterable of lines.

  Returns:
    The same output as _GetSymbolInfosFromBinary: a tuple
    (offset_to_symbol_infos, name_to_symbol_infos) of collections.defaultdict
    whose values are lists of SymbolInfo. Symbol names have their ".clone."
    suffix removed.
  """
  # TODO(lizeb): Consider switching to objdump to simplify parsing.
  by_offset = collections.defaultdict(list)
  by_name = collections.defaultdict(list)
  for nm_line in nm_lines:
    fields = nm_line.split()
    field_count = len(fields)
    if field_count not in (3, 4):
      continue
    offset = int(fields[0], 16)
    # Without a size column, -1 is used as a sentinel.
    size = int(fields[1], 16) if field_count == 4 else -1
    info = SymbolInfo(offset, size, _RemoveClone(fields[-1]))
    by_offset[info.offset].append(info)
    by_name[info.name].append(info)
  return (by_offset, by_name)
| |
| |
def _GetSymbolInfosFromBinary(binary_filename):
  """Runs nm to get all the symbols from a binary.

  Only the nm lines describing symbols of type 't', 'W' or 'T' are parsed.

  Args:
    binary_filename: path to the binary.

  Returns:
    A tuple of collections.defaultdict:
    (offset_to_symbol_infos, name_to_symbol_infos):
    - offset_to_symbol_infos: {offset: [symbol_info1, ...]}
    - name_to_symbol_infos: {name: [symbol_info1, ...]}

  Raises:
    OSError: if nm cannot be executed.
  """
  # The arguments are passed as a list, without a shell, so that a path
  # containing spaces or shell metacharacters reaches nm verbatim.
  # universal_newlines makes the pipe yield text lines.
  p = subprocess.Popen(['nm', '-S', '-n', binary_filename],
                       stdout=subprocess.PIPE, universal_newlines=True)
  # Same selection as the former `egrep "( t )|( W )|( T )"` filter.
  symbol_type_markers = (' t ', ' W ', ' T ')
  wanted_lines = (line for line in p.stdout
                  if any(marker in line for marker in symbol_type_markers))
  try:
    return _GetSymbolInfosFromStream(wanted_lines)
  finally:
    # Close our end first so that nm cannot block on a full pipe if parsing
    # stopped early, then reap the child.
    p.stdout.close()
    p.wait()
| |
| |
def _StripPrefix(line):
  """Extract the symbol from an orderfile line carrying a linker section name.

  Args:
    line: a line from an orderfile, usually in the form:
          .text.SymbolName

  Returns:
    The symbol, SymbolName in the example above. A line that starts with none
    of the known prefixes is returned as is, minus its trailing newline(s).
  """
  symbol = line.rstrip('\n')
  matching_prefixes = [p for p in _PREFIXES if symbol.startswith(p)]
  if not matching_prefixes:
    return symbol  # Unprefixed case
  # Only the first matching prefix, in _PREFIXES order, is removed.
  return symbol[len(matching_prefixes[0]):]
| |
| |
def _GetSymbolsFromStream(lines):
  """Collect the distinct symbols from the lines of an orderfile.

  Empty entries, wildcards ('*') and the bare '.text' section are dropped.
  The ".clone." suffix is ignored for that check only: the symbols returned
  still carry it.

  Args:
    lines: iterable of lines from an orderfile.

  Returns:
    Same as _GetSymbolsFromOrderfile: a list of symbol names, without
    duplicates, in order of first appearance.
  """
  # TODO(lizeb): Retain the prefixes later in the processing stages.
  ignored_names = ('', '*', '.text')
  seen = set()
  ordered_symbols = []
  for raw_line in lines:
    symbol = _StripPrefix(raw_line)
    if _RemoveClone(symbol) in ignored_names:
      continue
    if symbol in seen:
      continue
    seen.add(symbol)
    ordered_symbols.append(symbol)
  return ordered_symbols
| |
| |
def _GetSymbolsFromOrderfile(filename):
  """Return the symbols from an orderfile.

  Args:
    filename: The name of the orderfile.

  Returns:
    A list of symbol names.
  """
  with open(filename, 'r') as f:
    # A file object already iterates over its lines; the deprecated
    # xreadlines() is not needed (and no longer exists in Python 3).
    return _GetSymbolsFromStream(f)
| |
def _SymbolsWithSameOffset(profiled_symbol, name_to_symbol_info,
                           offset_to_symbol_info):
  """Expand a profiled symbol to every symbol sharing one of its offsets.

  Args:
    profiled_symbol: the string symbol name to be expanded.
    name_to_symbol_info: {name: [symbol_info1], ...}, as returned by
        _GetSymbolInfosFromBinary
    offset_to_symbol_info: {offset: [symbol_info1, ...], ...}

  Returns:
    A list of symbol names, or an empty list if profiled_symbol was not in
    name_to_symbol_info. Names may appear more than once.
  """
  # .get() rather than indexing: a lookup miss must not add an entry when
  # name_to_symbol_info is a defaultdict.
  matching_infos = name_to_symbol_info.get(profiled_symbol)
  if matching_infos is None:
    return []
  return [alias.name
          for info in matching_infos
          for alias in offset_to_symbol_info[info.offset]]
| |
def _ExpandSymbols(profiled_symbols, name_to_symbol_infos,
                   offset_to_symbol_infos):
  """Expand all of the symbols in profiled_symbols to include any symbols which
  share the same address.

  Symbols that cannot be found in name_to_symbol_infos are kept in the output
  at their original position and reported with a warning.

  Args:
    profiled_symbols: Symbols to match
    name_to_symbol_infos: {name: [symbol_info1], ...}, as returned by
        _GetSymbolInfosFromBinary
    offset_to_symbol_infos: {offset: [symbol_info1, ...], ...}

  Returns:
    A list of the symbol names.
  """
  found_symbols = 0
  missing_symbols = []
  all_symbols = []
  for name in profiled_symbols:
    expansion = _SymbolsWithSameOffset(
        name, name_to_symbol_infos, offset_to_symbol_infos)
    if expansion:
      found_symbols += 1
      all_symbols += expansion
    else:
      all_symbols.append(name)
      missing_symbols.append(name)
  logging.info('symbols found: %d\n', found_symbols)
  # Only warn when something is actually missing. The previous test compared
  # the list itself to 0, which is always true on Python 2 and a TypeError on
  # Python 3.
  if missing_symbols:
    logging.warning('%d missing symbols.', len(missing_symbols))
    missing_symbols_to_show = min(100, len(missing_symbols))
    logging.warning('First %d missing symbols:\n%s',
                    missing_symbols_to_show,
                    '\n'.join(missing_symbols[:missing_symbols_to_show]))
  return all_symbols
| |
| |
def _PrintSymbolsWithPrefixes(symbol_names, output_file):
  """Write every symbol to output_file once per prefix in _PREFIXES.

  Args:
    symbol_names: iterable of symbol names, in output order.
    output_file: object with a write() method receiving one line per section.

  A given prefixed name is written only the first time it is produced.
  """
  already_written = set()
  for symbol_name in symbol_names:
    for section in (prefix + symbol_name for prefix in _PREFIXES):
      if section in already_written:
        continue
      already_written.add(section)
      output_file.write(section + '\n')
| |
| |
def main(argv):
  """Patch an orderfile and write the result to standard output.

  Args:
    argv: command line, in the form
        [program_name, unpatched_orderfile, binary].

  Returns:
    The process exit code: 0 on success, 1 when the number of arguments is
    wrong (a usage message is printed in that case).
  """
  # Single-argument print(...) calls behave identically under Python 2 and
  # Python 3, unlike the print statement.
  if len(argv) != 3:
    print('Usage: %s <unpatched_orderfile> <libchrome.so>' % argv[0])
    return 1
  orderfile_filename = argv[1]
  binary_filename = argv[2]
  (offset_to_symbol_infos, name_to_symbol_infos) = _GetSymbolInfosFromBinary(
      binary_filename)
  profiled_symbols = _GetSymbolsFromOrderfile(orderfile_filename)
  expanded_symbols = _ExpandSymbols(
      profiled_symbols, name_to_symbol_infos, offset_to_symbol_infos)
  _PrintSymbolsWithPrefixes(expanded_symbols, sys.stdout)
  # The following is needed otherwise Gold only applies a partial sort.
  print('.text')  # gets methods not in a section, such as assembly
  print('.text.*')  # gets everything else
  return 0
| |
| |
if __name__ == '__main__':
  # INFO level so that the statistics logged while expanding symbols are shown.
  logging.basicConfig(level=logging.INFO)
  exit_code = main(sys.argv)
  sys.exit(exit_code)