| # Copyright 2019 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """Parses ELF information without relying on external tools. |
| |
| This file was originally copied and adapted from: |
| https://fuchsia.googlesource.com/fuchsia/+/827f9fe/build/images/elfinfo.py |
| """ |
| |
| from contextlib import contextmanager |
| from collections import namedtuple |
| import mmap |
| import os |
| import struct |
| import uuid |
| |
# Standard ELF constants.

# File magic.  This is a bytes literal because it is compared against the raw
# contents of a file opened in binary mode; on Python 2 bytes is str, so the
# value is unchanged there.
ELFMAG = b'\x7fELF'

# e_ident[] indices, and the values stored at them.
EI_CLASS = 4
ELFCLASS32 = 1
ELFCLASS64 = 2
EI_DATA = 5
ELFDATA2LSB = 1
ELFDATA2MSB = 2

# e_machine values.
EM_386 = 3
EM_ARM = 40
EM_X86_64 = 62
EM_AARCH64 = 183

# Program header types (p_type).
PT_LOAD = 1
PT_DYNAMIC = 2
PT_INTERP = 3
PT_NOTE = 4

# Dynamic section tags (d_tag).
DT_NEEDED = 1
DT_STRTAB = 5
DT_SONAME = 14

# Note type (n_type) of a GNU build ID note.
NT_GNU_BUILD_ID = 3

# Section header type (sh_type) of a symbol table.
SHT_SYMTAB = 2
| |
| |
class elf_note(namedtuple('elf_note', [
    'name',
    'type',
    'desc',
])):
  """One note record from an ELF file.

  Fields:
    name: the note's owner name as stored in the file, including its
      terminating NUL byte.
    type: the integer note type.
    desc: the note's descriptor payload.
  """

  # An ELF note is identified by (name_string, type_integer).
  def ident(self):
    """Returns the (name, type) pair that identifies this note."""
    return (self.name, self.type)

  def is_build_id(self):
    """Returns True if this note is a GNU build ID note."""
    name = self.name
    # Names read from a file are byte strings.  Accept text as well so that
    # notes built by hand keep working on both Python 2 and Python 3.
    if not isinstance(name, (bytes, bytearray)) and hasattr(name, 'encode'):
      name = name.encode('latin-1', 'replace')
    return name == b'GNU\0' and self.type == NT_GNU_BUILD_ID

  def build_id_hex(self):
    """Returns the build ID as lowercase hex digits, or None.

    None is returned when this note is not a GNU build ID note.
    """
    if not self.is_build_id():
      return None
    desc = self.desc
    if not isinstance(desc, (bytes, bytearray)):
      desc = desc.encode('latin-1')
    # Iterating a bytearray yields integers on both Python 2 and Python 3,
    # unlike iterating a byte string.
    return ''.join('%02x' % byte for byte in bytearray(desc))

  def __repr__(self):
    return ('elf_note(%r, %#x, <%d bytes>)' % (self.name, self.type,
                                                len(self.desc)))
| |
| |
def gen_elf():
  """Yields the decoder set for every supported ELF format.

  Each item is a ((class_char, data_char), elf) pair, suitable for dict().
  The key holds an EI_CLASS value and an EI_DATA value, each converted to a
  one-character string with chr().  elf is a namedtuple holding one
  elf_accessor per structure in the table below (elf.Ehdr, elf.Phdr, ...).
  """
  # { 'Struct1': (ELFCLASS32 fields, ELFCLASS64 fields),
  #   'Struct2': fields_same_for_both, ... }
  # The format codes are those of the struct module.  Every decoder is built
  # with an explicit byte-order prefix, which selects struct's standard
  # sizes, so 'I' and 'L' are both 4 bytes wide.
  elf_types = {
      'Ehdr': ([
          ('e_ident', '16s'),
          ('e_type', 'H'),
          ('e_machine', 'H'),
          ('e_version', 'I'),
          ('e_entry', 'I'),
          ('e_phoff', 'I'),
          ('e_shoff', 'I'),
          ('e_flags', 'I'),
          ('e_ehsize', 'H'),
          ('e_phentsize', 'H'),
          ('e_phnum', 'H'),
          ('e_shentsize', 'H'),
          ('e_shnum', 'H'),
          ('e_shstrndx', 'H'),
      ], [
          ('e_ident', '16s'),
          ('e_type', 'H'),
          ('e_machine', 'H'),
          ('e_version', 'I'),
          ('e_entry', 'Q'),
          ('e_phoff', 'Q'),
          ('e_shoff', 'Q'),
          ('e_flags', 'I'),
          ('e_ehsize', 'H'),
          ('e_phentsize', 'H'),
          ('e_phnum', 'H'),
          ('e_shentsize', 'H'),
          ('e_shnum', 'H'),
          ('e_shstrndx', 'H'),
      ]),
      'Phdr': ([
          ('p_type', 'I'),
          ('p_offset', 'I'),
          ('p_vaddr', 'I'),
          ('p_paddr', 'I'),
          ('p_filesz', 'I'),
          ('p_memsz', 'I'),
          ('p_flags', 'I'),
          ('p_align', 'I'),
      ], [
          ('p_type', 'I'),
          ('p_flags', 'I'),
          ('p_offset', 'Q'),
          ('p_vaddr', 'Q'),
          ('p_paddr', 'Q'),
          ('p_filesz', 'Q'),
          ('p_memsz', 'Q'),
          ('p_align', 'Q'),
      ]),
      'Shdr': ([
          ('sh_name', 'L'),
          ('sh_type', 'L'),
          ('sh_flags', 'L'),
          ('sh_addr', 'L'),
          ('sh_offset', 'L'),
          ('sh_size', 'L'),
          ('sh_link', 'L'),
          ('sh_info', 'L'),
          ('sh_addralign', 'L'),
          ('sh_entsize', 'L'),
      ], [
          ('sh_name', 'L'),
          ('sh_type', 'L'),
          ('sh_flags', 'Q'),
          ('sh_addr', 'Q'),
          ('sh_offset', 'Q'),
          ('sh_size', 'Q'),
          ('sh_link', 'L'),
          ('sh_info', 'L'),
          ('sh_addralign', 'Q'),
          ('sh_entsize', 'Q'),
      ]),
      'Dyn': ([
          ('d_tag', 'i'),
          ('d_val', 'I'),
      ], [
          ('d_tag', 'q'),
          ('d_val', 'Q'),
      ]),
      'Nhdr': [
          ('n_namesz', 'I'),
          ('n_descsz', 'I'),
          ('n_type', 'I'),
      ],
      'dwarf2_line_header': [
          ('unit_length', 'L'),
          ('version', 'H'),
          ('header_length', 'L'),
          ('minimum_instruction_length', 'B'),
          ('default_is_stmt', 'B'),
          ('line_base', 'b'),
          ('line_range', 'B'),
          ('opcode_base', 'B'),
      ],
      'dwarf4_line_header': [
          ('unit_length', 'L'),
          ('version', 'H'),
          ('header_length', 'L'),
          ('minimum_instruction_length', 'B'),
          ('maximum_operations_per_instruction', 'B'),
          ('default_is_stmt', 'B'),
          ('line_base', 'b'),
          # line_range is an unsigned byte; only line_base is signed.
          ('line_range', 'B'),
          ('opcode_base', 'B'),
      ],
  }

  # There is an accessor for each struct, e.g. Ehdr.
  # Ehdr.size is the size of the struct in bytes.
  # Ehdr.read is a function like Struct.unpack_from, returning a namedtuple.
  # Ehdr.write is a function like Struct.pack_into, taking such a tuple.
  # Ehdr.pack is a function like Struct.pack, taking such a tuple.
  elf_accessor = namedtuple('elf_accessor', ['size', 'read', 'write', 'pack'])

  # All the accessors for a format (class, byte-order) form one elf,
  # e.g. use elf.Ehdr and elf.Phdr.
  elf = namedtuple('elf', list(elf_types))

  def gen_accessors(is64, struct_byte_order):
    # Yields one accessor per struct, in the same order as the elf fields.

    def make_accessor(record_type, decoder):
      # A separate function so that each lambda closes over its own
      # record_type and decoder rather than the loop variables below.
      return elf_accessor(
          size=decoder.size,
          read=lambda buffer, offset=0: record_type._make(
              decoder.unpack_from(buffer, offset)),
          write=lambda buffer, offset, x: decoder.pack_into(
              buffer, offset, *x),
          pack=lambda x: decoder.pack(*x))

    for name, fields in elf_types.items():
      if isinstance(fields, tuple):
        # A tuple holds separate (32-bit, 64-bit) layouts.
        fields = fields[1 if is64 else 0]
      record_type = namedtuple(name, [field_name for field_name, _ in fields])
      decoder = struct.Struct(struct_byte_order +
                              ''.join(fmt for _, fmt in fields))
      yield make_accessor(record_type, decoder)

  for elfclass, is64 in [(ELFCLASS32, False), (ELFCLASS64, True)]:
    for elf_bo, struct_bo in [(ELFDATA2LSB, '<'), (ELFDATA2MSB, '>')]:
      yield ((chr(elfclass), chr(elf_bo)),
             elf(*gen_accessors(is64, struct_bo)))
| |
| |
# Every decoder set produced by gen_elf(), indexed by its key, e.g.
# ELF[file[EI_CLASS], file[EI_DATA]].Ehdr.read(file).e_phnum
ELF = {ident: accessors for ident, accessors in gen_elf()}
| |
| |
def get_elf_accessor(file):
  """Returns the decoder set matching an ELF image, or None.

  Args:
    file: the file contents, as a byte string or an mmap object.

  Returns:
    The ELF entry for the image's class and byte order.  None if the contents
    do not start with the ELF magic, end before the class and byte-order
    bytes, or name a class or byte order that ELF has no entry for.
  """
  # The contents are bytes, so compare bytes even if ELFMAG is spelled as a
  # text literal.
  magic = ELFMAG if isinstance(ELFMAG, bytes) else ELFMAG.encode('latin-1')
  if file[:len(magic)] != magic:
    return None
  # Slice rather than index: indexing a byte string yields an int on Python 3
  # but a one-character string on Python 2, while a slice is a byte string on
  # both (and is simply empty when the contents are too short).
  ei_class = file[EI_CLASS:EI_CLASS + 1]
  ei_data = file[EI_DATA:EI_DATA + 1]
  if not ei_class or not ei_data:
    return None
  # If it looks like an ELF file, whip out the decoder ring.  ELF is keyed by
  # one-character strings built with chr().
  return ELF.get((chr(ord(ei_class)), chr(ord(ei_data))))
| |
| |
def gen_phdrs(file, elf, ehdr):
  """Yields each program header of an ELF image, in table order.

  Args:
    file: the file contents to decode from.
    elf: the decoder set for the image's format; elf.Phdr is used.
    ehdr: the decoded ELF header, supplying e_phoff and e_phnum.
  """
  entry_size = elf.Phdr.size
  # range() rather than xrange() so this runs on Python 2 and Python 3.
  for index in range(ehdr.e_phnum):
    yield elf.Phdr.read(file, ehdr.e_phoff + index * entry_size)
| |
| |
def gen_shdrs(file, elf, ehdr):
  """Yields each section header of an ELF image, in table order.

  Args:
    file: the file contents to decode from.
    elf: the decoder set for the image's format; elf.Shdr is used.
    ehdr: the decoded ELF header, supplying e_shoff and e_shnum.
  """
  entry_size = elf.Shdr.size
  # range() rather than xrange() so this runs on Python 2 and Python 3.
  for index in range(ehdr.e_shnum):
    yield elf.Shdr.read(file, ehdr.e_shoff + index * entry_size)
| |
| |
# Describes one CPU architecture under each of its names:
#   e_machine: ELF e_machine int
#   llvm:      LLVM triple CPU component
#   gn:        GN target_cpu
cpu = namedtuple('cpu', ('e_machine', 'llvm', 'gn'))
| |
# Looks up the cpu description for an e_machine value.
ELF_MACHINE_TO_CPU = dict(
    (machine, cpu(machine, llvm_name, gn_name))
    for machine, llvm_name, gn_name in (
        (EM_386, 'i386', 'x86'),
        (EM_ARM, 'arm', 'arm'),
        (EM_X86_64, 'x86_64', 'x64'),
        (EM_AARCH64, 'aarch64', 'arm64'),
    ))
| |
| |
@contextmanager
def mmapper(filename):
  """A context manager that yields (fd, file_contents) given a file name.

  file_contents is a read-only mmap of the whole file, or an empty byte
  string if the file is empty.  The mmap and file objects are closed at the
  end of the 'with' statement, and the file is also closed if it cannot be
  examined or mapped.
  """
  with open(filename, 'rb') as fileobj:
    fd = fileobj.fileno()
    if os.fstat(fd).st_size == 0:
      # mmap can't handle empty files.  Yield bytes so that the contents have
      # the same type of slices as an mmap would.
      yield fd, b''
      return
    mmapobj = mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
    try:
      yield fd, mmapobj
    finally:
      mmapobj.close()
| |
| |
# The result of get_elf_info():
#   filename: the file name that was passed in
#   cpu:      cpu tuple, or None if e_machine is not a known value
#   notes:    list of elf_note: selected notes
#   build_id: string of lowercase hex digits, or None
#   stripped: bool: has no symbol table or .debug_* sections
#   interp:   string or None: PT_INTERP (without \0)
#   soname:   string or None: DT_SONAME
#   needed:   set of strings: DT_NEEDED
elf_info = namedtuple('elf_info', (
    'filename',
    'cpu',
    'notes',
    'build_id',
    'stripped',
    'interp',
    'soname',
    'needed',
))
| |
| |
def get_elf_info(filename, match_notes=False):
  """Reads a file and summarizes its ELF metadata.

  Args:
    filename: path of the file to examine.
    match_notes: selects the notes to report.  True reports every note and
      False (the default) reports none.  Any other value is treated as a
      collection of (name, type) identifiers, and only notes whose ident() is
      in it are reported.

  Returns:
    An elf_info tuple, or None if get_elf_accessor() does not recognize the
    file.  Strings in the result (interp, soname, needed, and the note names
    and descriptors) are the raw byte strings found in the file.

  Raises:
    AssertionError: if e_phentsize is wrong, a section name offset is out of
      range, or a string is not NUL-terminated.
  """
  data = None
  elf = None
  ehdr = None
  phdrs = None

  # Yields an elf_note for each note in any PT_NOTE segment.
  def gen_notes():

    def round_up_to(size):
      # Name and descriptor are each padded to a multiple of 4 bytes.  Floor
      # division keeps the offset an integer on Python 3 as well.
      return ((size + 3) // 4) * 4

    for phdr in phdrs:
      if phdr.p_type != PT_NOTE:
        continue
      pos = phdr.p_offset
      segment_end = phdr.p_offset + phdr.p_filesz
      while pos < segment_end:
        nhdr = elf.Nhdr.read(data, pos)
        pos += elf.Nhdr.size
        name = data[pos:pos + nhdr.n_namesz]
        pos += round_up_to(nhdr.n_namesz)
        desc = data[pos:pos + nhdr.n_descsz]
        pos += round_up_to(nhdr.n_descsz)
        yield elf_note(name, nhdr.n_type, desc)

  # Yields (shdr, name) for every section header except the one at index 0.
  def gen_sections():
    shdrs = list(gen_shdrs(data, elf, ehdr))
    if not shdrs:
      return
    strtab_shdr = shdrs[ehdr.e_shstrndx]
    for shdr in shdrs[1:]:
      assert shdr.sh_name < strtab_shdr.sh_size, (
          "%s: invalid sh_name" % filename)
      yield (shdr, extract_C_string(strtab_shdr.sh_offset + shdr.sh_name))

  # Generates '\0'-terminated strings starting at the given offset,
  # until an empty string.
  def gen_strings(start):
    while True:
      end = data.find(b'\0', start)
      assert end >= start, (
          "%s: Unterminated string at %#x" % (filename, start))
      if start == end:
        break
      yield data[start:end]
      start = end + 1

  # Returns the string at the given offset, which may be empty.
  def extract_C_string(start):
    return next(gen_strings(start), b'')

  # Returns a string of hex digits (or None).
  def get_build_id():
    build_id = None
    for note in gen_notes():
      # Note that the last build_id note needs to be used due to TO-442.
      possible_build_id = note.build_id_hex()
      if possible_build_id:
        build_id = possible_build_id
    return build_id

  # Returns a list of elf_note objects.
  def get_matching_notes():
    if isinstance(match_notes, bool):
      return list(gen_notes()) if match_notes else []
    # If not a bool, it's an iterable of ident pairs.
    return [note for note in gen_notes() if note.ident() in match_notes]

  # Returns a string (without trailing '\0'), or None.
  def get_interp():
    # PT_INTERP points directly to a string in the file.
    for phdr in phdrs:
      if phdr.p_type == PT_INTERP:
        interp = data[phdr.p_offset:phdr.p_offset + phdr.p_filesz]
        if interp[-1:] == b'\0':
          interp = interp[:-1]
        return interp
    return None

  # Returns (soname or None, set of DT_NEEDED strings).
  def get_soname_and_needed():
    # Only the first PT_DYNAMIC segment is examined.
    for dynamic in phdrs:
      if dynamic.p_type != PT_DYNAMIC:
        continue

      # PT_DYNAMIC points to the list of ElfNN_Dyn tags.
      dyn = [
          elf.Dyn.read(data, dynamic.p_offset + dyn_offset)
          for dyn_offset in range(0, dynamic.p_filesz, elf.Dyn.size)
      ]

      # DT_STRTAB points to the string table's vaddr (.dynstr).
      [strtab_vaddr] = [dt.d_val for dt in dyn if dt.d_tag == DT_STRTAB]

      # Find the PT_LOAD containing the vaddr to compute the file offset.
      [strtab_offset] = [
          strtab_vaddr - phdr.p_vaddr + phdr.p_offset
          for phdr in phdrs
          if (phdr.p_type == PT_LOAD and phdr.p_vaddr <= strtab_vaddr and
              strtab_vaddr - phdr.p_vaddr < phdr.p_filesz)
      ]

      # Each DT_NEEDED or DT_SONAME points to a string in the .dynstr table.
      def gen_dt_strings(tag):
        return (extract_C_string(strtab_offset + dt.d_val)
                for dt in dyn
                if dt.d_tag == tag)

      soname = next(gen_dt_strings(DT_SONAME), None)
      return soname, set(gen_dt_strings(DT_NEEDED))
    return None, set()

  # True if there is no symbol table and no .debug_* section.
  def get_stripped():
    return all(
        shdr.sh_type != SHT_SYMTAB and not name.startswith(b'.debug_')
        for shdr, name in gen_sections())

  def get_cpu():
    return ELF_MACHINE_TO_CPU.get(ehdr.e_machine)

  # Map in the whole file's contents and use it as a byte string.
  with mmapper(filename) as (_, data):
    elf = get_elf_accessor(data)
    if elf is None:
      return None
    # ELF header leads to program headers.
    ehdr = elf.Ehdr.read(data)
    assert ehdr.e_phentsize == elf.Phdr.size, (
        "%s: invalid e_phentsize" % filename)
    phdrs = list(gen_phdrs(data, elf, ehdr))
    return elf_info(filename, get_cpu(), get_matching_notes(), get_build_id(),
                    get_stripped(), get_interp(), *get_soname_and_needed())
| |
| |
# The names exported by a star-import of this module.
__all__ = [
    'cpu',
    'elf_info',
    'elf_note',
    'get_elf_accessor',
    'get_elf_info',
]