|  | #!/usr/bin/env python3 | 
|  | # Copyright 2019 The Chromium Authors. All rights reserved. | 
|  | # Use of this source code is governed by a BSD-style license that can be | 
|  | # found in the LICENSE file. | 
|  | """This script compresses a part of shared library without breaking it. | 
|  |  | 
|  | It compresses the specified file range which will be used by a library's | 
|  | decompression hook that uses userfaultfd to intercept access attempts to the | 
|  | range and decompress its data on demand. | 
|  |  | 
|  | Technically this script does the following steps: | 
|  | 1) Makes a copy of specified range, compresses it and adds it to the binary | 
|  | as a new section using objcopy. | 
|  | 2) Moves the Phdr section to the end of the file to ease next manipulations on | 
|  | it. | 
|  | 3) Creates a LOAD segment for the compressed section created at step 1. | 
|  | 4) Splits the LOAD segments so the range lives in its own | 
|  | LOAD segment. | 
|  | 5) Cuts the range out of the binary, correcting offsets, broken in the | 
|  | process. | 
|  | 6) Changes cut range LOAD segment by zeroing the file_sz and setting | 
|  | mem_sz to the original range size, essentially lazily zeroing the space. | 
|  | 7) Changes magic bytes provided by the decompression hook to contain | 
|  | addresses of cut range and compressed range. | 
|  | """ | 
|  |  | 
|  | import argparse | 
|  | import logging | 
|  | import os | 
|  | import pathlib | 
|  | import subprocess | 
|  | import sys | 
|  | import tempfile | 
|  |  | 
|  | import compression | 
|  | import elf_headers | 
|  |  | 
|  | COMPRESSED_SECTION_NAME = '.compressed_library_data' | 
|  | ADDRESS_ALIGN = 0x1000 | 
|  |  | 
|  | CUT_RANGE_BEGIN_MAGIC = bytearray( | 
|  | [0x2e, 0x2a, 0xee, 0xf6, 0x45, 0x03, 0xd2, 0x50]) | 
|  | CUT_RANGE_END_MAGIC = bytearray( | 
|  | [0x52, 0x40, 0xeb, 0x9d, 0xdb, 0x11, 0xed, 0x1a]) | 
|  | COMPRESSED_RANGE_BEGIN_MAGIC = bytearray( | 
|  | [0x5e, 0x49, 0x4a, 0x4c, 0xae, 0x28, 0xc8, 0xbb]) | 
|  | COMPRESSED_RANGE_END_MAGIC = bytearray( | 
|  | [0xdd, 0x60, 0xed, 0xcf, 0xc3, 0x29, 0xa6, 0xd6]) | 
|  |  | 
|  | # src/third_party/llvm-build/Release+Asserts/bin/llvm-objcopy | 
|  | OBJCOPY_PATH = pathlib.Path(__file__).resolve().parents[3].joinpath( | 
|  | 'third_party/llvm-build/Release+Asserts/bin/llvm-objcopy') | 
|  |  | 
|  |  | 
|  | def SegmentContains(main_l, main_r, l, r): | 
|  | """Returns true if [l, r) is contained inside [main_l, main_r). | 
|  |  | 
|  | Args: | 
|  | main_l: int. Left border of the first segment. | 
|  | main_r: int. Right border (exclusive) of the second segment. | 
|  | l: int. Left border of the second segment. | 
|  | r: int. Right border (exclusive) of the second segment. | 
|  | """ | 
|  | return main_l <= l and main_r >= r | 
|  |  | 
|  |  | 
|  | def SegmentsIntersect(l1, r1, l2, r2): | 
|  | """Returns true if [l1, r1) intersects with [l2, r2). | 
|  |  | 
|  | Args: | 
|  | l1: int. Left border of the first segment. | 
|  | r1: int. Right border (exclusive) of the second segment. | 
|  | l2: int. Left border of the second segment. | 
|  | r2: int. Right border (exclusive) of the second segment. | 
|  | """ | 
|  | return l2 < r1 and r2 > l1 | 
|  |  | 
|  |  | 
|  | def AlignUp(addr, page_size=ADDRESS_ALIGN): | 
|  | """Rounds up given address to be aligned to page_size. | 
|  |  | 
|  | Args: | 
|  | addr: int. Virtual address to be aligned. | 
|  | page_size: int. Page size to be used for the alignment. | 
|  | """ | 
|  | if addr % page_size != 0: | 
|  | addr += page_size - (addr % page_size) | 
|  | return addr | 
|  |  | 
|  |  | 
|  | def AlignDown(addr, page_size=ADDRESS_ALIGN): | 
|  | """Round down given address to be aligned to page_size. | 
|  |  | 
|  | Args: | 
|  | addr: int. Virtual address to be aligned. | 
|  | page_size: int. Page size to be used for the alignment. | 
|  | """ | 
|  | return addr - addr % page_size | 
|  |  | 
|  |  | 
|  | def MatchVaddrAlignment(vaddr, offset, align=ADDRESS_ALIGN): | 
|  | """Align vaddr to comply with ELF standard binary alignment. | 
|  |  | 
|  | Increases vaddr until the following is true: | 
|  | vaddr % align == offset % align | 
|  |  | 
|  | Args: | 
|  | vaddr: virtual address to be aligned. | 
|  | offset: file offset to be aligned. | 
|  | align: alignment value. | 
|  |  | 
|  | Returns: | 
|  | Aligned virtual address, bigger or equal than the vaddr. | 
|  | """ | 
|  | delta = offset % align - vaddr % align | 
|  | if delta < 0: | 
|  | delta += align | 
|  | return vaddr + delta | 
|  |  | 
|  |  | 
|  | def _SetupLogging(): | 
|  | logging.basicConfig( | 
|  | format='%(asctime)s %(filename)s:%(lineno)s %(levelname)s] %(message)s', | 
|  | datefmt='%H:%M:%S', | 
|  | level=logging.ERROR) | 
|  |  | 
|  |  | 
|  | def _ParseArguments(): | 
|  | parser = argparse.ArgumentParser() | 
|  | parser.add_argument( | 
|  | '-i', '--input', help='Shared library to parse', required=True) | 
|  | parser.add_argument( | 
|  | '-o', '--output', help='Name of the output file', required=True) | 
|  | parser.add_argument( | 
|  | '-l', | 
|  | '--left_range', | 
|  | help='Beginning of the target part of the library', | 
|  | type=int, | 
|  | required=True) | 
|  | parser.add_argument( | 
|  | '-r', | 
|  | '--right_range', | 
|  | help='End (exclusive) of the target part of the library', | 
|  | type=int, | 
|  | required=True) | 
|  | return parser.parse_args() | 
|  |  | 
|  |  | 
|  | def _FileRangeToVirtualAddressRange(data, l, r): | 
|  | """Returns virtual address range corresponding to given file range. | 
|  |  | 
|  | Since we have to resolve them by their virtual address, parsing of LOAD | 
|  | segments is required here. | 
|  | """ | 
|  | elf = elf_headers.ElfHeader(data) | 
|  | for phdr in elf.GetProgramHeadersByType( | 
|  | elf_headers.ProgramHeader.Type.PT_LOAD): | 
|  | # Current version of the prototype only supports ranges which are fully | 
|  | # contained inside one LOAD segment. It should cover most of the common | 
|  | # cases. | 
|  | if not SegmentsIntersect(phdr.p_offset, phdr.FilePositionEnd(), l, r): | 
|  | continue | 
|  | if not SegmentContains(phdr.p_offset, phdr.FilePositionEnd(), l, r): | 
|  | raise RuntimeError('Range is not contained within one LOAD segment') | 
|  | l_virt = phdr.p_vaddr + (l - phdr.p_offset) | 
|  | r_virt = phdr.p_vaddr + (r - phdr.p_offset) | 
|  | return l_virt, r_virt | 
|  | raise RuntimeError('Specified range is outside of all LOAD segments.') | 
|  |  | 
|  |  | 
|  | def _CopyRangeIntoCompressedSection(data, l, r): | 
|  | """Adds a new section containing compressed version of provided range.""" | 
|  | compressed_range = compression.CompressData(data[l:r]) | 
|  |  | 
|  | with tempfile.TemporaryDirectory() as tmpdir: | 
|  | # The easiest way to add a new section is to use objcopy, but it requires | 
|  | # for all of the data to be stored in files. | 
|  | objcopy_input_file = os.path.join(tmpdir, 'input') | 
|  | objcopy_data_file = os.path.join(tmpdir, 'data') | 
|  | objcopy_output_file = os.path.join(tmpdir, 'output') | 
|  |  | 
|  | with open(objcopy_input_file, 'wb') as f: | 
|  | f.write(data) | 
|  | with open(objcopy_data_file, 'wb') as f: | 
|  | f.write(compressed_range) | 
|  |  | 
|  | objcopy_args = [ | 
|  | OBJCOPY_PATH, objcopy_input_file, objcopy_output_file, '--add-section', | 
|  | '{}={}'.format(COMPRESSED_SECTION_NAME, objcopy_data_file) | 
|  | ] | 
|  | run_result = subprocess.run( | 
|  | objcopy_args, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE) | 
|  | if run_result.returncode != 0: | 
|  | raise RuntimeError('objcopy failed with status code {}: {}'.format( | 
|  | run_result.returncode, run_result.stderr)) | 
|  |  | 
|  | with open(objcopy_output_file, 'rb') as f: | 
|  | data[:] = bytearray(f.read()) | 
|  |  | 
|  |  | 
|  | def _FindNewVaddr(phdrs): | 
|  | """Returns the virt address that is safe to use for insertion of new data.""" | 
|  | max_vaddr = 0 | 
|  | # Strictly speaking it should be sufficient to look through only LOAD | 
|  | # segments, but better be safe than sorry. | 
|  | for phdr in phdrs: | 
|  | max_vaddr = max(max_vaddr, phdr.p_vaddr + phdr.p_memsz) | 
|  | # When the mapping occurs end address is increased to be a multiple | 
|  | # of page size. To ensure compatibility with anything relying on this | 
|  | # behaviour we take this increase into account. | 
|  | max_vaddr = AlignUp(max_vaddr) | 
|  | return max_vaddr | 
|  |  | 
|  |  | 
|  | def _MovePhdrToTheEnd(data): | 
|  | """Moves Phdrs to the end of the file and adjusts all references to it.""" | 
|  | elf_hdr = elf_headers.ElfHeader(data) | 
|  |  | 
|  | # If program headers are already in the end of the file, nothing to do. | 
|  | if elf_hdr.e_phoff + elf_hdr.e_phnum * elf_hdr.e_phentsize == len(data): | 
|  | return | 
|  |  | 
|  | old_phoff = elf_hdr.e_phoff | 
|  | new_phoff = elf_hdr.e_phoff = len(data) | 
|  |  | 
|  | unaligned_new_vaddr = _FindNewVaddr(elf_hdr.GetProgramHeaders()) | 
|  | new_vaddr = MatchVaddrAlignment(unaligned_new_vaddr, new_phoff) | 
|  | # Since we moved the PHDR section to the end of the file, we need to create a | 
|  | # new LOAD segment to load it in. | 
|  | current_filesize = elf_hdr.e_phnum * elf_hdr.e_phentsize | 
|  | # We are using current_filesize while adding new program header due to | 
|  | # AddProgramHeader handling the increase of size due to addition of new | 
|  | # header. | 
|  | elf_hdr.AddProgramHeader( | 
|  | elf_headers.ProgramHeader.Create( | 
|  | elf_hdr.byte_order, | 
|  | p_type=elf_headers.ProgramHeader.Type.PT_LOAD, | 
|  | p_flags=elf_headers.ProgramHeader.Flags.PF_R, | 
|  | p_offset=new_phoff, | 
|  | p_vaddr=new_vaddr, | 
|  | p_paddr=new_vaddr, | 
|  | p_filesz=current_filesize, | 
|  | p_memsz=current_filesize, | 
|  | p_align=ADDRESS_ALIGN, | 
|  | )) | 
|  |  | 
|  | # PHDR segment if it exists should point to the new location. | 
|  | for phdr in elf_hdr.GetProgramHeadersByType( | 
|  | elf_headers.ProgramHeader.Type.PT_PHDR): | 
|  | phdr.p_offset = new_phoff | 
|  | phdr.p_vaddr = new_vaddr | 
|  | phdr.p_paddr = new_vaddr | 
|  | phdr.p_align = ADDRESS_ALIGN | 
|  |  | 
|  | # We need to replace the previous phdr placement with zero bytes to fail | 
|  | # fast if dynamic linker doesn't like the new program header. | 
|  | previous_phdr_size = (elf_hdr.e_phnum - 1) * elf_hdr.e_phentsize | 
|  | data[old_phoff:old_phoff + previous_phdr_size] = [0] * previous_phdr_size | 
|  |  | 
|  | # Updating ELF header to point to the new location. | 
|  | elf_hdr.PatchData(data) | 
|  |  | 
|  |  | 
|  | def _CreateLoadForCompressedSection(data): | 
|  | """Creates a LOAD segment to previously created COMPRESSED_SECTION_NAME. | 
|  |  | 
|  | Returns the virtual address range corresponding to created segment.""" | 
|  | elf_hdr = elf_headers.ElfHeader(data) | 
|  |  | 
|  | section_offset = None | 
|  | section_size = None | 
|  | for shdr in elf_hdr.GetSectionHeaders(): | 
|  | if shdr.GetStrName() == COMPRESSED_SECTION_NAME: | 
|  | section_offset = shdr.sh_offset | 
|  | section_size = shdr.sh_size | 
|  | break | 
|  | if section_offset is None: | 
|  | raise RuntimeError( | 
|  | 'Failed to locate {} section in file'.format(COMPRESSED_SECTION_NAME)) | 
|  |  | 
|  | unaligned_new_vaddr = _FindNewVaddr(elf_hdr.GetProgramHeaders()) | 
|  | new_vaddr = MatchVaddrAlignment(unaligned_new_vaddr, section_offset) | 
|  | elf_hdr.AddProgramHeader( | 
|  | elf_headers.ProgramHeader.Create( | 
|  | elf_hdr.byte_order, | 
|  | p_type=elf_headers.ProgramHeader.Type.PT_LOAD, | 
|  | p_flags=elf_headers.ProgramHeader.Flags.PF_R, | 
|  | p_offset=section_offset, | 
|  | p_vaddr=new_vaddr, | 
|  | p_paddr=new_vaddr, | 
|  | p_filesz=section_size, | 
|  | p_memsz=section_size, | 
|  | p_align=ADDRESS_ALIGN, | 
|  | )) | 
|  | elf_hdr.PatchData(data) | 
|  | return new_vaddr, new_vaddr + section_size | 
|  |  | 
|  |  | 
|  | def _SplitLoadSegmentAndNullifyRange(data, l, r): | 
|  | """Find LOAD segment covering [l, r) and splits it into three segments. | 
|  |  | 
|  | Split is done so one of the LOAD segments contains only [l, r) and nothing | 
|  | else. If the range is located at the start or at the end of the segment less | 
|  | than three segments may be created. | 
|  |  | 
|  | The resulting LOAD segment containing [l, r) is edited so it sets the | 
|  | corresponding virtual address range to zeroes, ignoring file content. | 
|  |  | 
|  | Returns virtual address range corresponding to [l, r). | 
|  | """ | 
|  | elf_hdr = elf_headers.ElfHeader(data) | 
|  |  | 
|  | range_phdr = None | 
|  | for phdr in elf_hdr.GetProgramHeadersByType( | 
|  | elf_headers.ProgramHeader.Type.PT_LOAD): | 
|  | if SegmentContains(phdr.p_offset, phdr.FilePositionEnd(), l, r): | 
|  | range_phdr = phdr | 
|  | break | 
|  | if range_phdr is None: | 
|  | raise RuntimeError('No LOAD segment covering the range found') | 
|  |  | 
|  | # The range_phdr will become the LOAD segment containing the [l, r) range | 
|  | # but we need to create the additional two segments. | 
|  | left_segment_size = l - range_phdr.p_offset | 
|  | if left_segment_size > 0: | 
|  | # Creating LOAD segment containing the [phdr.p_offset, l) part. | 
|  | elf_hdr.AddProgramHeader( | 
|  | elf_headers.ProgramHeader.Create( | 
|  | elf_hdr.byte_order, | 
|  | p_type=range_phdr.p_type, | 
|  | p_flags=range_phdr.p_flags, | 
|  | p_offset=range_phdr.p_offset, | 
|  | p_vaddr=range_phdr.p_vaddr, | 
|  | p_paddr=range_phdr.p_paddr, | 
|  | p_filesz=left_segment_size, | 
|  | p_memsz=left_segment_size, | 
|  | p_align=range_phdr.p_align, | 
|  | )) | 
|  | if range_phdr.p_offset + range_phdr.p_memsz > r: | 
|  | # Creating LOAD segment containing the [r, phdr.p_offset + phdr.p_memsz). | 
|  | right_segment_delta = r - range_phdr.p_offset | 
|  | right_segment_address = range_phdr.p_vaddr + right_segment_delta | 
|  | right_segment_filesize = max(range_phdr.p_filesz - right_segment_delta, 0) | 
|  | right_segment_memsize = range_phdr.p_memsz - right_segment_delta | 
|  | elf_hdr.AddProgramHeader( | 
|  | elf_headers.ProgramHeader.Create( | 
|  | elf_hdr.byte_order, | 
|  | p_type=range_phdr.p_type, | 
|  | p_flags=range_phdr.p_flags, | 
|  | p_offset=r, | 
|  | p_vaddr=right_segment_address, | 
|  | p_paddr=right_segment_address, | 
|  | p_filesz=right_segment_filesize, | 
|  | p_memsz=right_segment_memsize, | 
|  | p_align=range_phdr.p_align, | 
|  | )) | 
|  | # Modifying the range_phdr | 
|  | central_segment_address = range_phdr.p_vaddr + left_segment_size | 
|  | range_phdr.p_offset = l | 
|  | range_phdr.p_vaddr = central_segment_address | 
|  | range_phdr.p_paddr = central_segment_address | 
|  | range_phdr.p_filesz = 0 | 
|  | range_phdr.p_memsz = r - l | 
|  |  | 
|  | elf_hdr.PatchData(data) | 
|  | return central_segment_address, central_segment_address + (r - l) | 
|  |  | 
|  |  | 
|  | def _CutRangeAndCorrectFile(data, l, r): | 
|  | """Removes [l, r) from the data and fixes offsets to stabilize the ELF.""" | 
|  | elf = elf_headers.ElfHeader(data) | 
|  | # Removing the range from the file: | 
|  | del data[l:r] | 
|  |  | 
|  | range_length = r - l | 
|  | for phdr in elf.GetProgramHeaders(): | 
|  | # Any other program header intersecting the [l, r) range poses serious | 
|  | # problem as this header needs to be split if possible. However since we are | 
|  | # compressing part of program's code this is highly unlikely, albeit | 
|  | # possible in worst case scenario. | 
|  | # With that in mind we assert that such thing doesn't happen in our case. | 
|  | if SegmentsIntersect(phdr.p_offset, phdr.FilePositionEnd(), l, r): | 
|  | raise RuntimeError('Segment intersects with provided range') | 
|  | if phdr.p_offset >= r: | 
|  | phdr.p_offset -= range_length | 
|  | # Per ELF standard: p_offset % p_align == p_vaddr % p_align. | 
|  | # Since we moved the p_offset we could have broken this rule. | 
|  | # To mitigate this issue we notice the following two facts: | 
|  | # 1) range_length % ADDRESS_ALIGN == 0. | 
|  | # 2) We can reduce the p_align without breaking the alignment. | 
|  | # We reduce all p_align to be less or equal than ADDRESS_ALIGN and now | 
|  | # range_length % p_align == 0, so the alignment remains valid. | 
|  | # Our changes of p_align are perfectly legal per standard | 
|  | # as long as p_align % PAGE_SIZE == 0. | 
|  | if phdr.p_align > ADDRESS_ALIGN: | 
|  | phdr.p_align = ADDRESS_ALIGN | 
|  |  | 
|  | for shdr in elf.GetSectionHeaders(): | 
|  | # Note that if the section overlaps with the cut range we are unable | 
|  | # to adjust its size to match both file and virtual size of it. In such | 
|  | # case we treat sh_size as memory size and don't adjust it, which may | 
|  | # cause some tools treating sh_size as file size to stop working. | 
|  | if shdr.sh_offset >= l: | 
|  | if shdr.sh_offset < r: | 
|  | raise RuntimeError('Section starts within the provided range') | 
|  | else: | 
|  | shdr.sh_offset -= range_length | 
|  | if elf.e_phoff > l: | 
|  | elf.e_phoff -= range_length | 
|  | if elf.e_shoff > l: | 
|  | elf.e_shoff -= range_length | 
|  | elf.PatchData(data) | 
|  |  | 
|  |  | 
|  | def _PatchConstructorBytes(data, cut_range_virt_l, cut_range_virt_r, | 
|  | compressed_range_virt_l, compressed_range_virt_r): | 
|  | """Sets magic bytes given by constructor to the given ranges.""" | 
|  | elf = elf_headers.ElfHeader(data) | 
|  |  | 
|  | to_patch = [ | 
|  | (CUT_RANGE_BEGIN_MAGIC, cut_range_virt_l, 'cut range begin'), | 
|  | (CUT_RANGE_END_MAGIC, cut_range_virt_r, 'cut range end'), | 
|  | (COMPRESSED_RANGE_BEGIN_MAGIC, compressed_range_virt_l, | 
|  | 'compressed range begin'), | 
|  | (COMPRESSED_RANGE_END_MAGIC, compressed_range_virt_r, | 
|  | 'compressed range end'), | 
|  | ] | 
|  |  | 
|  | for magic_bytes, new_value, name in to_patch: | 
|  | magic_idx = data.find(magic_bytes) | 
|  | if magic_idx == -1: | 
|  | raise RuntimeError('failed to find %s magic bytes' % name) | 
|  | if data.rfind(magic_bytes) != magic_idx: | 
|  | raise RuntimeError('%s magic bytes occures more then once' % name) | 
|  | new_value_bytes = new_value.to_bytes(length=8, byteorder=elf.byte_order) | 
|  | data[magic_idx:magic_idx + 8] = new_value_bytes | 
|  |  | 
|  |  | 
|  | def _ShrinkRangeToAlignVirtualAddress(data, l, r): | 
|  | virtual_l, virtual_r = _FileRangeToVirtualAddressRange(data, l, r) | 
|  | # LOAD segments borders are being rounded to the page size so we have to | 
|  | # shrink [l, r) so corresponding virtual addresses are aligned. | 
|  | l += AlignUp(virtual_l) - virtual_l | 
|  | r -= virtual_r - AlignDown(virtual_r) | 
|  | return l, r | 
|  |  | 
|  |  | 
|  | def main(): | 
|  | _SetupLogging() | 
|  | args = _ParseArguments() | 
|  |  | 
|  | with open(args.input, 'rb') as f: | 
|  | data = f.read() | 
|  | data = bytearray(data) | 
|  |  | 
|  | left_range, right_range = _ShrinkRangeToAlignVirtualAddress( | 
|  | data, args.left_range, args.right_range) | 
|  | if left_range >= right_range: | 
|  | raise RuntimeError('Range collapsed after aligning by page size') | 
|  |  | 
|  | _CopyRangeIntoCompressedSection(data, left_range, right_range) | 
|  | _MovePhdrToTheEnd(data) | 
|  |  | 
|  | compressed_virt_l, compressed_virt_r = _CreateLoadForCompressedSection(data) | 
|  | virt_l, virt_r = _SplitLoadSegmentAndNullifyRange(data, left_range, | 
|  | right_range) | 
|  | _CutRangeAndCorrectFile(data, left_range, right_range) | 
|  | _PatchConstructorBytes(data, virt_l, virt_r, compressed_virt_l, | 
|  | compressed_virt_r) | 
|  |  | 
|  | with open(args.output, 'wb') as f: | 
|  | f.write(data) | 
|  | return 0 | 
|  |  | 
|  |  | 
|  | if __name__ == '__main__': | 
|  | sys.exit(main()) |