| #!/usr/bin/python3 |
| |
| # Copyright 2022 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| import itertools |
| import struct |
| import sys |
| |
| PAGE_SIZE = 0x1000 # System page size. |
| THRESHOLD = 0x2000 # Minimum size of the file to be aligned. |
| |
| |
| # Read 2 bytes. |
| def read16(data, offset): |
| return struct.unpack_from("<H", data, offset)[0] |
| |
| |
| # Read 4 bytes. |
| def read32(data, offset): |
| return struct.unpack_from("<I", data, offset)[0] |
| |
| |
| # Write 4 bytes. |
| def write32(data, offset, value): |
| return struct.pack_into("<I", data, offset, value) |
| |
| |
| ################################################################################ |
| # (Adapted from `source/tools/toolutil/pkg_gencmn.cpp`) |
| # |
| # A .dat package file contains a simple Table of Contents of item names, |
| # followed by the items themselves: |
| # |
| # 1. ToC table |
| # |
| # uint32_t count; - number of items |
| # UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item: |
| # uint32_t nameOffset; - offset of the item name |
| # uint32_t dataOffset; - offset of the item data |
| # both are byte offsets from the beginning of the data |
| # |
| # 2. item name strings |
| # |
| # All item names are stored as char * strings in one block between the ToC table |
| # and the data items. |
| # |
| # 3. data items |
| # |
| # The data items are stored following the item names block. |
| # The data items are stored in the sorted order of their names. |
| ################################################################################ |
| |
| |
| def pad_data(data): |
| out = bytearray() |
| |
| header_size = read16(data, 0) # Size of the ICU header. |
| item_count = read32(data, header_size) # Number of files inside icudtl.dat |
| toc_offset = header_size + 4 # Offset of the Table of Contents. |
| |
| # Copy everything until the beginning of the data. |
| out_offset = read32(data, toc_offset + 4) + header_size |
| out += data[:out_offset] |
| |
| # Iterate over the files. |
| for i in range(item_count): |
| # Offset inside the ToC for this file. |
| offset = toc_offset + (i * 8) |
| |
| # Offset of the name and data, relative to the beginning of the data section. |
| name_offset = read32(data, offset) |
| data_offset = read32(data, offset + 4) |
| |
| # Offset of the name and the data, relative to the beginning of the file. |
| name_file_offset = name_offset + header_size |
| data_file_offset = data_offset + header_size |
| |
| # Calculate the size of this file. |
| if i + 1 < item_count: |
| next_offset = toc_offset + ((i + 1) * 8) |
| next_data_offset = read32(data, next_offset + 4) |
| size = next_data_offset - data_offset |
| else: |
| size = len(data) - (data_offset + header_size) |
| |
| # Insert padding to align files bigger than the threshold. |
| page_offset = out_offset & (PAGE_SIZE - 1) |
| if size >= THRESHOLD and page_offset != 0: |
| padding = PAGE_SIZE - page_offset |
| out.extend(itertools.repeat(0x00, padding)) |
| out_offset += padding |
| |
| # Put the new offset into the Table of Contents. |
| write32(out, offset + 4, out_offset - header_size) |
| |
| # Copy the content of the file. |
| out += data[data_file_offset : data_file_offset + size] |
| out_offset += size |
| |
| return out |
| |
| |
| if __name__ == "__main__": |
| # Check arguments. |
| if len(sys.argv) != 3: |
| error_str = "icualign: wrong number of arguments\n\n" |
| help_str = "usage: icualign <infilename> <outfilename>\n\n" |
| sys.exit(error_str + help_str) |
| |
| # Extract arguments. |
| in_filename = sys.argv[1] |
| out_filename = sys.argv[2] |
| |
| # Read the input file. |
| with open(in_filename, "rb") as in_file: |
| data = in_file.read() |
| # Apply padding to the file to achieve the desired alignment. |
| out_data = pad_data(data) |
| # Write the output file. |
| with open(out_filename, "wb") as out_file: |
| out_file.write(out_data) |