| # Copyright 2022 The Chromium Authors |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """Utilities to read values from a bytearray.""" |
| |
| import struct |
| |
| |
| def _ParseULeb128(data, offset): |
| """Returns a tuple of (uleb128 value, number of bytes occupied). |
| |
| From DWARF3 spec: http://dwarfstd.org/doc/Dwarf3.pdf |
| |
| Args: |
| data: bytearray containing unsigned LEB128. |
| offset: Location of the unsigned LEB128. |
| """ |
| value = 0 |
| shift = 0 |
| cur_offset = offset |
| while True: |
| byte = data[cur_offset] |
| cur_offset += 1 |
| value |= (byte & 0b01111111) << shift |
| if (byte & 0b10000000) == 0: |
| break |
| shift += 7 |
| |
| return value, cur_offset - offset |
| |
| |
def _ParseSLeb128(data, offset):
  """Returns a tuple of (sleb128 value, number of bytes occupied).

  The value is decoded as a signed integer of at most 32 bits: the sign is
  taken from the highest payload bit of the encoding, capped at bit 31.

  Args:
    data: bytearray containing signed LEB128.
    offset: Location of the signed LEB128.
  """
  value, size = _ParseULeb128(data, offset)
  sign_bit = 1 << min(31, size * 7 - 1)
  # A 5-byte encoding carries 35 payload bits. Discard the ones above the sign
  # bit (sign-extension padding in a negative value); otherwise they survive
  # the subtraction below and turn the result into a large positive number.
  # For encodings of 4 bytes or fewer the mask covers every payload bit, so
  # it changes nothing.
  value &= (sign_bit << 1) - 1
  if value & sign_bit:
    value -= sign_bit << 1
  return value, size
| |
| |
class Mutf8DecodeError(Exception):
  """Error raised when MUTF-8 string data cannot be decoded.

  The exception message is |message| followed by the expected decoded string
  length and the (hex) offset of the string data.
  """

  def __init__(self, message, length, offset):
    context = ' (decoded string length: {}, string data offset: {:#x})'.format(
        length, offset)
    super().__init__(message + context)
| |
| |
class StreamReader:
  """Reads values from a bytearray using a seekable cursor.

  Integers are little endian. Each Next*() method reads at the current cursor
  position and then moves the cursor past what it consumed.
  """

  def __init__(self, data):
    self._data = data
    self._pos = 0

  def Seek(self, offset):
    """Moves the cursor to the absolute position |offset|."""
    self._pos = offset

  def Tell(self):
    """Returns the current cursor position."""
    return self._pos

  def Skip(self, delta):
    """Moves the cursor by |delta| bytes relative to its current position."""
    self._pos += delta

  def NextStruct(self, fmt):
    """Unpacks the struct format string |fmt| at the cursor.

    Returns:
      The tuple produced by struct.unpack_from().
    """
    ret = struct.unpack_from(fmt, self._data, self._pos)
    self._pos += struct.calcsize(fmt)
    return ret

  def NextBytes(self, n):
    """Returns the next |n| bytes, or fewer if the data ends first."""
    old_pos = self._pos
    # Clamp so the cursor never ends up beyond the end of the data.
    self._pos = min(len(self._data), old_pos + n)
    return self._data[old_pos:self._pos]

  def NextUByte(self):
    """Returns the unsigned 8-bit integer at the cursor."""
    # Read before advancing (as NextStruct does) so that a failed read leaves
    # the cursor where it was.
    ret = self._data[self._pos]
    self._pos += 1
    return ret

  def NextUShort(self):
    """Returns the unsigned 16-bit integer at the cursor."""
    ret = struct.unpack_from('<H', self._data, self._pos)[0]
    self._pos += 2
    return ret

  def NextUInt(self):
    """Returns the unsigned 32-bit integer at the cursor."""
    ret = struct.unpack_from('<I', self._data, self._pos)[0]
    self._pos += 4
    return ret

  def NextULeb128(self):
    """Returns the unsigned LEB128 integer at the cursor."""
    value, inc = _ParseULeb128(self._data, self._pos)
    self._pos += inc
    return value

  def NextSLeb128(self):
    """Returns the signed LEB128 integer at the cursor."""
    value, inc = _ParseSLeb128(self._data, self._pos)
    self._pos += inc
    return value

  def NextMUtf8(self, string_length):
    """Returns the null-terminated MUTF-8 string located at the cursor.

    See https://source.android.com/devices/tech/dalvik/dex-format#mutf-8

    Ported from the Android Java implementation:
    https://android.googlesource.com/platform/dalvik/+/fe107fb6e3f308ac5174ebdc5a794ee880c741d9/dx/src/com/android/dex/Mutf8.java#34

    Only 1-, 2- and 3-byte sequences are accepted. Each sequence becomes one
    character of the result; decoded values are passed to chr() as they are.

    Args:
      string_length: The length of the decoded string.

    Returns:
      The decoded string. The cursor is left just past the terminating null
      byte.

    Raises:
      Mutf8DecodeError: If a null byte appears before |string_length|
        characters were decoded, a sequence is malformed, or the string is not
        followed by a null byte.
    """
    # Remembered only for error reporting.
    offset = self._pos
    # Collect the characters and join once instead of growing a string.
    chars = []

    for _ in range(string_length):
      a = self.NextUByte()
      if a == 0:
        raise Mutf8DecodeError('Early string termination encountered',
                               string_length, offset)
      if (a & 0x80) == 0x00:
        # 0xxxxxxx: single byte.
        code = a
      elif (a & 0xe0) == 0xc0:
        # 110xxxxx 10xxxxxx: two bytes.
        b = self.NextUByte()
        if (b & 0xc0) != 0x80:
          raise Mutf8DecodeError('Error in byte 2', string_length, offset)
        code = ((a & 0x1f) << 6) | (b & 0x3f)
      elif (a & 0xf0) == 0xe0:
        # 1110xxxx 10xxxxxx 10xxxxxx: three bytes.
        b = self.NextUByte()
        c = self.NextUByte()
        if (b & 0xc0) != 0x80 or (c & 0xc0) != 0x80:
          raise Mutf8DecodeError('Error in byte 3 or 4', string_length, offset)
        code = ((a & 0x0f) << 12) | ((b & 0x3f) << 6) | (c & 0x3f)
      else:
        raise Mutf8DecodeError('Bad byte', string_length, offset)
      chars.append(chr(code))

    if self.NextUByte() != 0x00:
      raise Mutf8DecodeError('Expected string termination', string_length,
                             offset)

    return ''.join(chars)

  def NextString(self):
    """Returns a string stored as a ULEB128 length followed by MUTF-8 data."""
    string_length = self.NextULeb128()
    return self.NextMUtf8(string_length)

  def NextList(self, count, factory):
    """Returns a list of |count| items, each produced by factory(self)."""
    return [factory(self) for _ in range(count)]

  def AlignUpTo(self, align_unit):
    """Advances the cursor to the next multiple of |align_unit|, if needed."""
    off_by = self._pos % align_unit
    if off_by:
      self.Seek(self._pos + align_unit - off_by)