| /****************************************************************************** |
| * Remote Debugging Module - Object Reading Functions |
| * |
| * This file contains functions for reading Python objects from remote |
| * process memory, including strings, bytes, and integers. |
| ******************************************************************************/ |
| |
| #include "_remote_debugging.h" |
| |
| /* ============================================================================ |
| * MEMORY READING FUNCTIONS |
| * ============================================================================ */ |
| |
| #define DEFINE_MEMORY_READER(type_name, c_type, error_msg) \ |
| int \ |
| read_##type_name(RemoteUnwinderObject *unwinder, uintptr_t address, c_type *result) \ |
| { \ |
| int res = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, address, sizeof(c_type), result); \ |
| if (res < 0) { \ |
| set_exception_cause(unwinder, PyExc_RuntimeError, error_msg); \ |
| return -1; \ |
| } \ |
| return 0; \ |
| } |
| |
| DEFINE_MEMORY_READER(ptr, uintptr_t, "Failed to read pointer from remote memory") |
| DEFINE_MEMORY_READER(Py_ssize_t, Py_ssize_t, "Failed to read Py_ssize_t from remote memory") |
| DEFINE_MEMORY_READER(char, char, "Failed to read char from remote memory") |
| |
| int |
| read_py_ptr(RemoteUnwinderObject *unwinder, uintptr_t address, uintptr_t *ptr_addr) |
| { |
| if (read_ptr(unwinder, address, ptr_addr)) { |
| set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read Python pointer"); |
| return -1; |
| } |
| *ptr_addr &= ~Py_TAG_BITS; |
| return 0; |
| } |
| |
| /* ============================================================================ |
| * PYTHON OBJECT READING FUNCTIONS |
| * ============================================================================ */ |
| |
| PyObject * |
| read_py_str( |
| RemoteUnwinderObject *unwinder, |
| uintptr_t address, |
| Py_ssize_t max_len |
| ) { |
| // Read the entire PyUnicodeObject at once; for short strings the data |
| // is inline right after the header and we'll already have (some of) it. |
| char unicode_obj[SIZEOF_UNICODE_OBJ]; |
| int res = _Py_RemoteDebug_PagedReadRemoteMemory( |
| &unwinder->handle, |
| address, |
| SIZEOF_UNICODE_OBJ, |
| unicode_obj |
| ); |
| if (res < 0) { |
| set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read PyUnicodeObject"); |
| return NULL; |
| } |
| |
| Py_ssize_t len = GET_MEMBER(Py_ssize_t, unicode_obj, unwinder->debug_offsets.unicode_object.length); |
| if (len < 0 || len > max_len) { |
| PyErr_Format(PyExc_RuntimeError, |
| "Invalid string length (%zd) at 0x%lx", len, address); |
| set_exception_cause(unwinder, PyExc_RuntimeError, "Invalid string length in remote Unicode object"); |
| return NULL; |
| } |
| |
| // Inspect state to pick the right data offset and character width. |
| // We rely on the remote process sharing this Python version's |
| // PyASCIIObject layout, the same assumption already used for `length`. |
| struct _PyUnicodeObject_state state = GET_MEMBER( |
| struct _PyUnicodeObject_state, |
| unicode_obj, |
| unwinder->debug_offsets.unicode_object.state); |
| |
| if (!state.compact) { |
| PyErr_Format(PyExc_RuntimeError, |
| "Cannot read non-compact Unicode object at 0x%lx", address); |
| set_exception_cause(unwinder, PyExc_RuntimeError, |
| "Legacy (non-compact) Unicode objects are not supported"); |
| return NULL; |
| } |
| |
| int kind = (int)state.kind; |
| Py_UCS4 max_char; |
| switch (kind) { |
| case PyUnicode_1BYTE_KIND: |
| max_char = state.ascii ? 0x7F : 0xFF; |
| break; |
| case PyUnicode_2BYTE_KIND: |
| max_char = 0xFFFF; |
| break; |
| case PyUnicode_4BYTE_KIND: |
| max_char = 0x10FFFF; |
| break; |
| default: |
| PyErr_Format(PyExc_RuntimeError, |
| "Invalid Unicode kind %d at 0x%lx", kind, address); |
| set_exception_cause(unwinder, PyExc_RuntimeError, |
| "Invalid kind in remote Unicode object"); |
| return NULL; |
| } |
| |
| size_t header_size = state.ascii |
| ? (size_t)unwinder->debug_offsets.unicode_object.asciiobject_size |
| : (size_t)unwinder->debug_offsets.unicode_object.compactunicodeobject_size; |
| |
| // len * kind is bounded by max_len * 4 (kind <= 4, len <= max_len), so |
| // the multiplication can't overflow for any caller-sane max_len, but the |
| // explicit cap here keeps a corrupted remote `length` from later turning |
| // into a giant allocation. |
| size_t nbytes = (size_t)len * (size_t)kind; |
| if ((size_t)len > (SIZE_MAX / 4) || nbytes > (size_t)max_len * 4) { |
| PyErr_Format(PyExc_RuntimeError, |
| "Implausible Unicode byte size %zu at 0x%lx", nbytes, address); |
| set_exception_cause(unwinder, PyExc_RuntimeError, |
| "Garbage byte size in remote Unicode object"); |
| return NULL; |
| } |
| |
| PyObject *result = PyUnicode_New(len, max_char); |
| if (result == NULL) { |
| set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to allocate PyUnicode for remote string"); |
| return NULL; |
| } |
| if (nbytes == 0) { |
| return result; |
| } |
| |
| void *data = PyUnicode_DATA(result); |
| |
| // Reuse data already present in the header read; only round-trip for |
| // whatever spills past it. |
| size_t inline_avail = (header_size < SIZEOF_UNICODE_OBJ) |
| ? SIZEOF_UNICODE_OBJ - header_size |
| : 0; |
| size_t inline_bytes = nbytes < inline_avail ? nbytes : inline_avail; |
| if (inline_bytes > 0) { |
| memcpy(data, unicode_obj + header_size, inline_bytes); |
| } |
| |
| if (nbytes > inline_bytes) { |
| res = _Py_RemoteDebug_PagedReadRemoteMemory( |
| &unwinder->handle, |
| address + header_size + inline_bytes, |
| nbytes - inline_bytes, |
| (char *)data + inline_bytes); |
| if (res < 0) { |
| Py_DECREF(result); |
| set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read string data from remote memory"); |
| return NULL; |
| } |
| } |
| |
| return result; |
| } |
| |
| PyObject * |
| read_py_bytes( |
| RemoteUnwinderObject *unwinder, |
| uintptr_t address, |
| Py_ssize_t max_len |
| ) { |
| PyObject *result = NULL; |
| char *buf = NULL; |
| |
| // Read the entire PyBytesObject at once |
| char bytes_obj[SIZEOF_BYTES_OBJ]; |
| int res = _Py_RemoteDebug_PagedReadRemoteMemory( |
| &unwinder->handle, |
| address, |
| SIZEOF_BYTES_OBJ, |
| bytes_obj |
| ); |
| if (res < 0) { |
| set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read PyBytesObject"); |
| goto err; |
| } |
| |
| Py_ssize_t len = GET_MEMBER(Py_ssize_t, bytes_obj, unwinder->debug_offsets.bytes_object.ob_size); |
| if (len < 0 || len > max_len) { |
| PyErr_Format(PyExc_RuntimeError, |
| "Invalid bytes length (%zd) at 0x%lx", len, address); |
| set_exception_cause(unwinder, PyExc_RuntimeError, "Invalid bytes length in remote bytes object"); |
| return NULL; |
| } |
| |
| buf = (char *)PyMem_RawMalloc(len+1); |
| if (buf == NULL) { |
| PyErr_NoMemory(); |
| set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate buffer for bytes reading"); |
| return NULL; |
| } |
| |
| size_t offset = (size_t)unwinder->debug_offsets.bytes_object.ob_sval; |
| res = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, address + offset, len, buf); |
| if (res < 0) { |
| set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read bytes data from remote memory"); |
| goto err; |
| } |
| buf[len] = '\0'; |
| |
| result = PyBytes_FromStringAndSize(buf, len); |
| if (result == NULL) { |
| set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create PyBytes from remote bytes data"); |
| goto err; |
| } |
| |
| PyMem_RawFree(buf); |
| assert(result != NULL); |
| return result; |
| |
| err: |
| if (buf != NULL) { |
| PyMem_RawFree(buf); |
| } |
| return NULL; |
| } |
| |
| long |
| read_py_long( |
| RemoteUnwinderObject *unwinder, |
| uintptr_t address |
| ) |
| { |
| unsigned int shift = PYLONG_BITS_IN_DIGIT; |
| |
| // Read the entire PyLongObject at once |
| char long_obj[SIZEOF_LONG_OBJ]; |
| int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( |
| &unwinder->handle, |
| address, |
| (size_t)unwinder->debug_offsets.long_object.size, |
| long_obj); |
| if (bytes_read < 0) { |
| set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read PyLongObject"); |
| return -1; |
| } |
| |
| uintptr_t lv_tag = GET_MEMBER(uintptr_t, long_obj, unwinder->debug_offsets.long_object.lv_tag); |
| int negative = (lv_tag & 3) == 2; |
| Py_ssize_t size = lv_tag >> 3; |
| |
| if (size == 0) { |
| return 0; |
| } |
| |
| // Validate size: reject garbage (negative or unreasonably large) |
| if (size < 0 || size > MAX_LONG_DIGITS) { |
| PyErr_Format(PyExc_RuntimeError, |
| "Invalid PyLong digit count: %zd (expected 0-%d)", size, MAX_LONG_DIGITS); |
| set_exception_cause(unwinder, PyExc_RuntimeError, |
| "Invalid PyLong size (corrupted remote memory)"); |
| return -1; |
| } |
| |
| // Calculate how many digits fit inline in our local buffer |
| Py_ssize_t ob_digit_offset = unwinder->debug_offsets.long_object.ob_digit; |
| Py_ssize_t inline_digits_space = SIZEOF_LONG_OBJ - ob_digit_offset; |
| Py_ssize_t max_inline_digits = inline_digits_space / (Py_ssize_t)sizeof(digit); |
| |
| // If the long object has inline digits that fit in our buffer, use them directly |
| digit *digits; |
| if (size <= max_inline_digits && size <= _PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS) { |
| // For small integers, digits are inline in the long_value.ob_digit array |
| digits = (digit *)PyMem_RawMalloc(size * sizeof(digit)); |
| if (!digits) { |
| PyErr_NoMemory(); |
| set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate digits for small PyLong"); |
| return -1; |
| } |
| memcpy(digits, long_obj + ob_digit_offset, size * sizeof(digit)); |
| } else { |
| // For larger integers, we need to read the digits separately |
| digits = (digit *)PyMem_RawMalloc(size * sizeof(digit)); |
| if (!digits) { |
| PyErr_NoMemory(); |
| set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate digits for large PyLong"); |
| return -1; |
| } |
| |
| bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( |
| &unwinder->handle, |
| address + (uintptr_t)unwinder->debug_offsets.long_object.ob_digit, |
| sizeof(digit) * size, |
| digits |
| ); |
| if (bytes_read < 0) { |
| set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read PyLong digits from remote memory"); |
| goto error; |
| } |
| } |
| |
| long long value = 0; |
| |
| // In theory this can overflow, but because of llvm/llvm-project#16778 |
| // we can't use __builtin_mul_overflow because it fails to link with |
| // __muloti4 on aarch64. In practice this is fine because all we're |
| // testing here are task numbers that would fit in a single byte. |
| for (Py_ssize_t i = 0; i < size; ++i) { |
| long long factor = digits[i] * (1UL << (Py_ssize_t)(shift * i)); |
| value += factor; |
| } |
| PyMem_RawFree(digits); |
| if (negative) { |
| value *= -1; |
| } |
| return (long)value; |
| error: |
| PyMem_RawFree(digits); |
| return -1; |
| } |