/******************************************************************************
* Remote Debugging Module - Frame Cache
*
 * This file contains functions that cache per-thread frame information so
 * that repeated stack unwinding during profiling can reuse unchanged frames.
******************************************************************************/

#include "_remote_debugging.h"

/* ============================================================================
* FRAME CACHE - stores (address, frame_info) pairs per thread
* Uses preallocated fixed-size arrays for efficiency and bounded memory.
* ============================================================================ */
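
/*
 * A sketch of the FrameCacheEntry fields this file relies on (the real
 * definition lives in _remote_debugging.h; names match the uses below, and
 * the array size follows the FRAME_CACHE_MAX_FRAMES clamp in
 * frame_cache_store):
 *
 *     typedef struct {
 *         uint64_t   thread_id;                      // 0 = slot unused
 *         PyObject  *frame_list;                     // cached frame_info list
 *         uintptr_t  addrs[FRAME_CACHE_MAX_FRAMES];  // remote frame addresses
 *         Py_ssize_t num_addrs;                      // valid entries in addrs
 *     } FrameCacheEntry;
 */
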
int
frame_cache_init(RemoteUnwinderObject *unwinder)
{
unwinder->frame_cache = PyMem_Calloc(FRAME_CACHE_MAX_THREADS, sizeof(FrameCacheEntry));
if (!unwinder->frame_cache) {
PyErr_NoMemory();
return -1;
}
return 0;
}

void
frame_cache_cleanup(RemoteUnwinderObject *unwinder)
{
if (!unwinder->frame_cache) {
return;
}
for (int i = 0; i < FRAME_CACHE_MAX_THREADS; i++) {
Py_CLEAR(unwinder->frame_cache[i].frame_list);
}
PyMem_Free(unwinder->frame_cache);
unwinder->frame_cache = NULL;
}

// Find the cache entry for a thread, or NULL if the thread is not cached
FrameCacheEntry *
frame_cache_find(RemoteUnwinderObject *unwinder, uint64_t thread_id)
{
if (!unwinder->frame_cache || thread_id == 0) {
return NULL;
}
for (int i = 0; i < FRAME_CACHE_MAX_THREADS; i++) {
if (unwinder->frame_cache[i].thread_id == thread_id) {
assert(unwinder->frame_cache[i].num_addrs <= FRAME_CACHE_MAX_FRAMES);
return &unwinder->frame_cache[i];
}
}
return NULL;
}

// Find the existing cache slot for a thread, or allocate an empty one.
// Returns NULL if the cache is full (graceful degradation).
static FrameCacheEntry *
frame_cache_alloc_slot(RemoteUnwinderObject *unwinder, uint64_t thread_id)
{
if (!unwinder->frame_cache || thread_id == 0) {
return NULL;
}
// First check if thread already has an entry
for (int i = 0; i < FRAME_CACHE_MAX_THREADS; i++) {
if (unwinder->frame_cache[i].thread_id == thread_id) {
return &unwinder->frame_cache[i];
}
}
// Find empty slot
for (int i = 0; i < FRAME_CACHE_MAX_THREADS; i++) {
if (unwinder->frame_cache[i].thread_id == 0) {
return &unwinder->frame_cache[i];
}
}
// Cache full - graceful degradation
return NULL;
}

// Remove cache entries for threads that no longer appear in `result`.
// result structure: a list of InterpreterInfo, where InterpreterInfo[1] is
// the thread list and ThreadInfo[0] is the thread_id.
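//
// Illustrative shape (only the fields this function touches are shown;
// everything else is elided):
//
//     result = [
//         InterpreterInfo(<...>, [ThreadInfo(thread_id, <...>), ...], <...>),
//         ...
//     ]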
void
frame_cache_invalidate_stale(RemoteUnwinderObject *unwinder, PyObject *result)
{
if (!unwinder->frame_cache || !result || !PyList_Check(result)) {
return;
}
    // Collect the thread IDs seen in result (capped at FRAME_CACHE_MAX_THREADS)
uint64_t seen_threads[FRAME_CACHE_MAX_THREADS];
int num_seen = 0;
Py_ssize_t num_interps = PyList_GET_SIZE(result);
for (Py_ssize_t i = 0; i < num_interps && num_seen < FRAME_CACHE_MAX_THREADS; i++) {
PyObject *interp_info = PyList_GET_ITEM(result, i);
PyObject *threads = PyStructSequence_GetItem(interp_info, 1);
if (!threads || !PyList_Check(threads)) {
continue;
}
Py_ssize_t num_threads = PyList_GET_SIZE(threads);
for (Py_ssize_t j = 0; j < num_threads && num_seen < FRAME_CACHE_MAX_THREADS; j++) {
PyObject *thread_info = PyList_GET_ITEM(threads, j);
PyObject *tid_obj = PyStructSequence_GetItem(thread_info, 0);
if (tid_obj) {
uint64_t tid = PyLong_AsUnsignedLongLong(tid_obj);
if (!PyErr_Occurred()) {
seen_threads[num_seen++] = tid;
} else {
PyErr_Clear();
}
}
}
}
// Invalidate entries not in seen list
for (int i = 0; i < FRAME_CACHE_MAX_THREADS; i++) {
if (unwinder->frame_cache[i].thread_id == 0) {
continue;
}
int found = 0;
for (int j = 0; j < num_seen; j++) {
if (unwinder->frame_cache[i].thread_id == seen_threads[j]) {
found = 1;
break;
}
}
if (!found) {
// Clear this entry
Py_CLEAR(unwinder->frame_cache[i].frame_list);
unwinder->frame_cache[i].thread_id = 0;
unwinder->frame_cache[i].num_addrs = 0;
STATS_INC(unwinder, stale_cache_invalidations);
}
}
}

// Find last_profiled_frame in the cache and extend frame_info with the cached
// continuation (the caller frames above it). If frame_addrs is not NULL, it is
// extended with the cached addresses as well.
// Returns: 1 = extended, 0 = cache miss or nothing to extend (not an error),
// -1 = error.
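//
// Worked example (illustrative): if the cached addrs are [f0, f1, f2, f3] with
// f0 the innermost frame, and last_profiled_frame == f1, then start_idx == 1
// and only the cached entries for f2 and f3 (f1's callers, frozen at their
// call sites) are appended. f1 itself is excluded because, as the previously
// executing frame, its line number may have changed since the last sample.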
int
frame_cache_lookup_and_extend(
RemoteUnwinderObject *unwinder,
uint64_t thread_id,
uintptr_t last_profiled_frame,
PyObject *frame_info,
uintptr_t *frame_addrs,
Py_ssize_t *num_addrs,
Py_ssize_t max_addrs)
{
if (!unwinder->frame_cache || last_profiled_frame == 0) {
return 0;
}
FrameCacheEntry *entry = frame_cache_find(unwinder, thread_id);
if (!entry || !entry->frame_list) {
return 0;
}
assert(entry->num_addrs >= 0 && entry->num_addrs <= FRAME_CACHE_MAX_FRAMES);
// Find the index where last_profiled_frame matches
Py_ssize_t start_idx = -1;
for (Py_ssize_t i = 0; i < entry->num_addrs; i++) {
if (entry->addrs[i] == last_profiled_frame) {
start_idx = i;
break;
}
}
if (start_idx < 0) {
return 0; // Not found
}
assert(start_idx < entry->num_addrs);
Py_ssize_t num_frames = PyList_GET_SIZE(entry->frame_list);
// Extend frame_info with frames ABOVE start_idx (not including it).
// The frame at start_idx (last_profiled_frame) was the executing frame
// in the previous sample and its line number may have changed.
// Only frames above it (its callers) are frozen at their call sites.
Py_ssize_t cache_start = start_idx + 1;
if (cache_start >= num_frames) {
return 0; // Nothing above last_profiled_frame to extend with
}
PyObject *slice = PyList_GetSlice(entry->frame_list, cache_start, num_frames);
if (!slice) {
return -1;
}
Py_ssize_t cur_size = PyList_GET_SIZE(frame_info);
int result = PyList_SetSlice(frame_info, cur_size, cur_size, slice);
Py_DECREF(slice);
if (result < 0) {
return -1;
}
// Also extend frame_addrs with cached addresses (above last_profiled_frame)
if (frame_addrs) {
for (Py_ssize_t i = cache_start; i < entry->num_addrs && *num_addrs < max_addrs; i++) {
frame_addrs[(*num_addrs)++] = entry->addrs[i];
}
}
return 1;
}

// Store a frame list and its addresses in the cache.
// Only complete stacks that reach base_frame_addr are stored (validated
// internally).
// Returns: 1 = stored, 0 = not stored (graceful degradation), -1 = error.
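//
// Typical call site (hypothetical, for illustration only):
//
//     if (frame_cache_store(unwinder, tid, frame_info, addrs, num_addrs,
//                           base_frame_addr, last_frame_visited) < 0) {
//         goto error;   // only -1 is fatal; 0 just means "skipped caching"
//     }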
int
frame_cache_store(
RemoteUnwinderObject *unwinder,
uint64_t thread_id,
PyObject *frame_list,
const uintptr_t *addrs,
Py_ssize_t num_addrs,
uintptr_t base_frame_addr,
uintptr_t last_frame_visited)
{
if (!unwinder->frame_cache || thread_id == 0) {
return 0;
}
// Validate we have a complete stack before caching.
// Only cache if last_frame_visited matches base_frame_addr (the sentinel
// at the bottom of the stack). Note: we use last_frame_visited rather than
// addrs[num_addrs-1] because the base frame is visited but not added to the
// addrs array (it returns frame==NULL from is_frame_valid due to
// owner==FRAME_OWNED_BY_INTERPRETER).
if (base_frame_addr != 0 && last_frame_visited != base_frame_addr) {
// Incomplete stack - don't cache (graceful degradation)
return 0;
}
// Clamp to max frames
if (num_addrs > FRAME_CACHE_MAX_FRAMES) {
num_addrs = FRAME_CACHE_MAX_FRAMES;
}
assert(num_addrs >= 0 && num_addrs <= FRAME_CACHE_MAX_FRAMES);
FrameCacheEntry *entry = frame_cache_alloc_slot(unwinder, thread_id);
if (!entry) {
// Cache full - graceful degradation
return 0;
}
// Clear old frame_list if replacing
Py_CLEAR(entry->frame_list);
// Store full frame list (don't truncate to num_addrs - frames beyond the
// address array limit are still valid and needed for full cache hits)
Py_ssize_t num_frames = PyList_GET_SIZE(frame_list);
entry->frame_list = PyList_GetSlice(frame_list, 0, num_frames);
if (!entry->frame_list) {
return -1;
}
entry->thread_id = thread_id;
    // num_addrs can be zero; guard the copy so memcpy is never handed a
    // potentially NULL source.
    if (num_addrs > 0) {
        memcpy(entry->addrs, addrs, num_addrs * sizeof(uintptr_t));
    }
    entry->num_addrs = num_addrs;
return 1;
}