// The QSBR APIs (quiescent state-based reclamation) provide a mechanism for
// the free-threaded build to safely reclaim memory when there may be
// concurrent accesses.
//
// Many operations in the free-threaded build are protected by locks. However,
// in some cases, we want to allow reads to happen concurrently with updates.
// In this case, we need to delay freeing ("reclaiming") any memory that may be
// concurrently accessed by a reader. The QSBR APIs provide a way to do this.
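//
// The typical writer-side pattern is sketched below (the names `obj`, `qsbr`
// and `unlink_from_shared_structure()` are illustrative, not part of this
// API): unlink the object so no new readers can find it, advance the write
// sequence to obtain a goal, and free the object only once every attached
// thread has passed a quiescent state at or after that goal.
//
//      unlink_from_shared_structure(obj);
//      uint64_t goal = _Py_qsbr_advance(qsbr->shared);
//      // ... later ...
//      if (_Py_qsbr_poll(qsbr, goal)) {
//          PyMem_Free(obj);    // no reader can still hold a pointer to obj
//      }
//
// Readers take no locks; they only report quiescent states (points where they
// hold no unprotected pointers into shared data) via _Py_qsbr_quiescent_state().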
#ifndef Py_INTERNAL_QSBR_H
#define Py_INTERNAL_QSBR_H

#include <stdbool.h>
#include <stdint.h>
#include "pycore_lock.h"        // PyMutex

#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif

// The shared write sequence is always odd and incremented by two. Detached
// threads are indicated by a read sequence of zero. This avoids collisions
// between the offline state and any valid sequence number, even if the
// sequence numbers wrap around.
#define QSBR_OFFLINE 0
#define QSBR_INITIAL 1
#define QSBR_INCR 2
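
// For example, wr_seq starts at QSBR_INITIAL (1) and advances 1 -> 3 -> 5 ...,
// so a thread's recorded read sequence is either QSBR_OFFLINE (0) or some odd
// write sequence value; the two can never collide.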

// Wrap-around safe comparison. This is a holdover from the FreeBSD
// implementation, which uses 32-bit sequence numbers. We currently use 64-bit
// sequence numbers, so wrap-around is unlikely.
#define QSBR_LT(a, b) ((int64_t)((a)-(b)) < 0)
#define QSBR_LEQ(a, b) ((int64_t)((a)-(b)) <= 0)
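
// For example, the ordering is preserved even across a wrap-around, because
// the subtraction is done modulo 2**64 and then reinterpreted as signed:
//
//      QSBR_LT(UINT64_MAX, 1)   // true:  (int64_t)(UINT64_MAX - 1) == -2
//      QSBR_LT(1, UINT64_MAX)   // false: (int64_t)(1 - UINT64_MAX) ==  2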

struct _qsbr_shared;
struct _PyThreadStateImpl;  // forward declare to avoid circular dependency

// Per-thread state
struct _qsbr_thread_state {
    // Last observed write sequence (or 0 if detached)
    uint64_t seq;

    // Shared (per-interpreter) QSBR state
    struct _qsbr_shared *shared;

    // Thread state (or NULL)
    PyThreadState *tstate;

    // Number of held items added by this thread since the last write sequence
    // advance
    int deferred_count;

    // Estimate of the amount of memory held by this thread since the last
    // write sequence advance
    size_t deferred_memory;

    // Amount of memory in mimalloc pages deferred from collection. While
    // deferred, these pages cannot be reused for a different size class or by
    // a different thread.
    size_t deferred_page_memory;

    // True if the deferred memory frees should be processed.
    bool should_process;

    // Is this thread state allocated?
    bool allocated;
    // Next entry in the shared freelist (when not allocated)
    struct _qsbr_thread_state *freelist_next;
};

// Padding to avoid false sharing
struct _qsbr_pad {
    struct _qsbr_thread_state qsbr;
    char __padding[64 - sizeof(struct _qsbr_thread_state)];
};
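
// The padding assumes a 64-byte cache line and that the thread state fits in
// a single line; a build-time check along these lines (a sketch, not part of
// this header) would make that assumption explicit:
//
//      static_assert(sizeof(struct _qsbr_pad) == 64, "expected one cache line");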

// Per-interpreter state
struct _qsbr_shared {
    // Write sequence: always odd, incremented by two
    uint64_t wr_seq;

    // Minimum observed read sequence of all QSBR thread states
    uint64_t rd_seq;

    // Array of QSBR thread states.
    struct _qsbr_pad *array;
    Py_ssize_t size;

    // Freelist of unused _qsbr_thread_states (protected by mutex)
    PyMutex mutex;
    struct _qsbr_thread_state *freelist;
};

static inline uint64_t
_Py_qsbr_shared_current(struct _qsbr_shared *shared)
{
    return _Py_atomic_load_uint64_acquire(&shared->wr_seq);
}

// Reports a quiescent state: the caller no longer holds any pointer to shared
// data not protected by locks or reference counts.
static inline void
_Py_qsbr_quiescent_state(struct _qsbr_thread_state *qsbr)
{
    uint64_t seq = _Py_qsbr_shared_current(qsbr->shared);
    _Py_atomic_store_uint64_release(&qsbr->seq, seq);
}
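
// A reader-side sketch (`qsbr` is the current thread's QSBR state; `table`,
// `lookup_shared()` and `use()` are illustrative, not part of this API):
//
//      PyObject *op = lookup_shared(table);   // may run concurrently with a writer
//      use(op);
//      // No unprotected pointers into shared data are held past this point,
//      // so report a quiescent state to let writers' goals be reached:
//      _Py_qsbr_quiescent_state(qsbr);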

// Have the read sequences advanced to the given goal? Like `_Py_qsbr_poll()`,
// but does not perform a scan of threads.
static inline bool
_Py_qsbr_goal_reached(struct _qsbr_thread_state *qsbr, uint64_t goal)
{
    uint64_t rd_seq = _Py_atomic_load_uint64(&qsbr->shared->rd_seq);
    return QSBR_LEQ(goal, rd_seq);
}

// Advance the write sequence and return the new goal. This should be called
// after data is removed. The returned goal is used with `_Py_qsbr_poll()` to
// determine when it is safe to reclaim (free) the memory.
extern uint64_t
_Py_qsbr_advance(struct _qsbr_shared *shared);

// Return the next value for the write sequence (current plus the increment).
extern uint64_t
_Py_qsbr_shared_next(struct _qsbr_shared *shared);

// Return true if deferred memory frees held by QSBR should be processed to
// determine if they can be safely freed.
static inline bool
_Py_qsbr_should_process(struct _qsbr_thread_state *qsbr)
{
    return qsbr->should_process;
}

// Have the read sequences advanced to the given goal? If this returns true,
// it is safe to reclaim any memory tagged with the goal (or an earlier goal).
extern bool
_Py_qsbr_poll(struct _qsbr_thread_state *qsbr, uint64_t goal);
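
// A writer that cannot free immediately typically records the goal alongside
// the memory and retries later. A sketch, assuming a hypothetical per-thread
// `deferred` list (not part of this API) ordered from oldest to newest goal:
//
//      uint64_t goal = _Py_qsbr_advance(qsbr->shared);
//      deferred_push(deferred, ptr, goal);
//      // ... later, e.g. when _Py_qsbr_should_process() returns true ...
//      while (deferred_peek(deferred, &ptr, &goal)) {
//          if (!_Py_qsbr_poll(qsbr, goal)) {
//              break;              // oldest entry is not yet safe; stop scanning
//          }
//          deferred_pop(deferred);
//          PyMem_Free(ptr);        // no thread can still be reading ptr
//      }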

// Called when a thread attaches to the interpreter
extern void
_Py_qsbr_attach(struct _qsbr_thread_state *qsbr);

// Called when a thread detaches from the interpreter
extern void
_Py_qsbr_detach(struct _qsbr_thread_state *qsbr);

// Reserves (allocates) a QSBR state and returns its index.
extern Py_ssize_t
_Py_qsbr_reserve(PyInterpreterState *interp);

// Associates a PyThreadState with the QSBR state at the given index
extern void
_Py_qsbr_register(struct _PyThreadStateImpl *tstate,
                  PyInterpreterState *interp, Py_ssize_t index);

// Disassociates a PyThreadState from the QSBR state and frees the QSBR state.
extern void
_Py_qsbr_unregister(PyThreadState *tstate);
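
// Typical ordering of these calls over a thread's lifetime (a sketch; the
// names `interp`, `tstate` and `tstate_impl` are illustrative, and
// `tstate_impl->qsbr` assumes the thread state exposes its QSBR state under
// that name; error handling omitted):
//
//      Py_ssize_t index = _Py_qsbr_reserve(interp);
//      _Py_qsbr_register(tstate_impl, interp, index);
//      _Py_qsbr_attach(tstate_impl->qsbr);       // thread starts running
//      // ... report quiescent states while running ...
//      _Py_qsbr_detach(tstate_impl->qsbr);       // thread stops running
//      _Py_qsbr_unregister(tstate);              // frees the QSBR state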

// Releases the per-interpreter QSBR state at interpreter finalization.
extern void
_Py_qsbr_fini(PyInterpreterState *interp);

// Resets the QSBR state after a fork(), when only the calling thread remains.
extern void
_Py_qsbr_after_fork(struct _PyThreadStateImpl *tstate);

#ifdef __cplusplus
}
#endif
#endif /* !Py_INTERNAL_QSBR_H */