| #ifndef Py_INTERNAL_OPTIMIZER_H |
| #define Py_INTERNAL_OPTIMIZER_H |
| #ifdef __cplusplus |
| extern "C" { |
| #endif |
| |
| #ifndef Py_BUILD_CORE |
| # error "this header requires Py_BUILD_CORE define" |
| #endif |
| |
| #include "pycore_uop_ids.h" |
| #include <stdbool.h> |
| |
| |
| typedef struct _PyExecutorLinkListNode { |
| struct _PyExecutorObject *next; |
| struct _PyExecutorObject *previous; |
| } _PyExecutorLinkListNode; |
| |
| |
| /* Bloom filter with m = 256 |
| * https://en.wikipedia.org/wiki/Bloom_filter */ |
| #define _Py_BLOOM_FILTER_WORDS 8 |
| |
| typedef struct { |
| uint32_t bits[_Py_BLOOM_FILTER_WORDS]; |
| } _PyBloomFilter; |
| |
| typedef struct { |
| uint8_t opcode; |
| uint8_t oparg; |
| uint8_t valid:1; |
| uint8_t linked:1; |
| uint8_t chain_depth:6; // Must be big enough for MAX_CHAIN_DEPTH - 1. |
| bool warm; |
| int index; // Index of ENTER_EXECUTOR (if code isn't NULL, below). |
| _PyBloomFilter bloom; |
| _PyExecutorLinkListNode links; |
| PyCodeObject *code; // Weak (NULL if no corresponding ENTER_EXECUTOR). |
| } _PyVMData; |
| |
| /* Depending on the format, |
| * the 32 bits between the oparg and operand are: |
| * UOP_FORMAT_TARGET: |
| * uint32_t target; |
| * UOP_FORMAT_JUMP |
| * uint16_t jump_target; |
| * uint16_t error_target; |
| */ |
| typedef struct { |
| uint16_t opcode:15; |
| uint16_t format:1; |
| uint16_t oparg; |
| union { |
| uint32_t target; |
| struct { |
| uint16_t jump_target; |
| uint16_t error_target; |
| }; |
| }; |
| uint64_t operand0; // A cache entry |
| uint64_t operand1; |
| } _PyUOpInstruction; |
| |
| typedef struct { |
| uint32_t target; |
| _Py_BackoffCounter temperature; |
| const struct _PyExecutorObject *executor; |
| } _PyExitData; |
| |
| typedef struct _PyExecutorObject { |
| PyObject_VAR_HEAD |
| const _PyUOpInstruction *trace; |
| _PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */ |
| uint32_t exit_count; |
| uint32_t code_size; |
| size_t jit_size; |
| void *jit_code; |
| void *jit_side_entry; |
| _PyExitData exits[1]; |
| } _PyExecutorObject; |
| |
| typedef struct _PyOptimizerObject _PyOptimizerObject; |
| |
| /* Should return > 0 if a new executor is created. O if no executor is produced and < 0 if an error occurred. */ |
| typedef int (*_Py_optimize_func)( |
| _PyOptimizerObject* self, struct _PyInterpreterFrame *frame, |
| _Py_CODEUNIT *instr, _PyExecutorObject **exec_ptr, |
| int curr_stackentries, bool progress_needed); |
| |
| struct _PyOptimizerObject { |
| PyObject_HEAD |
| _Py_optimize_func optimize; |
| /* Data needed by the optimizer goes here, but is opaque to the VM */ |
| }; |
| |
| /** Test support **/ |
| typedef struct { |
| _PyOptimizerObject base; |
| int64_t count; |
| } _PyCounterOptimizerObject; |
| |
| _PyOptimizerObject *_Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject* optimizer); |
| |
| |
| // Export for '_opcode' shared extension (JIT compiler). |
| PyAPI_FUNC(_PyExecutorObject*) _Py_GetExecutor(PyCodeObject *code, int offset); |
| |
| void _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *); |
| void _Py_ExecutorDetach(_PyExecutorObject *); |
| void _Py_BloomFilter_Init(_PyBloomFilter *); |
| void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj); |
| PyAPI_FUNC(void) _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj); |
| |
| // For testing |
| // Export for '_testinternalcapi' shared extension. |
| PyAPI_FUNC(_PyOptimizerObject *) _Py_GetOptimizer(void); |
| PyAPI_FUNC(int) _Py_SetTier2Optimizer(_PyOptimizerObject* optimizer); |
| PyAPI_FUNC(PyObject *) _PyOptimizer_NewCounter(void); |
| PyAPI_FUNC(PyObject *) _PyOptimizer_NewUOpOptimizer(void); |
| |
| #define _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS 3 |
| #define _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS 6 |
| |
| #ifdef _Py_TIER2 |
| PyAPI_FUNC(void) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation); |
| PyAPI_FUNC(void) _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation); |
| PyAPI_FUNC(void) _Py_Executors_InvalidateCold(PyInterpreterState *interp); |
| |
| #else |
| # define _Py_Executors_InvalidateDependency(A, B, C) ((void)0) |
| # define _Py_Executors_InvalidateAll(A, B) ((void)0) |
| # define _Py_Executors_InvalidateCold(A) ((void)0) |
| |
| #endif |
| |
| // Used as the threshold to trigger executor invalidation when |
| // trace_run_counter is greater than this value. |
| #define JIT_CLEANUP_THRESHOLD 100000 |
| |
| // This is the length of the trace we project initially. |
| #define UOP_MAX_TRACE_LENGTH 800 |
| |
| #define TRACE_STACK_SIZE 5 |
| |
| int _Py_uop_analyze_and_optimize(struct _PyInterpreterFrame *frame, |
| _PyUOpInstruction *trace, int trace_len, int curr_stackentries, |
| _PyBloomFilter *dependencies); |
| |
| extern PyTypeObject _PyCounterExecutor_Type; |
| extern PyTypeObject _PyCounterOptimizer_Type; |
| extern PyTypeObject _PyDefaultOptimizer_Type; |
| extern PyTypeObject _PyUOpExecutor_Type; |
| extern PyTypeObject _PyUOpOptimizer_Type; |
| |
| /* Symbols */ |
| /* See explanation in optimizer_symbols.c */ |
| |
| struct _Py_UopsSymbol { |
| int flags; // 0 bits: Top; 2 or more bits: Bottom |
| PyTypeObject *typ; // Borrowed reference |
| PyObject *const_val; // Owned reference (!) |
| unsigned int type_version; // currently stores type version |
| }; |
| |
| #define UOP_FORMAT_TARGET 0 |
| #define UOP_FORMAT_JUMP 1 |
| |
| static inline uint32_t uop_get_target(const _PyUOpInstruction *inst) |
| { |
| assert(inst->format == UOP_FORMAT_TARGET); |
| return inst->target; |
| } |
| |
| static inline uint16_t uop_get_jump_target(const _PyUOpInstruction *inst) |
| { |
| assert(inst->format == UOP_FORMAT_JUMP); |
| return inst->jump_target; |
| } |
| |
| static inline uint16_t uop_get_error_target(const _PyUOpInstruction *inst) |
| { |
| assert(inst->format != UOP_FORMAT_TARGET); |
| return inst->error_target; |
| } |
| |
| // Holds locals, stack, locals, stack ... co_consts (in that order) |
| #define MAX_ABSTRACT_INTERP_SIZE 4096 |
| |
| #define TY_ARENA_SIZE (UOP_MAX_TRACE_LENGTH * 5) |
| |
| // Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH()) |
| #define MAX_ABSTRACT_FRAME_DEPTH (TRACE_STACK_SIZE + 2) |
| |
| // The maximum number of side exits that we can take before requiring forward |
| // progress (and inserting a new ENTER_EXECUTOR instruction). In practice, this |
| // is the "maximum amount of polymorphism" that an isolated trace tree can |
| // handle before rejoining the rest of the program. |
| #define MAX_CHAIN_DEPTH 4 |
| |
| typedef struct _Py_UopsSymbol _Py_UopsSymbol; |
| |
| struct _Py_UOpsAbstractFrame { |
| // Max stacklen |
| int stack_len; |
| int locals_len; |
| |
| _Py_UopsSymbol **stack_pointer; |
| _Py_UopsSymbol **stack; |
| _Py_UopsSymbol **locals; |
| }; |
| |
| typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame; |
| |
| typedef struct ty_arena { |
| int ty_curr_number; |
| int ty_max_number; |
| _Py_UopsSymbol arena[TY_ARENA_SIZE]; |
| } ty_arena; |
| |
| struct _Py_UOpsContext { |
| char done; |
| char out_of_space; |
| bool contradiction; |
| // The current "executing" frame. |
| _Py_UOpsAbstractFrame *frame; |
| _Py_UOpsAbstractFrame frames[MAX_ABSTRACT_FRAME_DEPTH]; |
| int curr_frame_depth; |
| |
| // Arena for the symbolic types. |
| ty_arena t_arena; |
| |
| _Py_UopsSymbol **n_consumed; |
| _Py_UopsSymbol **limit; |
| _Py_UopsSymbol *locals_and_stack[MAX_ABSTRACT_INTERP_SIZE]; |
| }; |
| |
| typedef struct _Py_UOpsContext _Py_UOpsContext; |
| |
| extern bool _Py_uop_sym_is_null(_Py_UopsSymbol *sym); |
| extern bool _Py_uop_sym_is_not_null(_Py_UopsSymbol *sym); |
| extern bool _Py_uop_sym_is_const(_Py_UopsSymbol *sym); |
| extern PyObject *_Py_uop_sym_get_const(_Py_UopsSymbol *sym); |
| extern _Py_UopsSymbol *_Py_uop_sym_new_unknown(_Py_UOpsContext *ctx); |
| extern _Py_UopsSymbol *_Py_uop_sym_new_not_null(_Py_UOpsContext *ctx); |
| extern _Py_UopsSymbol *_Py_uop_sym_new_type( |
| _Py_UOpsContext *ctx, PyTypeObject *typ); |
| extern _Py_UopsSymbol *_Py_uop_sym_new_const(_Py_UOpsContext *ctx, PyObject *const_val); |
| extern _Py_UopsSymbol *_Py_uop_sym_new_null(_Py_UOpsContext *ctx); |
| extern bool _Py_uop_sym_has_type(_Py_UopsSymbol *sym); |
| extern bool _Py_uop_sym_matches_type(_Py_UopsSymbol *sym, PyTypeObject *typ); |
| extern bool _Py_uop_sym_matches_type_version(_Py_UopsSymbol *sym, unsigned int version); |
| extern void _Py_uop_sym_set_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym); |
| extern void _Py_uop_sym_set_non_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym); |
| extern void _Py_uop_sym_set_type(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyTypeObject *typ); |
| extern bool _Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, unsigned int version); |
| extern void _Py_uop_sym_set_const(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyObject *const_val); |
| extern bool _Py_uop_sym_is_bottom(_Py_UopsSymbol *sym); |
| extern int _Py_uop_sym_truthiness(_Py_UopsSymbol *sym); |
| extern PyTypeObject *_Py_uop_sym_get_type(_Py_UopsSymbol *sym); |
| |
| |
| extern void _Py_uop_abstractcontext_init(_Py_UOpsContext *ctx); |
| extern void _Py_uop_abstractcontext_fini(_Py_UOpsContext *ctx); |
| |
| extern _Py_UOpsAbstractFrame *_Py_uop_frame_new( |
| _Py_UOpsContext *ctx, |
| PyCodeObject *co, |
| int curr_stackentries, |
| _Py_UopsSymbol **args, |
| int arg_len); |
| extern int _Py_uop_frame_pop(_Py_UOpsContext *ctx); |
| |
| PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored); |
| |
| PyAPI_FUNC(int) _PyOptimizer_Optimize(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *start, _PyStackRef *stack_pointer, _PyExecutorObject **exec_ptr, int chain_depth); |
| |
| static inline int is_terminator(const _PyUOpInstruction *uop) |
| { |
| int opcode = uop->opcode; |
| return ( |
| opcode == _EXIT_TRACE || |
| opcode == _JUMP_TO_TOP || |
| opcode == _DYNAMIC_EXIT |
| ); |
| } |
| |
| #ifdef __cplusplus |
| } |
| #endif |
| #endif /* !Py_INTERNAL_OPTIMIZER_H */ |