https://github.com/python/cpython/commit/aeb34035633b24afb27d3888fdd12abdf2bdc339
commit: aeb34035633b24afb27d3888fdd12abdf2bdc339
branch: main
author: Hai Zhu <[email protected]>
committer: Fidget-Spinner <[email protected]>
date: 2026-01-08T19:38:21Z
summary:

gh-143421: Move `JitOptContext` from stack allocation to per-thread heap allocation (GH-143536)

* move JitOptContext to _PyThreadStateImpl
* make _PyUOpInstruction buffer a part of _PyThreadStateImpl

Co-authored-by: Kumar Aditya <[email protected]>
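
For readers skimming the diff below, here is a minimal sketch of the allocation pattern being changed. It is not the CPython code: ThreadState, BigContext, analyze_on_stack, and analyze_in_tstate are hypothetical stand-ins for _PyThreadStateImpl, JitOptContext, and optimize_uops. The point is that a large context struct formerly declared as a C-stack local now lives inside the heap-allocated per-thread state, so the analysis pass only borrows a pointer instead of growing the C stack.

    /* Hypothetical sketch of the allocation change; names are stand-ins, not CPython code. */
    #include <stdlib.h>
    #include <string.h>

    /* Stand-in for JitOptContext: large enough that a stack allocation is risky. */
    typedef struct {
        unsigned long locals_and_stack[4096];   /* ~32 KiB on a 64-bit build */
    } BigContext;

    /* Stand-in for _PyThreadStateImpl: heap-allocated, one per thread. */
    typedef struct {
        BigContext opt_context;
    } ThreadState;

    /* Old pattern: the context is a local, so every call pushes ~32 KiB onto the C stack. */
    static int analyze_on_stack(void)
    {
        BigContext context;
        memset(&context, 0, sizeof(context));
        return (int)context.locals_and_stack[0];
    }

    /* New pattern: the context lives in the per-thread state; the pass borrows a pointer. */
    static int analyze_in_tstate(ThreadState *ts)
    {
        BigContext *ctx = &ts->opt_context;
        memset(ctx, 0, sizeof(*ctx));
        return (int)ctx->locals_and_stack[0];
    }

    int main(void)
    {
        ThreadState *ts = calloc(1, sizeof(*ts));   /* per-thread heap allocation */
        if (ts == NULL) {
            return 1;
        }
        int rc = analyze_on_stack() + analyze_in_tstate(ts);
        free(ts);
        return rc;
    }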

files:
A Include/internal/pycore_optimizer_types.h
M Include/internal/pycore_optimizer.h
M Include/internal/pycore_tstate.h
M Python/optimizer.c
M Python/optimizer_analysis.c
M Python/pystate.c

diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h
index 6a0fc1a59e7965..d1d22c77507c6c 100644
--- a/Include/internal/pycore_optimizer.h
+++ b/Include/internal/pycore_optimizer.h
@@ -12,6 +12,7 @@ extern "C" {
 #include "pycore_uop.h"           // _PyUOpInstruction
 #include "pycore_uop_ids.h"
 #include "pycore_stackref.h"      // _PyStackRef
+#include "pycore_optimizer_types.h"
 #include <stdbool.h>
 
 
@@ -84,7 +85,7 @@ PyAPI_FUNC(void) _Py_Executors_InvalidateCold(PyInterpreterState *interp);
 #define JIT_CLEANUP_THRESHOLD 1000
 
 int _Py_uop_analyze_and_optimize(
-    PyFunctionObject *func,
+    _PyThreadStateImpl *tstate,
     _PyUOpInstruction *trace, int trace_len, int curr_stackentries,
     _PyBloomFilter *dependencies);
 
@@ -112,86 +113,6 @@ static inline uint16_t uop_get_error_target(const _PyUOpInstruction *inst)
     return inst->error_target;
 }
 
-// Holds locals, stack, locals, stack ... co_consts (in that order)
-#define MAX_ABSTRACT_INTERP_SIZE 4096
-
-#define TY_ARENA_SIZE (UOP_MAX_TRACE_LENGTH * 5)
-
-// Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH())
-#define MAX_ABSTRACT_FRAME_DEPTH (16)
-
-// The maximum number of side exits that we can take before requiring forward
-// progress (and inserting a new ENTER_EXECUTOR instruction). In practice, this
-// is the "maximum amount of polymorphism" that an isolated trace tree can
-// handle before rejoining the rest of the program.
-#define MAX_CHAIN_DEPTH 4
-
-/* Symbols */
-/* See explanation in optimizer_symbols.c */
-
-
-typedef enum _JitSymType {
-    JIT_SYM_UNKNOWN_TAG = 1,
-    JIT_SYM_NULL_TAG = 2,
-    JIT_SYM_NON_NULL_TAG = 3,
-    JIT_SYM_BOTTOM_TAG = 4,
-    JIT_SYM_TYPE_VERSION_TAG = 5,
-    JIT_SYM_KNOWN_CLASS_TAG = 6,
-    JIT_SYM_KNOWN_VALUE_TAG = 7,
-    JIT_SYM_TUPLE_TAG = 8,
-    JIT_SYM_TRUTHINESS_TAG = 9,
-    JIT_SYM_COMPACT_INT = 10,
-} JitSymType;
-
-typedef struct _jit_opt_known_class {
-    uint8_t tag;
-    uint32_t version;
-    PyTypeObject *type;
-} JitOptKnownClass;
-
-typedef struct _jit_opt_known_version {
-    uint8_t tag;
-    uint32_t version;
-} JitOptKnownVersion;
-
-typedef struct _jit_opt_known_value {
-    uint8_t tag;
-    PyObject *value;
-} JitOptKnownValue;
-
-#define MAX_SYMBOLIC_TUPLE_SIZE 7
-
-typedef struct _jit_opt_tuple {
-    uint8_t tag;
-    uint8_t length;
-    uint16_t items[MAX_SYMBOLIC_TUPLE_SIZE];
-} JitOptTuple;
-
-typedef struct {
-    uint8_t tag;
-    bool invert;
-    uint16_t value;
-} JitOptTruthiness;
-
-typedef struct {
-    uint8_t tag;
-} JitOptCompactInt;
-
-typedef union _jit_opt_symbol {
-    uint8_t tag;
-    JitOptKnownClass cls;
-    JitOptKnownValue value;
-    JitOptKnownVersion version;
-    JitOptTuple tuple;
-    JitOptTruthiness truthiness;
-    JitOptCompactInt compact;
-} JitOptSymbol;
-
-
-// This mimics the _PyStackRef API
-typedef union {
-    uintptr_t bits;
-} JitOptRef;
 
 #define REF_IS_BORROWED 1
 
@@ -238,48 +159,6 @@ PyJitRef_IsBorrowed(JitOptRef ref)
     return (ref.bits & REF_IS_BORROWED) == REF_IS_BORROWED;
 }
 
-struct _Py_UOpsAbstractFrame {
-    bool globals_watched;
-     // The version number of the globals dicts, once checked. 0 if unchecked.
-    uint32_t globals_checked_version;
-    // Max stacklen
-    int stack_len;
-    int locals_len;
-    PyFunctionObject *func;
-    PyCodeObject *code;
-
-    JitOptRef *stack_pointer;
-    JitOptRef *stack;
-    JitOptRef *locals;
-};
-
-typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame;
-
-typedef struct ty_arena {
-    int ty_curr_number;
-    int ty_max_number;
-    JitOptSymbol arena[TY_ARENA_SIZE];
-} ty_arena;
-
-typedef struct _JitOptContext {
-    char done;
-    char out_of_space;
-    bool contradiction;
-     // Has the builtins dict been watched?
-    bool builtins_watched;
-    // The current "executing" frame.
-    _Py_UOpsAbstractFrame *frame;
-    _Py_UOpsAbstractFrame frames[MAX_ABSTRACT_FRAME_DEPTH];
-    int curr_frame_depth;
-
-    // Arena for the symbolic types.
-    ty_arena t_arena;
-
-    JitOptRef *n_consumed;
-    JitOptRef *limit;
-    JitOptRef locals_and_stack[MAX_ABSTRACT_INTERP_SIZE];
-} JitOptContext;
-
 extern bool _Py_uop_sym_is_null(JitOptRef sym);
 extern bool _Py_uop_sym_is_not_null(JitOptRef sym);
 extern bool _Py_uop_sym_is_const(JitOptContext *ctx, JitOptRef sym);
diff --git a/Include/internal/pycore_optimizer_types.h b/Include/internal/pycore_optimizer_types.h
new file mode 100644
index 00000000000000..de8e50921e3311
--- /dev/null
+++ b/Include/internal/pycore_optimizer_types.h
@@ -0,0 +1,137 @@
+#ifndef Py_INTERNAL_OPTIMIZER_TYPES_H
+#define Py_INTERNAL_OPTIMIZER_TYPES_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+#  error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include "pycore_uop.h"  // UOP_MAX_TRACE_LENGTH
+
+// Holds locals, stack, locals, stack ... co_consts (in that order)
+#define MAX_ABSTRACT_INTERP_SIZE 4096
+
+#define TY_ARENA_SIZE (UOP_MAX_TRACE_LENGTH * 5)
+
+// Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH())
+#define MAX_ABSTRACT_FRAME_DEPTH (16)
+
+// The maximum number of side exits that we can take before requiring forward
+// progress (and inserting a new ENTER_EXECUTOR instruction). In practice, this
+// is the "maximum amount of polymorphism" that an isolated trace tree can
+// handle before rejoining the rest of the program.
+#define MAX_CHAIN_DEPTH 4
+
+/* Symbols */
+/* See explanation in optimizer_symbols.c */
+
+
+typedef enum _JitSymType {
+    JIT_SYM_UNKNOWN_TAG = 1,
+    JIT_SYM_NULL_TAG = 2,
+    JIT_SYM_NON_NULL_TAG = 3,
+    JIT_SYM_BOTTOM_TAG = 4,
+    JIT_SYM_TYPE_VERSION_TAG = 5,
+    JIT_SYM_KNOWN_CLASS_TAG = 6,
+    JIT_SYM_KNOWN_VALUE_TAG = 7,
+    JIT_SYM_TUPLE_TAG = 8,
+    JIT_SYM_TRUTHINESS_TAG = 9,
+    JIT_SYM_COMPACT_INT = 10,
+} JitSymType;
+
+typedef struct _jit_opt_known_class {
+    uint8_t tag;
+    uint32_t version;
+    PyTypeObject *type;
+} JitOptKnownClass;
+
+typedef struct _jit_opt_known_version {
+    uint8_t tag;
+    uint32_t version;
+} JitOptKnownVersion;
+
+typedef struct _jit_opt_known_value {
+    uint8_t tag;
+    PyObject *value;
+} JitOptKnownValue;
+
+#define MAX_SYMBOLIC_TUPLE_SIZE 7
+
+typedef struct _jit_opt_tuple {
+    uint8_t tag;
+    uint8_t length;
+    uint16_t items[MAX_SYMBOLIC_TUPLE_SIZE];
+} JitOptTuple;
+
+typedef struct {
+    uint8_t tag;
+    bool invert;
+    uint16_t value;
+} JitOptTruthiness;
+
+typedef struct {
+    uint8_t tag;
+} JitOptCompactInt;
+
+typedef union _jit_opt_symbol {
+    uint8_t tag;
+    JitOptKnownClass cls;
+    JitOptKnownValue value;
+    JitOptKnownVersion version;
+    JitOptTuple tuple;
+    JitOptTruthiness truthiness;
+    JitOptCompactInt compact;
+} JitOptSymbol;
+
+// This mimics the _PyStackRef API
+typedef union {
+    uintptr_t bits;
+} JitOptRef;
+
+typedef struct _Py_UOpsAbstractFrame {
+    bool globals_watched;
+    // The version number of the globals dicts, once checked. 0 if unchecked.
+    uint32_t globals_checked_version;
+    // Max stacklen
+    int stack_len;
+    int locals_len;
+    PyFunctionObject *func;
+    PyCodeObject *code;
+
+    JitOptRef *stack_pointer;
+    JitOptRef *stack;
+    JitOptRef *locals;
+} _Py_UOpsAbstractFrame;
+
+typedef struct ty_arena {
+    int ty_curr_number;
+    int ty_max_number;
+    JitOptSymbol arena[TY_ARENA_SIZE];
+} ty_arena;
+
+typedef struct _JitOptContext {
+    char done;
+    char out_of_space;
+    bool contradiction;
+    // Has the builtins dict been watched?
+    bool builtins_watched;
+    // The current "executing" frame.
+    _Py_UOpsAbstractFrame *frame;
+    _Py_UOpsAbstractFrame frames[MAX_ABSTRACT_FRAME_DEPTH];
+    int curr_frame_depth;
+
+    // Arena for the symbolic types.
+    ty_arena t_arena;
+
+    JitOptRef *n_consumed;
+    JitOptRef *limit;
+    JitOptRef locals_and_stack[MAX_ABSTRACT_INTERP_SIZE];
+} JitOptContext;
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_OPTIMIZER_TYPES_H */
diff --git a/Include/internal/pycore_tstate.h b/Include/internal/pycore_tstate.h
index d8f4bfef98af7e..81cabb4dca47e4 100644
--- a/Include/internal/pycore_tstate.h
+++ b/Include/internal/pycore_tstate.h
@@ -12,6 +12,7 @@ extern "C" {
 #include "pycore_freelist_state.h"  // struct _Py_freelists
 #include "pycore_interpframe_structs.h"  // _PyInterpreterFrame
 #include "pycore_mimalloc.h"        // struct _mimalloc_thread_state
+#include "pycore_optimizer_types.h" // JitOptContext
 #include "pycore_qsbr.h"            // struct qsbr
 #include "pycore_uop.h"             // struct _PyUOpInstruction
 #include "pycore_structs.h"
@@ -52,10 +53,11 @@ typedef struct _PyJitTracerTranslatorState {
 } _PyJitTracerTranslatorState;
 
 typedef struct _PyJitTracerState {
-    _PyUOpInstruction *code_buffer;
     _PyJitTracerInitialState initial_state;
     _PyJitTracerPreviousState prev_state;
     _PyJitTracerTranslatorState translator_state;
+    JitOptContext opt_context;
+    _PyUOpInstruction code_buffer[UOP_MAX_TRACE_LENGTH];
 } _PyJitTracerState;
 
 #endif
diff --git a/Python/optimizer.c b/Python/optimizer.c
index d32fae2e489af4..73617f6ca26425 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -1025,13 +1025,6 @@ _PyJit_TryInitializeTracing(
     if (oparg > 0xFFFF) {
         return 0;
     }
-    if (_tstate->jit_tracer_state.code_buffer == NULL) {
-        _tstate->jit_tracer_state.code_buffer = (_PyUOpInstruction *)_PyObject_VirtualAlloc(UOP_BUFFER_SIZE);
-        if (_tstate->jit_tracer_state.code_buffer == NULL) {
-            // Don't error, just go to next instruction.
-            return 0;
-        }
-    }
     PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj);
     if (func == NULL) {
         return 0;
@@ -1484,8 +1477,8 @@ uop_optimize(
     OPT_STAT_INC(traces_created);
     if (!is_noopt) {
         length = _Py_uop_analyze_and_optimize(
-            _tstate->jit_tracer_state.initial_state.func,
-            buffer,length,
+            _tstate,
+            buffer, length,
             curr_stackentries, dependencies);
         if (length <= 0) {
             return length;
diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c
index 29a088e43c2a0f..56d4f9945d6908 100644
--- a/Python/optimizer_analysis.c
+++ b/Python/optimizer_analysis.c
@@ -18,6 +18,7 @@
 #include "pycore_opcode_metadata.h"
 #include "pycore_opcode_utils.h"
 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
+#include "pycore_tstate.h"        // _PyThreadStateImpl
 #include "pycore_uop_metadata.h"
 #include "pycore_long.h"
 #include "pycore_interpframe.h"  // _PyFrame_GetCode
@@ -334,7 +335,7 @@ _Py_opt_assert_within_stack_bounds(
 /* >0 (length) for success, 0 for not ready, clears all possible errors. */
 static int
 optimize_uops(
-    PyFunctionObject *func,
+    _PyThreadStateImpl *tstate,
     _PyUOpInstruction *trace,
     int trace_len,
     int curr_stacklen,
@@ -342,9 +343,9 @@ optimize_uops(
 )
 {
     assert(!PyErr_Occurred());
+    PyFunctionObject *func = tstate->jit_tracer_state.initial_state.func;
 
-    JitOptContext context;
-    JitOptContext *ctx = &context;
+    JitOptContext *ctx = &tstate->jit_tracer_state.opt_context;
     uint32_t opcode = UINT16_MAX;
 
     // Make sure that watchers are set up
@@ -574,7 +575,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
 //  > 0 - length of optimized trace
 int
 _Py_uop_analyze_and_optimize(
-    PyFunctionObject *func,
+    _PyThreadStateImpl *tstate,
     _PyUOpInstruction *buffer,
     int length,
     int curr_stacklen,
@@ -584,7 +585,7 @@ _Py_uop_analyze_and_optimize(
     OPT_STAT_INC(optimizer_attempts);
 
     length = optimize_uops(
-         func, buffer,
+         tstate, buffer,
          length, curr_stacklen, dependencies);
 
     if (length == 0) {
diff --git a/Python/pystate.c b/Python/pystate.c
index 23853f69792450..74507efa5b4cf3 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -1553,7 +1553,6 @@ init_threadstate(_PyThreadStateImpl *_tstate,
     init_policy(&_tstate->policy.jit.side_exit_initial_backoff,
                 "PYTHON_JIT_SIDE_EXIT_INITIAL_BACKOFF",
                 SIDE_EXIT_INITIAL_BACKOFF, 0, MAX_BACKOFF);
-    _tstate->jit_tracer_state.code_buffer = NULL;
 #endif
     tstate->delete_later = NULL;
 
@@ -1868,14 +1867,6 @@ tstate_delete_common(PyThreadState *tstate, int release_gil)
     assert(tstate_impl->refcounts.values == NULL);
 #endif
 
-#if _Py_TIER2
-    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
-    if (_tstate->jit_tracer_state.code_buffer != NULL) {
-        _PyObject_VirtualFree(_tstate->jit_tracer_state.code_buffer, UOP_BUFFER_SIZE);
-        _tstate->jit_tracer_state.code_buffer = NULL;
-    }
-#endif
-
     HEAD_UNLOCK(runtime);
 
     // XXX Unbind in PyThreadState_Clear(), or earlier
