https://github.com/python/cpython/commit/c84928ed6de105696be24859e03f3ab27e11daf6
commit: c84928ed6de105696be24859e03f3ab27e11daf6
branch: main
author: mpage <[email protected]>
committer: mpage <[email protected]>
date: 2024-12-11T15:18:22-08:00
summary:

gh-115999: Specialize `CALL_KW` in free-threaded builds (#127713)

* Enable specialization of CALL_KW

* Fix bug pushing frame in _PY_FRAME_KW

`_PY_FRAME_KW` pushes a pointer to the new frame onto the stack for
consumption by the next uop. When pushing the frame fails, we do not
want to push the result, `NULL`, to the stack because it is not
a valid stackref. This works in the default build because `PyStackRef_NULL`
 and `NULL` are the same value, so the `PyStackRef_XCLOSE()` in the error
handler ignores it. In the free-threaded build the values are not the same;
`PyStackRef_XCLOSE()` will attempt to decref a null pointer.

files:
M Python/bytecodes.c
M Python/executor_cases.c.h
M Python/generated_cases.c.h
M Python/specialize.c

diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index 3d280941b35244..d0e4c2bc45489b 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -4311,7 +4311,7 @@ dummy_func(
             assert(Py_TYPE(callable_o) == &PyFunction_Type);
             int code_flags = 
((PyCodeObject*)PyFunction_GET_CODE(callable_o))->co_flags;
             PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : 
Py_NewRef(PyFunction_GET_GLOBALS(callable_o));
-            new_frame = _PyEvalFramePushAndInit(
+            _PyInterpreterFrame *temp = _PyEvalFramePushAndInit(
                 tstate, callable[0], locals,
                 args, positional_args, kwnames_o, frame
             );
@@ -4319,9 +4319,10 @@ dummy_func(
             // The frame has stolen all the arguments from the stack,
             // so there is no need to clean them up.
             SYNC_SP();
-            if (new_frame == NULL) {
+            if (temp == NULL) {
                 ERROR_NO_POP();
             }
+            new_frame = temp;
         }
 
         op(_CHECK_FUNCTION_VERSION_KW, (func_version/2, callable[1], 
self_or_null[1], unused[oparg], kwnames -- callable[1], self_or_null[1], 
unused[oparg], kwnames)) {
@@ -4372,7 +4373,7 @@ dummy_func(
             _PUSH_FRAME;
 
         specializing op(_SPECIALIZE_CALL_KW, (counter/1, callable[1], 
self_or_null[1], args[oparg], kwnames -- callable[1], self_or_null[1], 
args[oparg], kwnames)) {
-            #if ENABLE_SPECIALIZATION
+            #if ENABLE_SPECIALIZATION_FT
             if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_CallKw(callable[0], next_instr, oparg + 
!PyStackRef_IsNull(self_or_null[0]));
@@ -4380,7 +4381,7 @@ dummy_func(
             }
             OPCODE_DEFERRED_INC(CALL_KW);
             ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
-            #endif  /* ENABLE_SPECIALIZATION */
+            #endif  /* ENABLE_SPECIALIZATION_FT */
         }
 
         macro(CALL_KW) =
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index 987ff2e6419669..18f19773d25c90 100644
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@@ -5235,7 +5235,7 @@
             int code_flags = 
((PyCodeObject*)PyFunction_GET_CODE(callable_o))->co_flags;
             PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : 
Py_NewRef(PyFunction_GET_GLOBALS(callable_o));
             _PyFrame_SetStackPointer(frame, stack_pointer);
-            new_frame = _PyEvalFramePushAndInit(
+            _PyInterpreterFrame *temp = _PyEvalFramePushAndInit(
                 tstate, callable[0], locals,
                 args, positional_args, kwnames_o, frame
             );
@@ -5243,12 +5243,15 @@
             PyStackRef_CLOSE(kwnames);
             // The frame has stolen all the arguments from the stack,
             // so there is no need to clean them up.
-            stack_pointer[-3 - oparg].bits = (uintptr_t)new_frame;
-            stack_pointer += -2 - oparg;
+            stack_pointer += -3 - oparg;
             assert(WITHIN_STACK_BOUNDS());
-            if (new_frame == NULL) {
+            if (temp == NULL) {
                 JUMP_TO_ERROR();
             }
+            new_frame = temp;
+            stack_pointer[0].bits = (uintptr_t)new_frame;
+            stack_pointer += 1;
+            assert(WITHIN_STACK_BOUNDS());
             break;
         }
 
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index 33f32aba1e5145..fc0f55555f5c36 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -1895,7 +1895,7 @@
                 callable = &stack_pointer[-3 - oparg];
                 uint16_t counter = read_u16(&this_instr[1].cache);
                 (void)counter;
-                #if ENABLE_SPECIALIZATION
+                #if ENABLE_SPECIALIZATION_FT
                 if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                     next_instr = this_instr;
                     _PyFrame_SetStackPointer(frame, stack_pointer);
@@ -1905,7 +1905,7 @@
                 }
                 OPCODE_DEFERRED_INC(CALL_KW);
                 ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
-                #endif  /* ENABLE_SPECIALIZATION */
+                #endif  /* ENABLE_SPECIALIZATION_FT */
             }
             /* Skip 2 cache entries */
             // _MAYBE_EXPAND_METHOD_KW
@@ -2093,7 +2093,7 @@
                 int code_flags = 
((PyCodeObject*)PyFunction_GET_CODE(callable_o))->co_flags;
                 PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : 
Py_NewRef(PyFunction_GET_GLOBALS(callable_o));
                 _PyFrame_SetStackPointer(frame, stack_pointer);
-                new_frame = _PyEvalFramePushAndInit(
+                _PyInterpreterFrame *temp = _PyEvalFramePushAndInit(
                     tstate, callable[0], locals,
                     args, positional_args, kwnames_o, frame
                 );
@@ -2101,12 +2101,12 @@
                 PyStackRef_CLOSE(kwnames);
                 // The frame has stolen all the arguments from the stack,
                 // so there is no need to clean them up.
-                stack_pointer[-3 - oparg].bits = (uintptr_t)new_frame;
-                stack_pointer += -2 - oparg;
+                stack_pointer += -3 - oparg;
                 assert(WITHIN_STACK_BOUNDS());
-                if (new_frame == NULL) {
+                if (temp == NULL) {
                     goto error;
                 }
+                new_frame = temp;
             }
             // _SAVE_RETURN_OFFSET
             {
@@ -2123,8 +2123,6 @@
                 // Eventually this should be the only occurrence of this code.
                 assert(tstate->interp->eval_frame == NULL);
                 _PyInterpreterFrame *temp = new_frame;
-                stack_pointer += -1;
-                assert(WITHIN_STACK_BOUNDS());
                 _PyFrame_SetStackPointer(frame, stack_pointer);
                 assert(new_frame->previous == frame || 
new_frame->previous->previous == frame);
                 CALL_STAT_INC(inlined_py_calls);
@@ -2271,7 +2269,7 @@
                 int code_flags = 
((PyCodeObject*)PyFunction_GET_CODE(callable_o))->co_flags;
                 PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : 
Py_NewRef(PyFunction_GET_GLOBALS(callable_o));
                 _PyFrame_SetStackPointer(frame, stack_pointer);
-                new_frame = _PyEvalFramePushAndInit(
+                _PyInterpreterFrame *temp = _PyEvalFramePushAndInit(
                     tstate, callable[0], locals,
                     args, positional_args, kwnames_o, frame
                 );
@@ -2279,12 +2277,12 @@
                 PyStackRef_CLOSE(kwnames);
                 // The frame has stolen all the arguments from the stack,
                 // so there is no need to clean them up.
-                stack_pointer[-3 - oparg].bits = (uintptr_t)new_frame;
-                stack_pointer += -2 - oparg;
+                stack_pointer += -3 - oparg;
                 assert(WITHIN_STACK_BOUNDS());
-                if (new_frame == NULL) {
+                if (temp == NULL) {
                     goto error;
                 }
+                new_frame = temp;
             }
             // _SAVE_RETURN_OFFSET
             {
@@ -2301,8 +2299,6 @@
                 // Eventually this should be the only occurrence of this code.
                 assert(tstate->interp->eval_frame == NULL);
                 _PyInterpreterFrame *temp = new_frame;
-                stack_pointer += -1;
-                assert(WITHIN_STACK_BOUNDS());
                 _PyFrame_SetStackPointer(frame, stack_pointer);
                 assert(new_frame->previous == frame || 
new_frame->previous->previous == frame);
                 CALL_STAT_INC(inlined_py_calls);
diff --git a/Python/specialize.c b/Python/specialize.c
index d3fea717243847..fd182e7d7a9215 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -2107,7 +2107,7 @@ specialize_py_call_kw(PyFunctionObject *func, 
_Py_CODEUNIT *instr, int nargs,
         return -1;
     }
     write_u32(cache->func_version, version);
-    instr->op.code = bound_method ? CALL_KW_BOUND_METHOD : CALL_KW_PY;
+    specialize(instr, bound_method ? CALL_KW_BOUND_METHOD : CALL_KW_PY);
     return 0;
 }
 
@@ -2202,10 +2202,9 @@ _Py_Specialize_CallKw(_PyStackRef callable_st, 
_Py_CODEUNIT *instr, int nargs)
 {
     PyObject *callable = PyStackRef_AsPyObjectBorrow(callable_st);
 
-    assert(ENABLE_SPECIALIZATION);
+    assert(ENABLE_SPECIALIZATION_FT);
     assert(_PyOpcode_Caches[CALL_KW] == INLINE_CACHE_ENTRIES_CALL_KW);
     assert(_Py_OPCODE(*instr) != INSTRUMENTED_CALL_KW);
-    _PyCallCache *cache = (_PyCallCache *)(instr + 1);
     int fail;
     if (PyFunction_Check(callable)) {
         fail = specialize_py_call_kw((PyFunctionObject *)callable, instr, 
nargs, false);
@@ -2221,19 +2220,11 @@ _Py_Specialize_CallKw(_PyStackRef callable_st, 
_Py_CODEUNIT *instr, int nargs)
         }
     }
     else {
-        instr->op.code = CALL_KW_NON_PY;
+        specialize(instr, CALL_KW_NON_PY);
         fail = 0;
     }
     if (fail) {
-        STAT_INC(CALL, failure);
-        assert(!PyErr_Occurred());
-        instr->op.code = CALL_KW;
-        cache->counter = adaptive_counter_backoff(cache->counter);
-    }
-    else {
-        STAT_INC(CALL, success);
-        assert(!PyErr_Occurred());
-        cache->counter = adaptive_counter_cooldown();
+        unspecialize(instr);
     }
 }
 

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

Reply via email to