https://github.com/python/cpython/commit/082dbf77884264a8470b1ea132cc458d0b5bf08a
commit: 082dbf77884264a8470b1ea132cc458d0b5bf08a
branch: main
author: Irit Katriel <[email protected]>
committer: iritkatriel <[email protected]>
date: 2025-05-05T17:46:56+01:00
summary:
gh-133395: add option for extension modules to specialize BINARY_OP/SUBSCR,
apply to arrays (#133396)
files:
A
Misc/NEWS.d/next/Core_and_Builtins/2025-05-04-19-32-47.gh-issue-133395.VhWWEP.rst
M Include/cpython/object.h
M Include/internal/pycore_code.h
M Include/internal/pycore_opcode_metadata.h
M Include/internal/pycore_uop_metadata.h
M Include/typeslots.h
M Lib/test/test_sys.py
M Modules/arraymodule.c
M Objects/typeslots.inc
M Python/bytecodes.c
M Python/executor_cases.c.h
M Python/generated_cases.c.h
M Python/optimizer_cases.c.h
M Python/specialize.c
diff --git a/Include/cpython/object.h b/Include/cpython/object.h
index 3a4d65f7712c63..818fc7d05605ff 100644
--- a/Include/cpython/object.h
+++ b/Include/cpython/object.h
@@ -143,6 +143,11 @@ typedef struct {
* backwards-compatibility */
typedef Py_ssize_t printfunc;
+/* Specialize a binary op by setting the descriptor pointer */
+struct _PyBinopSpecializationDescr;
+typedef int (*binop_specialize_func)(PyObject *v, PyObject *w, int oparg,
+ struct _PyBinopSpecializationDescr
**descr);
+
// If this structure is modified, Doc/includes/typestruct.h should be updated
// as well.
struct _typeobject {
@@ -233,6 +238,13 @@ struct _typeobject {
/* bitset of which type-watchers care about this type */
unsigned char tp_watched;
+ /* callback that may specialize BINARY_OP
+ * this is an experimental API based on the ideas in the paper
+ * Cross Module Quickening - The Curious Case of C Extensions
+ * by Felix Berlakovich and Stefan Brunthaler.
+ */
+ binop_specialize_func tp_binop_specialize;
+
/* Number of tp_version_tag values used.
* Set to _Py_ATTR_CACHE_UNUSED if the attribute cache is
* disabled for this type (e.g. due to custom MRO entries).
diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
index 635d2b24f4bdff..1c77730e417ef7 100644
--- a/Include/internal/pycore_code.h
+++ b/Include/internal/pycore_code.h
@@ -482,13 +482,18 @@ adaptive_counter_backoff(_Py_BackoffCounter counter) {
/* Specialization Extensions */
/* callbacks for an external specialization */
+
+struct _PyBinopSpecializationDescr;
+
typedef int (*binaryopguardfunc)(PyObject *lhs, PyObject *rhs);
-typedef PyObject *(*binaryopactionfunc)(PyObject *lhs, PyObject *rhs);
+typedef PyObject* (*binaryopactionfunc)(PyObject *lhs, PyObject *rhs);
+typedef void (*binaryopfreefunc)(struct _PyBinopSpecializationDescr *descr);
-typedef struct {
+typedef struct _PyBinopSpecializationDescr {
int oparg;
binaryopguardfunc guard;
binaryopactionfunc action;
+ binaryopfreefunc free;
} _PyBinaryOpSpecializationDescr;
/* Comparison bit masks. */
diff --git a/Include/internal/pycore_opcode_metadata.h
b/Include/internal/pycore_opcode_metadata.h
index dc7ecc998c4477..fbb696e1755590 100644
--- a/Include/internal/pycore_opcode_metadata.h
+++ b/Include/internal/pycore_opcode_metadata.h
@@ -1082,7 +1082,7 @@ const struct opcode_metadata
_PyOpcode_opcode_metadata[267] = {
[BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG |
HAS_ERROR_FLAG },
[BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG |
HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[BINARY_OP_ADD_UNICODE] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG |
HAS_ERROR_FLAG },
- [BINARY_OP_EXTEND] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG |
HAS_ESCAPES_FLAG },
+ [BINARY_OP_EXTEND] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG |
HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[BINARY_OP_INPLACE_ADD_UNICODE] = { true, INSTR_FMT_IXC0000,
HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG |
HAS_ESCAPES_FLAG },
[BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG |
HAS_ERROR_FLAG },
[BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG |
HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
@@ -1333,7 +1333,6 @@ _PyOpcode_macro_expansion[256] = {
[BINARY_OP_ADD_FLOAT] = { .nuops = 3, .uops = { { _GUARD_TOS_FLOAT,
OPARG_SIMPLE, 0 }, { _GUARD_NOS_FLOAT, OPARG_SIMPLE, 0 }, {
_BINARY_OP_ADD_FLOAT, OPARG_SIMPLE, 5 } } },
[BINARY_OP_ADD_INT] = { .nuops = 3, .uops = { { _GUARD_TOS_INT,
OPARG_SIMPLE, 0 }, { _GUARD_NOS_INT, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_INT,
OPARG_SIMPLE, 5 } } },
[BINARY_OP_ADD_UNICODE] = { .nuops = 3, .uops = { { _GUARD_TOS_UNICODE,
OPARG_SIMPLE, 0 }, { _GUARD_NOS_UNICODE, OPARG_SIMPLE, 0 }, {
_BINARY_OP_ADD_UNICODE, OPARG_SIMPLE, 5 } } },
- [BINARY_OP_EXTEND] = { .nuops = 2, .uops = { { _GUARD_BINARY_OP_EXTEND, 4,
1 }, { _BINARY_OP_EXTEND, 4, 1 } } },
[BINARY_OP_INPLACE_ADD_UNICODE] = { .nuops = 3, .uops = { {
_GUARD_TOS_UNICODE, OPARG_SIMPLE, 0 }, { _GUARD_NOS_UNICODE, OPARG_SIMPLE, 0 },
{ _BINARY_OP_INPLACE_ADD_UNICODE, OPARG_SIMPLE, 5 } } },
[BINARY_OP_MULTIPLY_FLOAT] = { .nuops = 3, .uops = { { _GUARD_TOS_FLOAT,
OPARG_SIMPLE, 0 }, { _GUARD_NOS_FLOAT, OPARG_SIMPLE, 0 }, {
_BINARY_OP_MULTIPLY_FLOAT, OPARG_SIMPLE, 5 } } },
[BINARY_OP_MULTIPLY_INT] = { .nuops = 3, .uops = { { _GUARD_TOS_INT,
OPARG_SIMPLE, 0 }, { _GUARD_NOS_INT, OPARG_SIMPLE, 0 }, {
_BINARY_OP_MULTIPLY_INT, OPARG_SIMPLE, 5 } } },
diff --git a/Include/internal/pycore_uop_metadata.h
b/Include/internal/pycore_uop_metadata.h
index 9b88763da07ba7..922d8a6cadf08c 100644
--- a/Include/internal/pycore_uop_metadata.h
+++ b/Include/internal/pycore_uop_metadata.h
@@ -94,8 +94,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
[_BINARY_OP_SUBTRACT_FLOAT] = HAS_ERROR_FLAG | HAS_PURE_FLAG,
[_BINARY_OP_ADD_UNICODE] = HAS_ERROR_FLAG | HAS_PURE_FLAG,
[_BINARY_OP_INPLACE_ADD_UNICODE] = HAS_LOCAL_FLAG | HAS_DEOPT_FLAG |
HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
- [_GUARD_BINARY_OP_EXTEND] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG,
- [_BINARY_OP_EXTEND] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG,
+ [_BINARY_OP_EXTEND] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG,
[_BINARY_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_STORE_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_BINARY_OP_SUBSCR_LIST_INT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG,
@@ -423,7 +422,6 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
[_GET_ITER] = "_GET_ITER",
[_GET_LEN] = "_GET_LEN",
[_GET_YIELD_FROM_ITER] = "_GET_YIELD_FROM_ITER",
- [_GUARD_BINARY_OP_EXTEND] = "_GUARD_BINARY_OP_EXTEND",
[_GUARD_CALLABLE_STR_1] = "_GUARD_CALLABLE_STR_1",
[_GUARD_CALLABLE_TUPLE_1] = "_GUARD_CALLABLE_TUPLE_1",
[_GUARD_CALLABLE_TYPE_1] = "_GUARD_CALLABLE_TYPE_1",
@@ -760,8 +758,6 @@ int _PyUop_num_popped(int opcode, int oparg)
return 2;
case _BINARY_OP_INPLACE_ADD_UNICODE:
return 2;
- case _GUARD_BINARY_OP_EXTEND:
- return 0;
case _BINARY_OP_EXTEND:
return 2;
case _BINARY_SLICE:
diff --git a/Include/typeslots.h b/Include/typeslots.h
index a7f3017ec02e92..980e714714e786 100644
--- a/Include/typeslots.h
+++ b/Include/typeslots.h
@@ -93,4 +93,5 @@
#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030E0000
/* New in 3.14 */
#define Py_tp_token 83
+#define Py_tp_binop_specialize 84
#endif
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index 5f7171d02c5602..aeca3720cfaedc 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -1776,7 +1776,7 @@ def delx(self): del self.__x
check((1,2,3), vsize('') + self.P + 3*self.P)
# type
# static type: PyTypeObject
- fmt = 'P2nPI13Pl4Pn9Pn12PIPc'
+ fmt = 'P2nPI13Pl4Pn9Pn12PI3Pc'
s = vsize(fmt)
check(int, s)
typeid = 'n' if support.Py_GIL_DISABLED else ''
diff --git
a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-04-19-32-47.gh-issue-133395.VhWWEP.rst
b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-04-19-32-47.gh-issue-133395.VhWWEP.rst
new file mode 100644
index 00000000000000..a391ce16339c26
--- /dev/null
+++
b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-04-19-32-47.gh-issue-133395.VhWWEP.rst
@@ -0,0 +1,2 @@
+Add option for extension modules to specialize ``BINARY_OP`` instructions.
+Applied to ``array`` objects.
diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c
index 401a3a7072b846..4d2ff32cabe467 100644
--- a/Modules/arraymodule.c
+++ b/Modules/arraymodule.c
@@ -14,6 +14,8 @@
#include "pycore_modsupport.h" // _PyArg_NoKeywords()
#include "pycore_moduleobject.h" // _PyModule_GetState()
+#include "opcode.h" // binary op opargs (NB_*)
+
#include <stddef.h> // offsetof()
#include <stdbool.h>
@@ -848,6 +850,10 @@ array_richcompare(PyObject *v, PyObject *w, int op)
return res;
}
+static int
+array_binop_specialize(PyObject *v, PyObject *w, int oparg,
+ _PyBinaryOpSpecializationDescr **descr);
+
static Py_ssize_t
array_length(PyObject *op)
{
@@ -2963,6 +2969,8 @@ static PyType_Slot array_slots[] = {
{Py_tp_alloc, PyType_GenericAlloc},
{Py_tp_new, array_new},
{Py_tp_traverse, array_tp_traverse},
+ {Py_tp_token, Py_TP_USE_SPEC},
+ {Py_tp_binop_specialize, array_binop_specialize},
/* as sequence */
{Py_sq_length, array_length},
@@ -2995,6 +3003,70 @@ static PyType_Spec array_spec = {
.slots = array_slots,
};
+static inline int
+array_subscr_guard(PyObject *lhs, PyObject *rhs)
+{
+ PyObject *exc = PyErr_GetRaisedException();
+ int ret = PyType_GetBaseByToken(Py_TYPE(lhs), &array_spec, NULL);
+ if (ret < 0) {
+ if (PyErr_ExceptionMatches(PyExc_TypeError)) {
+ PyErr_Clear();
+ ret = 0;
+ }
+ }
+ _PyErr_ChainExceptions1(exc);
+ return ret;
+}
+
+static PyObject *
+array_subscr_action(PyObject *lhs, PyObject *rhs)
+{
+ return array_subscr(lhs, rhs);
+}
+
+static void
+array_subscr_free(_PyBinaryOpSpecializationDescr* descr)
+{
+ if (descr != NULL) {
+ PyMem_Free(descr);
+ }
+}
+
+static int
+array_binop_specialize(PyObject *v, PyObject *w, int oparg,
+ _PyBinaryOpSpecializationDescr **descr)
+{
+ array_state *state = find_array_state_by_type(Py_TYPE(v));
+
+ if (!array_Check(v, state)) {
+ return 0;
+ }
+
+ *descr = NULL;
+ switch(oparg) {
+ case NB_SUBSCR:
+ if (array_subscr_guard(v, w)) {
+ *descr = (_PyBinaryOpSpecializationDescr*)PyMem_Malloc(
+ sizeof(_PyBinaryOpSpecializationDescr));
+ if (*descr == NULL) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ **descr = (_PyBinaryOpSpecializationDescr) {
+ .oparg = oparg,
+ .guard = array_subscr_guard,
+ .action = array_subscr_action,
+ .free = array_subscr_free,
+ };
+ return 1;
+ }
+ break;
+ }
+
+ return 0;
+}
+
+
/*********************** Array Iterator **************************/
/*[clinic input]
diff --git a/Objects/typeslots.inc b/Objects/typeslots.inc
index 642160fe0bd8bc..f197c3f5023670 100644
--- a/Objects/typeslots.inc
+++ b/Objects/typeslots.inc
@@ -82,3 +82,4 @@
{offsetof(PyAsyncMethods, am_send), offsetof(PyTypeObject, tp_as_async)},
{-1, offsetof(PyTypeObject, tp_vectorcall)},
{-1, offsetof(PyHeapTypeObject, ht_token)},
+{-1, offsetof(PyTypeObject, tp_binop_specialize)},
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index 5a52efaaec8042..cc47e57175d727 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -801,9 +801,19 @@ dummy_func(
PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
_PyBinaryOpSpecializationDescr *d =
(_PyBinaryOpSpecializationDescr*)descr;
assert(INLINE_CACHE_ENTRIES_BINARY_OP == 5);
- assert(d && d->guard);
+ assert(d);
+ assert(d->guard);
int res = d->guard(left_o, right_o);
- DEOPT_IF(!res);
+ ERROR_IF(res < 0);
+ if (res == 0) {
+ if (d->free) {
+ d->free(d);
+ }
+ _PyBinaryOpCache *cache = (_PyBinaryOpCache *)(this_instr+1);
+ write_ptr(cache->external_cache, NULL);
+ this_instr->op.code = BINARY_OP;
+ DEOPT_IF(true);
+ }
}
pure op(_BINARY_OP_EXTEND, (descr/4, left, right -- res)) {
@@ -816,6 +826,7 @@ dummy_func(
PyObject *res_o = d->action(left_o, right_o);
DECREF_INPUTS();
+ ERROR_IF(res_o == NULL);
res = PyStackRef_FromPyObjectSteal(res_o);
}
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index 7f3c3141ad00b9..662e050c5c478a 100644
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@@ -1147,26 +1147,7 @@
break;
}
- case _GUARD_BINARY_OP_EXTEND: {
- _PyStackRef right;
- _PyStackRef left;
- right = stack_pointer[-1];
- left = stack_pointer[-2];
- PyObject *descr = (PyObject *)CURRENT_OPERAND0();
- PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
- PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
- _PyBinaryOpSpecializationDescr *d =
(_PyBinaryOpSpecializationDescr*)descr;
- assert(INLINE_CACHE_ENTRIES_BINARY_OP == 5);
- assert(d && d->guard);
- _PyFrame_SetStackPointer(frame, stack_pointer);
- int res = d->guard(left_o, right_o);
- stack_pointer = _PyFrame_GetStackPointer(frame);
- if (!res) {
- UOP_STAT_INC(uopcode, miss);
- JUMP_TO_JUMP_TARGET();
- }
- break;
- }
+ /* _GUARD_BINARY_OP_EXTEND is not a viable micro-op for tier 2 because
it uses the 'this_instr' variable */
case _BINARY_OP_EXTEND: {
_PyStackRef right;
@@ -1193,6 +1174,9 @@
stack_pointer = _PyFrame_GetStackPointer(frame);
stack_pointer += -2;
assert(WITHIN_STACK_BOUNDS());
+ if (res_o == NULL) {
+ JUMP_TO_ERROR();
+ }
res = PyStackRef_FromPyObjectSteal(res_o);
stack_pointer[0] = res;
stack_pointer += 1;
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index ee54b385b7064e..072951d2a5fcf6 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -283,14 +283,30 @@
PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
_PyBinaryOpSpecializationDescr *d =
(_PyBinaryOpSpecializationDescr*)descr;
assert(INLINE_CACHE_ENTRIES_BINARY_OP == 5);
- assert(d && d->guard);
+ assert(d);
+ assert(d->guard);
_PyFrame_SetStackPointer(frame, stack_pointer);
int res = d->guard(left_o, right_o);
stack_pointer = _PyFrame_GetStackPointer(frame);
- if (!res) {
- UPDATE_MISS_STATS(BINARY_OP);
- assert(_PyOpcode_Deopt[opcode] == (BINARY_OP));
- JUMP_TO_PREDICTED(BINARY_OP);
+ if (res < 0) {
+ JUMP_TO_LABEL(error);
+ }
+ if (res == 0) {
+ if (d->free) {
+ _PyFrame_SetStackPointer(frame, stack_pointer);
+ d->free(d);
+ stack_pointer = _PyFrame_GetStackPointer(frame);
+ }
+ _PyBinaryOpCache *cache = (_PyBinaryOpCache
*)(this_instr+1);
+ _PyFrame_SetStackPointer(frame, stack_pointer);
+ write_ptr(cache->external_cache, NULL);
+ stack_pointer = _PyFrame_GetStackPointer(frame);
+ this_instr->op.code = BINARY_OP;
+ if (true) {
+ UPDATE_MISS_STATS(BINARY_OP);
+ assert(_PyOpcode_Deopt[opcode] == (BINARY_OP));
+ JUMP_TO_PREDICTED(BINARY_OP);
+ }
}
}
/* Skip -4 cache entry */
@@ -315,6 +331,9 @@
stack_pointer = _PyFrame_GetStackPointer(frame);
stack_pointer += -2;
assert(WITHIN_STACK_BOUNDS());
+ if (res_o == NULL) {
+ JUMP_TO_LABEL(error);
+ }
res = PyStackRef_FromPyObjectSteal(res_o);
}
stack_pointer[0] = res;
diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h
index 3f91f7eefc7bb0..8e8b2ecfa5b0cf 100644
--- a/Python/optimizer_cases.c.h
+++ b/Python/optimizer_cases.c.h
@@ -554,9 +554,7 @@
break;
}
- case _GUARD_BINARY_OP_EXTEND: {
- break;
- }
+ /* _GUARD_BINARY_OP_EXTEND is not a viable micro-op for tier 2 */
case _BINARY_OP_EXTEND: {
JitOptSymbol *res;
diff --git a/Python/specialize.c b/Python/specialize.c
index 59ec9a4cad6b9c..fe4a65ee5f8079 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -2534,7 +2534,7 @@ LONG_FLOAT_ACTION(compactlong_float_multiply, *)
LONG_FLOAT_ACTION(compactlong_float_true_div, /)
#undef LONG_FLOAT_ACTION
-static _PyBinaryOpSpecializationDescr binaryop_extend_descrs[] = {
+static const _PyBinaryOpSpecializationDescr binaryop_extend_builtins[] = {
/* long-long arithmetic */
{NB_OR, compactlongs_guard, compactlongs_or},
{NB_AND, compactlongs_guard, compactlongs_and},
@@ -2560,14 +2560,41 @@ static int
binary_op_extended_specialization(PyObject *lhs, PyObject *rhs, int oparg,
_PyBinaryOpSpecializationDescr **descr)
{
- size_t n =
sizeof(binaryop_extend_descrs)/sizeof(_PyBinaryOpSpecializationDescr);
- for (size_t i = 0; i < n; i++) {
- _PyBinaryOpSpecializationDescr *d = &binaryop_extend_descrs[i];
+ /* We are currently using this only for NB_SUBSCR, which is not
+ * commutative. Will need to revisit this function when we use
+ * this for operators which are.
+ */
+
+ typedef _PyBinaryOpSpecializationDescr descr_type;
+ size_t size = Py_ARRAY_LENGTH(binaryop_extend_builtins);
+ for (size_t i = 0; i < size; i++) {
+ descr_type *d = (descr_type *)&binaryop_extend_builtins[i];
+ assert(d != NULL);
+ assert(d->guard != NULL);
if (d->oparg == oparg && d->guard(lhs, rhs)) {
*descr = d;
return 1;
}
}
+
+ PyTypeObject *lhs_type = Py_TYPE(lhs);
+ if (lhs_type->tp_binop_specialize != NULL) {
+ int ret = lhs_type->tp_binop_specialize(lhs, rhs, oparg, descr);
+ if (ret < 0) {
+ return -1;
+ }
+ if (ret == 1) {
+ if (*descr == NULL) {
+ PyErr_Format(
+ PyExc_ValueError,
+ "tp_binop_specialize of '%T' returned 1 with *descr ==
NULL",
+ lhs);
+ return -1;
+ }
+ (*descr)->oparg = oparg;
+ }
+ return ret;
+ }
return 0;
}
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]