https://github.com/python/cpython/commit/f0df35eeca2ccdfd58cfb9801f06ffa23537270b
commit: f0df35eeca2ccdfd58cfb9801f06ffa23537270b
branch: main
author: Brandt Bucher <[email protected]>
committer: brandtbucher <[email protected]>
date: 2024-02-29T08:11:28-08:00
summary:

GH-115802: JIT "small" code for Windows (GH-115964)

files:
M Include/cpython/optimizer.h
M Include/internal/pycore_ceval.h
M Include/internal/pycore_dict.h
M Include/internal/pycore_floatobject.h
M Include/internal/pycore_function.h
M Include/internal/pycore_genobject.h
M Include/internal/pycore_intrinsics.h
M Include/internal/pycore_list.h
M Include/internal/pycore_long.h
M Include/internal/pycore_object.h
M Include/internal/pycore_optimizer.h
M Include/internal/pycore_pyerrors.h
M Include/internal/pycore_sliceobject.h
M Include/internal/pycore_tuple.h
M Include/internal/pycore_typeobject.h
M Include/internal/pycore_unicodeobject.h
M Python/bytecodes.c
M Python/ceval.c
M Python/ceval_macros.h
M Python/executor_cases.c.h
M Python/generated_cases.c.h
M Python/jit.c
M Tools/jit/_schema.py
M Tools/jit/_stencils.py
M Tools/jit/_targets.py
M Tools/jit/template.c

diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h
index 8fc9fb62aebdb4..6d7b8bc3c1433a 100644
--- a/Include/cpython/optimizer.h
+++ b/Include/cpython/optimizer.h
@@ -92,9 +92,6 @@ PyAPI_FUNC(_PyOptimizerObject *) PyUnstable_GetOptimizer(void);
 
 PyAPI_FUNC(_PyExecutorObject *) PyUnstable_GetExecutor(PyCodeObject *code, int offset);
 
-int
-_PyOptimizer_Optimize(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer, _PyExecutorObject **exec_ptr);
-
 void _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *);
 void _Py_ExecutorClear(_PyExecutorObject *);
 void _Py_BloomFilter_Init(_PyBloomFilter *);
diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h
index bf77526cf75cc1..6eab2ba1daedf8 100644
--- a/Include/internal/pycore_ceval.h
+++ b/Include/internal/pycore_ceval.h
@@ -181,22 +181,26 @@ extern PyObject* _Py_MakeCoro(PyFunctionObject *func);
 
 /* Handle signals, pending calls, GIL drop request
    and asynchronous exception */
-extern int _Py_HandlePending(PyThreadState *tstate);
+PyAPI_FUNC(int) _Py_HandlePending(PyThreadState *tstate);
 
 extern PyObject * _PyEval_GetFrameLocals(void);
 
-extern const binaryfunc _PyEval_BinaryOps[];
-int _PyEval_CheckExceptStarTypeValid(PyThreadState *tstate, PyObject* right);
-int _PyEval_CheckExceptTypeValid(PyThreadState *tstate, PyObject* right);
-int _PyEval_ExceptionGroupMatch(PyObject* exc_value, PyObject *match_type, PyObject **match, PyObject **rest);
-void _PyEval_FormatAwaitableError(PyThreadState *tstate, PyTypeObject *type, int oparg);
-void _PyEval_FormatExcCheckArg(PyThreadState *tstate, PyObject *exc, const char *format_str, PyObject *obj);
-void _PyEval_FormatExcUnbound(PyThreadState *tstate, PyCodeObject *co, int oparg);
-void _PyEval_FormatKwargsError(PyThreadState *tstate, PyObject *func, PyObject *kwargs);
-PyObject *_PyEval_MatchClass(PyThreadState *tstate, PyObject *subject, PyObject *type, Py_ssize_t nargs, PyObject *kwargs);
-PyObject *_PyEval_MatchKeys(PyThreadState *tstate, PyObject *map, PyObject *keys);
-int _PyEval_UnpackIterable(PyThreadState *tstate, PyObject *v, int argcnt, int argcntafter, PyObject **sp);
-void _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame);
+typedef PyObject *(*conversion_func)(PyObject *);
+
+PyAPI_DATA(const binaryfunc) _PyEval_BinaryOps[];
+PyAPI_DATA(const conversion_func) _PyEval_ConversionFuncs[];
+
+PyAPI_FUNC(int) _PyEval_CheckExceptStarTypeValid(PyThreadState *tstate, PyObject* right);
+PyAPI_FUNC(int) _PyEval_CheckExceptTypeValid(PyThreadState *tstate, PyObject* right);
+PyAPI_FUNC(int) _PyEval_ExceptionGroupMatch(PyObject* exc_value, PyObject *match_type, PyObject **match, PyObject **rest);
+PyAPI_FUNC(void) _PyEval_FormatAwaitableError(PyThreadState *tstate, PyTypeObject *type, int oparg);
+PyAPI_FUNC(void) _PyEval_FormatExcCheckArg(PyThreadState *tstate, PyObject *exc, const char *format_str, PyObject *obj);
+PyAPI_FUNC(void) _PyEval_FormatExcUnbound(PyThreadState *tstate, PyCodeObject *co, int oparg);
+PyAPI_FUNC(void) _PyEval_FormatKwargsError(PyThreadState *tstate, PyObject *func, PyObject *kwargs);
+PyAPI_FUNC(PyObject *)_PyEval_MatchClass(PyThreadState *tstate, PyObject *subject, PyObject *type, Py_ssize_t nargs, PyObject *kwargs);
+PyAPI_FUNC(PyObject *)_PyEval_MatchKeys(PyThreadState *tstate, PyObject *map, PyObject *keys);
+PyAPI_FUNC(int) _PyEval_UnpackIterable(PyThreadState *tstate, PyObject *v, int argcnt, int argcntafter, PyObject **sp);
+PyAPI_FUNC(void) _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame);
 
 
 /* Bits that can be set in PyThreadState.eval_breaker */
diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h
index d1a0010b9a81dd..cd171a4384db5d 100644
--- a/Include/internal/pycore_dict.h
+++ b/Include/internal/pycore_dict.h
@@ -52,7 +52,7 @@ PyAPI_FUNC(Py_ssize_t) _PyDict_SizeOf(PyDictObject *);
    of a key wins, if override is 2, a KeyError with conflicting key as
    argument is raised.
 */
-extern int _PyDict_MergeEx(PyObject *mp, PyObject *other, int override);
+PyAPI_FUNC(int) _PyDict_MergeEx(PyObject *mp, PyObject *other, int override);
 
 extern void _PyDict_DebugMallocStats(FILE *out);
 
@@ -100,10 +100,10 @@ extern Py_ssize_t _Py_dict_lookup(PyDictObject *mp, PyObject *key, Py_hash_t has
 
 extern Py_ssize_t _PyDict_LookupIndex(PyDictObject *, PyObject *);
 extern Py_ssize_t _PyDictKeys_StringLookup(PyDictKeysObject* dictkeys, PyObject *key);
-extern PyObject *_PyDict_LoadGlobal(PyDictObject *, PyDictObject *, PyObject *);
+PyAPI_FUNC(PyObject *)_PyDict_LoadGlobal(PyDictObject *, PyDictObject *, PyObject *);
 
 /* Consumes references to key and value */
-extern int _PyDict_SetItem_Take2(PyDictObject *op, PyObject *key, PyObject *value);
+PyAPI_FUNC(int) _PyDict_SetItem_Take2(PyDictObject *op, PyObject *key, PyObject *value);
 extern int _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, PyObject *name, PyObject *value);
 
 extern int _PyDict_Pop_KnownHash(
@@ -247,8 +247,8 @@ _PyDict_NotifyEvent(PyInterpreterState *interp,
 }
 
 extern PyObject *_PyObject_MakeDictFromInstanceAttributes(PyObject *obj, PyDictValues *values);
-extern bool _PyObject_MakeInstanceAttributesFromDict(PyObject *obj, PyDictOrValues *dorv);
-extern PyObject *_PyDict_FromItems(
+PyAPI_FUNC(bool) _PyObject_MakeInstanceAttributesFromDict(PyObject *obj, PyDictOrValues *dorv);
+PyAPI_FUNC(PyObject *)_PyDict_FromItems(
         PyObject *const *keys, Py_ssize_t keys_offset,
         PyObject *const *values, Py_ssize_t values_offset,
         Py_ssize_t length);
diff --git a/Include/internal/pycore_floatobject.h b/Include/internal/pycore_floatobject.h
index 3767df5506d43f..f984df695696c3 100644
--- a/Include/internal/pycore_floatobject.h
+++ b/Include/internal/pycore_floatobject.h
@@ -34,7 +34,7 @@ struct _Py_float_runtime_state {
 
 
 
-void _PyFloat_ExactDealloc(PyObject *op);
+PyAPI_FUNC(void) _PyFloat_ExactDealloc(PyObject *op);
 
 
 extern void _PyFloat_DebugMallocStats(FILE* out);
diff --git a/Include/internal/pycore_function.h b/Include/internal/pycore_function.h
index 3f3da8a44b77e4..dad6a89af77dec 100644
--- a/Include/internal/pycore_function.h
+++ b/Include/internal/pycore_function.h
@@ -29,7 +29,7 @@ struct _py_func_state {
 extern PyFunctionObject* _PyFunction_FromConstructor(PyFrameConstructor *constr);
 
 extern uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func);
-extern void _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version);
+PyAPI_FUNC(void) _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version);
 PyFunctionObject *_PyFunction_LookupByVersion(uint32_t version);
 
 extern PyObject *_Py_set_function_type_params(
diff --git a/Include/internal/pycore_genobject.h b/Include/internal/pycore_genobject.h
index b2aa017598409f..9463c822ad8669 100644
--- a/Include/internal/pycore_genobject.h
+++ b/Include/internal/pycore_genobject.h
@@ -10,7 +10,7 @@ extern "C" {
 
 #include "pycore_freelist.h"
 
-extern PyObject *_PyGen_yf(PyGenObject *);
+PyAPI_FUNC(PyObject *)_PyGen_yf(PyGenObject *);
 extern void _PyGen_Finalize(PyObject *self);
 
 // Export for '_asyncio' shared extension
@@ -19,7 +19,7 @@ PyAPI_FUNC(int) _PyGen_SetStopIterationValue(PyObject *);
 // Export for '_asyncio' shared extension
 PyAPI_FUNC(int) _PyGen_FetchStopIterationValue(PyObject **);
 
-extern PyObject *_PyCoro_GetAwaitableIter(PyObject *o);
+PyAPI_FUNC(PyObject *)_PyCoro_GetAwaitableIter(PyObject *o);
 extern PyObject *_PyAsyncGenValueWrapperNew(PyThreadState *state, PyObject *);
 
 extern PyTypeObject _PyCoroWrapper_Type;
diff --git a/Include/internal/pycore_intrinsics.h b/Include/internal/pycore_intrinsics.h
index 3a8dd95cff8e5d..8fa88ea3f74caa 100644
--- a/Include/internal/pycore_intrinsics.h
+++ b/Include/internal/pycore_intrinsics.h
@@ -44,7 +44,7 @@ typedef struct {
     const char *name;
 } intrinsic_func2_info;
 
-extern const intrinsic_func1_info _PyIntrinsics_UnaryFunctions[];
-extern const intrinsic_func2_info _PyIntrinsics_BinaryFunctions[];
+PyAPI_DATA(const intrinsic_func1_info) _PyIntrinsics_UnaryFunctions[];
+PyAPI_DATA(const intrinsic_func2_info) _PyIntrinsics_BinaryFunctions[];
 
 #endif  // !Py_INTERNAL_INTRINSIC_H
diff --git a/Include/internal/pycore_list.h b/Include/internal/pycore_list.h
index 50dc13c4da4487..2a82912e41d557 100644
--- a/Include/internal/pycore_list.h
+++ b/Include/internal/pycore_list.h
@@ -10,12 +10,12 @@ extern "C" {
 
 #include "pycore_freelist.h"  // _PyFreeListState
 
-extern PyObject* _PyList_Extend(PyListObject *, PyObject *);
+PyAPI_FUNC(PyObject*) _PyList_Extend(PyListObject *, PyObject *);
 extern void _PyList_DebugMallocStats(FILE *out);
 
 #define _PyList_ITEMS(op) _Py_RVALUE(_PyList_CAST(op)->ob_item)
 
-extern int
+PyAPI_FUNC(int)
 _PyList_AppendTakeRefListResize(PyListObject *self, PyObject *newitem);
 
 // In free-threaded build: self should be locked by the caller, if it should be thread-safe.
@@ -54,7 +54,7 @@ typedef struct {
     PyListObject *it_seq; /* Set to NULL when iterator is exhausted */
 } _PyListIterObject;
 
-extern PyObject *_PyList_FromArraySteal(PyObject *const *src, Py_ssize_t n);
+PyAPI_FUNC(PyObject *)_PyList_FromArraySteal(PyObject *const *src, Py_ssize_t n);
 
 #ifdef __cplusplus
 }
diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h
index ec27df9e416c58..f04f66d053bab9 100644
--- a/Include/internal/pycore_long.h
+++ b/Include/internal/pycore_long.h
@@ -121,9 +121,9 @@ PyAPI_DATA(PyObject*) _PyLong_Rshift(PyObject *, size_t);
 // Export for 'math' shared extension
 PyAPI_DATA(PyObject*) _PyLong_Lshift(PyObject *, size_t);
 
-extern PyObject* _PyLong_Add(PyLongObject *left, PyLongObject *right);
-extern PyObject* _PyLong_Multiply(PyLongObject *left, PyLongObject *right);
-extern PyObject* _PyLong_Subtract(PyLongObject *left, PyLongObject *right);
+PyAPI_FUNC(PyObject*) _PyLong_Add(PyLongObject *left, PyLongObject *right);
+PyAPI_FUNC(PyObject*) _PyLong_Multiply(PyLongObject *left, PyLongObject *right);
+PyAPI_FUNC(PyObject*) _PyLong_Subtract(PyLongObject *left, PyLongObject *right);
 
 // Export for 'binascii' shared extension.
 PyAPI_DATA(unsigned char) _PyLong_DigitValue[256];
diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h
index 34a83ea228e8b1..9809f5f2e0271a 100644
--- a/Include/internal/pycore_object.h
+++ b/Include/internal/pycore_object.h
@@ -73,7 +73,7 @@ PyAPI_FUNC(int) _PyObject_IsFreed(PyObject *);
         .ob_size = size                       \
     }
 
-extern void _Py_NO_RETURN _Py_FatalRefcountErrorFunc(
+PyAPI_FUNC(void) _Py_NO_RETURN _Py_FatalRefcountErrorFunc(
     const char *func,
     const char *message);
 
@@ -684,7 +684,7 @@ PyAPI_FUNC(PyObject*) _PyObject_LookupSpecial(PyObject *, PyObject *);
 
 extern int _PyObject_IsAbstract(PyObject *);
 
-extern int _PyObject_GetMethod(PyObject *obj, PyObject *name, PyObject **method);
+PyAPI_FUNC(int) _PyObject_GetMethod(PyObject *obj, PyObject *name, PyObject **method);
 extern PyObject* _PyObject_NextNotImplemented(PyObject *);
 
 // Pickle support.
diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h
index 614850468ec1d3..4894f613b5fb91 100644
--- a/Include/internal/pycore_optimizer.h
+++ b/Include/internal/pycore_optimizer.h
@@ -111,6 +111,8 @@ extern int _Py_uop_frame_pop(_Py_UOpsContext *ctx);
 
 PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored);
 
+PyAPI_FUNC(int) _PyOptimizer_Optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer, _PyExecutorObject **exec_ptr);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/Include/internal/pycore_pyerrors.h b/Include/internal/pycore_pyerrors.h
index 0f16fb894d17e1..910335fd2cf33b 100644
--- a/Include/internal/pycore_pyerrors.h
+++ b/Include/internal/pycore_pyerrors.h
@@ -95,7 +95,7 @@ extern void _PyErr_Fetch(
 
 extern PyObject* _PyErr_GetRaisedException(PyThreadState *tstate);
 
-extern int _PyErr_ExceptionMatches(
+PyAPI_FUNC(int) _PyErr_ExceptionMatches(
     PyThreadState *tstate,
     PyObject *exc);
 
@@ -114,18 +114,18 @@ extern void _PyErr_SetObject(
 
 extern void _PyErr_ChainStackItem(void);
 
-extern void _PyErr_Clear(PyThreadState *tstate);
+PyAPI_FUNC(void) _PyErr_Clear(PyThreadState *tstate);
 
 extern void _PyErr_SetNone(PyThreadState *tstate, PyObject *exception);
 
 extern PyObject* _PyErr_NoMemory(PyThreadState *tstate);
 
-extern void _PyErr_SetString(
+PyAPI_FUNC(void) _PyErr_SetString(
     PyThreadState *tstate,
     PyObject *exception,
     const char *string);
 
-extern PyObject* _PyErr_Format(
+PyAPI_FUNC(PyObject*) _PyErr_Format(
     PyThreadState *tstate,
     PyObject *exception,
     const char *format,
diff --git a/Include/internal/pycore_sliceobject.h b/Include/internal/pycore_sliceobject.h
index 89086f67683a2f..ba8b1f1cb27dee 100644
--- a/Include/internal/pycore_sliceobject.h
+++ b/Include/internal/pycore_sliceobject.h
@@ -11,7 +11,7 @@ extern "C" {
 
 /* runtime lifecycle */
 
-extern PyObject *
+PyAPI_FUNC(PyObject *)
 _PyBuildSlice_ConsumeRefs(PyObject *start, PyObject *stop);
 
 #ifdef __cplusplus
diff --git a/Include/internal/pycore_tuple.h b/Include/internal/pycore_tuple.h
index 4605f355ccbc38..14a9e42c3a324c 100644
--- a/Include/internal/pycore_tuple.h
+++ b/Include/internal/pycore_tuple.h
@@ -21,7 +21,7 @@ extern PyStatus _PyTuple_InitGlobalObjects(PyInterpreterState *);
 #define _PyTuple_ITEMS(op) _Py_RVALUE(_PyTuple_CAST(op)->ob_item)
 
 extern PyObject *_PyTuple_FromArray(PyObject *const *, Py_ssize_t);
-extern PyObject *_PyTuple_FromArraySteal(PyObject *const *, Py_ssize_t);
+PyAPI_FUNC(PyObject *)_PyTuple_FromArraySteal(PyObject *const *, Py_ssize_t);
 
 typedef struct {
     PyObject_HEAD
diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h
index 9134ab45cd0039..c214111fed6f97 100644
--- a/Include/internal/pycore_typeobject.h
+++ b/Include/internal/pycore_typeobject.h
@@ -147,7 +147,7 @@ extern PyObject* _Py_slot_tp_getattr_hook(PyObject *self, PyObject *name);
 
 extern PyTypeObject _PyBufferWrapper_Type;
 
-extern PyObject* _PySuper_Lookup(PyTypeObject *su_type, PyObject *su_obj,
+PyAPI_FUNC(PyObject*) _PySuper_Lookup(PyTypeObject *su_type, PyObject *su_obj,
                                  PyObject *name, int *meth_found);
 
 
diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h
index 7ee540154b23d8..fea5ceea0954f4 100644
--- a/Include/internal/pycore_unicodeobject.h
+++ b/Include/internal/pycore_unicodeobject.h
@@ -31,7 +31,7 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
     PyObject *op,
     int check_content);
 
-extern void _PyUnicode_ExactDealloc(PyObject *op);
+PyAPI_FUNC(void) _PyUnicode_ExactDealloc(PyObject *op);
 extern Py_ssize_t _PyUnicode_InternedSize(void);
 
 // Get a copy of a Unicode string.
@@ -202,7 +202,7 @@ PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
 
 /* --- Methods & Slots ---------------------------------------------------- */
 
-extern PyObject* _PyUnicode_JoinArray(
+PyAPI_FUNC(PyObject*) _PyUnicode_JoinArray(
     PyObject *separator,
     PyObject *const *items,
     Py_ssize_t seqlen
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index 565379afc4b5a7..1d515098f6c7e9 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -2755,7 +2755,7 @@ dummy_func(
                 GOTO_ERROR(error);
             }
             DECREF_INPUTS();
-            res = _PyObject_CallNoArgsTstate(tstate, enter);
+            res = PyObject_CallNoArgs(enter);
             Py_DECREF(enter);
             if (res == NULL) {
                 Py_DECREF(exit);
@@ -2790,7 +2790,7 @@ dummy_func(
                 GOTO_ERROR(error);
             }
             DECREF_INPUTS();
-            res = _PyObject_CallNoArgsTstate(tstate, enter);
+            res = PyObject_CallNoArgs(enter);
             Py_DECREF(enter);
             if (res == NULL) {
                 Py_DECREF(exit);
@@ -3822,9 +3822,9 @@ dummy_func(
         }
 
         inst(CONVERT_VALUE, (value -- result)) {
-            convertion_func_ptr  conv_fn;
+            conversion_func conv_fn;
             assert(oparg >= FVC_STR && oparg <= FVC_ASCII);
-            conv_fn = CONVERSION_FUNCTIONS[oparg];
+            conv_fn = _PyEval_ConversionFuncs[oparg];
             result = conv_fn(value);
             Py_DECREF(value);
             ERROR_IF(result == NULL, error);
diff --git a/Python/ceval.c b/Python/ceval.c
index 41e9310938d826..34f286e4e17eb8 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -337,6 +337,12 @@ const binaryfunc _PyEval_BinaryOps[] = {
     [NB_INPLACE_XOR] = PyNumber_InPlaceXor,
 };
 
+const conversion_func _PyEval_ConversionFuncs[4] = {
+    [FVC_STR] = PyObject_Str,
+    [FVC_REPR] = PyObject_Repr,
+    [FVC_ASCII] = PyObject_ASCII
+};
+
 
 // PEP 634: Structural Pattern Matching
 
diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h
index 6ad41950ea3b7e..9674a7824a4690 100644
--- a/Python/ceval_macros.h
+++ b/Python/ceval_macros.h
@@ -352,13 +352,6 @@ do { \
     } \
 } while (0);
 
-typedef PyObject *(*convertion_func_ptr)(PyObject *);
-
-static const convertion_func_ptr CONVERSION_FUNCTIONS[4] = {
-    [FVC_STR] = PyObject_Str,
-    [FVC_REPR] = PyObject_Repr,
-    [FVC_ASCII] = PyObject_ASCII
-};
 
 // GH-89279: Force inlining by using a macro.
 #if defined(_MSC_VER) && SIZEOF_INT == 4
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index 20fab8f4c61eb5..9ec1be9076a5a0 100644
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@@ -2548,7 +2548,7 @@
                 GOTO_ERROR(error);
             }
             Py_DECREF(mgr);
-            res = _PyObject_CallNoArgsTstate(tstate, enter);
+            res = PyObject_CallNoArgs(enter);
             Py_DECREF(enter);
             if (res == NULL) {
                 Py_DECREF(exit);
@@ -2591,7 +2591,7 @@
                 GOTO_ERROR(error);
             }
             Py_DECREF(mgr);
-            res = _PyObject_CallNoArgsTstate(tstate, enter);
+            res = PyObject_CallNoArgs(enter);
             Py_DECREF(enter);
             if (res == NULL) {
                 Py_DECREF(exit);
@@ -3570,9 +3570,9 @@
             PyObject *result;
             oparg = CURRENT_OPARG();
             value = stack_pointer[-1];
-            convertion_func_ptr  conv_fn;
+            conversion_func conv_fn;
             assert(oparg >= FVC_STR && oparg <= FVC_ASCII);
-            conv_fn = CONVERSION_FUNCTIONS[oparg];
+            conv_fn = _PyEval_ConversionFuncs[oparg];
             result = conv_fn(value);
             Py_DECREF(value);
             if (result == NULL) goto pop_1_error_tier_two;
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index bb26dac0e2be20..3312078e9a2d4d 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -40,7 +40,7 @@
                 GOTO_ERROR(error);
             }
             Py_DECREF(mgr);
-            res = _PyObject_CallNoArgsTstate(tstate, enter);
+            res = PyObject_CallNoArgs(enter);
             Py_DECREF(enter);
             if (res == NULL) {
                 Py_DECREF(exit);
@@ -86,7 +86,7 @@
                 GOTO_ERROR(error);
             }
             Py_DECREF(mgr);
-            res = _PyObject_CallNoArgsTstate(tstate, enter);
+            res = PyObject_CallNoArgs(enter);
             Py_DECREF(enter);
             if (res == NULL) {
                 Py_DECREF(exit);
@@ -2140,9 +2140,9 @@
             PyObject *value;
             PyObject *result;
             value = stack_pointer[-1];
-            convertion_func_ptr  conv_fn;
+            conversion_func conv_fn;
             assert(oparg >= FVC_STR && oparg <= FVC_ASCII);
-            conv_fn = CONVERSION_FUNCTIONS[oparg];
+            conv_fn = _PyEval_ConversionFuncs[oparg];
             result = conv_fn(value);
             Py_DECREF(value);
             if (result == NULL) goto pop_1_error;
diff --git a/Python/jit.c b/Python/jit.c
index ac2c60ed925a26..9f9e123ab91fef 100644
--- a/Python/jit.c
+++ b/Python/jit.c
@@ -203,13 +203,14 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches)
                 *loc32 = (uint32_t)value;
                 continue;
             case HoleKind_ARM64_RELOC_UNSIGNED:
-            case HoleKind_IMAGE_REL_AMD64_ADDR64:
             case HoleKind_R_AARCH64_ABS64:
             case HoleKind_X86_64_RELOC_UNSIGNED:
             case HoleKind_R_X86_64_64:
                 // 64-bit absolute address.
                 *loc64 = value;
                 continue;
+            case HoleKind_IMAGE_REL_AMD64_REL32:
+            case HoleKind_IMAGE_REL_I386_REL32:
             case HoleKind_R_X86_64_GOTPCRELX:
             case HoleKind_R_X86_64_REX_GOTPCRELX:
             case HoleKind_X86_64_RELOC_GOT:
@@ -249,7 +250,7 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches)
                 // Check that we're not out of range of 32 signed bits:
                 assert((int64_t)value >= -(1LL << 31));
                 assert((int64_t)value < (1LL << 31));
-                loc32[0] = (uint32_t)value;
+                *loc32 = (uint32_t)value;
                 continue;
             case HoleKind_R_AARCH64_CALL26:
             case HoleKind_R_AARCH64_JUMP26:
@@ -307,23 +308,23 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches)
                     next_hole->addend == hole->addend &&
                     next_hole->value == hole->value)
                 {
-                    unsigned char rd = get_bits(loc32[0], 0, 5);
+                    unsigned char reg = get_bits(loc32[0], 0, 5);
                     assert(IS_AARCH64_LDR_OR_STR(loc32[1]));
-                    unsigned char rt = get_bits(loc32[1], 0, 5);
-                    unsigned char rn = get_bits(loc32[1], 5, 5);
-                    assert(rd == rn && rn == rt);
+                    // There should be only one register involved:
+                    assert(reg == get_bits(loc32[1], 0, 5));  // ldr's output register.
+                    assert(reg == get_bits(loc32[1], 5, 5));  // ldr's input register.
                     uint64_t relaxed = *(uint64_t *)value;
                     if (relaxed < (1UL << 16)) {
                         // adrp reg, AAA; ldr reg, [reg + BBB] -> movz reg, XXX; nop
-                        loc32[0] = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | rd;
+                        loc32[0] = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | reg;
                         loc32[1] = 0xD503201F;
                         i++;
                         continue;
                     }
                     if (relaxed < (1ULL << 32)) {
                         // adrp reg, AAA; ldr reg, [reg + BBB] -> movz reg, XXX; movk reg, YYY
-                        loc32[0] = 0xD2800000 | (get_bits(relaxed,  0, 16) << 5) | rd;
-                        loc32[1] = 0xF2A00000 | (get_bits(relaxed, 16, 16) << 5) | rd;
+                        loc32[0] = 0xD2800000 | (get_bits(relaxed,  0, 16) << 5) | reg;
+                        loc32[1] = 0xF2A00000 | (get_bits(relaxed, 16, 16) << 5) | reg;
                         i++;
                         continue;
                     }
@@ -332,13 +333,15 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches)
                         (int64_t)relaxed >= -(1L << 19) &&
                         (int64_t)relaxed < (1L << 19))
                     {
-                        // adrp reg, AAA; ldr reg, [reg + BBB] -> ldr x0, XXX; nop
-                        loc32[0] = 0x58000000 | (get_bits(relaxed, 2, 19) << 5) | rd;
+                        // adrp reg, AAA; ldr reg, [reg + BBB] -> ldr reg, XXX; nop
+                        loc32[0] = 0x58000000 | (get_bits(relaxed, 2, 19) << 5) | reg;
                         loc32[1] = 0xD503201F;
                         i++;
                         continue;
                     }
                 }
+                // Fall through...
+            case HoleKind_ARM64_RELOC_PAGE21:
                 // Number of pages between this page and the value's page:
                 value = (value >> 12) - ((uint64_t)location >> 12);
                 // Check that we're not out of range of 21 signed bits:
@@ -350,6 +353,7 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches)
                 set_bits(loc32, 5, value, 2, 19);
                 continue;
             case HoleKind_ARM64_RELOC_GOT_LOAD_PAGEOFF12:
+            case HoleKind_ARM64_RELOC_PAGEOFF12:
             case HoleKind_R_AARCH64_LD64_GOT_LO12_NC:
                 // 12-bit low part of an absolute address. Pairs nicely with
                 // ARM64_RELOC_GOT_LOAD_PAGE21 (above).
diff --git a/Tools/jit/_schema.py b/Tools/jit/_schema.py
index 975ca650a13c1a..14e5fc2aae80ef 100644
--- a/Tools/jit/_schema.py
+++ b/Tools/jit/_schema.py
@@ -4,9 +4,12 @@
 HoleKind: typing.TypeAlias = typing.Literal[
     "ARM64_RELOC_GOT_LOAD_PAGE21",
     "ARM64_RELOC_GOT_LOAD_PAGEOFF12",
+    "ARM64_RELOC_PAGE21",
+    "ARM64_RELOC_PAGEOFF12",
     "ARM64_RELOC_UNSIGNED",
-    "IMAGE_REL_AMD64_ADDR64",
+    "IMAGE_REL_AMD64_REL32",
     "IMAGE_REL_I386_DIR32",
+    "IMAGE_REL_I386_REL32",
     "R_AARCH64_ABS64",
     "R_AARCH64_ADR_GOT_PAGE",
     "R_AARCH64_CALL26",
diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py
index 71c678e04fbfd5..eddec731984c82 100644
--- a/Tools/jit/_stencils.py
+++ b/Tools/jit/_stencils.py
@@ -96,7 +96,7 @@ def emit_aarch64_trampoline(self, hole: Hole) -> None:
         instruction |= ((base - hole.offset) >> 2) & 0x03FFFFFF
         self.body[where] = instruction.to_bytes(4, sys.byteorder)
         self.disassembly += [
-            f"{base + 4 * 0: x}: d2800008      mov     x8, #0x0",
+            f"{base + 4 * 0:x}: d2800008      mov     x8, #0x0",
             f"{base + 4 * 0:016x}:  R_AARCH64_MOVW_UABS_G0_NC    {hole.symbol}",
             f"{base + 4 * 1:x}: f2a00008      movk    x8, #0x0, lsl #16",
             f"{base + 4 * 1:016x}:  R_AARCH64_MOVW_UABS_G1_NC    {hole.symbol}",
@@ -162,6 +162,13 @@ def process_relocations(self, *, alignment: int = 1) -> None:
                 ):
                     self.code.emit_aarch64_trampoline(hole)
                     continue
+                elif (
+                    hole.kind in {"IMAGE_REL_AMD64_REL32"}
+                    and hole.value is HoleValue.ZERO
+                ):
+                    raise ValueError(
+                        f"Add PyAPI_FUNC(...) or PyAPI_DATA(...) to declaration of {hole.symbol}!"
+                    )
                 holes.append(hole)
             stencil.holes[:] = holes
         self.code.pad(alignment)
diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py
index 06dc4e7acc6c91..07959b15b6c4b9 100644
--- a/Tools/jit/_targets.py
+++ b/Tools/jit/_targets.py
@@ -106,7 +106,7 @@ async def _compile(
         o = tempdir / f"{opname}.o"
         args = [
             f"--target={self.triple}",
-            "-DPy_BUILD_CORE",
+            "-DPy_BUILD_CORE_MODULE",
             "-D_DEBUG" if self.debug else "-DNDEBUG",
             f"-D_JIT_OPCODE={opname}",
             "-D_PyJIT_ACTIVE",
@@ -118,12 +118,17 @@ async def _compile(
             f"-I{CPYTHON / 'Python'}",
             "-O3",
             "-c",
+            # This debug info isn't necessary, and bloats out the JIT'ed code.
+            # We *may* be able to re-enable this, process it, and JIT it for a
+            # nicer debugging experience... but that needs a lot more research:
             "-fno-asynchronous-unwind-tables",
+            # Don't call built-in functions that we can't find or patch:
             "-fno-builtin",
-            # SET_FUNCTION_ATTRIBUTE on 32-bit Windows debug builds:
-            "-fno-jump-tables",
+            # Emit relaxable 64-bit calls/jumps, so we don't have to worry
+            # about emitting in-range trampolines for out-of-range targets.
+            # We can probably remove this and emit trampolines in the future:
             "-fno-plt",
-            # Don't make calls to weird stack-smashing canaries:
+            # Don't call stack-smashing canaries that we can't find or patch:
             "-fno-stack-protector",
             "-o",
             f"{o}",
@@ -194,12 +199,21 @@ def _handle_section(
             offset = base + symbol["Value"]
             name = symbol["Name"]
             name = name.removeprefix(self.prefix)
-            group.symbols[name] = value, offset
+            if name not in group.symbols:
+                group.symbols[name] = value, offset
         for wrapped_relocation in section["Relocations"]:
             relocation = wrapped_relocation["Relocation"]
             hole = self._handle_relocation(base, relocation, stencil.body)
             stencil.holes.append(hole)
 
+    def _unwrap_dllimport(self, name: str) -> tuple[_stencils.HoleValue, str | None]:
+        if name.startswith("__imp_"):
+            name = name.removeprefix("__imp_")
+            name = name.removeprefix(self.prefix)
+            return _stencils.HoleValue.GOT, name
+        name = name.removeprefix(self.prefix)
+        return _stencils.symbol_to_value(name)
+
     def _handle_relocation(
         self, base: int, relocation: _schema.COFFRelocation, raw: bytes
     ) -> _stencils.Hole:
@@ -207,21 +221,23 @@ def _handle_relocation(
             case {
                 "Offset": offset,
                 "Symbol": s,
-                "Type": {"Value": "IMAGE_REL_AMD64_ADDR64" as kind},
+                "Type": {"Value": "IMAGE_REL_I386_DIR32" as kind},
             }:
                 offset += base
-                s = s.removeprefix(self.prefix)
-                value, symbol = _stencils.symbol_to_value(s)
-                addend = int.from_bytes(raw[offset : offset + 8], "little")
+                value, symbol = self._unwrap_dllimport(s)
+                addend = int.from_bytes(raw[offset : offset + 4], "little")
             case {
                 "Offset": offset,
                 "Symbol": s,
-                "Type": {"Value": "IMAGE_REL_I386_DIR32" as kind},
+                "Type": {
+                    "Value": "IMAGE_REL_AMD64_REL32" | "IMAGE_REL_I386_REL32" as kind
+                },
             }:
                 offset += base
-                s = s.removeprefix(self.prefix)
-                value, symbol = _stencils.symbol_to_value(s)
-                addend = int.from_bytes(raw[offset : offset + 4], "little")
+                value, symbol = self._unwrap_dllimport(s)
+                addend = (
+                    int.from_bytes(raw[offset : offset + 4], "little", signed=True) - 4
+                )
             case _:
                 raise NotImplementedError(relocation)
         return _stencils.Hole(offset, kind, value, symbol, addend)
@@ -423,12 +439,12 @@ def get_target(host: str) -> _COFF | _ELF | _MachO:
         args = ["-mcmodel=large"]
         return _ELF(host, alignment=8, args=args)
     if re.fullmatch(r"i686-pc-windows-msvc", host):
-        args = ["-mcmodel=large"]
+        args = ["-DPy_NO_ENABLE_SHARED"]
         return _COFF(host, args=args, prefix="_")
     if re.fullmatch(r"x86_64-apple-darwin.*", host):
         return _MachO(host, prefix="_")
     if re.fullmatch(r"x86_64-pc-windows-msvc", host):
-        args = ["-mcmodel=large"]
+        args = ["-fms-runtime-lib=dll"]
         return _COFF(host, args=args)
     if re.fullmatch(r"x86_64-.*-linux-gnu", host):
         return _ELF(host)
diff --git a/Tools/jit/template.c b/Tools/jit/template.c
index d79c6efb8f6de4..8aaf4581de362d 100644
--- a/Tools/jit/template.c
+++ b/Tools/jit/template.c
@@ -9,6 +9,7 @@
 #include "pycore_long.h"
 #include "pycore_opcode_metadata.h"
 #include "pycore_opcode_utils.h"
+#include "pycore_optimizer.h"
 #include "pycore_range.h"
 #include "pycore_setobject.h"
 #include "pycore_sliceobject.h"
@@ -58,11 +59,11 @@ do {  \
     } while (0)
 
 #define PATCH_VALUE(TYPE, NAME, ALIAS)  \
-    extern void ALIAS;                  \
+    PyAPI_DATA(void) ALIAS;             \
     TYPE NAME = (TYPE)(uint64_t)&ALIAS;
 
 #define PATCH_JUMP(ALIAS)                                    \
-    extern void ALIAS;                                       \
+    PyAPI_DATA(void) ALIAS;                                  \
     __attribute__((musttail))                                \
     return ((jit_func)&ALIAS)(frame, stack_pointer, tstate);
 

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

Reply via email to