https://github.com/python/cpython/commit/55824d01f866d1fa0f21996d897fba0e07d09ac8
commit: 55824d01f866d1fa0f21996d897fba0e07d09ac8
branch: main
author: Mark Shannon <[email protected]>
committer: markshannon <[email protected]>
date: 2024-01-11T18:20:42Z
summary:

GH-113853: Guarantee forward progress in executors (GH-113854)

files:
A Misc/NEWS.d/next/Core and 
Builtins/2024-01-08-05-36-59.gh-issue-113853.lm-6_a.rst
M Include/cpython/optimizer.h
M Python/bytecodes.c
M Python/generated_cases.c.h
M Python/optimizer.c

diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h
index f077da7ee88456..0e7bc9fdd7a07d 100644
--- a/Include/cpython/optimizer.h
+++ b/Include/cpython/optimizer.h
@@ -65,7 +65,7 @@ PyAPI_FUNC(_PyOptimizerObject *) 
PyUnstable_GetOptimizer(void);
 PyAPI_FUNC(_PyExecutorObject *) PyUnstable_GetExecutor(PyCodeObject *code, int 
offset);
 
 int
-_PyOptimizer_BackEdge(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *src, 
_Py_CODEUNIT *dest, PyObject **stack_pointer);
+_PyOptimizer_Optimize(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *start, 
PyObject **stack_pointer);
 
 extern _PyOptimizerObject _PyOptimizer_Default;
 
diff --git a/Misc/NEWS.d/next/Core and 
Builtins/2024-01-08-05-36-59.gh-issue-113853.lm-6_a.rst b/Misc/NEWS.d/next/Core 
and Builtins/2024-01-08-05-36-59.gh-issue-113853.lm-6_a.rst
new file mode 100644
index 00000000000000..d4f0a764153579
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and 
Builtins/2024-01-08-05-36-59.gh-issue-113853.lm-6_a.rst 
@@ -0,0 +1,2 @@
+Guarantee that all executors make progress. This then guarantees that tier 2
+execution always makes progress.
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index f53ddae8df985a..2011d963e3687c 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -2325,12 +2325,18 @@ dummy_func(
             // Double-check that the opcode isn't instrumented or something:
             if (ucounter > threshold && this_instr->op.code == JUMP_BACKWARD) {
                 OPT_STAT_INC(attempts);
-                int optimized = _PyOptimizer_BackEdge(frame, this_instr, 
next_instr, stack_pointer);
+                _Py_CODEUNIT *start = this_instr;
+                /* Back up over EXTENDED_ARGs so optimizer sees the whole 
instruction */
+                while (oparg > 255) {
+                    oparg >>= 8;
+                    start--;
+                }
+                int optimized = _PyOptimizer_Optimize(frame, start, 
stack_pointer);
                 ERROR_IF(optimized < 0, error);
                 if (optimized) {
                     // Rewind and enter the executor:
-                    assert(this_instr->op.code == ENTER_EXECUTOR);
-                    next_instr = this_instr;
+                    assert(start->op.code == ENTER_EXECUTOR);
+                    next_instr = start;
                     this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 
1);
                 }
                 else {
@@ -2370,10 +2376,12 @@ dummy_func(
                 GOTO_TIER_TWO();
             }
             else {
-                code->co_executors->executors[oparg & 255] = NULL;
+                /* ENTER_EXECUTOR will be the first code unit of the 
instruction */
+                assert(oparg < 256);
+                code->co_executors->executors[oparg] = NULL;
                 opcode = this_instr->op.code = executor->vm_data.opcode;
                 this_instr->op.arg = executor->vm_data.oparg;
-                oparg = (oparg & (~255)) | executor->vm_data.oparg;
+                oparg = executor->vm_data.oparg;
                 Py_DECREF(executor);
                 next_instr = this_instr;
                 DISPATCH_GOTO();
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index e693e3e2560e7b..1e995b62a72fcf 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -2384,10 +2384,12 @@
                 GOTO_TIER_TWO();
             }
             else {
-                code->co_executors->executors[oparg & 255] = NULL;
+                /* ENTER_EXECUTOR will be the first code unit of the 
instruction */
+                assert(oparg < 256);
+                code->co_executors->executors[oparg] = NULL;
                 opcode = this_instr->op.code = executor->vm_data.opcode;
                 this_instr->op.arg = executor->vm_data.oparg;
-                oparg = (oparg & (~255)) | executor->vm_data.oparg;
+                oparg = executor->vm_data.oparg;
                 Py_DECREF(executor);
                 next_instr = this_instr;
                 DISPATCH_GOTO();
@@ -3289,12 +3291,18 @@
             // Double-check that the opcode isn't instrumented or something:
             if (ucounter > threshold && this_instr->op.code == JUMP_BACKWARD) {
                 OPT_STAT_INC(attempts);
-                int optimized = _PyOptimizer_BackEdge(frame, this_instr, 
next_instr, stack_pointer);
+                _Py_CODEUNIT *start = this_instr;
+                /* Back up over EXTENDED_ARGs so optimizer sees the whole 
instruction */
+                while (oparg > 255) {
+                    oparg >>= 8;
+                    start--;
+                }
+                int optimized = _PyOptimizer_Optimize(frame, start, 
stack_pointer);
                 if (optimized < 0) goto error;
                 if (optimized) {
                     // Rewind and enter the executor:
-                    assert(this_instr->op.code == ENTER_EXECUTOR);
-                    next_instr = this_instr;
+                    assert(start->op.code == ENTER_EXECUTOR);
+                    next_instr = start;
                     this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 
1);
                 }
                 else {
diff --git a/Python/optimizer.c b/Python/optimizer.c
index 28e12dbbf5d78b..227d6be0092419 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -162,24 +162,22 @@ PyUnstable_SetOptimizer(_PyOptimizerObject *optimizer)
 }
 
 int
-_PyOptimizer_BackEdge(_PyInterpreterFrame *frame, _Py_CODEUNIT *src, 
_Py_CODEUNIT *dest, PyObject **stack_pointer)
+_PyOptimizer_Optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *start, 
PyObject **stack_pointer)
 {
-    assert(src->op.code == JUMP_BACKWARD);
     PyCodeObject *code = (PyCodeObject *)frame->f_executable;
     assert(PyCode_Check(code));
     PyInterpreterState *interp = _PyInterpreterState_GET();
-    if (!has_space_for_executor(code, src)) {
+    if (!has_space_for_executor(code, start)) {
         return 0;
     }
     _PyOptimizerObject *opt = interp->optimizer;
     _PyExecutorObject *executor = NULL;
-    /* Start optimizing at the destination to guarantee forward progress */
-    int err = opt->optimize(opt, code, dest, &executor, (int)(stack_pointer - 
_PyFrame_Stackbase(frame)));
+    int err = opt->optimize(opt, code, start, &executor, (int)(stack_pointer - 
_PyFrame_Stackbase(frame)));
     if (err <= 0) {
         assert(executor == NULL);
         return err;
     }
-    int index = get_index_for_executor(code, src);
+    int index = get_index_for_executor(code, start);
     if (index < 0) {
         /* Out of memory. Don't raise and assume that the
          * error will show up elsewhere.
@@ -190,7 +188,7 @@ _PyOptimizer_BackEdge(_PyInterpreterFrame *frame, 
_Py_CODEUNIT *src, _Py_CODEUNI
         Py_DECREF(executor);
         return 0;
     }
-    insert_executor(code, src, index, executor);
+    insert_executor(code, start, index, executor);
     Py_DECREF(executor);
     return 1;
 }
@@ -316,38 +314,6 @@ BRANCH_TO_GUARD[4][2] = {
 #define CONFIDENCE_RANGE 1000
 #define CONFIDENCE_CUTOFF 333
 
-/* Returns 1 on success,
- * 0 if it failed to produce a worthwhile trace,
- * and -1 on an error.
- */
-static int
-translate_bytecode_to_trace(
-    PyCodeObject *code,
-    _Py_CODEUNIT *instr,
-    _PyUOpInstruction *trace,
-    int buffer_size,
-    _PyBloomFilter *dependencies)
-{
-    PyCodeObject *initial_code = code;
-    _Py_BloomFilter_Add(dependencies, initial_code);
-    _Py_CODEUNIT *initial_instr = instr;
-    int trace_length = 0;
-    int max_length = buffer_size;
-    struct {
-        PyCodeObject *code;
-        _Py_CODEUNIT *instr;
-    } trace_stack[TRACE_STACK_SIZE];
-    int trace_stack_depth = 0;
-    int confidence = CONFIDENCE_RANGE;  // Adjusted by branch instructions
-
-#ifdef Py_DEBUG
-    char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
-    int lltrace = 0;
-    if (python_lltrace != NULL && *python_lltrace >= '0') {
-        lltrace = *python_lltrace - '0';  // TODO: Parse an int and all that
-    }
-#endif
-
 #ifdef Py_DEBUG
 #define DPRINTF(level, ...) \
     if (lltrace >= (level)) { printf(__VA_ARGS__); }
@@ -403,6 +369,39 @@ translate_bytecode_to_trace(
     code = trace_stack[trace_stack_depth].code; \
     instr = trace_stack[trace_stack_depth].instr;
 
+/* Returns 1 on success,
+ * 0 if it failed to produce a worthwhile trace,
+ * and -1 on an error.
+ */
+static int
+translate_bytecode_to_trace(
+    PyCodeObject *code,
+    _Py_CODEUNIT *instr,
+    _PyUOpInstruction *trace,
+    int buffer_size,
+    _PyBloomFilter *dependencies)
+{
+    bool progress_needed = true;
+    PyCodeObject *initial_code = code;
+    _Py_BloomFilter_Add(dependencies, initial_code);
+    _Py_CODEUNIT *initial_instr = instr;
+    int trace_length = 0;
+    int max_length = buffer_size;
+    struct {
+        PyCodeObject *code;
+        _Py_CODEUNIT *instr;
+    } trace_stack[TRACE_STACK_SIZE];
+    int trace_stack_depth = 0;
+    int confidence = CONFIDENCE_RANGE;  // Adjusted by branch instructions
+
+#ifdef Py_DEBUG
+    char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
+    int lltrace = 0;
+    if (python_lltrace != NULL && *python_lltrace >= '0') {
+        lltrace = *python_lltrace - '0';  // TODO: Parse an int and all that
+    }
+#endif
+
     DPRINTF(4,
             "Optimizing %s (%s:%d) at byte offset %d\n",
             PyUnicode_AsUTF8(code->co_qualname),
@@ -410,6 +409,7 @@ translate_bytecode_to_trace(
             code->co_firstlineno,
             2 * INSTR_IP(initial_instr, code));
     uint32_t target = 0;
+
 top:  // Jump here after _PUSH_FRAME or likely branches
     for (;;) {
         target = INSTR_IP(instr, code);
@@ -421,6 +421,15 @@ translate_bytecode_to_trace(
         uint32_t oparg = instr->op.arg;
         uint32_t extended = 0;
 
+        if (opcode == ENTER_EXECUTOR) {
+            assert(oparg < 256);
+            _PyExecutorObject *executor =
+                (_PyExecutorObject *)code->co_executors->executors[oparg];
+            opcode = executor->vm_data.opcode;
+            DPRINTF(2, "  * ENTER_EXECUTOR -> %s\n",  
_PyOpcode_OpName[opcode]);
+            oparg = executor->vm_data.oparg;
+        }
+
         if (opcode == EXTENDED_ARG) {
             instr++;
             extended = 1;
@@ -431,13 +440,23 @@ translate_bytecode_to_trace(
                 goto done;
             }
         }
-
-        if (opcode == ENTER_EXECUTOR) {
-            _PyExecutorObject *executor =
-                (_PyExecutorObject *)code->co_executors->executors[oparg&255];
-            opcode = executor->vm_data.opcode;
-            DPRINTF(2, "  * ENTER_EXECUTOR -> %s\n",  
_PyOpcode_OpName[opcode]);
-            oparg = (oparg & 0xffffff00) | executor->vm_data.oparg;
+        assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG);
+
+        /* Special case the first instruction,
+         * so that we can guarantee forward progress */
+        if (progress_needed) {
+            progress_needed = false;
+            if (opcode == JUMP_BACKWARD || opcode == 
JUMP_BACKWARD_NO_INTERRUPT) {
+                instr += 1 + _PyOpcode_Caches[opcode] - (int32_t)oparg;
+                initial_instr = instr;
+                continue;
+            }
+            else {
+                if (OPCODE_HAS_DEOPT(opcode)) {
+                    opcode = _PyOpcode_Deopt[opcode];
+                }
+                assert(!OPCODE_HAS_DEOPT(opcode));
+            }
         }
 
         switch (opcode) {
@@ -480,7 +499,9 @@ translate_bytecode_to_trace(
             case JUMP_BACKWARD:
             case JUMP_BACKWARD_NO_INTERRUPT:
             {
-                if (instr + 2 - oparg == initial_instr && code == 
initial_code) {
+                _Py_CODEUNIT *target = instr + 1 + _PyOpcode_Caches[opcode] - 
(int)oparg;
+                if (target == initial_instr) {
+                    /* We have looped round to the start */
                     RESERVE(1);
                     ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0);
                 }
@@ -641,19 +662,7 @@ translate_bytecode_to_trace(
     }
     assert(code == initial_code);
     // Skip short traces like _SET_IP, LOAD_FAST, _SET_IP, _EXIT_TRACE
-    if (trace_length > 4) {
-        ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
-        DPRINTF(1,
-                "Created a trace for %s (%s:%d) at byte offset %d -- length 
%d\n",
-                PyUnicode_AsUTF8(code->co_qualname),
-                PyUnicode_AsUTF8(code->co_filename),
-                code->co_firstlineno,
-                2 * INSTR_IP(initial_instr, code),
-                trace_length);
-        OPT_HIST(trace_length + buffer_size - max_length, trace_length_hist);
-        return 1;
-    }
-    else {
+    if (progress_needed || trace_length < 5) {
         OPT_STAT_INC(trace_too_short);
         DPRINTF(4,
                 "No trace for %s (%s:%d) at byte offset %d\n",
@@ -661,15 +670,25 @@ translate_bytecode_to_trace(
                 PyUnicode_AsUTF8(code->co_filename),
                 code->co_firstlineno,
                 2 * INSTR_IP(initial_instr, code));
+        return 0;
     }
-    return 0;
+    ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
+    DPRINTF(1,
+            "Created a trace for %s (%s:%d) at byte offset %d -- length %d\n",
+            PyUnicode_AsUTF8(code->co_qualname),
+            PyUnicode_AsUTF8(code->co_filename),
+            code->co_firstlineno,
+            2 * INSTR_IP(initial_instr, code),
+            trace_length);
+    OPT_HIST(trace_length + buffer_size - max_length, trace_length_hist);
+    return 1;
+}
 
 #undef RESERVE
 #undef RESERVE_RAW
 #undef INSTR_IP
 #undef ADD_TO_TRACE
 #undef DPRINTF
-}
 
 #define UNSET_BIT(array, bit) (array[(bit)>>5] &= ~(1<<((bit)&31)))
 #define SET_BIT(array, bit) (array[(bit)>>5] |= (1<<((bit)&31)))
@@ -854,10 +873,20 @@ counter_optimize(
     int Py_UNUSED(curr_stackentries)
 )
 {
+    int oparg = instr->op.arg;
+    while (instr->op.code == EXTENDED_ARG) {
+        instr++;
+        oparg = (oparg << 8) | instr->op.arg;
+    }
+    if (instr->op.code != JUMP_BACKWARD) {
+        /* Counter optimizer can only handle backward edges */
+        return 0;
+    }
+    _Py_CODEUNIT *target = instr + 1 + _PyOpcode_Caches[JUMP_BACKWARD] - oparg;
     _PyUOpInstruction buffer[3] = {
         { .opcode = _LOAD_CONST_INLINE_BORROW, .operand = (uintptr_t)self },
         { .opcode = _INTERNAL_INCREMENT_OPT_COUNTER },
-        { .opcode = _EXIT_TRACE, .target = (uint32_t)(instr - 
_PyCode_CODE(code)) }
+        { .opcode = _EXIT_TRACE, .target = (uint32_t)(target - 
_PyCode_CODE(code)) }
     };
     _PyBloomFilter empty;
     _Py_BloomFilter_Init(&empty);

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

Reply via email to