https://github.com/python/cpython/commit/55824d01f866d1fa0f21996d897fba0e07d09ac8
commit: 55824d01f866d1fa0f21996d897fba0e07d09ac8
branch: main
author: Mark Shannon <[email protected]>
committer: markshannon <[email protected]>
date: 2024-01-11T18:20:42Z
summary:
GH-113853: Guarantee forward progress in executors (GH-113854)
files:
A Misc/NEWS.d/next/Core and Builtins/2024-01-08-05-36-59.gh-issue-113853.lm-6_a.rst
M Include/cpython/optimizer.h
M Python/bytecodes.c
M Python/generated_cases.c.h
M Python/optimizer.c
diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h
index f077da7ee88456..0e7bc9fdd7a07d 100644
--- a/Include/cpython/optimizer.h
+++ b/Include/cpython/optimizer.h
@@ -65,7 +65,7 @@ PyAPI_FUNC(_PyOptimizerObject *)
PyUnstable_GetOptimizer(void);
PyAPI_FUNC(_PyExecutorObject *) PyUnstable_GetExecutor(PyCodeObject *code, int offset);
int
-_PyOptimizer_BackEdge(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNIT *dest, PyObject **stack_pointer);
+_PyOptimizer_Optimize(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer);
extern _PyOptimizerObject _PyOptimizer_Default;
diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-01-08-05-36-59.gh-issue-113853.lm-6_a.rst b/Misc/NEWS.d/next/Core and Builtins/2024-01-08-05-36-59.gh-issue-113853.lm-6_a.rst
new file mode 100644
index 00000000000000..d4f0a764153579
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2024-01-08-05-36-59.gh-issue-113853.lm-6_a.rst
@@ -0,0 +1,2 @@
+Guarantee that all executors make progress. This then guarantees that tier 2
+execution always makes progress.
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index f53ddae8df985a..2011d963e3687c 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -2325,12 +2325,18 @@ dummy_func(
// Double-check that the opcode isn't instrumented or something:
if (ucounter > threshold && this_instr->op.code == JUMP_BACKWARD) {
OPT_STAT_INC(attempts);
- int optimized = _PyOptimizer_BackEdge(frame, this_instr, next_instr, stack_pointer);
+ _Py_CODEUNIT *start = this_instr;
+ /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */
+ while (oparg > 255) {
+ oparg >>= 8;
+ start--;
+ }
+ int optimized = _PyOptimizer_Optimize(frame, start, stack_pointer);
ERROR_IF(optimized < 0, error);
if (optimized) {
// Rewind and enter the executor:
- assert(this_instr->op.code == ENTER_EXECUTOR);
- next_instr = this_instr;
+ assert(start->op.code == ENTER_EXECUTOR);
+ next_instr = start;
this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1);
}
else {
@@ -2370,10 +2376,12 @@ dummy_func(
GOTO_TIER_TWO();
}
else {
- code->co_executors->executors[oparg & 255] = NULL;
+ /* ENTER_EXECUTOR will be the first code unit of the instruction */
+ assert(oparg < 256);
+ code->co_executors->executors[oparg] = NULL;
opcode = this_instr->op.code = executor->vm_data.opcode;
this_instr->op.arg = executor->vm_data.oparg;
- oparg = (oparg & (~255)) | executor->vm_data.oparg;
+ oparg = executor->vm_data.oparg;
Py_DECREF(executor);
next_instr = this_instr;
DISPATCH_GOTO();
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index e693e3e2560e7b..1e995b62a72fcf 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -2384,10 +2384,12 @@
GOTO_TIER_TWO();
}
else {
- code->co_executors->executors[oparg & 255] = NULL;
+ /* ENTER_EXECUTOR will be the first code unit of the instruction */
+ assert(oparg < 256);
+ code->co_executors->executors[oparg] = NULL;
opcode = this_instr->op.code = executor->vm_data.opcode;
this_instr->op.arg = executor->vm_data.oparg;
- oparg = (oparg & (~255)) | executor->vm_data.oparg;
+ oparg = executor->vm_data.oparg;
Py_DECREF(executor);
next_instr = this_instr;
DISPATCH_GOTO();
@@ -3289,12 +3291,18 @@
// Double-check that the opcode isn't instrumented or something:
if (ucounter > threshold && this_instr->op.code == JUMP_BACKWARD) {
OPT_STAT_INC(attempts);
- int optimized = _PyOptimizer_BackEdge(frame, this_instr, next_instr, stack_pointer);
+ _Py_CODEUNIT *start = this_instr;
+ /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */
+ while (oparg > 255) {
+ oparg >>= 8;
+ start--;
+ }
+ int optimized = _PyOptimizer_Optimize(frame, start, stack_pointer);
if (optimized < 0) goto error;
if (optimized) {
// Rewind and enter the executor:
- assert(this_instr->op.code == ENTER_EXECUTOR);
- next_instr = this_instr;
+ assert(start->op.code == ENTER_EXECUTOR);
+ next_instr = start;
this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1);
}
else {
diff --git a/Python/optimizer.c b/Python/optimizer.c
index 28e12dbbf5d78b..227d6be0092419 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -162,24 +162,22 @@ PyUnstable_SetOptimizer(_PyOptimizerObject *optimizer)
}
int
-_PyOptimizer_BackEdge(_PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNIT *dest, PyObject **stack_pointer)
+_PyOptimizer_Optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer)
{
- assert(src->op.code == JUMP_BACKWARD);
PyCodeObject *code = (PyCodeObject *)frame->f_executable;
assert(PyCode_Check(code));
PyInterpreterState *interp = _PyInterpreterState_GET();
- if (!has_space_for_executor(code, src)) {
+ if (!has_space_for_executor(code, start)) {
return 0;
}
_PyOptimizerObject *opt = interp->optimizer;
_PyExecutorObject *executor = NULL;
- /* Start optimizing at the destination to guarantee forward progress */
- int err = opt->optimize(opt, code, dest, &executor, (int)(stack_pointer - _PyFrame_Stackbase(frame)));
+ int err = opt->optimize(opt, code, start, &executor, (int)(stack_pointer - _PyFrame_Stackbase(frame)));
if (err <= 0) {
assert(executor == NULL);
return err;
}
- int index = get_index_for_executor(code, src);
+ int index = get_index_for_executor(code, start);
if (index < 0) {
/* Out of memory. Don't raise and assume that the
* error will show up elsewhere.
@@ -190,7 +188,7 @@ _PyOptimizer_BackEdge(_PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNI
Py_DECREF(executor);
return 0;
}
- insert_executor(code, src, index, executor);
+ insert_executor(code, start, index, executor);
Py_DECREF(executor);
return 1;
}
@@ -316,38 +314,6 @@ BRANCH_TO_GUARD[4][2] = {
#define CONFIDENCE_RANGE 1000
#define CONFIDENCE_CUTOFF 333
-/* Returns 1 on success,
- * 0 if it failed to produce a worthwhile trace,
- * and -1 on an error.
- */
-static int
-translate_bytecode_to_trace(
- PyCodeObject *code,
- _Py_CODEUNIT *instr,
- _PyUOpInstruction *trace,
- int buffer_size,
- _PyBloomFilter *dependencies)
-{
- PyCodeObject *initial_code = code;
- _Py_BloomFilter_Add(dependencies, initial_code);
- _Py_CODEUNIT *initial_instr = instr;
- int trace_length = 0;
- int max_length = buffer_size;
- struct {
- PyCodeObject *code;
- _Py_CODEUNIT *instr;
- } trace_stack[TRACE_STACK_SIZE];
- int trace_stack_depth = 0;
- int confidence = CONFIDENCE_RANGE; // Adjusted by branch instructions
-
-#ifdef Py_DEBUG
- char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
- int lltrace = 0;
- if (python_lltrace != NULL && *python_lltrace >= '0') {
- lltrace = *python_lltrace - '0'; // TODO: Parse an int and all that
- }
-#endif
-
#ifdef Py_DEBUG
#define DPRINTF(level, ...) \
if (lltrace >= (level)) { printf(__VA_ARGS__); }
@@ -403,6 +369,39 @@ translate_bytecode_to_trace(
code = trace_stack[trace_stack_depth].code; \
instr = trace_stack[trace_stack_depth].instr;
+/* Returns 1 on success,
+ * 0 if it failed to produce a worthwhile trace,
+ * and -1 on an error.
+ */
+static int
+translate_bytecode_to_trace(
+ PyCodeObject *code,
+ _Py_CODEUNIT *instr,
+ _PyUOpInstruction *trace,
+ int buffer_size,
+ _PyBloomFilter *dependencies)
+{
+ bool progress_needed = true;
+ PyCodeObject *initial_code = code;
+ _Py_BloomFilter_Add(dependencies, initial_code);
+ _Py_CODEUNIT *initial_instr = instr;
+ int trace_length = 0;
+ int max_length = buffer_size;
+ struct {
+ PyCodeObject *code;
+ _Py_CODEUNIT *instr;
+ } trace_stack[TRACE_STACK_SIZE];
+ int trace_stack_depth = 0;
+ int confidence = CONFIDENCE_RANGE; // Adjusted by branch instructions
+
+#ifdef Py_DEBUG
+ char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
+ int lltrace = 0;
+ if (python_lltrace != NULL && *python_lltrace >= '0') {
+ lltrace = *python_lltrace - '0'; // TODO: Parse an int and all that
+ }
+#endif
+
DPRINTF(4,
"Optimizing %s (%s:%d) at byte offset %d\n",
PyUnicode_AsUTF8(code->co_qualname),
@@ -410,6 +409,7 @@ translate_bytecode_to_trace(
code->co_firstlineno,
2 * INSTR_IP(initial_instr, code));
uint32_t target = 0;
+
top: // Jump here after _PUSH_FRAME or likely branches
for (;;) {
target = INSTR_IP(instr, code);
@@ -421,6 +421,15 @@ translate_bytecode_to_trace(
uint32_t oparg = instr->op.arg;
uint32_t extended = 0;
+ if (opcode == ENTER_EXECUTOR) {
+ assert(oparg < 256);
+ _PyExecutorObject *executor =
+ (_PyExecutorObject *)code->co_executors->executors[oparg];
+ opcode = executor->vm_data.opcode;
+ DPRINTF(2, " * ENTER_EXECUTOR -> %s\n", _PyOpcode_OpName[opcode]);
+ oparg = executor->vm_data.oparg;
+ }
+
if (opcode == EXTENDED_ARG) {
instr++;
extended = 1;
@@ -431,13 +440,23 @@ translate_bytecode_to_trace(
goto done;
}
}
-
- if (opcode == ENTER_EXECUTOR) {
- _PyExecutorObject *executor =
- (_PyExecutorObject *)code->co_executors->executors[oparg&255];
- opcode = executor->vm_data.opcode;
- DPRINTF(2, " * ENTER_EXECUTOR -> %s\n", _PyOpcode_OpName[opcode]);
- oparg = (oparg & 0xffffff00) | executor->vm_data.oparg;
+ assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG);
+
+ /* Special case the first instruction,
+ * so that we can guarantee forward progress */
+ if (progress_needed) {
+ progress_needed = false;
+ if (opcode == JUMP_BACKWARD || opcode == JUMP_BACKWARD_NO_INTERRUPT) {
+ instr += 1 + _PyOpcode_Caches[opcode] - (int32_t)oparg;
+ initial_instr = instr;
+ continue;
+ }
+ else {
+ if (OPCODE_HAS_DEOPT(opcode)) {
+ opcode = _PyOpcode_Deopt[opcode];
+ }
+ assert(!OPCODE_HAS_DEOPT(opcode));
+ }
}
switch (opcode) {
@@ -480,7 +499,9 @@ translate_bytecode_to_trace(
case JUMP_BACKWARD:
case JUMP_BACKWARD_NO_INTERRUPT:
{
- if (instr + 2 - oparg == initial_instr && code == initial_code) {
+ _Py_CODEUNIT *target = instr + 1 + _PyOpcode_Caches[opcode] - (int)oparg;
+ if (target == initial_instr) {
+ /* We have looped round to the start */
RESERVE(1);
ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0);
}
@@ -641,19 +662,7 @@ translate_bytecode_to_trace(
}
assert(code == initial_code);
// Skip short traces like _SET_IP, LOAD_FAST, _SET_IP, _EXIT_TRACE
- if (trace_length > 4) {
- ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
- DPRINTF(1,
- "Created a trace for %s (%s:%d) at byte offset %d -- length %d\n",
- PyUnicode_AsUTF8(code->co_qualname),
- PyUnicode_AsUTF8(code->co_filename),
- code->co_firstlineno,
- 2 * INSTR_IP(initial_instr, code),
- trace_length);
- OPT_HIST(trace_length + buffer_size - max_length, trace_length_hist);
- return 1;
- }
- else {
+ if (progress_needed || trace_length < 5) {
OPT_STAT_INC(trace_too_short);
DPRINTF(4,
"No trace for %s (%s:%d) at byte offset %d\n",
@@ -661,15 +670,25 @@ translate_bytecode_to_trace(
PyUnicode_AsUTF8(code->co_filename),
code->co_firstlineno,
2 * INSTR_IP(initial_instr, code));
+ return 0;
}
- return 0;
+ ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
+ DPRINTF(1,
+ "Created a trace for %s (%s:%d) at byte offset %d -- length %d\n",
+ PyUnicode_AsUTF8(code->co_qualname),
+ PyUnicode_AsUTF8(code->co_filename),
+ code->co_firstlineno,
+ 2 * INSTR_IP(initial_instr, code),
+ trace_length);
+ OPT_HIST(trace_length + buffer_size - max_length, trace_length_hist);
+ return 1;
+}
#undef RESERVE
#undef RESERVE_RAW
#undef INSTR_IP
#undef ADD_TO_TRACE
#undef DPRINTF
-}
#define UNSET_BIT(array, bit) (array[(bit)>>5] &= ~(1<<((bit)&31)))
#define SET_BIT(array, bit) (array[(bit)>>5] |= (1<<((bit)&31)))
@@ -854,10 +873,20 @@ counter_optimize(
int Py_UNUSED(curr_stackentries)
)
{
+ int oparg = instr->op.arg;
+ while (instr->op.code == EXTENDED_ARG) {
+ instr++;
+ oparg = (oparg << 8) | instr->op.arg;
+ }
+ if (instr->op.code != JUMP_BACKWARD) {
+ /* Counter optimizer can only handle backward edges */
+ return 0;
+ }
+ _Py_CODEUNIT *target = instr + 1 + _PyOpcode_Caches[JUMP_BACKWARD] - oparg;
_PyUOpInstruction buffer[3] = {
{ .opcode = _LOAD_CONST_INLINE_BORROW, .operand = (uintptr_t)self },
{ .opcode = _INTERNAL_INCREMENT_OPT_COUNTER },
- { .opcode = _EXIT_TRACE, .target = (uint32_t)(instr - _PyCode_CODE(code)) }
+ { .opcode = _EXIT_TRACE, .target = (uint32_t)(target - _PyCode_CODE(code)) }
};
_PyBloomFilter empty;
_Py_BloomFilter_Init(&empty);
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]