https://github.com/python/cpython/commit/8dd8b5c2f0785675b9282b719256341448d49967
commit: 8dd8b5c2f0785675b9282b719256341448d49967
branch: main
author: Mark Shannon <[email protected]>
committer: markshannon <[email protected]>
date: 2025-06-17T13:43:09+01:00
summary:
GH-135379: Support limited scalar replacement for replicated uops in the JIT
code generator. (GH-135563)
* Use it to support efficient specializations of COPY and SWAP in the JIT.
files:
M Include/internal/pycore_uop_ids.h
M Include/internal/pycore_uop_metadata.h
M Python/bytecodes.c
M Python/executor_cases.c.h
M Python/generated_cases.c.h
M Python/optimizer.c
M Tools/cases_generator/analyzer.py
M Tools/cases_generator/parsing.py
M Tools/cases_generator/uop_metadata_generator.py
diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h
index 2b845527cf2ed5..8211c5d056535e 100644
--- a/Include/internal/pycore_uop_ids.h
+++ b/Include/internal/pycore_uop_ids.h
@@ -86,86 +86,89 @@ extern "C" {
#define _CONTAINS_OP_DICT 362
#define _CONTAINS_OP_SET 363
#define _CONVERT_VALUE CONVERT_VALUE
-#define _COPY COPY
+#define _COPY 364
+#define _COPY_1 365
+#define _COPY_2 366
+#define _COPY_3 367
#define _COPY_FREE_VARS COPY_FREE_VARS
-#define _CREATE_INIT_FRAME 364
+#define _CREATE_INIT_FRAME 368
#define _DELETE_ATTR DELETE_ATTR
#define _DELETE_DEREF DELETE_DEREF
#define _DELETE_FAST DELETE_FAST
#define _DELETE_GLOBAL DELETE_GLOBAL
#define _DELETE_NAME DELETE_NAME
#define _DELETE_SUBSCR DELETE_SUBSCR
-#define _DEOPT 365
+#define _DEOPT 369
#define _DICT_MERGE DICT_MERGE
#define _DICT_UPDATE DICT_UPDATE
-#define _DO_CALL 366
-#define _DO_CALL_FUNCTION_EX 367
-#define _DO_CALL_KW 368
+#define _DO_CALL 370
+#define _DO_CALL_FUNCTION_EX 371
+#define _DO_CALL_KW 372
#define _END_FOR END_FOR
#define _END_SEND END_SEND
-#define _ERROR_POP_N 369
+#define _ERROR_POP_N 373
#define _EXIT_INIT_CHECK EXIT_INIT_CHECK
-#define _EXPAND_METHOD 370
-#define _EXPAND_METHOD_KW 371
-#define _FATAL_ERROR 372
+#define _EXPAND_METHOD 374
+#define _EXPAND_METHOD_KW 375
+#define _FATAL_ERROR 376
#define _FORMAT_SIMPLE FORMAT_SIMPLE
#define _FORMAT_WITH_SPEC FORMAT_WITH_SPEC
-#define _FOR_ITER 373
-#define _FOR_ITER_GEN_FRAME 374
-#define _FOR_ITER_TIER_TWO 375
+#define _FOR_ITER 377
+#define _FOR_ITER_GEN_FRAME 378
+#define _FOR_ITER_TIER_TWO 379
#define _GET_AITER GET_AITER
#define _GET_ANEXT GET_ANEXT
#define _GET_AWAITABLE GET_AWAITABLE
#define _GET_ITER GET_ITER
#define _GET_LEN GET_LEN
#define _GET_YIELD_FROM_ITER GET_YIELD_FROM_ITER
-#define _GUARD_BINARY_OP_EXTEND 376
-#define _GUARD_CALLABLE_ISINSTANCE 377
-#define _GUARD_CALLABLE_LEN 378
-#define _GUARD_CALLABLE_LIST_APPEND 379
-#define _GUARD_CALLABLE_STR_1 380
-#define _GUARD_CALLABLE_TUPLE_1 381
-#define _GUARD_CALLABLE_TYPE_1 382
-#define _GUARD_DORV_NO_DICT 383
-#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 384
-#define _GUARD_GLOBALS_VERSION 385
-#define _GUARD_IS_FALSE_POP 386
-#define _GUARD_IS_NONE_POP 387
-#define _GUARD_IS_NOT_NONE_POP 388
-#define _GUARD_IS_TRUE_POP 389
-#define _GUARD_KEYS_VERSION 390
-#define _GUARD_NOS_DICT 391
-#define _GUARD_NOS_FLOAT 392
-#define _GUARD_NOS_INT 393
-#define _GUARD_NOS_LIST 394
-#define _GUARD_NOS_NOT_NULL 395
-#define _GUARD_NOS_NULL 396
-#define _GUARD_NOS_TUPLE 397
-#define _GUARD_NOS_UNICODE 398
-#define _GUARD_NOT_EXHAUSTED_LIST 399
-#define _GUARD_NOT_EXHAUSTED_RANGE 400
-#define _GUARD_NOT_EXHAUSTED_TUPLE 401
-#define _GUARD_THIRD_NULL 402
-#define _GUARD_TOS_ANY_SET 403
-#define _GUARD_TOS_DICT 404
-#define _GUARD_TOS_FLOAT 405
-#define _GUARD_TOS_INT 406
-#define _GUARD_TOS_LIST 407
-#define _GUARD_TOS_SLICE 408
-#define _GUARD_TOS_TUPLE 409
-#define _GUARD_TOS_UNICODE 410
-#define _GUARD_TYPE_VERSION 411
-#define _GUARD_TYPE_VERSION_AND_LOCK 412
+#define _GUARD_BINARY_OP_EXTEND 380
+#define _GUARD_CALLABLE_ISINSTANCE 381
+#define _GUARD_CALLABLE_LEN 382
+#define _GUARD_CALLABLE_LIST_APPEND 383
+#define _GUARD_CALLABLE_STR_1 384
+#define _GUARD_CALLABLE_TUPLE_1 385
+#define _GUARD_CALLABLE_TYPE_1 386
+#define _GUARD_DORV_NO_DICT 387
+#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 388
+#define _GUARD_GLOBALS_VERSION 389
+#define _GUARD_IS_FALSE_POP 390
+#define _GUARD_IS_NONE_POP 391
+#define _GUARD_IS_NOT_NONE_POP 392
+#define _GUARD_IS_TRUE_POP 393
+#define _GUARD_KEYS_VERSION 394
+#define _GUARD_NOS_DICT 395
+#define _GUARD_NOS_FLOAT 396
+#define _GUARD_NOS_INT 397
+#define _GUARD_NOS_LIST 398
+#define _GUARD_NOS_NOT_NULL 399
+#define _GUARD_NOS_NULL 400
+#define _GUARD_NOS_TUPLE 401
+#define _GUARD_NOS_UNICODE 402
+#define _GUARD_NOT_EXHAUSTED_LIST 403
+#define _GUARD_NOT_EXHAUSTED_RANGE 404
+#define _GUARD_NOT_EXHAUSTED_TUPLE 405
+#define _GUARD_THIRD_NULL 406
+#define _GUARD_TOS_ANY_SET 407
+#define _GUARD_TOS_DICT 408
+#define _GUARD_TOS_FLOAT 409
+#define _GUARD_TOS_INT 410
+#define _GUARD_TOS_LIST 411
+#define _GUARD_TOS_SLICE 412
+#define _GUARD_TOS_TUPLE 413
+#define _GUARD_TOS_UNICODE 414
+#define _GUARD_TYPE_VERSION 415
+#define _GUARD_TYPE_VERSION_AND_LOCK 416
#define _IMPORT_FROM IMPORT_FROM
#define _IMPORT_NAME IMPORT_NAME
-#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 413
-#define _INIT_CALL_PY_EXACT_ARGS 414
-#define _INIT_CALL_PY_EXACT_ARGS_0 415
-#define _INIT_CALL_PY_EXACT_ARGS_1 416
-#define _INIT_CALL_PY_EXACT_ARGS_2 417
-#define _INIT_CALL_PY_EXACT_ARGS_3 418
-#define _INIT_CALL_PY_EXACT_ARGS_4 419
-#define _INSERT_NULL 420
+#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 417
+#define _INIT_CALL_PY_EXACT_ARGS 418
+#define _INIT_CALL_PY_EXACT_ARGS_0 419
+#define _INIT_CALL_PY_EXACT_ARGS_1 420
+#define _INIT_CALL_PY_EXACT_ARGS_2 421
+#define _INIT_CALL_PY_EXACT_ARGS_3 422
+#define _INIT_CALL_PY_EXACT_ARGS_4 423
+#define _INSERT_NULL 424
#define _INSTRUMENTED_FOR_ITER INSTRUMENTED_FOR_ITER
#define _INSTRUMENTED_INSTRUCTION INSTRUMENTED_INSTRUCTION
#define _INSTRUMENTED_JUMP_FORWARD INSTRUMENTED_JUMP_FORWARD
@@ -175,171 +178,173 @@ extern "C" {
#define _INSTRUMENTED_POP_JUMP_IF_NONE INSTRUMENTED_POP_JUMP_IF_NONE
#define _INSTRUMENTED_POP_JUMP_IF_NOT_NONE INSTRUMENTED_POP_JUMP_IF_NOT_NONE
#define _INSTRUMENTED_POP_JUMP_IF_TRUE INSTRUMENTED_POP_JUMP_IF_TRUE
-#define _IS_NONE 421
+#define _IS_NONE 425
#define _IS_OP IS_OP
-#define _ITER_CHECK_LIST 422
-#define _ITER_CHECK_RANGE 423
-#define _ITER_CHECK_TUPLE 424
-#define _ITER_JUMP_LIST 425
-#define _ITER_JUMP_RANGE 426
-#define _ITER_JUMP_TUPLE 427
-#define _ITER_NEXT_LIST 428
-#define _ITER_NEXT_LIST_TIER_TWO 429
-#define _ITER_NEXT_RANGE 430
-#define _ITER_NEXT_TUPLE 431
-#define _JUMP_TO_TOP 432
+#define _ITER_CHECK_LIST 426
+#define _ITER_CHECK_RANGE 427
+#define _ITER_CHECK_TUPLE 428
+#define _ITER_JUMP_LIST 429
+#define _ITER_JUMP_RANGE 430
+#define _ITER_JUMP_TUPLE 431
+#define _ITER_NEXT_LIST 432
+#define _ITER_NEXT_LIST_TIER_TWO 433
+#define _ITER_NEXT_RANGE 434
+#define _ITER_NEXT_TUPLE 435
+#define _JUMP_TO_TOP 436
#define _LIST_APPEND LIST_APPEND
#define _LIST_EXTEND LIST_EXTEND
-#define _LOAD_ATTR 433
-#define _LOAD_ATTR_CLASS 434
+#define _LOAD_ATTR 437
+#define _LOAD_ATTR_CLASS 438
#define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN
-#define _LOAD_ATTR_INSTANCE_VALUE 435
-#define _LOAD_ATTR_METHOD_LAZY_DICT 436
-#define _LOAD_ATTR_METHOD_NO_DICT 437
-#define _LOAD_ATTR_METHOD_WITH_VALUES 438
-#define _LOAD_ATTR_MODULE 439
-#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 440
-#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 441
-#define _LOAD_ATTR_PROPERTY_FRAME 442
-#define _LOAD_ATTR_SLOT 443
-#define _LOAD_ATTR_WITH_HINT 444
+#define _LOAD_ATTR_INSTANCE_VALUE 439
+#define _LOAD_ATTR_METHOD_LAZY_DICT 440
+#define _LOAD_ATTR_METHOD_NO_DICT 441
+#define _LOAD_ATTR_METHOD_WITH_VALUES 442
+#define _LOAD_ATTR_MODULE 443
+#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 444
+#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 445
+#define _LOAD_ATTR_PROPERTY_FRAME 446
+#define _LOAD_ATTR_SLOT 447
+#define _LOAD_ATTR_WITH_HINT 448
#define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS
-#define _LOAD_BYTECODE 445
+#define _LOAD_BYTECODE 449
#define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT
#define _LOAD_CONST LOAD_CONST
-#define _LOAD_CONST_INLINE 446
-#define _LOAD_CONST_INLINE_BORROW 447
-#define _LOAD_CONST_UNDER_INLINE 448
-#define _LOAD_CONST_UNDER_INLINE_BORROW 449
+#define _LOAD_CONST_INLINE 450
+#define _LOAD_CONST_INLINE_BORROW 451
+#define _LOAD_CONST_UNDER_INLINE 452
+#define _LOAD_CONST_UNDER_INLINE_BORROW 453
#define _LOAD_DEREF LOAD_DEREF
-#define _LOAD_FAST 450
-#define _LOAD_FAST_0 451
-#define _LOAD_FAST_1 452
-#define _LOAD_FAST_2 453
-#define _LOAD_FAST_3 454
-#define _LOAD_FAST_4 455
-#define _LOAD_FAST_5 456
-#define _LOAD_FAST_6 457
-#define _LOAD_FAST_7 458
+#define _LOAD_FAST 454
+#define _LOAD_FAST_0 455
+#define _LOAD_FAST_1 456
+#define _LOAD_FAST_2 457
+#define _LOAD_FAST_3 458
+#define _LOAD_FAST_4 459
+#define _LOAD_FAST_5 460
+#define _LOAD_FAST_6 461
+#define _LOAD_FAST_7 462
#define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR
-#define _LOAD_FAST_BORROW 459
-#define _LOAD_FAST_BORROW_0 460
-#define _LOAD_FAST_BORROW_1 461
-#define _LOAD_FAST_BORROW_2 462
-#define _LOAD_FAST_BORROW_3 463
-#define _LOAD_FAST_BORROW_4 464
-#define _LOAD_FAST_BORROW_5 465
-#define _LOAD_FAST_BORROW_6 466
-#define _LOAD_FAST_BORROW_7 467
+#define _LOAD_FAST_BORROW 463
+#define _LOAD_FAST_BORROW_0 464
+#define _LOAD_FAST_BORROW_1 465
+#define _LOAD_FAST_BORROW_2 466
+#define _LOAD_FAST_BORROW_3 467
+#define _LOAD_FAST_BORROW_4 468
+#define _LOAD_FAST_BORROW_5 469
+#define _LOAD_FAST_BORROW_6 470
+#define _LOAD_FAST_BORROW_7 471
#define _LOAD_FAST_BORROW_LOAD_FAST_BORROW LOAD_FAST_BORROW_LOAD_FAST_BORROW
#define _LOAD_FAST_CHECK LOAD_FAST_CHECK
#define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST
#define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF
#define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS
-#define _LOAD_GLOBAL 468
-#define _LOAD_GLOBAL_BUILTINS 469
-#define _LOAD_GLOBAL_MODULE 470
+#define _LOAD_GLOBAL 472
+#define _LOAD_GLOBAL_BUILTINS 473
+#define _LOAD_GLOBAL_MODULE 474
#define _LOAD_LOCALS LOAD_LOCALS
#define _LOAD_NAME LOAD_NAME
-#define _LOAD_SMALL_INT 471
-#define _LOAD_SMALL_INT_0 472
-#define _LOAD_SMALL_INT_1 473
-#define _LOAD_SMALL_INT_2 474
-#define _LOAD_SMALL_INT_3 475
-#define _LOAD_SPECIAL 476
+#define _LOAD_SMALL_INT 475
+#define _LOAD_SMALL_INT_0 476
+#define _LOAD_SMALL_INT_1 477
+#define _LOAD_SMALL_INT_2 478
+#define _LOAD_SMALL_INT_3 479
+#define _LOAD_SPECIAL 480
#define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR
#define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD
-#define _MAKE_CALLARGS_A_TUPLE 477
+#define _MAKE_CALLARGS_A_TUPLE 481
#define _MAKE_CELL MAKE_CELL
#define _MAKE_FUNCTION MAKE_FUNCTION
-#define _MAKE_WARM 478
+#define _MAKE_WARM 482
#define _MAP_ADD MAP_ADD
#define _MATCH_CLASS MATCH_CLASS
#define _MATCH_KEYS MATCH_KEYS
#define _MATCH_MAPPING MATCH_MAPPING
#define _MATCH_SEQUENCE MATCH_SEQUENCE
-#define _MAYBE_EXPAND_METHOD 479
-#define _MAYBE_EXPAND_METHOD_KW 480
-#define _MONITOR_CALL 481
-#define _MONITOR_CALL_KW 482
-#define _MONITOR_JUMP_BACKWARD 483
-#define _MONITOR_RESUME 484
+#define _MAYBE_EXPAND_METHOD 483
+#define _MAYBE_EXPAND_METHOD_KW 484
+#define _MONITOR_CALL 485
+#define _MONITOR_CALL_KW 486
+#define _MONITOR_JUMP_BACKWARD 487
+#define _MONITOR_RESUME 488
#define _NOP NOP
-#define _POP_CALL 485
-#define _POP_CALL_LOAD_CONST_INLINE_BORROW 486
-#define _POP_CALL_ONE 487
-#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 488
-#define _POP_CALL_TWO 489
-#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 490
+#define _POP_CALL 489
+#define _POP_CALL_LOAD_CONST_INLINE_BORROW 490
+#define _POP_CALL_ONE 491
+#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 492
+#define _POP_CALL_TWO 493
+#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 494
#define _POP_EXCEPT POP_EXCEPT
#define _POP_ITER POP_ITER
-#define _POP_JUMP_IF_FALSE 491
-#define _POP_JUMP_IF_TRUE 492
+#define _POP_JUMP_IF_FALSE 495
+#define _POP_JUMP_IF_TRUE 496
#define _POP_TOP POP_TOP
-#define _POP_TOP_LOAD_CONST_INLINE 493
-#define _POP_TOP_LOAD_CONST_INLINE_BORROW 494
-#define _POP_TWO 495
-#define _POP_TWO_LOAD_CONST_INLINE_BORROW 496
+#define _POP_TOP_LOAD_CONST_INLINE 497
+#define _POP_TOP_LOAD_CONST_INLINE_BORROW 498
+#define _POP_TWO 499
+#define _POP_TWO_LOAD_CONST_INLINE_BORROW 500
#define _PUSH_EXC_INFO PUSH_EXC_INFO
-#define _PUSH_FRAME 497
+#define _PUSH_FRAME 501
#define _PUSH_NULL PUSH_NULL
-#define _PUSH_NULL_CONDITIONAL 498
-#define _PY_FRAME_GENERAL 499
-#define _PY_FRAME_KW 500
-#define _QUICKEN_RESUME 501
-#define _REPLACE_WITH_TRUE 502
+#define _PUSH_NULL_CONDITIONAL 502
+#define _PY_FRAME_GENERAL 503
+#define _PY_FRAME_KW 504
+#define _QUICKEN_RESUME 505
+#define _REPLACE_WITH_TRUE 506
#define _RESUME_CHECK RESUME_CHECK
#define _RETURN_GENERATOR RETURN_GENERATOR
#define _RETURN_VALUE RETURN_VALUE
-#define _SAVE_RETURN_OFFSET 503
-#define _SEND 504
-#define _SEND_GEN_FRAME 505
+#define _SAVE_RETURN_OFFSET 507
+#define _SEND 508
+#define _SEND_GEN_FRAME 509
#define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS
#define _SET_ADD SET_ADD
#define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE
#define _SET_UPDATE SET_UPDATE
-#define _START_EXECUTOR 506
-#define _STORE_ATTR 507
-#define _STORE_ATTR_INSTANCE_VALUE 508
-#define _STORE_ATTR_SLOT 509
-#define _STORE_ATTR_WITH_HINT 510
+#define _START_EXECUTOR 510
+#define _STORE_ATTR 511
+#define _STORE_ATTR_INSTANCE_VALUE 512
+#define _STORE_ATTR_SLOT 513
+#define _STORE_ATTR_WITH_HINT 514
#define _STORE_DEREF STORE_DEREF
-#define _STORE_FAST 511
-#define _STORE_FAST_0 512
-#define _STORE_FAST_1 513
-#define _STORE_FAST_2 514
-#define _STORE_FAST_3 515
-#define _STORE_FAST_4 516
-#define _STORE_FAST_5 517
-#define _STORE_FAST_6 518
-#define _STORE_FAST_7 519
+#define _STORE_FAST 515
+#define _STORE_FAST_0 516
+#define _STORE_FAST_1 517
+#define _STORE_FAST_2 518
+#define _STORE_FAST_3 519
+#define _STORE_FAST_4 520
+#define _STORE_FAST_5 521
+#define _STORE_FAST_6 522
+#define _STORE_FAST_7 523
#define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST
#define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST
#define _STORE_GLOBAL STORE_GLOBAL
#define _STORE_NAME STORE_NAME
-#define _STORE_SLICE 520
-#define _STORE_SUBSCR 521
-#define _STORE_SUBSCR_DICT 522
-#define _STORE_SUBSCR_LIST_INT 523
-#define _SWAP SWAP
-#define _TIER2_RESUME_CHECK 524
-#define _TO_BOOL 525
+#define _STORE_SLICE 524
+#define _STORE_SUBSCR 525
+#define _STORE_SUBSCR_DICT 526
+#define _STORE_SUBSCR_LIST_INT 527
+#define _SWAP 528
+#define _SWAP_2 529
+#define _SWAP_3 530
+#define _TIER2_RESUME_CHECK 531
+#define _TO_BOOL 532
#define _TO_BOOL_BOOL TO_BOOL_BOOL
#define _TO_BOOL_INT TO_BOOL_INT
-#define _TO_BOOL_LIST 526
+#define _TO_BOOL_LIST 533
#define _TO_BOOL_NONE TO_BOOL_NONE
-#define _TO_BOOL_STR 527
+#define _TO_BOOL_STR 534
#define _UNARY_INVERT UNARY_INVERT
#define _UNARY_NEGATIVE UNARY_NEGATIVE
#define _UNARY_NOT UNARY_NOT
#define _UNPACK_EX UNPACK_EX
-#define _UNPACK_SEQUENCE 528
-#define _UNPACK_SEQUENCE_LIST 529
-#define _UNPACK_SEQUENCE_TUPLE 530
-#define _UNPACK_SEQUENCE_TWO_TUPLE 531
+#define _UNPACK_SEQUENCE 535
+#define _UNPACK_SEQUENCE_LIST 536
+#define _UNPACK_SEQUENCE_TUPLE 537
+#define _UNPACK_SEQUENCE_TWO_TUPLE 538
#define _WITH_EXCEPT_START WITH_EXCEPT_START
#define _YIELD_VALUE YIELD_VALUE
-#define MAX_UOP_ID 531
+#define MAX_UOP_ID 538
#ifdef __cplusplus
}
diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h
index cd36023c25cbb4..fad87d4b586e64 100644
--- a/Include/internal/pycore_uop_metadata.h
+++ b/Include/internal/pycore_uop_metadata.h
@@ -12,7 +12,8 @@ extern "C" {
#include <stdint.h>
#include "pycore_uop_ids.h"
extern const uint16_t _PyUop_Flags[MAX_UOP_ID+1];
-extern const uint8_t _PyUop_Replication[MAX_UOP_ID+1];
+typedef struct _rep_range { uint8_t start; uint8_t stop; } ReplicationRange;
+extern const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1];
extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1];
extern int _PyUop_num_popped(int opcode, int oparg);
@@ -288,8 +289,13 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
[_CONVERT_VALUE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_FORMAT_SIMPLE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_FORMAT_WITH_SPEC] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
+ [_COPY_1] = HAS_PURE_FLAG,
+ [_COPY_2] = HAS_PURE_FLAG,
+ [_COPY_3] = HAS_PURE_FLAG,
[_COPY] = HAS_ARG_FLAG | HAS_PURE_FLAG,
[_BINARY_OP] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG |
HAS_ESCAPES_FLAG,
+ [_SWAP_2] = HAS_PURE_FLAG,
+ [_SWAP_3] = HAS_PURE_FLAG,
[_SWAP] = HAS_ARG_FLAG | HAS_PURE_FLAG,
[_GUARD_IS_TRUE_POP] = HAS_EXIT_FLAG,
[_GUARD_IS_FALSE_POP] = HAS_EXIT_FLAG,
@@ -323,12 +329,14 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
[_TIER2_RESUME_CHECK] = HAS_DEOPT_FLAG,
};
-const uint8_t _PyUop_Replication[MAX_UOP_ID+1] = {
- [_LOAD_FAST] = 8,
- [_LOAD_FAST_BORROW] = 8,
- [_LOAD_SMALL_INT] = 4,
- [_STORE_FAST] = 8,
- [_INIT_CALL_PY_EXACT_ARGS] = 5,
+const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1] = {
+ [_LOAD_FAST] = { 0, 8 },
+ [_LOAD_FAST_BORROW] = { 0, 8 },
+ [_LOAD_SMALL_INT] = { 0, 4 },
+ [_STORE_FAST] = { 0, 8 },
+ [_INIT_CALL_PY_EXACT_ARGS] = { 0, 5 },
+ [_COPY] = { 1, 4 },
+ [_SWAP] = { 2, 4 },
};
const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
@@ -408,6 +416,9 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
[_CONTAINS_OP_SET] = "_CONTAINS_OP_SET",
[_CONVERT_VALUE] = "_CONVERT_VALUE",
[_COPY] = "_COPY",
+ [_COPY_1] = "_COPY_1",
+ [_COPY_2] = "_COPY_2",
+ [_COPY_3] = "_COPY_3",
[_COPY_FREE_VARS] = "_COPY_FREE_VARS",
[_CREATE_INIT_FRAME] = "_CREATE_INIT_FRAME",
[_DELETE_ATTR] = "_DELETE_ATTR",
@@ -617,6 +628,8 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
[_STORE_SUBSCR_DICT] = "_STORE_SUBSCR_DICT",
[_STORE_SUBSCR_LIST_INT] = "_STORE_SUBSCR_LIST_INT",
[_SWAP] = "_SWAP",
+ [_SWAP_2] = "_SWAP_2",
+ [_SWAP_3] = "_SWAP_3",
[_TIER2_RESUME_CHECK] = "_TIER2_RESUME_CHECK",
[_TO_BOOL] = "_TO_BOOL",
[_TO_BOOL_BOOL] = "_TO_BOOL_BOOL",
@@ -1176,10 +1189,20 @@ int _PyUop_num_popped(int opcode, int oparg)
return 1;
case _FORMAT_WITH_SPEC:
return 2;
+ case _COPY_1:
+ return 0;
+ case _COPY_2:
+ return 0;
+ case _COPY_3:
+ return 0;
case _COPY:
return 0;
case _BINARY_OP:
return 2;
+ case _SWAP_2:
+ return 0;
+ case _SWAP_3:
+ return 0;
case _SWAP:
return 0;
case _GUARD_IS_TRUE_POP:
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index 971e97a5784692..27a04766cc8dd8 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -4946,8 +4946,7 @@ dummy_func(
res = PyStackRef_FromPyObjectSteal(res_o);
}
- pure inst(COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1],
top)) {
- assert(oparg > 0);
+ pure replicate(1:4) inst(COPY, (bottom, unused[oparg-1] -- bottom,
unused[oparg-1], top)) {
top = PyStackRef_DUP(bottom);
}
@@ -4980,12 +4979,11 @@ dummy_func(
macro(BINARY_OP) = _SPECIALIZE_BINARY_OP + unused/4 + _BINARY_OP;
- pure inst(SWAP, (bottom, unused[oparg-2], top --
+ pure replicate(2:4) inst(SWAP, (bottom, unused[oparg-2], top --
bottom, unused[oparg-2], top)) {
_PyStackRef temp = bottom;
bottom = top;
top = temp;
- assert(oparg >= 2);
}
inst(INSTRUMENTED_LINE, ( -- )) {
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index dbfb2391bf0623..74c78e4d1f5a69 100644
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@@ -6763,12 +6763,44 @@
break;
}
+ case _COPY_1: {
+ _PyStackRef bottom;
+ _PyStackRef top;
+ bottom = stack_pointer[-1];
+ top = PyStackRef_DUP(bottom);
+ stack_pointer[0] = top;
+ stack_pointer += 1;
+ assert(WITHIN_STACK_BOUNDS());
+ break;
+ }
+
+ case _COPY_2: {
+ _PyStackRef bottom;
+ _PyStackRef top;
+ bottom = stack_pointer[-2];
+ top = PyStackRef_DUP(bottom);
+ stack_pointer[0] = top;
+ stack_pointer += 1;
+ assert(WITHIN_STACK_BOUNDS());
+ break;
+ }
+
+ case _COPY_3: {
+ _PyStackRef bottom;
+ _PyStackRef top;
+ bottom = stack_pointer[-3];
+ top = PyStackRef_DUP(bottom);
+ stack_pointer[0] = top;
+ stack_pointer += 1;
+ assert(WITHIN_STACK_BOUNDS());
+ break;
+ }
+
case _COPY: {
_PyStackRef bottom;
_PyStackRef top;
oparg = CURRENT_OPARG();
bottom = stack_pointer[-1 - (oparg-1)];
- assert(oparg > 0);
top = PyStackRef_DUP(bottom);
stack_pointer[0] = top;
stack_pointer += 1;
@@ -6808,6 +6840,32 @@
break;
}
+ case _SWAP_2: {
+ _PyStackRef top;
+ _PyStackRef bottom;
+ top = stack_pointer[-1];
+ bottom = stack_pointer[-2];
+ _PyStackRef temp = bottom;
+ bottom = top;
+ top = temp;
+ stack_pointer[-2] = bottom;
+ stack_pointer[-1] = top;
+ break;
+ }
+
+ case _SWAP_3: {
+ _PyStackRef top;
+ _PyStackRef bottom;
+ top = stack_pointer[-1];
+ bottom = stack_pointer[-3];
+ _PyStackRef temp = bottom;
+ bottom = top;
+ top = temp;
+ stack_pointer[-3] = bottom;
+ stack_pointer[-1] = top;
+ break;
+ }
+
case _SWAP: {
_PyStackRef top;
_PyStackRef bottom;
@@ -6817,7 +6875,6 @@
_PyStackRef temp = bottom;
bottom = top;
top = temp;
- assert(oparg >= 2);
stack_pointer[-2 - (oparg-2)] = bottom;
stack_pointer[-1] = top;
break;
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index 2cf027c539b992..4fc1d5266d0a87 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -5228,7 +5228,6 @@
_PyStackRef bottom;
_PyStackRef top;
bottom = stack_pointer[-1 - (oparg-1)];
- assert(oparg > 0);
top = PyStackRef_DUP(bottom);
stack_pointer[0] = top;
stack_pointer += 1;
@@ -11568,7 +11567,6 @@
_PyStackRef temp = bottom;
bottom = top;
top = temp;
- assert(oparg >= 2);
stack_pointer[-2 - (oparg-2)] = bottom;
stack_pointer[-1] = top;
DISPATCH();
diff --git a/Python/optimizer.c b/Python/optimizer.c
index dde3dd8ebe745a..8d01d605ef4a2a 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -1292,8 +1292,8 @@ uop_optimize(
for (int pc = 0; pc < length; pc++) {
int opcode = buffer[pc].opcode;
int oparg = buffer[pc].oparg;
- if (oparg < _PyUop_Replication[opcode]) {
- buffer[pc].opcode = opcode + oparg + 1;
+ if (oparg < _PyUop_Replication[opcode].stop && oparg >=
_PyUop_Replication[opcode].start) {
+ buffer[pc].opcode = opcode + oparg + 1 -
_PyUop_Replication[opcode].start;
assert(strncmp(_PyOpcode_uop_name[buffer[pc].opcode],
_PyOpcode_uop_name[opcode], strlen(_PyOpcode_uop_name[opcode])) == 0);
}
else if (is_terminator(&buffer[pc])) {
diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py
index ca6d0301f3572d..c6a9fbcad8891f 100644
--- a/Tools/cases_generator/analyzer.py
+++ b/Tools/cases_generator/analyzer.py
@@ -180,7 +180,7 @@ class Uop:
properties: Properties
_size: int = -1
implicitly_created: bool = False
- replicated = 0
+ replicated = range(0)
replicates: "Uop | None" = None
# Size of the instruction(s), only set for uops containing the
INSTRUCTION_SIZE macro
instruction_size: int | None = None
@@ -868,6 +868,28 @@ def compute_properties(op: parser.CodeDef) -> Properties:
needs_prev=variable_used(op, "prev_instr"),
)
+def expand(items: list[StackItem], oparg: int) -> list[StackItem]:
+ # Only replace array item with scalar if no more than one item is an array
+ index = -1
+ for i, item in enumerate(items):
+ if "oparg" in item.size:
+ if index >= 0:
+ return items
+ index = i
+ if index < 0:
+ return items
+ try:
+ count = int(eval(items[index].size.replace("oparg", str(oparg))))
+ except ValueError:
+ return items
+ return items[:index] + [
+ StackItem(items[index].name + f"_{i}", "", items[index].peek,
items[index].used) for i in range(count)
+ ] + items[index+1:]
+
+def scalarize_stack(stack: StackEffect, oparg: int) -> StackEffect:
+ stack.inputs = expand(stack.inputs, oparg)
+ stack.outputs = expand(stack.outputs, oparg)
+ return stack
def make_uop(
name: str,
@@ -887,20 +909,26 @@ def make_uop(
)
for anno in op.annotations:
if anno.startswith("replicate"):
- result.replicated = int(anno[10:-1])
+ text = anno[10:-1]
+ start, stop = text.split(":")
+ result.replicated = range(int(start), int(stop))
break
else:
return result
- for oparg in range(result.replicated):
+ for oparg in result.replicated:
name_x = name + "_" + str(oparg)
properties = compute_properties(op)
properties.oparg = False
- properties.const_oparg = oparg
+ stack = analyze_stack(op)
+ if not variable_used(op, "oparg"):
+ stack = scalarize_stack(stack, oparg)
+ else:
+ properties.const_oparg = oparg
rep = Uop(
name=name_x,
context=op.context,
annotations=op.annotations,
- stack=analyze_stack(op),
+ stack=stack,
caches=analyze_caches(inputs),
local_stores=find_variable_stores(op),
body=op.block,
diff --git a/Tools/cases_generator/parsing.py b/Tools/cases_generator/parsing.py
index a6dac48187525d..c7fe0d162ac6e4 100644
--- a/Tools/cases_generator/parsing.py
+++ b/Tools/cases_generator/parsing.py
@@ -379,9 +379,13 @@ def inst_header(self) -> InstHeader | None:
while anno := self.expect(lx.ANNOTATION):
if anno.text == "replicate":
self.require(lx.LPAREN)
- times = self.require(lx.NUMBER)
+ stop = self.require(lx.NUMBER)
+ start_text = "0"
+ if self.expect(lx.COLON):
+ start_text = stop.text
+ stop = self.require(lx.NUMBER)
self.require(lx.RPAREN)
- annotations.append(f"replicate({times.text})")
+ annotations.append(f"replicate({start_text}:{stop.text})")
else:
annotations.append(anno.text)
tkn = self.expect(lx.INST)
diff --git a/Tools/cases_generator/uop_metadata_generator.py b/Tools/cases_generator/uop_metadata_generator.py
index 6f995e5c46bfcf..1cc23837a72dea 100644
--- a/Tools/cases_generator/uop_metadata_generator.py
+++ b/Tools/cases_generator/uop_metadata_generator.py
@@ -24,7 +24,8 @@
def generate_names_and_flags(analysis: Analysis, out: CWriter) -> None:
out.emit("extern const uint16_t _PyUop_Flags[MAX_UOP_ID+1];\n")
- out.emit("extern const uint8_t _PyUop_Replication[MAX_UOP_ID+1];\n")
+ out.emit("typedef struct _rep_range { uint8_t start; uint8_t stop; }
ReplicationRange;\n")
+ out.emit("extern const ReplicationRange
_PyUop_Replication[MAX_UOP_ID+1];\n")
out.emit("extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1];\n\n")
out.emit("extern int _PyUop_num_popped(int opcode, int oparg);\n\n")
out.emit("#ifdef NEED_OPCODE_METADATA\n")
@@ -34,10 +35,11 @@ def generate_names_and_flags(analysis: Analysis, out: CWriter) -> None:
out.emit(f"[{uop.name}] = {cflags(uop.properties)},\n")
out.emit("};\n\n")
- out.emit("const uint8_t _PyUop_Replication[MAX_UOP_ID+1] = {\n")
+ out.emit("const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1] = {\n")
for uop in analysis.uops.values():
if uop.replicated:
- out.emit(f"[{uop.name}] = {uop.replicated},\n")
+ assert(uop.replicated.step == 1)
+ out.emit(f"[{uop.name}] = {{ {uop.replicated.start},
{uop.replicated.stop} }},\n")
out.emit("};\n\n")
out.emit("const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {\n")
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org
Member address: [email protected]