https://github.com/python/cpython/commit/8dd8b5c2f0785675b9282b719256341448d49967
commit: 8dd8b5c2f0785675b9282b719256341448d49967
branch: main
author: Mark Shannon <[email protected]>
committer: markshannon <[email protected]>
date: 2025-06-17T13:43:09+01:00
summary:

GH-135379: Support limited scalar replacement for replicated uops in the JIT code generator. (GH-135563)

* Use it to support efficient specializations of COPY and SWAP in the JIT.

files:
M Include/internal/pycore_uop_ids.h
M Include/internal/pycore_uop_metadata.h
M Python/bytecodes.c
M Python/executor_cases.c.h
M Python/generated_cases.c.h
M Python/optimizer.c
M Tools/cases_generator/analyzer.py
M Tools/cases_generator/parsing.py
M Tools/cases_generator/uop_metadata_generator.py

diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h
index 2b845527cf2ed5..8211c5d056535e 100644
--- a/Include/internal/pycore_uop_ids.h
+++ b/Include/internal/pycore_uop_ids.h
@@ -86,86 +86,89 @@ extern "C" {
 #define _CONTAINS_OP_DICT 362
 #define _CONTAINS_OP_SET 363
 #define _CONVERT_VALUE CONVERT_VALUE
-#define _COPY COPY
+#define _COPY 364
+#define _COPY_1 365
+#define _COPY_2 366
+#define _COPY_3 367
 #define _COPY_FREE_VARS COPY_FREE_VARS
-#define _CREATE_INIT_FRAME 364
+#define _CREATE_INIT_FRAME 368
 #define _DELETE_ATTR DELETE_ATTR
 #define _DELETE_DEREF DELETE_DEREF
 #define _DELETE_FAST DELETE_FAST
 #define _DELETE_GLOBAL DELETE_GLOBAL
 #define _DELETE_NAME DELETE_NAME
 #define _DELETE_SUBSCR DELETE_SUBSCR
-#define _DEOPT 365
+#define _DEOPT 369
 #define _DICT_MERGE DICT_MERGE
 #define _DICT_UPDATE DICT_UPDATE
-#define _DO_CALL 366
-#define _DO_CALL_FUNCTION_EX 367
-#define _DO_CALL_KW 368
+#define _DO_CALL 370
+#define _DO_CALL_FUNCTION_EX 371
+#define _DO_CALL_KW 372
 #define _END_FOR END_FOR
 #define _END_SEND END_SEND
-#define _ERROR_POP_N 369
+#define _ERROR_POP_N 373
 #define _EXIT_INIT_CHECK EXIT_INIT_CHECK
-#define _EXPAND_METHOD 370
-#define _EXPAND_METHOD_KW 371
-#define _FATAL_ERROR 372
+#define _EXPAND_METHOD 374
+#define _EXPAND_METHOD_KW 375
+#define _FATAL_ERROR 376
 #define _FORMAT_SIMPLE FORMAT_SIMPLE
 #define _FORMAT_WITH_SPEC FORMAT_WITH_SPEC
-#define _FOR_ITER 373
-#define _FOR_ITER_GEN_FRAME 374
-#define _FOR_ITER_TIER_TWO 375
+#define _FOR_ITER 377
+#define _FOR_ITER_GEN_FRAME 378
+#define _FOR_ITER_TIER_TWO 379
 #define _GET_AITER GET_AITER
 #define _GET_ANEXT GET_ANEXT
 #define _GET_AWAITABLE GET_AWAITABLE
 #define _GET_ITER GET_ITER
 #define _GET_LEN GET_LEN
 #define _GET_YIELD_FROM_ITER GET_YIELD_FROM_ITER
-#define _GUARD_BINARY_OP_EXTEND 376
-#define _GUARD_CALLABLE_ISINSTANCE 377
-#define _GUARD_CALLABLE_LEN 378
-#define _GUARD_CALLABLE_LIST_APPEND 379
-#define _GUARD_CALLABLE_STR_1 380
-#define _GUARD_CALLABLE_TUPLE_1 381
-#define _GUARD_CALLABLE_TYPE_1 382
-#define _GUARD_DORV_NO_DICT 383
-#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 384
-#define _GUARD_GLOBALS_VERSION 385
-#define _GUARD_IS_FALSE_POP 386
-#define _GUARD_IS_NONE_POP 387
-#define _GUARD_IS_NOT_NONE_POP 388
-#define _GUARD_IS_TRUE_POP 389
-#define _GUARD_KEYS_VERSION 390
-#define _GUARD_NOS_DICT 391
-#define _GUARD_NOS_FLOAT 392
-#define _GUARD_NOS_INT 393
-#define _GUARD_NOS_LIST 394
-#define _GUARD_NOS_NOT_NULL 395
-#define _GUARD_NOS_NULL 396
-#define _GUARD_NOS_TUPLE 397
-#define _GUARD_NOS_UNICODE 398
-#define _GUARD_NOT_EXHAUSTED_LIST 399
-#define _GUARD_NOT_EXHAUSTED_RANGE 400
-#define _GUARD_NOT_EXHAUSTED_TUPLE 401
-#define _GUARD_THIRD_NULL 402
-#define _GUARD_TOS_ANY_SET 403
-#define _GUARD_TOS_DICT 404
-#define _GUARD_TOS_FLOAT 405
-#define _GUARD_TOS_INT 406
-#define _GUARD_TOS_LIST 407
-#define _GUARD_TOS_SLICE 408
-#define _GUARD_TOS_TUPLE 409
-#define _GUARD_TOS_UNICODE 410
-#define _GUARD_TYPE_VERSION 411
-#define _GUARD_TYPE_VERSION_AND_LOCK 412
+#define _GUARD_BINARY_OP_EXTEND 380
+#define _GUARD_CALLABLE_ISINSTANCE 381
+#define _GUARD_CALLABLE_LEN 382
+#define _GUARD_CALLABLE_LIST_APPEND 383
+#define _GUARD_CALLABLE_STR_1 384
+#define _GUARD_CALLABLE_TUPLE_1 385
+#define _GUARD_CALLABLE_TYPE_1 386
+#define _GUARD_DORV_NO_DICT 387
+#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 388
+#define _GUARD_GLOBALS_VERSION 389
+#define _GUARD_IS_FALSE_POP 390
+#define _GUARD_IS_NONE_POP 391
+#define _GUARD_IS_NOT_NONE_POP 392
+#define _GUARD_IS_TRUE_POP 393
+#define _GUARD_KEYS_VERSION 394
+#define _GUARD_NOS_DICT 395
+#define _GUARD_NOS_FLOAT 396
+#define _GUARD_NOS_INT 397
+#define _GUARD_NOS_LIST 398
+#define _GUARD_NOS_NOT_NULL 399
+#define _GUARD_NOS_NULL 400
+#define _GUARD_NOS_TUPLE 401
+#define _GUARD_NOS_UNICODE 402
+#define _GUARD_NOT_EXHAUSTED_LIST 403
+#define _GUARD_NOT_EXHAUSTED_RANGE 404
+#define _GUARD_NOT_EXHAUSTED_TUPLE 405
+#define _GUARD_THIRD_NULL 406
+#define _GUARD_TOS_ANY_SET 407
+#define _GUARD_TOS_DICT 408
+#define _GUARD_TOS_FLOAT 409
+#define _GUARD_TOS_INT 410
+#define _GUARD_TOS_LIST 411
+#define _GUARD_TOS_SLICE 412
+#define _GUARD_TOS_TUPLE 413
+#define _GUARD_TOS_UNICODE 414
+#define _GUARD_TYPE_VERSION 415
+#define _GUARD_TYPE_VERSION_AND_LOCK 416
 #define _IMPORT_FROM IMPORT_FROM
 #define _IMPORT_NAME IMPORT_NAME
-#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 413
-#define _INIT_CALL_PY_EXACT_ARGS 414
-#define _INIT_CALL_PY_EXACT_ARGS_0 415
-#define _INIT_CALL_PY_EXACT_ARGS_1 416
-#define _INIT_CALL_PY_EXACT_ARGS_2 417
-#define _INIT_CALL_PY_EXACT_ARGS_3 418
-#define _INIT_CALL_PY_EXACT_ARGS_4 419
-#define _INSERT_NULL 420
+#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 417
+#define _INIT_CALL_PY_EXACT_ARGS 418
+#define _INIT_CALL_PY_EXACT_ARGS_0 419
+#define _INIT_CALL_PY_EXACT_ARGS_1 420
+#define _INIT_CALL_PY_EXACT_ARGS_2 421
+#define _INIT_CALL_PY_EXACT_ARGS_3 422
+#define _INIT_CALL_PY_EXACT_ARGS_4 423
+#define _INSERT_NULL 424
 #define _INSTRUMENTED_FOR_ITER INSTRUMENTED_FOR_ITER
 #define _INSTRUMENTED_INSTRUCTION INSTRUMENTED_INSTRUCTION
 #define _INSTRUMENTED_JUMP_FORWARD INSTRUMENTED_JUMP_FORWARD
@@ -175,171 +178,173 @@ extern "C" {
 #define _INSTRUMENTED_POP_JUMP_IF_NONE INSTRUMENTED_POP_JUMP_IF_NONE
 #define _INSTRUMENTED_POP_JUMP_IF_NOT_NONE INSTRUMENTED_POP_JUMP_IF_NOT_NONE
 #define _INSTRUMENTED_POP_JUMP_IF_TRUE INSTRUMENTED_POP_JUMP_IF_TRUE
-#define _IS_NONE 421
+#define _IS_NONE 425
 #define _IS_OP IS_OP
-#define _ITER_CHECK_LIST 422
-#define _ITER_CHECK_RANGE 423
-#define _ITER_CHECK_TUPLE 424
-#define _ITER_JUMP_LIST 425
-#define _ITER_JUMP_RANGE 426
-#define _ITER_JUMP_TUPLE 427
-#define _ITER_NEXT_LIST 428
-#define _ITER_NEXT_LIST_TIER_TWO 429
-#define _ITER_NEXT_RANGE 430
-#define _ITER_NEXT_TUPLE 431
-#define _JUMP_TO_TOP 432
+#define _ITER_CHECK_LIST 426
+#define _ITER_CHECK_RANGE 427
+#define _ITER_CHECK_TUPLE 428
+#define _ITER_JUMP_LIST 429
+#define _ITER_JUMP_RANGE 430
+#define _ITER_JUMP_TUPLE 431
+#define _ITER_NEXT_LIST 432
+#define _ITER_NEXT_LIST_TIER_TWO 433
+#define _ITER_NEXT_RANGE 434
+#define _ITER_NEXT_TUPLE 435
+#define _JUMP_TO_TOP 436
 #define _LIST_APPEND LIST_APPEND
 #define _LIST_EXTEND LIST_EXTEND
-#define _LOAD_ATTR 433
-#define _LOAD_ATTR_CLASS 434
+#define _LOAD_ATTR 437
+#define _LOAD_ATTR_CLASS 438
 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN
-#define _LOAD_ATTR_INSTANCE_VALUE 435
-#define _LOAD_ATTR_METHOD_LAZY_DICT 436
-#define _LOAD_ATTR_METHOD_NO_DICT 437
-#define _LOAD_ATTR_METHOD_WITH_VALUES 438
-#define _LOAD_ATTR_MODULE 439
-#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 440
-#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 441
-#define _LOAD_ATTR_PROPERTY_FRAME 442
-#define _LOAD_ATTR_SLOT 443
-#define _LOAD_ATTR_WITH_HINT 444
+#define _LOAD_ATTR_INSTANCE_VALUE 439
+#define _LOAD_ATTR_METHOD_LAZY_DICT 440
+#define _LOAD_ATTR_METHOD_NO_DICT 441
+#define _LOAD_ATTR_METHOD_WITH_VALUES 442
+#define _LOAD_ATTR_MODULE 443
+#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 444
+#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 445
+#define _LOAD_ATTR_PROPERTY_FRAME 446
+#define _LOAD_ATTR_SLOT 447
+#define _LOAD_ATTR_WITH_HINT 448
 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS
-#define _LOAD_BYTECODE 445
+#define _LOAD_BYTECODE 449
 #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT
 #define _LOAD_CONST LOAD_CONST
-#define _LOAD_CONST_INLINE 446
-#define _LOAD_CONST_INLINE_BORROW 447
-#define _LOAD_CONST_UNDER_INLINE 448
-#define _LOAD_CONST_UNDER_INLINE_BORROW 449
+#define _LOAD_CONST_INLINE 450
+#define _LOAD_CONST_INLINE_BORROW 451
+#define _LOAD_CONST_UNDER_INLINE 452
+#define _LOAD_CONST_UNDER_INLINE_BORROW 453
 #define _LOAD_DEREF LOAD_DEREF
-#define _LOAD_FAST 450
-#define _LOAD_FAST_0 451
-#define _LOAD_FAST_1 452
-#define _LOAD_FAST_2 453
-#define _LOAD_FAST_3 454
-#define _LOAD_FAST_4 455
-#define _LOAD_FAST_5 456
-#define _LOAD_FAST_6 457
-#define _LOAD_FAST_7 458
+#define _LOAD_FAST 454
+#define _LOAD_FAST_0 455
+#define _LOAD_FAST_1 456
+#define _LOAD_FAST_2 457
+#define _LOAD_FAST_3 458
+#define _LOAD_FAST_4 459
+#define _LOAD_FAST_5 460
+#define _LOAD_FAST_6 461
+#define _LOAD_FAST_7 462
 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR
-#define _LOAD_FAST_BORROW 459
-#define _LOAD_FAST_BORROW_0 460
-#define _LOAD_FAST_BORROW_1 461
-#define _LOAD_FAST_BORROW_2 462
-#define _LOAD_FAST_BORROW_3 463
-#define _LOAD_FAST_BORROW_4 464
-#define _LOAD_FAST_BORROW_5 465
-#define _LOAD_FAST_BORROW_6 466
-#define _LOAD_FAST_BORROW_7 467
+#define _LOAD_FAST_BORROW 463
+#define _LOAD_FAST_BORROW_0 464
+#define _LOAD_FAST_BORROW_1 465
+#define _LOAD_FAST_BORROW_2 466
+#define _LOAD_FAST_BORROW_3 467
+#define _LOAD_FAST_BORROW_4 468
+#define _LOAD_FAST_BORROW_5 469
+#define _LOAD_FAST_BORROW_6 470
+#define _LOAD_FAST_BORROW_7 471
 #define _LOAD_FAST_BORROW_LOAD_FAST_BORROW LOAD_FAST_BORROW_LOAD_FAST_BORROW
 #define _LOAD_FAST_CHECK LOAD_FAST_CHECK
 #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST
 #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF
 #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS
-#define _LOAD_GLOBAL 468
-#define _LOAD_GLOBAL_BUILTINS 469
-#define _LOAD_GLOBAL_MODULE 470
+#define _LOAD_GLOBAL 472
+#define _LOAD_GLOBAL_BUILTINS 473
+#define _LOAD_GLOBAL_MODULE 474
 #define _LOAD_LOCALS LOAD_LOCALS
 #define _LOAD_NAME LOAD_NAME
-#define _LOAD_SMALL_INT 471
-#define _LOAD_SMALL_INT_0 472
-#define _LOAD_SMALL_INT_1 473
-#define _LOAD_SMALL_INT_2 474
-#define _LOAD_SMALL_INT_3 475
-#define _LOAD_SPECIAL 476
+#define _LOAD_SMALL_INT 475
+#define _LOAD_SMALL_INT_0 476
+#define _LOAD_SMALL_INT_1 477
+#define _LOAD_SMALL_INT_2 478
+#define _LOAD_SMALL_INT_3 479
+#define _LOAD_SPECIAL 480
 #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR
 #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD
-#define _MAKE_CALLARGS_A_TUPLE 477
+#define _MAKE_CALLARGS_A_TUPLE 481
 #define _MAKE_CELL MAKE_CELL
 #define _MAKE_FUNCTION MAKE_FUNCTION
-#define _MAKE_WARM 478
+#define _MAKE_WARM 482
 #define _MAP_ADD MAP_ADD
 #define _MATCH_CLASS MATCH_CLASS
 #define _MATCH_KEYS MATCH_KEYS
 #define _MATCH_MAPPING MATCH_MAPPING
 #define _MATCH_SEQUENCE MATCH_SEQUENCE
-#define _MAYBE_EXPAND_METHOD 479
-#define _MAYBE_EXPAND_METHOD_KW 480
-#define _MONITOR_CALL 481
-#define _MONITOR_CALL_KW 482
-#define _MONITOR_JUMP_BACKWARD 483
-#define _MONITOR_RESUME 484
+#define _MAYBE_EXPAND_METHOD 483
+#define _MAYBE_EXPAND_METHOD_KW 484
+#define _MONITOR_CALL 485
+#define _MONITOR_CALL_KW 486
+#define _MONITOR_JUMP_BACKWARD 487
+#define _MONITOR_RESUME 488
 #define _NOP NOP
-#define _POP_CALL 485
-#define _POP_CALL_LOAD_CONST_INLINE_BORROW 486
-#define _POP_CALL_ONE 487
-#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 488
-#define _POP_CALL_TWO 489
-#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 490
+#define _POP_CALL 489
+#define _POP_CALL_LOAD_CONST_INLINE_BORROW 490
+#define _POP_CALL_ONE 491
+#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 492
+#define _POP_CALL_TWO 493
+#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 494
 #define _POP_EXCEPT POP_EXCEPT
 #define _POP_ITER POP_ITER
-#define _POP_JUMP_IF_FALSE 491
-#define _POP_JUMP_IF_TRUE 492
+#define _POP_JUMP_IF_FALSE 495
+#define _POP_JUMP_IF_TRUE 496
 #define _POP_TOP POP_TOP
-#define _POP_TOP_LOAD_CONST_INLINE 493
-#define _POP_TOP_LOAD_CONST_INLINE_BORROW 494
-#define _POP_TWO 495
-#define _POP_TWO_LOAD_CONST_INLINE_BORROW 496
+#define _POP_TOP_LOAD_CONST_INLINE 497
+#define _POP_TOP_LOAD_CONST_INLINE_BORROW 498
+#define _POP_TWO 499
+#define _POP_TWO_LOAD_CONST_INLINE_BORROW 500
 #define _PUSH_EXC_INFO PUSH_EXC_INFO
-#define _PUSH_FRAME 497
+#define _PUSH_FRAME 501
 #define _PUSH_NULL PUSH_NULL
-#define _PUSH_NULL_CONDITIONAL 498
-#define _PY_FRAME_GENERAL 499
-#define _PY_FRAME_KW 500
-#define _QUICKEN_RESUME 501
-#define _REPLACE_WITH_TRUE 502
+#define _PUSH_NULL_CONDITIONAL 502
+#define _PY_FRAME_GENERAL 503
+#define _PY_FRAME_KW 504
+#define _QUICKEN_RESUME 505
+#define _REPLACE_WITH_TRUE 506
 #define _RESUME_CHECK RESUME_CHECK
 #define _RETURN_GENERATOR RETURN_GENERATOR
 #define _RETURN_VALUE RETURN_VALUE
-#define _SAVE_RETURN_OFFSET 503
-#define _SEND 504
-#define _SEND_GEN_FRAME 505
+#define _SAVE_RETURN_OFFSET 507
+#define _SEND 508
+#define _SEND_GEN_FRAME 509
 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS
 #define _SET_ADD SET_ADD
 #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE
 #define _SET_UPDATE SET_UPDATE
-#define _START_EXECUTOR 506
-#define _STORE_ATTR 507
-#define _STORE_ATTR_INSTANCE_VALUE 508
-#define _STORE_ATTR_SLOT 509
-#define _STORE_ATTR_WITH_HINT 510
+#define _START_EXECUTOR 510
+#define _STORE_ATTR 511
+#define _STORE_ATTR_INSTANCE_VALUE 512
+#define _STORE_ATTR_SLOT 513
+#define _STORE_ATTR_WITH_HINT 514
 #define _STORE_DEREF STORE_DEREF
-#define _STORE_FAST 511
-#define _STORE_FAST_0 512
-#define _STORE_FAST_1 513
-#define _STORE_FAST_2 514
-#define _STORE_FAST_3 515
-#define _STORE_FAST_4 516
-#define _STORE_FAST_5 517
-#define _STORE_FAST_6 518
-#define _STORE_FAST_7 519
+#define _STORE_FAST 515
+#define _STORE_FAST_0 516
+#define _STORE_FAST_1 517
+#define _STORE_FAST_2 518
+#define _STORE_FAST_3 519
+#define _STORE_FAST_4 520
+#define _STORE_FAST_5 521
+#define _STORE_FAST_6 522
+#define _STORE_FAST_7 523
 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST
 #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST
 #define _STORE_GLOBAL STORE_GLOBAL
 #define _STORE_NAME STORE_NAME
-#define _STORE_SLICE 520
-#define _STORE_SUBSCR 521
-#define _STORE_SUBSCR_DICT 522
-#define _STORE_SUBSCR_LIST_INT 523
-#define _SWAP SWAP
-#define _TIER2_RESUME_CHECK 524
-#define _TO_BOOL 525
+#define _STORE_SLICE 524
+#define _STORE_SUBSCR 525
+#define _STORE_SUBSCR_DICT 526
+#define _STORE_SUBSCR_LIST_INT 527
+#define _SWAP 528
+#define _SWAP_2 529
+#define _SWAP_3 530
+#define _TIER2_RESUME_CHECK 531
+#define _TO_BOOL 532
 #define _TO_BOOL_BOOL TO_BOOL_BOOL
 #define _TO_BOOL_INT TO_BOOL_INT
-#define _TO_BOOL_LIST 526
+#define _TO_BOOL_LIST 533
 #define _TO_BOOL_NONE TO_BOOL_NONE
-#define _TO_BOOL_STR 527
+#define _TO_BOOL_STR 534
 #define _UNARY_INVERT UNARY_INVERT
 #define _UNARY_NEGATIVE UNARY_NEGATIVE
 #define _UNARY_NOT UNARY_NOT
 #define _UNPACK_EX UNPACK_EX
-#define _UNPACK_SEQUENCE 528
-#define _UNPACK_SEQUENCE_LIST 529
-#define _UNPACK_SEQUENCE_TUPLE 530
-#define _UNPACK_SEQUENCE_TWO_TUPLE 531
+#define _UNPACK_SEQUENCE 535
+#define _UNPACK_SEQUENCE_LIST 536
+#define _UNPACK_SEQUENCE_TUPLE 537
+#define _UNPACK_SEQUENCE_TWO_TUPLE 538
 #define _WITH_EXCEPT_START WITH_EXCEPT_START
 #define _YIELD_VALUE YIELD_VALUE
-#define MAX_UOP_ID 531
+#define MAX_UOP_ID 538
 
 #ifdef __cplusplus
 }
diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h
index cd36023c25cbb4..fad87d4b586e64 100644
--- a/Include/internal/pycore_uop_metadata.h
+++ b/Include/internal/pycore_uop_metadata.h
@@ -12,7 +12,8 @@ extern "C" {
 #include <stdint.h>
 #include "pycore_uop_ids.h"
 extern const uint16_t _PyUop_Flags[MAX_UOP_ID+1];
-extern const uint8_t _PyUop_Replication[MAX_UOP_ID+1];
+typedef struct _rep_range { uint8_t start; uint8_t stop; } ReplicationRange;
+extern const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1];
 extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1];
 
 extern int _PyUop_num_popped(int opcode, int oparg);
@@ -288,8 +289,13 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
     [_CONVERT_VALUE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
     [_FORMAT_SIMPLE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
     [_FORMAT_WITH_SPEC] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
+    [_COPY_1] = HAS_PURE_FLAG,
+    [_COPY_2] = HAS_PURE_FLAG,
+    [_COPY_3] = HAS_PURE_FLAG,
     [_COPY] = HAS_ARG_FLAG | HAS_PURE_FLAG,
     [_BINARY_OP] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | 
HAS_ESCAPES_FLAG,
+    [_SWAP_2] = HAS_PURE_FLAG,
+    [_SWAP_3] = HAS_PURE_FLAG,
     [_SWAP] = HAS_ARG_FLAG | HAS_PURE_FLAG,
     [_GUARD_IS_TRUE_POP] = HAS_EXIT_FLAG,
     [_GUARD_IS_FALSE_POP] = HAS_EXIT_FLAG,
@@ -323,12 +329,14 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
     [_TIER2_RESUME_CHECK] = HAS_DEOPT_FLAG,
 };
 
-const uint8_t _PyUop_Replication[MAX_UOP_ID+1] = {
-    [_LOAD_FAST] = 8,
-    [_LOAD_FAST_BORROW] = 8,
-    [_LOAD_SMALL_INT] = 4,
-    [_STORE_FAST] = 8,
-    [_INIT_CALL_PY_EXACT_ARGS] = 5,
+const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1] = {
+    [_LOAD_FAST] = { 0, 8 },
+    [_LOAD_FAST_BORROW] = { 0, 8 },
+    [_LOAD_SMALL_INT] = { 0, 4 },
+    [_STORE_FAST] = { 0, 8 },
+    [_INIT_CALL_PY_EXACT_ARGS] = { 0, 5 },
+    [_COPY] = { 1, 4 },
+    [_SWAP] = { 2, 4 },
 };
 
 const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
@@ -408,6 +416,9 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
     [_CONTAINS_OP_SET] = "_CONTAINS_OP_SET",
     [_CONVERT_VALUE] = "_CONVERT_VALUE",
     [_COPY] = "_COPY",
+    [_COPY_1] = "_COPY_1",
+    [_COPY_2] = "_COPY_2",
+    [_COPY_3] = "_COPY_3",
     [_COPY_FREE_VARS] = "_COPY_FREE_VARS",
     [_CREATE_INIT_FRAME] = "_CREATE_INIT_FRAME",
     [_DELETE_ATTR] = "_DELETE_ATTR",
@@ -617,6 +628,8 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
     [_STORE_SUBSCR_DICT] = "_STORE_SUBSCR_DICT",
     [_STORE_SUBSCR_LIST_INT] = "_STORE_SUBSCR_LIST_INT",
     [_SWAP] = "_SWAP",
+    [_SWAP_2] = "_SWAP_2",
+    [_SWAP_3] = "_SWAP_3",
     [_TIER2_RESUME_CHECK] = "_TIER2_RESUME_CHECK",
     [_TO_BOOL] = "_TO_BOOL",
     [_TO_BOOL_BOOL] = "_TO_BOOL_BOOL",
@@ -1176,10 +1189,20 @@ int _PyUop_num_popped(int opcode, int oparg)
             return 1;
         case _FORMAT_WITH_SPEC:
             return 2;
+        case _COPY_1:
+            return 0;
+        case _COPY_2:
+            return 0;
+        case _COPY_3:
+            return 0;
         case _COPY:
             return 0;
         case _BINARY_OP:
             return 2;
+        case _SWAP_2:
+            return 0;
+        case _SWAP_3:
+            return 0;
         case _SWAP:
             return 0;
         case _GUARD_IS_TRUE_POP:
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index 971e97a5784692..27a04766cc8dd8 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -4946,8 +4946,7 @@ dummy_func(
             res = PyStackRef_FromPyObjectSteal(res_o);
         }
 
-        pure inst(COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], 
top)) {
-            assert(oparg > 0);
+        pure replicate(1:4) inst(COPY, (bottom, unused[oparg-1] -- bottom, 
unused[oparg-1], top)) {
             top = PyStackRef_DUP(bottom);
         }
 
@@ -4980,12 +4979,11 @@ dummy_func(
 
         macro(BINARY_OP) = _SPECIALIZE_BINARY_OP + unused/4 + _BINARY_OP;
 
-        pure inst(SWAP, (bottom, unused[oparg-2], top --
+        pure replicate(2:4) inst(SWAP, (bottom, unused[oparg-2], top --
                     bottom, unused[oparg-2], top)) {
             _PyStackRef temp = bottom;
             bottom = top;
             top = temp;
-            assert(oparg >= 2);
         }
 
         inst(INSTRUMENTED_LINE, ( -- )) {
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index dbfb2391bf0623..74c78e4d1f5a69 100644
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@@ -6763,12 +6763,44 @@
             break;
         }
 
+        case _COPY_1: {
+            _PyStackRef bottom;
+            _PyStackRef top;
+            bottom = stack_pointer[-1];
+            top = PyStackRef_DUP(bottom);
+            stack_pointer[0] = top;
+            stack_pointer += 1;
+            assert(WITHIN_STACK_BOUNDS());
+            break;
+        }
+
+        case _COPY_2: {
+            _PyStackRef bottom;
+            _PyStackRef top;
+            bottom = stack_pointer[-2];
+            top = PyStackRef_DUP(bottom);
+            stack_pointer[0] = top;
+            stack_pointer += 1;
+            assert(WITHIN_STACK_BOUNDS());
+            break;
+        }
+
+        case _COPY_3: {
+            _PyStackRef bottom;
+            _PyStackRef top;
+            bottom = stack_pointer[-3];
+            top = PyStackRef_DUP(bottom);
+            stack_pointer[0] = top;
+            stack_pointer += 1;
+            assert(WITHIN_STACK_BOUNDS());
+            break;
+        }
+
         case _COPY: {
             _PyStackRef bottom;
             _PyStackRef top;
             oparg = CURRENT_OPARG();
             bottom = stack_pointer[-1 - (oparg-1)];
-            assert(oparg > 0);
             top = PyStackRef_DUP(bottom);
             stack_pointer[0] = top;
             stack_pointer += 1;
@@ -6808,6 +6840,32 @@
             break;
         }
 
+        case _SWAP_2: {
+            _PyStackRef top;
+            _PyStackRef bottom;
+            top = stack_pointer[-1];
+            bottom = stack_pointer[-2];
+            _PyStackRef temp = bottom;
+            bottom = top;
+            top = temp;
+            stack_pointer[-2] = bottom;
+            stack_pointer[-1] = top;
+            break;
+        }
+
+        case _SWAP_3: {
+            _PyStackRef top;
+            _PyStackRef bottom;
+            top = stack_pointer[-1];
+            bottom = stack_pointer[-3];
+            _PyStackRef temp = bottom;
+            bottom = top;
+            top = temp;
+            stack_pointer[-3] = bottom;
+            stack_pointer[-1] = top;
+            break;
+        }
+
         case _SWAP: {
             _PyStackRef top;
             _PyStackRef bottom;
@@ -6817,7 +6875,6 @@
             _PyStackRef temp = bottom;
             bottom = top;
             top = temp;
-            assert(oparg >= 2);
             stack_pointer[-2 - (oparg-2)] = bottom;
             stack_pointer[-1] = top;
             break;
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index 2cf027c539b992..4fc1d5266d0a87 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -5228,7 +5228,6 @@
             _PyStackRef bottom;
             _PyStackRef top;
             bottom = stack_pointer[-1 - (oparg-1)];
-            assert(oparg > 0);
             top = PyStackRef_DUP(bottom);
             stack_pointer[0] = top;
             stack_pointer += 1;
@@ -11568,7 +11567,6 @@
             _PyStackRef temp = bottom;
             bottom = top;
             top = temp;
-            assert(oparg >= 2);
             stack_pointer[-2 - (oparg-2)] = bottom;
             stack_pointer[-1] = top;
             DISPATCH();
diff --git a/Python/optimizer.c b/Python/optimizer.c
index dde3dd8ebe745a..8d01d605ef4a2a 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -1292,8 +1292,8 @@ uop_optimize(
     for (int pc = 0; pc < length; pc++) {
         int opcode = buffer[pc].opcode;
         int oparg = buffer[pc].oparg;
-        if (oparg < _PyUop_Replication[opcode]) {
-            buffer[pc].opcode = opcode + oparg + 1;
+        if (oparg < _PyUop_Replication[opcode].stop && oparg >= 
_PyUop_Replication[opcode].start) {
+            buffer[pc].opcode = opcode + oparg + 1 - 
_PyUop_Replication[opcode].start;
             assert(strncmp(_PyOpcode_uop_name[buffer[pc].opcode], 
_PyOpcode_uop_name[opcode], strlen(_PyOpcode_uop_name[opcode])) == 0);
         }
         else if (is_terminator(&buffer[pc])) {
diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py
index ca6d0301f3572d..c6a9fbcad8891f 100644
--- a/Tools/cases_generator/analyzer.py
+++ b/Tools/cases_generator/analyzer.py
@@ -180,7 +180,7 @@ class Uop:
     properties: Properties
     _size: int = -1
     implicitly_created: bool = False
-    replicated = 0
+    replicated = range(0)
     replicates: "Uop | None" = None
     # Size of the instruction(s), only set for uops containing the 
INSTRUCTION_SIZE macro
     instruction_size: int | None = None
@@ -868,6 +868,28 @@ def compute_properties(op: parser.CodeDef) -> Properties:
         needs_prev=variable_used(op, "prev_instr"),
     )
 
+def expand(items: list[StackItem], oparg: int) -> list[StackItem]:
+    # Only replace array item with scalar if no more than one item is an array
+    index = -1
+    for i, item in enumerate(items):
+        if "oparg" in item.size:
+            if index >= 0:
+                return items
+            index = i
+    if index < 0:
+        return items
+    try:
+        count = int(eval(items[index].size.replace("oparg", str(oparg))))
+    except ValueError:
+        return items
+    return items[:index] + [
+        StackItem(items[index].name + f"_{i}", "", items[index].peek, 
items[index].used) for i in range(count)
+        ] + items[index+1:]
+
+def scalarize_stack(stack: StackEffect, oparg: int) -> StackEffect:
+    stack.inputs = expand(stack.inputs, oparg)
+    stack.outputs = expand(stack.outputs, oparg)
+    return stack
 
 def make_uop(
     name: str,
@@ -887,20 +909,26 @@ def make_uop(
     )
     for anno in op.annotations:
         if anno.startswith("replicate"):
-            result.replicated = int(anno[10:-1])
+            text = anno[10:-1]
+            start, stop = text.split(":")
+            result.replicated = range(int(start), int(stop))
             break
     else:
         return result
-    for oparg in range(result.replicated):
+    for oparg in result.replicated:
         name_x = name + "_" + str(oparg)
         properties = compute_properties(op)
         properties.oparg = False
-        properties.const_oparg = oparg
+        stack = analyze_stack(op)
+        if not variable_used(op, "oparg"):
+            stack = scalarize_stack(stack, oparg)
+        else:
+            properties.const_oparg = oparg
         rep = Uop(
             name=name_x,
             context=op.context,
             annotations=op.annotations,
-            stack=analyze_stack(op),
+            stack=stack,
             caches=analyze_caches(inputs),
             local_stores=find_variable_stores(op),
             body=op.block,
diff --git a/Tools/cases_generator/parsing.py b/Tools/cases_generator/parsing.py
index a6dac48187525d..c7fe0d162ac6e4 100644
--- a/Tools/cases_generator/parsing.py
+++ b/Tools/cases_generator/parsing.py
@@ -379,9 +379,13 @@ def inst_header(self) -> InstHeader | None:
         while anno := self.expect(lx.ANNOTATION):
             if anno.text == "replicate":
                 self.require(lx.LPAREN)
-                times = self.require(lx.NUMBER)
+                stop = self.require(lx.NUMBER)
+                start_text = "0"
+                if self.expect(lx.COLON):
+                    start_text = stop.text
+                    stop = self.require(lx.NUMBER)
                 self.require(lx.RPAREN)
-                annotations.append(f"replicate({times.text})")
+                annotations.append(f"replicate({start_text}:{stop.text})")
             else:
                 annotations.append(anno.text)
         tkn = self.expect(lx.INST)
diff --git a/Tools/cases_generator/uop_metadata_generator.py b/Tools/cases_generator/uop_metadata_generator.py
index 6f995e5c46bfcf..1cc23837a72dea 100644
--- a/Tools/cases_generator/uop_metadata_generator.py
+++ b/Tools/cases_generator/uop_metadata_generator.py
@@ -24,7 +24,8 @@
 
 def generate_names_and_flags(analysis: Analysis, out: CWriter) -> None:
     out.emit("extern const uint16_t _PyUop_Flags[MAX_UOP_ID+1];\n")
-    out.emit("extern const uint8_t _PyUop_Replication[MAX_UOP_ID+1];\n")
+    out.emit("typedef struct _rep_range { uint8_t start; uint8_t stop; } 
ReplicationRange;\n")
+    out.emit("extern const ReplicationRange 
_PyUop_Replication[MAX_UOP_ID+1];\n")
     out.emit("extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1];\n\n")
     out.emit("extern int _PyUop_num_popped(int opcode, int oparg);\n\n")
     out.emit("#ifdef NEED_OPCODE_METADATA\n")
@@ -34,10 +35,11 @@ def generate_names_and_flags(analysis: Analysis, out: CWriter) -> None:
             out.emit(f"[{uop.name}] = {cflags(uop.properties)},\n")
 
     out.emit("};\n\n")
-    out.emit("const uint8_t _PyUop_Replication[MAX_UOP_ID+1] = {\n")
+    out.emit("const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1] = {\n")
     for uop in analysis.uops.values():
         if uop.replicated:
-            out.emit(f"[{uop.name}] = {uop.replicated},\n")
+            assert(uop.replicated.step == 1)
+            out.emit(f"[{uop.name}] = {{ {uop.replicated.start}, 
{uop.replicated.stop} }},\n")
 
     out.emit("};\n\n")
     out.emit("const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {\n")

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org
Member address: [email protected]

Reply via email to