slinder1 wrote:

<details>
<summary>🛠️ Changes since last push (click to expand):</summary>

```diff
diff --git b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -1889,12 +1889,7 @@ void SIFrameLowering::determinePrologEpilogSGPRSaves(
     Register Exec = TRI->getExec();
     assert(!MFI->hasPrologEpilogSGPRSpillEntry(Exec) &&
            "Re-reserving spill slot for EXEC");
-    // FIXME: Machine Copy Propagation currently optimizes away the EXEC copy to
-    // the scratch as we emit it only in the prolog. This optimization should
-    // not happen for frame related instructions. Until this is fixed ignore
-    // copy to scratch SGPR.
-    getVGPRSpillLaneOrTempRegister(MF, LiveUnits, Exec, RC,
-                                   /*IncludeScratchCopy=*/false);
+    getVGPRSpillLaneOrTempRegister(MF, LiveUnits, Exec, RC);
   }
 
   // Functions that don't return to the caller don't need to preserve
diff --git b/llvm/test/CodeGen/AMDGPU/amdgpu-spill-cfi-saved-regs.ll a/llvm/test/CodeGen/AMDGPU/amdgpu-spill-cfi-saved-regs.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-spill-cfi-saved-regs -o - %s | FileCheck --check-prefixes=CHECK,WAVE64 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -amdgpu-spill-cfi-saved-regs -o - %s | FileCheck --check-prefixes=CHECK,WAVE32 %s
 
@@ -23,17 +23,8 @@ define hidden void @func_saved_in_clobbered_vgpr() #0 {
 ; WAVE64-NEXT:    .cfi_llvm_def_aspace_cfa 64, 0, 6
 ; WAVE64-NEXT:    .cfi_llvm_register_pair 16, 62, 32, 63, 32
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; WAVE64-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
-; WAVE64-NEXT:    .cfi_offset 2560, 0
-; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
-; WAVE64-NEXT:    v_writelane_b32 v0, exec_lo, 0
-; WAVE64-NEXT:    v_writelane_b32 v0, exec_hi, 1
-; WAVE64-NEXT:    .cfi_llvm_vector_registers 17, 2560, 0, 32, 2560, 1, 32
-; WAVE64-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
-; WAVE64-NEXT:    s_waitcnt vmcnt(0)
+; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
+; WAVE64-NEXT:    .cfi_llvm_register_pair 17, 36, 32, 37, 32
 ; WAVE64-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; WAVE32-LABEL: func_saved_in_clobbered_vgpr:
@@ -43,18 +34,8 @@ define hidden void @func_saved_in_clobbered_vgpr() #0 {
 ; WAVE32-NEXT:    .cfi_llvm_def_aspace_cfa 64, 0, 6
 ; WAVE32-NEXT:    .cfi_llvm_register_pair 16, 62, 32, 63, 32
 ; WAVE32-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; WAVE32-NEXT:    s_xor_saveexec_b32 s4, -1
-; WAVE32-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
-; WAVE32-NEXT:    .cfi_offset 1536, 0
-; WAVE32-NEXT:    s_waitcnt_depctr depctr_vm_vsrc(0)
-; WAVE32-NEXT:    s_mov_b32 exec_lo, s4
-; WAVE32-NEXT:    v_writelane_b32 v0, exec_lo, 0
-; WAVE32-NEXT:    .cfi_llvm_vector_registers 1, 1536, 0, 32
-; WAVE32-NEXT:    s_xor_saveexec_b32 s4, -1
-; WAVE32-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; WAVE32-NEXT:    s_waitcnt_depctr depctr_vm_vsrc(0)
-; WAVE32-NEXT:    s_mov_b32 exec_lo, s4
-; WAVE32-NEXT:    s_waitcnt vmcnt(0)
+; WAVE32-NEXT:    s_mov_b32 s4, exec_lo
+; WAVE32-NEXT:    .cfi_register 1, 36
 ; WAVE32-NEXT:    s_setpc_b64 s[30:31]
 entry:
   ret void
@@ -109,20 +90,11 @@ define hidden void @func_saved_in_preserved_vgpr() #0 {
 ; WAVE64-NEXT:    .cfi_undefined 2598
 ; WAVE64-NEXT:    .cfi_undefined 2599
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; WAVE64-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; WAVE64-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded 
Spill
-; WAVE64-NEXT:    .cfi_offset 2600, 0
-; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
-; WAVE64-NEXT:    v_writelane_b32 v40, exec_lo, 0
-; WAVE64-NEXT:    v_writelane_b32 v40, exec_hi, 1
-; WAVE64-NEXT:    .cfi_llvm_vector_registers 17, 2600, 0, 32, 2600, 1, 32
+; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
+; WAVE64-NEXT:    .cfi_llvm_register_pair 17, 36, 32, 37, 32
 ; WAVE64-NEXT:    ;;#ASMSTART
 ; WAVE64-NEXT:    ; clobber nonpreserved VGPRs
 ; WAVE64-NEXT:    ;;#ASMEND
-; WAVE64-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; WAVE64-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded 
Reload
-; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
-; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; WAVE32-LABEL: func_saved_in_preserved_vgpr:
@@ -172,21 +144,11 @@ define hidden void @func_saved_in_preserved_vgpr() #0 {
 ; WAVE32-NEXT:    .cfi_undefined 1574
 ; WAVE32-NEXT:    .cfi_undefined 1575
 ; WAVE32-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; WAVE32-NEXT:    s_or_saveexec_b32 s4, -1
-; WAVE32-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded 
Spill
-; WAVE32-NEXT:    .cfi_offset 1576, 0
-; WAVE32-NEXT:    s_waitcnt_depctr depctr_vm_vsrc(0)
-; WAVE32-NEXT:    s_mov_b32 exec_lo, s4
-; WAVE32-NEXT:    v_writelane_b32 v40, exec_lo, 0
-; WAVE32-NEXT:    .cfi_llvm_vector_registers 1, 1576, 0, 32
+; WAVE32-NEXT:    s_mov_b32 s4, exec_lo
+; WAVE32-NEXT:    .cfi_register 1, 36
 ; WAVE32-NEXT:    ;;#ASMSTART
 ; WAVE32-NEXT:    ; clobber nonpreserved VGPRs
 ; WAVE32-NEXT:    ;;#ASMEND
-; WAVE32-NEXT:    s_or_saveexec_b32 s4, -1
-; WAVE32-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded 
Reload
-; WAVE32-NEXT:    s_waitcnt_depctr depctr_vm_vsrc(0)
-; WAVE32-NEXT:    s_mov_b32 exec_lo, s4
-; WAVE32-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE32-NEXT:    s_setpc_b64 s[30:31]
 entry:
   call void asm sideeffect "; clobber nonpreserved VGPRs",
@@ -206,12 +168,7 @@ define void @empty_func() {
 ; WAVE64-NEXT:    .cfi_llvm_def_aspace_cfa 64, 0, 6
 ; WAVE64-NEXT:    .cfi_llvm_register_pair 16, 62, 32, 63, 32
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; WAVE64-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
-; WAVE64-NEXT:    .cfi_offset 2560, 0
-; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
-; WAVE64-NEXT:    v_writelane_b32 v0, exec_lo, 0
-; WAVE64-NEXT:    v_writelane_b32 v0, exec_hi, 1
+; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ;
 ; WAVE32-LABEL: empty_func:
 ; WAVE32:       .Lfunc_begin3:
@@ -220,12 +177,7 @@ define void @empty_func() {
 ; WAVE32-NEXT:    .cfi_llvm_def_aspace_cfa 64, 0, 6
 ; WAVE32-NEXT:    .cfi_llvm_register_pair 16, 62, 32, 63, 32
 ; WAVE32-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; WAVE32-NEXT:    s_xor_saveexec_b32 s4, -1
-; WAVE32-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
-; WAVE32-NEXT:    .cfi_offset 1536, 0
-; WAVE32-NEXT:    s_waitcnt_depctr depctr_vm_vsrc(0)
-; WAVE32-NEXT:    s_mov_b32 exec_lo, s4
-; WAVE32-NEXT:    v_writelane_b32 v0, exec_lo, 0
+; WAVE32-NEXT:    s_mov_b32 s4, exec_lo
   unreachable
 }
 
@@ -263,14 +215,10 @@ define void @no_vgprs_to_spill_into() #1 {
 ; WAVE64-NEXT:    .cfi_undefined 2583
 ; WAVE64-NEXT:    .cfi_undefined 2584
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; WAVE64-NEXT:    v_mov_b32_e32 v0, exec_lo
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
-; WAVE64-NEXT:    v_mov_b32_e32 v0, exec_hi
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    .cfi_offset 17, 0
+; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
+; WAVE64-NEXT:    .cfi_llvm_register_pair 17, 36, 32, 37, 32
 ; WAVE64-NEXT:    ;;#ASMSTART
 ; WAVE64-NEXT:    ;;#ASMEND
-; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; WAVE32-LABEL: no_vgprs_to_spill_into:
@@ -305,20 +253,10 @@ define void @no_vgprs_to_spill_into() #1 {
 ; WAVE32-NEXT:    .cfi_undefined 1559
 ; WAVE32-NEXT:    .cfi_undefined 1560
 ; WAVE32-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; WAVE32-NEXT:    s_xor_saveexec_b32 s4, -1
-; WAVE32-NEXT:    buffer_store_dword v25, off, s[0:3], s32 ; 4-byte Folded 
Spill
-; WAVE32-NEXT:    .cfi_offset 1561, 0
-; WAVE32-NEXT:    s_waitcnt_depctr depctr_vm_vsrc(0)
-; WAVE32-NEXT:    s_mov_b32 exec_lo, s4
-; WAVE32-NEXT:    v_writelane_b32 v25, exec_lo, 0
-; WAVE32-NEXT:    .cfi_llvm_vector_registers 1, 1561, 0, 32
+; WAVE32-NEXT:    s_mov_b32 s4, exec_lo
+; WAVE32-NEXT:    .cfi_register 1, 36
 ; WAVE32-NEXT:    ;;#ASMSTART
 ; WAVE32-NEXT:    ;;#ASMEND
-; WAVE32-NEXT:    s_xor_saveexec_b32 s4, -1
-; WAVE32-NEXT:    buffer_load_dword v25, off, s[0:3], s32 ; 4-byte Folded 
Reload
-; WAVE32-NEXT:    s_waitcnt_depctr depctr_vm_vsrc(0)
-; WAVE32-NEXT:    s_mov_b32 exec_lo, s4
-; WAVE32-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE32-NEXT:    s_setpc_b64 s[30:31]
   call void asm sideeffect "",
     "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
@@ -445,16 +383,13 @@ define void @callee_need_to_spill_fp_exec_to_memory() #2 {
 ; WAVE64-NEXT:    .cfi_undefined 60
 ; WAVE64-NEXT:    .cfi_undefined 61
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; WAVE64-NEXT:    s_mov_b32 s40, s33
-; WAVE64-NEXT:    .cfi_register 65, 72
+; WAVE64-NEXT:    s_mov_b32 s42, s33
+; WAVE64-NEXT:    .cfi_register 65, 74
 ; WAVE64-NEXT:    s_mov_b32 s33, s32
-; WAVE64-NEXT:    v_mov_b32_e32 v0, exec_lo
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    v_mov_b32_e32 v0, exec_hi
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:324 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    .cfi_offset 17, 20480
+; WAVE64-NEXT:    s_mov_b64 s[40:41], exec
+; WAVE64-NEXT:    .cfi_llvm_register_pair 17, 72, 32, 73, 32
 ; WAVE64-NEXT:    .cfi_def_cfa_register 65
-; WAVE64-NEXT:    s_addk_i32 s32, 0x5300
+; WAVE64-NEXT:    s_addk_i32 s32, 0x5100
 ; WAVE64-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:188 ; 4-byte 
Folded Spill
 ; WAVE64-NEXT:    .cfi_llvm_vector_offset 2600, 32, 17, 64, 12032
 ; WAVE64-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:184 ; 4-byte 
Folded Spill
@@ -553,258 +488,258 @@ define void @callee_need_to_spill_fp_exec_to_memory() #2 {
 ; WAVE64-NEXT:    .cfi_llvm_vector_offset 2687, 32, 17, 64, 0
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s34, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:192 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s35, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:196 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s36, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:200 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s37, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:204 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s38, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:208 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s39, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:212 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s48, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:216 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s49, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:220 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s50, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:224 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s51, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:228 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s52, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:232 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s53, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:236 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s54, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:240 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s55, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:244 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s64, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:248 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s65, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:252 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s66, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:256 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s67, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:260 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s68, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:264 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s69, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:268 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s70, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:272 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s71, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:276 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s80, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:280 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s81, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:284 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s82, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:288 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s83, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:292 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s84, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:296 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s85, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:300 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s86, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:304 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s87, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:308 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s96, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:312 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    v_writelane_b32 v0, s97, 0
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:316 ; 4-byte 
Folded Spill
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    ;;#ASMSTART
@@ -815,290 +750,290 @@ define void @callee_need_to_spill_fp_exec_to_memory() #2 {
 ; WAVE64-NEXT:    ;;#ASMEND
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:316 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s97, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:312 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s96, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:308 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s87, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:304 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s86, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:300 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s85, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:296 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s84, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:292 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s83, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:288 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s82, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:284 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s81, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:280 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s80, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:276 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s71, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:272 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s70, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:268 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s69, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:264 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s68, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:260 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s67, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:256 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s66, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:252 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s65, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:248 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s64, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:244 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s55, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:240 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s54, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:236 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s53, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:232 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s52, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:228 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s51, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:224 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s50, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:220 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s49, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:216 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s48, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:212 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s39, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:208 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s38, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:204 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s37, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:200 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s36, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:196 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s35, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 1
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:192 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s34, v0, 0
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:328
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:320
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    buffer_load_dword v127, off, s[0:3], s33 ; 4-byte Folded 
Reload
@@ -1151,7 +1086,7 @@ define void @callee_need_to_spill_fp_exec_to_memory() #2 {
 ; WAVE64-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:188 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_mov_b32 s32, s33
 ; WAVE64-NEXT:    .cfi_def_cfa_register 64
-; WAVE64-NEXT:    s_mov_b32 s33, s40
+; WAVE64-NEXT:    s_mov_b32 s33, s42
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1270,19 +1205,18 @@ define void @callee_need_to_spill_fp_exec_to_memory() #2 {
 ; WAVE32-NEXT:    .cfi_undefined 60
 ; WAVE32-NEXT:    .cfi_undefined 61
 ; WAVE32-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; WAVE32-NEXT:    s_mov_b32 s40, s33
-; WAVE32-NEXT:    .cfi_register 65, 72
+; WAVE32-NEXT:    s_mov_b32 s41, s33
+; WAVE32-NEXT:    .cfi_register 65, 73
 ; WAVE32-NEXT:    s_mov_b32 s33, s32
 ; WAVE32-NEXT:    s_xor_saveexec_b32 s4, -1
 ; WAVE32-NEXT:    buffer_store_dword v39, off, s[0:3], s33 offset:192 ; 4-byte 
Folded Spill
 ; WAVE32-NEXT:    .cfi_offset 1575, 6144
 ; WAVE32-NEXT:    s_waitcnt_depctr depctr_vm_vsrc(0)
 ; WAVE32-NEXT:    s_mov_b32 exec_lo, s4
-; WAVE32-NEXT:    v_mov_b32_e32 v0, exec_lo
-; WAVE32-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:196 ; 4-byte Folded Spill
-; WAVE32-NEXT:    .cfi_offset 1, 6272
+; WAVE32-NEXT:    s_mov_b32 s40, exec_lo
+; WAVE32-NEXT:    .cfi_register 1, 72
 ; WAVE32-NEXT:    .cfi_def_cfa_register 65
-; WAVE32-NEXT:    s_addk_i32 s32, 0x1980
+; WAVE32-NEXT:    s_addk_i32 s32, 0x1900
 ; WAVE32-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:188 ; 4-byte 
Folded Spill
 ; WAVE32-NEXT:    .cfi_llvm_vector_offset 1576, 32, 1, 32, 6016
 ; WAVE32-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:184 ; 4-byte 
Folded Spill
@@ -1536,7 +1470,7 @@ define void @callee_need_to_spill_fp_exec_to_memory() #2 {
 ; WAVE32-NEXT:    s_waitcnt_depctr depctr_vm_vsrc(0)
 ; WAVE32-NEXT:    s_mov_b32 exec_lo, s4
 ; WAVE32-NEXT:    .cfi_def_cfa_register 64
-; WAVE32-NEXT:    s_mov_b32 s33, s40
+; WAVE32-NEXT:    s_mov_b32 s33, s41
 ; WAVE32-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE32-NEXT:    s_setpc_b64 s[30:31]
   call void asm sideeffect "; clobber nonpreserved and 32 CSR SGPRs",
@@ -1718,15 +1652,11 @@ define internal void @caller_needs_to_spill_pc_to_memory() #3 {
 ; WAVE64-NEXT:    .cfi_undefined 2806
 ; WAVE64-NEXT:    .cfi_undefined 2807
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; WAVE64-NEXT:    v_mov_b32_e32 v0, exec_lo
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
-; WAVE64-NEXT:    v_mov_b32_e32 v0, exec_hi
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; WAVE64-NEXT:    .cfi_offset 17, 0
+; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
+; WAVE64-NEXT:    .cfi_llvm_register_pair 17, 36, 32, 37, 32
 ; WAVE64-NEXT:    ;;#ASMSTART
 ; WAVE64-NEXT:    ; clobber all VGPRs
 ; WAVE64-NEXT:    ;;#ASMEND
-; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; WAVE32-LABEL: caller_needs_to_spill_pc_to_memory:
@@ -1880,9 +1810,8 @@ define internal void @caller_needs_to_spill_pc_to_memory() #3 {
 ; WAVE32-NEXT:    .cfi_undefined 1782
 ; WAVE32-NEXT:    .cfi_undefined 1783
 ; WAVE32-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; WAVE32-NEXT:    v_mov_b32_e32 v0, exec_lo
-; WAVE32-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
-; WAVE32-NEXT:    .cfi_offset 1, 0
+; WAVE32-NEXT:    s_mov_b32 s4, exec_lo
+; WAVE32-NEXT:    .cfi_register 1, 36
 ; WAVE32-NEXT:    ;;#ASMSTART
 ; WAVE32-NEXT:    ; clobber all VGPRs
 ; WAVE32-NEXT:    ;;#ASMEND
@@ -2068,19 +1997,18 @@ define void @need_to_spill_pc_to_mem() #3 {
 ; WAVE64-NEXT:    .cfi_undefined 2805
 ; WAVE64-NEXT:    .cfi_undefined 2806
 ; WAVE64-NEXT:    .cfi_undefined 2807
+; WAVE64-NEXT:    .cfi_undefined 36
+; WAVE64-NEXT:    .cfi_undefined 37
 ; WAVE64-NEXT:    .cfi_undefined 48
 ; WAVE64-NEXT:    .cfi_undefined 49
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; WAVE64-NEXT:    s_mov_b32 s18, s33
-; WAVE64-NEXT:    .cfi_register 65, 50
+; WAVE64-NEXT:    s_mov_b32 s20, s33
+; WAVE64-NEXT:    .cfi_register 65, 52
 ; WAVE64-NEXT:    s_mov_b32 s33, s32
-; WAVE64-NEXT:    v_mov_b32_e32 v0, exec_lo
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:456 ; 4-byte Folded Spill
-; WAVE64-NEXT:    v_mov_b32_e32 v0, exec_hi
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:460 ; 4-byte Folded Spill
-; WAVE64-NEXT:    .cfi_offset 17, 29184
+; WAVE64-NEXT:    s_mov_b64 s[18:19], exec
+; WAVE64-NEXT:    .cfi_llvm_register_pair 17, 50, 32, 51, 32
 ; WAVE64-NEXT:    .cfi_def_cfa_register 65
-; WAVE64-NEXT:    s_addk_i32 s32, 0x7800
+; WAVE64-NEXT:    s_addk_i32 s32, 0x7400
 ; WAVE64-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:444 ; 4-byte 
Folded Spill
 ; WAVE64-NEXT:    .cfi_llvm_vector_offset 2600, 32, 17, 64, 28416
 ; WAVE64-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:440 ; 4-byte 
Folded Spill
@@ -2307,12 +2235,12 @@ define void @need_to_spill_pc_to_mem() #3 {
 ; WAVE64-NEXT:    .cfi_llvm_vector_offset 2815, 32, 17, 64, 0
 ; WAVE64-NEXT:    s_mov_b64 s[16:17], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 3
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:464
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:456
 ; WAVE64-NEXT:    v_writelane_b32 v0, s30, 0
 ; WAVE64-NEXT:    v_writelane_b32 v0, s31, 1
 ; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:448 ; 4-byte 
Folded Spill
 ; WAVE64-NEXT:    .cfi_offset 16, 28672
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:464
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:456
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[16:17]
 ; WAVE64-NEXT:    s_getpc_b64 s[16:17]
@@ -2321,12 +2249,12 @@ define void @need_to_spill_pc_to_mem() #3 {
 ; WAVE64-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; WAVE64-NEXT:    s_mov_b64 s[4:5], exec
 ; WAVE64-NEXT:    s_mov_b64 exec, 3
-; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:464
+; WAVE64-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:456
 ; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:448 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    v_readlane_b32 s30, v0, 0
 ; WAVE64-NEXT:    v_readlane_b32 s31, v0, 1
-; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:464
+; WAVE64-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:456
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-NEXT:    buffer_load_dword v255, off, s[0:3], s33 ; 4-byte Folded 
Reload
@@ -2443,7 +2371,7 @@ define void @need_to_spill_pc_to_mem() #3 {
 ; WAVE64-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:444 ; 4-byte 
Folded Reload
 ; WAVE64-NEXT:    s_mov_b32 s32, s33
 ; WAVE64-NEXT:    .cfi_def_cfa_register 64
-; WAVE64-NEXT:    s_mov_b32 s33, s18
+; WAVE64-NEXT:    s_mov_b32 s33, s20
 ; WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2597,15 +2525,15 @@ define void @need_to_spill_pc_to_mem() #3 {
 ; WAVE32-NEXT:    .cfi_undefined 1781
 ; WAVE32-NEXT:    .cfi_undefined 1782
 ; WAVE32-NEXT:    .cfi_undefined 1783
+; WAVE32-NEXT:    .cfi_undefined 36
 ; WAVE32-NEXT:    .cfi_undefined 48
 ; WAVE32-NEXT:    .cfi_undefined 49
 ; WAVE32-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; WAVE32-NEXT:    s_mov_b32 s18, s33
-; WAVE32-NEXT:    .cfi_register 65, 50
-; WAVE32-NEXT:    v_mov_b32_e32 v0, exec_lo
+; WAVE32-NEXT:    s_mov_b32 s19, s33
+; WAVE32-NEXT:    .cfi_register 65, 51
 ; WAVE32-NEXT:    s_mov_b32 s33, s32
-; WAVE32-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:456 ; 4-byte Folded Spill
-; WAVE32-NEXT:    .cfi_offset 1, 14592
+; WAVE32-NEXT:    s_mov_b32 s18, exec_lo
+; WAVE32-NEXT:    .cfi_register 1, 50
 ; WAVE32-NEXT:    .cfi_def_cfa_register 65
 ; WAVE32-NEXT:    s_addk_i32 s32, 0x3a00
 ; WAVE32-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:444 ; 4-byte 
Folded Spill
@@ -2835,12 +2763,12 @@ define void @need_to_spill_pc_to_mem() #3 {
 ; WAVE32-NEXT:    s_mov_b32 s16, exec_lo
 ; WAVE32-NEXT:    s_waitcnt_depctr depctr_vm_vsrc(0)
 ; WAVE32-NEXT:    s_mov_b32 exec_lo, 3
-; WAVE32-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:460
+; WAVE32-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:456
 ; WAVE32-NEXT:    v_writelane_b32 v0, s30, 0
 ; WAVE32-NEXT:    v_writelane_b32 v0, s31, 1
 ; WAVE32-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:448 ; 4-byte 
Folded Spill
 ; WAVE32-NEXT:    .cfi_offset 16, 14336
-; WAVE32-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:460
+; WAVE32-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:456
 ; WAVE32-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE32-NEXT:    s_waitcnt_depctr depctr_vm_vsrc(0)
 ; WAVE32-NEXT:    s_mov_b32 exec_lo, s16
@@ -2850,12 +2778,12 @@ define void @need_to_spill_pc_to_mem() #3 {
 ; WAVE32-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; WAVE32-NEXT:    s_mov_b32 s4, exec_lo
 ; WAVE32-NEXT:    s_mov_b32 exec_lo, 3
-; WAVE32-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:460
+; WAVE32-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:456
 ; WAVE32-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:448 ; 4-byte 
Folded Reload
 ; WAVE32-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE32-NEXT:    v_readlane_b32 s30, v0, 0
 ; WAVE32-NEXT:    v_readlane_b32 s31, v0, 1
-; WAVE32-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:460
+; WAVE32-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:456
 ; WAVE32-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE32-NEXT:    s_waitcnt_depctr depctr_vm_vsrc(0)
 ; WAVE32-NEXT:    s_mov_b32 exec_lo, s4
@@ -2976,7 +2904,7 @@ define void @need_to_spill_pc_to_mem() #3 {
 ; WAVE32-NEXT:    s_mov_b32 s32, s33
 ; WAVE32-NEXT:    .cfi_def_cfa_register 64
 ; WAVE32-NEXT:    s_waitcnt_depctr depctr_vm_vsrc(0)
-; WAVE32-NEXT:    s_mov_b32 s33, s18
+; WAVE32-NEXT:    s_mov_b32 s33, s19
 ; WAVE32-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE32-NEXT:    s_setpc_b64 s[30:31]
   call void @caller_needs_to_spill_pc_to_memory()

```
</details>

https://github.com/llvm/llvm-project/pull/183149
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to