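Scalar loads may arrive out of order with respect to KMCNT, so
s_wait_kmcnt 1 does not guarantee that the first of two outstanding
loads has returned. The affected code expects the two loads to arrive
in order. Wait for the first load to complete (s_wait_kmcnt 0) before
issuing the second one.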

Signed-off-by: Jay Cornwall <[email protected]>
Cc: Lancelot Six <[email protected]>
Cc: Joseph Greathouse <[email protected]>
Cc: Vladimir Indic <[email protected]>
---
 drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h         | 8 ++++----
 drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index 6281b2f9faee..453c08845d74 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -4638,8 +4638,8 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
        0x01ffffff, 0xb8fbf811,
        0xbf0d847b, 0xbfa20078,
        0xf4003eb6, 0xf8000000,
-       0xf4003bb6, 0xf8000008,
-       0xbfc70001, 0x8b76ff7a,
+       0xbfc70000, 0xf4003bb6,
+       0xf8000008, 0x8b76ff7a,
        0x80000000, 0xbfa20027,
        0x9376ff7a, 0x00060019,
        0x81f9a376, 0xbf0b8179,
@@ -4717,8 +4717,8 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
        0xb980f821, 0x00000000,
        0xbf0d847b, 0xbfa20078,
        0xf4003eb6, 0xf8000000,
-       0xf4003bb6, 0xf8000008,
-       0xbfc70001, 0x8b76ff7a,
+       0xbfc70000, 0xf4003bb6,
+       0xf8000008, 0x8b76ff7a,
        0x80000000, 0xbfa20027,
        0x9376ff7a, 0x00060019,
        0x81f9a376, 0xbf0b8179,
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
index 1624a02ad0ef..7ed4b502eb22 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
@@ -1357,8 +1357,8 @@ function fixup_vgpr_bank_selection
        // ttmp[0:1]: {7b'0} PC[56:0]
        // ttmp2, 3, 10, 13, 14, 15: free
        s_load_b64      [ttmp14, ttmp15], [ttmp0, ttmp1], 0 scope:SCOPE_CU      // Load the 2 instruction DW we are returning to
+       s_wait_kmcnt    0
        s_load_b64      [ttmp2, ttmp3], [ttmp0, ttmp1], 8 scope:SCOPE_CU        // Load the next 2 instruction DW, just in case
-       s_wait_kmcnt    1
        s_and_b32       ttmp10, ttmp14, 0x80000000                              // Check bit 31 in the first DWORD
                                                                                // SCC set if ttmp10 is != 0, i.e. if bit 31 == 1
        s_cbranch_scc1  L_FIXUP_NOT_VOP12C                                      // If bit 31 is 1, we are not VOP1, VOP2, or VOP3C
-- 
2.34.1

Reply via email to