On 06/06/2024 00:16, Jay Cornwall wrote:
In commit 6d1878882d2d
("drm/amdkfd: gfx12 context save/restore trap handler fixes") the
following fix was introduced but incorrectly restricted to gfx12.
The same issue and a corresponding fix apply to gfx10 and gfx11.

Do not overwrite TRAPSTS.{SAVECTX,HOST_TRAP} when restoring this
register. Both of these fields can assert while the wavefront is
running the trap handler.

Signed-off-by: Jay Cornwall <jay.cornw...@amd.com>
Cc: Lancelot Six <lancelot....@amd.com>

Hi Jay,

Thanks, that looks good to me (tested on gfx10.3, 11 and 12). For gfx11+ there might be a risk of overriding perf_snapshot, but that seems fine as this is not used, and such sample from cwsr restore would mostly be meaningless anyway.

Reviewed-by: Lancelot Six <lancelot....@amd.com>

Best,
Lancelot.

---
  .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h    | 16 +++++---
  .../amd/amdkfd/cwsr_trap_handler_gfx10.asm    | 38 ++++++++++++++-----
  2 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h 
b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index 665122d1bbbd..02f7ba8c93cd 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -1136,7 +1136,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
        0x705d0000, 0x807c817c,
        0x8070ff70, 0x00000080,
        0xbf0a7b7c, 0xbf85fff8,
-       0xbf82013d, 0xbef4037e,
+       0xbf82013f, 0xbef4037e,
        0x8775ff7f, 0x0000ffff,
        0x8875ff75, 0x00040000,
        0xbef60380, 0xbef703ff,
@@ -1275,7 +1275,8 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
        0x80788478, 0xbf8c0000,
        0xb9eef815, 0xbefc036f,
        0xbefe0370, 0xbeff0371,
-       0xb9f9f816, 0xb9fbf803,
+       0xb9f9f816, 0xb9fb4803,
+       0x907b8b7b, 0xb9fba2c3,
        0xb9f3f801, 0xb96e3a05,
        0x806e816e, 0xbf0d9972,
        0xbf850002, 0x8f6e896e,
@@ -2544,7 +2545,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
        0xe0704000, 0x705d0000,
        0x807c817c, 0x8070ff70,
        0x00000080, 0xbf0a7b7c,
-       0xbf85fff8, 0xbf820134,
+       0xbf85fff8, 0xbf820136,
        0xbef4037e, 0x8775ff7f,
        0x0000ffff, 0x8875ff75,
        0x00040000, 0xbef60380,
@@ -2683,7 +2684,8 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
        0xf0000000, 0x80788478,
        0xbf8c0000, 0xb9eef815,
        0xbefc036f, 0xbefe0370,
-       0xbeff0371, 0xb9fbf803,
+       0xbeff0371, 0xb9fb4803,
+       0x907b8b7b, 0xb9fba2c3,
        0xb9f3f801, 0xb96e3a05,
        0x806e816e, 0xbf0d9972,
        0xbf850002, 0x8f6e896e,
@@ -2981,7 +2983,7 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
        0x701d0000, 0x807d817d,
        0x8070ff70, 0x00000080,
        0xbf0a7b7d, 0xbfa2fff8,
-       0xbfa0013f, 0xbef4007e,
+       0xbfa00143, 0xbef4007e,
        0x8b75ff7f, 0x0000ffff,
        0x8c75ff75, 0x00040000,
        0xbef60080, 0xbef700ff,
@@ -3123,7 +3125,9 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
        0x80788478, 0xbf890000,
        0xb96ef815, 0xbefd006f,
        0xbefe0070, 0xbeff0071,
-       0xb97bf803, 0xb973f801,
+       0xb97b4803, 0x857b8b7b,
+       0xb97b22c3, 0x857b867b,
+       0xb97b7443, 0xb973f801,
        0xb8ee3b05, 0x806e816e,
        0xbf0d9972, 0xbfa20002,
        0x846e896e, 0xbfa00001,
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm 
b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
index ac3702b8e3c4..44772eec9ef4 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
@@ -119,9 +119,12 @@ var SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT               = 7
  var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK             = 0x100
  var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT            = 8
  var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK         = 0x800
+var SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT         = 11
  var SQ_WAVE_TRAPSTS_EXCP_HI_MASK              = 0x7000
  #if ASIC_FAMILY >= CHIP_PLUM_BONITO
+var SQ_WAVE_TRAPSTS_HOST_TRAP_SHIFT            = 16
  var SQ_WAVE_TRAPSTS_WAVE_START_MASK           = 0x20000
+var SQ_WAVE_TRAPSTS_WAVE_START_SHIFT           = 17
  var SQ_WAVE_TRAPSTS_WAVE_END_MASK             = 0x40000
  var SQ_WAVE_TRAPSTS_TRAP_AFTER_INST_MASK      = 0x100000
  #endif
@@ -137,14 +140,23 @@ var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x003F8000
var SQ_WAVE_MODE_DEBUG_EN_MASK = 0x800 +var S_TRAPSTS_RESTORE_PART_1_SIZE = SQ_WAVE_TRAPSTS_SAVECTX_SHIFT
+var S_TRAPSTS_RESTORE_PART_2_SHIFT             = 
SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT
+
  #if ASIC_FAMILY < CHIP_PLUM_BONITO
  var S_TRAPSTS_NON_MASKABLE_EXCP_MASK          = 
SQ_WAVE_TRAPSTS_MEM_VIOL_MASK|SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK
+var S_TRAPSTS_RESTORE_PART_2_SIZE              = 32 - 
S_TRAPSTS_RESTORE_PART_2_SHIFT
+var S_TRAPSTS_RESTORE_PART_3_SHIFT             = 0
+var S_TRAPSTS_RESTORE_PART_3_SIZE              = 0
  #else
  var S_TRAPSTS_NON_MASKABLE_EXCP_MASK          = SQ_WAVE_TRAPSTS_MEM_VIOL_MASK 
        |\
                                                  
SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK     |\
                                                  
SQ_WAVE_TRAPSTS_WAVE_START_MASK       |\
                                                  SQ_WAVE_TRAPSTS_WAVE_END_MASK 
        |\
                                                  
SQ_WAVE_TRAPSTS_TRAP_AFTER_INST_MASK
+var S_TRAPSTS_RESTORE_PART_2_SIZE              = 
SQ_WAVE_TRAPSTS_HOST_TRAP_SHIFT - SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT
+var S_TRAPSTS_RESTORE_PART_3_SHIFT             = 
SQ_WAVE_TRAPSTS_WAVE_START_SHIFT
+var S_TRAPSTS_RESTORE_PART_3_SIZE              = 32 - 
S_TRAPSTS_RESTORE_PART_3_SHIFT
  #endif
  var S_TRAPSTS_HWREG                           = HW_REG_TRAPSTS
  var S_TRAPSTS_SAVE_CONTEXT_MASK                       = 
SQ_WAVE_TRAPSTS_SAVECTX_MASK
@@ -157,6 +169,7 @@ var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK        = 0x20
  var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK  = 0x40
  var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT = 6
  var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK     = 0x80
+var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT     = 7
  var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK    = 0x100
  var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT   = 8
  var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK      = 0x200
@@ -173,6 +186,11 @@ var S_TRAPSTS_NON_MASKABLE_EXCP_MASK               = 
SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK          |\
                                                  
SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK        |\
                                                  
SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK          |\
                                                  
SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK
+var S_TRAPSTS_RESTORE_PART_1_SIZE              = 
SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT
+var S_TRAPSTS_RESTORE_PART_2_SHIFT             = 
SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
+var S_TRAPSTS_RESTORE_PART_2_SIZE              = 
SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT - 
SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
+var S_TRAPSTS_RESTORE_PART_3_SHIFT             = 
SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT
+var S_TRAPSTS_RESTORE_PART_3_SIZE              = 32 - 
S_TRAPSTS_RESTORE_PART_3_SHIFT
  var BARRIER_STATE_SIGNAL_OFFSET                       = 16
  var BARRIER_STATE_VALID_OFFSET                        = 0
  #endif
@@ -1386,17 +1404,17 @@ L_SKIP_BARRIER_RESTORE:
        s_setreg_b32    hwreg(HW_REG_SHADER_XNACK_MASK), s_restore_xnack_mask
  #endif
-#if ASIC_FAMILY < CHIP_GFX12
-       s_setreg_b32    hwreg(S_TRAPSTS_HWREG), s_restore_trapsts
-#else
-       // EXCP_FLAG_PRIV.SAVE_CONTEXT and HOST_TRAP may have changed.
+       // {TRAPSTS/EXCP_FLAG_PRIV}.SAVE_CONTEXT and HOST_TRAP may have changed.
        // Only restore the other fields to avoid clobbering them.
-       s_setreg_b32    hwreg(S_TRAPSTS_HWREG, 0, 
SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT), s_restore_trapsts
-       s_lshr_b32      s_restore_trapsts, s_restore_trapsts, 
SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
-       s_setreg_b32    hwreg(S_TRAPSTS_HWREG, 
SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT, 1), s_restore_trapsts
-       s_lshr_b32      s_restore_trapsts, s_restore_trapsts, 
SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT - 
SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
-       s_setreg_b32    hwreg(S_TRAPSTS_HWREG, 
SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT, 32 - 
SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT), s_restore_trapsts
-#endif
+       s_setreg_b32    hwreg(S_TRAPSTS_HWREG, 0, 
S_TRAPSTS_RESTORE_PART_1_SIZE), s_restore_trapsts
+       s_lshr_b32      s_restore_trapsts, s_restore_trapsts, 
S_TRAPSTS_RESTORE_PART_2_SHIFT
+       s_setreg_b32    hwreg(S_TRAPSTS_HWREG, S_TRAPSTS_RESTORE_PART_2_SHIFT, 
S_TRAPSTS_RESTORE_PART_2_SIZE), s_restore_trapsts
+
+if S_TRAPSTS_RESTORE_PART_3_SIZE > 0
+       s_lshr_b32      s_restore_trapsts, s_restore_trapsts, 
S_TRAPSTS_RESTORE_PART_3_SHIFT - S_TRAPSTS_RESTORE_PART_2_SHIFT
+       s_setreg_b32    hwreg(S_TRAPSTS_HWREG, S_TRAPSTS_RESTORE_PART_3_SHIFT, 
S_TRAPSTS_RESTORE_PART_3_SIZE), s_restore_trapsts
+end
+
        s_setreg_b32    hwreg(HW_REG_MODE), s_restore_mode
// Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic

Reply via email to