On 2022-10-31 12:23, Jonathan Kim wrote:
Add missing debug trap register references and initialize all debug
registers on boot by clearing the hardware exception overrides and the
wave allocation ID index.

For debug devices that only support single process debugging, enable
trap temporary setup by default.

Debug devices that support multi-process debugging require trap
temporary setup to be disabled by default in order to satisfy microbench
performance when in non-debug mode.

The debugger requires that TTMPs 6 & 7 save the dispatch ID to map
waves onto dispatch during compute context inspection.
In order to correctly set this up, set the special reserved CP bit by default
whenever the MQD is initialized.

Signed-off-by: Jonathan Kim <jonathan....@amd.com>
---
  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c        | 26 +++++++
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c         | 30 ++++++++
  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  |  5 ++
  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   |  5 ++
  .../include/asic_reg/gc/gc_10_1_0_offset.h    | 14 ++++
  .../include/asic_reg/gc/gc_10_1_0_sh_mask.h   | 69 +++++++++++++++++++
  .../include/asic_reg/gc/gc_10_3_0_offset.h    | 10 +++
  .../include/asic_reg/gc/gc_10_3_0_sh_mask.h   |  4 ++
  8 files changed, 163 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index af94ac580d3e..d49aff0b4ba3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4904,6 +4904,29 @@ static u32 
gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade
#define DEFAULT_SH_MEM_BASES (0x6000) +static void gfx_v10_0_debug_trap_config_init(struct amdgpu_device *adev,
+                               uint32_t first_vmid,
+                               uint32_t last_vmid)
+{
+       uint32_t data;
+       uint32_t trap_config_vmid_mask = 0;
+       int i;
+
+       /* Calculate trap config vmid mask: set bit i for every VMID i in
+        * [first_vmid, last_vmid).  The mask goes into the VMID_SEL field
+        * of SPI_GDBG_TRAP_CONFIG together with TRAP_EN = 1; the trap mask
+        * and both trap data registers are then written with 0.
+        */
+       for (i = first_vmid; i < last_vmid; i++)
+               trap_config_vmid_mask |= (1 << i);
+
+       data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
+                       VMID_SEL, trap_config_vmid_mask);
+       data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
+                       TRAP_EN, 1);
+       WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
+       WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+       WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
+       WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
+}
+
  static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
  {
        int i;
@@ -4935,6 +4958,9 @@ static void gfx_v10_0_init_compute_vmid(struct 
amdgpu_device *adev)
                WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
                WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
        }
+
+       gfx_v10_0_debug_trap_config_init(adev, adev->vm_manager.first_kfd_vmid,
+                                       AMDGPU_NUM_VMID);
  }
static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 0320be4a5fc6..a0e5ad342f13 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2337,6 +2337,29 @@ static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
        adev->gfx.config.num_rbs = hweight32(active_rbs);
  }
+static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev,
+                               uint32_t first_vmid,
+                               uint32_t last_vmid)
+{
+       uint32_t data;
+       uint32_t trap_config_vmid_mask = 0;
+       int i;
+
+       /* Calculate trap config vmid mask: set bit i for every VMID i in
+        * [first_vmid, last_vmid).  The mask goes into the VMID_SEL field
+        * of SPI_GDBG_TRAP_CONFIG together with TRAP_EN = 1; the trap mask
+        * and both trap data registers are then written with 0.
+        */
+       for (i = first_vmid; i < last_vmid; i++)
+               trap_config_vmid_mask |= (1 << i);
+
+       data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
+                       VMID_SEL, trap_config_vmid_mask);
+       data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
+                       TRAP_EN, 1);
+       WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
+       WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+       WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
+       WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
+}
+
  #define DEFAULT_SH_MEM_BASES  (0x6000)
  static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
  {
@@ -4609,6 +4632,13 @@ static int gfx_v9_0_late_init(void *handle)
        if (r)
                return r;
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
+               gfx_v9_4_2_debug_trap_config_init(adev,
+                       adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);

Where is this function defined? I don't see it in any of your patches. Did you forget to git add a file?

Regards,
  Felix


+       else
+               gfx_v9_0_debug_trap_config_init(adev,
+                       adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
+
        return 0;
  }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index d3e2b6a599a4..cb484ace17de 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -117,6 +117,11 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
                        1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
                        1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
+ /* Set cp_hqd_hq_scheduler0 bit 14 to 1 to have the CP set up the
+        * DISPATCH_PTR.  This is required for the kfd debugger
+        */
+       m->cp_hqd_hq_scheduler0 = 1 << 14;
+
        if (q->format == KFD_QUEUE_FORMAT_AQL) {
                m->cp_hqd_aql_control =
                        1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 0778e587a2d6..86f1cf090246 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -164,6 +164,11 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
                        1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
                        1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
+ /* Set cp_hqd_hq_scheduler0 bit 14 to 1 to have the CP set up the
+        * DISPATCH_PTR.  This is required for the kfd debugger
+        */
+       m->cp_hqd_hq_status0 = 1 << 14;
+
        if (q->format == KFD_QUEUE_FORMAT_AQL) {
                m->cp_hqd_aql_control =
                        1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_offset.h 
b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_offset.h
index 18d34bbceebe..7d384f86bd67 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_offset.h
@@ -5190,6 +5190,20 @@
  #define mmSPI_WCL_PIPE_PERCENT_CS6_BASE_IDX                                   
                         0
  #define mmSPI_WCL_PIPE_PERCENT_CS7                                            
                         0x1f70
  #define mmSPI_WCL_PIPE_PERCENT_CS7_BASE_IDX                                   
                         0
+#define mmSPI_GDBG_WAVE_CNTL                                                   
                        0x1f71
+#define mmSPI_GDBG_WAVE_CNTL_BASE_IDX                                          
                        0
+#define mmSPI_GDBG_TRAP_CONFIG                                                 
                        0x1f72
+#define mmSPI_GDBG_TRAP_CONFIG_BASE_IDX                                        
                        0
+#define mmSPI_GDBG_TRAP_MASK                                                   
                        0x1f73
+#define mmSPI_GDBG_TRAP_MASK_BASE_IDX                                          
                        0
+#define mmSPI_GDBG_WAVE_CNTL2                                                  
                        0x1f74
+#define mmSPI_GDBG_WAVE_CNTL2_BASE_IDX                                         
                        0
+#define mmSPI_GDBG_WAVE_CNTL3                                                  
                        0x1f75
+#define mmSPI_GDBG_WAVE_CNTL3_BASE_IDX                                         
                        0
+#define mmSPI_GDBG_TRAP_DATA0                                                  
                        0x1f78
+#define mmSPI_GDBG_TRAP_DATA0_BASE_IDX                                         
                        0
+#define mmSPI_GDBG_TRAP_DATA1                                                  
                        0x1f79
+#define mmSPI_GDBG_TRAP_DATA1_BASE_IDX                                         
                        0
  #define mmSPI_COMPUTE_QUEUE_RESET                                             
                         0x1f7b
  #define mmSPI_COMPUTE_QUEUE_RESET_BASE_IDX                                    
                         0
  #define mmSPI_RESOURCE_RESERVE_CU_0                                           
                         0x1f7c
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h 
b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h
index 4127896ffcdf..08772ba845b0 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h
@@ -19646,6 +19646,75 @@
  //SPI_WCL_PIPE_PERCENT_CS7
  #define SPI_WCL_PIPE_PERCENT_CS7__VALUE__SHIFT                                
                                0x0
  #define SPI_WCL_PIPE_PERCENT_CS7__VALUE_MASK                                  
                                0x7FL
+//SPI_GDBG_WAVE_CNTL
+#define SPI_GDBG_WAVE_CNTL__STALL_RA__SHIFT                                    
                               0x0
+#define SPI_GDBG_WAVE_CNTL__STALL_VMID__SHIFT                                  
                               0x1
+#define SPI_GDBG_WAVE_CNTL__STALL_RA_MASK                                      
                               0x00000001L
+#define SPI_GDBG_WAVE_CNTL__STALL_VMID_MASK                                    
                               0x0001FFFEL
+//SPI_GDBG_TRAP_CONFIG
+#define SPI_GDBG_TRAP_CONFIG__ME_SEL__SHIFT                                    
                               0x0
+#define SPI_GDBG_TRAP_CONFIG__PIPE_SEL__SHIFT                                  
                               0x2
+#define SPI_GDBG_TRAP_CONFIG__QUEUE_SEL__SHIFT                                 
                               0x4
+#define SPI_GDBG_TRAP_CONFIG__ME_MATCH__SHIFT                                  
                               0x7
+#define SPI_GDBG_TRAP_CONFIG__PIPE_MATCH__SHIFT                                
                               0x8
+#define SPI_GDBG_TRAP_CONFIG__QUEUE_MATCH__SHIFT                               
                               0x9
+#define SPI_GDBG_TRAP_CONFIG__TRAP_EN__SHIFT                                   
                               0xf
+#define SPI_GDBG_TRAP_CONFIG__VMID_SEL__SHIFT                                  
                               0x10
+#define SPI_GDBG_TRAP_CONFIG__ME_SEL_MASK                                      
                               0x00000003L
+#define SPI_GDBG_TRAP_CONFIG__PIPE_SEL_MASK                                    
                               0x0000000CL
+#define SPI_GDBG_TRAP_CONFIG__QUEUE_SEL_MASK                                   
                               0x00000070L
+#define SPI_GDBG_TRAP_CONFIG__ME_MATCH_MASK                                    
                               0x00000080L
+#define SPI_GDBG_TRAP_CONFIG__PIPE_MATCH_MASK                                  
                               0x00000100L
+#define SPI_GDBG_TRAP_CONFIG__QUEUE_MATCH_MASK                                 
                               0x00000200L
+#define SPI_GDBG_TRAP_CONFIG__TRAP_EN_MASK                                     
                               0x00008000L
+#define SPI_GDBG_TRAP_CONFIG__VMID_SEL_MASK                                    
                               0xFFFF0000L
+//SPI_GDBG_TRAP_MASK
+#define SPI_GDBG_TRAP_MASK__EXCP_EN__SHIFT                                     
                               0x0
+#define SPI_GDBG_TRAP_MASK__REPLACE__SHIFT                                     
                               0x9
+#define SPI_GDBG_TRAP_MASK__EXCP_EN_MASK                                       
                               0x01FFL
+#define SPI_GDBG_TRAP_MASK__REPLACE_MASK                                       
                               0x0200L
+//SPI_GDBG_WAVE_CNTL2
+#define SPI_GDBG_WAVE_CNTL2__VMID_MASK__SHIFT                                  
                               0x0
+#define SPI_GDBG_WAVE_CNTL2__MODE__SHIFT                                       
                               0x10
+#define SPI_GDBG_WAVE_CNTL2__VMID_MASK_MASK                                    
                               0x0000FFFFL
+#define SPI_GDBG_WAVE_CNTL2__MODE_MASK                                         
                               0x00030000L
+//SPI_GDBG_WAVE_CNTL3
+#define SPI_GDBG_WAVE_CNTL3__STALL_PS__SHIFT                                   
                               0x0
+#define SPI_GDBG_WAVE_CNTL3__STALL_VS__SHIFT                                   
                               0x1
+#define SPI_GDBG_WAVE_CNTL3__STALL_GS__SHIFT                                   
                               0x2
+#define SPI_GDBG_WAVE_CNTL3__STALL_HS__SHIFT                                   
                               0x3
+#define SPI_GDBG_WAVE_CNTL3__STALL_CSG__SHIFT                                  
                               0x4
+#define SPI_GDBG_WAVE_CNTL3__STALL_CS0__SHIFT                                  
                               0x5
+#define SPI_GDBG_WAVE_CNTL3__STALL_CS1__SHIFT                                  
                               0x6
+#define SPI_GDBG_WAVE_CNTL3__STALL_CS2__SHIFT                                  
                               0x7
+#define SPI_GDBG_WAVE_CNTL3__STALL_CS3__SHIFT                                  
                               0x8
+#define SPI_GDBG_WAVE_CNTL3__STALL_CS4__SHIFT                                  
                               0x9
+#define SPI_GDBG_WAVE_CNTL3__STALL_CS5__SHIFT                                  
                               0xa
+#define SPI_GDBG_WAVE_CNTL3__STALL_CS6__SHIFT                                  
                               0xb
+#define SPI_GDBG_WAVE_CNTL3__STALL_CS7__SHIFT                                  
                               0xc
+#define SPI_GDBG_WAVE_CNTL3__STALL_DURATION__SHIFT                             
                               0xd
+#define SPI_GDBG_WAVE_CNTL3__STALL_MULT__SHIFT                                 
                               0x1c
+#define SPI_GDBG_WAVE_CNTL3__STALL_PS_MASK                                     
                               0x00000001L
+#define SPI_GDBG_WAVE_CNTL3__STALL_VS_MASK                                     
                               0x00000002L
+#define SPI_GDBG_WAVE_CNTL3__STALL_GS_MASK                                     
                               0x00000004L
+#define SPI_GDBG_WAVE_CNTL3__STALL_HS_MASK                                     
                               0x00000008L
+#define SPI_GDBG_WAVE_CNTL3__STALL_CSG_MASK                                    
                               0x00000010L
+#define SPI_GDBG_WAVE_CNTL3__STALL_CS0_MASK                                    
                               0x00000020L
+#define SPI_GDBG_WAVE_CNTL3__STALL_CS1_MASK                                    
                               0x00000040L
+#define SPI_GDBG_WAVE_CNTL3__STALL_CS2_MASK                                    
                               0x00000080L
+#define SPI_GDBG_WAVE_CNTL3__STALL_CS3_MASK                                    
                               0x00000100L
+#define SPI_GDBG_WAVE_CNTL3__STALL_CS4_MASK                                    
                               0x00000200L
+#define SPI_GDBG_WAVE_CNTL3__STALL_CS5_MASK                                    
                               0x00000400L
+#define SPI_GDBG_WAVE_CNTL3__STALL_CS6_MASK                                    
                               0x00000800L
+#define SPI_GDBG_WAVE_CNTL3__STALL_CS7_MASK                                    
                               0x00001000L
+#define SPI_GDBG_WAVE_CNTL3__STALL_DURATION_MASK                               
                               0x0FFFE000L
+#define SPI_GDBG_WAVE_CNTL3__STALL_MULT_MASK                                   
                               0x10000000L
+//SPI_GDBG_TRAP_DATA0
+#define SPI_GDBG_TRAP_DATA0__DATA__SHIFT                                       
                               0x0
+#define SPI_GDBG_TRAP_DATA0__DATA_MASK                                         
                               0xFFFFFFFFL
+//SPI_GDBG_TRAP_DATA1
+#define SPI_GDBG_TRAP_DATA1__DATA__SHIFT                                       
                               0x0
+#define SPI_GDBG_TRAP_DATA1__DATA_MASK                                         
                               0xFFFFFFFFL
  //SPI_COMPUTE_QUEUE_RESET
  #define SPI_COMPUTE_QUEUE_RESET__RESET__SHIFT                                 
                                0x0
  #define SPI_COMPUTE_QUEUE_RESET__RESET_MASK                                   
                                0x01L
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h 
b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h
index 3973110f149c..d09f1a06f4bf 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h
@@ -26,6 +26,8 @@
  #define mmSQ_DEBUG_STS_GLOBAL_BASE_IDX                                        
                         0
  #define mmSQ_DEBUG_STS_GLOBAL2                                                
                         0x10B0
  #define mmSQ_DEBUG_STS_GLOBAL2_BASE_IDX                                       
                         0
+#define mmSQ_DEBUG                                                             
                        0x10B1
+#define mmSQ_DEBUG_BASE_IDX                                                    
                        0
// addressBlock: gc_sdma0_sdma0dec
  // base address: 0x4980
@@ -4849,10 +4851,18 @@
  #define mmSPI_WCL_PIPE_PERCENT_CS3_BASE_IDX                                   
                         0
  #define mmSPI_GDBG_WAVE_CNTL                                                  
                         0x1f71
  #define mmSPI_GDBG_WAVE_CNTL_BASE_IDX                                         
                         0
+#define mmSPI_GDBG_TRAP_CONFIG                                                 
                        0x1f72
+#define mmSPI_GDBG_TRAP_CONFIG_BASE_IDX                                        
                        0
  #define mmSPI_GDBG_TRAP_MASK                                                  
                         0x1f73
  #define mmSPI_GDBG_TRAP_MASK_BASE_IDX                                         
                         0
  #define mmSPI_GDBG_WAVE_CNTL2                                                 
                         0x1f74
  #define mmSPI_GDBG_WAVE_CNTL2_BASE_IDX                                        
                         0
+#define mmSPI_GDBG_WAVE_CNTL3                                                  
                        0x1f75
+#define mmSPI_GDBG_WAVE_CNTL3_BASE_IDX                                         
                        0
+#define mmSPI_GDBG_TRAP_DATA0                                                  
                        0x1f78
+#define mmSPI_GDBG_TRAP_DATA0_BASE_IDX                                         
                        0
+#define mmSPI_GDBG_TRAP_DATA1                                                  
                        0x1f79
+#define mmSPI_GDBG_TRAP_DATA1_BASE_IDX                                         
                        0
  #define mmSPI_COMPUTE_QUEUE_RESET                                             
                         0x1f7b
  #define mmSPI_COMPUTE_QUEUE_RESET_BASE_IDX                                    
                         0
  #define mmSPI_RESOURCE_RESERVE_CU_0                                           
                         0x1f7c
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h 
b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h
index d4e8ff22ecb8..fc85aee010fe 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h
@@ -47853,6 +47853,10 @@
// addressBlock: sqind
+//SQ_DEBUG
+#define SQ_DEBUG__SINGLE_MEMOP_MASK 0x00000001L
+#define SQ_DEBUG__SINGLE_MEMOP__SHIFT 0x00000000
+
  //SQ_DEBUG_STS_GLOBAL
  #define SQ_DEBUG_STS_GLOBAL2__FIFO_LEVEL_GFX0_MASK 0x000000ffL
  #define SQ_DEBUG_STS_GLOBAL2__FIFO_LEVEL_GFX0__SHIFT 0x00000000

Reply via email to