RE: [PATCH 05/29] drm/amdgpu: setup hw debug registers on driver initialization

2022-12-02 Thread Kim, Jonathan
[Public]

> -----Original Message-----
> From: Kuehling, Felix 
> Sent: November 30, 2022 7:24 PM
> To: Kim, Jonathan ; amd-
> g...@lists.freedesktop.org
> Subject: Re: [PATCH 05/29] drm/amdgpu: setup hw debug registers on driver
> initialization
>
>
> On 2022-10-31 12:23, Jonathan Kim wrote:
> > Add missing debug trap registers references and initialize all debug
> > registers on boot by clearing the hardware exception overrides and the
> > wave allocation ID index.
> >
> > For debug devices that only support single process debugging, enable
> > trap temporary setup by default.
> >
> > Debug devices that support multi-process debugging require trap
> > temporary setup to be disabled by default in order to satisfy microbench
> > performance when in non-debug mode.
> >
> > The debugger requires that TTMPs 6 & 7 save the dispatch ID to map
> > waves onto dispatch during compute context inspection.
> > In order to correctly set this up, set the special reserved CP bit by default
> > whenever the MQD is initialized.
> >
> > Signed-off-by: Jonathan Kim 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c| 26 +++
> >   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 30 
> >   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  |  5 ++
> >   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   |  5 ++
> >   .../include/asic_reg/gc/gc_10_1_0_offset.h| 14 
> >   .../include/asic_reg/gc/gc_10_1_0_sh_mask.h   | 69
> +++
> >   .../include/asic_reg/gc/gc_10_3_0_offset.h| 10 +++
> >   .../include/asic_reg/gc/gc_10_3_0_sh_mask.h   |  4 ++
> >   8 files changed, 163 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > index af94ac580d3e..d49aff0b4ba3 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > @@ -4904,6 +4904,29 @@ static u32
> gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade
> >
> >   #define DEFAULT_SH_MEM_BASES  (0x6000)
> >
> > +static void gfx_v10_0_debug_trap_config_init(struct amdgpu_device
> *adev,
> > +   uint32_t first_vmid,
> > +   uint32_t last_vmid)
> > +{
> > +   uint32_t data;
> > +   uint32_t trap_config_vmid_mask = 0;
> > +   int i;
> > +
> > +   /* Calculate trap config vmid mask */
> > +   for (i = first_vmid; i < last_vmid; i++)
> > +   trap_config_vmid_mask |= (1 << i);
> > +
> > +   data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
> > +   VMID_SEL, trap_config_vmid_mask);
> > +   data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
> > +   TRAP_EN, 1);
> > +   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG),
> data);
> > +   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK),
> 0);
> > +
> > +   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0),
> 0);
> > +   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1),
> 0);
> > +}
> > +
> >   static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
> >   {
> > int i;
> > @@ -4935,6 +4958,9 @@ static void gfx_v10_0_init_compute_vmid(struct
> amdgpu_device *adev)
> > WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
> > WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
> > }
> > +
> > +   gfx_v10_0_debug_trap_config_init(adev, adev-
> >vm_manager.first_kfd_vmid,
> > +   AMDGPU_NUM_VMID);
> >   }
> >
> >   static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev)
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > index 0320be4a5fc6..a0e5ad342f13 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > @@ -2337,6 +2337,29 @@ static void gfx_v9_0_setup_rb(struct
> amdgpu_device *adev)
> > adev->gfx.config.num_rbs = hweight32(active_rbs);
> >   }
> >
> > +static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device
> *adev,
> > +   uint32_t first_vmid,
> > +   uint32_t last_vmid)
> > +{
> > +   uint32_t data;
> > +   uint32_t trap_config_vmid_mask = 0;
> > +   int i;
> > +
> > +   /* Calculate trap config vmid mask */
> > +   for (i = first_vmid; i < last_vmid; i++)
> > +   trap_config_vmid_mask |= (1 

Re: [PATCH 05/29] drm/amdgpu: setup hw debug registers on driver initialization

2022-11-30 Thread Felix Kuehling



On 2022-10-31 12:23, Jonathan Kim wrote:

Add missing debug trap registers references and initialize all debug
registers on boot by clearing the hardware exception overrides and the
wave allocation ID index.

For debug devices that only support single process debugging, enable
trap temporary setup by default.

Debug devices that support multi-process debugging require trap
temporary setup to be disabled by default in order to satisfy microbench
performance when in non-debug mode.

The debugger requires that TTMPs 6 & 7 save the dispatch ID to map
waves onto dispatch during compute context inspection.
In order to correctly set this up, set the special reserved CP bit by default
whenever the MQD is initialized.

Signed-off-by: Jonathan Kim 
---
  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c| 26 +++
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 30 
  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  |  5 ++
  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   |  5 ++
  .../include/asic_reg/gc/gc_10_1_0_offset.h| 14 
  .../include/asic_reg/gc/gc_10_1_0_sh_mask.h   | 69 +++
  .../include/asic_reg/gc/gc_10_3_0_offset.h| 10 +++
  .../include/asic_reg/gc/gc_10_3_0_sh_mask.h   |  4 ++
  8 files changed, 163 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index af94ac580d3e..d49aff0b4ba3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4904,6 +4904,29 @@ static u32 
gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade
  
  #define DEFAULT_SH_MEM_BASES	(0x6000)
  
+static void gfx_v10_0_debug_trap_config_init(struct amdgpu_device *adev,

+   uint32_t first_vmid,
+   uint32_t last_vmid)
+{
+   uint32_t data;
+   uint32_t trap_config_vmid_mask = 0;
+   int i;
+
+   /* Calculate trap config vmid mask */
+   for (i = first_vmid; i < last_vmid; i++)
+   trap_config_vmid_mask |= (1 << i);
+
+   data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
+   VMID_SEL, trap_config_vmid_mask);
+   data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
+   TRAP_EN, 1);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
+}
+
  static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
  {
int i;
@@ -4935,6 +4958,9 @@ static void gfx_v10_0_init_compute_vmid(struct 
amdgpu_device *adev)
WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
}
+
+   gfx_v10_0_debug_trap_config_init(adev, adev->vm_manager.first_kfd_vmid,
+   AMDGPU_NUM_VMID);
  }
  
  static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 0320be4a5fc6..a0e5ad342f13 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2337,6 +2337,29 @@ static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
adev->gfx.config.num_rbs = hweight32(active_rbs);
  }
  
+static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev,

+   uint32_t first_vmid,
+   uint32_t last_vmid)
+{
+   uint32_t data;
+   uint32_t trap_config_vmid_mask = 0;
+   int i;
+
+   /* Calculate trap config vmid mask */
+   for (i = first_vmid; i < last_vmid; i++)
+   trap_config_vmid_mask |= (1 << i);
+
+   data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
+   VMID_SEL, trap_config_vmid_mask);
+   data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
+   TRAP_EN, 1);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
+}
+
  #define DEFAULT_SH_MEM_BASES  (0x6000)
  static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
  {
@@ -4609,6 +4632,13 @@ static int gfx_v9_0_late_init(void *handle)
if (r)
return r;
  
+	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))

+   gfx_v9_4_2_debug_trap_config_init(adev,
+   adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);


Where is this function defined? I don't see it in any of your patches.
Did you forget to git add a file?


Regards,
  Felix



+   else
+   gfx_v9_0_debug_trap_config_init(adev,
+   adev->vm_manager.

Re: [PATCH 05/29] drm/amdgpu: setup hw debug registers on driver initialization

2022-11-30 Thread Felix Kuehling



On 2022-11-22 18:38, Felix Kuehling wrote:


On 2022-10-31 12:23, Jonathan Kim wrote:

Add missing debug trap registers references and initialize all debug
registers on boot by clearing the hardware exception overrides and the
wave allocation ID index.

For debug devices that only support single process debugging, enable
trap temporary setup by default.

Debug devices that support multi-process debugging require trap
temporary setup to be disabled by default in order to satisfy microbench
performance when in non-debug mode.


Where is this done? I don't think it's in the MQD setup because that 
happens unconditionally on all GPUs.


If I understand it correctly, it's done by actually enabling the debug 
trap in patch 9 (for Aldebaran). For single-process debug devices, the 
debug trap is always on, as per this patch.


Maybe just add a reference to the Aldebaran patch to make it clearer.

Regards,
  Felix







The debugger requires that TTMPs 6 & 7 save the dispatch ID to map
waves onto dispatch during compute context inspection.
In order to correctly this up, set the special reserved CP bit by 
default

whenever the MQD is initialized.


There is a word missing here. "In order to correctly _set_ this up ..."?

This patch covers GFXv9 and 10. Will GFXv11 be handled separately?

Regards,
  Felix




Signed-off-by: Jonathan Kim 
---
  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c    | 26 +++
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 30 
  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  |  5 ++
  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   |  5 ++
  .../include/asic_reg/gc/gc_10_1_0_offset.h    | 14 
  .../include/asic_reg/gc/gc_10_1_0_sh_mask.h   | 69 +++
  .../include/asic_reg/gc/gc_10_3_0_offset.h    | 10 +++
  .../include/asic_reg/gc/gc_10_3_0_sh_mask.h   |  4 ++
  8 files changed, 163 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c

index af94ac580d3e..d49aff0b4ba3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4904,6 +4904,29 @@ static u32 
gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade

    #define DEFAULT_SH_MEM_BASES    (0x6000)
  +static void gfx_v10_0_debug_trap_config_init(struct amdgpu_device 
*adev,

+    uint32_t first_vmid,
+    uint32_t last_vmid)
+{
+    uint32_t data;
+    uint32_t trap_config_vmid_mask = 0;
+    int i;
+
+    /* Calculate trap config vmid mask */
+    for (i = first_vmid; i < last_vmid; i++)
+    trap_config_vmid_mask |= (1 << i);
+
+    data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
+    VMID_SEL, trap_config_vmid_mask);
+    data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
+    TRAP_EN, 1);
+    WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
+    WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+    WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
+    WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
+}
+
  static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
  {
  int i;
@@ -4935,6 +4958,9 @@ static void gfx_v10_0_init_compute_vmid(struct 
amdgpu_device *adev)

  WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
  WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
  }
+
+    gfx_v10_0_debug_trap_config_init(adev, 
adev->vm_manager.first_kfd_vmid,

+    AMDGPU_NUM_VMID);
  }
    static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c

index 0320be4a5fc6..a0e5ad342f13 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2337,6 +2337,29 @@ static void gfx_v9_0_setup_rb(struct 
amdgpu_device *adev)

  adev->gfx.config.num_rbs = hweight32(active_rbs);
  }
  +static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device 
*adev,

+    uint32_t first_vmid,
+    uint32_t last_vmid)
+{
+    uint32_t data;
+    uint32_t trap_config_vmid_mask = 0;
+    int i;
+
+    /* Calculate trap config vmid mask */
+    for (i = first_vmid; i < last_vmid; i++)
+    trap_config_vmid_mask |= (1 << i);
+
+    data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
+    VMID_SEL, trap_config_vmid_mask);
+    data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
+    TRAP_EN, 1);
+    WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
+    WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+    WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
+    WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
+}
+
  #define DEFAULT_SH_MEM_BASES    (0x6000)
  static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
  {
@@ -4609,6 +4632,13 @@ static int gfx_v9_0_late_init(void *handle)
  if (r)
  return r;
  +    if (adev->ip_versions[GC_HWIP][0] == IP_VE

RE: [PATCH 05/29] drm/amdgpu: setup hw debug registers on driver initialization

2022-11-23 Thread Kim, Jonathan
[Public]

> -----Original Message-----
> From: Kuehling, Felix 
> Sent: November 22, 2022 6:39 PM
> To: Kim, Jonathan ; amd-
> g...@lists.freedesktop.org
> Subject: Re: [PATCH 05/29] drm/amdgpu: setup hw debug registers on driver
> initialization
>
>
> On 2022-10-31 12:23, Jonathan Kim wrote:
> > Add missing debug trap registers references and initialize all debug
> > registers on boot by clearing the hardware exception overrides and the
> > wave allocation ID index.
> >
> > For debug devices that only support single process debugging, enable
> > trap temporary setup by default.
> >
> > Debug devices that support multi-process debugging require trap
> > temporary setup to be disabled by default in order to satisfy microbench
> > performance when in non-debug mode.
>
> Where is this done? I don't think it's in the MQD setup because that
> happens unconditionally on all GPUs.

Right, I forgot to update gfx_v9_4_2_debug_trap_config_init so that it clears
TRAP_EN instead of setting it.
I'll fix that.

>
>
> >
> > The debugger requires that TTMPs 6 & 7 save the dispatch ID to map
> > waves onto dispatch during compute context inspection.
> > In order to correctly this up, set the special reserved CP bit by default
> > whenever the MQD is initialized.
>
> There is a word missing here. "In order to correctly _set_ this up ..."?

Whoops.  Thanks.

>
> This patch covers GFXv9 and 10. Will GFXv11 be handled separately?

OK.  I'll include GFX11 in this patch as well for the next round of reviews.

Thanks,

Jon

>
> Regards,
>Felix
>
>
> >
> > Signed-off-by: Jonathan Kim 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c| 26 +++
> >   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 30 
> >   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  |  5 ++
> >   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   |  5 ++
> >   .../include/asic_reg/gc/gc_10_1_0_offset.h| 14 
> >   .../include/asic_reg/gc/gc_10_1_0_sh_mask.h   | 69
> +++
> >   .../include/asic_reg/gc/gc_10_3_0_offset.h| 10 +++
> >   .../include/asic_reg/gc/gc_10_3_0_sh_mask.h   |  4 ++
> >   8 files changed, 163 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > index af94ac580d3e..d49aff0b4ba3 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > @@ -4904,6 +4904,29 @@ static u32
> gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade
> >
> >   #define DEFAULT_SH_MEM_BASES  (0x6000)
> >
> > +static void gfx_v10_0_debug_trap_config_init(struct amdgpu_device
> *adev,
> > +   uint32_t first_vmid,
> > +   uint32_t last_vmid)
> > +{
> > +   uint32_t data;
> > +   uint32_t trap_config_vmid_mask = 0;
> > +   int i;
> > +
> > +   /* Calculate trap config vmid mask */
> > +   for (i = first_vmid; i < last_vmid; i++)
> > +   trap_config_vmid_mask |= (1 << i);
> > +
> > +   data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
> > +   VMID_SEL, trap_config_vmid_mask);
> > +   data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
> > +   TRAP_EN, 1);
> > +   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG),
> data);
> > +   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK),
> 0);
> > +
> > +   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0),
> 0);
> > +   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1),
> 0);
> > +}
> > +
> >   static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
> >   {
> > int i;
> > @@ -4935,6 +4958,9 @@ static void gfx_v10_0_init_compute_vmid(struct
> amdgpu_device *adev)
> > WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
> > WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
> > }
> > +
> > +   gfx_v10_0_debug_trap_config_init(adev, adev-
> >vm_manager.first_kfd_vmid,
> > +   AMDGPU_NUM_VMID);
> >   }
> >
> >   static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev)
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > index 0320be4a5fc6..a0e5ad342f13 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > @@ -2337,6 +2337,29 @@ static void gfx_v9_0_setup_rb(struct
> amdgpu_device *

Re: [PATCH 05/29] drm/amdgpu: setup hw debug registers on driver initialization

2022-11-22 Thread Felix Kuehling



On 2022-10-31 12:23, Jonathan Kim wrote:

Add missing debug trap registers references and initialize all debug
registers on boot by clearing the hardware exception overrides and the
wave allocation ID index.

For debug devices that only support single process debugging, enable
trap temporary setup by default.

Debug devices that support multi-process debugging require trap
temporary setup to be disabled by default in order to satisfy microbench
performance when in non-debug mode.


Where is this done? I don't think it's in the MQD setup because that 
happens unconditionally on all GPUs.





The debugger requires that TTMPs 6 & 7 save the dispatch ID to map
waves onto dispatch during compute context inspection.
In order to correctly this up, set the special reserved CP bit by default
whenever the MQD is initialized.


There is a word missing here. "In order to correctly _set_ this up ..."?

This patch covers GFXv9 and 10. Will GFXv11 be handled separately?

Regards,
  Felix




Signed-off-by: Jonathan Kim 
---
  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c| 26 +++
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 30 
  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  |  5 ++
  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   |  5 ++
  .../include/asic_reg/gc/gc_10_1_0_offset.h| 14 
  .../include/asic_reg/gc/gc_10_1_0_sh_mask.h   | 69 +++
  .../include/asic_reg/gc/gc_10_3_0_offset.h| 10 +++
  .../include/asic_reg/gc/gc_10_3_0_sh_mask.h   |  4 ++
  8 files changed, 163 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index af94ac580d3e..d49aff0b4ba3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4904,6 +4904,29 @@ static u32 
gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade
  
  #define DEFAULT_SH_MEM_BASES	(0x6000)
  
+static void gfx_v10_0_debug_trap_config_init(struct amdgpu_device *adev,

+   uint32_t first_vmid,
+   uint32_t last_vmid)
+{
+   uint32_t data;
+   uint32_t trap_config_vmid_mask = 0;
+   int i;
+
+   /* Calculate trap config vmid mask */
+   for (i = first_vmid; i < last_vmid; i++)
+   trap_config_vmid_mask |= (1 << i);
+
+   data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
+   VMID_SEL, trap_config_vmid_mask);
+   data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
+   TRAP_EN, 1);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
+}
+
  static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
  {
int i;
@@ -4935,6 +4958,9 @@ static void gfx_v10_0_init_compute_vmid(struct 
amdgpu_device *adev)
WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
}
+
+   gfx_v10_0_debug_trap_config_init(adev, adev->vm_manager.first_kfd_vmid,
+   AMDGPU_NUM_VMID);
  }
  
  static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 0320be4a5fc6..a0e5ad342f13 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2337,6 +2337,29 @@ static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
adev->gfx.config.num_rbs = hweight32(active_rbs);
  }
  
+static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev,

+   uint32_t first_vmid,
+   uint32_t last_vmid)
+{
+   uint32_t data;
+   uint32_t trap_config_vmid_mask = 0;
+   int i;
+
+   /* Calculate trap config vmid mask */
+   for (i = first_vmid; i < last_vmid; i++)
+   trap_config_vmid_mask |= (1 << i);
+
+   data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
+   VMID_SEL, trap_config_vmid_mask);
+   data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
+   TRAP_EN, 1);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
+}
+
  #define DEFAULT_SH_MEM_BASES  (0x6000)
  static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
  {
@@ -4609,6 +4632,13 @@ static int gfx_v9_0_late_init(void *handle)
if (r)
return r;
  
+	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))

+   gfx_v9_4_2_debug_trap_config_init(adev,
+   adev->vm_man