Re: [PATCH] drm/amdkfd: optimize gfx off enable toggle for debugging

2023-06-07 Thread Felix Kuehling



On 2023-06-07 13:32, Jonathan Kim wrote:

Legacy debug devices limited to pinning a single debug VMID for debugging
are the only devices that require disabling GFX OFF while accessing
debug registers.  Debug devices that support multi-process debugging
rely on the hardware scheduler to update debug registers and do not run
into GFX OFF access issues.

Remove KFD GFX OFF enable toggle clutter by moving these calls into the
KGD debug calls themselves.

v2: toggle gfx off around address watch hi/lo settings as well.

Signed-off-by: Jonathan Kim 
---
  .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c  |  4 +++
  .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |  7 
  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 33 ++-
  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c|  4 +++
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 24 ++
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 22 +++--
  drivers/gpu/drm/amd/amdkfd/kfd_debug.c| 21 +---


Looks like you missed one amdgpu_amdkfd_gfx_off_ctrl call in kfd_process.c.



  7 files changed, 77 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
index 60f9e027fb66..1f0e6ec56618 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -150,6 +150,8 @@ static uint32_t kgd_gfx_aldebaran_set_address_watch(
VALID,
1);
  
+	amdgpu_gfx_off_ctrl(adev, false);

+


Aldebaran doesn't use automatic gfxoff, so this should not be needed.



WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) +
(watch_id * TCP_WATCH_STRIDE)),
watch_address_high);
@@ -158,6 +160,8 @@ static uint32_t kgd_gfx_aldebaran_set_address_watch(
(watch_id * TCP_WATCH_STRIDE)),
watch_address_low);
  
+	amdgpu_gfx_off_ctrl(adev, true);

+
return watch_address_cntl;
  }
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c

index 625db444df1c..a4e28d547173 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -350,6 +350,8 @@ static uint32_t kgd_arcturus_enable_debug_trap(struct 
amdgpu_device *adev,
bool restore_dbg_registers,
uint32_t vmid)
  {
+   amdgpu_gfx_off_ctrl(adev, false);
+


I would need to double check, but I believe Arcturus also doesn't 
support gfxoff.




mutex_lock(&adev->grbm_idx_mutex);
  
  	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);

@@ -362,6 +364,8 @@ static uint32_t kgd_arcturus_enable_debug_trap(struct 
amdgpu_device *adev,
  
  	mutex_unlock(&adev->grbm_idx_mutex);
  
+	amdgpu_gfx_off_ctrl(adev, true);

+
return 0;
  }
  
@@ -375,6 +379,7 @@ static uint32_t kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev,

bool keep_trap_enabled,
uint32_t vmid)
  {
+   amdgpu_gfx_off_ctrl(adev, false);
  
  	mutex_lock(&adev->grbm_idx_mutex);
  
@@ -388,6 +393,8 @@ static uint32_t kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev,
  
  	mutex_unlock(&adev->grbm_idx_mutex);
  
+	amdgpu_gfx_off_ctrl(adev, true);

+
return 0;
  }
  const struct kfd2kgd_calls arcturus_kfd2kgd = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 8ad7a7779e14..415928139861 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -754,12 +754,13 @@ uint32_t kgd_gfx_v10_enable_debug_trap(struct 
amdgpu_device *adev,
bool restore_dbg_registers,
uint32_t vmid)
  {
+   amdgpu_gfx_off_ctrl(adev, false);
  
  	mutex_lock(&adev->grbm_idx_mutex);
  
  	kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
  
-	/* assume gfx off is disabled for the debug session if rlc restore not supported. */

+   /* keep gfx off disabled for the debug session if rlc restore not 
supported. */
if (restore_dbg_registers) {
uint32_t data = 0;
  
@@ -784,6 +785,8 @@ uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev,
  
  	mutex_unlock(&adev->grbm_idx_mutex);
  
+	amdgpu_gfx_off_ctrl(adev, true);

+
return 0;
  }
  
@@ -791,6 +794,8 @@ uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,

bool keep_trap_enabled,
uint32_t vmid)
  {
+   amdgpu_gfx_off_ctrl(adev, false);
+
mutex_lock(&adev->grbm_idx_mutex);
  
  	kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);

@@

RE: [PATCH] drm/amdkfd: optimize gfx off enable toggle for debugging

2023-06-07 Thread Kim, Jonathan
[Public]

+ Felix (typo on email)

> -Original Message-
> From: Kim, Jonathan 
> Sent: Wednesday, June 7, 2023 1:32 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: felix.kuel...@amd.com; Huang, JinHuiEric ;
> Kim, Jonathan 
> Subject: [PATCH] drm/amdkfd: optimize gfx off enable toggle for debugging
>
> Legacy debug devices limited to pinning a single debug VMID for debugging
> are the only devices that require disabling GFX OFF while accessing
> debug registers.  Debug devices that support multi-process debugging
> rely on the hardware scheduler to update debug registers and do not run
> into GFX OFF access issues.
>
> Remove KFD GFX OFF enable toggle clutter by moving these calls into the
> KGD debug calls themselves.
>
> v2: toggle gfx off around address watch hi/lo settings as well.
>
> Signed-off-by: Jonathan Kim 
> ---
>  .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c  |  4 +++
>  .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |  7 
>  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 33
> ++-
>  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c|  4 +++
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 24 ++
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 22 +++--
>  drivers/gpu/drm/amd/amdkfd/kfd_debug.c| 21 +---
>  7 files changed, 77 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> index 60f9e027fb66..1f0e6ec56618 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> @@ -150,6 +150,8 @@ static uint32_t
> kgd_gfx_aldebaran_set_address_watch(
>   VALID,
>   1);
>
> + amdgpu_gfx_off_ctrl(adev, false);
> +
>   WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H)
> +
>   (watch_id * TCP_WATCH_STRIDE)),
>   watch_address_high);
> @@ -158,6 +160,8 @@ static uint32_t
> kgd_gfx_aldebaran_set_address_watch(
>   (watch_id * TCP_WATCH_STRIDE)),
>   watch_address_low);
>
> + amdgpu_gfx_off_ctrl(adev, true);
> +
>   return watch_address_cntl;
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> index 625db444df1c..a4e28d547173 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> @@ -350,6 +350,8 @@ static uint32_t
> kgd_arcturus_enable_debug_trap(struct amdgpu_device *adev,
>   bool restore_dbg_registers,
>   uint32_t vmid)
>  {
> + amdgpu_gfx_off_ctrl(adev, false);
> +
>   mutex_lock(&adev->grbm_idx_mutex);
>
>   kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
> @@ -362,6 +364,8 @@ static uint32_t
> kgd_arcturus_enable_debug_trap(struct amdgpu_device *adev,
>
>   mutex_unlock(&adev->grbm_idx_mutex);
>
> + amdgpu_gfx_off_ctrl(adev, true);
> +
>   return 0;
>  }
>
> @@ -375,6 +379,7 @@ static uint32_t
> kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev,
>   bool keep_trap_enabled,
>   uint32_t vmid)
>  {
> + amdgpu_gfx_off_ctrl(adev, false);
>
>   mutex_lock(&adev->grbm_idx_mutex);
>
> @@ -388,6 +393,8 @@ static uint32_t
> kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev,
>
>   mutex_unlock(&adev->grbm_idx_mutex);
>
> + amdgpu_gfx_off_ctrl(adev, true);
> +
>   return 0;
>  }
>  const struct kfd2kgd_calls arcturus_kfd2kgd = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> index 8ad7a7779e14..415928139861 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> @@ -754,12 +754,13 @@ uint32_t kgd_gfx_v10_enable_debug_trap(struct
> amdgpu_device *adev,
>   bool restore_dbg_registers,
>   uint32_t vmid)
>  {
> + amdgpu_gfx_off_ctrl(adev, false);
>
>   mutex_lock(&adev->grbm_idx_mutex);
>
>   kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
>
> - /* assume gfx off is disabled for the debug session if rlc restore not
> supported. */
> + /* keep gfx off disabled for the debug session if rlc restore not
> supported. */
>   if (restore_dbg_registers) {
>   uint32_t data = 0;
>
> @@ -784,6 +785,8 @@ uint32_t kgd_gfx_v10_enable_debug_trap(struct
> amdgpu_device *adev,
>
>   mutex_unlock(&adev->grbm_idx_mutex);
>
> + amdgpu_gfx_off_ctrl(adev, true);
> +
>   return 0;
>  }
>
> @@ -791,6 +794,8 @@ uint32_t kgd_gfx_v10_disable_debug_trap(struct
> amdgpu_device *adev,
>   bool keep_trap_enabled,
>