RE: [PATCH] drm/amdkfd: Move gfx12 trap handler to separate file

2024-10-03 Thread Kim, Jonathan
[Public]

+ David/Sreekant for KFD impact

Acked-by: Jonathan Kim 

> -Original Message-
> From: Cornwall, Jay 
> Sent: Thursday, October 3, 2024 3:31 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Cornwall, Jay ; Six, Lancelot
> ; Kim, Jonathan 
> Subject: [PATCH] drm/amdkfd: Move gfx12 trap handler to separate file
>
> gfx12 derivatives will have substantially different trap handler
> implementations from gfx10/gfx11. Add a separate source file for
> gfx12+ and remove unneeded conditional code.
>
> No functional change.
>
> Signed-off-by: Jay Cornwall 
> Cc: Lancelot Six 
> Cc: Jonathan Kim 
> ---
>  .../amd/amdkfd/cwsr_trap_handler_gfx10.asm|  202 +--
>  .../amd/amdkfd/cwsr_trap_handler_gfx12.asm| 1128 +
>  2 files changed, 1129 insertions(+), 201 deletions(-)
>  create mode 100644 drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
> b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
> index 44772eec9ef4..96fbb16ceb21 100644
> --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
> +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
> @@ -34,41 +34,24 @@
>   *   cpp -DASIC_FAMILY=CHIP_PLUM_BONITO cwsr_trap_handler_gfx10.asm -P -
> o gfx11.sp3
>   *   sp3 gfx11.sp3 -hex gfx11.hex
>   *
> - * gfx12:
> - *   cpp -DASIC_FAMILY=CHIP_GFX12 cwsr_trap_handler_gfx10.asm -P -o
> gfx12.sp3
> - *   sp3 gfx12.sp3 -hex gfx12.hex
>   */
>
>  #define CHIP_NAVI10 26
>  #define CHIP_SIENNA_CICHLID 30
>  #define CHIP_PLUM_BONITO 36
> -#define CHIP_GFX12 37
>
>  #define NO_SQC_STORE (ASIC_FAMILY >= CHIP_SIENNA_CICHLID)
>  #define HAVE_XNACK (ASIC_FAMILY < CHIP_SIENNA_CICHLID)
>  #define HAVE_SENDMSG_RTN (ASIC_FAMILY >= CHIP_PLUM_BONITO)
>  #define HAVE_BUFFER_LDS_LOAD (ASIC_FAMILY < CHIP_PLUM_BONITO)
> -#define SW_SA_TRAP (ASIC_FAMILY >= CHIP_PLUM_BONITO &&
> ASIC_FAMILY < CHIP_GFX12)
> +#define SW_SA_TRAP (ASIC_FAMILY == CHIP_PLUM_BONITO)
>  #define SAVE_AFTER_XNACK_ERROR (HAVE_XNACK && !NO_SQC_STORE) //
> workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for
> debugger
>  #define SINGLE_STEP_MISSED_WORKAROUND 1  //workaround for lost
> MODE.DEBUG_EN exception when SAVECTX raised
>
> -#if ASIC_FAMILY < CHIP_GFX12
>  #define S_COHERENCE glc:1
>  #define V_COHERENCE slc:1 glc:1
>  #define S_WAITCNT_0 s_waitcnt 0
> -#else
> -#define S_COHERENCE scope:SCOPE_SYS
> -#define V_COHERENCE scope:SCOPE_SYS
> -#define S_WAITCNT_0 s_wait_idle
> -
> -#define HW_REG_SHADER_FLAT_SCRATCH_LO
> HW_REG_WAVE_SCRATCH_BASE_LO
> -#define HW_REG_SHADER_FLAT_SCRATCH_HI
> HW_REG_WAVE_SCRATCH_BASE_HI
> -#define HW_REG_GPR_ALLOC HW_REG_WAVE_GPR_ALLOC
> -#define HW_REG_LDS_ALLOC HW_REG_WAVE_LDS_ALLOC
> -#define HW_REG_MODE HW_REG_WAVE_MODE
> -#endif
>
> -#if ASIC_FAMILY < CHIP_GFX12
>  var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x0006
>  var SQ_WAVE_STATUS_HALT_MASK = 0x2000
>  var SQ_WAVE_STATUS_ECC_ERR_MASK  = 0x2
> @@ -81,21 +64,6 @@ var S_STATUS_ALWAYS_CLEAR_MASK =
> SQ_WAVE_STATUS_SPI_PRIO_MASK|SQ_WAVE_STATUS_E
>  var S_STATUS_HALT_MASK   =
> SQ_WAVE_STATUS_HALT_MASK
>  var S_SAVE_PC_HI_TRAP_ID_MASK= 0x00FF
>  var S_SAVE_PC_HI_HT_MASK = 0x0100
> -#else
> -var SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK = 0x4
> -var SQ_WAVE_STATE_PRIV_SCC_SHIFT = 9
> -var SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK = 0xC00
> -var SQ_WAVE_STATE_PRIV_HALT_MASK = 0x4000
> -var SQ_WAVE_STATE_PRIV_POISON_ERR_MASK   = 0x8000
> -var SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT  = 15
> -var SQ_WAVE_STATUS_WAVE64_SHIFT  = 29
> -var SQ_WAVE_STATUS_WAVE64_SIZE   = 1
> -var SQ_WAVE_LDS_ALLOC_GRANULARITY= 9
> -var S_STATUS_HWREG   =
> HW_REG_WAVE_STATE_PRIV
> -var S_STATUS_ALWAYS_CLEAR_MASK   =
> SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK|SQ_WAVE_STATE_PRIV_POISON_E
> RR_MASK
> -var S_STATUS_HALT_MASK   =
> SQ_WAVE_STATE_PRIV_HALT_MASK
> -var S_SAVE_PC_HI_TRAP_ID_MASK= 0xF000
> -#endif
>
>  var SQ_WAVE_STATUS_NO_VGPRS_SHIFT= 24
>  var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
> @@ -110,7 +78,6 @@ var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT  =
> 8
>  var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT= 12
>  #endif
>
> -#if ASIC_FAMILY < CHIP_GFX12
>  var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400
>

RE: [PATCH] drm/amdkfd: Copy wave state only for compute queue

2024-10-03 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Yang, Philip 
> Sent: Thursday, October 3, 2024 12:29 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Joshi, Mukul ; Kim, Jonathan
> ; Yang, Philip 
> Subject: [PATCH] drm/amdkfd: Copy wave state only for compute queue
>
> get_wave_state is not defined for sdma queue, copy_context_work_handler
> calls it for sdma queue will crash.
>
> Signed-off-by: Philip Yang 

Reviewed-and-tested-by: Jonathan Kim 

> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 648f40091aa3..b2b16a812e73 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -3173,7 +3173,7 @@ struct copy_context_work_handler_workarea {
>   struct kfd_process *p;
>  };
>
> -static void copy_context_work_handler (struct work_struct *work)
> +static void copy_context_work_handler(struct work_struct *work)
>  {
>   struct copy_context_work_handler_workarea *workarea;
>   struct mqd_manager *mqd_mgr;
> @@ -3200,6 +3200,9 @@ static void copy_context_work_handler (struct
> work_struct *work)
>   struct qcm_process_device *qpd = &pdd->qpd;
>
>   list_for_each_entry(q, &qpd->queues_list, list) {
> + if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE)
> + continue;
> +
>   mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
>
>   /* We ignore the return value from get_wave_state
> --
> 2.43.2



RE: [PATCH 1/2] drm/amdkfd: Move queue fs deletion after destroy check

2024-09-11 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: amd-gfx  On Behalf Of Kent
> Russell
> Sent: Tuesday, September 10, 2024 9:37 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Russell, Kent 
> Subject: [PATCH 1/2] drm/amdkfd: Move queue fs deletion after destroy check
>
> Caution: This message originated from an External Source. Use proper caution
> when opening attachments, clicking links, or responding.
>
>
> We were removing the kernfs entry for queue info before checking if the
> queue could be destroyed. If it failed to get destroyed (e.g. during
> some GPU resets), then we would try to delete it later during pqm
> teardown, but the file was already removed. This led to a kernel WARN
> trying to remove size, gpuid and type. Move the remove to after the
> destroy check.
>
> Signed-off-by: Kent Russell 

This patch is:
Reviewed-by: Jonathan Kim 

> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> index b439d4d0bd84..01b960b15274 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> @@ -517,7 +517,6 @@ int pqm_destroy_queue(struct process_queue_manager
> *pqm, unsigned int qid)
> if (retval)
> goto err_destroy_queue;
>
> -   kfd_procfs_del_queue(pqn->q);
> dqm = pqn->q->device->dqm;
> retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
> if (retval) {
> @@ -527,6 +526,7 @@ int pqm_destroy_queue(struct process_queue_manager
> *pqm, unsigned int qid)
> if (retval != -ETIME)
> goto err_destroy_queue;
> }
> +   kfd_procfs_del_queue(pqn->q);
> kfd_queue_release_buffers(pdd, &pqn->q->properties);
> pqm_clean_queue_resource(pqm, pqn);
> uninit_queue(pqn->q);
> --
> 2.34.1



RE: [PATCH] drm/amdkfd: fix vm-pasid lookup for multiple partitions

2024-09-09 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Christian König 
> Sent: Thursday, September 5, 2024 10:24 AM
> To: Kim, Jonathan ; amd-gfx@lists.freedesktop.org
> Cc: Kuehling, Felix ; Deucher, Alexander
> ; Joshi, Mukul 
> Subject: Re: [PATCH] drm/amdkfd: fix vm-pasid lookup for multiple partitions
>
> Caution: This message originated from an External Source. Use proper caution
> when opening attachments, clicking links, or responding.
>
>
> Am 19.08.24 um 19:59 schrieb Jonathan Kim:
> > Currently multiple partitions will incorrectly overwrite the VM lookup
> > table since the table is indexed by PASID and multiple partitions can
> > register different VM objects on the same PASID.
>
> That's a rather bad idea. Why do we have the same PASID for different VM
> objects in the first place?

Alex can probably elaborate on the KGD side, but from what I can see, the KMS 
driver open call has always assigned a new VM object per PASID on an open call.
The KFD acquires and replaces the KGD PASID-VMID registration on its own 
compute process open/creation call.
If this is the bad idea you're referring to, then someone else will have to 
chime in.  I don't have much history on this unfortunately.

That aside, the current problem is that all KFD device structures are 
logical partitions and register their PASID-VM binding using this concept of a 
device.
On the KGD side however, the registration table is maintained in the adev 
struct, which is a physical socket.
So there's a mismatch in understanding of what a device is between the KFD & 
KGD with regard to the look up table that results in bad bindings.

Adding a per-partition dimension to the existing lookup table resolves issues 
we're seeing, for example, with memory violation interception and XNACK, i.e. bad 
bindings result in the wrong VM object being found when setting no-retry flags on 
memory violations.

Jon

>
> Regards,
> Christian.
>
> >
> > This results in loading the wrong VM object on PASID query.
> >
> > To correct this, setup the lookup table to be per-partition-per-PASID
> > instead.
> >
> > Signed-off-by: Jonathan Kim 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c| 12 
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  1 +
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c   |  4 +-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c   |  7 ++-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 55 +++--
> --
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h| 11 +++-
> >   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c|  5 +-
> >   drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c|  5 +-
> >   drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c|  5 +-
> >   drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c |  3 +-
> >   drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c |  5 +-
> >   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 16 ++
> >   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c|  2 +-
> >   drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c  |  4 +-
> >   drivers/gpu/drm/amd/amdkfd/kfd_events.c   |  3 +-
> >   .../gpu/drm/amd/amdkfd/kfd_int_process_v10.c  |  8 +--
> >   .../gpu/drm/amd/amdkfd/kfd_int_process_v9.c   |  8 +--
> >   drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c   |  3 +-
> >   18 files changed, 92 insertions(+), 65 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > index c272461d70a9..28db789610e1 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > @@ -887,3 +887,15 @@ int amdgpu_amdkfd_unmap_hiq(struct
> amdgpu_device *adev, u32 doorbell_off,
> >
> >   return r;
> >   }
> > +
> > +int amdgpu_amdkfd_node_id_to_xcc_id(struct amdgpu_device *adev,
> uint32_t node_id)
> > +{
> > + if (adev->gfx.funcs->ih_node_to_logical_xcc) {
> > + int xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev, 
> > node_id);
> > +
> > + if (xcc_id >= 0)
> > + return xcc_id;
> > + }
> > +
> > + return 0;
> > +}
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > index 4ed49265c764..bf8bb45d8ab6 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > @@ -356,6 +356,7 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct
> amdgpu_device *adev,
> >   uint64_t size, u32 alloc_flag, int8_t xcp_id);
> >
> >   u64 amdgpu_amdkfd_xcp_memory_size(stru

RE: [PATCH] drm/amdkfd: fix missed queue reset on queue destroy

2024-08-22 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Kuehling, Felix 
> Sent: Wednesday, August 21, 2024 5:51 PM
> To: Kim, Jonathan ; amd-gfx@lists.freedesktop.org
> Cc: Kasiviswanathan, Harish ; Deucher,
> Alexander 
> Subject: Re: [PATCH] drm/amdkfd: fix missed queue reset on queue destroy
>
>
> On 2024-08-21 17:17, Jonathan Kim wrote:
> > If a queue is being destroyed but causes a HWS hang on removal, the KFD
> > may issue an unnecessary gpu reset if the destroyed queue can be fixed
> > by a queue reset.
> >
> > This is because the queue has been removed from the KFD's queue list
> > prior to the preemption action on destroy so the reset call will fail to
> > match the HQD PQ reset information against the KFD's queue record to do
> > the actual reset.
> >
> > Since a queue destroy request is under the same device lock as any other
> > preemption request (which subsumes queue reset calls), transiently
> > store the destroyed queue's reference so that a potential subsequent queue
> > reset call can check against this queue as well.
>
> Maybe this could be simplified by disabling the queues before destroying
> it. That way the queue would still exist when it's being unmapped and
> you don't need to hack the special case "cur_destroyed_queue" into the
> queue reset code.

Thanks Felix.  That's a much simpler fix.
Sending it out.

Jon

>
> Regards,
>Felix
>
>
> >
> > Signed-off-by: Jonathan Kim 
> > ---
> >   drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 10
> +-
> >   drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h |  1 +
> >   2 files changed, 10 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > index 577d121cc6d1..09e39a72ca31 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > @@ -1842,6 +1842,8 @@ static int start_cpsch(struct
> device_queue_manager *dqm)
> > goto fail_detect_hang_buffer;
> > }
> >
> > +   dqm->cur_destroyed_queue = NULL;
> > +
> > dqm_unlock(dqm);
> >
> > return 0;
> > @@ -2105,7 +2107,7 @@ static void set_queue_as_reset(struct
> device_queue_manager *dqm, struct queue *q
> > q->properties.queue_id, q->process->pasid);
> >
> > pdd->has_reset_queue = true;
> > -   if (q->properties.is_active) {
> > +   if (q->properties.is_active && dqm->cur_destroyed_queue != q) {
> > q->properties.is_active = false;
> > decrement_queue_count(dqm, qpd, q);
> > }
> > @@ -2160,6 +2162,10 @@ static struct queue
> *find_queue_by_address(struct device_queue_manager *dqm, uin
> > struct qcm_process_device *qpd;
> > struct queue *q;
> >
> > +   if (dqm->cur_destroyed_queue &&
> > +   dqm->cur_destroyed_queue->properties.queue_address ==
> queue_address)
> > +   return dqm->cur_destroyed_queue;
> > +
> > list_for_each_entry(cur, &dqm->queues, list) {
> > qpd = cur->qpd;
> > list_for_each_entry(q, &qpd->queues_list, list) {
> > @@ -2409,6 +2415,7 @@ static int destroy_queue_cpsch(struct
> device_queue_manager *dqm,
> >
> > list_del(&q->list);
> > qpd->queue_count--;
> > +   dqm->cur_destroyed_queue = q;
> > if (q->properties.is_active) {
> > decrement_queue_count(dqm, qpd, q);
> > if (!dqm->dev->kfd->shared_resources.enable_mes) {
> > @@ -2421,6 +2428,7 @@ static int destroy_queue_cpsch(struct
> device_queue_manager *dqm,
> > retval = remove_queue_mes(dqm, q, qpd);
> > }
> > }
> > +   dqm->cur_destroyed_queue = NULL;
> >
> > /*
> >  * Unconditionally decrement this counter, regardless of the queue's
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> > index 08b40826ad1e..5425c1dd7924 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> > @@ -285,6 +285,7 @@ struct device_queue_manager {
> > struct dqm_detect_hang_info *detect_hang_info;
> > size_t detect_hang_info_size;
> > int detect_hang_count;
> > +   struct queue *cur_destroyed_queue;
> >   };
> >
> >   void device_queue_manager_init_cik(


RE: [PATCH] drm/amdgpu: fix KFDMemoryTest.PtraceAccessInvisibleVram fail on SRIOV

2024-08-12 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Kuehling, Felix 
> Sent: Friday, August 9, 2024 7:49 PM
> To: amd-gfx@lists.freedesktop.org; Zhang, GuoQing (Sam)
> ; Kim, Jonathan 
> Subject: Re: [PATCH] drm/amdgpu: fix
> KFDMemoryTest.PtraceAccessInvisibleVram fail on SRIOV
>
>
> On 2024-08-07 04:36, Samuel Zhang wrote:
> > Ptrace access VRAM bo will first try sdma access in
> > amdgpu_ttm_access_memory_sdma(), if fails, it will fallback to mmio
> > access.
> >
> > Since ptrace only access 8 bytes at a time and
> > amdgpu_ttm_access_memory_sdma() only allow PAGE_SIZE bytes access,
> > it returns fail.
> > On SRIOV, mmio access will also fail as MM_INDEX/MM_DATA register write
> > is blocked for security reasons.
> >
> > The fix is just change len check in amdgpu_ttm_access_memory_sdma() so
> > that len in (0, PAGE_SIZE] are allowed. This will not only fix the ptrace
> > test case on SRIOV, but also improve the access performance when the
> > access length is < PAGE_SIZE.
> > len > PAGE_SIZE case support is not needed as larger size will be broken
> > into chunks of PAGE_SIZE len max in mem_rw().
>
> I'm not convinced that using SDMA for small accesses is the best
> solution for all cases. For example, on large-BAR GPUs we should fall
> back to access through the FB BAR before we use indirect register
> access. That may still perform better than SDMA especially for very
> small accesses like 4-bytes typical for ptrace accesses. Maybe this
> needs an SRIOV-VF-specific condition if MMIO register access is not an
> option there.
>
> @Jonathan Kim, can you chime in as well?

Relaxing length checks only under SRIOV mode is probably a good idea.
SDMA enqueue latency impacting performance for sub-page copy sizes has been 
observed in the past.
Plus MMIO is stable even if SDMA is dead.
I know we have fallbacks anyways in the general case, but it's probably better 
not to prod a defunct SDMA if we don't have to.

Thanks,

Jon

>
> Thanks,
>Felix
>
>
> >
> > Signed-off-by: Samuel Zhang 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 4 ++--
> >   1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > index 5daa05e23ddf..a6e90eada367 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > @@ -1486,7 +1486,7 @@ static int
> amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
> > unsigned int num_dw;
> > int r, idx;
> >
> > -   if (len != PAGE_SIZE)
> > +   if (len > PAGE_SIZE)
> > return -EINVAL;
> >
> > if (!adev->mman.sdma_access_ptr)
> > @@ -1514,7 +1514,7 @@ static int
> amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
> > swap(src_addr, dst_addr);
> >
> > amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
> > -   PAGE_SIZE, 0);
> > +   len, 0);
> >
> > amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);
> > WARN_ON(job->ibs[0].length_dw > num_dw);


RE: [PATCH] drm/amdkfd: fallback to pipe reset on queue reset fail for gfx9

2024-08-09 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Alex Deucher 
> Sent: Friday, August 9, 2024 11:55 AM
> To: Kim, Jonathan 
> Cc: amd-gfx@lists.freedesktop.org; Kuehling, Felix
> ; Deucher, Alexander
> 
> Subject: Re: [PATCH] drm/amdkfd: fallback to pipe reset on queue reset fail 
> for
> gfx9
>
> Caution: This message originated from an External Source. Use proper caution
> when opening attachments, clicking links, or responding.
>
>
> On Fri, Aug 2, 2024 at 12:38 PM Jonathan Kim 
> wrote:
> >
> > If queue reset fails, tell the CP to reset the pipe.
> > Since queues multiplex context per pipe and we've issued a device wide
> > preemption prior to the hang, we can assume the hung pipe only has one
> > queue to reset on pipe reset.
>
> Is there a specific CP or PSP firmware version required for this?  If
> so, we should check for it before attempting this if it will cause a
> problem.

Thanks for the review Alex.
Worst case is that the MMIO reg write doesn't do anything and we end up with 
extra CP active poll wait cycles before falling back to adapter reset.
We may run into scenarios where pipe reset doesn't help anyways even if we did 
have the right FW.

Jon

>
> Other than that:
> Reviewed-by: Alex Deucher 
>
>
> >
> > Signed-off-by: Jonathan Kim 
> > ---
> >  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 46
> +--
> >  1 file changed, 31 insertions(+), 15 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> > index 32f28c12077b..c63528a4e894 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> > @@ -1173,12 +1173,30 @@ uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct
> amdgpu_device *adev,
> > return queue_addr;
> >  }
> >
> > +/* assume queue acquired  */
> > +static int kgd_gfx_v9_hqd_dequeue_wait(struct amdgpu_device *adev,
> uint32_t inst,
> > +  unsigned int utimeout)
> > +{
> > +   unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
> > +
> > +   while (true) {
> > +   uint32_t temp = RREG32_SOC15(GC, GET_INST(GC, inst),
> mmCP_HQD_ACTIVE);
> > +
> > +   if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
> > +   return 0;
> > +
> > +   if (time_after(jiffies, end_jiffies))
> > +   return -ETIME;
> > +
> > +   usleep_range(500, 1000);
> > +   }
> > +}
> > +
> >  uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev,
> >   uint32_t pipe_id, uint32_t queue_id,
> >   uint32_t inst, unsigned int utimeout)
> >  {
> > -   uint32_t low, high, temp;
> > -   unsigned long end_jiffies;
> > +   uint32_t low, high, pipe_reset_data = 0;
> > uint64_t queue_addr = 0;
> >
> > kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
> > @@ -1202,25 +1220,23 @@ uint64_t kgd_gfx_v9_hqd_reset(struct
> amdgpu_device *adev,
> > /* assume previous dequeue request issued will take affect after 
> > reset */
> > WREG32_SOC15(GC, GET_INST(GC, inst),
> mmSPI_COMPUTE_QUEUE_RESET, 0x1);
> >
> > -   end_jiffies = (utimeout * HZ / 1000) + jiffies;
> > -   while (true) {
> > -   temp = RREG32_SOC15(GC, GET_INST(GC, inst),
> mmCP_HQD_ACTIVE);
> > +   if (!kgd_gfx_v9_hqd_dequeue_wait(adev, inst, utimeout))
> > +   goto unlock_out;
> >
> > -   if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
> > -   break;
> > +   pr_debug("Attempting pipe reset on XCC %i pipe id %i\n", inst, 
> > pipe_id);
> >
> > -   if (time_after(jiffies, end_jiffies)) {
> > -   queue_addr = 0;
> > -   break;
> > -   }
> > +   pipe_reset_data = REG_SET_FIELD(pipe_reset_data, CP_MEC_CNTL,
> MEC_ME1_PIPE0_RESET, 1);
> > +   pipe_reset_data = pipe_reset_data << pipe_id;
> >
> > -   usleep_range(500, 1000);
> > -   }
> > +   WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_MEC_CNTL,
> pipe_reset_data);
> > +   WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_MEC_CNTL, 0);
> >
> > -   pr_debug("queue reset on XCC %i pipe id %i queue id %i %s\n",
> > -inst, pipe_id, queue_id, !!queue_addr ? "succeeded!" : 
> > "failed!");
> > +   if (kgd_gfx_v9_hqd_dequeue_wait(adev, inst, utimeout))
> > +   queue_addr = 0;
> >
> >  unlock_out:
> > +   pr_debug("queue reset on XCC %i pipe id %i queue id %i %s\n",
> > +inst, pipe_id, queue_id, !!queue_addr ? "succeeded!" : 
> > "failed!");
> > amdgpu_gfx_rlc_exit_safe_mode(adev, inst);
> > kgd_gfx_v9_release_queue(adev, inst);
> >
> > --
> > 2.34.1
> >


RE: [PATCH] drm/amdkfd: fallback to pipe reset on queue reset fail for gfx9

2024-08-09 Thread Kim, Jonathan
[Public]

Ping on review.

Thanks,

Jon

> -Original Message-
> From: Kim, Jonathan 
> Sent: Friday, August 2, 2024 12:38 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kuehling, Felix ; Deucher, Alexander
> ; Kim, Jonathan ;
> Kim, Jonathan 
> Subject: [PATCH] drm/amdkfd: fallback to pipe reset on queue reset fail for
> gfx9
>
> If queue reset fails, tell the CP to reset the pipe.
> Since queues multiplex context per pipe and we've issued a device wide
> preemption prior to the hang, we can assume the hung pipe only has one
> queue to reset on pipe reset.
>
> Signed-off-by: Jonathan Kim 
> ---
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 46 +-
> -
>  1 file changed, 31 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index 32f28c12077b..c63528a4e894 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -1173,12 +1173,30 @@ uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct
> amdgpu_device *adev,
>   return queue_addr;
>  }
>
> +/* assume queue acquired  */
> +static int kgd_gfx_v9_hqd_dequeue_wait(struct amdgpu_device *adev,
> uint32_t inst,
> +unsigned int utimeout)
> +{
> + unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
> +
> + while (true) {
> + uint32_t temp = RREG32_SOC15(GC, GET_INST(GC, inst),
> mmCP_HQD_ACTIVE);
> +
> + if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
> + return 0;
> +
> + if (time_after(jiffies, end_jiffies))
> + return -ETIME;
> +
> + usleep_range(500, 1000);
> + }
> +}
> +
>  uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev,
> uint32_t pipe_id, uint32_t queue_id,
> uint32_t inst, unsigned int utimeout)
>  {
> - uint32_t low, high, temp;
> - unsigned long end_jiffies;
> + uint32_t low, high, pipe_reset_data = 0;
>   uint64_t queue_addr = 0;
>
>   kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
> @@ -1202,25 +1220,23 @@ uint64_t kgd_gfx_v9_hqd_reset(struct
> amdgpu_device *adev,
>   /* assume previous dequeue request issued will take affect after reset
> */
>   WREG32_SOC15(GC, GET_INST(GC, inst),
> mmSPI_COMPUTE_QUEUE_RESET, 0x1);
>
> - end_jiffies = (utimeout * HZ / 1000) + jiffies;
> - while (true) {
> - temp = RREG32_SOC15(GC, GET_INST(GC, inst),
> mmCP_HQD_ACTIVE);
> + if (!kgd_gfx_v9_hqd_dequeue_wait(adev, inst, utimeout))
> + goto unlock_out;
>
> - if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
> - break;
> + pr_debug("Attempting pipe reset on XCC %i pipe id %i\n", inst,
> pipe_id);
>
> - if (time_after(jiffies, end_jiffies)) {
> - queue_addr = 0;
> - break;
> - }
> + pipe_reset_data = REG_SET_FIELD(pipe_reset_data, CP_MEC_CNTL,
> MEC_ME1_PIPE0_RESET, 1);
> + pipe_reset_data = pipe_reset_data << pipe_id;
>
> - usleep_range(500, 1000);
> - }
> + WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_MEC_CNTL,
> pipe_reset_data);
> + WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_MEC_CNTL, 0);
>
> - pr_debug("queue reset on XCC %i pipe id %i queue id %i %s\n",
> -  inst, pipe_id, queue_id, !!queue_addr ? "succeeded!" :
> "failed!");
> + if (kgd_gfx_v9_hqd_dequeue_wait(adev, inst, utimeout))
> + queue_addr = 0;
>
>  unlock_out:
> + pr_debug("queue reset on XCC %i pipe id %i queue id %i %s\n",
> +  inst, pipe_id, queue_id, !!queue_addr ? "succeeded!" :
> "failed!");
>   amdgpu_gfx_rlc_exit_safe_mode(adev, inst);
>   kgd_gfx_v9_release_queue(adev, inst);
>
> --
> 2.34.1



RE: [PATCH] drm/amdkfd: Add node_id to location_id generically

2024-08-09 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Lazar, Lijo 
> Sent: Wednesday, August 7, 2024 12:17 PM
> To: amd-gfx@lists.freedesktop.org; brahma_sw_dev
> 
> Cc: Zhang, Hawking ; Deucher, Alexander
> ; Kim, Jonathan ;
> Kasiviswanathan, Harish 
> Subject: [PATCH] drm/amdkfd: Add node_id to location_id generically
>
> If there are multiple nodes per kfd device, add nodeid to location_id to
> differentiate.
>
> Signed-off-by: Lijo Lazar 

Reviewed-by: Jonathan Kim 

> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> index 40771f8752cb..e6caab75e863 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> @@ -2079,7 +2079,7 @@ int kfd_topology_add_device(struct kfd_node
> *gpu)
>   HSA_CAP_ASIC_REVISION_MASK);
>
>   dev->node_props.location_id = pci_dev_id(gpu->adev->pdev);
> - if (KFD_GC_VERSION(dev->gpu->kfd) == IP_VERSION(9, 4, 3))
> + if (gpu->kfd->num_nodes > 1)
>   dev->node_props.location_id |= dev->gpu->node_id;
>
>   dev->node_props.domain = pci_domain_nr(gpu->adev->pdev->bus);
> --
> 2.25.1



RE: [PATCH] drm/amdkfd: fix partition query when setting up recommended sdma engines

2024-08-08 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Lazar, Lijo 
> Sent: Wednesday, August 7, 2024 11:46 PM
> To: Kim, Jonathan ; amd-gfx@lists.freedesktop.org
> Cc: Kuehling, Felix 
> Subject: Re: [PATCH] drm/amdkfd: fix partition query when setting up
> recommended sdma engines
>
>
>
> On 8/8/2024 2:04 AM, Jonathan Kim wrote:
> > When users dynamically set the partition mode through sysfs writes,
> > this can lead to a double lock situation where the KFD is trying to take
> > the partition lock when updating the recommended SDMA engines.
> > Have the KFD do a lockless query instead to avoid this.
> > This should work since the KFD always initializes synchronously after
> > the KGD partition mode is set regardless of user or system setup.
> >
> > Fixes: a0f548d7871e ("drm/amdkfd: allow users to target recommended
> SDMA engines")
> > Signed-off-by: Jonathan Kim 
> > ---
> >  drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> > index 40771f8752cb..8fee89b8dd67 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> > @@ -1287,7 +1287,7 @@ static void
> kfd_set_recommended_sdma_engines(struct kfd_topology_device *to_dev,
> > int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes;
> > bool support_rec_eng = !amdgpu_sriov_vf(adev) && to_dev->gpu &&
> > adev->aid_mask && num_xgmi_nodes &&
> > -   (amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
> AMDGPU_XCP_FL_NONE) ==
> > +   (amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
> AMDGPU_XCP_FL_LOCKED) ==
> >   AMDGPU_SPX_PARTITION_MODE) &&
>
> Replacing with (gpu->kfd->num_nodes == 1) may be better.

Thanks.  That seems a lot simpler.  Also another assumption is that all 14 SDMA 
xGMI engines are present, but that may or may not always be the case for all 
dGPU SPX-mode devices.
I'll add that as a hard condition check as well.

Jon

>
> Thanks,
> Lijo
>
> > (!(adev->flags & AMD_IS_APU) && num_xgmi_nodes == 8);
> >


RE: [PATCH] drm/amdkfd: support per-queue reset on gfx9

2024-07-31 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Kuehling, Felix 
> Sent: Wednesday, July 31, 2024 11:45 AM
> To: Kim, Jonathan ; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander 
> Subject: Re: [PATCH] drm/amdkfd: support per-queue reset on gfx9
>
>
> On 2024-07-31 09:37, Jonathan Kim wrote:
> > Support per-queue reset for GFX9.  The recommendation is for the driver
> > to target reset the HW queue via a SPI MMIO register write.
> >
> > Since this requires pipe and HW queue info and MEC FW is limited to
> > doorbell reports of hung queues after an unmap failure, scan the HW
> > queue slots defined by SET_RESOURCES first to identify the user queue
> > candidates to reset.
> >
> > Only signal reset events to processes that have had a queue reset.
> >
> > If queue reset fails, fall back to GPU reset.
> >
> > v3: address nitpicks
> > - handle hang detect buffer ENOMEM
> > - warn on multiple detect hang misuse
> > - reset hang detect buffer to NULL on free
> > - update DRM_ERR on reset to drm_err app warning message
>
> I meant dev_err here to make sure we print the device identifier. That's
> what we mostly use in KFD. If drm_err does the same, that's fine, too.
> Looking at the definitions in drm_print.h, the only thing that drm_err
> adds is a "[drm]" tag in the message.
>
> See one more comment inline.
>
>
> >
> > v2: move reset queue flag for house keeping to process device.
> > split detect and reset into separate functions.
> > make reset call safe during power saving modes.
> > clean up some other nitpicks.
> >
> > Signed-off-by: Jonathan Kim 
>
> [snip]
>
> @@ -1929,6 +1966,135 @@ static int map_queues_cpsch(struct
> device_queue_manager *dqm)
> > return retval;
> >   }
> >
> > +static void set_queue_as_reset(struct device_queue_manager *dqm, struct
> queue *q,
> > +  struct qcm_process_device *qpd)
> > +{
> > +   struct kfd_process_device *pdd = qpd_to_pdd(qpd);
> > +
> > +   pr_err("queue id 0x%0x at pasid 0x%0x is reset\n",
> > +  q->properties.queue_id, q->process->pasid);
>
> This could also be a dev_err(dqm->dev->adev->dev, ...) or
> drm_err(dqm->dev->adev->ddev, ...). With that fixed, the patch is
>
> Reviewed-by: Felix Kuehling 

Done.  Changed both to dev_err.  Thanks for the review.

Jon
>
>
> > +
> > +   pdd->has_reset_queue = true;
> > +   if (q->properties.is_active) {
> > +   q->properties.is_active = false;
> > +   decrement_queue_count(dqm, qpd, q);
> > +   }
> > +}
> > +
> > +static int detect_queue_hang(struct device_queue_manager *dqm)
> > +{
> > +   int i;
> > +
> > +   /* detect should be used only in dqm locked queue reset */
> > +   if (WARN_ON(dqm->detect_hang_count > 0))
> > +   return 0;
> > +
> > +   memset(dqm->detect_hang_info, 0, dqm->detect_hang_info_size);
> > +
> > +   for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) {
> > +   uint32_t mec, pipe, queue;
> > +   int xcc_id;
> > +
> > +   mec = (i / dqm->dev->kfd-
> >shared_resources.num_queue_per_pipe)
> > +   / dqm->dev->kfd-
> >shared_resources.num_pipe_per_mec;
> > +
> > +   if (mec || !test_bit(i, dqm->dev->kfd-
> >shared_resources.cp_queue_bitmap))
> > +   continue;
> > +
> > +   amdgpu_queue_mask_bit_to_mec_queue(dqm->dev->adev,
> i, &mec, &pipe, &queue);
> > +
> > +   for_each_inst(xcc_id, dqm->dev->xcc_mask) {
> > +   uint64_t queue_addr = dqm->dev->kfd2kgd-
> >hqd_get_pq_addr(
> > +   dqm->dev->adev, pipe,
> queue, xcc_id);
> > +   struct dqm_detect_hang_info hang_info;
> > +
> > +   if (!queue_addr)
> > +   continue;
> > +
> > +   hang_info.pipe_id = pipe;
> > +   hang_info.queue_id = queue;
> > +   hang_info.xcc_id = xcc_id;
> > +   hang_info.queue_address = queue_addr;
> > +
> > +   dqm->detect_hang_info[dqm->detect_hang_count] =
> hang_info;
> > +   dqm->detect_hang_count++;
> > +   }
> > +   }
> > +
> > +   return dqm->detect_hang_count;
> > +}
> > +
> > +static struct qu

RE: [PATCH] drm/amdkfd: fix debug watchpoints for logical devices

2024-07-31 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Kasiviswanathan, Harish 
> Sent: Tuesday, July 30, 2024 12:23 PM
> To: Kim, Jonathan ; amd-gfx@lists.freedesktop.org
> Cc: Kim, Jonathan ; Kim, Jonathan
> 
> Subject: RE: [PATCH] drm/amdkfd: fix debug watchpoints for logical devices
>
> [AMD Official Use Only - AMD Internal Distribution Only]
>
> One minor comment b. Either way Reviewed-by: Harish Kasiviswanathan
> 
>
> -Original Message-
> From: amd-gfx  On Behalf Of
> Jonathan Kim
> Sent: Monday, July 22, 2024 2:57 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kim, Jonathan ; Kim, Jonathan
> 
> Subject: [PATCH] drm/amdkfd: fix debug watchpoints for logical devices
>
> The number of watchpoints should be set and constrained per logical
> partition device, not by the socket device.
>
> Signed-off-by: Jonathan Kim 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_debug.c  | 20 ++--
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c |  4 ++--
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h   |  8 
>  3 files changed, 16 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> index 34a282540c7e..312dfa84f29f 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> @@ -365,47 +365,47 @@ static int kfd_dbg_get_dev_watch_id(struct
> kfd_process_device *pdd, int *watch_i
>
> *watch_id = KFD_DEBUGGER_INVALID_WATCH_POINT_ID;
>
> -   spin_lock(&pdd->dev->kfd->watch_points_lock);
> +   spin_lock(&pdd->dev->watch_points_lock);
>
> for (i = 0; i < MAX_WATCH_ADDRESSES; i++) {
> /* device watchpoint in use so skip */
> -   if ((pdd->dev->kfd->alloc_watch_ids >> i) & 0x1)
> +   if ((pdd->dev->alloc_watch_ids >> i) & 0x1)
> continue;
>
> pdd->alloc_watch_ids |= 0x1 << i;
> -   pdd->dev->kfd->alloc_watch_ids |= 0x1 << i;
> +   pdd->dev->alloc_watch_ids |= 0x1 << i;
> *watch_id = i;
> -   spin_unlock(&pdd->dev->kfd->watch_points_lock);
> +   spin_unlock(&pdd->dev->watch_points_lock);
> return 0;
> }
>
> -   spin_unlock(&pdd->dev->kfd->watch_points_lock);
> +   spin_unlock(&pdd->dev->watch_points_lock);
>
> return -ENOMEM;
>  }
>
>  static void kfd_dbg_clear_dev_watch_id(struct kfd_process_device *pdd, int
> watch_id)
>  {
> -   spin_lock(&pdd->dev->kfd->watch_points_lock);
> +   spin_lock(&pdd->dev->watch_points_lock);
>
> /* process owns device watch point so safe to clear */
> if ((pdd->alloc_watch_ids >> watch_id) & 0x1) {
> pdd->alloc_watch_ids &= ~(0x1 << watch_id);
> -   pdd->dev->kfd->alloc_watch_ids &= ~(0x1 << watch_id);
> +   pdd->dev->alloc_watch_ids &= ~(0x1 << watch_id);
> }
>
> -   spin_unlock(&pdd->dev->kfd->watch_points_lock);
> +   spin_unlock(&pdd->dev->watch_points_lock);
>  }
>
>  static bool kfd_dbg_owns_dev_watch_id(struct kfd_process_device *pdd, int
> watch_id)
>  {
> bool owns_watch_id = false;
>
> -   spin_lock(&pdd->dev->kfd->watch_points_lock);
> +   spin_lock(&pdd->dev->watch_points_lock);
> owns_watch_id = watch_id < MAX_WATCH_ADDRESSES &&
> ((pdd->alloc_watch_ids >> watch_id) & 0x1);
>
> -   spin_unlock(&pdd->dev->kfd->watch_points_lock);
> +   spin_unlock(&pdd->dev->watch_points_lock);
>
> return owns_watch_id;
>  }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index f4d20adaa068..f91a9b6ce3fb 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -885,12 +885,12 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
> goto node_init_error;
> }
>
> Wouldn't it be better to initialize node->watch_points_lock here.
> spin_lock_init(&node->watch_points_lock);

Done.  Thanks for the review.

Jon

>
> kfd->nodes[i] = node;
> +
> +   spin_lock_init(&kfd->nodes[i]->watch_points_lock);
> }
>
> svm_range_set_max_pages(kfd->adev);
>
> -   spin_lock_init(&am

RE: [PATCH 2/2] drm/amdkfd: support the debugger during per-queue reset

2024-07-30 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Kuehling, Felix 
> Sent: Tuesday, July 30, 2024 6:17 PM
> To: Kim, Jonathan ; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander 
> Subject: Re: [PATCH 2/2] drm/amdkfd: support the debugger during per-
> queue reset
>
>
>
> On 2024-07-26 11:30, Jonathan Kim wrote:
> > In order to allow ROCm GDB to handle reset queues, raise an
> > EC_QUEUE_RESET exception so that the debugger can subscribe and
> > query this exception.
> >
> > Reset queues should still be considered suspendable with a status
> > flag of KFD_DBG_QUEUE_RESET_MASK.
> > However they should not be resumable since user space will no longer
> > be able to access reset queues.
> >
> > v2: move per-queue reset flag to this patch
> > rebase based on patch 1 changes
> >
> > Signed-off-by: Jonathan Kim 
> > ---
> >  .../drm/amd/amdkfd/kfd_device_queue_manager.c | 31
> ---
> >  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  1 +
> >  include/uapi/linux/kfd_ioctl.h|  4 +++
> >  3 files changed, 31 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > index e335703eff84..cb7b5bbf5c40 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > @@ -164,6 +164,10 @@ static void kfd_hws_hang(struct
> device_queue_manager *dqm)
> > struct kfd_process_device *pdd = qpd_to_pdd(qpd);
> >
> > pdd->has_reset_queue = true;
> > +   q->properties.is_reset = true;
> > +   kfd_dbg_ev_raise(KFD_EC_MASK(EC_QUEUE_RESET),
> > +q->process, q->device, q-
> >doorbell_id,
> > +false, NULL, 0);
> > }
> > }
> >
> > @@ -986,7 +990,7 @@ static int suspend_single_queue(struct
> device_queue_manager *dqm,
> >  {
> > bool is_new;
> >
> > -   if (q->properties.is_suspended)
> > +   if (q->properties.is_suspended || q->properties.is_reset)
> > return 0;
> >
> > pr_debug("Suspending PASID %u queue [%i]\n",
> > @@ -1007,6 +1011,9 @@ static int suspend_single_queue(struct
> device_queue_manager *dqm,
> > if (dqm->dev->kfd->shared_resources.enable_mes) {
> > int r = remove_queue_mes(dqm, q, &pdd->qpd);
> >
> > +   if (q->properties.is_reset)
> > +   return 0;
> > +
> > if (r)
> > return r;
> > }
> > @@ -1967,10 +1974,14 @@ static void set_queue_as_reset(struct
> device_queue_manager *dqm, struct queue *q
> >q->properties.queue_id, q->process->pasid);
> >
> > pdd->has_reset_queue = true;
> > +   q->properties.is_reset = true;
> > if (q->properties.is_active) {
> > q->properties.is_active = false;
> > decrement_queue_count(dqm, qpd, q);
> > }
> > +
> > +   kfd_dbg_ev_raise(KFD_EC_MASK(EC_QUEUE_RESET), q->process, q-
> >device,
> > +q->doorbell_id, false, NULL, 0);
> >  }
> >
> >  static int detect_queue_hang(struct device_queue_manager *dqm)
> > @@ -3037,7 +3048,8 @@ int resume_queues(struct kfd_process *p,
> > queue_ids[q_idx] &=
> >
>   ~KFD_DBG_QUEUE_INVALID_MASK;
> > } else {
> > -   queue_ids[q_idx] |=
> > +   queue_ids[q_idx] |= q-
> >properties.is_reset ?
> > +
>   KFD_DBG_QUEUE_RESET_MASK :
> >
>   KFD_DBG_QUEUE_ERROR_MASK;
> > break;
> > }
> > @@ -3072,7 +3084,7 @@ int resume_queues(struct kfd_process *p,
> > queue_ids);
> >
> > /* mask queue as error on resume fail
> */
> > -   if (q_idx != QUEUE_NOT_FOUND)
> > +   if (q_idx != QUEUE_NOT_FOUND
> && !q->properties.is_reset)
> > queue_ids[q_idx] |=

RE: [PATCH 1/2] drm/amdkfd: support per-queue reset on gfx9

2024-07-30 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Kuehling, Felix 
> Sent: Tuesday, July 30, 2024 6:07 PM
> To: Kim, Jonathan ; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander 
> Subject: Re: [PATCH 1/2] drm/amdkfd: support per-queue reset on gfx9
>
>
> On 2024-07-26 11:30, Jonathan Kim wrote:
> > Support per-queue reset for GFX9.  The recommendation is for the driver
> > to target reset the HW queue via a SPI MMIO register write.
> >
> > Since this requires pipe and HW queue info and MEC FW is limited to
> > doorbell reports of hung queues after an unmap failure, scan the HW
> > queue slots defined by SET_RESOURCES first to identify the user queue
> > candidates to reset.
> >
> > Only signal reset events to processes that have had a queue reset.
> >
> > If queue reset fails, fall back to GPU reset.
> >
> > v2: move reset queue flag for house keeping to process device.
> > split detect and reset into separate functions.
> > make reset call safe during power saving modes.
> > clean up some other nitpicks.
>
> Some more nit-picks inline.
>
> >
> > Signed-off-by: Jonathan Kim 
> > ---
> >  .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c  |   2 +
> >  .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |   4 +-
> >  .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c   |   4 +-
> >  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c|  16 ++
> >  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h|   9 +
> >  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c  |   4 +-
> >  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c|  18 +-
> >  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  85 +
> >  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |   9 +
> >  .../drm/amd/amdkfd/kfd_device_queue_manager.c | 172
> +-
> >  .../drm/amd/amdkfd/kfd_device_queue_manager.h |  12 ++
> >  drivers/gpu/drm/amd/amdkfd/kfd_events.c   |  21 +++
> >  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   |   6 +-
> >  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |   3 +
> >  drivers/gpu/drm/amd/amdkfd/kfd_process.c  |   2 +
> >  .../gpu/drm/amd/include/kgd_kfd_interface.h   |   6 +
> >  16 files changed, 360 insertions(+), 13 deletions(-)
> >
> [snip]
> > @@ -1680,6 +1700,14 @@ static int start_cpsch(struct
> device_queue_manager *dqm)
> > &dqm->wait_times);
> > }
> >
> > +   /* setup per-queue reset detection buffer  */
> > +   num_hw_queue_slots =  dqm->dev->kfd-
> >shared_resources.num_queue_per_pipe *
> > + dqm->dev->kfd-
> >shared_resources.num_pipe_per_mec *
> > + NUM_XCC(dqm->dev->xcc_mask);
> > +
> > +   dqm->detect_hang_info_size = num_hw_queue_slots * sizeof(struct
> dqm_detect_hang_info);
> > +   dqm->detect_hang_info = kzalloc(dqm->detect_hang_info_size,
> GFP_KERNEL);
>
> You need to check the return value and handle allocation failures.
>
> > +
> > dqm_unlock(dqm);
> >
> > return 0;
> > @@ -1713,6 +1741,7 @@ static int stop_cpsch(struct
> device_queue_manager *dqm)
> > kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
> > if (!dqm->dev->kfd->shared_resources.enable_mes)
> > pm_uninit(&dqm->packet_mgr);
> > +   kfree(dqm->detect_hang_info);
>
> Reset dqm->detect_hang_info to NULL to avoid a dangling pointer.
>
> > dqm_unlock(dqm);
> >
> > return 0;
> > @@ -1929,6 +1958,131 @@ static int map_queues_cpsch(struct
> device_queue_manager *dqm)
> > return retval;
> >  }
> >
> > +static void set_queue_as_reset(struct device_queue_manager *dqm, struct
> queue *q,
> > +  struct qcm_process_device *qpd)
> > +{
> > +   struct kfd_process_device *pdd = qpd_to_pdd(qpd);
> > +
> > +   pr_err("queue id 0x%0x at pasid 0x%0x is reset\n",
> > +  q->properties.queue_id, q->process->pasid);
> > +
> > +   pdd->has_reset_queue = true;
> > +   if (q->properties.is_active) {
> > +   q->properties.is_active = false;
> > +   decrement_queue_count(dqm, qpd, q);
> > +   }
> > +}
> > +
> > +static int detect_queue_hang(struct device_queue_manager *dqm)
> > +{
> > +   int i;
> > +
> > +   memset(dqm->detect_hang_info, 0, dqm->detect_hang_info_size);
>
> Set dqm->detect_hang_count to 0 to avoid overflows in case multiple hand
> detections get kicked off. Or if that's no

RE: [PATCH] drm/amdkfd: fix debug watchpoints for logical devices

2024-07-29 Thread Kim, Jonathan
[Public]

Ping on review.

Thanks,

Jon

> -Original Message-
> From: Kim, Jonathan 
> Sent: Monday, July 22, 2024 2:57 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kim, Jonathan ; Kim, Jonathan
> 
> Subject: [PATCH] drm/amdkfd: fix debug watchpoints for logical devices
>
> The number of watchpoints should be set and constrained per logical
> partition device, not by the socket device.
>
> Signed-off-by: Jonathan Kim 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_debug.c  | 20 ++--
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c |  4 ++--
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h   |  8 
>  3 files changed, 16 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> index 34a282540c7e..312dfa84f29f 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> @@ -365,47 +365,47 @@ static int kfd_dbg_get_dev_watch_id(struct
> kfd_process_device *pdd, int *watch_i
>
>   *watch_id = KFD_DEBUGGER_INVALID_WATCH_POINT_ID;
>
> - spin_lock(&pdd->dev->kfd->watch_points_lock);
> + spin_lock(&pdd->dev->watch_points_lock);
>
>   for (i = 0; i < MAX_WATCH_ADDRESSES; i++) {
>   /* device watchpoint in use so skip */
> - if ((pdd->dev->kfd->alloc_watch_ids >> i) & 0x1)
> + if ((pdd->dev->alloc_watch_ids >> i) & 0x1)
>   continue;
>
>   pdd->alloc_watch_ids |= 0x1 << i;
> - pdd->dev->kfd->alloc_watch_ids |= 0x1 << i;
> + pdd->dev->alloc_watch_ids |= 0x1 << i;
>   *watch_id = i;
> - spin_unlock(&pdd->dev->kfd->watch_points_lock);
> + spin_unlock(&pdd->dev->watch_points_lock);
>   return 0;
>   }
>
> - spin_unlock(&pdd->dev->kfd->watch_points_lock);
> + spin_unlock(&pdd->dev->watch_points_lock);
>
>   return -ENOMEM;
>  }
>
>  static void kfd_dbg_clear_dev_watch_id(struct kfd_process_device *pdd, int
> watch_id)
>  {
> - spin_lock(&pdd->dev->kfd->watch_points_lock);
> + spin_lock(&pdd->dev->watch_points_lock);
>
>   /* process owns device watch point so safe to clear */
>   if ((pdd->alloc_watch_ids >> watch_id) & 0x1) {
>   pdd->alloc_watch_ids &= ~(0x1 << watch_id);
> - pdd->dev->kfd->alloc_watch_ids &= ~(0x1 << watch_id);
> + pdd->dev->alloc_watch_ids &= ~(0x1 << watch_id);
>   }
>
> - spin_unlock(&pdd->dev->kfd->watch_points_lock);
> + spin_unlock(&pdd->dev->watch_points_lock);
>  }
>
>  static bool kfd_dbg_owns_dev_watch_id(struct kfd_process_device *pdd, int
> watch_id)
>  {
>   bool owns_watch_id = false;
>
> - spin_lock(&pdd->dev->kfd->watch_points_lock);
> + spin_lock(&pdd->dev->watch_points_lock);
>   owns_watch_id = watch_id < MAX_WATCH_ADDRESSES &&
>   ((pdd->alloc_watch_ids >> watch_id) & 0x1);
>
> - spin_unlock(&pdd->dev->kfd->watch_points_lock);
> + spin_unlock(&pdd->dev->watch_points_lock);
>
>   return owns_watch_id;
>  }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index f4d20adaa068..f91a9b6ce3fb 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -885,12 +885,12 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
>   goto node_init_error;
>   }
>   kfd->nodes[i] = node;
> +
> + spin_lock_init(&kfd->nodes[i]->watch_points_lock);
>   }
>
>   svm_range_set_max_pages(kfd->adev);
>
> - spin_lock_init(&kfd->watch_points_lock);
> -
>   kfd->init_complete = true;
>   dev_info(kfd_device, "added device %x:%x\n", kfd->adev->pdev-
> >vendor,
>kfd->adev->pdev->device);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 2b3ec92981e8..653e1f934107 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -310,6 +310,10 @@ struct kfd_node {
>   struct kfd_local_mem_info local_mem_info;
>
>   struct kfd_dev *kfd;
> +
> + /* Track per device allocated watch points */
> + uint32_t alloc_watch_ids;
> + spinlock_t watch_points_lock;
>  };
>
>  struct kfd_dev {
> @@ -362,10 +366,6 @@ struct kfd_dev {
>   struct kfd_node *nodes[MAX_KFD_NODES];
>   unsigned int num_nodes;
>
> - /* Track per device allocated watch points */
> - uint32_t alloc_watch_ids;
> - spinlock_t watch_points_lock;
> -
>   /* Kernel doorbells for KFD device */
>   struct amdgpu_bo *doorbells;
>
> --
> 2.34.1



RE: [PATCH 2/2] drm/amdkfd: support the debugger during per-queue reset

2024-07-26 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Alex Deucher 
> Sent: Friday, July 26, 2024 2:57 PM
> To: Kim, Jonathan 
> Cc: amd-gfx@lists.freedesktop.org; Kuehling, Felix
> ; Deucher, Alexander
> 
> Subject: Re: [PATCH 2/2] drm/amdkfd: support the debugger during per-
> queue reset
>
> Caution: This message originated from an External Source. Use proper caution
> when opening attachments, clicking links, or responding.
>
>
> On Fri, Jul 26, 2024 at 11:40 AM Jonathan Kim 
> wrote:
> >
> > In order to allow ROCm GDB to handle reset queues, raise an
> > EC_QUEUE_RESET exception so that the debugger can subscribe and
> > query this exception.
> >
> > Reset queues should still be considered suspendable with a status
> > flag of KFD_DBG_QUEUE_RESET_MASK.
> > However they should not be resumable since user space will no longer
> > be able to access reset queues.
>
> Do you have a link to the proposed debugger code which uses this?

Not yet.  + Laurent/Lancelot for awareness.

Jon

>
> Alex
>
> >
> > v2: move per-queue reset flag to this patch
> > rebase based on patch 1 changes
> >
> > Signed-off-by: Jonathan Kim 
> > ---
> >  .../drm/amd/amdkfd/kfd_device_queue_manager.c | 31
> ---
> >  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  1 +
> >  include/uapi/linux/kfd_ioctl.h|  4 +++
> >  3 files changed, 31 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > index e335703eff84..cb7b5bbf5c40 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > @@ -164,6 +164,10 @@ static void kfd_hws_hang(struct
> device_queue_manager *dqm)
> > struct kfd_process_device *pdd = qpd_to_pdd(qpd);
> >
> > pdd->has_reset_queue = true;
> > +   q->properties.is_reset = true;
> > +   kfd_dbg_ev_raise(KFD_EC_MASK(EC_QUEUE_RESET),
> > +q->process, q->device, 
> > q->doorbell_id,
> > +false, NULL, 0);
> > }
> > }
> >
> > @@ -986,7 +990,7 @@ static int suspend_single_queue(struct
> device_queue_manager *dqm,
> >  {
> > bool is_new;
> >
> > -   if (q->properties.is_suspended)
> > +   if (q->properties.is_suspended || q->properties.is_reset)
> > return 0;
> >
> > pr_debug("Suspending PASID %u queue [%i]\n",
> > @@ -1007,6 +1011,9 @@ static int suspend_single_queue(struct
> device_queue_manager *dqm,
> > if (dqm->dev->kfd->shared_resources.enable_mes) {
> > int r = remove_queue_mes(dqm, q, &pdd->qpd);
> >
> > +   if (q->properties.is_reset)
> > +   return 0;
> > +
> > if (r)
> > return r;
> > }
> > @@ -1967,10 +1974,14 @@ static void set_queue_as_reset(struct
> device_queue_manager *dqm, struct queue *q
> >q->properties.queue_id, q->process->pasid);
> >
> > pdd->has_reset_queue = true;
> > +   q->properties.is_reset = true;
> > if (q->properties.is_active) {
> > q->properties.is_active = false;
> > decrement_queue_count(dqm, qpd, q);
> > }
> > +
> > +   kfd_dbg_ev_raise(KFD_EC_MASK(EC_QUEUE_RESET), q->process, q-
> >device,
> > +q->doorbell_id, false, NULL, 0);
> >  }
> >
> >  static int detect_queue_hang(struct device_queue_manager *dqm)
> > @@ -3037,7 +3048,8 @@ int resume_queues(struct kfd_process *p,
> > queue_ids[q_idx] &=
> > 
> > ~KFD_DBG_QUEUE_INVALID_MASK;
> > } else {
> > -   queue_ids[q_idx] |=
> > +   queue_ids[q_idx] |= 
> > q->properties.is_reset ?
> > +   
> > KFD_DBG_QUEUE_RESET_MASK :
> >   

RE: [PATCH 1/2] drm/amdkfd: support per-queue reset on gfx9

2024-07-25 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: amd-gfx  On Behalf Of Felix
> Kuehling
> Sent: Wednesday, July 24, 2024 4:35 PM
> To: Kim, Jonathan ; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 1/2] drm/amdkfd: support per-queue reset on gfx9
>
> Caution: This message originated from an External Source. Use proper caution
> when opening attachments, clicking links, or responding.
>
>
> On 2024-07-18 13:56, Jonathan Kim wrote:
> > Support per-queue reset for GFX9.  The recommendation is for the driver
> > to target reset the HW queue via a SPI MMIO register write.
> >
> > Since this requires pipe and HW queue info and MEC FW is limited to
> > doorbell reports of hung queues after an unmap failure, scan the HW
> > queue slots defined by SET_RESOURCES first to identify the user queue
> > candidates to reset.
> >
> > Only signal reset events to processes that have had a queue reset.
> >
> > If queue reset fails, fall back to GPU reset.
> >
> > Signed-off-by: Jonathan Kim 
> > ---
> >   .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c  |   1 +
> >   .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |   3 +-
> >   .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c   |   3 +-
> >   .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c|   9 +
> >   .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h|   6 +
> >   .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c  |   3 +-
> >   .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c|  11 +-
> >   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  56 +
> >   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |   6 +
> >   .../drm/amd/amdkfd/kfd_device_queue_manager.c | 195
> --
> >   .../drm/amd/amdkfd/kfd_device_queue_manager.h |  12 ++
> >   drivers/gpu/drm/amd/amdkfd/kfd_events.c   |  15 ++
> >   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   |   6 +-
> >   drivers/gpu/drm/amd/amdkfd/kfd_priv.h |   4 +
> >   drivers/gpu/drm/amd/amdkfd/kfd_process.c  |   2 +
> >   .../gpu/drm/amd/include/kgd_kfd_interface.h   |   4 +
> >   16 files changed, 310 insertions(+), 26 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> > index aff08321e976..1dc601e4518a 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> > @@ -191,4 +191,5 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
> >   .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
> >   .build_grace_period_packet_info =
> kgd_gfx_v9_build_grace_period_packet_info,
> >   .program_trap_handler_settings =
> kgd_gfx_v9_program_trap_handler_settings,
> > + .hqd_detect_and_reset = kgd_gfx_v9_hqd_detect_and_reset
> >   };
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> > index 3a3f3ce09f00..534975c722df 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> > @@ -418,5 +418,6 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
> >   .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
> >   .build_grace_period_packet_info =
> kgd_gfx_v9_build_grace_period_packet_info,
> >   .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
> > - .program_trap_handler_settings =
> kgd_gfx_v9_program_trap_handler_settings
> > + .program_trap_handler_settings =
> kgd_gfx_v9_program_trap_handler_settings,
> > + .hqd_detect_and_reset = kgd_gfx_v9_hqd_detect_and_reset
> >   };
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
> > index a5c7259cf2a3..b53c1cfa34de 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
> > @@ -541,5 +541,6 @@ const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = {
> >   kgd_gfx_v9_4_3_set_wave_launch_trap_override,
> >   .set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode,
> >   .set_address_watch = kgd_gfx_v9_4_3_set_address_watch,
> > - .clear_address_watch = kgd_gfx_v9_4_3_clear_address_watch
> > + .clear_address_watch = kgd_gfx_v9_4_3_clear_address_watch,
> > + .hqd_detect_and_reset = kgd_gfx_v9_hqd_detect_and_reset
> >   };
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> > index 3ab6c3aa0ad1..dd449a0caba8 100644
> > --- a/d

RE: [PATCH] drm/amdkfd: allow users to target recommended SDMA engines

2024-07-19 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Kuehling, Felix 
> Sent: Friday, July 19, 2024 2:34 PM
> To: Kim, Jonathan ; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdkfd: allow users to target recommended SDMA
> engines
>
> On 2024-07-18 19:05, Jonathan Kim wrote:
> > Certain GPUs have better copy performance over xGMI on specific
> > SDMA engines depending on the source and destination GPU.
> > Allow users to create SDMA queues on these recommended engines.
> > Close to 2x overall performance has been observed with this
> > optimization.
> >
> > Signed-off-by: Jonathan Kim 
> > ---
> >   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 16 ++
> >   drivers/gpu/drm/amd/amdkfd/kfd_crat.h |  3 +-
> >   .../drm/amd/amdkfd/kfd_device_queue_manager.c | 39 +-
> >   drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  5 +-
> >   .../amd/amdkfd/kfd_process_queue_manager.c|  1 +
> >   drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 52
> +++
> >   drivers/gpu/drm/amd/amdkfd/kfd_topology.h |  1 +
> >   include/uapi/linux/kfd_ioctl.h|  6 ++-
> >   8 files changed, 119 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > index 32e5db509560..9610cb90a47e 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > @@ -255,6 +255,7 @@ static int set_queue_properties_from_user(struct
> queue_properties *q_properties,
> > args->ctx_save_restore_address;
> > q_properties->ctx_save_restore_area_size = args-
> >ctx_save_restore_size;
> > q_properties->ctl_stack_size = args->ctl_stack_size;
> > +   q_properties->sdma_engine_id = args->sdma_engine_id;
> > if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
> > args->queue_type ==
> KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
> > q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
> > @@ -262,6 +263,8 @@ static int set_queue_properties_from_user(struct
> queue_properties *q_properties,
> > q_properties->type = KFD_QUEUE_TYPE_SDMA;
> > else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
> > q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
> > +   else if (args->queue_type ==
> KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID)
> > +   q_properties->type = KFD_QUEUE_TYPE_SDMA_BY_ENG_ID;
> > else
> > return -ENOTSUPP;
> >
> > @@ -334,6 +337,18 @@ static int kfd_ioctl_create_queue(struct file *filep,
> struct kfd_process *p,
> > goto err_bind_process;
> > }
> >
> > +   if (q_properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) {
> > +   int max_sdma_eng_id = kfd_get_num_sdma_engines(dev) +
> > + kfd_get_num_xgmi_sdma_engines(dev) -
> 1;
> > +
> > +   if (q_properties.sdma_engine_id > max_sdma_eng_id) {
> > +   err = -EINVAL;
> > +   pr_err("sdma_engine_id %i exceeds maximum id
> of %i\n",
> > +  q_properties.sdma_engine_id,
> max_sdma_eng_id);
> > +   goto err_sdma_engine_id;
> > +   }
> > +   }
> > +
> > if (!pdd->qpd.proc_doorbells) {
> > err = kfd_alloc_process_doorbells(dev->kfd, pdd);
> > if (err) {
> > @@ -425,6 +440,7 @@ static int kfd_ioctl_create_queue(struct file *filep,
> struct kfd_process *p,
> > if (wptr_bo)
> > amdgpu_amdkfd_free_gtt_mem(dev->adev, wptr_bo);
> >   err_wptr_map_gart:
> > +err_sdma_engine_id:
> >   err_bind_process:
> >   err_pdd:
> > mutex_unlock(&p->mutex);
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
> b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
> > index a8ca7ecb6d27..e880a71837bc 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
> > @@ -259,7 +259,7 @@ struct crat_subtype_ccompute {
> >   #define CRAT_IOLINK_TYPE_OTHER16
> >   #define CRAT_IOLINK_TYPE_MAX  255
> >
> > -#define CRAT_IOLINK_RESERVED_LENGTH24
> > +#define CRAT_IOLINK_RESERVED_LENGTH20
> >
> >   struct crat_subtype_iolink {
> > uint8_t type;
> > @@ -276,6 +276,7 @@ struct crat_subtype_iolink {
> > uint32_tminimum_bandwidth_mbs;
> > uint32_tmaximum_bandwidth_m

RE: [PATCH] Revert "drm/amdkfd: Add partition id field to location_id"

2024-04-30 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Lazar, Lijo 
> Sent: Tuesday, April 23, 2024 7:28 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Zhang, Hawking ; Deucher, Alexander
> ; Joshi, Mukul ;
> Kim, Jonathan ; Poag, Charis
> ; Cheung, Donald ; Yat
> Sin, David ; Chaudhary, Jatin Jaikishan
> ; Du, Wenkai 
> Subject: [PATCH] Revert "drm/amdkfd: Add partition id field to location_id"
>
> This reverts commit 925c7bd1d1cf9f173b22603c8bd4816d142d4935.
>
> RCCL library is currently not treating spatial partitions differently,
> hence this change is causing issues. Revert temporarily till RCCL
> implementation is ready for spatial partitions.
>
> Signed-off-by: Lijo Lazar 

Reviewed-by: Jonathan Kim 

> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> index c51f131eaa2f..bc9eb847ecfe 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> @@ -1997,9 +1997,8 @@ int kfd_topology_add_device(struct kfd_node
> *gpu)
>   HSA_CAP_ASIC_REVISION_MASK);
>
>   dev->node_props.location_id = pci_dev_id(gpu->adev->pdev);
> - /* On multi-partition nodes, node id = location_id[31:28] */
> - if (gpu->kfd->num_nodes > 1)
> - dev->node_props.location_id |= (dev->gpu->node_id << 28);
> + if (KFD_GC_VERSION(dev->gpu->kfd) == IP_VERSION(9, 4, 3))
> + dev->node_props.location_id |= dev->gpu->node_id;
>
>   dev->node_props.domain = pci_domain_nr(gpu->adev->pdev->bus);
>   dev->node_props.max_engine_clk_fcompute =
> --
> 2.25.1



RE: [PATCH] drm/amdkfd: Enable SQ watchpoint for gfx10

2024-04-15 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Six, Lancelot 
> Sent: Friday, April 12, 2024 8:54 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kim, Jonathan ; Six, Lancelot
> 
> Subject: [PATCH] drm/amdkfd: Enable SQ watchpoint for gfx10
>
> There are new control registers introduced in gfx10 used to configure
> hardware watchpoints triggered by SMEM instructions:
> SQ_WATCH{0,1,2,3}_{CNTL_ADDR_HI,ADDR_L}.
>
> Those registers work in a similar way as the TCP_WATCH* registers
> currently used for gfx9 and above.
>
> This patch adds support to program the SQ_WATCH registers for gfx10.
>
> The SQ_WATCH?_CNTL.MASK field has one bit more than
> TCP_WATCH?_CNTL.MASK, so SQ watchpoints can have a finer granularity
> than TCP_WATCH watchpoints.  In this patch, we keep the capabilities
> advertised to the debugger unchanged
> (HSA_DBG_WATCH_ADDR_MASK_*_BIT_GFX10) as this reflects what both
> TCP and
> SQ watchpoints can do and both watchpoints are configured together.
>
> Signed-off-by: Lancelot SIX 

Reviewed-by: Jonathan Kim 

> ---
>  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 71
> +++
>  1 file changed, 58 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> index 69810b3f1c63..3ab6c3aa0ad1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> @@ -881,6 +881,7 @@ uint32_t
> kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
>  }
>
>  #define TCP_WATCH_STRIDE (mmTCP_WATCH1_ADDR_H -
> mmTCP_WATCH0_ADDR_H)
> +#define SQ_WATCH_STRIDE (mmSQ_WATCH1_ADDR_H -
> mmSQ_WATCH0_ADDR_H)
>  uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
>   uint64_t watch_address,
>   uint32_t watch_address_mask,
> @@ -889,55 +890,93 @@ uint32_t kgd_gfx_v10_set_address_watch(struct
> amdgpu_device *adev,
>   uint32_t debug_vmid,
>   uint32_t inst)
>  {
> + /* SQ_WATCH?_ADDR_* and TCP_WATCH?_ADDR_* are programmed
> with the
> +  * same values.
> +  */
>   uint32_t watch_address_high;
>   uint32_t watch_address_low;
> - uint32_t watch_address_cntl;
> -
> - watch_address_cntl = 0;
> + uint32_t tcp_watch_address_cntl;
> + uint32_t sq_watch_address_cntl;
>
>   watch_address_low = lower_32_bits(watch_address);
>   watch_address_high = upper_32_bits(watch_address) & 0x;
>
> - watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> + tcp_watch_address_cntl = 0;
> + tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
>   TCP_WATCH0_CNTL,
>   VMID,
>   debug_vmid);
> - watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> + tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
>   TCP_WATCH0_CNTL,
>   MODE,
>   watch_mode);
> - watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> + tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
>   TCP_WATCH0_CNTL,
>   MASK,
>   watch_address_mask >> 7);
>
> + sq_watch_address_cntl = 0;
> + sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
> + SQ_WATCH0_CNTL,
> + VMID,
> + debug_vmid);
> + sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
> + SQ_WATCH0_CNTL,
> + MODE,
> + watch_mode);
> + sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
> + SQ_WATCH0_CNTL,
> + MASK,
> + watch_address_mask >> 6);
> +
>   /* Turning off this watch point until we set all the registers */
> - watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> + tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
>   TCP_WATCH0_CNTL,
>   VALID,
>   0);
> -
>   WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
>   (watch_id * TCP_WATCH_STRIDE)),
> - watch_address_cntl);
> + tcp_watch_address_cntl);
> +
> + sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
> + SQ_WATCH0_CNTL,
> + VALI

RE: [PATCH] drm/amdkfd: range check cp bad op exception interrupts

2024-03-21 Thread Kim, Jonathan
[Public]

Ping for review.

Thanks,

Jon

> -Original Message-
> From: Kim, Jonathan 
> Sent: Wednesday, March 13, 2024 10:21 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kuehling, Felix ; Huang, JinHuiEric
> ; Kim, Jonathan ;
> Kim, Jonathan ; Zhang, Jesse(Jie)
> 
> Subject: [PATCH] drm/amdkfd: range check cp bad op exception interrupts
>
> Due to a CP interrupt bug, bad packet garbage exception codes are raised.
> Do a range check so that the debugger and runtime do not receive garbage
> codes.
> Update the user api to guard exception code type checking as well.
>
> Signed-off-by: Jonathan Kim 
> Tested-by: Jesse Zhang 
> ---
>  .../gpu/drm/amd/amdkfd/kfd_int_process_v10.c|  3 ++-
>  .../gpu/drm/amd/amdkfd/kfd_int_process_v11.c|  3 ++-
>  drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c |  3 ++-
>  include/uapi/linux/kfd_ioctl.h  | 17 ++---
>  4 files changed, 20 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
> index a8e76287dde0..013d0a073b9b 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
> @@ -339,7 +339,8 @@ static void event_interrupt_wq_v10(struct kfd_node
> *dev,
>   break;
>   }
>   kfd_signal_event_interrupt(pasid, context_id0 &
> 0x7f, 23);
> - } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) {
> + } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE &&
> +
> KFD_DBG_EC_TYPE_IS_PACKET(KFD_DEBUG_CP_BAD_OP_ECODE(context_id
> 0))) {
>   kfd_set_dbg_ev_from_interrupt(dev, pasid,
>   KFD_DEBUG_DOORBELL_ID(context_id0),
>
>   KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)),
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
> index 7e2859736a55..fe2ad0c0de95 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
> @@ -328,7 +328,8 @@ static void event_interrupt_wq_v11(struct kfd_node
> *dev,
>   /* CP */
>   if (source_id == SOC15_INTSRC_CP_END_OF_PIPE)
>   kfd_signal_event_interrupt(pasid, context_id0, 32);
> - else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE)
> + else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE &&
> +
> KFD_DBG_EC_TYPE_IS_PACKET(KFD_CTXID0_CP_BAD_OP_ECODE(context_id
> 0)))
>   kfd_set_dbg_ev_from_interrupt(dev, pasid,
>   KFD_CTXID0_DOORBELL_ID(context_id0),
>
>   KFD_EC_MASK(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0)),
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
> index ff7392336795..5483211c5d3d 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
> @@ -388,7 +388,8 @@ static void event_interrupt_wq_v9(struct kfd_node
> *dev,
>   break;
>   }
>   kfd_signal_event_interrupt(pasid, sq_int_data, 24);
> - } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) {
> + } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE &&
> +
> KFD_DBG_EC_TYPE_IS_PACKET(KFD_DEBUG_CP_BAD_OP_ECODE(context_id
> 0))) {
>   kfd_set_dbg_ev_from_interrupt(dev, pasid,
>   KFD_DEBUG_DOORBELL_ID(context_id0),
>
>   KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)),
> diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
> index 9ce46edc62a5..2040a470ddb4 100644
> --- a/include/uapi/linux/kfd_ioctl.h
> +++ b/include/uapi/linux/kfd_ioctl.h
> @@ -913,14 +913,25 @@ enum kfd_dbg_trap_exception_code {
>KFD_EC_MASK(EC_DEVICE_NEW))
>  #define KFD_EC_MASK_PROCESS
>   (KFD_EC_MASK(EC_PROCESS_RUNTIME) |  \
>
> KFD_EC_MASK(EC_PROCESS_DEVICE_REMOVE))
> +#define KFD_EC_MASK_PACKET
>   (KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_DIM_INVALID) |
>   \
> +
> KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_GROUP_SEGMENT_SIZE_INVA
> LID) |\
> +
> KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_CODE_INVALID) |  \
> +
> KFD_EC_MASK(EC_QUEUE_PACKET_RESERVED) |   \
> +
> KFD_EC_MASK(EC_QUEUE_PACKET_UNSUPPORTED) |\
> +
> KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_WORK_GROUP_SIZE_INVALID
> ) |   \
> +
> KFD_EC_MASK(EC_QU

RE: [PATCH] drm/amdkfd: fix shift out of bounds about gpu debug

2024-02-29 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Zhang, Jesse(Jie) 
> Sent: Friday, March 1, 2024 12:50 AM
> To: Kim, Jonathan ; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander ; Kuehling, Felix
> ; Zhang, Yifan 
> Subject: RE: [PATCH] drm/amdkfd: fix shift out of bounds about gpu debug
>
> [Public]
>
> Hi Jon,
>
> -Original Message-
> From: Kim, Jonathan 
> Sent: Thursday, February 29, 2024 11:58 PM
> To: Zhang, Jesse(Jie) ; amd-
> g...@lists.freedesktop.org
> Cc: Deucher, Alexander ; Kuehling, Felix
> ; Zhang, Yifan ; Zhang,
> Jesse(Jie) ; Zhang, Jesse(Jie)
> 
> Subject: RE: [PATCH] drm/amdkfd: fix shift out of bounds about gpu debug
>
> [Public]
>
> I think this was discussed in another thread.
> Exception codes should be range checked prior to applying the mask.  Raising
> null events to the debugger or runtime isn't useful.
> I haven't gotten around to fixing this yet.  I should have time this week.
> Just to double check, the out of bounds shift is because of a CP interrupt 
> that
> generates a null exception code?
>
> [Zhang, Jesse(Jie)] Thanks for your reminder, I saw that discussion.
> In this interrupt, other fields(such as, source id, client id pasid ) are 
> correct.
> only the value of context_id0 (0xf) is invalid.
>How about do the check ,like this:
>   } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) {
> +   /* filter out the invalidate context_id0 */
> +   if (!(context_id0 >> KFD_DEBUG_CP_BAD_OP_ECODE_SHIFT) 
> ||
> +   (context_id0 >> 
> KFD_DEBUG_CP_BAD_OP_ECODE_SHIFT) >
> EC_MAX)
> +   return;

The range check should probably flag any exception prefixed as 
EC_QUEUE_PACKET_* as valid, as defined in kfd_dbg_trap_exception_code:
https://github.com/torvalds/linux/blob/master/include/uapi/linux/kfd_ioctl.h#L857
+ Jay to confirm this is the correct exception range for CP_BAD_OPCODE

If that's the case, then I think we can define a 
KFD_DBG_EC_TYPE_IS_QUEUE_PACKET macro similar to:
https://github.com/torvalds/linux/blob/master/include/uapi/linux/kfd_ioctl.h#L917

That way, KFD process interrupts v9, v10, v11 can use that check prior to mask 
conversion and user space may find it useful as well.

Jon
> kfd_set_dbg_ev_from_interrupt(dev, pasid,
> KFD_DEBUG_DOORBELL_ID(context_id0),
>
> KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)),
>  Thanks
>  Jesse
> Jon
>
> > -Original Message-
> > From: Jesse Zhang 
> > Sent: Thursday, February 29, 2024 3:45 AM
> > To: amd-gfx@lists.freedesktop.org
> > Cc: Deucher, Alexander ; Kuehling, Felix
> > ; Kim, Jonathan ;
> Zhang,
> > Yifan ; Zhang, Jesse(Jie)
> ;
> > Zhang, Jesse(Jie) 
> > Subject: [PATCH] drm/amdkfd: fix shift out of bounds about gpu debug
> >
> >  the issue is :
> > [  388.151802] UBSAN: shift-out-of-bounds in
> > drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_int_process_v10.c:346:5
> > [  388.151807] shift exponent 4294967295 is too large for 64-bit type
> > 'long long unsigned int'
> > [  388.151812] CPU: 6 PID: 347 Comm: kworker/6:1H Tainted: GE
> > 6.7.0+ #1
> > [  388.151814] Hardware name: AMD Splinter/Splinter-GNR, BIOS
> > WS54117N_140 01/16/2024
> > [  388.151816] Workqueue: KFD IH interrupt_wq [amdgpu] [  388.152084]
> > Call Trace:
> > [  388.152086]  
> > [  388.152089]  dump_stack_lvl+0x4c/0x70 [  388.152096]
> > dump_stack+0x14/0x20 [  388.152098]  ubsan_epilogue+0x9/0x40 [
> > 388.152101]  __ubsan_handle_shift_out_of_bounds+0x113/0x170
> > [  388.152103]  ? vprintk+0x40/0x70
> > [  388.152106]  ? swsusp_check+0x131/0x190 [  388.152110]
> > event_interrupt_wq_v10.cold+0x16/0x1e [amdgpu] [  388.152411]  ?
> > raw_spin_rq_unlock+0x14/0x40 [  388.152415]  ?
> > finish_task_switch+0x85/0x2a0 [  388.152417]  ?
> > kfifo_copy_out+0x5f/0x70 [  388.152420]  interrupt_wq+0xb2/0x120
> > [amdgpu] [  388.152642]  ? interrupt_wq+0xb2/0x120 [amdgpu] [
> > 388.152728]  process_scheduled_works+0x9a/0x3a0
> > [  388.152731]  ? __pfx_worker_thread+0x10/0x10 [  388.152732]
> > worker_thread+0x15f/0x2d0 [  388.152733]  ?
> > __pfx_worker_thread+0x10/0x10 [  388.152734]  kthread+0xfb/0x130 [
> > 388.152735]  ? __pfx_kthread+0x10/0x10 [  388.152736]
> > ret_from_fork+0x3d/0x60 [  388.152738]  ? __pfx_kthread+0x10/0x10 [
> > 388.152739]  ret_from_fork_asm+0x1b/0x30 [  388.152742]  
> >
> > Signed-off-by: Jesse Zhang 
> > ---
> >  include/uapi/linux/kfd_ioctl.h | 2 +-
> >  1 file changed, 1 

RE: [PATCH] drm/amdkfd: fix shift out of bounds about gpu debug

2024-02-29 Thread Kim, Jonathan
[Public]

I think this was discussed in another thread.
Exception codes should be range checked prior to applying the mask.  Raising 
null events to the debugger or runtime isn't useful.
I haven't gotten around to fixing this yet.  I should have time this week.
Just to double check, the out of bounds shift is because of a CP interrupt that 
generates a null exception code?

Jon

> -Original Message-
> From: Jesse Zhang 
> Sent: Thursday, February 29, 2024 3:45 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander ; Kuehling, Felix
> ; Kim, Jonathan ;
> Zhang, Yifan ; Zhang, Jesse(Jie)
> ; Zhang, Jesse(Jie) 
> Subject: [PATCH] drm/amdkfd: fix shift out of bounds about gpu debug
>
>  the issue is :
> [  388.151802] UBSAN: shift-out-of-bounds in
> drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_int_process_v10.c:346:5
> [  388.151807] shift exponent 4294967295 is too large for 64-bit type 'long
> long unsigned int'
> [  388.151812] CPU: 6 PID: 347 Comm: kworker/6:1H Tainted: GE
> 6.7.0+ #1
> [  388.151814] Hardware name: AMD Splinter/Splinter-GNR, BIOS
> WS54117N_140 01/16/2024
> [  388.151816] Workqueue: KFD IH interrupt_wq [amdgpu]
> [  388.152084] Call Trace:
> [  388.152086]  
> [  388.152089]  dump_stack_lvl+0x4c/0x70
> [  388.152096]  dump_stack+0x14/0x20
> [  388.152098]  ubsan_epilogue+0x9/0x40
> [  388.152101]  __ubsan_handle_shift_out_of_bounds+0x113/0x170
> [  388.152103]  ? vprintk+0x40/0x70
> [  388.152106]  ? swsusp_check+0x131/0x190
> [  388.152110]  event_interrupt_wq_v10.cold+0x16/0x1e [amdgpu]
> [  388.152411]  ? raw_spin_rq_unlock+0x14/0x40
> [  388.152415]  ? finish_task_switch+0x85/0x2a0
> [  388.152417]  ? kfifo_copy_out+0x5f/0x70
> [  388.152420]  interrupt_wq+0xb2/0x120 [amdgpu]
> [  388.152642]  ? interrupt_wq+0xb2/0x120 [amdgpu]
> [  388.152728]  process_scheduled_works+0x9a/0x3a0
> [  388.152731]  ? __pfx_worker_thread+0x10/0x10
> [  388.152732]  worker_thread+0x15f/0x2d0
> [  388.152733]  ? __pfx_worker_thread+0x10/0x10
> [  388.152734]  kthread+0xfb/0x130
> [  388.152735]  ? __pfx_kthread+0x10/0x10
> [  388.152736]  ret_from_fork+0x3d/0x60
> [  388.152738]  ? __pfx_kthread+0x10/0x10
> [  388.152739]  ret_from_fork_asm+0x1b/0x30
> [  388.152742]  
>
> Signed-off-by: Jesse Zhang 
> ---
>  include/uapi/linux/kfd_ioctl.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
> index 9ce46edc62a5..3d5867df17e8 100644
> --- a/include/uapi/linux/kfd_ioctl.h
> +++ b/include/uapi/linux/kfd_ioctl.h
> @@ -887,7 +887,7 @@ enum kfd_dbg_trap_exception_code {
>  };
>
>  /* Mask generated by ecode in kfd_dbg_trap_exception_code */
> -#define KFD_EC_MASK(ecode)   (1ULL << (ecode - 1))
> +#define KFD_EC_MASK(ecode)   (ecode ? (1ULL << (ecode - 1)) : 0ULL)
>
>  /* Masks for exception code type checks below */
>  #define KFD_EC_MASK_QUEUE
>   (KFD_EC_MASK(EC_QUEUE_WAVE_ABORT) | \
> --
> 2.25.1



RE: [PATCH] drm/amdkfd: Add partition id field to location_id

2024-02-23 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Lazar, Lijo 
> Sent: Thursday, February 22, 2024 10:49 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Zhang, Hawking ; Deucher, Alexander
> ; Kim, Jonathan ;
> Poag, Charis ; Cheung, Donald
> ; Yat Sin, David ;
> Chaudhary, Jatin Jaikishan 
> Subject: [PATCH] drm/amdkfd: Add partition id field to location_id
>
> On devices which have multi-partition nodes, keep partition id in
> location_id[31:28].
>
> Signed-off-by: Lijo Lazar 

Reviewed-by: Jonathan Kim 

> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> index aee2fcab241f..0da747d52975 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> @@ -1997,8 +1997,9 @@ int kfd_topology_add_device(struct kfd_node
> *gpu)
>   HSA_CAP_ASIC_REVISION_MASK);
>
>   dev->node_props.location_id = pci_dev_id(gpu->adev->pdev);
> - if (KFD_GC_VERSION(dev->gpu->kfd) == IP_VERSION(9, 4, 3))
> - dev->node_props.location_id |= dev->gpu->node_id;
> + /* On multi-partition nodes, node id = location_id[31:28] */
> + if (gpu->kfd->num_nodes > 1)
> + dev->node_props.location_id |= (dev->gpu->node_id << 28);
>
>   dev->node_props.domain = pci_domain_nr(gpu->adev->pdev->bus);
>   dev->node_props.max_engine_clk_fcompute =
> --
> 2.25.1



RE: [PATCH] drm/amdkfd: Fix the shift-out-of-bounds warning

2024-01-11 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Kuehling, Felix 
> Sent: Thursday, January 11, 2024 11:03 AM
> To: Ma, Jun ; Ma, Jun ; amd-
> g...@lists.freedesktop.org; Kim, Jonathan 
> Subject: Re: [PATCH] drm/amdkfd: Fix the shift-out-of-bounds warning
>
> [+Jon]
>
> On 2024-01-11 01:05, Ma, Jun wrote:
>
> > Hi Felix,
> >
> > On 1/10/2024 11:57 PM, Felix Kuehling wrote:
> >> On 2024-01-10 04:39, Ma Jun wrote:
> >>> There is following shift-out-of-bounds warning if ecode=0.
> >>> "shift exponent 4294967295 is too large for 64-bit type 'long long
> unsigned int'"
> >>>
> >>> Signed-off-by: Ma Jun 
> >>> ---
> >>>include/uapi/linux/kfd_ioctl.h | 2 +-
> >>>1 file changed, 1 insertion(+), 1 deletion(-)
> >>>
> >>> diff --git a/include/uapi/linux/kfd_ioctl.h 
> >>> b/include/uapi/linux/kfd_ioctl.h
> >>> index 2aa88afe305b..129325b02a91 100644
> >>> --- a/include/uapi/linux/kfd_ioctl.h
> >>> +++ b/include/uapi/linux/kfd_ioctl.h
> >>> @@ -1004,7 +1004,7 @@ enum kfd_dbg_trap_exception_code {
> >>>};
> >>>
> >>>/* Mask generated by ecode in kfd_dbg_trap_exception_code */
> >>> -#define KFD_EC_MASK(ecode)   (1ULL << (ecode - 1))
> >>> +#define KFD_EC_MASK(ecode)   (BIT(ecode) - 1)
> >> This is not the same thing. We want a bit mask with one bit set. And
> >> ecode=1 should set bit 0. ecode=0 is not a valid code and doesn't have a
> >> valid mask. You could use BIT((ecode) - 1), but I think that would give
> >> you the same warning for ecode=0. I also don't see BIT defined anywhere
> >> under include/uapi, so I think using this in the API header would break
> >> the build in user mode.
> >>
> >> Where are you seeing the warning about the bad shift exponent? Looks
> >> like someone is using the KFD_EC_MASK macro incorrectly. Or if there is
> >> a legitimate use of it with ecode=0, then the correct fix would be
> >>
> > This warning is caused by following code in function
> event_interrupt_wq_v10()
> >
> > else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) {
> > kfd_set_dbg_ev_from_interrupt(dev, pasid,
> > KFD_DEBUG_DOORBELL_ID(context_id0),
> > KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)),
> > NULL,
> > 0);
> > }
>
> This looks OK. The compiler must be warning about a potential problem
> here, not a definite one.
>
> Question for Jon, how does the firmware encode the error code in the
> context ID? I see these macros:
>
> #define KFD_DEBUG_CP_BAD_OP_ECODE_MASK  0x3fffc00
> #define KFD_DEBUG_CP_BAD_OP_ECODE_SHIFT 10
> #define KFD_DEBUG_CP_BAD_OP_ECODE(ctxid0) (((ctxid0) &  \
>  KFD_DEBUG_CP_BAD_OP_ECODE_MASK) \
>  >> KFD_DEBUG_CP_BAD_OP_ECODE_SHIFT)
>
> It looks like we have 16 bits for the ECODE. That's enough to have a bit
> mask. Do we really need KFD_EC_MASK to convert an error number into a
> bitmask here?

Added Jay for confirmation.
I could be wrong but IIRC (and I'm quite fuzzy on this ... we probably should 
document this), unlike the wave trap code interrupt mask (a bit mask), the CP 
bad op code is a single error code that directly points to one of the exception 
code enums that we defined in the user API header.
If that's the case, the KFD_EC_MASK is convenient for the kfd debugger code to 
mask the payload to send to the debugger or runtime.
If that's been wrong this whole time (i.e. the bad ops code is actually a 
bitwise mask of ecodes), then I'm not sure how we were able to get away with 
running the runtime negative tests for as long as we have and we'd need to 
recheck those tests.

>
>
> >
> >
> >> #define KFD_EC_MASK(ecode) ((ecode) ? 1ULL << (ecode - 1) : 0ULL)
> > This can fix the warning.
>
> In the code above, if ecode is 0, that would lead to calling
> kfd_set_dbg_ev_from_interrupt with a event mask of 0. Not sure if that
> even makes sense. Jon, so we need special handling of cases where the
> error code is 0 or out of range, so we can warn about buggy firmware
> rather than creating nonsensical events for the debugger?

That makes sense.  Again, deferring to Jay if a NULL cp bad op code is expected 
under any circumstances.
Either way, raising undefined events to the debugger or runtime isn't useful, so 
range checking to filter out non-encoded CP bad op interrupts would be needed.

Thanks,

Jon

>
> Regards,
>Felix
>
>
> >
> > Regards
> > Ma Jun
> >> Regards,
> >> Felix
> >>
> >>
> >>>
> >>>/* Masks for exception code type checks below */
> >>>#define KFD_EC_MASK_QUEUE
>   (KFD_EC_MASK(EC_QUEUE_WAVE_ABORT) | \
<>

RE: [PATCH] drm/amdkfd: fix mes set shader debugger process management

2023-12-12 Thread Kim, Jonathan
[Public]

Again, MES only knows to flush if there was something enqueued in the first 
place.
SET_SHADER dictates what's on the process list.
SET_SHADER can be the last call prior to process termination with nothing 
enqueued, hence no MES auto flush occurs.

MES doesn't block anything on the flush flag request.
The driver guarantees that flush is only done on process termination after 
device dequeue, whether there were queues or not.
MES has no idea what an invalid context is.
It just has a value stored in its linked list that's associated with a driver 
allocated BO that no longer exists after process termination.

If you're still not sure about this solution, then this should be discussed 
offline with the MES team.
We're not going to gain ground discussing this here.  The solution has already 
been merged.
Feel free to propose a better solution if you're not satisfied with this one.

Jon

From: Liu, Shaoyun 
Sent: Tuesday, December 12, 2023 11:08 PM
To: Kim, Jonathan ; Huang, JinHuiEric 
; amd-gfx@lists.freedesktop.org
Cc: Wong, Alice ; Kuehling, Felix 
; Kasiviswanathan, Harish 

Subject: Re: [PATCH] drm/amdkfd: fix mes set shader debugger process management


[Public]

You are trying to add a new interface to inform MES about the context flush after 
the driver side finishes process termination. From my understanding, MES already 
knows the process context needs to be purged after all the related queues have been 
removed, even without this notification. What do you expect MES to do about this 
context flush flag? Should MES block this process context for the next set_sched 
command? MES can achieve this by ignoring the set_sched command with the trap 
disable parameter on an invalid process context.

Shaoyun.liu

Get Outlook for iOS<https://aka.ms/o0ukef>
________
From: Kim, Jonathan mailto:jonathan@amd.com>>
Sent: Tuesday, December 12, 2023 8:19:09 PM
To: Liu, Shaoyun mailto:shaoyun@amd.com>>; Huang, 
JinHuiEric mailto:jinhuieric.hu...@amd.com>>; 
amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> 
mailto:amd-gfx@lists.freedesktop.org>>
Cc: Wong, Alice mailto:shiwei.w...@amd.com>>; Kuehling, 
Felix mailto:felix.kuehl...@amd.com>>; Kasiviswanathan, 
Harish mailto:harish.kasiviswanat...@amd.com>>
Subject: RE: [PATCH] drm/amdkfd: fix mes set shader debugger process management

[Public]

> -Original Message-
> From: Liu, Shaoyun mailto:shaoyun@amd.com>>
> Sent: Tuesday, December 12, 2023 7:08 PM
> To: Kim, Jonathan mailto:jonathan@amd.com>>; Huang, 
> JinHuiEric
> mailto:jinhuieric.hu...@amd.com>>; 
> amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
> Cc: Wong, Alice mailto:shiwei.w...@amd.com>>; Kuehling, 
> Felix
> mailto:felix.kuehl...@amd.com>>; Kasiviswanathan, 
> Harish
> mailto:harish.kasiviswanat...@amd.com>>
> Subject: RE: [PATCH] drm/amdkfd: fix mes set shader debugger process
> management
>
> [Public]
>
> I see,  so the  problem is after process context , set_shader been  called 
> with
> disable parameter,  do you know the  reason why  MES re-added the
> process context into the  list ?

Because MES has no idea what disable means.

All it knows is that without the flush flag, set_shader should update the 
necessary per-VMID (process) registers as requested by the driver, which 
requires persistent per-process HW settings so that potential future waves can 
inherit those settings i.e. ADD_QUEUE.skip_process_ctx_clear is set (why 
ADD_QUEUE auto clears the process context otherwise is another long story, 
basically an unsolvable MES cache bug problem).

Common use case example:
add_queue -> set_shader call either transiently stalls the SPI per-VMID or 
transiently dequeues the HWS per-VMID depending on the request settings -> 
fulfils the per-VMID register write updates -> resumes process queues so that 
potential waves on those queues inherit new debug settings.

You can't do this kind of operation at the queue level alone.

The problem that this patch solves (along with the MES FW upgrade) is an 
unfortunate quirk of having to operate between process (debug requests) and 
queue space (non-debug requests).
Old HWS used to operate at the per-process level via MAP_PROCESS so it was a 
lot easier to balance debug versus non-debug requests back then (but it was 
also lot less efficient performance wise).

Jon

>
> Shaoyun.liu
>
> -Original Message-
> From: Kim, Jonathan mailto:jonathan@amd.com>>
> Sent: Tuesday, December 12, 2023 6:07 PM
> To: Liu, Shaoyun mailto:shaoyun@amd.com>>; Huang, 
> JinHuiEric
> mailto:jinhuieric.hu...@amd.com>>; 
> amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
> Cc: Wong, Alice mailto:shiwei.w...@amd.com>>; 

RE: [PATCH] drm/amdkfd: fix mes set shader debugger process management

2023-12-12 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Liu, Shaoyun 
> Sent: Tuesday, December 12, 2023 7:08 PM
> To: Kim, Jonathan ; Huang, JinHuiEric
> ; amd-gfx@lists.freedesktop.org
> Cc: Wong, Alice ; Kuehling, Felix
> ; Kasiviswanathan, Harish
> 
> Subject: RE: [PATCH] drm/amdkfd: fix mes set shader debugger process
> management
>
> [Public]
>
> I see,  so the  problem is after process context , set_shader been  called 
> with
> disable parameter,  do you know the  reason why  MES re-added the
> process context into the  list ?

Because MES has no idea what disable means.

All it knows is that without the flush flag, set_shader should update the 
necessary per-VMID (process) registers as requested by the driver, which 
requires persistent per-process HW settings so that potential future waves can 
inherit those settings i.e. ADD_QUEUE.skip_process_ctx_clear is set (why 
ADD_QUEUE auto clears the process context otherwise is another long story, 
basically an unsolvable MES cache bug problem).

Common use case example:
add_queue -> set_shader call either transiently stalls the SPI per-VMID or 
transiently dequeues the HWS per-VMID depending on the request settings -> 
fulfils the per-VMID register write updates -> resumes process queues so that 
potential waves on those queues inherit new debug settings.

You can't do this kind of operation at the queue level alone.

The problem that this patch solves (along with the MES FW upgrade) is an 
unfortunate quirk of having to operate between process (debug requests) and 
queue space (non-debug requests).
Old HWS used to operate at the per-process level via MAP_PROCESS so it was a 
lot easier to balance debug versus non-debug requests back then (but it was 
also lot less efficient performance wise).

Jon

>
> Shaoyun.liu
>
> -Original Message-
> From: Kim, Jonathan 
> Sent: Tuesday, December 12, 2023 6:07 PM
> To: Liu, Shaoyun ; Huang, JinHuiEric
> ; amd-gfx@lists.freedesktop.org
> Cc: Wong, Alice ; Kuehling, Felix
> ; Kasiviswanathan, Harish
> 
> Subject: RE: [PATCH] drm/amdkfd: fix mes set shader debugger process
> management
>
> [Public]
>
> > -Original Message-
> > From: Liu, Shaoyun 
> > Sent: Tuesday, December 12, 2023 5:44 PM
> > To: Kim, Jonathan ; Huang, JinHuiEric
> > ; amd-gfx@lists.freedesktop.org
> > Cc: Wong, Alice ; Kuehling, Felix
> > ; Kasiviswanathan, Harish
> > 
> > Subject: RE: [PATCH] drm/amdkfd: fix mes set shader debugger process
> > management
> >
> > [Public]
> >
> > Do you mean SET_SHADER_DEBUGER can  be called before ADD_QUEUE ?
> I
> > think  even in that  situation MES should still be able to handle it
> > as long as MES already  remove the process context from its list , MES
> > will treat the process context as a new item. I still don't understand why
> MES haven't
> > purged the  process context from the list after process termination .   Will
> > debug queue itself  also use the add/remove queue interface  and  is
> > it possible the debug queue itself from the  old process  still not be
> > removed ?
>
> SET_SHADER_DEBUGGER can be called independently from ADD_QUEUE.
> The process list is updated on either on SET_SHADER_DEBUGGER or
> ADD_QUEUE.
> e.g. runtime_enable (set_shader) -> add_queue -> remove_queue (list
> purged) -> runtime_disable (set_shader process re-added) -> process
> termination (stale list) or debug attach (set_shader) -> add_queue ->
> remove_queue (list purged) -> debug detach (set_shader process re-added) -
> >process termination (stale list)
>
> MES has no idea what process termination means.  The new flag is a proxy
> for this.
> There are reasons for process settings to take place prior to queue add
> (debugger, gfx11 cwsr workaround, core dump etc need this).
>
> I'm not sure what kernel/debug queues have to do with this.
> By that argument, the list should be purged.
>
> Jon
>
> >
> > Shaoyun.liu
> >
> > -Original Message-
> > From: Kim, Jonathan 
> > Sent: Tuesday, December 12, 2023 4:48 PM
> > To: Liu, Shaoyun ; Huang, JinHuiEric
> > ; amd-gfx@lists.freedesktop.org
> > Cc: Wong, Alice ; Kuehling, Felix
> > ; Kasiviswanathan, Harish
> > 
> > Subject: RE: [PATCH] drm/amdkfd: fix mes set shader debugger process
> > management
> >
> > [Public]
> >
> > > -Original Message-
> > > From: Liu, Shaoyun 
> > > Sent: Tuesday, December 12, 2023 4:45 PM
> > > To: Kim, Jonathan ; Huang, JinHuiEric
> > > ; amd-gfx@lists.freedesktop.org
> > > Cc: Wong, Alice ; Kuehling, Felix
> > > ; Kasiviswana

RE: [PATCH] drm/amdkfd: fix mes set shader debugger process management

2023-12-12 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Liu, Shaoyun 
> Sent: Tuesday, December 12, 2023 5:44 PM
> To: Kim, Jonathan ; Huang, JinHuiEric
> ; amd-gfx@lists.freedesktop.org
> Cc: Wong, Alice ; Kuehling, Felix
> ; Kasiviswanathan, Harish
> 
> Subject: RE: [PATCH] drm/amdkfd: fix mes set shader debugger process
> management
>
> [Public]
>
> Do you mean SET_SHADER_DEBUGER can  be called before ADD_QUEUE ?  I
> think  even in that  situation MES should still be able to handle it as long 
> as
> MES already  remove the process context from its list , MES will treat the
> process context as a new item. I still don't understand why MES haven't
> purged the  process context from the list after process termination .   Will
> debug queue itself  also use the add/remove queue interface  and  is it
> possible the debug queue itself from the  old process  still not be
> removed ?

SET_SHADER_DEBUGGER can be called independently from ADD_QUEUE.
The process list is updated on either on SET_SHADER_DEBUGGER or ADD_QUEUE.
e.g. runtime_enable (set_shader) -> add_queue -> remove_queue (list purged) -> 
runtime_disable (set_shader process re-added) -> process termination (stale 
list)
or debug attach (set_shader) -> add_queue -> remove_queue (list purged) -> 
debug detach (set_shader process re-added) ->process termination (stale list)

MES has no idea what process termination means.  The new flag is a proxy for 
this.
There are reasons for process settings to take place prior to queue add 
(debugger, gfx11 cwsr workaround, core dump etc need this).

I'm not sure what kernel/debug queues have to do with this.
By that argument, the list should be purged.

Jon

>
> Shaoyun.liu
>
> -Original Message-
> From: Kim, Jonathan 
> Sent: Tuesday, December 12, 2023 4:48 PM
> To: Liu, Shaoyun ; Huang, JinHuiEric
> ; amd-gfx@lists.freedesktop.org
> Cc: Wong, Alice ; Kuehling, Felix
> ; Kasiviswanathan, Harish
> 
> Subject: RE: [PATCH] drm/amdkfd: fix mes set shader debugger process
> management
>
> [Public]
>
> > -Original Message-
> > From: Liu, Shaoyun 
> > Sent: Tuesday, December 12, 2023 4:45 PM
> > To: Kim, Jonathan ; Huang, JinHuiEric
> > ; amd-gfx@lists.freedesktop.org
> > Cc: Wong, Alice ; Kuehling, Felix
> > ; Kasiviswanathan, Harish
> > 
> > Subject: RE: [PATCH] drm/amdkfd: fix mes set shader debugger process
> > management
> >
> > [Public]
> >
> > Shouldn't the driver side  remove all the remaining  queues for the
> > process during  process termination ?  If all the  queues been removed
> > for the process ,  MES should purge the  process context automatically
> > , otherwise it's bug inside MES .
>
> That's only if there were queues added to begin with.
>
> Jon
>
> >
> > Regards
> > Shaoyun.liu
> >
> > -Original Message-
> > From: Kim, Jonathan 
> > Sent: Tuesday, December 12, 2023 4:33 PM
> > To: Liu, Shaoyun ; Huang, JinHuiEric
> > ; amd-gfx@lists.freedesktop.org
> > Cc: Wong, Alice ; Kuehling, Felix
> > ; Kasiviswanathan, Harish
> > 
> > Subject: RE: [PATCH] drm/amdkfd: fix mes set shader debugger process
> > management
> >
> > [Public]
> >
> > > -Original Message-
> > > From: Liu, Shaoyun 
> > > Sent: Tuesday, December 12, 2023 4:00 PM
> > > To: Huang, JinHuiEric ; Kim, Jonathan
> > > ; amd-gfx@lists.freedesktop.org
> > > Cc: Wong, Alice ; Kuehling, Felix
> > > ; Kasiviswanathan, Harish
> > > 
> > > Subject: RE: [PATCH] drm/amdkfd: fix mes set shader debugger process
> > > management
> > >
> > > [AMD Official Use Only - General]
> > >
> > > Does this require the new MES FW for this process_ctx_flush
> > > requirement ?  Can the driver side add logic to guarantee that when calling
> > > SET_SHADER_DEBUGGER, the process address  is always valid ?
> >
> > Call to flush on old fw is a NOP so it's harmless in that case.
> > Full solution will still require a new MES version as this is a
> > workaround on corner cases and not a new feature i.e. we can't stop
> > ROCm from running on old fw.
> > The process address is always valid from the driver side.  It's the
> > MES side of things that gets stale as mentioned in the description
> > (passed value to MES is reused with new BO but MES doesn't refresh).
> > i.e. MES auto refreshes its process list assuming process queues were
> > all drained but driver can't guarantee that SET_SHADER_DEBUGGER
> (which
> >

RE: [PATCH] drm/amdkfd: fix mes set shader debugger process management

2023-12-12 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Liu, Shaoyun 
> Sent: Tuesday, December 12, 2023 4:45 PM
> To: Kim, Jonathan ; Huang, JinHuiEric
> ; amd-gfx@lists.freedesktop.org
> Cc: Wong, Alice ; Kuehling, Felix
> ; Kasiviswanathan, Harish
> 
> Subject: RE: [PATCH] drm/amdkfd: fix mes set shader debugger process
> management
>
> [Public]
>
> Shouldn't the driver side  remove all the remaining  queues for the process
> during  process termination ?  If all the  queues been removed for the
> process ,  MES should purge the  process context automatically , otherwise
> it's bug inside MES .

That's only if there were queues added to begin with.

Jon

>
> Regards
> Shaoyun.liu
>
> -Original Message-
> From: Kim, Jonathan 
> Sent: Tuesday, December 12, 2023 4:33 PM
> To: Liu, Shaoyun ; Huang, JinHuiEric
> ; amd-gfx@lists.freedesktop.org
> Cc: Wong, Alice ; Kuehling, Felix
> ; Kasiviswanathan, Harish
> 
> Subject: RE: [PATCH] drm/amdkfd: fix mes set shader debugger process
> management
>
> [Public]
>
> > -----Original Message-
> > From: Liu, Shaoyun 
> > Sent: Tuesday, December 12, 2023 4:00 PM
> > To: Huang, JinHuiEric ; Kim, Jonathan
> > ; amd-gfx@lists.freedesktop.org
> > Cc: Wong, Alice ; Kuehling, Felix
> > ; Kasiviswanathan, Harish
> > 
> > Subject: RE: [PATCH] drm/amdkfd: fix mes set shader debugger process
> > management
> >
> > [AMD Official Use Only - General]
> >
> > Does this require the new MES FW for this process_ctx_flush
> > requirement ?  Can the driver side add logic to guarantee that when calling
> > SET_SHADER_DEBUGGER, the process address  is always valid ?
>
> Call to flush on old fw is a NOP so it's harmless in that case.
> Full solution will still require a new MES version as this is a workaround on
> corner cases and not a new feature i.e. we can't stop ROCm from running on
> old fw.
> The process address is always valid from the driver side.  It's the MES side 
> of
> things that gets stale as mentioned in the description (passed value to MES
> is reused with new BO but MES doesn't refresh).
> i.e. MES auto refreshes its process list assuming process queues were all
> drained but driver can't guarantee that SET_SHADER_DEBUGGER (which
> adds to MES's process list) will get called after queues get added (in fact 
> it's
> a requirement that it can be called at any time).
> We can attempt to defer these calls in the KFD, considering all cases.
> But that would be a large shift in debugger/runtime_enable/KFD code,
> which is already complicated and could get buggy plus it would not be
> intuitive at all as to why we're doing this.
> I think a single flag set to flush MES on process termination is a simpler
> compromise that shows the limitation in a more obvious way.
>
> Thanks,
>
> Jon
>
>
> >
> > Regards
> > Shaoyun.liu
> >
> >
> > -Original Message-
> > From: amd-gfx  On Behalf Of
> > Eric Huang
> > Sent: Tuesday, December 12, 2023 12:49 PM
> > To: Kim, Jonathan ; amd-
> > g...@lists.freedesktop.org
> > Cc: Wong, Alice ; Kuehling, Felix
> > ; Kasiviswanathan, Harish
> > 
> > Subject: Re: [PATCH] drm/amdkfd: fix mes set shader debugger process
> > management
> >
> >
> > On 2023-12-11 16:16, Jonathan Kim wrote:
> > > MES provides the driver a call to explicitly flush stale process
> > > memory within the MES to avoid a race condition that results in a
> > > fatal memory violation.
> > >
> > > When SET_SHADER_DEBUGGER is called, the driver passes a memory
> > address
> > > that represents a process context address MES uses to keep track of
> > > future per-process calls.
> > >
> > > Normally, MES will purge its process context list when the last
> > > queue has been removed.  The driver, however, can call
> > > SET_SHADER_DEBUGGER regardless of whether a queue has been added
> or not.
> > >
> > > If SET_SHADER_DEBUGGER has been called with no queues as the last
> > > call prior to process termination, the passed process context
> > > address will still reside within MES.
> > >
> > > On a new process call to SET_SHADER_DEBUGGER, the driver may end
> up
> > > passing an identical process context address value (based on
> > > per-process gpu memory address) to MES but is now pointing to a new
> > > allocated buffer object during KFD process creation.  Since the MES
> > > is unaware of this, access of the passed address points 

RE: [PATCH] drm/amdkfd: fix mes set shader debugger process management

2023-12-12 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Liu, Shaoyun 
> Sent: Tuesday, December 12, 2023 4:00 PM
> To: Huang, JinHuiEric ; Kim, Jonathan
> ; amd-gfx@lists.freedesktop.org
> Cc: Wong, Alice ; Kuehling, Felix
> ; Kasiviswanathan, Harish
> 
> Subject: RE: [PATCH] drm/amdkfd: fix mes set shader debugger process
> management
>
> [AMD Official Use Only - General]
>
> Does this require the new MES FW for this process_ctx_flush
> requirement ?  Can the driver side add logic to guarantee that when calling
> SET_SHADER_DEBUGGER, the process address  is always valid ?

Call to flush on old fw is a NOP so it's harmless in that case.
Full solution will still require a new MES version as this is a workaround on 
corner cases and not a new feature i.e. we can't stop ROCm from running on old 
fw.
The process address is always valid from the driver side.  It's the MES side of 
things that gets stale as mentioned in the description (passed value to MES is 
reused with new BO but MES doesn't refresh).
i.e. MES auto refreshes its process list assuming process queues were all 
drained but driver can't guarantee that SET_SHADER_DEBUGGER (which adds to 
MES's process list) will get called after queues get added (in fact it's a 
requirement that it can be called at any time).
We can attempt to defer these calls in the KFD, considering all cases.
But that would be a large shift in debugger/runtime_enable/KFD code, which is 
already complicated and could get buggy plus it would not be intuitive at all 
as to why we're doing this.
I think a single flag set to flush MES on process termination is a simpler 
compromise that shows the limitation in a more obvious way.

Thanks,

Jon


>
> Regards
> Shaoyun.liu
>
>
> -Original Message-
> From: amd-gfx  On Behalf Of Eric
> Huang
> Sent: Tuesday, December 12, 2023 12:49 PM
> To: Kim, Jonathan ; amd-
> g...@lists.freedesktop.org
> Cc: Wong, Alice ; Kuehling, Felix
> ; Kasiviswanathan, Harish
> 
> Subject: Re: [PATCH] drm/amdkfd: fix mes set shader debugger process
> management
>
>
> On 2023-12-11 16:16, Jonathan Kim wrote:
> > MES provides the driver a call to explicitly flush stale process
> > memory within the MES to avoid a race condition that results in a
> > fatal memory violation.
> >
> > When SET_SHADER_DEBUGGER is called, the driver passes a memory
> address
> > that represents a process context address MES uses to keep track of
> > future per-process calls.
> >
> > Normally, MES will purge its process context list when the last queue
> > has been removed.  The driver, however, can call SET_SHADER_DEBUGGER
> > regardless of whether a queue has been added or not.
> >
> > If SET_SHADER_DEBUGGER has been called with no queues as the last call
> > prior to process termination, the passed process context address will
> > still reside within MES.
> >
> > On a new process call to SET_SHADER_DEBUGGER, the driver may end up
> > passing an identical process context address value (based on
> > per-process gpu memory address) to MES but is now pointing to a new
> > allocated buffer object during KFD process creation.  Since the MES is
> > unaware of this, access of the passed address points to the stale
> > object within MES and triggers a fatal memory violation.
> >
> > The solution is for KFD to explicitly flush the process context
> > address from MES on process termination.
> >
> > Note that the flush call and the MES debugger calls use the same MES
> > interface but are separated as KFD calls to avoid conflicting with
> > each other.
> >
> > Signed-off-by: Jonathan Kim 
> > Tested-by: Alice Wong 
> Reviewed-by: Eric Huang 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c   | 31
> +++
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h   | 10 +++---
> >   .../amd/amdkfd/kfd_process_queue_manager.c|  1 +
> >   drivers/gpu/drm/amd/include/mes_v11_api_def.h |  3 +-
> >   4 files changed, 40 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> > index e544b823abf6..e98de23250dc 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> > @@ -916,6 +916,11 @@ int amdgpu_mes_set_shader_debugger(struct
> amdgpu_device *adev,
> >   op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
> >   op_input.set_shader_debugger.process_context_addr =
> process_context_addr;
> >   op_input.set_shader_debugger.flags.u32all = flags;
> > +
> > + /* use amdgpu mes_flush_sh

RE: [PATCH] drm/amdgpu: xgmi_fill_topology_info

2023-12-08 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Chander, Vignesh 
> Sent: Thursday, December 7, 2023 7:42 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Lazar, Lijo ; Luo, Zhigang
> ; Kim, Jonathan ;
> Chander, Vignesh 
> Subject: [PATCH] drm/amdgpu: xgmi_fill_topology_info
>
> 1. Use the mirrored topology info to fill links for VF.
> The new solution is required to simplify and optimize host driver logic.
> Only use the new solution for VFs that support full duplex and
> extended_peer_link_info otherwise the info would be incomplete.
>
> 2. avoid calling extended_link_info on VF as its not supported
>
> Signed-off-by: Vignesh Chander 

Reviewed-by: Jonathan Kim 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c  |  4 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 58
> 
>  2 files changed, 52 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> index a21045d018f2..1bf975b8d083 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> @@ -1433,8 +1433,8 @@ int psp_xgmi_get_topology_info(struct
> psp_context *psp,
>get_extended_data) ||
>   amdgpu_ip_version(psp->adev, MP0_HWIP, 0) ==
>   IP_VERSION(13, 0, 6);
> - bool ta_port_num_support = psp-
> >xgmi_context.xgmi_ta_caps &
> -
>   EXTEND_PEER_LINK_INFO_CMD_FLAG;
> + bool ta_port_num_support = amdgpu_sriov_vf(psp->adev) ?
> 0 :
> + psp->xgmi_context.xgmi_ta_caps &
> EXTEND_PEER_LINK_INFO_CMD_FLAG;
>
>   /* popluate the shared output buffer rather than the cmd
> input buffer
>* with node_ids as the input for GET_PEER_LINKS command
> execution.
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> index 44d8c1a11e1b..dd82d73daed6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> @@ -823,6 +823,28 @@ static int
> amdgpu_xgmi_initialize_hive_get_data_partition(struct amdgpu_hive_inf
>   return 0;
>  }
>
> +void amdgpu_xgmi_fill_topology_info(struct amdgpu_device *adev,
> + struct amdgpu_device *peer_adev)
> +{
> + struct psp_xgmi_topology_info *top_info = &adev-
> >psp.xgmi_context.top_info;
> + struct psp_xgmi_topology_info *peer_info = &peer_adev-
> >psp.xgmi_context.top_info;
> +
> + for (int i = 0; i < peer_info->num_nodes; i++) {
> + if (peer_info->nodes[i].node_id == adev->gmc.xgmi.node_id)
> {
> + for (int j = 0; j < top_info->num_nodes; j++) {
> + if (top_info->nodes[j].node_id == peer_adev-
> >gmc.xgmi.node_id) {
> + peer_info->nodes[i].num_hops =
> top_info->nodes[j].num_hops;
> + peer_info-
> >nodes[i].is_sharing_enabled =
> + top_info-
> >nodes[j].is_sharing_enabled;
> + peer_info->nodes[i].num_links =
> + top_info-
> >nodes[j].num_links;
> + return;
> + }
> + }
> + }
> + }
> +}
> +
>  int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
>  {
>   struct psp_xgmi_topology_info *top_info;
> @@ -897,18 +919,38 @@ int amdgpu_xgmi_add_device(struct
> amdgpu_device *adev)
>   goto exit_unlock;
>   }
>
> - /* get latest topology info for each device from psp */
> - list_for_each_entry(tmp_adev, &hive->device_list,
> gmc.xgmi.head) {
> - ret = psp_xgmi_get_topology_info(&tmp_adev->psp,
> count,
> - &tmp_adev-
> >psp.xgmi_context.top_info, false);
> + if (amdgpu_sriov_vf(adev) &&
> + psp->xgmi_context.xgmi_ta_caps &
> EXTEND_PEER_LINK_INFO_CMD_FLAG) {
> + /* only get topology for VF being init if it can
> support full duplex */
> + ret = psp_xgmi_get_topology_info(&adev->psp,
> count,
> + &adev-
> >psp.xgmi_context.top_info, false);
>   if (ret) {
> - dev_err(tmp_adev->dev,
> + dev_err(adev

RE: [PATCH] drm/amdkfd: fix add queue process context clear for hsa non-init cases

2023-09-12 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Kuehling, Felix 
> Sent: Tuesday, September 12, 2023 9:01 PM
> To: Kim, Jonathan ; amd-gfx@lists.freedesktop.org
> Cc: Ji, Ruili ; Guo, Shikai ; Huang,
> JinHuiEric 
> Subject: Re: [PATCH] drm/amdkfd: fix add queue process context clear for hsa
> non-init cases
>
> On 2023-09-12 20:53, Kim, Jonathan wrote:
> > [Public]
> >
> >> -Original Message-
> >> From: Kuehling, Felix 
> >> Sent: Tuesday, September 12, 2023 8:36 PM
> >> To: Kim, Jonathan ; amd-
> g...@lists.freedesktop.org
> >> Cc: Ji, Ruili ; Guo, Shikai ;
> >> jinhuierichu...@amd.com
> >> Subject: Re: [PATCH] drm/amdkfd: fix add queue process context clear for
> hsa
> >> non-init cases
> >>
> >> On 2023-09-12 8:17, Jonathan Kim wrote:
> >>> There are cases where HSA is not initialized when adding queues
> >> This statement doesn't make sense to me. If HSA is not initialized, it
> >> means user mode hasn't opened the KFD device. So it can't create queues.
> >> What do you really mean here?
> > I meant the call to runtime enable e.g. KFD test can add a queue without
> runtime enable call.
>
> OK, this can also happen when you run an older version of the HSA
> runtime that doesn't support the ROCm debugger yet. Please update the
> patch description accordingly.

Ok will do thanks.  Sorry for the confusing description.
For some reason I forgot that HSA was umbrella term and doesn't only refer to 
the ROCr component.

Thanks,

Jon

>
> Thanks,
>Felix
>
>
> >
> > Thanks,
> >
> > Jon
> >
> >> Regards,
> >> Felix
> >>
> >>
> >>>and
> >>> the ADD_QUEUE API should clear the MES process context instead of
> >>> SET_SHADER_DEBUGGER.
> >>>
> >>> The only time ADD_QUEUE.skip_process_ctx_clear is required is for
> >>> debugger use cases and a debugged process is always runtime enabled
> >>> when adding a queue.
> >>>
> >>> Tested-by: Shikai Guo 
> >>> Signed-off-by: Jonathan Kim 
> >>> ---
> >>>drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 6 --
> >>>1 file changed, 4 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> >> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> >>> index 6d07a5dd2648..77159b03a422 100644
> >>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> >>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> >>> @@ -227,8 +227,10 @@ static int add_queue_mes(struct
> >> device_queue_manager *dqm, struct queue *q,
> >>>  queue_input.tba_addr = qpd->tba_addr;
> >>>  queue_input.tma_addr = qpd->tma_addr;
> >>>  queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device);
> >>> -   queue_input.skip_process_ctx_clear = qpd->pqm->process-
> >>> debug_trap_enabled ||
> >>> -
> >> kfd_dbg_has_ttmps_always_setup(q->device);
> >>> +   queue_input.skip_process_ctx_clear =
> >>> +   qpd->pqm->process->runtime_info.runtime_state ==
> >> DEBUG_RUNTIME_STATE_ENABLED &&
> >>> +   (qpd->pqm->process-
> >>> debug_trap_enabled ||
> >>> +
> >> kfd_dbg_has_ttmps_always_setup(q->device));
> >>>  queue_type = convert_to_mes_queue_type(q->properties.type);
> >>>  if (queue_type < 0) {


RE: [PATCH] drm/amdkfd: fix add queue process context clear for hsa non-init cases

2023-09-12 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Kuehling, Felix 
> Sent: Tuesday, September 12, 2023 8:36 PM
> To: Kim, Jonathan ; amd-gfx@lists.freedesktop.org
> Cc: Ji, Ruili ; Guo, Shikai ;
> jinhuierichu...@amd.com
> Subject: Re: [PATCH] drm/amdkfd: fix add queue process context clear for hsa
> non-init cases
>
> On 2023-09-12 8:17, Jonathan Kim wrote:
> > There are cases where HSA is not initialized when adding queues
>
> This statement doesn't make sense to me. If HSA is not initialized, it
> means user mode hasn't opened the KFD device. So it can't create queues.
> What do you really mean here?

I meant the call to runtime enable e.g. KFD test can add a queue without 
runtime enable call.

Thanks,

Jon

>
> Regards,
>Felix
>
>
> >   and
> > the ADD_QUEUE API should clear the MES process context instead of
> > SET_SHADER_DEBUGGER.
> >
> > The only time ADD_QUEUE.skip_process_ctx_clear is required is for
> > debugger use cases and a debugged process is always runtime enabled
> > when adding a queue.
> >
> > Tested-by: Shikai Guo 
> > Signed-off-by: Jonathan Kim 
> > ---
> >   drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 6 --
> >   1 file changed, 4 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > index 6d07a5dd2648..77159b03a422 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > @@ -227,8 +227,10 @@ static int add_queue_mes(struct
> device_queue_manager *dqm, struct queue *q,
> > queue_input.tba_addr = qpd->tba_addr;
> > queue_input.tma_addr = qpd->tma_addr;
> > queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device);
> > -   queue_input.skip_process_ctx_clear = qpd->pqm->process-
> >debug_trap_enabled ||
> > -
> kfd_dbg_has_ttmps_always_setup(q->device);
> > +   queue_input.skip_process_ctx_clear =
> > +   qpd->pqm->process->runtime_info.runtime_state ==
> DEBUG_RUNTIME_STATE_ENABLED &&
> > +   (qpd->pqm->process-
> >debug_trap_enabled ||
> > +
> kfd_dbg_has_ttmps_always_setup(q->device));
> >
> > queue_type = convert_to_mes_queue_type(q->properties.type);
> > if (queue_type < 0) {


RE: [PATCH] drm/amdkfd: fix add queue process context clear for hsa non-init cases

2023-09-12 Thread Kim, Jonathan
[Public]

+ Eric

> -Original Message-
> From: Kim, Jonathan 
> Sent: Tuesday, September 12, 2023 8:17 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Ji, Ruili ; Guo, Shikai ;
> jinhuierichu...@amd.com; Kuehling, Felix ; Kim,
> Jonathan ; Guo, Shikai ;
> Kim, Jonathan 
> Subject: [PATCH] drm/amdkfd: fix add queue process context clear for hsa
> non-init cases
>
> There are cases where HSA is not initialized when adding queues and
> the ADD_QUEUE API should clear the MES process context instead of
> SET_SHADER_DEBUGGER.
>
> The only time ADD_QUEUE.skip_process_ctx_clear is required is for
> debugger use cases and a debugged process is always runtime enabled
> when adding a queue.
>
> Tested-by: Shikai Guo 
> Signed-off-by: Jonathan Kim 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 6d07a5dd2648..77159b03a422 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -227,8 +227,10 @@ static int add_queue_mes(struct
> device_queue_manager *dqm, struct queue *q,
>   queue_input.tba_addr = qpd->tba_addr;
>   queue_input.tma_addr = qpd->tma_addr;
>   queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device);
> - queue_input.skip_process_ctx_clear = qpd->pqm->process-
> >debug_trap_enabled ||
> -
> kfd_dbg_has_ttmps_always_setup(q->device);
> + queue_input.skip_process_ctx_clear =
> + qpd->pqm->process->runtime_info.runtime_state ==
> DEBUG_RUNTIME_STATE_ENABLED &&
> + (qpd->pqm->process-
> >debug_trap_enabled ||
> +
> kfd_dbg_has_ttmps_always_setup(q->device));
>
>   queue_type = convert_to_mes_queue_type(q->properties.type);
>   if (queue_type < 0) {
> --
> 2.34.1



RE: [PATCH] drm/amdkfd: Fix reg offset for setting CWSR grace period

2023-08-29 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Joshi, Mukul 
> Sent: Tuesday, August 29, 2023 10:55 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kuehling, Felix ; Kim, Jonathan
> ; Joshi, Mukul 
> Subject: [PATCH] drm/amdkfd: Fix reg offset for setting CWSR grace period
>
> This patch fixes the case where the code currently passes
> absolute register address and not the reg offset, which HWS
> expects, when sending the PM4 packet to set/update CWSR grace
> period. Additionally, cleanup the signature of
> build_grace_period_packet_info function as it no longer needs
> the inst parameter.
>
> Signed-off-by: Mukul Joshi 

Reviewed-by: Jonathan Kim 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 3 +--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h| 3 +--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 6 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 3 +--
>  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 3 +--
>  drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c| 3 +--
>  drivers/gpu/drm/amd/include/kgd_kfd_interface.h   | 3 +--
>  7 files changed, 8 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> index f1f2c24de081..69810b3f1c63 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> @@ -980,8 +980,7 @@ void
> kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
>   uint32_t wait_times,
>   uint32_t grace_period,
>   uint32_t *reg_offset,
> - uint32_t *reg_data,
> - uint32_t inst)
> + uint32_t *reg_data)
>  {
>   *reg_data = wait_times;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> index ecaead24e8c9..67bcaa3d4226 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> @@ -55,5 +55,4 @@ void
> kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
>  uint32_t wait_times,
>  uint32_t grace_period,
>  uint32_t *reg_offset,
> -uint32_t *reg_data,
> -uint32_t inst);
> +uint32_t *reg_data);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index fa5ee96f8845..3c45a188b701 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -1103,8 +1103,7 @@ void
> kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
>   uint32_t wait_times,
>   uint32_t grace_period,
>   uint32_t *reg_offset,
> - uint32_t *reg_data,
> - uint32_t inst)
> + uint32_t *reg_data)
>  {
>   *reg_data = wait_times;
>
> @@ -1120,8 +1119,7 @@ void
> kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
>   SCH_WAVE,
>   grace_period);
>
> - *reg_offset = SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
> - mmCP_IQ_WAIT_TIME2);
> + *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
>  }
>
>  void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device
> *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> index 936e501908ce..ce424615f59b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> @@ -100,5 +100,4 @@ void
> kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
>  uint32_t wait_times,
>  uint32_t grace_period,
>  uint32_t *reg_offset,
> -uint32_t *reg_data,
> -uint32_t inst);
> +uint32_t *reg_data);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_qu

RE: [PATCH] drm/amdkfd: fix address watch clearing bug for gfx v9.4.2

2023-08-10 Thread Kim, Jonathan
[Public]

Sounds good.

I'd also change:

>>>>>> KFD currently relies on MEC FW to clear tcp watch control
>>>>>> register by sending MAP_PROCESS packet with 0 of field
>>>>>> tcp_watch_cntl to HWS, but if the queue is suspended, the
>>>>>> packet will not be sent and the previous value will be
>>>>>> left on the register, that will affect the following apps.
>>>>>> So the solution is to clear the register as gfx v9 in KFD.

To something like:

KFD currently relies on MEC FW to clear tcp watch control
register on UNMAP_QUEUES.  Due to a FW bug, MEC does not
do this.
So the solution is to clear the register as gfx v9 in KFD.

With those fixed, this patch is Reviewed-by: Jonathan Kim 

Hopefully we can get away with this since every watch instance register is 
supposed to be 1-1 to a process ...
And that there's no race scenarios with trailing exceptions on dynamic watch 
point address changes ...

Thanks,

Jon

> -Original Message-
> From: Huang, JinHuiEric 
> Sent: Thursday, August 10, 2023 6:31 PM
> To: Kim, Jonathan ; Kuehling, Felix
> ; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdkfd: fix address watch clearing bug for gfx v9.4.2
>
> I will change title to "drm/amdkfd: workaround address watch clearing
> bug for gfx v9.4.2". is it OK?
>
> Regards,
> Eric
>
> On 2023-08-10 18:25, Kim, Jonathan wrote:
> > [Public]
> >
> > Yeah this is a recent bug so this workaround is new.  More rigorous tests
> revealed this is probably a miss on the FW side.  We explicitly requested
> UNMAP_QUEUES unconditionally invalidate watch controls during the
> beginning of design to prevent any watch point racing.
> >
> > Note GFX11 MES calls are different on the surface but under the hood it's
> the same (registers get invalidated on unmap then get updated on map.
> Only difference it's at the queue level).
> >
> > I'm fine with this solution but I think it'd be good to describe this as a
> workaround somewhere (as opposed to a driver issue) so that folks aren't
> scratching their heads later on looking at code for GFX11 and up and
> wondering why we don't nuke the control setting with the KFD for those
> devices.
> >
> > Thanks,
> >
> > Jon
> >
> >> -Original Message-
> >> From: Kuehling, Felix 
> >> Sent: Thursday, August 10, 2023 5:56 PM
> >> To: Huang, JinHuiEric ; Kim, Jonathan
> >> ; amd-gfx@lists.freedesktop.org
> >> Subject: Re: [PATCH] drm/amdkfd: fix address watch clearing bug for gfx
> v9.4.2
> >>
> >> I think Jon is suggesting that the UNMAP_QUEUES command should clear
> the
> >> address watch registers. Requesting such a change from the the HWS team
> >> may take a long time.
> >>
> >> That said, when was this workaround implemented and reviewed? Did I
> >> review it as part of Jon's debugger upstreaming patch series? Or did
> >> this come later? This patch only enables the workaround for v9.4.2.
> >>
> >> Regards,
> >> Felix
> >>
> >>
> >> On 2023-08-10 17:52, Eric Huang wrote:
> >>> The problem is the queue is suspended before clearing address watch
> >>> call in KFD, there is not queue preemption and queue resume after
> >>> clearing call, and the test ends. So there is not chance to send
> >>> MAP_PROCESS to HWS. At this point FW has nothing to do. We have
> >>> several test FWs from Tej, none of them works, so I recalled the
> >>> kernel debug log and found out the problem.
> >>>
> >>> GFX11 has different scheduler, when calling clear address watch, KFD
> >>> directly sends the MES_MISC_OP_SET_SHADER_DEBUGGER to MES, it
> >> doesn't
> >>> consider if the queue is suspended. So GFX11 doesn't have this issue.
> >>>
> >>> Regards,
> >>> Eric
> >>>
> >>> On 2023-08-10 17:27, Kim, Jonathan wrote:
> >>>> [AMD Official Use Only - General]
> >>>>
> >>>> This is a strange solution because the MEC should set watch controls
> >>>> as non-valid automatically on queue preemption to avoid this kind of
> >>>> issue in the first place by design.  MAP_PROCESS on resume will take
> >>>> whatever the driver requests.
> >>>> GFX11 has no issue with letting the HWS do this.
> >>>>
> >>>> Are we sure we're not working around some HWS bug?
> >>>>
> >>>>

RE: [PATCH] drm/amdkfd: fix address watch clearing bug for gfx v9.4.2

2023-08-10 Thread Kim, Jonathan
[Public]

Yeah this is a recent bug so this workaround is new.  More rigorous tests 
revealed this is probably a miss on the FW side.  We explicitly requested 
UNMAP_QUEUES unconditionally invalidate watch controls during the beginning of 
design to prevent any watch point racing.

Note GFX11 MES calls are different on the surface but under the hood it's the 
same (registers get invalidated on unmap then get updated on map; the only 
difference is that it happens at the queue level).

I'm fine with this solution but I think it'd be good to describe this as a 
workaround somewhere (as opposed to a driver issue) so that folks aren't 
scratching their heads later on looking at code for GFX11 and up and wondering 
why we don't nuke the control setting with the KFD for those devices.

Thanks,

Jon

> -Original Message-
> From: Kuehling, Felix 
> Sent: Thursday, August 10, 2023 5:56 PM
> To: Huang, JinHuiEric ; Kim, Jonathan
> ; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdkfd: fix address watch clearing bug for gfx v9.4.2
>
> I think Jon is suggesting that the UNMAP_QUEUES command should clear the
> address watch registers. Requesting such a change from the HWS team
> may take a long time.
>
> That said, when was this workaround implemented and reviewed? Did I
> review it as part of Jon's debugger upstreaming patch series? Or did
> this come later? This patch only enables the workaround for v9.4.2.
>
> Regards,
>Felix
>
>
> On 2023-08-10 17:52, Eric Huang wrote:
> > The problem is the queue is suspended before clearing address watch
> > call in KFD, there is no queue preemption and queue resume after
> > clearing call, and the test ends. So there is no chance to send
> > MAP_PROCESS to HWS. At this point FW has nothing to do. We have
> > several test FWs from Tej, none of them works, so I recalled the
> > kernel debug log and found out the problem.
> >
> > GFX11 has different scheduler, when calling clear address watch, KFD
> > directly sends the MES_MISC_OP_SET_SHADER_DEBUGGER to MES, it
> doesn't
> > consider if the queue is suspended. So GFX11 doesn't have this issue.
> >
> > Regards,
> > Eric
> >
> > On 2023-08-10 17:27, Kim, Jonathan wrote:
> >> [AMD Official Use Only - General]
> >>
> >> This is a strange solution because the MEC should set watch controls
> >> as non-valid automatically on queue preemption to avoid this kind of
> >> issue in the first place by design.  MAP_PROCESS on resume will take
> >> whatever the driver requests.
> >> GFX11 has no issue with letting the HWS do this.
> >>
> >> Are we sure we're not working around some HWS bug?
> >>
> >> Thanks,
> >>
> >> Jon
> >>
> >>> -Original Message-
> >>> From: Kuehling, Felix 
> >>> Sent: Thursday, August 10, 2023 5:03 PM
> >>> To: Huang, JinHuiEric ; amd-
> >>> g...@lists.freedesktop.org
> >>> Cc: Kim, Jonathan 
> >>> Subject: Re: [PATCH] drm/amdkfd: fix address watch clearing bug for
> >>> gfx v9.4.2
> >>>
> >>> I think amdgpu_amdkfd_gc_9_4_3.c needs a similar fix. But maybe a bit
> >>> different because it needs to support multiple XCCs.
> >>>
> >>> That said, this patch is
> >>>
> >>> Reviewed-by: Felix Kuehling 
> >>>
> >>>
> >>> On 2023-08-10 16:47, Eric Huang wrote:
> >>>> KFD currently relies on MEC FW to clear tcp watch control
> >>>> register by sending MAP_PROCESS packet with 0 of field
> >>>> tcp_watch_cntl to HWS, but if the queue is suspended, the
> >>>> packet will not be sent and the previous value will be
> >>>> left on the register, that will affect the following apps.
> >>>> So the solution is to clear the register as gfx v9 in KFD.
> >>>>
> >>>> Signed-off-by: Eric Huang 
> >>>> ---
> >>>>drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c | 8 +-
> --
> >>>>1 file changed, 1 insertion(+), 7 deletions(-)
> >>>>
> >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> >>>> index e2fed6edbdd0..aff08321e976 100644
> >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> >>>> @@ -163,12 +163,6 @@ static uint32_t
> >>> kg

RE: [PATCH] drm/amdkfd: fix address watch clearing bug for gfx v9.4.2

2023-08-10 Thread Kim, Jonathan
[AMD Official Use Only - General]

This is a strange solution because the MEC should set watch controls as 
non-valid automatically on queue preemption to avoid this kind of issue in the 
first place by design.  MAP_PROCESS on resume will take whatever the driver 
requests.
GFX11 has no issue with letting the HWS do this.

Are we sure we're not working around some HWS bug?

Thanks,

Jon

> -Original Message-
> From: Kuehling, Felix 
> Sent: Thursday, August 10, 2023 5:03 PM
> To: Huang, JinHuiEric ; amd-
> g...@lists.freedesktop.org
> Cc: Kim, Jonathan 
> Subject: Re: [PATCH] drm/amdkfd: fix address watch clearing bug for gfx v9.4.2
>
> I think amdgpu_amdkfd_gc_9_4_3.c needs a similar fix. But maybe a bit
> different because it needs to support multiple XCCs.
>
> That said, this patch is
>
> Reviewed-by: Felix Kuehling 
>
>
> On 2023-08-10 16:47, Eric Huang wrote:
> > KFD currently relies on MEC FW to clear tcp watch control
> > register by sending MAP_PROCESS packet with 0 of field
> > tcp_watch_cntl to HWS, but if the queue is suspended, the
> > packet will not be sent and the previous value will be
> > left on the register, that will affect the following apps.
> > So the solution is to clear the register as gfx v9 in KFD.
> >
> > Signed-off-by: Eric Huang 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c | 8 +---
> >   1 file changed, 1 insertion(+), 7 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> > index e2fed6edbdd0..aff08321e976 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> > @@ -163,12 +163,6 @@ static uint32_t
> kgd_gfx_aldebaran_set_address_watch(
> > return watch_address_cntl;
> >   }
> >
> > -static uint32_t kgd_gfx_aldebaran_clear_address_watch(struct
> amdgpu_device *adev,
> > - uint32_t watch_id)
> > -{
> > -   return 0;
> > -}
> > -
> >   const struct kfd2kgd_calls aldebaran_kfd2kgd = {
> > .program_sh_mem_settings =
> kgd_gfx_v9_program_sh_mem_settings,
> > .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
> > @@ -193,7 +187,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
> > .set_wave_launch_trap_override =
> kgd_aldebaran_set_wave_launch_trap_override,
> > .set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode,
> > .set_address_watch = kgd_gfx_aldebaran_set_address_watch,
> > -   .clear_address_watch = kgd_gfx_aldebaran_clear_address_watch,
> > +   .clear_address_watch = kgd_gfx_v9_clear_address_watch,
> > .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
> > .build_grace_period_packet_info =
> kgd_gfx_v9_build_grace_period_packet_info,
> > .program_trap_handler_settings =
> kgd_gfx_v9_program_trap_handler_settings,


RE: [PATCH] drm/amdgpu: enable trap of each kfd vmid for gfx v9.4.3

2023-07-25 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Huang, JinHuiEric 
> Sent: Tuesday, July 25, 2023 2:16 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kim, Jonathan ; Huang, JinHuiEric
> 
> Subject: [PATCH] drm/amdgpu: enable trap of each kfd vmid for gfx v9.4.3
>
> To setup ttmp on as default for gfx v9.4.3 in IP hw init.
>
> Signed-off-by: Eric Huang 

Reviewed-by: Jonathan Kim 

> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 6 ++
>  1 file changed, 6 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> index 86a84a0970f0..9a90fd187909 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> @@ -898,6 +898,7 @@ static void gfx_v9_4_3_xcc_init_compute_vmid(struct
> amdgpu_device *adev,
>   int i;
>   uint32_t sh_mem_config;
>   uint32_t sh_mem_bases;
> + uint32_t data;
>
>   /*
>* Configure apertures:
> @@ -917,6 +918,11 @@ static void
> gfx_v9_4_3_xcc_init_compute_vmid(struct amdgpu_device *adev,
>   /* CP and shaders */
>   WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id),
> regSH_MEM_CONFIG, sh_mem_config);
>   WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id),
> regSH_MEM_BASES, sh_mem_bases);
> +
> + /* Enable trap for each kfd vmid. */
> + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
> regSPI_GDBG_PER_VMID_CNTL);
> + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL,
> TRAP_EN, 1);
> + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id),
> regSPI_GDBG_PER_VMID_CNTL, data);
>   }
>   soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
>   mutex_unlock(&adev->srbm_mutex);
> --
> 2.34.1



RE: [PATCH 1/2] drm/amdkfd: fix trap handling work around for debugging

2023-07-18 Thread Kim, Jonathan
[Public]

+ Ruili Ji as this is a follow-up to

commit 52223c7e74d124bea47beec467e59fdfc77559fc
Author: Ruili Ji 
Date:   Tue Jun 6 14:06:01 2023 +0800

drm/amdkfd: To enable traps for GC_11_0_4 and up

Flag trap_en should be enabled for trap handler.

Signed-off-by: Ruili Ji 
Signed-off-by: Aaron Liu 
Reviewed-by: Alex Deucher 

To ensure debugger is consistent with other checks.

Thanks,

Jon

> -Original Message-
> From: Kim, Jonathan 
> Sent: Friday, July 14, 2023 5:38 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kuehling, Felix ; Kim, Jonathan
> 
> Subject: [PATCH 1/2] drm/amdkfd: fix trap handling work around for
> debugging
>
> Update the list of devices that require the cwsr trap handling
> workaround for debugging use cases.
>
> Signed-off-by: Jonathan Kim 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_debug.c| 5 ++---
>  drivers/gpu/drm/amd/amdkfd/kfd_debug.h| 6 ++
>  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 6 ++
>  3 files changed, 10 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> index 190b03efe5ff..ccfc81f085ce 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> @@ -302,8 +302,7 @@ static int kfd_dbg_set_queue_workaround(struct
> queue *q, bool enable)
>   if (!q)
>   return 0;
>
> - if (KFD_GC_VERSION(q->device) < IP_VERSION(11, 0, 0) ||
> - KFD_GC_VERSION(q->device) >= IP_VERSION(12, 0, 0))
> + if (!kfd_dbg_has_cwsr_workaround(q->device))
>   return 0;
>
>   if (enable && q->properties.is_user_cu_masked)
> @@ -349,7 +348,7 @@ int kfd_dbg_set_mes_debug_mode(struct
> kfd_process_device *pdd)
>  {
>   uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd-
> >spi_dbg_launch_mode;
>   uint32_t flags = pdd->process->dbg_flags;
> - bool sq_trap_en = !!spi_dbg_cntl;
> + bool sq_trap_en = !!spi_dbg_cntl ||
> !kfd_dbg_has_cwsr_workaround(pdd->dev);
>
>   if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
>   return 0;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> index ba616ed17dee..586d7f886712 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> @@ -101,6 +101,12 @@ static inline bool
> kfd_dbg_is_rlc_restore_supported(struct kfd_node *dev)
>KFD_GC_VERSION(dev) == IP_VERSION(10, 1, 1));
>  }
>
> +static inline bool kfd_dbg_has_cwsr_workaround(struct kfd_node *dev)
> +{
> + return KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0) &&
> +KFD_GC_VERSION(dev) <= IP_VERSION(11, 0, 3);
> +}
> +
>  static inline bool kfd_dbg_has_gws_support(struct kfd_node *dev)
>  {
>   if ((KFD_GC_VERSION(dev) == IP_VERSION(9, 0, 1)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 31cac1fd0d58..761963ad6154 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -226,8 +226,7 @@ static int add_queue_mes(struct
> device_queue_manager *dqm, struct queue *q,
>   queue_input.paging = false;
>   queue_input.tba_addr = qpd->tba_addr;
>   queue_input.tma_addr = qpd->tma_addr;
> - queue_input.trap_en = KFD_GC_VERSION(q->device) <
> IP_VERSION(11, 0, 0) ||
> -   KFD_GC_VERSION(q->device) > IP_VERSION(11, 0,
> 3);
> + queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device);
>   queue_input.skip_process_ctx_clear = qpd->pqm->process-
> >debug_trap_enabled;
>
>   queue_type = convert_to_mes_queue_type(q->properties.type);
> @@ -1827,8 +1826,7 @@ static int create_queue_cpsch(struct
> device_queue_manager *dqm, struct queue *q,
>*/
>   q->properties.is_evicted = !!qpd->evicted;
>   q->properties.is_dbg_wa = qpd->pqm->process-
> >debug_trap_enabled &&
> - KFD_GC_VERSION(q->device) >= IP_VERSION(11, 0, 0)
> &&
> - KFD_GC_VERSION(q->device) <= IP_VERSION(11, 0, 3);
> +   kfd_dbg_has_cwsr_workaround(q->device);
>
>   if (qd)
>   mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q-
> >mqd_mem_obj, &q->gart_mqd_addr,
> --
> 2.25.1



RE: [PATCH] drm/amdkfd: enable grace period for xcp instance

2023-07-11 Thread Kim, Jonathan
[Public]

Isn't a KFD node already mapped as a partition?
The xcc instance mask should already be a unique offset per node so I think the 
LSB set position offset by 1 should work fine as an instance ID here:

Snip:
#define XCP_INST_MASK(num_inst, xcp_id)\
(num_inst ? GENMASK(num_inst - 1, 0) << (xcp_id * num_inst) : 0)

Thanks,

Jon

> -Original Message-
> From: Huang, JinHuiEric 
> Sent: Tuesday, July 11, 2023 10:28 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kim, Jonathan ; Huang, JinHuiEric
> 
> Subject: [PATCH] drm/amdkfd: enable grace period for xcp instance
>
> Read/write grace period from/to first xcc instance of
> xcp in kfd node.
>
> Signed-off-by: Eric Huang 
> ---
>  .../drm/amd/amdkfd/kfd_device_queue_manager.c | 21 ---
>  .../drm/amd/amdkfd/kfd_device_queue_manager.h |  2 +-
>  .../drm/amd/amdkfd/kfd_packet_manager_v9.c|  8 ---
>  3 files changed, 20 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 31cac1fd0d58..9000c4b778fd 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -1619,10 +1619,14 @@ static int initialize_cpsch(struct
> device_queue_manager *dqm)
>
>   init_sdma_bitmaps(dqm);
>
> - if (dqm->dev->kfd2kgd->get_iq_wait_times)
> + if (dqm->dev->kfd2kgd->get_iq_wait_times) {
> + u32 first_inst = dqm->dev->xcp->id *
> +  dqm->dev->adev->gfx.num_xcc_per_xcp;
>   dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
> - &dqm->wait_times,
> - ffs(dqm->dev->xcc_mask) - 1);
> + &dqm->wait_times[first_inst],
> + first_inst);
> + }
> +
>   return 0;
>  }
>
> @@ -1675,13 +1679,16 @@ static int start_cpsch(struct
> device_queue_manager *dqm)
>   grace_period);
>   if (retval)
>   pr_err("Setting grace timeout failed\n");
> - else if (dqm->dev->kfd2kgd-
> >build_grace_period_packet_info)
> + else if (dqm->dev->kfd2kgd-
> >build_grace_period_packet_info) {
> + u32 first_inst = dqm->dev->xcp->id *
> +  dqm->dev->adev-
> >gfx.num_xcc_per_xcp;
>   /* Update dqm->wait_times maintained in software
> */
>   dqm->dev->kfd2kgd-
> >build_grace_period_packet_info(
> - dqm->dev->adev, dqm-
> >wait_times,
> + dqm->dev->adev, dqm-
> >wait_times[first_inst],
>   grace_period, ®_offset,
> - &dqm->wait_times,
> - ffs(dqm->dev->xcc_mask) - 1);
> + &dqm->wait_times[first_inst],
> + first_inst);
> + }
>   }
>
>   dqm_unlock(dqm);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> index 7dd4b177219d..45959c33b944 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> @@ -262,7 +262,7 @@ struct device_queue_manager {
>   /* used for GFX 9.4.3 only */
>   uint32_tcurrent_logical_xcc_start;
>
> - uint32_twait_times;
> + uint32_twait_times[MAX_XCP];
>
>   wait_queue_head_t   destroy_wait;
>  };
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> index 8fda16e6fee6..960404a6379b 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> @@ -292,17 +292,19 @@ static int pm_set_grace_period_v9(struct
> packet_manager *pm,
>   struct pm4_mec_write_data_mmio *packet;
>   uint32_t reg_offset = 0;
>   uint32_t reg_data = 0;
> + uint32_t first_inst = pm->dqm->dev->xcp->id *
> +   pm->dqm->dev->adev->gfx.num_xcc_per_xcp;
>
>   pm->dqm->dev->kfd2kgd->build_grace_period_packet_

Re: [PATCH 1/4] drm/amdkfd: add kfd2kgd debugger callbacks for GC v9.4.3

2023-07-07 Thread Kim, Jonathan
Yeah that's an unfortunate mismatch.
Leave it then.  We can always clean it up later if theres a strong preference 
to do so.

Jon


From: Huang, JinHuiEric 
Sent: Friday, July 7, 2023 8:25 PM
To: Kim, Jonathan ; amd-gfx@lists.freedesktop.org 

Subject: Re: [PATCH 1/4] drm/amdkfd: add kfd2kgd debugger callbacks for GC 
v9.4.3

Thanks for your review. The prefix name change would be contradictory: the new 
functions' prefix name would differ from the existing functions' prefix name. Are 
you sure it doesn't matter?

Regards,
Eric

On 2023-07-07 19:52, Kim, Jonathan wrote:
I would change the static prefix names from kgd_gfx_ to kgd_gc_ to match file 
name and specify it as the target GC version.

With that fixed and assuming grace period instance fix ups will follow after, 
this patch and series is:

Reviewed-by: Jonathan Kim <mailto:jonathan@amd.com>



From: Huang, JinHuiEric 
<mailto:jinhuieric.hu...@amd.com>
Sent: Friday, July 7, 2023 1:46 PM
To: amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> 
<mailto:amd-gfx@lists.freedesktop.org>
Cc: Kim, Jonathan <mailto:jonathan@amd.com>; Kim, 
Jonathan <mailto:jonathan@amd.com>; Huang, JinHuiEric 
<mailto:jinhuieric.hu...@amd.com>
Subject: [PATCH 1/4] drm/amdkfd: add kfd2kgd debugger callbacks for GC v9.4.3

From: Jonathan Kim <mailto:jonathan@amd.com>

Implement the similarities as GC v9.4.2, and the difference
for GC v9.4.3 HW spec, i.e. xcc instance.

Signed-off-by: Jonathan Kim <mailto:jonathan@amd.com>
Signed-off-by: Eric Huang 
<mailto:jinhuieric.hu...@amd.com>
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c  |   8 +-
 .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h  |  27 +++
 .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c   | 166 +-
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c|   3 +-
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h|   6 +-
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c|   3 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |   3 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |   3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_debug.c|   3 +-
 .../gpu/drm/amd/include/kgd_kfd_interface.h   |   3 +-
 10 files changed, 213 insertions(+), 12 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
index 60f9e027fb66..a06a99c5d311 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -23,6 +23,7 @@
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_amdkfd_arcturus.h"
 #include "amdgpu_amdkfd_gfx_v9.h"
+#include "amdgpu_amdkfd_aldebaran.h"
 #include "gc/gc_9_4_2_offset.h"
 #include "gc/gc_9_4_2_sh_mask.h"
 #include 
@@ -36,7 +37,7 @@
  * initialize the debug mode registers after it has disabled GFX off during the
  * debug session.
  */
-static uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev,
+uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev,
 bool restore_dbg_registers,
 uint32_t vmid)
 {
@@ -107,7 +108,7 @@ static uint32_t 
kgd_aldebaran_set_wave_launch_trap_override(struct amdgpu_device
 return data;
 }

-static uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device *adev,
+uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device *adev,
 uint8_t wave_launch_mode,
 uint32_t vmid)
 {
@@ -125,7 +126,8 @@ static uint32_t kgd_gfx_aldebaran_set_address_watch(
 uint32_t watch_address_mask,
 uint32_t watch_id,
 uint32_t watch_mode,
-   uint32_t debug_vmid)
+   uint32_t debug_vmid,
+   uint32_t inst )
 {
 uint32_t watch_address_high;
 uint32_t watch_address_low;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h
new file mode 100644
index ..a7bdaf8d82dd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Soft

Re: [PATCH 1/4] drm/amdkfd: add kfd2kgd debugger callbacks for GC v9.4.3

2023-07-07 Thread Kim, Jonathan
I would change the static prefix names from kgd_gfx_ to kgd_gc_ to match file 
name and specify it as the target GC version.

With that fixed and assuming grace period instance fix ups will follow after, 
this patch and series is:

Reviewed-by: Jonathan Kim 



From: Huang, JinHuiEric 
Sent: Friday, July 7, 2023 1:46 PM
To: amd-gfx@lists.freedesktop.org 
Cc: Kim, Jonathan ; Kim, Jonathan ; 
Huang, JinHuiEric 
Subject: [PATCH 1/4] drm/amdkfd: add kfd2kgd debugger callbacks for GC v9.4.3

From: Jonathan Kim 

Implement the similarities as GC v9.4.2, and the difference
for GC v9.4.3 HW spec, i.e. xcc instance.

Signed-off-by: Jonathan Kim 
Signed-off-by: Eric Huang 
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c  |   8 +-
 .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h  |  27 +++
 .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c   | 166 +-
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c|   3 +-
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h|   6 +-
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c|   3 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |   3 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |   3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_debug.c|   3 +-
 .../gpu/drm/amd/include/kgd_kfd_interface.h   |   3 +-
 10 files changed, 213 insertions(+), 12 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
index 60f9e027fb66..a06a99c5d311 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -23,6 +23,7 @@
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_amdkfd_arcturus.h"
 #include "amdgpu_amdkfd_gfx_v9.h"
+#include "amdgpu_amdkfd_aldebaran.h"
 #include "gc/gc_9_4_2_offset.h"
 #include "gc/gc_9_4_2_sh_mask.h"
 #include 
@@ -36,7 +37,7 @@
  * initialize the debug mode registers after it has disabled GFX off during the
  * debug session.
  */
-static uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev,
+uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev,
 bool restore_dbg_registers,
 uint32_t vmid)
 {
@@ -107,7 +108,7 @@ static uint32_t 
kgd_aldebaran_set_wave_launch_trap_override(struct amdgpu_device
 return data;
 }

-static uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device *adev,
+uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device *adev,
 uint8_t wave_launch_mode,
 uint32_t vmid)
 {
@@ -125,7 +126,8 @@ static uint32_t kgd_gfx_aldebaran_set_address_watch(
 uint32_t watch_address_mask,
 uint32_t watch_id,
 uint32_t watch_mode,
-   uint32_t debug_vmid)
+   uint32_t debug_vmid,
+   uint32_t inst )
 {
 uint32_t watch_address_high;
 uint32_t watch_address_low;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h
new file mode 100644
index ..a7bdaf8d82dd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev,
+   bool restore_dbg_registers,
+   uint32_t vmid);
+uint32_t kgd_aldebaran_set_wave_launch_

RE: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance

2023-07-07 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Kim, Jonathan
> Sent: Friday, July 7, 2023 1:06 PM
> To: Huang, JinHuiEric ; amd-
> g...@lists.freedesktop.org
> Cc: Joshi, Mukul 
> Subject: RE: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance
>
>
>
> > -Original Message-
> > From: Huang, JinHuiEric 
> > Sent: Friday, July 7, 2023 12:44 PM
> > To: Kim, Jonathan ; amd-
> g...@lists.freedesktop.org
> > Cc: Joshi, Mukul 
> > Subject: Re: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance
> >
> >
> > On 2023-07-07 11:56, Kim, Jonathan wrote:
> > > [Public]
> > >
> > >> -Original Message-
> > >> From: Huang, JinHuiEric 
> > >> Sent: Friday, July 7, 2023 11:46 AM
> > >> To: Kim, Jonathan ; amd-
> > g...@lists.freedesktop.org
> > >> Subject: Re: [PATCH 4/6] drm/amdkfd: enable grace period for xcc
> instance
> > >>
> > >>
> > >> On 2023-07-07 10:59, Kim, Jonathan wrote:
> > >>> [Public]
> > >>>
> > >>>> -Original Message-
> > >>>> From: Huang, JinHuiEric 
> > >>>> Sent: Thursday, July 6, 2023 2:19 PM
> > >>>> To: amd-gfx@lists.freedesktop.org
> > >>>> Cc: Kim, Jonathan ; Huang, JinHuiEric
> > >>>> 
> > >>>> Subject: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance
> > >>>>
> > >>>> each xcc instance needs to get iq wait time and set
> > >>>> grace period accordingly.
> > >>>>
> > >>>> Signed-off-by: Eric Huang 
> > >>>> ---
> > >>>>.../drm/amd/amdkfd/kfd_device_queue_manager.c |  9 --
> > >>>>.../drm/amd/amdkfd/kfd_device_queue_manager.h |  2 +-
> > >>>>.../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 32 +++-
> --
> > 
> > >> -
> > >>>>.../drm/amd/amdkfd/kfd_packet_manager_v9.c|  9 +++---
> > >>>>drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  2 +-
> > >>>>5 files changed, 32 insertions(+), 22 deletions(-)
> > >>>>
> > >>>> diff --git
> a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > >>>> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > >>>> index a2bff3f01359..0f12c1989e14 100644
> > >>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > >>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > >>>> @@ -1606,6 +1606,8 @@ static int set_sched_resources(struct
> > >>>> device_queue_manager *dqm)
> > >>>>
> > >>>>static int initialize_cpsch(struct device_queue_manager *dqm)
> > >>>>{
> > >>>> + uint32_t xcc_id, xcc_mask = dqm->dev->xcc_mask;
> > >>>> +
> > >>>> pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
> > >>>>
> > >>>> mutex_init(&dqm->lock_hidden);
> > >>>> @@ -1620,8 +1622,11 @@ static int initialize_cpsch(struct
> > >>>> device_queue_manager *dqm)
> > >>>> init_sdma_bitmaps(dqm);
> > >>>>
> > >>>> if (dqm->dev->kfd2kgd->get_iq_wait_times)
> > >>>> - dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
> > >>>> - &dqm->wait_times, 0);
> > >>>> + for_each_inst(xcc_id, xcc_mask)
> > >>>> + dqm->dev->kfd2kgd->get_iq_wait_times(
> > >>>> + dqm->dev->adev,
> > >>>> + &dqm->wait_times[xcc_id],
> > >>>> + xcc_id);
> > >>>> return 0;
> > >>>>}
> > >>>>
> > >>>> diff --git
> a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> > >>>> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> > >>>> index 7dd4b177219d..62a6dc8d3032 100644
> > >>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> > >>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> > >>>> @@ -262,7 +2

RE: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance

2023-07-07 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Huang, JinHuiEric 
> Sent: Friday, July 7, 2023 12:44 PM
> To: Kim, Jonathan ; amd-gfx@lists.freedesktop.org
> Cc: Joshi, Mukul 
> Subject: Re: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance
>
>
> On 2023-07-07 11:56, Kim, Jonathan wrote:
> > [Public]
> >
> >> -Original Message-
> >> From: Huang, JinHuiEric 
> >> Sent: Friday, July 7, 2023 11:46 AM
> >> To: Kim, Jonathan ; amd-
> g...@lists.freedesktop.org
> >> Subject: Re: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance
> >>
> >>
> >> On 2023-07-07 10:59, Kim, Jonathan wrote:
> >>> [Public]
> >>>
> >>>> -----Original Message-
> >>>> From: Huang, JinHuiEric 
> >>>> Sent: Thursday, July 6, 2023 2:19 PM
> >>>> To: amd-gfx@lists.freedesktop.org
> >>>> Cc: Kim, Jonathan ; Huang, JinHuiEric
> >>>> 
> >>>> Subject: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance
> >>>>
> >>>> each xcc instance needs to get iq wait time and set
> >>>> grace period accordingly.
> >>>>
> >>>> Signed-off-by: Eric Huang 
> >>>> ---
> >>>>.../drm/amd/amdkfd/kfd_device_queue_manager.c |  9 --
> >>>>.../drm/amd/amdkfd/kfd_device_queue_manager.h |  2 +-
> >>>>.../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 32 +++---
> 
> >> -
> >>>>.../drm/amd/amdkfd/kfd_packet_manager_v9.c|  9 +++---
> >>>>drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  2 +-
> >>>>5 files changed, 32 insertions(+), 22 deletions(-)
> >>>>
> >>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> >>>> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> >>>> index a2bff3f01359..0f12c1989e14 100644
> >>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> >>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> >>>> @@ -1606,6 +1606,8 @@ static int set_sched_resources(struct
> >>>> device_queue_manager *dqm)
> >>>>
> >>>>static int initialize_cpsch(struct device_queue_manager *dqm)
> >>>>{
> >>>> + uint32_t xcc_id, xcc_mask = dqm->dev->xcc_mask;
> >>>> +
> >>>> pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
> >>>>
> >>>> mutex_init(&dqm->lock_hidden);
> >>>> @@ -1620,8 +1622,11 @@ static int initialize_cpsch(struct
> >>>> device_queue_manager *dqm)
> >>>> init_sdma_bitmaps(dqm);
> >>>>
> >>>> if (dqm->dev->kfd2kgd->get_iq_wait_times)
> >>>> - dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
> >>>> - &dqm->wait_times, 0);
> >>>> + for_each_inst(xcc_id, xcc_mask)
> >>>> + dqm->dev->kfd2kgd->get_iq_wait_times(
> >>>> + dqm->dev->adev,
> >>>> + &dqm->wait_times[xcc_id],
> >>>> + xcc_id);
> >>>> return 0;
> >>>>}
> >>>>
> >>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> >>>> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> >>>> index 7dd4b177219d..62a6dc8d3032 100644
> >>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> >>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> >>>> @@ -262,7 +262,7 @@ struct device_queue_manager {
> >>>> /* used for GFX 9.4.3 only */
> >>>> uint32_tcurrent_logical_xcc_start;
> >>>>
> >>>> - uint32_twait_times;
> >>>> + uint32_twait_times[32];
> >>> I think wait_times[16] should be sufficient.  We only get the hamming
> >> weight of 16 bits for NUM_XCC and I believe the xcc_mask is declared as a
> >> uint16_t in the KGD portion anyway.  We may as well align to that.
> >>>> wait_queue_head_t   destroy_wait;
> >>>>};
> >>>> 

RE: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance

2023-07-07 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Huang, JinHuiEric 
> Sent: Friday, July 7, 2023 11:46 AM
> To: Kim, Jonathan ; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance
>
>
> On 2023-07-07 10:59, Kim, Jonathan wrote:
> > [Public]
> >
> >> -Original Message-
> >> From: Huang, JinHuiEric 
> >> Sent: Thursday, July 6, 2023 2:19 PM
> >> To: amd-gfx@lists.freedesktop.org
> >> Cc: Kim, Jonathan ; Huang, JinHuiEric
> >> 
> >> Subject: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance
> >>
> >> each xcc instance needs to get iq wait time and set
> >> grace period accordingly.
> >>
> >> Signed-off-by: Eric Huang 
> >> ---
> >>   .../drm/amd/amdkfd/kfd_device_queue_manager.c |  9 --
> >>   .../drm/amd/amdkfd/kfd_device_queue_manager.h |  2 +-
> >>   .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 32 +++---
> -
> >>   .../drm/amd/amdkfd/kfd_packet_manager_v9.c|  9 +++---
> >>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  2 +-
> >>   5 files changed, 32 insertions(+), 22 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> >> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> >> index a2bff3f01359..0f12c1989e14 100644
> >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> >> @@ -1606,6 +1606,8 @@ static int set_sched_resources(struct
> >> device_queue_manager *dqm)
> >>
> >>   static int initialize_cpsch(struct device_queue_manager *dqm)
> >>   {
> >> + uint32_t xcc_id, xcc_mask = dqm->dev->xcc_mask;
> >> +
> >>pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
> >>
> >>mutex_init(&dqm->lock_hidden);
> >> @@ -1620,8 +1622,11 @@ static int initialize_cpsch(struct
> >> device_queue_manager *dqm)
> >>init_sdma_bitmaps(dqm);
> >>
> >>if (dqm->dev->kfd2kgd->get_iq_wait_times)
> >> - dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
> >> - &dqm->wait_times, 0);
> >> + for_each_inst(xcc_id, xcc_mask)
> >> + dqm->dev->kfd2kgd->get_iq_wait_times(
> >> + dqm->dev->adev,
> >> + &dqm->wait_times[xcc_id],
> >> + xcc_id);
> >>return 0;
> >>   }
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> >> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> >> index 7dd4b177219d..62a6dc8d3032 100644
> >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> >> @@ -262,7 +262,7 @@ struct device_queue_manager {
> >>/* used for GFX 9.4.3 only */
> >>uint32_tcurrent_logical_xcc_start;
> >>
> >> - uint32_twait_times;
> >> + uint32_twait_times[32];
> > I think wait_times[16] should be sufficient.  We only get the hamming
> weight of 16 bits for NUM_XCC and I believe the xcc_mask is declared as a
> uint16_t in the KGD portion anyway.  We may as well align to that.
> >
> >>wait_queue_head_t   destroy_wait;
> >>   };
> >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> >> b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> >> index 401096c103b2..f37ab4b6d88c 100644
> >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> >> @@ -374,27 +374,31 @@ int pm_update_grace_period(struct
> >> packet_manager *pm, uint32_t grace_period)
> >>   {
> >>int retval = 0;
> >>uint32_t *buffer, size;
> >> + uint32_t xcc_id, xcc_mask = pm->dqm->dev->xcc_mask;
> >>
> >>size = pm->pmf->set_grace_period_size;
> >>
> >>mutex_lock(&pm->lock);
> >>
> >>if (size) {
> >> - kq_acquire_packet_buffer(pm->priv_queue,
> >> - size / sizeof(uint32_t),
> >> - (unsigned int **)

RE: [PATCH 6/6] drm/amdkfd: add multi-process debugging support for GC v9.4.3

2023-07-07 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Huang, JinHuiEric 
> Sent: Thursday, July 6, 2023 2:19 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kim, Jonathan ; Kim, Jonathan
> ; Kuehling, Felix ;
> Huang, JinHuiEric 
> Subject: [PATCH 6/6] drm/amdkfd: add multi-process debugging support for
> GC v9.4.3
>
> From: Jonathan Kim 
>
> Similar to GC v9.4.2, GC v9.4.3 should use the 5-Dword extended
> MAP_PROCESS packet to support multi-process debugging.  Update the
> multi-process debug support list so that the KFD updates the runlist
> on debug mode setting and that it allocates enough GTT memory during
> KFD device initialization.
>
> Signed-off-by: Jonathan Kim 
> Reviewed-by: Felix Kuehling 
> Signed-off-by: Eric Huang 

This patch is Reviewed-by: Jonathan Kim 

> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_debug.h | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> index a289e59ceb79..a0afc6a7b6c4 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> @@ -76,8 +76,9 @@ int kfd_dbg_send_exception_to_runtime(struct
> kfd_process *p,
>
>  static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_node *dev)
>  {
> - return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
> -KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0);
> + return (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
> + KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
> + KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0));
>  }
>
>  void debug_event_write_work_handler(struct work_struct *work);
> --
> 2.34.1



RE: [PATCH 3/6] drm/amdkfd: enable watch points globally for gfx943

2023-07-07 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Huang, JinHuiEric 
> Sent: Thursday, July 6, 2023 2:19 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kim, Jonathan ; Kim, Jonathan
> ; Kuehling, Felix ;
> Huang, JinHuiEric 
> Subject: [PATCH 3/6] drm/amdkfd: enable watch points globally for gfx943
>
> From: Jonathan Kim 
>
> Set watch points for all xcc instances on GFX943.
>
> Signed-off-by: Jonathan Kim 
> Reviewed-by: Felix Kuehling 
> Signed-off-by: Eric Huang 

This patch is Reviewed-by: Jonathan Kim 

> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 8 +---
>  1 file changed, 5 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> index 24083db44724..190b03efe5ff 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> @@ -446,7 +446,8 @@ int kfd_dbg_trap_set_dev_address_watch(struct
> kfd_process_device *pdd,
>   uint32_t *watch_id,
>   uint32_t watch_mode)
>  {
> - int r = kfd_dbg_get_dev_watch_id(pdd, watch_id);
> + int xcc_id, r = kfd_dbg_get_dev_watch_id(pdd, watch_id);
> + uint32_t xcc_mask = pdd->dev->xcc_mask;
>
>   if (r)
>   return r;
> @@ -460,14 +461,15 @@ int kfd_dbg_trap_set_dev_address_watch(struct
> kfd_process_device *pdd,
>   }
>
>   amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
> - pdd->watch_points[*watch_id] = pdd->dev->kfd2kgd-
> >set_address_watch(
> + for_each_inst(xcc_id, xcc_mask)
> + pdd->watch_points[*watch_id] = pdd->dev->kfd2kgd-
> >set_address_watch(
>   pdd->dev->adev,
>   watch_address,
>   watch_address_mask,
>   *watch_id,
>   watch_mode,
>   pdd->dev->vm_info.last_vmid_kfd,
> - 0);
> + xcc_id);
>   amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
>
>   if (!pdd->dev->kfd->shared_resources.enable_mes)
> --
> 2.34.1



RE: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance

2023-07-07 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Huang, JinHuiEric 
> Sent: Thursday, July 6, 2023 2:19 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kim, Jonathan ; Huang, JinHuiEric
> 
> Subject: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance
>
> each xcc instance needs to get iq wait time and set
> grace period accordingly.
>
> Signed-off-by: Eric Huang 
> ---
>  .../drm/amd/amdkfd/kfd_device_queue_manager.c |  9 --
>  .../drm/amd/amdkfd/kfd_device_queue_manager.h |  2 +-
>  .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 32 +++
>  .../drm/amd/amdkfd/kfd_packet_manager_v9.c|  9 +++---
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  2 +-
>  5 files changed, 32 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index a2bff3f01359..0f12c1989e14 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -1606,6 +1606,8 @@ static int set_sched_resources(struct
> device_queue_manager *dqm)
>
>  static int initialize_cpsch(struct device_queue_manager *dqm)
>  {
> + uint32_t xcc_id, xcc_mask = dqm->dev->xcc_mask;
> +
>   pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
>
>   mutex_init(&dqm->lock_hidden);
> @@ -1620,8 +1622,11 @@ static int initialize_cpsch(struct
> device_queue_manager *dqm)
>   init_sdma_bitmaps(dqm);
>
>   if (dqm->dev->kfd2kgd->get_iq_wait_times)
> - dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
> - &dqm->wait_times, 0);
> + for_each_inst(xcc_id, xcc_mask)
> + dqm->dev->kfd2kgd->get_iq_wait_times(
> + dqm->dev->adev,
> + &dqm->wait_times[xcc_id],
> + xcc_id);
>   return 0;
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> index 7dd4b177219d..62a6dc8d3032 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> @@ -262,7 +262,7 @@ struct device_queue_manager {
>   /* used for GFX 9.4.3 only */
>   uint32_tcurrent_logical_xcc_start;
>
> - uint32_twait_times;
> + uint32_twait_times[32];

I think wait_times[16] should be sufficient.  We only get the hamming weight of 
16 bits for NUM_XCC and I believe the xcc_mask is declared as a uint16_t in the 
KGD portion anyway.  We may as well align to that.

>
>   wait_queue_head_t   destroy_wait;
>  };
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> index 401096c103b2..f37ab4b6d88c 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> @@ -374,27 +374,31 @@ int pm_update_grace_period(struct
> packet_manager *pm, uint32_t grace_period)
>  {
>   int retval = 0;
>   uint32_t *buffer, size;
> + uint32_t xcc_id, xcc_mask = pm->dqm->dev->xcc_mask;
>
>   size = pm->pmf->set_grace_period_size;
>
>   mutex_lock(&pm->lock);
>
>   if (size) {
> - kq_acquire_packet_buffer(pm->priv_queue,
> - size / sizeof(uint32_t),
> - (unsigned int **)&buffer);
> -
> - if (!buffer) {
> - pr_err("Failed to allocate buffer on kernel queue\n");
> - retval = -ENOMEM;
> - goto out;
> - }
> + for_each_inst(xcc_id, xcc_mask) {
> + kq_acquire_packet_buffer(pm->priv_queue,
> + size / sizeof(uint32_t),
> + (unsigned int **)&buffer);
>
> - retval = pm->pmf->set_grace_period(pm, buffer,
> grace_period);
> - if (!retval)
> - kq_submit_packet(pm->priv_queue);
> - else
> - kq_rollback_packet(pm->priv_queue);
> + if (!buffer) {
> + pr_err("Failed to allocate buffer on kernel
> queue\n");
> + retval = -ENOMEM;
> + goto out;
> + }
> +
> + retval = pm-

RE: [PATCH 5/6] drm/amdkfd: always keep trap enabled for GC v9.4.3

2023-07-07 Thread Kim, Jonathan
[Public]

If we implement this in the GC 9.4.3 KGD disable call in patch 1 (see comments 
for that one), then it will look less awkward and we can drop this.

Thanks,

Jon

> -Original Message-
> From: Huang, JinHuiEric 
> Sent: Thursday, July 6, 2023 2:19 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kim, Jonathan ; Huang, JinHuiEric
> 
> Subject: [PATCH 5/6] drm/amdkfd: always keep trap enabled for GC v9.4.3
>
> To set TTMP setup on by default.
>
> Signed-off-by: Eric Huang 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_debug.c   | 3 ++-
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c | 6 +++---
>  3 files changed, 6 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index cf1db0ab3471..47c5d16677d6 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -2842,7 +2842,7 @@ static int runtime_disable(struct kfd_process *p)
>   pdd->spi_dbg_override =
>   pdd->dev->kfd2kgd-
> >disable_debug_trap(
>   pdd->dev->adev,
> - false,
> + KFD_GC_VERSION(pdd->dev) ==
> IP_VERSION(9, 4, 3),
>   pdd->dev->vm_info.last_vmid_kfd);
>
>   if (!pdd->dev->kfd->shared_resources.enable_mes)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> index 190b03efe5ff..4cb9b3b18065 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> @@ -591,7 +591,8 @@ void kfd_dbg_trap_deactivate(struct kfd_process
> *target, bool unwind, int unwind
>   pdd->spi_dbg_override =
>   pdd->dev->kfd2kgd->disable_debug_trap(
>   pdd->dev->adev,
> - target->runtime_info.ttmp_setup,
> + KFD_GC_VERSION(pdd->dev) ==
> IP_VERSION(9, 4, 3) ?
> + true : target-
> >runtime_info.ttmp_setup,
>   pdd->dev->vm_info.last_vmid_kfd);
>   amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index ba04a4baecf2..91ae9121e2bf 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -1644,9 +1644,9 @@ struct kfd_process_device
> *kfd_create_process_device_data(struct kfd_node *dev,
>   p->pdds[p->n_pdds++] = pdd;
>   if (kfd_dbg_is_per_vmid_supported(pdd->dev))
>   pdd->spi_dbg_override = pdd->dev->kfd2kgd-
> >disable_debug_trap(
> - pdd->dev->adev,
> - false,
> - 0);
> + pdd->dev->adev,
> + KFD_GC_VERSION(dev) == IP_VERSION(9, 4,
> 3),
> + 0);
>
>   /* Init idr used for memory handle translation */
>   idr_init(&pdd->alloc_idr);
> --
> 2.34.1



RE: [PATCH 1/6] drm/amdkfd: add kfd2kgd debugger callbacks for GC v9.4.3

2023-07-07 Thread Kim, Jonathan
[AMD Official Use Only - General]

> -Original Message-
> From: Huang, JinHuiEric 
> Sent: Thursday, July 6, 2023 2:19 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kim, Jonathan ; Kim, Jonathan
> ; Huang, JinHuiEric 
> Subject: [PATCH 1/6] drm/amdkfd: add kfd2kgd debugger callbacks for GC
> v9.4.3
>
> From: Jonathan Kim 
>
> Implement the similarities as GC v9.4.2, and the difference
> for GC v9.4.3 HW spec, i.e. xcc instance.
>
> Signed-off-by: Jonathan Kim 
> Signed-off-by: Eric Huang 
> ---
>  .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c  |  10 +-
>  .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h  |  30 
>  .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c   | 152
> +-
>  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c|   9 +-
>  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h|  10 +-
>  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c|   3 +-
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  15 +-
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |  10 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_debug.c|   3 +-
>  .../drm/amd/amdkfd/kfd_device_queue_manager.c |   2 +-
>  .../drm/amd/amdkfd/kfd_packet_manager_v9.c|   3 +-
>  .../gpu/drm/amd/include/kgd_kfd_interface.h   |   9 +-
>  12 files changed, 230 insertions(+), 26 deletions(-)
>  create mode 100644
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> index 60f9e027fb66..7d7eaed68531 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> @@ -23,6 +23,7 @@
>  #include "amdgpu_amdkfd.h"
>  #include "amdgpu_amdkfd_arcturus.h"
>  #include "amdgpu_amdkfd_gfx_v9.h"
> +#include "amdgpu_amdkfd_aldebaran.h"
>  #include "gc/gc_9_4_2_offset.h"
>  #include "gc/gc_9_4_2_sh_mask.h"
>  #include 
> @@ -36,7 +37,7 @@
>   * initialize the debug mode registers after it has disabled GFX off during 
> the
>   * debug session.
>   */
> -static uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device
> *adev,
> +uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev,
>   bool restore_dbg_registers,
>   uint32_t vmid)
>  {
> @@ -50,7 +51,7 @@ static uint32_t
> kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev,
>  }
>
>  /* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
> -static uint32_t kgd_aldebaran_disable_debug_trap(struct amdgpu_device
> *adev,
> +uint32_t kgd_aldebaran_disable_debug_trap(struct amdgpu_device *adev,
>   bool keep_trap_enabled,
>   uint32_t vmid)
>  {
> @@ -107,7 +108,7 @@ static uint32_t
> kgd_aldebaran_set_wave_launch_trap_override(struct amdgpu_device
>   return data;
>  }
>
> -static uint32_t kgd_aldebaran_set_wave_launch_mode(struct
> amdgpu_device *adev,
> +uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device
> *adev,
>   uint8_t wave_launch_mode,
>   uint32_t vmid)
>  {
> @@ -125,7 +126,8 @@ static uint32_t
> kgd_gfx_aldebaran_set_address_watch(
>   uint32_t watch_address_mask,
>   uint32_t watch_id,
>   uint32_t watch_mode,
> - uint32_t debug_vmid)
> + uint32_t debug_vmid,
> + uint32_t inst )
>  {
>   uint32_t watch_address_high;
>   uint32_t watch_address_low;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h
> new file mode 100644
> index ..ed349ff397bd
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h
> @@ -0,0 +1,30 @@
> +/*
> + * Copyright 2023 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notic

RE: [PATCH 5/5] drm/amdkfd: add multi-process debugging support for GC v9.4.3

2023-07-06 Thread Kim, Jonathan
[Public]

As mentioned before, if we're going to set up the trap temporaries, we may as 
well enable them persistently now that GC 9.4.3 should not have a performance 
penalty on TTMP setup i.e. we can keep_trap_enabled any time we 
disable_debug_trap.

Thanks,

Jon

> -Original Message-
> From: Huang, JinHuiEric 
> Sent: Wednesday, July 5, 2023 6:57 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kim, Jonathan ; Kim, Jonathan
> ; Kuehling, Felix ;
> Huang, JinHuiEric 
> Subject: [PATCH 5/5] drm/amdkfd: add multi-process debugging support for
> GC v9.4.3
>
> From: Jonathan Kim 
>
> Similar to GC v9.4.2, GC v9.4.3 should use the 5-Dword extended
> MAP_PROCESS packet to support multi-process debugging.  Update the
> multi-process debug support list so that the KFD updates the runlist
> on debug mode setting and that it allocates enough GTT memory during
> KFD device initialization.
>
> Signed-off-by: Jonathan Kim 
> Reviewed-by: Felix Kuehling 
> Signed-off-by: Eric Huang 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_debug.h | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> index a289e59ceb79..a0afc6a7b6c4 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> @@ -76,8 +76,9 @@ int kfd_dbg_send_exception_to_runtime(struct
> kfd_process *p,
>
>  static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_node *dev)
>  {
> - return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
> -KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0);
> + return (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
> + KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
> + KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0));
>  }
>
>  void debug_event_write_work_handler(struct work_struct *work);
> --
> 2.34.1



RE: [PATCH 2/5] drm/amdkfd: restore debugger additional info for gfx v9_4_3

2023-07-06 Thread Kim, Jonathan
[AMD Official Use Only - General]

> -Original Message-
> From: Huang, JinHuiEric 
> Sent: Wednesday, July 5, 2023 6:57 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kim, Jonathan ; Kim, Jonathan
> ; Kasiviswanathan, Harish
> ; Lin, Amber ;
> Huang, JinHuiEric 
> Subject: [PATCH 2/5] drm/amdkfd: restore debugger additional info for gfx
> v9_4_3
>
> From: Jonathan Kim 
>
> The additional information that the KFD reports to the debugger was
> destroyed when the following commit was merged:
> "drm/amdkfd: convert switches to IP version checking"
>
> Signed-off-by: Jonathan Kim 
> Reviewed-by: Harish Kasiviswanathan 
> Signed-off-by: Jonathan Kim 
> Acked-by: Amber Lin 
> Signed-off-by: Eric Huang 

This patch is Reviewed-by: Jonathan Kim 

> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 10 --
>  drivers/gpu/drm/amd/amdkfd/kfd_topology.h |  3 +++
>  2 files changed, 11 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> index 61fc62f3e003..1a4cdee86759 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> @@ -1932,8 +1932,14 @@ static void kfd_topology_set_capabilities(struct
> kfd_topology_device *dev)
>
>   HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_MODE_SUPPORTED;
>
>   if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) {
> - dev->node_props.debug_prop |=
> HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9 |
> -
>   HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
> + if (KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 3))
> + dev->node_props.debug_prop |=
> +
>   HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9_4_3 |
> +
>   HSA_DBG_WATCH_ADDR_MASK_HI_BIT_GFX9_4_3;
> + else
> + dev->node_props.debug_prop |=
> + HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9
> |
> + HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
>
>   if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(9, 4, 2))
>   dev->node_props.debug_prop |=
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
> b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
> index cba2cd5ed9d1..dea32a9e5506 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
> @@ -32,9 +32,12 @@
>  #define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 32
>
>  #define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9  6
> +#define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9_4_3 7
>  #define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 7
>  #define HSA_DBG_WATCH_ADDR_MASK_HI_BIT  \
>   (29 << HSA_DBG_WATCH_ADDR_MASK_HI_BIT_SHIFT)
> +#define HSA_DBG_WATCH_ADDR_MASK_HI_BIT_GFX9_4_3 \
> + (30 << HSA_DBG_WATCH_ADDR_MASK_HI_BIT_SHIFT)
>
>  struct kfd_node_properties {
>   uint64_t hive_id;
> --
> 2.34.1



RE: [PATCH 3/5] drm/amdkfd: add xcc instance for debugger APIs

2023-07-06 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Huang, JinHuiEric 
> Sent: Wednesday, July 5, 2023 6:57 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kim, Jonathan ; Huang, JinHuiEric
> 
> Subject: [PATCH 3/5] drm/amdkfd: add xcc instance for debugger APIs
>
> Since GFX9 GPU has multiple xcc instances, this is to
> implement this change in KFD for debugger APIs.

This redefines the KGD calls in patch 1 so I think this patch and patch 1 can 
be squashed.
Spatial partitioning is a known requirement outside of debugging so I don't 
think there's a need to explicitly point this out for debugger updates in the 
description.
Some other inline comments ...

>
> Signed-off-by: Eric Huang 
> ---
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c|  6 --
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c |  6 --
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c  | 12 ---
> -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h  | 13 +-
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c  |  6 --
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c   | 12 
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h   | 13 +--
> --
>  drivers/gpu/drm/amd/amdkfd/kfd_debug.c  |  6 --
>  .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c   |  2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c  |  3 ++-
>  drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 12 
>  11 files changed, 61 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> index f3f7e0437447..c7f88bfa1976 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> @@ -126,7 +126,8 @@ static uint32_t
> kgd_gfx_aldebaran_set_address_watch(
>   uint32_t watch_address_mask,
>   uint32_t watch_id,
>   uint32_t watch_mode,
> - uint32_t debug_vmid)
> + uint32_t debug_vmid,
> + uint32_t inst )
>  {
>   uint32_t watch_address_high;
>   uint32_t watch_address_low;
> @@ -163,7 +164,8 @@ static uint32_t
> kgd_gfx_aldebaran_set_address_watch(
>  }
>
>  static uint32_t kgd_gfx_aldebaran_clear_address_watch(struct
> amdgpu_device *adev,
> -   uint32_t watch_id)
> +   uint32_t watch_id,
> +   uint32_t inst)

Why do we need to instance this on a 0 return?  I don't think we need to change 
the prototype here.

>  {
>   return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
> index 3299e268f234..c0546db91579 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
> @@ -454,7 +454,8 @@ static uint32_t kgd_gfx_v9_4_3_set_address_watch(
>   uint32_t watch_address_mask,
>   uint32_t watch_id,
>   uint32_t watch_mode,
> - uint32_t debug_vmid)
> + uint32_t debug_vmid,
> + uint32_t inst)

Let's use the inst arg in this function instead of hardcoding it to 0.
You're already setting the GC instance to 0 in the caller in this patch anyways 
so we may as well use the arg here to give context as to why we've updated the 
prototypes.

>  {
>   uint32_t watch_address_high;
>   uint32_t watch_address_low;
> @@ -491,7 +492,8 @@ static uint32_t kgd_gfx_v9_4_3_set_address_watch(
>  }
>
>  static uint32_t kgd_gfx_v9_4_3_clear_address_watch(struct amdgpu_device
> *adev,
> - uint32_t watch_id)
> + uint32_t watch_id,
> + uint32_t inst)
>  {
>   return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> index 8ad7a7779e14..04daa8f9456b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> @@ -886,7 +886,8 @@ uint32_t kgd_gfx_v10_set_address_watch(struct
> amdgpu_device *adev,
>   uint32_t watch_address_mask,
>   uint32_t 

RE: [PATCH 4/5] drm/amdkfd: add xcc instance for debugger APIs

2023-07-04 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Huang, JinHuiEric 
> Sent: Wednesday, June 28, 2023 5:23 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kim, Jonathan ; Huang, JinHuiEric
> 
> Subject: [PATCH 4/5] drm/amdkfd: add xcc instance for debugger APIs
>
> Since GFX9 GPU has multiple xcc instances, this is to
> implement this change in KFD for debugger APIs.
>
> Signed-off-by: Eric Huang 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c | 6 --
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c  | 6 --
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c   | 6 --
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h   | 6 --
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c   | 6 --
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c| 6 --
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h| 6 --
>  drivers/gpu/drm/amd/amdkfd/kfd_debug.c   | 6 --
>  drivers/gpu/drm/amd/include/kgd_kfd_interface.h  | 6 --
>  9 files changed, 36 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> index f3f7e0437447..c7f88bfa1976 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> @@ -126,7 +126,8 @@ static uint32_t
> kgd_gfx_aldebaran_set_address_watch(
>   uint32_t watch_address_mask,
>   uint32_t watch_id,
>   uint32_t watch_mode,
> - uint32_t debug_vmid)
> + uint32_t debug_vmid,
> + uint32_t inst )
>  {
>   uint32_t watch_address_high;
>   uint32_t watch_address_low;
> @@ -163,7 +164,8 @@ static uint32_t
> kgd_gfx_aldebaran_set_address_watch(
>  }
>
>  static uint32_t kgd_gfx_aldebaran_clear_address_watch(struct
> amdgpu_device *adev,
> -   uint32_t watch_id)
> +   uint32_t watch_id,
> +   uint32_t inst)
>  {
>   return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
> index 7aab8dcf46e1..17fe4e90f203 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
> @@ -454,7 +454,8 @@ static uint32_t kgd_gfx_v9_4_3_set_address_watch(
>   uint32_t watch_address_mask,
>   uint32_t watch_id,
>   uint32_t watch_mode,
> - uint32_t debug_vmid)
> + uint32_t debug_vmid,
> + uint32_t inst)
>  {
>   uint32_t watch_address_high;
>   uint32_t watch_address_low;
> @@ -491,7 +492,8 @@ static uint32_t kgd_gfx_v9_4_3_set_address_watch(
>  }
>
>  static uint32_t kgd_gfx_v9_4_3_clear_address_watch(struct amdgpu_device
> *adev,
> - uint32_t watch_id)
> + uint32_t watch_id,
> + uint32_t inst)
>  {
>   return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> index 8ad7a7779e14..225b8929a878 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> @@ -886,7 +886,8 @@ uint32_t kgd_gfx_v10_set_address_watch(struct
> amdgpu_device *adev,
>   uint32_t watch_address_mask,
>   uint32_t watch_id,
>   uint32_t watch_mode,
> - uint32_t debug_vmid)
> + uint32_t debug_vmid,
> + uint32_t inst)
>  {
>   uint32_t watch_address_high;
>   uint32_t watch_address_low;
> @@ -942,7 +943,8 @@ uint32_t kgd_gfx_v10_set_address_watch(struct
> amdgpu_device *adev,
>  }
>
>  uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
> - uint32_t watch_id)
> + uint32_t watch_id,
> + uint32_t inst)
>  {
>   uint32_t watch_address_cntl;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_am

RE: [PATCH 2/5] drm/amdkfd: add multi-process debugging support for GC v9.4.3

2023-07-04 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Huang, JinHuiEric 
> Sent: Wednesday, June 28, 2023 5:23 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kim, Jonathan ; Kim, Jonathan
> ; Kuehling, Felix ;
> Huang, JinHuiEric 
> Subject: [PATCH 2/5] drm/amdkfd: add multi-process debugging support for
> GC v9.4.3
>
> From: Jonathan Kim 
>
> Similar to GC v9.4.2, GC v9.4.3 should use the 5-Dword extended
> MAP_PROCESS packet to support multi-process debugging.  Update the
> multi-process debug support list so that the KFD updates the runlist
> on debug mode setting and that it allocates enough GTT memory during
> KFD device initialization.
>
> Signed-off-by: Jonathan Kim 
> Reviewed-by: Felix Kuehling 
> Signed-off-by: Eric Huang 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_debug.h | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> index a289e59ceb79..a0afc6a7b6c4 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> @@ -76,8 +76,9 @@ int kfd_dbg_send_exception_to_runtime(struct
> kfd_process *p,
>
>  static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_node *dev)
>  {
> - return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
> -KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0);
> + return (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
> + KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
> + KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0));
>  }

This should probably be the last patch in the series, since the earlier patches
are required for debugging to be enabled correctly.
Also did we ever close on enabling the trap temporaries regardless of debug 
mode?
IIRC, core dump will require this to capture consistent wave data.
That should probably be done in this patch since this check will also trigger 
the ttmp setup call on process creation.

Thanks,

Jon

>
>  void debug_event_write_work_handler(struct work_struct *work);
> --
> 2.34.1



RE: [PATCH 1/5] drm/amdgpu: add debugger support for GC v9.4.3

2023-07-04 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Huang, JinHuiEric 
> Sent: Wednesday, June 28, 2023 5:23 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kim, Jonathan ; Kim, Jonathan
> ; Huang, JinHuiEric 
> Subject: [PATCH 1/5] drm/amdgpu: add debugger support for GC v9.4.3

This patch doesn't add support but defines the KGD callbacks required for 
support.

>
> From: Jonathan Kim 
>
> Implement the similarities as GC v9.4.2, and the difference
> for GC v9.4.3 HW spec.
>
> Signed-off-by: Jonathan Kim 
> Signed-off-by: Eric Huang 
> ---
>  .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c  |   7 +-
>  .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h  |  30 
>  .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c   | 146
> +-
>  3 files changed, 179 insertions(+), 4 deletions(-)
>  create mode 100644
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> index 60f9e027fb66..f3f7e0437447 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> @@ -23,6 +23,7 @@
>  #include "amdgpu_amdkfd.h"
>  #include "amdgpu_amdkfd_arcturus.h"
>  #include "amdgpu_amdkfd_gfx_v9.h"
> +#include "amdgpu_amdkfd_aldebaran.h"
>  #include "gc/gc_9_4_2_offset.h"
>  #include "gc/gc_9_4_2_sh_mask.h"
>  #include 
> @@ -36,7 +37,7 @@
>   * initialize the debug mode registers after it has disabled GFX off during 
> the
>   * debug session.
>   */
> -static uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device
> *adev,
> +uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev,
>   bool restore_dbg_registers,
>   uint32_t vmid)
>  {
> @@ -50,7 +51,7 @@ static uint32_t
> kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev,
>  }
>
>  /* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
> -static uint32_t kgd_aldebaran_disable_debug_trap(struct amdgpu_device
> *adev,
> +uint32_t kgd_aldebaran_disable_debug_trap(struct amdgpu_device *adev,
>   bool keep_trap_enabled,
>   uint32_t vmid)
>  {
> @@ -107,7 +108,7 @@ static uint32_t
> kgd_aldebaran_set_wave_launch_trap_override(struct amdgpu_device
>   return data;
>  }
>
> -static uint32_t kgd_aldebaran_set_wave_launch_mode(struct
> amdgpu_device *adev,
> +uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device
> *adev,
>   uint8_t wave_launch_mode,
>   uint32_t vmid)
>  {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h
> new file mode 100644
> index ..5f776ede295e
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h
> @@ -0,0 +1,30 @@
> +/*
> + * Copyright 2021 Advanced Micro Devices, Inc.

Should be dated 2023.

> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included
> in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO
> EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
> DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
> USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + */
> +uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev,
> + bool restore_dbg_registers,
> + uint32_t vmid);
> +uint32_t kgd_aldebaran_disable_debug_trap(struct amdgpu_device *adev,
> + bool keep_trap_enabled,
> +   

RE: [PATCH v2] gpu: drm/amd: Remove the redundant null pointer check in list_for_each_entry() loops

2023-06-12 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Kuehling, Felix 
> Sent: Monday, June 12, 2023 11:25 AM
> To: Lu Hongfei ; Deucher, Alexander
> ; Koenig, Christian
> ; Pan, Xinhui ; David
> Airlie ; Daniel Vetter ; open list:AMD
> KFD ; open list:DRM DRIVERS  de...@lists.freedesktop.org>; open list ; Kim,
> Jonathan 
> Cc: opensource.ker...@vivo.com
> Subject: Re: [PATCH v2] gpu: drm/amd: Remove the redundant null pointer
> check in list_for_each_entry() loops
>
> [+Jon]
>
> Am 2023-06-12 um 07:58 schrieb Lu Hongfei:
> > pqn bound in list_for_each_entry loop will not be null, so there is
> > no need to check whether pqn is NULL or not.
> > Thus remove a redundant null pointer check.
> >
> > Signed-off-by: Lu Hongfei 
> > ---
> > The filename of the previous version was:
> > 0001-gpu-drm-amd-Fix-the-bug-in-list_for_each_entry-loops.patch
> >
> > The modifications made compared to the previous version are as follows:
> > 1. Modified the patch title
> > 2. "Thus remove a redundant null pointer check." is used instead of
> > "We could remove this check."
> >
> >   drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 3 ---
> >   1 file changed, 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> > index cd34e7aaead4..10d0cef844f0 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> > @@ -1097,9 +1097,6 @@ void
> kfd_dbg_set_enabled_debug_exception_mask(struct kfd_process *target,
> >
> > pqm = &target->pqm;
> > list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
> > -   if (!pqn)
>
> Right, this check doesn't make a lot of sense. Jon, was this meant to
> check pqn->q?

Yes that's a bug.  It should be a null check on the queue itself.
I'll send out the fix shortly.

Thanks,

Jon

>
> Regards,
>Felix
>
>
> > -   continue;
> > -
> > found_mask |= pqn->q->properties.exception_status;
> > }
> >


RE: [PATCH] drm/amdkfd: fix and enable debugging for gfx11

2023-06-07 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Kuehling, Felix 
> Sent: Wednesday, June 7, 2023 6:23 PM
> To: Kim, Jonathan ; amd-gfx@lists.freedesktop.org
> Cc: Huang, JinHuiEric 
> Subject: Re: [PATCH] drm/amdkfd: fix and enable debugging for gfx11
>
>
> On 2023-06-07 16:20, Jonathan Kim wrote:
> > There are a couple of fixes required to enable gfx11 debugging.
> >
> > First, ADD_QUEUE.trap_en is an inappropriate place to toggle
> > a per-process register so move it to SET_SHADER_DEBUGGER.trap_en.
> > When ADD_QUEUE.skip_process_ctx_clear is set, MES will prioritize
> > the SET_SHADER_DEBUGGER.trap_en setting.
> >
> > Second, to preserve correct save/restore priviledged wave states
> > in coordination with the trap enablement setting, resume suspended
> > waves early in the disable call.
> >
> > NOTE: The AMDGPU_MES_VERSION_MASK check is a place holder as
> > MES FW updates have been reviewed but is awaiting binary
> > creation.  Once the binaries have been created, this check may
> > be subject to change.
> >
> > v2: do a trap_en safety check in case old mes doesn't accept
> > unused trap_en d-word.
> > remove unnecessary process termination work around.
> >
> > Signed-off-by: Jonathan Kim 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c|  7 ++-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h|  4 +++-
> >   drivers/gpu/drm/amd/amdgpu/mes_v11_0.c |  1 +
> >   drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 14 ++
> >   .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  |  3 +--
> >   drivers/gpu/drm/amd/amdkfd/kfd_topology.c  | 12 +++-
> >   drivers/gpu/drm/amd/include/mes_v11_api_def.h  |  1 +
> >   7 files changed, 25 insertions(+), 17 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> > index 20cc3fffe921..e9091ebfe230 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> > @@ -928,7 +928,8 @@ int amdgpu_mes_set_shader_debugger(struct
> amdgpu_device *adev,
> > uint64_t process_context_addr,
> > uint32_t spi_gdbg_per_vmid_cntl,
> > const uint32_t *tcp_watch_cntl,
> > -   uint32_t flags)
> > +   uint32_t flags,
> > +   bool trap_en)
> >   {
> > struct mes_misc_op_input op_input = {0};
> > int r;
> > @@ -945,6 +946,10 @@ int amdgpu_mes_set_shader_debugger(struct
> amdgpu_device *adev,
> > memcpy(op_input.set_shader_debugger.tcp_watch_cntl,
> tcp_watch_cntl,
> >
>   sizeof(op_input.set_shader_debugger.tcp_watch_cntl));
> >
> > +   if (((adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK)
> >>
> > +   AMDGPU_MES_API_VERSION_SHIFT) >= 14)
> > +   op_input.set_shader_debugger.trap_en = trap_en;
> > +
>
> It's probably too late to change the GFX11 MES API at this point. But
> why didn't they just add a trap_en bit in the existing flags field? That
> could have avoided the need for the compatibility checks.

Thanks for the review.  That was a decision I made.
The flags line up with the SQ_DEBUG register and will line up with the 
set_flags API.
Right now, they're a small selection but could expand to the full 32-bit width 
in the future (or even skip bit positions; hardware is much harder to change).
Also, trap_en really only needs to toggle for GFX11 as a workaround.  It 
should always be set for non-GFX11.
So the flags should probably be reserved for things we actually want to toggle 
on an ongoing conditional basis.

Thanks,

Jon

>
> Anyway, the patch is
>
> Reviewed-by: Felix Kuehling 
>
>
> > amdgpu_mes_lock(&adev->mes);
> >
> > r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> > index b5f5eed2b5ef..2d6ac30b7135 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> > @@ -294,6 +294,7 @@ struct mes_misc_op_input {
> > } flags;
> > uint32_t spi_gdbg_per_vmid_cntl;
> > uint32_t tcp_watch_cntl[4];
> > +   uint32_t trap_en;
> > } set_shader_debugger;
> > };
> >   };
> > @@ -361,7 +362,8 @@ int amdgpu_mes_set_shader_de

RE: [PATCH] drm/amdkfd: fix and enable debugging for gfx11

2023-06-07 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Kim, Jonathan
> Sent: Wednesday, June 7, 2023 3:28 PM
> To: Kuehling, Felix ; amd-
> g...@lists.freedesktop.org
> Cc: Huang, JinHuiEric 
> Subject: RE: [PATCH] drm/amdkfd: fix and enable debugging for gfx11
>
>
>
> > -Original Message-
> > From: Kuehling, Felix 
> > Sent: Wednesday, June 7, 2023 2:20 PM
> > To: amd-gfx@lists.freedesktop.org; Kim, Jonathan
> 
> > Cc: Huang, JinHuiEric 
> > Subject: Re: [PATCH] drm/amdkfd: fix and enable debugging for gfx11
> >
> >
> > On 2023-06-07 13:26, Jonathan Kim wrote:
> > > There are a few fixes required to enable gfx11 debugging.
> > >
> > > First, ADD_QUEUE.trap_en is an inappropriate place to toggle
> > > a per-process register so move it to SET_SHADER_DEBUGGER.trap_en.
> > > When ADD_QUEUE.skip_process_ctx_clear is set, MES will prioritize
> > > the SET_SHADER_DEBUGGER.trap_en setting.
> >
> > I see you have a firmware version check for enabling debugging. But is
> > the struct SET_SHADER_DEBUGGER change safe with older firmware when
> > debugging is disabled?
>
> Right.  It changes the shape of MISC_OPs.
> I'll have to figure out something that's backwards compatible.

Actually, I think we should be okay.  MISC_OPs allows a max data packet of 20 
D-WORDs.
So adding another D-WORD to SET_SHADER_DEBUGGER should be well under that limit.
Writing to an unused D-WORD is likely not harmful, but I can version-check 
the trap_en setting in the MES KGD call itself just to be safe.

Thanks,

Jon

>
> >
> >
> > >
> > > Second, to preserve correct save/restore priviledged wave states
> > > in coordination with the trap enablement setting, resume suspended
> > > waves early in the disable call.
> > >
> > > Finally, displaced single stepping can cause non-fatal illegal
> > > instructions during process termination on debug disable.  To work
> > > around this, stall the waves prior to disable and allow clean
> > > up to happen naturally on process termination.
> > >
> > > NOTE: The AMDGPU_MES_VERSION_MASK check is a place holder as
> > > MES FW updates have been reviewed but is awaiting binary
> > > creation.  Once the binaries have been created, this check may
> > > be subject to change.
> > >
> > > Signed-off-by: Jonathan Kim 
> > > ---
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c   |  5 ++-
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h   |  4 ++-
> > >   drivers/gpu/drm/amd/amdgpu/mes_v11_0.c|  1 +
> > >   drivers/gpu/drm/amd/amdkfd/kfd_debug.c| 31 ++
> -
> > >   .../drm/amd/amdkfd/kfd_device_queue_manager.c |  3 +-
> > >   drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 12 ---
> > >   drivers/gpu/drm/amd/include/mes_v11_api_def.h |  1 +
> > >   7 files changed, 40 insertions(+), 17 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> > > index 20cc3fffe921..95d69f9c7361 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> > > @@ -928,7 +928,8 @@ int amdgpu_mes_set_shader_debugger(struct
> > amdgpu_device *adev,
> > >   uint64_t process_context_addr,
> > >   uint32_t spi_gdbg_per_vmid_cntl,
> > >   const uint32_t *tcp_watch_cntl,
> > > - uint32_t flags)
> > > + uint32_t flags,
> > > + bool trap_en)
> > >   {
> > >   struct mes_misc_op_input op_input = {0};
> > >   int r;
> > > @@ -945,6 +946,8 @@ int amdgpu_mes_set_shader_debugger(struct
> > amdgpu_device *adev,
> > >   memcpy(op_input.set_shader_debugger.tcp_watch_cntl,
> > tcp_watch_cntl,
> > >
> > sizeof(op_input.set_shader_debugger.tcp_watch_cntl));
> > >
> > > + op_input.set_shader_debugger.trap_en = trap_en;
> > > +
> > >   amdgpu_mes_lock(&adev->mes);
> > >
> > >   r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> > > index b5f5eed2b5ef..2d6ac30b7135 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> > > 

RE: [PATCH] drm/amdkfd: fix and enable debugging for gfx11

2023-06-07 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Kuehling, Felix 
> Sent: Wednesday, June 7, 2023 2:20 PM
> To: amd-gfx@lists.freedesktop.org; Kim, Jonathan 
> Cc: Huang, JinHuiEric 
> Subject: Re: [PATCH] drm/amdkfd: fix and enable debugging for gfx11
>
>
> On 2023-06-07 13:26, Jonathan Kim wrote:
> > There are a few fixes required to enable gfx11 debugging.
> >
> > First, ADD_QUEUE.trap_en is an inappropriate place to toggle
> > a per-process register so move it to SET_SHADER_DEBUGGER.trap_en.
> > When ADD_QUEUE.skip_process_ctx_clear is set, MES will prioritize
> > the SET_SHADER_DEBUGGER.trap_en setting.
>
> I see you have a firmware version check for enabling debugging. But is
> the struct SET_SHADER_DEBUGGER change safe with older firmware when
> debugging is disabled?

Right.  It changes the shape of MISC_OPs.
I'll have to figure out something that's backwards compatible.

>
>
> >
> > Second, to preserve correct save/restore priviledged wave states
> > in coordination with the trap enablement setting, resume suspended
> > waves early in the disable call.
> >
> > Finally, displaced single stepping can cause non-fatal illegal
> > instructions during process termination on debug disable.  To work
> > around this, stall the waves prior to disable and allow clean
> > up to happen naturally on process termination.
> >
> > NOTE: The AMDGPU_MES_VERSION_MASK check is a place holder as
> > MES FW updates have been reviewed but is awaiting binary
> > creation.  Once the binaries have been created, this check may
> > be subject to change.
> >
> > Signed-off-by: Jonathan Kim 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c   |  5 ++-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h   |  4 ++-
> >   drivers/gpu/drm/amd/amdgpu/mes_v11_0.c|  1 +
> >   drivers/gpu/drm/amd/amdkfd/kfd_debug.c| 31 ++-
> >   .../drm/amd/amdkfd/kfd_device_queue_manager.c |  3 +-
> >   drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 12 ---
> >   drivers/gpu/drm/amd/include/mes_v11_api_def.h |  1 +
> >   7 files changed, 40 insertions(+), 17 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> > index 20cc3fffe921..95d69f9c7361 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> > @@ -928,7 +928,8 @@ int amdgpu_mes_set_shader_debugger(struct
> amdgpu_device *adev,
> > uint64_t process_context_addr,
> > uint32_t spi_gdbg_per_vmid_cntl,
> > const uint32_t *tcp_watch_cntl,
> > -   uint32_t flags)
> > +   uint32_t flags,
> > +   bool trap_en)
> >   {
> > struct mes_misc_op_input op_input = {0};
> > int r;
> > @@ -945,6 +946,8 @@ int amdgpu_mes_set_shader_debugger(struct
> amdgpu_device *adev,
> > memcpy(op_input.set_shader_debugger.tcp_watch_cntl,
> tcp_watch_cntl,
> >
>   sizeof(op_input.set_shader_debugger.tcp_watch_cntl));
> >
> > +   op_input.set_shader_debugger.trap_en = trap_en;
> > +
> > amdgpu_mes_lock(&adev->mes);
> >
> > r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> > index b5f5eed2b5ef..2d6ac30b7135 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> > @@ -294,6 +294,7 @@ struct mes_misc_op_input {
> > } flags;
> > uint32_t spi_gdbg_per_vmid_cntl;
> > uint32_t tcp_watch_cntl[4];
> > +   uint32_t trap_en;
> > } set_shader_debugger;
> > };
> >   };
> > @@ -361,7 +362,8 @@ int amdgpu_mes_set_shader_debugger(struct
> amdgpu_device *adev,
> > uint64_t process_context_addr,
> > uint32_t spi_gdbg_per_vmid_cntl,
> > const uint32_t *tcp_watch_cntl,
> > -   uint32_t flags);
> > +   uint32_t flags,
> > +   bool trap_en);
> >
> >   int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
> > int queue_type, int idx,
> > diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> b/drivers/gpu/drm/amd/amdgpu/mes

RE: [PATCH] drm/amdkfd: optimize gfx off enable toggle for debugging

2023-06-07 Thread Kim, Jonathan
[Public]

+ Felix (typo on email)

> -Original Message-
> From: Kim, Jonathan 
> Sent: Wednesday, June 7, 2023 1:32 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: felix.kuel...@amd.com; Huang, JinHuiEric ;
> Kim, Jonathan 
> Subject: [PATCH] drm/amdkfd: optimize gfx off enable toggle for debugging
>
> Legacy debug devices limited to pinning a single debug VMID for debugging
> are the only devices that require disabling GFX OFF while accessing
> debug registers.  Debug devices that support multi-process debugging
> rely on the hardware scheduler to update debug registers and do not run
> into GFX OFF access issues.
>
> Remove KFD GFX OFF enable toggle clutter by moving these calls into the
> KGD debug calls themselves.
>
> v2: toggle gfx off around address watch hi/lo settings as well.
>
> Signed-off-by: Jonathan Kim 
> ---
>  .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c  |  4 +++
>  .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |  7 
>  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 33
> ++-
>  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c|  4 +++
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 24 ++
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 22 +++--
>  drivers/gpu/drm/amd/amdkfd/kfd_debug.c| 21 +---
>  7 files changed, 77 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> index 60f9e027fb66..1f0e6ec56618 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> @@ -150,6 +150,8 @@ static uint32_t
> kgd_gfx_aldebaran_set_address_watch(
>   VALID,
>   1);
>
> + amdgpu_gfx_off_ctrl(adev, false);
> +
>   WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H)
> +
>   (watch_id * TCP_WATCH_STRIDE)),
>   watch_address_high);
> @@ -158,6 +160,8 @@ static uint32_t
> kgd_gfx_aldebaran_set_address_watch(
>   (watch_id * TCP_WATCH_STRIDE)),
>   watch_address_low);
>
> + amdgpu_gfx_off_ctrl(adev, true);
> +
>   return watch_address_cntl;
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> index 625db444df1c..a4e28d547173 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> @@ -350,6 +350,8 @@ static uint32_t
> kgd_arcturus_enable_debug_trap(struct amdgpu_device *adev,
>   bool restore_dbg_registers,
>   uint32_t vmid)
>  {
> + amdgpu_gfx_off_ctrl(adev, false);
> +
>   mutex_lock(&adev->grbm_idx_mutex);
>
>   kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
> @@ -362,6 +364,8 @@ static uint32_t
> kgd_arcturus_enable_debug_trap(struct amdgpu_device *adev,
>
>   mutex_unlock(&adev->grbm_idx_mutex);
>
> + amdgpu_gfx_off_ctrl(adev, true);
> +
>   return 0;
>  }
>
> @@ -375,6 +379,7 @@ static uint32_t
> kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev,
>   bool keep_trap_enabled,
>   uint32_t vmid)
>  {
> + amdgpu_gfx_off_ctrl(adev, false);
>
>   mutex_lock(&adev->grbm_idx_mutex);
>
> @@ -388,6 +393,8 @@ static uint32_t
> kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev,
>
>   mutex_unlock(&adev->grbm_idx_mutex);
>
> + amdgpu_gfx_off_ctrl(adev, true);
> +
>   return 0;
>  }
>  const struct kfd2kgd_calls arcturus_kfd2kgd = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> index 8ad7a7779e14..415928139861 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> @@ -754,12 +754,13 @@ uint32_t kgd_gfx_v10_enable_debug_trap(struct
> amdgpu_device *adev,
>   bool restore_dbg_registers,
>   uint32_t vmid)
>  {
> + amdgpu_gfx_off_ctrl(adev, false);
>
>   mutex_lock(&adev->grbm_idx_mutex);
>
>   kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
>
> - /* assume gfx off is disabled for the debug session if rlc restore not
> supported. */
> + /* keep gfx off disabled for the debug session if rlc restore not
> supported. */
>   if (restore_dbg_registers) {

RE: [PATCH] drm/amdkfd: fix and enable debugging for gfx11

2023-06-07 Thread Kim, Jonathan
[Public]

+ Felix (typo on email)

> -Original Message-
> From: Kim, Jonathan 
> Sent: Wednesday, June 7, 2023 1:27 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: felix.kuel...@amd.com; Huang, JinHuiEric ;
> Kim, Jonathan 
> Subject: [PATCH] drm/amdkfd: fix and enable debugging for gfx11
>
> There are a few fixes required to enable gfx11 debugging.
>
> First, ADD_QUEUE.trap_en is an inappropriate place to toggle
> a per-process register so move it to SET_SHADER_DEBUGGER.trap_en.
> When ADD_QUEUE.skip_process_ctx_clear is set, MES will prioritize
> the SET_SHADER_DEBUGGER.trap_en setting.
>
> Second, to preserve correct save/restore priviledged wave states
> in coordination with the trap enablement setting, resume suspended
> waves early in the disable call.
>
> Finally, displaced single stepping can cause non-fatal illegal
> instructions during process termination on debug disable.  To work
> around this, stall the waves prior to disable and allow clean
> up to happen naturally on process termination.
>
> NOTE: The AMDGPU_MES_VERSION_MASK check is a place holder as
> MES FW updates have been reviewed but is awaiting binary
> creation.  Once the binaries have been created, this check may
> be subject to change.
>
> Signed-off-by: Jonathan Kim 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c   |  5 ++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h   |  4 ++-
>  drivers/gpu/drm/amd/amdgpu/mes_v11_0.c|  1 +
>  drivers/gpu/drm/amd/amdkfd/kfd_debug.c| 31 ++-
>  .../drm/amd/amdkfd/kfd_device_queue_manager.c |  3 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 12 ---
>  drivers/gpu/drm/amd/include/mes_v11_api_def.h |  1 +
>  7 files changed, 40 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> index 20cc3fffe921..95d69f9c7361 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> @@ -928,7 +928,8 @@ int amdgpu_mes_set_shader_debugger(struct
> amdgpu_device *adev,
>   uint64_t process_context_addr,
>   uint32_t spi_gdbg_per_vmid_cntl,
>   const uint32_t *tcp_watch_cntl,
> - uint32_t flags)
> + uint32_t flags,
> + bool trap_en)
>  {
>   struct mes_misc_op_input op_input = {0};
>   int r;
> @@ -945,6 +946,8 @@ int amdgpu_mes_set_shader_debugger(struct
> amdgpu_device *adev,
>   memcpy(op_input.set_shader_debugger.tcp_watch_cntl,
> tcp_watch_cntl,
>
>   sizeof(op_input.set_shader_debugger.tcp_watch_cntl));
>
> + op_input.set_shader_debugger.trap_en = trap_en;
> +
>   amdgpu_mes_lock(&adev->mes);
>
>   r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> index b5f5eed2b5ef..2d6ac30b7135 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> @@ -294,6 +294,7 @@ struct mes_misc_op_input {
>   } flags;
>   uint32_t spi_gdbg_per_vmid_cntl;
>   uint32_t tcp_watch_cntl[4];
> + uint32_t trap_en;
>   } set_shader_debugger;
>   };
>  };
> @@ -361,7 +362,8 @@ int amdgpu_mes_set_shader_debugger(struct
> amdgpu_device *adev,
>   uint64_t process_context_addr,
>   uint32_t spi_gdbg_per_vmid_cntl,
>   const uint32_t *tcp_watch_cntl,
> - uint32_t flags);
> + uint32_t flags,
> + bool trap_en);
>
>  int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
>   int queue_type, int idx,
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> index c4e3cb8d44de..1bdaa00c0b46 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> @@ -347,6 +347,7 @@ static int mes_v11_0_misc_op(struct amdgpu_mes
> *mes,
>   memcpy(misc_pkt.set_shader_debugger.tcp_watch_cntl,
>   input->set_shader_debugger.tcp_watch_cntl,
>
>   sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl));
> + misc_pkt.set_shader_debugger.trap_en = input-
> >set_shader_debugger.trap_en;
>   break;
>   default:
>   DRM_ERROR("uns

RE: [PATCH] drm/amdkfd: potential error pointer dereference in ioctl

2023-06-06 Thread Kim, Jonathan
[AMD Official Use Only - General]

> -Original Message-
> From: Dan Carpenter 
> Sent: Tuesday, June 6, 2023 4:34 AM
> To: Kim, Jonathan 
> Cc: Kuehling, Felix ; Deucher, Alexander
> ; Koenig, Christian
> ; Pan, Xinhui ; David
> Airlie ; Daniel Vetter ; amd-
> g...@lists.freedesktop.org; kernel-janit...@vger.kernel.org
> Subject: [PATCH] drm/amdkfd: potential error pointer dereference in ioctl
>
> Caution: This message originated from an External Source. Use proper
> caution when opening attachments, clicking links, or responding.
>
>
> The "target" either comes from kfd_create_process() which returns error
> pointers on error or kfd_lookup_process_by_pid() which returns NULL on
> error.  So we need to check for both types of errors.
>
> Fixes: a42e42c4e3b1 ("drm/amdkfd: prepare per-process debug enable and
> disable")
> Signed-off-by: Dan Carpenter 

Thank you for catching this.
This looks good to me.

Reviewed-by: Jonathan Kim 

> ---
> I'm not sure how to compile this code or why I'm seeing this warning
> again after two years...  Very strange.
>
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index fc385000c007..6a27b000a246 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -2920,9 +2920,9 @@ static int kfd_ioctl_set_debug_trap(struct file
> *filep, struct kfd_process *p, v
> target = kfd_lookup_process_by_pid(pid);
> }
>
> -   if (!target) {
> +   if (IS_ERR_OR_NULL(target)) {
> pr_debug("Cannot find process PID %i to debug\n", args->pid);
> -   r = -ESRCH;
> +   r = target ? PTR_ERR(target) : -ESRCH;
> goto out;
> }
>
> --
> 2.39.2



RE: [PATCH 01/33] drm/amdkfd: add debug and runtime enable interface

2023-05-31 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Alex Deucher 
> Sent: Wednesday, May 31, 2023 2:15 PM
> To: Kuehling, Felix 
> Cc: Kim, Jonathan ; amd-
> g...@lists.freedesktop.org; dri-de...@lists.freedesktop.org; Huang, JinHuiEric
> 
> Subject: Re: [PATCH 01/33] drm/amdkfd: add debug and runtime enable
> interface
>
> Caution: This message originated from an External Source. Use proper
> caution when opening attachments, clicking links, or responding.
>
>
> On Tue, May 30, 2023 at 3:17 PM Felix Kuehling 
> wrote:
> >
> > Am 2023-05-25 um 13:27 schrieb Jonathan Kim:
> > > Introduce the GPU debug operations interface.
> > >
> > > For ROCm-GDB to extend the GNU Debugger's ability to inspect the AMD
> GPU
> > > instruction set, provide the necessary interface to allow the debugger
> > > to HW debug-mode set and query exceptions per HSA queue, process or
> > > device.
> > >
> > > The runtime_enable interface coordinates exception handling with the
> > > HSA runtime.
> > >
> > > Usage is available in the kern docs at uapi/linux/kfd_ioctl.h.
> > >
> > > v2: add num_xcc to device snapshot entry.
> > > fixup missing EC_QUEUE_PACKET_RESERVED mask.
> > >
> > > Signed-off-by: Jonathan Kim 
> >
> > Reviewed-by: Felix Kuehling 
>
> Can you provide a link to the userspace which uses this?

Hi Alex,

Current WIP user space link is here -> 
https://github.com/ROCm-Developer-Tools/ROCdbgapi/tree/wip-dbgapi.
This will eventually go to amd-master.

Thanks,

Jon

>
> Alex
>
> >
> >
> > > ---
> > >   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  48 ++
> > >   include/uapi/linux/kfd_ioctl.h   | 668 ++-
> > >   2 files changed, 715 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > > index 88fe1f31739d..f4b50b74818e 100644
> > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > > @@ -2729,6 +2729,48 @@ static int kfd_ioctl_criu(struct file *filep, 
> > > struct
> kfd_process *p, void *data)
> > >   return ret;
> > >   }
> > >
> > > +static int kfd_ioctl_runtime_enable(struct file *filep, struct 
> > > kfd_process
> *p, void *data)
> > > +{
> > > + return 0;
> > > +}
> > > +
> > > +static int kfd_ioctl_set_debug_trap(struct file *filep, struct 
> > > kfd_process
> *p, void *data)
> > > +{
> > > + struct kfd_ioctl_dbg_trap_args *args = data;
> > > + int r = 0;
> > > +
> > > + if (sched_policy == KFD_SCHED_POLICY_NO_HWS) {
> > > + pr_err("Debugging does not support sched_policy %i",
> sched_policy);
> > > + return -EINVAL;
> > > + }
> > > +
> > > + switch (args->op) {
> > > + case KFD_IOC_DBG_TRAP_ENABLE:
> > > + case KFD_IOC_DBG_TRAP_DISABLE:
> > > + case KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT:
> > > + case KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED:
> > > + case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE:
> > > + case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE:
> > > + case KFD_IOC_DBG_TRAP_SUSPEND_QUEUES:
> > > + case KFD_IOC_DBG_TRAP_RESUME_QUEUES:
> > > + case KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH:
> > > + case KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH:
> > > + case KFD_IOC_DBG_TRAP_SET_FLAGS:
> > > + case KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT:
> > > + case KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO:
> > > + case KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT:
> > > + case KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT:
> > > + pr_warn("Debugging not supported yet\n");
> > > + r = -EACCES;
> > > + break;
> > > + default:
> > > + pr_err("Invalid option: %i\n", args->op);
> > > + r = -EINVAL;
> > > + }
> > > +
> > > + return r;
> > > +}
> > > +
> > >   #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
> > >   [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
> > >   .cmd_drv = 0, .name = #ioctl}
> > > @@ -2841,6 +2883,12 @@ static const struct amdkfd_ioctl_desc
> amdkfd_ioctls[] = {
> > >
> >

RE: [PATCH 14/33] drm/amdgpu: prepare map process for multi-process debug devices

2023-05-30 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Kuehling, Felix 
> Sent: Tuesday, May 30, 2023 3:56 PM
> To: Kim, Jonathan ; amd-
> g...@lists.freedesktop.org; dri-de...@lists.freedesktop.org
> Cc: Huang, JinHuiEric 
> Subject: Re: [PATCH 14/33] drm/amdgpu: prepare map process for multi-
> process debug devices
>
> Am 2023-05-25 um 13:27 schrieb Jonathan Kim:
> > Unlike single process debug devices, multi-process debug devices allow
> > debug mode setting per-VMID (non-device-global).
> >
> > Because the HWS manages PASID-VMID mapping, the new MAP_PROCESS
> API allows
> > the KFD to forward the required SPI debug register write requests.
> >
> > To request a new debug mode setting change, the KFD must be able to
> > preempt all queues then remap all queues with these new setting
> > requests for MAP_PROCESS to take effect.
> >
> > Note that by default, trap enablement in non-debug mode must be
> disabled
> > for performance reasons for multi-process debug devices due to setup
> > overhead in FW.
> >
> > v2: spot fixup new kfd_node references
> >
> > Signed-off-by: Jonathan Kim 
> > ---
> >   drivers/gpu/drm/amd/amdkfd/kfd_debug.h|  5 ++
> >   .../drm/amd/amdkfd/kfd_device_queue_manager.c | 51
> +++
> >   .../drm/amd/amdkfd/kfd_device_queue_manager.h |  3 ++
> >   .../drm/amd/amdkfd/kfd_packet_manager_v9.c| 14 +
> >   drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  9 
> >   drivers/gpu/drm/amd/amdkfd/kfd_process.c  |  5 ++
> >   6 files changed, 87 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> > index a8abfe2a0a14..db6d72e7930f 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> > @@ -29,4 +29,9 @@ int kfd_dbg_trap_disable(struct kfd_process *target);
> >   int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd,
> > void __user *runtime_info,
> > uint32_t *runtime_info_size);
> > +static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_node *dev)
> > +{
> > +   return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2);
>
> This needs to be updated to include 9.4.3 as well. Is that coming in a
> different patch? Other than that, this patch is

That's correct.  This series does not enable the debugger for GFX9.4.3.
This will be a follow-up series that Eric will provide.

Thanks.

Jon

>
> Reviewed-by: Felix Kuehling 
>
>
> > +}
> > +
> >   #endif
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > index c8519adc89ac..badfe1210bc4 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > @@ -36,6 +36,7 @@
> >   #include "kfd_kernel_queue.h"
> >   #include "amdgpu_amdkfd.h"
> >   #include "mes_api_def.h"
> > +#include "kfd_debug.h"
> >
> >   /* Size of the per-pipe EOP queue */
> >   #define CIK_HPD_EOP_BYTES_LOG2 11
> > @@ -2593,6 +2594,56 @@ int release_debug_trap_vmid(struct
> device_queue_manager *dqm,
> > return r;
> >   }
> >
> > +int debug_lock_and_unmap(struct device_queue_manager *dqm)
> > +{
> > +   int r;
> > +
> > +   if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
> > +   pr_err("Unsupported on sched_policy: %i\n", dqm-
> >sched_policy);
> > +   return -EINVAL;
> > +   }
> > +
> > +   if (!kfd_dbg_is_per_vmid_supported(dqm->dev))
> > +   return 0;
> > +
> > +   dqm_lock(dqm);
> > +
> > +   r = unmap_queues_cpsch(dqm,
> KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 0, false);
> > +   if (r)
> > +   dqm_unlock(dqm);
> > +
> > +   return r;
> > +}
> > +
> > +int debug_map_and_unlock(struct device_queue_manager *dqm)
> > +{
> > +   int r;
> > +
> > +   if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
> > +   pr_err("Unsupported on sched_policy: %i\n", dqm-
> >sched_policy);
> > +   return -EINVAL;
> > +   }
> > +
> > +   if (!kfd_dbg_is_per_vmid_supported(dqm->dev))
> > +   return 0;
> > +
> > +   r = map_queues_cpsch(dqm);
> > +
> > +   dqm_unlock(dqm);
> > +
> > +   return r;
> > +}
> > +
> > +int debug_refresh_runlist(struct device_queue_manag

RE: [PATCH] drm/amdkfd: remove unused sq_int_priv variable

2023-03-30 Thread Kim, Jonathan
[Public]

Hi Felix,

That is correct.  The debugger will need sq_int_priv to work.

Thanks,

Jon

> -Original Message-
> From: Kuehling, Felix 
> Sent: Thursday, March 30, 2023 11:39 AM
> To: Tom Rix ; Deucher, Alexander
> ; Koenig, Christian
> ; Pan, Xinhui ;
> airl...@gmail.com; dan...@ffwll.ch; nat...@kernel.org;
> ndesaulni...@google.com; Kim, Jonathan 
> Cc: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org; linux-
> ker...@vger.kernel.org; l...@lists.linux.dev
> Subject: Re: [PATCH] drm/amdkfd: remove unused sq_int_priv variable
>
> Am 2023-03-30 um 11:20 schrieb Tom Rix:
> > clang with W=1 reports
> > drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_int_process_v11.c:282:38:
> error: variable
> >'sq_int_priv' set but not used [-Werror,-Wunused-but-set-variable]
> >  uint8_t sq_int_enc, sq_int_errtype, sq_int_priv;
> >  ^
> > This variable is not used so remove it.
>
> Hi Jon,
>
> I think your debugger patches are going to start using this. Can you
> comment?
>
> I'd prefer not to apply this patch now, as Jon's patches are expected to
> land soon, once Alex is done upstreaming GFX 9.4.3 support.
>
> Regards,
>Felix
>
>
> >
> > Signed-off-by: Tom Rix 
> > ---
> >   drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c | 9 +
> >   1 file changed, 1 insertion(+), 8 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
> > index 0d53f6067422..bbd646c0dee7 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
> > @@ -279,7 +279,7 @@ static void event_interrupt_wq_v11(struct kfd_dev
> *dev,
> >   {
> > uint16_t source_id, client_id, ring_id, pasid, vmid;
> > uint32_t context_id0, context_id1;
> > -   uint8_t sq_int_enc, sq_int_errtype, sq_int_priv;
> > +   uint8_t sq_int_enc, sq_int_errtype;
> > struct kfd_vm_fault_info info = {0};
> > struct kfd_hsa_memory_exception_data exception_data;
> >
> > @@ -348,13 +348,6 @@ static void event_interrupt_wq_v11(struct kfd_dev
> *dev,
> > break;
> > case SQ_INTERRUPT_WORD_ENCODING_INST:
> > print_sq_intr_info_inst(context_id0,
> context_id1);
> > -   sq_int_priv = REG_GET_FIELD(context_id0,
> > -
>   SQ_INTERRUPT_WORD_WAVE_CTXID0, PRIV);
> > -   /*if (sq_int_priv &&
> (kfd_set_dbg_ev_from_interrupt(dev, pasid,
> > -
>   KFD_CTXID0_DOORBELL_ID(context_id0),
> > -
>   KFD_CTXID0_TRAP_CODE(context_id0),
> > -   NULL, 0)))
> > -   return;*/
> > break;
> > case SQ_INTERRUPT_WORD_ENCODING_ERROR:
> > print_sq_intr_info_error(context_id0,
> context_id1);


RE: [PATCH 12/34] drm/amdgpu: add configurable grace period for unmap queues

2023-03-28 Thread Kim, Jonathan
[Public]

Thanks for the catch, Kent.
I'll fix up the typos with a follow-on.

Jon

> -Original Message-
> From: Russell, Kent 
> Sent: Tuesday, March 28, 2023 11:19 AM
> To: Kim, Jonathan ; amd-gfx@lists.freedesktop.org;
> dri-de...@lists.freedesktop.org
> Cc: Kuehling, Felix ; Kim, Jonathan
> 
> Subject: RE: [PATCH 12/34] drm/amdgpu: add configurable grace period for
> unmap queues
>
> [AMD Official Use Only - General]
>
> 3 tiny grammar/spelling things inline (not critical)
>
>  Kent
>
> > -Original Message-
> > From: amd-gfx  On Behalf Of
> > Jonathan Kim
> > Sent: Monday, March 27, 2023 2:43 PM
> > To: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org
> > Cc: Kuehling, Felix ; Kim, Jonathan
> > 
> > Subject: [PATCH 12/34] drm/amdgpu: add configurable grace period for
> unmap
> > queues
> >
> > The HWS schedule allows a grace period for wave completion prior to
> > preemption for better performance by avoiding CWSR on waves that can
> > potentially complete quickly. The debugger, on the other hand, will
> > want to inspect wave status immediately after it actively triggers
> > preemption (a suspend function to be provided).
> >
> > To minimize latency between preemption and debugger wave inspection,
> allow
> > immediate preemption by setting the grace period to 0.
> >
> > Note that setting the preepmtion grace period to 0 will result in an
> > infinite grace period being set due to a CP FW bug so set it to 1 for now.
> >
> > v2: clarify purpose in the description of this patch
> >
> > Signed-off-by: Jonathan Kim 
> > Reviewed-by: Felix Kuehling 
> > ---
> >  .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c  |  2 +
> >  .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |  2 +
> >  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 43 
> >  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h|  6 ++
> >  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c  |  2 +
> >  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 43 
> >  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |  9 ++-
> >  .../drm/amd/amdkfd/kfd_device_queue_manager.c | 62 +-
> >  .../drm/amd/amdkfd/kfd_device_queue_manager.h |  2 +
> >  .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 32 +
> >  .../drm/amd/amdkfd/kfd_packet_manager_v9.c| 39 +++
> >  .../gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h   | 65
> +++
> >  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  5 ++
> >  13 files changed, 291 insertions(+), 21 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> > index a6f98141c29c..b811a0985050 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> > @@ -82,5 +82,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
> > .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
> > .enable_debug_trap = kgd_aldebaran_enable_debug_trap,
> > .disable_debug_trap = kgd_aldebaran_disable_debug_trap,
> > +   .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
> > +   .build_grace_period_packet_info =
> > kgd_gfx_v9_build_grace_period_packet_info,
> > .program_trap_handler_settings =
> > kgd_gfx_v9_program_trap_handler_settings,
> >  };
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> > index d2918e5c0dea..a62bd0068515 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> > @@ -410,6 +410,8 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
> >
> > kgd_gfx_v9_set_vm_context_page_table_base,
> > .enable_debug_trap = kgd_arcturus_enable_debug_trap,
> > .disable_debug_trap = kgd_arcturus_disable_debug_trap,
> > +   .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
> > +   .build_grace_period_packet_info =
> > kgd_gfx_v9_build_grace_period_packet_info,
> > .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
> > .program_trap_handler_settings =
> > kgd_gfx_v9_program_trap_handler_settings
> >  };
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> > index 969015281510..605387e55d33 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> > @@ -802,6 +

RE: [PATCH 19/32] drm/amdkfd: add runtime enable operation

2023-03-23 Thread Kim, Jonathan
[AMD Official Use Only - General]

> -Original Message-
> From: Kuehling, Felix 
> Sent: Monday, March 20, 2023 8:31 PM
> To: Kim, Jonathan ; amd-
> g...@lists.freedesktop.org; dri-de...@lists.freedesktop.org
> Subject: Re: [PATCH 19/32] drm/amdkfd: add runtime enable operation
>
>
> On 2023-01-25 14:53, Jonathan Kim wrote:
> > The debugger can attach to a process prior to HSA enablement (i.e.
> > inferior is spawned by the debugger and attached to immediately before
> > target process has been enabled for HSA dispatches) or it
> > can attach to a running target that is already HSA enabled.  Either
> > way, the debugger needs to know the enablement status to know when
> > it can inspect queues.
> >
> > For the scenario where the debugger spawns the target process,
> > it will have to wait for ROCr's runtime enable request from the target.
> > The runtime enable request will be able to see that its process has been
> > debug attached.  ROCr raises an EC_PROCESS_RUNTIME signal to the
> > debugger then blocks the target process while waiting the debugger's
> > response. Once the debugger has received the runtime signal, it will
> > unblock the target process.
> >
> > For the scenario where the debugger attaches to a running target
> > process, ROCr will set the target process' runtime status as enabled so
> > that on an attach request, the debugger will be able to see this
> > status and will continue with debug enablement as normal.
> >
> > A secondary requirement is to conditionally enable the trap temporaries
> only
> > if the user requests it (env var HSA_ENABLE_DEBUG=1) or if the debugger
> > attaches with HSA runtime enabled.  This is because setting up the trap
> > temporaries incurs a performance overhead that is unacceptable for
> > microbench performance in normal mode for certain customers.
> >
> > In the scenario where the debugger spawns the target process, when ROCr
> > detects that the debugger has attached during the runtime enable
> > request, it will enable the trap temporaries before it blocks the target
> > process while waiting for the debugger to respond.
> >
> > In the scenario where the debugger attaches to a running target process,
> > it will enable the trap temporaries itself.
> >
> > Finally, there is an additional restriction that is required to be
> > enforced with runtime enable and HW debug mode setting. The debugger
> must
> > first ensure that HW debug mode has been enabled before permitting HW
> debug
> > mode operations.
> >
> > With single process debug devices, allowing the debugger to set debug
> > HW modes prior to trap activation means that debug HW mode setting can
> > occur before the KFD has reserved the debug VMID (0xf) from the hardware
> > scheduler's VMID allocation resource pool.  This can result in the
> > hardware scheduler assigning VMID 0xf to a non-debugged process and
> > having that process inherit debug HW mode settings intended for the
> > debugged target process instead, which is both incorrect and potentially
> > fatal for normal mode operation.
> >
> > With multi process debug devices, allowing the debugger to set debug
> > HW modes prior to trap activation means that non-debugged processes
> > migrating to a new VMID could inherit unintended debug settings.
> >
> > All debug operations that touch HW settings must require trap activation
> > where trap activation is triggered by both debug attach and runtime
> > enablement (target has KFD opened and is ready to dispatch work).
> >
> > v2: fix up hierarchy of semantics in description.
> >
> > Signed-off-by: Jonathan Kim 
> > ---
> >   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 150
> ++-
> >   drivers/gpu/drm/amd/amdkfd/kfd_debug.c   |   6 +-
> >   drivers/gpu/drm/amd/amdkfd/kfd_debug.h   |   4 +
> >   drivers/gpu/drm/amd/amdkfd/kfd_priv.h|   1 +
> >   4 files changed, 157 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > index 09fe8576dc8c..46f9d453dc5e 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > @@ -2654,11 +2654,147 @@ static int kfd_ioctl_criu(struct file *filep,
> struct kfd_process *p, void *data)
> > return ret;
> >   }
> >
> > -static int kfd_ioctl_runtime_enable(struct file *filep, struct kfd_process 
> > *p,
> void *data)
> > +static int runtime_enable(struct kfd_process *p, uin

RE: [PATCH 03/32] drm/amdkfd: prepare per-process debug enable and disable

2023-03-23 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Kuehling, Felix 
> Sent: Thursday, February 16, 2023 6:44 PM
> To: Kim, Jonathan ; amd-
> g...@lists.freedesktop.org; dri-de...@lists.freedesktop.org
> Subject: Re: [PATCH 03/32] drm/amdkfd: prepare per-process debug enable
> and disable
>
>
> On 2023-01-25 14:53, Jonathan Kim wrote:
> > The ROCm debugger will attach to a process to debug by PTRACE and will
> > expect the KFD to prepare a process for the target PID, whether the
> > target PID has opened the KFD device or not.
> >
> > This patch is to explicitly handle this requirement.  Further HW mode
> > setting and runtime coordination requirements will be handled in
> > following patches.
> >
> > In the case where the target process has not opened the KFD device,
> > a new KFD process must be created for the target PID.
> > The debugger as well as the target process for this case will have not
> > acquired any VMs so handle process restoration to correctly account for
> > this.
> >
> > To coordinate with HSA runtime, the debugger must be aware of the target
> > process' runtime enablement status and will copy the runtime status
> > information into the debugged KFD process for later query.
> >
> > On enablement, the debugger will subscribe to a set of exceptions where
> > each exception events will notify the debugger through a pollable FIFO
> > file descriptor that the debugger provides to the KFD to manage.
> > Some events will be synchronously raised while other are scheduled,
> > which is why a debug_event_workarea worker is initialized.
> >
> > Finally on process termination of either the debugger or the target,
> > debugging must be disabled if it has not been done so.
> >
> > v3: fix typo on debug trap disable and PTRACE ATTACH relax check.
> > remove unnecessary queue eviction counter reset when there's nothing
> > to evict.
> > change err code to EALREADY if attaching to an already attached process.
> > move debug disable to release worker to avoid race with disable from
> > ioctl call.
> >
> > v2: relax debug trap disable and PTRACE ATTACH requirement.
> >
> > Signed-off-by: Jonathan Kim
> > ---
> >   drivers/gpu/drm/amd/amdkfd/Makefile   |  3 +-
> >   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 88 -
> >   drivers/gpu/drm/amd/amdkfd/kfd_debug.c| 94
> +++
> >   drivers/gpu/drm/amd/amdkfd/kfd_debug.h| 33 +++
> >   .../drm/amd/amdkfd/kfd_device_queue_manager.c | 22 -
> >   drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 34 ++-
> >   drivers/gpu/drm/amd/amdkfd/kfd_process.c  | 63 +
> >   7 files changed, 308 insertions(+), 29 deletions(-)
> >   create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> >   create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile
> b/drivers/gpu/drm/amd/amdkfd/Makefile
> > index e758c2a24cd0..747754428073 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/Makefile
> > +++ b/drivers/gpu/drm/amd/amdkfd/Makefile
> > @@ -55,7 +55,8 @@ AMDKFD_FILES  := $(AMDKFD_PATH)/kfd_module.o \
> > $(AMDKFD_PATH)/kfd_int_process_v9.o \
> > $(AMDKFD_PATH)/kfd_int_process_v11.o \
> > $(AMDKFD_PATH)/kfd_smi_events.o \
> > -   $(AMDKFD_PATH)/kfd_crat.o
> > +   $(AMDKFD_PATH)/kfd_crat.o \
> > +   $(AMDKFD_PATH)/kfd_debug.o
> >
> >   ifneq ($(CONFIG_AMD_IOMMU_V2),)
> >   AMDKFD_FILES += $(AMDKFD_PATH)/kfd_iommu.o
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > index d3b019e64093..ee05c2e54ef6 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > @@ -44,6 +44,7 @@
> >   #include "amdgpu_amdkfd.h"
> >   #include "kfd_smi_events.h"
> >   #include "amdgpu_dma_buf.h"
> > +#include "kfd_debug.h"
> >
> >   static long kfd_ioctl(struct file *, unsigned int, unsigned long);
> >   static int kfd_open(struct inode *, struct file *);
> > @@ -142,10 +143,15 @@ static int kfd_open(struct inode *inode, struct
> file *filep)
> > return -EPERM;
> > }
> >
> > -   process = kfd_create_process(filep);
> > +   process = kfd_create_process(current);
> > if (IS_ERR(process))
> > return PTR_ERR(process);
> >
> > +   if (kfd_process_init_cwsr_apu(process, filep)) {
> > 

RE: [PATCH 15/32] drm/amdkfd: prepare trap workaround for gfx11

2023-03-23 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Kuehling, Felix 
> Sent: Monday, March 20, 2023 5:50 PM
> To: Kim, Jonathan ; amd-
> g...@lists.freedesktop.org; dri-de...@lists.freedesktop.org
> Subject: Re: [PATCH 15/32] drm/amdkfd: prepare trap workaround for gfx11
>
>
> On 2023-01-25 14:53, Jonathan Kim wrote:
> > Due to a HW bug, waves in only half the shader arrays can enter trap.
> >
> > When starting a debug session, relocate all waves to the first shader
> > array of each shader engine and mask off the 2nd shader array as
> > unavailable.
> >
> > When ending a debug session, re-enable the 2nd shader array per
> > shader engine.
> >
> > User CU masking per queue cannot be guaranteed to remain functional
> > if requested during debugging (e.g. user cu mask requests only 2nd shader
> > array as an available resource leading to zero HW resources available)
> > nor can runtime be alerted of any of these changes during execution.
> >
> > Make user CU masking and debugging mutually exclusive with respect to
> > availability.
> >
> > If the debugger tries to attach to a process with a user cu masked
> > queue, return the runtime status as enabled but busy.
> >
> > If the debugger tries to attach and fails to reallocate queue waves to
> > the first shader array of each shader engine, return the runtime status
> > as enabled but with an error.
> >
> > In addition, like any other multi-process debug supported devices,
> > disable trap temporary setup per-process to avoid performance impact
> from
> > setup overhead.
> >
> > Signed-off-by: Jonathan Kim 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h   |  2 +
> >   drivers/gpu/drm/amd/amdgpu/mes_v11_0.c|  7 +-
> >   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  |  2 -
> >   drivers/gpu/drm/amd/amdkfd/kfd_debug.c| 64
> +++
> >   drivers/gpu/drm/amd/amdkfd/kfd_debug.h|  3 +-
> >   .../drm/amd/amdkfd/kfd_device_queue_manager.c |  7 ++
> >   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c  |  3 +-
> >   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  |  3 +-
> >   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  | 42 
> >   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   |  3 +-
> >   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c   |  3 +-
> >   drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  5 +-
> >   .../amd/amdkfd/kfd_process_queue_manager.c|  9 ++-
> >   13 files changed, 124 insertions(+), 29 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> > index d20df0cf0d88..b5f5eed2b5ef 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> > @@ -219,6 +219,8 @@ struct mes_add_queue_input {
> > uint32_tgws_size;
> > uint64_ttba_addr;
> > uint64_ttma_addr;
> > +   uint32_ttrap_en;
> > +   uint32_tskip_process_ctx_clear;
> > uint32_tis_kfd_process;
> > uint32_tis_aql_queue;
> > uint32_tqueue_size;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> > index fbacdc42efac..38c7a0cbf264 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> > @@ -197,17 +197,14 @@ static int mes_v11_0_add_hw_queue(struct
> amdgpu_mes *mes,
> > mes_add_queue_pkt.gws_size = input->gws_size;
> > mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
> > mes_add_queue_pkt.tma_addr = input->tma_addr;
> > +   mes_add_queue_pkt.trap_en = input->trap_en;
> > +   mes_add_queue_pkt.skip_process_ctx_clear = input-
> >skip_process_ctx_clear;
> > mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
> >
> > /* For KFD, gds_size is re-used for queue size (needed in MES for AQL
> queues) */
> > mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
> > mes_add_queue_pkt.gds_size = input->queue_size;
> >
> > -   if (!(((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >=
> 4) &&
> > - (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
> &&
> > - (adev->ip_versions[GC_HWIP][0] <= IP_VERSION(11, 0, 3
> > -   mes_add_queue_pkt.trap_en = 1;
> > -
> > /* For KFD, gds_size is re-used for queue size (needed in MES for AQL
> queues) */
> > mes_add_queue_pkt.is_aql_queue = input-&

RE: [PATCH] drm/amdgpu: improve debug VRAM access performance using sdma

2023-03-20 Thread Kim, Jonathan
[Public]

This was a long time ago but I think we agreed allocation was ok before GART 
was ready.
IIRC, there was also some mentioned related scenario where APUs needed to work 
without VRAM but allocations were required (but I don't know the details 
regarding that).
I vaguely remember the requirement for GART readiness for the bounce buffer 
allocation caused some problems elsewhere.
Are there problems observed with the bounce buffer being allocated without GART 
readiness?

Thanks,

Jon
> -Original Message-
> From: Christian König 
> Sent: Monday, March 20, 2023 1:02 PM
> To: Quan, Evan ; Kim, Jonathan
> ; amd-gfx@lists.freedesktop.org
> Cc: Kuehling, Felix ; Koenig, Christian
> 
> Subject: Re: [PATCH] drm/amdgpu: improve debug VRAM access performance
> using sdma
>
> Caution: This message originated from an External Source. Use proper
> caution when opening attachments, clicking links, or responding.
>
>
> I don't think so. Have we recently re-ordered something here?
>
> Christian.
>
> Am 20.03.23 um 08:05 schrieb Quan, Evan:
> > [AMD Official Use Only - General]
> >
> > I happened to find the sdma_access_bo allocation from GTT seems
> performing before gart is ready.
> > That makes the "amdgpu_gart_map" is skipped since adev->gart.ptr is still
> NULL.
> > Is that done intentionally ?
> >
> > Evan
> >> -Original Message-
> >> From: amd-gfx  On Behalf Of
> >> Jonathan Kim
> >> Sent: Wednesday, January 5, 2022 3:12 AM
> >> To: amd-gfx@lists.freedesktop.org
> >> Cc: Kuehling, Felix ; Kim, Jonathan
> >> ; Koenig, Christian
> 
> >> Subject: [PATCH] drm/amdgpu: improve debug VRAM access performance
> >> using sdma
> >>
> >> For better performance during VRAM access for debugged processes, do
> >> read/write copies over SDMA.
> >>
> >> In order to fulfill post mortem debugging on a broken device, fallback to
> >> stable MMIO access when gpu recovery is disabled or when job
> submission
> >> time outs are set to max.  Failed SDMA access should automatically fall
> >> back to MMIO access.
> >>
> >> Use a pre-allocated GTT bounce buffer pre-mapped into GART to avoid
> >> page-table updates and TLB flushes on access.
> >>
> >> Signed-off-by: Jonathan Kim 
> >> ---
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 78
> >> +
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  5 +-
> >>   2 files changed, 82 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> >> index 367abed1d6e6..512df4c09772 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> >> @@ -48,6 +48,7 @@
> >>   #include 
> >>
> >>   #include 
> >> +#include 
> >>
> >>   #include "amdgpu.h"
> >>   #include "amdgpu_object.h"
> >> @@ -1429,6 +1430,70 @@ static void
> amdgpu_ttm_vram_mm_access(struct
> >> amdgpu_device *adev, loff_t pos,
> >>  }
> >>   }
> >>
> >> +static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object
> >> *bo,
> >> +unsigned long offset, void *buf, int
> >> len, int write)
> >> +{
> >> +struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
> >> +struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
> >> +struct amdgpu_job *job;
> >> +struct dma_fence *fence;
> >> +uint64_t src_addr, dst_addr;
> >> +unsigned int num_dw;
> >> +int r, idx;
> >> +
> >> +if (len != PAGE_SIZE)
> >> +return -EINVAL;
> >> +
> >> +if (!adev->mman.sdma_access_ptr)
> >> +return -EACCES;
> >> +
> >> +r = drm_dev_enter(adev_to_drm(adev), &idx);
> >> +if (r)
> >> +return r;
> >> +
> >> +if (write)
> >> +memcpy(adev->mman.sdma_access_ptr, buf, len);
> >> +
> >> +num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
> >> +r = amdgpu_job_alloc_with_ib(adev, num_dw * 4,
> >> AMDGPU_IB_POOL_DELAYED, &job);
> >> +if (r)
> >> +goto out;
> >> +
> >> +src_addr = write ? amdgpu_bo_gpu_offset(adev-
> >>> mman.sdma_access_bo) :
> >

RE: [PATCH 27/29] drm/amdkfd: add debug queue snapshot operation

2022-12-02 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Kuehling, Felix 
> Sent: November 30, 2022 6:55 PM
> To: Kim, Jonathan ; amd-
> g...@lists.freedesktop.org
> Subject: Re: [PATCH 27/29] drm/amdkfd: add debug queue snapshot
> operation
>
>
> On 2022-10-31 12:23, Jonathan Kim wrote:
> > Allow the debugger to get a snapshot of a specified number of queues
> > containing various queue property information that is copied to the
> > debugger.
> >
> > Since the debugger doesn't know how many queues exist at any given
> time,
> > allow the debugger to pass the requested number of snapshots as 0 to get
> > the actual number of potential snapshots to use for a subsequent snapshot
> > request for actual information.
> >
> > To prevent future ABI breakage, pass in the requested entry_size.
> > The KFD will return its own entry_size in case the debugger still wants
> > to log the information in a core dump on sizing failure.
> >
> > Also allow the debugger to clear exceptions when doing a snapshot.
> >
> > v2: change buf_size arg to num_queues for clarity.
> > fix minimum entry size calculation.
> >
> > Signed-off-by: Jonathan Kim 
>
> Two nit-picks inline.
>
>
> > ---
> >   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  |  6 +++
> >   .../drm/amd/amdkfd/kfd_device_queue_manager.c | 41
> +++
> >   .../drm/amd/amdkfd/kfd_device_queue_manager.h |  4 ++
> >   drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  5 +++
> >   .../amd/amdkfd/kfd_process_queue_manager.c| 40
> ++
> >   5 files changed, 96 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > index 2c8f107237ee..cea393350980 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > @@ -2961,6 +2961,12 @@ static int kfd_ioctl_set_debug_trap(struct file
> *filep, struct kfd_process *p, v
> > &args->query_exception_info.info_size);
> > break;
> > case KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT:
> > +   r = pqm_get_queue_snapshot(&target->pqm,
> > +   args->queue_snapshot.exception_mask,
> > +   (void __user *)args-
> >queue_snapshot.snapshot_buf_ptr,
> > +   &args->queue_snapshot.num_queues,
> > +   &args->queue_snapshot.entry_size);
> > +   break;
> > case KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT:
> > pr_warn("Debug op %i not supported yet\n", args->op);
> > r = -EACCES;
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > index 589efbefc8dc..51f8c5676c56 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > @@ -2950,6 +2950,47 @@ int suspend_queues(struct kfd_process *p,
> > return total_suspended;
> >   }
> >
> > +static uint32_t set_queue_type_for_user(struct queue_properties
> *q_props)
> > +{
> > +   switch (q_props->type) {
> > +   case KFD_QUEUE_TYPE_COMPUTE:
> > +   return q_props->format == KFD_QUEUE_FORMAT_PM4
> > +   ? KFD_IOC_QUEUE_TYPE_COMPUTE
> > +   :
> KFD_IOC_QUEUE_TYPE_COMPUTE_AQL;
> > +   case KFD_QUEUE_TYPE_SDMA:
> > +   return KFD_IOC_QUEUE_TYPE_SDMA;
> > +   case KFD_QUEUE_TYPE_SDMA_XGMI:
> > +   return KFD_IOC_QUEUE_TYPE_SDMA_XGMI;
> > +   default:
> > +   WARN_ONCE(true, "queue type not recognized!");
> > +   return 0x;
> > +   };
> > +}
> > +
> > +void set_queue_snapshot_entry(struct device_queue_manager *dqm,
> > + struct queue *q,
> > + uint64_t exception_clear_mask,
> > + struct kfd_queue_snapshot_entry *qss_entry)
>
> The dqm parameter is not needed. The function can get this from
> q->device->dqm. It's also only needed for dqm locking. I'm not sure
> that's even necessary. Aren't the event_mutex and target process mutex
> held by the caller enough to protect the exception_status and other
> queue properties?

I can't really remember why we took the device lock in the experimental phase tbh but 
I think you're right.
The process e

RE: [PATCH 05/29] drm/amdgpu: setup hw debug registers on driver initialization

2022-12-02 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Kuehling, Felix 
> Sent: November 30, 2022 7:24 PM
> To: Kim, Jonathan ; amd-
> g...@lists.freedesktop.org
> Subject: Re: [PATCH 05/29] drm/amdgpu: setup hw debug registers on driver
> initialization
>
>
> On 2022-10-31 12:23, Jonathan Kim wrote:
> > Add missing debug trap registers references and initialize all debug
> > registers on boot by clearing the hardware exception overrides and the
> > wave allocation ID index.
> >
> > For debug devices that only support single process debugging, enable
> > trap temporary setup by default.
> >
> > Debug devices that support multi-process debugging require trap
> > temporary setup to be disabled by default in order to satisfy microbench
> > performance when in non-debug mode.
> >
> > The debugger requires that TTMPs 6 & 7 save the dispatch ID to map
> > waves onto dispatch during compute context inspection.
> > In order to correctly set this up, set the special reserved CP bit by default
> > whenever the MQD is initialized.
> >
> > Signed-off-by: Jonathan Kim 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c| 26 +++
> >   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 30 
> >   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  |  5 ++
> >   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   |  5 ++
> >   .../include/asic_reg/gc/gc_10_1_0_offset.h| 14 
> >   .../include/asic_reg/gc/gc_10_1_0_sh_mask.h   | 69
> +++
> >   .../include/asic_reg/gc/gc_10_3_0_offset.h| 10 +++
> >   .../include/asic_reg/gc/gc_10_3_0_sh_mask.h   |  4 ++
> >   8 files changed, 163 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > index af94ac580d3e..d49aff0b4ba3 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > @@ -4904,6 +4904,29 @@ static u32
> gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade
> >
> >   #define DEFAULT_SH_MEM_BASES  (0x6000)
> >
> > +static void gfx_v10_0_debug_trap_config_init(struct amdgpu_device
> *adev,
> > +   uint32_t first_vmid,
> > +   uint32_t last_vmid)
> > +{
> > +   uint32_t data;
> > +   uint32_t trap_config_vmid_mask = 0;
> > +   int i;
> > +
> > +   /* Calculate trap config vmid mask */
> > +   for (i = first_vmid; i < last_vmid; i++)
> > +   trap_config_vmid_mask |= (1 << i);
> > +
> > +   data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
> > +   VMID_SEL, trap_config_vmid_mask);
> > +   data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
> > +   TRAP_EN, 1);
> > +   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG),
> data);
> > +   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK),
> 0);
> > +
> > +   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0),
> 0);
> > +   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1),
> 0);
> > +}
> > +
> >   static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
> >   {
> > int i;
> > @@ -4935,6 +4958,9 @@ static void gfx_v10_0_init_compute_vmid(struct
> amdgpu_device *adev)
> > WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
> > WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
> > }
> > +
> > +   gfx_v10_0_debug_trap_config_init(adev, adev-
> >vm_manager.first_kfd_vmid,
> > +   AMDGPU_NUM_VMID);
> >   }
> >
> >   static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev)
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > index 0320be4a5fc6..a0e5ad342f13 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > @@ -2337,6 +2337,29 @@ static void gfx_v9_0_setup_rb(struct
> amdgpu_device *adev)
> > adev->gfx.config.num_rbs = hweight32(active_rbs);
> >   }
> >
> > +static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device
> *adev,
> > +   uint32_t first_vmid,
> > +   uint32_t last_vmid)
> > +{
> > +   uint32_t data;
> > +   uint32_t trap_config_vmid_mask = 0;
> > +   int i;
> > +
> > +   /* Calculate trap config vmid mask */
> > +   for (i = first_vmid; i < last_vmid; i++)
> > +   trap_config_vmid_mask |= (1 

RE: [PATCH 17/29] drm/amdkfd: Add debug trap enabled flag to TMA

2022-11-24 Thread Kim, Jonathan
[AMD Official Use Only - General]

> -Original Message-
> From: Kuehling, Felix 
> Sent: November 24, 2022 11:24 AM
> To: Kim, Jonathan ; amd-
> g...@lists.freedesktop.org
> Subject: Re: [PATCH 17/29] drm/amdkfd: Add debug trap enabled flag to
> TMA
>
>
> Am 2022-11-24 um 09:51 schrieb Kim, Jonathan:
> > [Public]
> >
> >> -Original Message-
> >> From: Kuehling, Felix 
> >> Sent: November 22, 2022 7:45 PM
> >> To: Kim, Jonathan ; amd-
> >> g...@lists.freedesktop.org
> >> Subject: Re: [PATCH 17/29] drm/amdkfd: Add debug trap enabled flag to
> >> TMA
> >>
> >>
> >> On 2022-10-31 12:23, Jonathan Kim wrote:
> >>> From: Jay Cornwall 
> >>>
> >>> Trap handler behavior will differ when a debugger is attached.
> >>>
> >>> Make the debug trap flag available in the trap handler TMA.
> >>> Update it when the debug trap ioctl is invoked.
> >>>
> >>> v3: Rebase for upstream
> >>>
> >>> v2:
> >>> Add missing debug flag setup on APUs
> >>>
> >>> Signed-off-by: Jay Cornwall 
> >>> Reviewed-by: Felix Kuehling 
> >>> Signed-off-by: Jonathan Kim 
> >>> ---
> >>>drivers/gpu/drm/amd/amdkfd/kfd_debug.c   |  4 
> >>>drivers/gpu/drm/amd/amdkfd/kfd_priv.h|  2 ++
> >>>drivers/gpu/drm/amd/amdkfd/kfd_process.c | 16
> 
> >>>3 files changed, 22 insertions(+)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> >> b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> >>> index ae6e701a2656..d4f87f2adada 100644
> >>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> >>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> >>> @@ -193,6 +193,8 @@ void kfd_dbg_trap_deactivate(struct
> kfd_process
> >> *target, bool unwind, int unwind
> >>>  if (unwind && count == unwind_count)
> >>>  break;
> >>>
> >>> +   kfd_process_set_trap_debug_flag(&pdd->qpd, false);
> >>> +
> >>>  /* GFX off is already disabled by debug activate if not RLC
> >> restore supported. */
> >>>  if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
> >>>  amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
> >>> @@ -278,6 +280,8 @@ int kfd_dbg_trap_activate(struct kfd_process
> >> *target)
> >>>  if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
> >>>  amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
> >>>
> >>> +   kfd_process_set_trap_debug_flag(&pdd->qpd, true);
> >>> +
> >>>  r = debug_refresh_runlist(pdd->dev->dqm);
> >>>  if (r) {
> >>>  target->runtime_info.runtime_state =
> >>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> >> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> >>> index 9690a2adb9ed..82b28588ab72 100644
> >>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> >>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> >>> @@ -1101,6 +1101,8 @@ int kfd_init_apertures(struct kfd_process
> >> *process);
> >>>void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
> >>>uint64_t tba_addr,
> >>>uint64_t tma_addr);
> >>> +void kfd_process_set_trap_debug_flag(struct qcm_process_device
> >> *qpd,
> >>> +bool enabled);
> >>>
> >>>/* CWSR initialization */
> >>>int kfd_process_init_cwsr_apu(struct kfd_process *process, struct file
> >> *filep);
> >>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> >> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> >>> index 59c4c38833b6..d62e0c62df76 100644
> >>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> >>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> >>> @@ -1252,6 +1252,8 @@ int kfd_process_init_cwsr_apu(struct
> >> kfd_process *p, struct file *filep)
> >>>  memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev-
> >>> cwsr_isa_size);
> >>>
> >>> +   kfd_process_set_trap_debug_flag(qpd, p-
> >>> debug_trap_enabled);
> >>> +
> >>> 

RE: [PATCH 07/29] drm/amdgpu: add gfx9.4.1 hw debug mode enable and disable calls

2022-11-24 Thread Kim, Jonathan
[AMD Official Use Only - General]

> -Original Message-
> From: Kuehling, Felix 
> Sent: November 22, 2022 6:59 PM
> To: Kim, Jonathan ; amd-
> g...@lists.freedesktop.org
> Subject: Re: [PATCH 07/29] drm/amdgpu: add gfx9.4.1 hw debug mode
> enable and disable calls
>
>
> On 2022-10-31 12:23, Jonathan Kim wrote:
> > On GFX9.4.1, the implicit wait count instruction on s_barrier is
> > disabled by default in the driver during normal operation for
> > performance requirements.
> >
> > There is a hardware bug in GFX9.4.1 where if the implicit wait count
> > instruction after an s_barrier instruction is disabled, any wave that
> > hits an exception may step over the s_barrier when returning from the
> > trap handler with the barrier logic having no ability to be
> > aware of this, thereby causing other waves to wait at the barrier
> > indefinitely resulting in a shader hang.  This bug has been corrected
> > for GFX9.4.2 and onward.
> >
> > Since the debugger subscribes to hardware exceptions, in order to avoid
> > this bug, the debugger must enable implicit wait count on s_barrier
> > for a debug session and disable it on detach.
> >
> > In order to change this setting in the device global SQ_CONFIG
> > register, the GFX pipeline must be idle.  GFX9.4.1 as a compute device
> > will either dispatch work through the compute ring buffers used for
> > image post processing or through the hardware scheduler by the KFD.
> >
> > Have the KGD suspend and drain the compute ring buffer, then suspend
> the
> > hardware scheduler and block any future KFD process job requests before
> > changing the implicit wait count setting.  Once set, resume all work.
> >
> > Signed-off-by: Jonathan Kim 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   3 +
> >   .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   | 105
> +-
> >   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c |   4 +-
> >   drivers/gpu/drm/amd/amdkfd/kfd_process.c  |   2 +-
> >   4 files changed, 110 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > index 0e6ddf05c23c..9f2499f52d2c 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > @@ -1034,6 +1034,9 @@ struct amdgpu_device {
> > struct pci_saved_state  *pci_state;
> > pci_channel_state_t pci_channel_state;
> >
> > +   /* Track auto wait count on s_barrier settings */
> > +   boolbarrier_has_auto_waitcnt;
> > +
> > struct amdgpu_reset_control *reset_cntl;
> > uint32_t
> ip_versions[MAX_HWIP][HWIP_MAX_INSTANCE];
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> > index 4191af5a3f13..13f02a0aa828 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> > @@ -26,6 +26,7 @@
> >   #include "amdgpu.h"
> >   #include "amdgpu_amdkfd.h"
> >   #include "amdgpu_amdkfd_arcturus.h"
> > +#include "amdgpu_reset.h"
> >   #include "sdma0/sdma0_4_2_2_offset.h"
> >   #include "sdma0/sdma0_4_2_2_sh_mask.h"
> >   #include "sdma1/sdma1_4_2_2_offset.h"
> > @@ -48,6 +49,8 @@
> >   #include "amdgpu_amdkfd_gfx_v9.h"
> >   #include "gfxhub_v1_0.h"
> >   #include "mmhub_v9_4.h"
> > +#include "gc/gc_9_0_offset.h"
> > +#include "gc/gc_9_0_sh_mask.h"
> >
> >   #define HQD_N_REGS 56
> >   #define DUMP_REG(addr) do {   \
> > @@ -276,6 +279,104 @@ int kgd_arcturus_hqd_sdma_destroy(struct
> amdgpu_device *adev, void *mqd,
> > return 0;
> >   }
> >
> > +/*
> > + * Helper used to suspend/resume gfx pipe for image post process work
> to set
> > + * barrier behaviour.
> > + */
> > +static int suspend_resume_compute_scheduler(struct amdgpu_device
> *adev, bool suspend)
> > +{
> > +   int i, r = 0;
> > +
> > +   for (i = 0; i < adev->gfx.num_compute_rings; i++) {
> > +   struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
> > +
> > +   if (!(ring && ring->sched.thread))
> > +   continue;
> > +
> > +   /* stop scheduler and drain ring. */
> > +   if (suspend) {
&g

RE: [PATCH 17/29] drm/amdkfd: Add debug trap enabled flag to TMA

2022-11-24 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Kuehling, Felix 
> Sent: November 22, 2022 7:45 PM
> To: Kim, Jonathan ; amd-
> g...@lists.freedesktop.org
> Subject: Re: [PATCH 17/29] drm/amdkfd: Add debug trap enabled flag to
> TMA
>
>
> On 2022-10-31 12:23, Jonathan Kim wrote:
> > From: Jay Cornwall 
> >
> > Trap handler behavior will differ when a debugger is attached.
> >
> > Make the debug trap flag available in the trap handler TMA.
> > Update it when the debug trap ioctl is invoked.
> >
> > v3: Rebase for upstream
> >
> > v2:
> > Add missing debug flag setup on APUs
> >
> > Signed-off-by: Jay Cornwall 
> > Reviewed-by: Felix Kuehling 
> > Signed-off-by: Jonathan Kim 
> > ---
> >   drivers/gpu/drm/amd/amdkfd/kfd_debug.c   |  4 
> >   drivers/gpu/drm/amd/amdkfd/kfd_priv.h|  2 ++
> >   drivers/gpu/drm/amd/amdkfd/kfd_process.c | 16 
> >   3 files changed, 22 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> > index ae6e701a2656..d4f87f2adada 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> > @@ -193,6 +193,8 @@ void kfd_dbg_trap_deactivate(struct kfd_process
> *target, bool unwind, int unwind
> > if (unwind && count == unwind_count)
> > break;
> >
> > +   kfd_process_set_trap_debug_flag(&pdd->qpd, false);
> > +
> > /* GFX off is already disabled by debug activate if not RLC
> restore supported. */
> > if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
> > amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
> > @@ -278,6 +280,8 @@ int kfd_dbg_trap_activate(struct kfd_process
> *target)
> > if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
> > amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
> >
> > +   kfd_process_set_trap_debug_flag(&pdd->qpd, true);
> > +
> > r = debug_refresh_runlist(pdd->dev->dqm);
> > if (r) {
> > target->runtime_info.runtime_state =
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> > index 9690a2adb9ed..82b28588ab72 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> > @@ -1101,6 +1101,8 @@ int kfd_init_apertures(struct kfd_process
> *process);
> >   void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
> >   uint64_t tba_addr,
> >   uint64_t tma_addr);
> > +void kfd_process_set_trap_debug_flag(struct qcm_process_device
> *qpd,
> > +bool enabled);
> >
> >   /* CWSR initialization */
> >   int kfd_process_init_cwsr_apu(struct kfd_process *process, struct file
> *filep);
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> > index 59c4c38833b6..d62e0c62df76 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> > @@ -1252,6 +1252,8 @@ int kfd_process_init_cwsr_apu(struct
> kfd_process *p, struct file *filep)
> >
> > memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev-
> >cwsr_isa_size);
> >
> > +   kfd_process_set_trap_debug_flag(qpd, p-
> >debug_trap_enabled);
> > +
> > qpd->tma_addr = qpd->tba_addr +
> KFD_CWSR_TMA_OFFSET;
> > pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for
> pqm.\n",
> > qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
> > @@ -1288,6 +1290,9 @@ static int
> kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
> >
> > memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
> >
> > +   kfd_process_set_trap_debug_flag(&pdd->qpd,
> > +   pdd->process-
> >debug_trap_enabled);
> > +
> > qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
> > pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
> >  qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
> > @@ -1374,6 +1379,17 @@ bool kfd_process_xnack_mode(struct
> kfd_process *p, bool supported)
> > return true;
> >   }
> >
> > +void kfd_process_set_trap_debug_flag(struc

RE: [PATCH 05/29] drm/amdgpu: setup hw debug registers on driver initialization

2022-11-23 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Kuehling, Felix 
> Sent: November 22, 2022 6:39 PM
> To: Kim, Jonathan ; amd-
> g...@lists.freedesktop.org
> Subject: Re: [PATCH 05/29] drm/amdgpu: setup hw debug registers on driver
> initialization
>
>
> On 2022-10-31 12:23, Jonathan Kim wrote:
> > Add missing debug trap registers references and initialize all debug
> > registers on boot by clearing the hardware exception overrides and the
> > wave allocation ID index.
> >
> > For debug devices that only support single process debugging, enable
> > trap temporary setup by default.
> >
> > Debug devices that support multi-process debugging require trap
> > temporary setup to be disabled by default in order to satisfy microbench
> > performance when in non-debug mode.
>
> Where is this done? I don't think it's in the MQD setup because that
> happens unconditionally on all GPUs.

Right I forgot to update gfx_v9_4_2_debug_trap_config_init to clear TRAP_EN 
instead of setting it.
I'll fix that.

>
>
> >
> > The debugger requires that TTMPs 6 & 7 save the dispatch ID to map
> > waves onto dispatch during compute context inspection.
> > In order to correctly this up, set the special reserved CP bit by default
> > whenever the MQD is initailized.
>
> There is a word missing here. "In order to correctly _set_ this up ..."?

Whoops.  Thanks.

>
> This patch covers GFXv9 and 10. Will GFXv11 be handled separately?

Ok.  I'll include GFX11 as well for the next round of reviews in this patch.

Thanks,

Jon

>
> Regards,
>Felix
>
>
> >
> > Signed-off-by: Jonathan Kim 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c| 26 +++
> >   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 30 
> >   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  |  5 ++
> >   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   |  5 ++
> >   .../include/asic_reg/gc/gc_10_1_0_offset.h| 14 
> >   .../include/asic_reg/gc/gc_10_1_0_sh_mask.h   | 69
> +++
> >   .../include/asic_reg/gc/gc_10_3_0_offset.h| 10 +++
> >   .../include/asic_reg/gc/gc_10_3_0_sh_mask.h   |  4 ++
> >   8 files changed, 163 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > index af94ac580d3e..d49aff0b4ba3 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > @@ -4904,6 +4904,29 @@ static u32
> gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade
> >
> >   #define DEFAULT_SH_MEM_BASES  (0x6000)
> >
> > +static void gfx_v10_0_debug_trap_config_init(struct amdgpu_device
> *adev,
> > +   uint32_t first_vmid,
> > +   uint32_t last_vmid)
> > +{
> > +   uint32_t data;
> > +   uint32_t trap_config_vmid_mask = 0;
> > +   int i;
> > +
> > +   /* Calculate trap config vmid mask */
> > +   for (i = first_vmid; i < last_vmid; i++)
> > +   trap_config_vmid_mask |= (1 << i);
> > +
> > +   data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
> > +   VMID_SEL, trap_config_vmid_mask);
> > +   data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
> > +   TRAP_EN, 1);
> > +   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG),
> data);
> > +   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK),
> 0);
> > +
> > +   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0),
> 0);
> > +   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1),
> 0);
> > +}
> > +
> >   static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
> >   {
> > int i;
> > @@ -4935,6 +4958,9 @@ static void gfx_v10_0_init_compute_vmid(struct
> amdgpu_device *adev)
> > WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
> > WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
> > }
> > +
> > +   gfx_v10_0_debug_trap_config_init(adev, adev-
> >vm_manager.first_kfd_vmid,
> > +   AMDGPU_NUM_VMID);
> >   }
> >
> >   static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev)
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > index 0320be4a5fc6..a0e5ad342f13 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > @@ -2337,6 +2337,29 @@ static void gfx_v9_0_setup_rb(struct
> amdgpu_device *

RE: [PATCH 01/29] drm/amdkfd: add debug and runtime enable interface

2022-11-23 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Kuehling, Felix 
> Sent: November 22, 2022 6:05 PM
> To: Kim, Jonathan ; amd-
> g...@lists.freedesktop.org
> Subject: Re: [PATCH 01/29] drm/amdkfd: add debug and runtime enable
> interface
>
>
> On 2022-10-31 12:23, Jonathan Kim wrote:
> > Introduce the GPU debug operations interface.
> >
> > For ROCm-GDB to extend the GNU Debugger's ability to inspect the AMD
> GPU
> > instruction set, provide the necessary interface to allow the debugger
> > to HW debug-mode set and query exceptions per HSA queue, process or
> > device.
> >
> > The runtime_enable interface coordinates exception handling with the
> > HSA runtime.
> >
> > Usage is available in the kern docs at uapi/linux/kfd_ioctl.h.
> >
> > v2: add more documentation on semantics and error returns.
> > expand kfd_dbg_device_info_entry with new fields.
> > update device_snapshot sematics to match queue snapshot semantics
>
> This looks really good. I have 3 more nit-picks inline. Other than that,
> this patch is
>
> Reviewed-by: Felix Kuehling 
>
> Do we have a debugger branch that uses the API yet? We should make this
> public in order to complete this upstream code review.

Thanks for the review.  I've given the heads up to the ROCm GDB maintainers to 
expect to sync with this API version soon so hopefully they can surface one 
shortly.

Thanks,

Jon

>
>
> >
> > Signed-off-by: Jonathan Kim 
> > ---
> > drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 48 ++
> > include/uapi/linux/kfd_ioctl.h | 655 ++-
> > 2 files changed, 702 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > index 5feaba6a77de..11a960c83fb2 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > @@ -2644,6 +2644,48 @@ static int kfd_ioctl_criu(struct file *filep,
> > struct kfd_process *p, void *data)
> > return ret;
> > }
> > +static int kfd_ioctl_runtime_enable(struct file *filep, struct
> > kfd_process *p, void *data)
> > +{
> > + return 0;
> > +}
> > +
> > +static int kfd_ioctl_set_debug_trap(struct file *filep, struct
> > kfd_process *p, void *data)
> > +{
> > + struct kfd_ioctl_dbg_trap_args *args = data;
> > + int r = 0;
> > +
> > + if (sched_policy == KFD_SCHED_POLICY_NO_HWS) {
> > + pr_err("Debugging does not support sched_policy %i", sched_policy);
> > + return -EINVAL;
> > + }
> > +
> > + switch (args->op) {
> > + case KFD_IOC_DBG_TRAP_ENABLE:
> > + case KFD_IOC_DBG_TRAP_DISABLE:
> > + case KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT:
> > + case KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED:
> > + case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE:
> > + case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE:
> > + case KFD_IOC_DBG_TRAP_SUSPEND_QUEUES:
> > + case KFD_IOC_DBG_TRAP_RESUME_QUEUES:
> > + case KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH:
> > + case KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH:
> > + case KFD_IOC_DBG_TRAP_SET_FLAGS:
> > + case KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT:
> > + case KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO:
> > + case KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT:
> > + case KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT:
> > + pr_warn("Debugging not supported yet\n");
> > + r = -EACCES;
> > + break;
> > + default:
> > + pr_err("Invalid option: %i\n", args->op);
> > + r = -EINVAL;
> > + }
> > +
> > + return r;
> > +}
> > +
> > #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
> > [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
> > .cmd_drv = 0, .name = #ioctl}
> > @@ -2753,6 +2795,12 @@ static const struct amdkfd_ioctl_desc
> > amdkfd_ioctls[] = {
> > AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY,
> > kfd_ioctl_get_available_memory, 0),
> > +
> > + AMDKFD_IOCTL_DEF(AMDKFD_IOC_RUNTIME_ENABLE,
> > + kfd_ioctl_runtime_enable, 0),
> > +
> > + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_TRAP,
> > + kfd_ioctl_set_debug_trap, 0),
> > };
> > #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
> > diff --git a/include/uapi/linux/kfd_ioctl.h
> > b/include/uapi/linux/kfd_ioctl.h
> > index 42b60198b6c5..bedf1b823f57 100644
> > --- a/include/uapi/linux/kfd_ioctl.h
> > +++ b/include/uapi/linux/kfd_ioctl.h
> > @@ -109,6 +109,28 @@ struct kfd_ioctl_get_available_memory_args {
> > __u32 pad;
&

RE: [PATCH] drm/amdgpu: fix reset domain xgmi hive info reference leak

2022-08-12 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Kuehling, Felix 
> Sent: August 12, 2022 6:12 PM
> To: Grodzovsky, Andrey ; Kim, Jonathan
> ; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdgpu: fix reset domain xgmi hive info reference
> leak
>
>
> On 2022-08-12 18:05, Andrey Grodzovsky wrote:
> >
> > On 2022-08-12 14:38, Kim, Jonathan wrote:
> >> [Public]
> >>
> >> Hi Andrey,
> >>
> >> Here's the load/unload stack trace.  This is a 2 GPU xGMI system.  I
> >> put dbg_xgmi_hive_get/put refcount print post kobj get/put.
> >> It's stuck at 2 on unload.  If it's an 8 GPU system, it's stuck at 8.
> >>
> >> e.g. of sysfs leak after driver unload:
> >>
> atitest@atitest:/sys/devices/pci:80/:80:02.0/:81:00.0/:82:00
> .0/:83:00.0$
> >> ls xgmi_hive_info/
> >> xgmi_hive_id
> >>
> >> Thanks,
> >>
> >> Jon
> >
> >
> > I see the leak, but how is it related to amdgpu_reset_domain ? How you
> > think that he causing this ?
> Does YiPeng's patch "[PATCH 2/2] drm/amdgpu: fix hive reference leak
> when adding xgmi device" address the same issue?

Yes, this is the extra reference I was talking about in the snippet I posted.

Thanks,

Jon

>
> Regards,
>Felix
>
>
> >
> > Andrey
> >
> >
> >>
> >>
> >> Driver load (get ref happens on both device add to hive and init per
> >> device):
> >> [   61.975900] amdkcl: loading out-of-tree module taints kernel.
> >> [   61.975973] amdkcl: module verification failed: signature and/or
> >> required key missing - tainting kernel
> >> [   62.065546] amdkcl: Warning: fail to get symbol cancel_work,
> >> replace it with kcl stub
> >> [   62.081920] AMD-Vi: AMD IOMMUv2 functionality not available on
> >> this system - This is not a bug.
> >> [   62.491119] [drm] amdgpu kernel modesetting enabled.
> >> [   62.491122] [drm] amdgpu version: 5.18.2
> >> [   62.491124] [drm] OS DRM version: 5.15.0
> >> [   62.491337] amdgpu: CRAT table not found
> >> [   62.491341] amdgpu: Virtual CRAT table created for CPU
> >> [   62.491360] amdgpu: Topology: Add CPU node
> >> [   62.603556] amdgpu: PeerDirect support was initialized successfully
> >> [   62.603847] amdgpu :83:00.0: enabling device (0100 -> 0102)
> >> [   62.603987] [drm] initializing kernel modesetting (VEGA20
> >> 0x1002:0x66A1 0x1002:0x0834 0x00).
> >> [   62.604023] [drm] register mmio base: 0xFBD0
> >> [   62.604026] [drm] register mmio size: 524288
> >> [   62.604171] [drm] add ip block number 0 
> >> [   62.604175] [drm] add ip block number 1 
> >> [   62.604177] [drm] add ip block number 2 
> >> [   62.604180] [drm] add ip block number 3 
> >> [   62.604182] [drm] add ip block number 4 
> >> [   62.604185] [drm] add ip block number 5 
> >> [   62.604187] [drm] add ip block number 6 
> >> [   62.604190] [drm] add ip block number 7 
> >> [   62.604192] [drm] add ip block number 8 
> >> [   62.604194] [drm] add ip block number 9 
> >> [   62.641771] amdgpu :83:00.0: amdgpu: Fetched VBIOS from ROM BAR
> >> [   62.641777] amdgpu: ATOM BIOS: 113-D1630200-112
> >> [   62.713418] [drm] UVD(0) is enabled in VM mode
> >> [   62.713423] [drm] UVD(1) is enabled in VM mode
> >> [   62.713426] [drm] UVD(0) ENC is enabled in VM mode
> >> [   62.713428] [drm] UVD(1) ENC is enabled in VM mode
> >> [   62.713430] [drm] VCE enabled in VM mode
> >> [   62.713433] amdgpu :83:00.0: amdgpu: Trusted Memory Zone (TMZ)
> >> feature not supported
> >> [   62.713472] [drm] GPU posting now...
> >> [   62.713993] amdgpu :83:00.0: amdgpu: MEM ECC is active.
> >> [   62.713995] amdgpu :83:00.0: amdgpu: SRAM ECC is active.
> >> [   62.714006] amdgpu :83:00.0: amdgpu: RAS INFO: ras initialized
> >> successfully, hardware ability[7fff] ras_mask[7fff]
> >> [   62.714018] [drm] vm size is 262144 GB, 4 levels, block size is
> >> 9-bit, fragment size is 9-bit
> >> [   62.714026] amdgpu :83:00.0: amdgpu: VRAM: 32752M
> >> 0x0080 - 0x0087FEFF (32752M used)
> >> [   62.714029] amdgpu :83:00.0: amdgpu: GART: 512M
> >> 0x - 0x1FFF
> >> [   62.714032] amdgpu :83:00.0: amdgpu: AGP: 267845632M
> >> 0x0090 - 0x
> >> [   62.714043] [drm] Detected VR

RE: [PATCH] drm/amdgpu: fix reset domain xgmi hive info reference leak

2022-08-12 Thread Kim, Jonathan
.765134]  release_nodes+0x40/0xb0
[  110.765137]  devres_release_all+0x9e/0xe0
[  110.765141]  device_release_driver_internal+0x117/0x1f0
[  110.765144]  driver_detach+0x4c/0xa0
[  110.765146]  bus_remove_driver+0x6c/0xf0
[  110.765148]  driver_unregister+0x31/0x50
[  110.765150]  pci_unregister_driver+0x40/0x90
[  110.765154]  amdgpu_exit+0x15/0x446 [amdgpu]
[  110.765434]  __x64_sys_delete_module+0x14e/0x260
[  110.765438]  ? do_syscall_64+0x69/0xc0
[  110.765441]  ? __x64_sys_read+0x1a/0x20
[  110.765444]  ? do_syscall_64+0x69/0xc0
[  110.765446]  ? ksys_read+0x67/0xf0
[  110.765449]  do_syscall_64+0x5c/0xc0
[  110.765451]  ? __x64_sys_read+0x1a/0x20
[  110.765454]  ? do_syscall_64+0x69/0xc0
[  110.765456]  ? syscall_exit_to_user_mode+0x27/0x50
[  110.765460]  ? __x64_sys_openat+0x20/0x30
[  110.765464]  ? do_syscall_64+0x69/0xc0
[  110.765466]  ? do_syscall_64+0x69/0xc0
[  110.765469]  ? irqentry_exit+0x1d/0x30
[  110.765472]  ? exc_page_fault+0x89/0x170
[  110.765476]  entry_SYSCALL_64_after_hwframe+0x61/0xcb
[  110.765480] RIP: 0033:0x7f1576682a6b
[  110.765482] Code: 73 01 c3 48 8b 0d 25 c4 0c 00 f7 d8 64 89 01 48 83 c8 ff 
c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 b0 00 00 00 0f 05 <48> 3d 01 
f0 ff ff 73 01 c3 48 8b 0d f5 c3 0c 00 f7 d8 64 89 01 48
[  110.765485] RSP: 002b:7ffcb96e0bf8 EFLAGS: 0206 ORIG_RAX: 
00b0
[  110.765488] RAX: ffda RBX: 56347ba57550 RCX: 7f1576682a6b
[  110.765489] RDX:  RSI: 0800 RDI: 56347ba575b8
[  110.765491] RBP: 56347ba57550 R08:  R09: 
[  110.765492] R10: 7f15766feac0 R11: 0206 R12: 56347ba575b8
[  110.765494] R13:  R14: 56347ba575b8 R15: 56347ba57550
[  110.765496]  
[  110.768091] [drm] amdgpu: ttm finalized

> -Original Message-
> From: Grodzovsky, Andrey 
> Sent: August 11, 2022 12:43 PM
> To: Kim, Jonathan ; Kuehling, Felix
> ; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdgpu: fix reset domain xgmi hive info reference
> leak
>
>
> On 2022-08-11 11:34, Kim, Jonathan wrote:
> > [Public]
> >
> >> -Original Message-
> >> From: Kuehling, Felix 
> >> Sent: August 11, 2022 11:19 AM
> >> To: amd-gfx@lists.freedesktop.org; Kim, Jonathan
> 
> >> Subject: Re: [PATCH] drm/amdgpu: fix reset domain xgmi hive info reference
> >> leak
> >>
> >> Am 2022-08-11 um 09:42 schrieb Jonathan Kim:
> >>> When an xgmi node is added to the hive, it takes another hive
> >>> reference for its reset domain.
> >>>
> >>> This extra reference was not dropped on device removal from the
> >>> hive so drop it.
> >>>
> >>> Signed-off-by: Jonathan Kim 
> >>> ---
> >>>drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 3 +++
> >>>1 file changed, 3 insertions(+)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> >>> index 1b108d03e785..560bf1c98f08 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> >>> @@ -731,6 +731,9 @@ int amdgpu_xgmi_remove_device(struct
> >> amdgpu_device *adev)
> >>>  mutex_unlock(&hive->hive_lock);
> >>>
> >>>  amdgpu_put_xgmi_hive(hive);
> >>> +   /* device is removed from the hive so remove its reset domain
> >> reference */
> >>> +   if (adev->reset_domain && adev->reset_domain == hive-
> >>> reset_domain)
> >>> +   amdgpu_put_xgmi_hive(hive);
> >> This is some messed up reference counting. If you need an extra
> >> reference from the reset_domain to the hive, that should be owned by the
> >> reset_domain and dropped when the reset_domain is destroyed. And it's
> >> only one reference for the reset_domain, not one reference per adev in
> >> the reset_domain.
> > Cc'ing Andrey.
> >
> > What you're saying seems to make more sense to me, but what I got from an
> offline conversation with Andrey
> > was that the reset domain reference per device was intentional.
> > Maybe Andrey can comment here.
> >
> >> What you're doing here looks like every adev that's in a reset_domain of
> >> its hive has two references to the hive. And if you're dropping the
> >> extra reference here, it still leaves the reset_domain with a dangling
> >> pointer to a hive that may no longer exist. So this extra reference is
> >> kind of pointless.
>
>
> reset_domain doesn't have 

RE: [PATCH] drm/amdgpu: fix reset domain xgmi hive info reference leak

2022-08-11 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Kuehling, Felix 
> Sent: August 11, 2022 11:19 AM
> To: amd-gfx@lists.freedesktop.org; Kim, Jonathan 
> Subject: Re: [PATCH] drm/amdgpu: fix reset domain xgmi hive info reference
> leak
>
> Am 2022-08-11 um 09:42 schrieb Jonathan Kim:
> > When an xgmi node is added to the hive, it takes another hive
> > reference for its reset domain.
> >
> > This extra reference was not dropped on device removal from the
> > hive so drop it.
> >
> > Signed-off-by: Jonathan Kim 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 3 +++
> >   1 file changed, 3 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> > index 1b108d03e785..560bf1c98f08 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> > @@ -731,6 +731,9 @@ int amdgpu_xgmi_remove_device(struct
> amdgpu_device *adev)
> > mutex_unlock(&hive->hive_lock);
> >
> > amdgpu_put_xgmi_hive(hive);
> > +   /* device is removed from the hive so remove its reset domain
> reference */
> > +   if (adev->reset_domain && adev->reset_domain == hive-
> >reset_domain)
> > +   amdgpu_put_xgmi_hive(hive);
>
> This is some messed up reference counting. If you need an extra
> reference from the reset_domain to the hive, that should be owned by the
> reset_domain and dropped when the reset_domain is destroyed. And it's
> only one reference for the reset_domain, not one reference per adev in
> the reset_domain.

Cc'ing Andrey.

What you're saying seems to make more sense to me, but what I got from an 
offline conversation with Andrey
was that the reset domain reference per device was intentional.
Maybe Andrey can comment here.

>
> What you're doing here looks like every adev that's in a reset_domain of
> its hive has two references to the hive. And if you're dropping the
> extra reference here, it still leaves the reset_domain with a dangling
> pointer to a hive that may no longer exist. So this extra reference is
> kind of pointless.

Yes.  Currently one reference is held for the device's lifetime on the hive 
and the other is from the
per-device reset domain.

Snippet from amdgpu_device_ip_init:
/**
 * In case of XGMI grab extra reference for reset domain for this device
 */
if (adev->gmc.xgmi.num_physical_nodes > 1) {
if (amdgpu_xgmi_add_device(adev) == 0) { <- [JK] reference is 
fetched here
struct amdgpu_hive_info *hive = 
amdgpu_get_xgmi_hive(adev); <- [JK] then here again

if (!hive->reset_domain ||
!amdgpu_reset_get_reset_domain(hive->reset_domain)) 
{
r = -ENOENT;
goto init_failed;
}

/* Drop the early temporary reset domain we created for 
device */
amdgpu_reset_put_reset_domain(adev->reset_domain);
adev->reset_domain = hive->reset_domain;
}
}

One of these never gets dropped so a leak happens.
So either the extra reference has to be dropped on device removal from the hive 
or from what you've mentioned,
the reset_domain reference fetch should be fixed to grab at the 
hive/reset_domain level.

Thanks,

Jon

>
> Regards,
>Felix
>
>
> > adev->hive = NULL;
> >
> > if (atomic_dec_return(&hive->number_devices) == 0) {


RE: [PATCH] drm/amdgpu: fix hive reference leak when reflecting psp topology info

2022-07-28 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Liu, Shaoyun 
> Sent: July 28, 2022 1:10 PM
> To: Kim, Jonathan ; amd-
> g...@lists.freedesktop.org
> Cc: Kim, Jonathan 
> Subject: RE: [PATCH] drm/amdgpu: fix hive reference leak when reflecting
> psp topology info
>
> [AMD Official Use Only - General]
>
> Looks good to me .
> BTW , why we didn't catch it on baremetal mode  ?

Thanks for the review Shaoyun.
Good question.  I'll double check what we're doing for unload testing.

Thanks,

Jon

>
> Reviewed-by: Shaoyun.liu 
>
> -Original Message-
> From: amd-gfx  On Behalf Of
> Jonathan Kim
> Sent: Thursday, July 28, 2022 1:06 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kim, Jonathan 
> Subject: [PATCH] drm/amdgpu: fix hive reference leak when reflecting psp
> topology info
>
> Hives that require psp topology info to be reflected will leak hive reference
> so fix it.
>
> Signed-off-by: Jonathan Kim 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> index 3ee363bfbac2..6c23e89366bf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> @@ -1292,6 +1292,8 @@ static void psp_xgmi_reflect_topology_info(struct
> psp_context *psp,
>
> break;
> }
> +
> +   amdgpu_put_xgmi_hive(hive);
>  }
>
>  int psp_xgmi_get_topology_info(struct psp_context *psp,
> --
> 2.25.1
>



RE: [PATCH] drm/amdgpu: fix aldebaran xgmi topology for vf

2022-03-09 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Kuehling, Felix 
> Sent: March 9, 2022 6:12 PM
> To: Kim, Jonathan ; amd-gfx@lists.freedesktop.org
> Cc: Liu, Shaoyun 
> Subject: Re: [PATCH] drm/amdgpu: fix aldebaran xgmi topology for vf
>
> On 2022-03-09 17:16, Jonathan Kim wrote:
> > VFs must also distinguish whether or not the TA supports full duplex
> > or half duplex link records in order to report the correct xGMI topology.
> >
> > Signed-off-by: Jonathan Kim 
> I think I'm missing something here. Your condition for setting
> supports_extended_data is exactly the same, but you're initializing it in a
> different function. Can you explain how that change relates to SRIOV?

I probably should have included more context when sending this out.
The proposed support assignment happens after this:

if (amdgpu_sriov_vf(adev))
ret = psp_init_sriov_microcode(psp);
else
ret = psp_init_microcode(psp);
if (ret) {
DRM_ERROR("Failed to load psp firmware!\n");
return ret;
}

and psp_init_sriov_microcode doesn't set secure OS micro code info (this is where 
the support assignment currently is).

Thanks,

Jon

>
> Thanks,
>Felix
>
>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 6 --
> >   1 file changed, 4 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> > index 3ce1d38a7822..a6acec1a6155 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> > @@ -310,6 +310,10 @@ static int psp_sw_init(void *handle)
> > return ret;
> > }
> >
> > +   adev->psp.xgmi_context.supports_extended_data =
> > +   !adev->gmc.xgmi.connected_to_cpu &&
> > +   adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13,
> 0, 2);
> > +
> > memset(&boot_cfg_entry, 0, sizeof(boot_cfg_entry));
> > if (psp_get_runtime_db_entry(adev,
> > PSP_RUNTIME_ENTRY_TYPE_BOOT_CONFIG,
> > @@ -3008,7 +3012,6 @@ static int psp_init_sos_base_fw(struct
> amdgpu_device *adev)
> > adev->psp.sos.size_bytes = le32_to_cpu(sos_hdr-
> >sos.size_bytes);
> > adev->psp.sos.start_addr = ucode_array_start_addr +
> > le32_to_cpu(sos_hdr->sos.offset_bytes);
> > -   adev->psp.xgmi_context.supports_extended_data = false;
> > } else {
> > /* Load alternate PSP SOS FW */
> > sos_hdr_v1_3 = (const struct psp_firmware_header_v1_3
> > *)adev->psp.sos_fw->data; @@ -3023,7 +3026,6 @@ static int
> psp_init_sos_base_fw(struct amdgpu_device *adev)
> > adev->psp.sos.size_bytes = le32_to_cpu(sos_hdr_v1_3-
> >sos_aux.size_bytes);
> > adev->psp.sos.start_addr = ucode_array_start_addr +
> > le32_to_cpu(sos_hdr_v1_3->sos_aux.offset_bytes);
> > -   adev->psp.xgmi_context.supports_extended_data = true;
> > }
> >
> > if ((adev->psp.sys.size_bytes == 0) || (adev->psp.sos.size_bytes ==
> > 0)) {


RE: [PATCH] drm/amdkfd: map sdma queues onto extended engines for navi2x

2022-02-09 Thread Kim, Jonathan
[AMD Official Use Only]

> -Original Message-
> From: Kuehling, Felix 
> Sent: February 9, 2022 4:26 PM
> To: Kim, Jonathan ; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdkfd: map sdma queues onto extended engines for
> navi2x
>
>
> On 2022-02-09 11:11, Jonathan Kim wrote:
> > The hardware scheduler requires that all SDMA 5.2.x queues are put on
> > the RUN_LIST through the extended engines.
> >
> > Make extended engine unmap available as well.
> >
> > Signed-off-by: Jonathan Kim 
> > ---
> >   drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +-
> >   drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 5 +++--
> >   drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c| 8 +---
> >   drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c| 3 ++-
> >   drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 +++--
> >   5 files changed, 14 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > index 7f6f1a842b0b..f12e32335eb3 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > @@ -1555,7 +1555,7 @@ static int unmap_queues_cpsch(struct
> device_queue_manager *dqm,
> > return retval;
> >
> > retval = pm_send_unmap_queue(&dqm->packet_mgr,
> KFD_QUEUE_TYPE_COMPUTE,
> > -   filter, filter_param, reset, 0);
> > +   filter, filter_param, reset, 0, false);
>
> Does this still work correctly? We currently rely on HWS unmapping SDMA
> queues when we request unmapping of compute queues. Is that still the case
> with extended queue selection in map_queues?

I wasn't aware of the implicit sdma unmap ...
That makes much more sense.

I followed up on the FW spec and apparently as long as 
extended_engine_select=0x1 (sdma0_sdma7),
a single call to unmap all queues or all dynamic queues will unmap both compute
queues mapped in legacy mode and sdma queues mapped in extended engine mode.

>
> How would the caller know to set this to "true"? For mapping, this detail is
> hidden in the packet-manager implementation. But for unmapping the caller
> needs to know? That doesn't make sense. But we could probably remove the
> SDMA filtering functionality from pm_send_unmap_queue completely. I don't
> see any calls where we try to unmap specific SDMA queues. Since we always
> have to replace the entire runlist anyway, there is not use case for it.

Agreed.
Aside from removing SDMA checks, maybe also pass the device itself through to 
pm_send_unmap_queue then?
Or could it be the SDMA ip version?
That way we can hide the check to toggle between extended_engine_select = 0x0 
or 0x1 from the caller.

Thanks,

Jon

>
> Regards,
>Felix
>
>
> > if (retval)
> > return retval;
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> > b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> > index 1439420925a0..8694cfcd57d1 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> > @@ -371,7 +371,8 @@ int pm_send_query_status(struct packet_manager
> *pm, uint64_t fence_address,
> >   int pm_send_unmap_queue(struct packet_manager *pm, enum
> kfd_queue_type type,
> > enum kfd_unmap_queues_filter filter,
> > uint32_t filter_param, bool reset,
> > -   unsigned int sdma_engine)
> > +   unsigned int sdma_engine,
> > +   bool is_sdma_ext)
> >   {
> > uint32_t *buffer, size;
> > int retval = 0;
> > @@ -387,7 +388,7 @@ int pm_send_unmap_queue(struct packet_manager
> *pm, enum kfd_queue_type type,
> > }
> >
> > retval = pm->pmf->unmap_queues(pm, buffer, type, filter, filter_param,
> > -  reset, sdma_engine);
> > +  reset, sdma_engine, is_sdma_ext);
> > if (!retval)
> > kq_submit_packet(pm->priv_queue);
> > else
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> > b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> > index 7ea3f671b325..08f736080b7e 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> > @@ -183,6 +183,7 @@ static int pm_map_queues_v9(struct packet_manager
> *pm, uint32_t *buff

RE: [PATCH] drm/amdgpu: remove gart.ready flag

2022-01-20 Thread Kim, Jonathan
[Public]

Switching to a VRAM bounce buffer can drop performance around 4x~6x on Vega20 
over larger access so it's not desired.

Jon

> -Original Message-
> From: Koenig, Christian 
> Sent: January 20, 2022 9:10 AM
> To: Chen, Guchun ; Christian König
> ; Kim, Jonathan
> ; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdgpu: remove gart.ready flag
>
> I actually suggested to allocate the bounce buffer in VRAM, but that add a
> bit more latency.
>
> Christian.
>
> Am 20.01.22 um 15:00 schrieb Chen, Guchun:
> > [Public]
> >
> > Hi Christian,
> >
> > Unfortunately, your patch brings another warning from the same
> sdma_access_bo's creation in amdgpu_ttm_init.
> >
> > In your patch, you introduce a new check of WARN_ON(!adev->gart.ptr)),
> however, sdma_access_bo targets to create a bo in GTT domain, but adev-
> >gart.ptr is ready in gmc_v10_0_gart_init only.
> >
> > Hi Jonathan,
> >
> > Is it mandatory to create this sdma_access_bo in GTT domain? Can we
> change it to VRAM?
> >
> > Regards,
> > Guchun
> >
> > -Original Message-
> > From: Koenig, Christian 
> > Sent: Wednesday, January 19, 2022 10:38 PM
> > To: Chen, Guchun ; Christian König
> > ; Kim, Jonathan
> > ; amd-gfx@lists.freedesktop.org
> > Subject: Re: [PATCH] drm/amdgpu: remove gart.ready flag
> >
> > Hi Guchun,
> >
> > yes, just haven't found time to do this yet.
> >
> > Regards,
> > Christian.
> >
> > Am 19.01.22 um 15:24 schrieb Chen, Guchun:
> >> [Public]
> >>
> >> Hello Christian,
> >>
> >> Do you plan to submit your code to drm-next branch?
> >>
> >> Regards,
> >> Guchun
> >>
> >> -Original Message-
> >> From: Chen, Guchun
> >> Sent: Tuesday, January 18, 2022 10:22 PM
> >> To: 'Christian König' ; Kim,
> >> Jonathan ; amd-gfx@lists.freedesktop.org
> >> Subject: RE: [PATCH] drm/amdgpu: remove gart.ready flag
> >>
> >> [Public]
> >>
> >> Thanks for the clarification. The patch is:
> >> Reviewed-by: Guchun Chen 
> >>
> >> Regards,
> >> Guchun
> >>
> >> -Original Message-
> >> From: Christian König 
> >> Sent: Tuesday, January 18, 2022 10:10 PM
> >> To: Chen, Guchun ; Kim, Jonathan
> >> ; amd-gfx@lists.freedesktop.org
> >> Subject: Re: [PATCH] drm/amdgpu: remove gart.ready flag
> >>
> >> Am 18.01.22 um 14:28 schrieb Chen, Guchun:
> >>> [Public]
> >>>
> >>> - if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev))
> >>> - goto skip_pin_bo;
> >>> -
> >>> - r = amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
> >>> - if (r)
> >>> - return r;
> >>> -
> >>> -skip_pin_bo:
> >>>
> >>> Does deleting skip_pin_bo path cause bo redundant pin in SRIOV case?
> >> Pinning/unpinning the BO was already removed as well.
> >>
> >> See Nirmoy's patches in the git log.
> >>
> >> Regards,
> >> Christian.
> >>
> >>> Regards,
> >>> Guchun
> >>>
> >>> -Original Message-
> >>> From: Christian König 
> >>> Sent: Tuesday, January 18, 2022 8:02 PM
> >>> To: Chen, Guchun ; Kim, Jonathan
> >>> ; amd-gfx@lists.freedesktop.org
> >>> Subject: [PATCH] drm/amdgpu: remove gart.ready flag
> >>>
> >>> That's just a leftover from old radeon days and was preventing CS and
> GART bindings before the hardware was initialized. But nowdays that is
> perfectly valid.
> >>>
> >>> The only thing we need to warn about are GART binding before the
> table is even allocated.
> >>>
> >>> Signed-off-by: Christian König 
> >>> ---
> >>> drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c| 35 +++---
> >>> drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h| 15 ++--
> >>> drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c |  9 +--
> >>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 77 ++-
> --
> >>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  4 +-
> >>> drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 11 +--
> >>> drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c   |  7 +-
> >>> drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   |  8 +--
> >>> 

RE: amd-staging-drm-next breaks suspend

2022-01-19 Thread Kim, Jonathan
[Public]

This should fix the issue by getting rid of the unneeded flag check during gart 
bind:
https://patchwork.freedesktop.org/patch/469907/

Thanks,

Jon

> -Original Message-
> From: amd-gfx  On Behalf Of Bert
> Karwatzki
> Sent: January 19, 2022 8:12 PM
> To: Alex Deucher 
> Cc: Chris Hixon ; Zhuo, Qingqing
> (Lillian) ; Das, Nirmoy
> ; amd-gfx@lists.freedesktop.org; Scott Bruce
> ; Limonciello, Mario
> ; Kazlauskas, Nicholas
> 
> Subject: Re: amd-staging-drm-next breaks suspend
>
> [CAUTION: External Email]
>
> Unfortunately this does not work either:
>
> [0.859998] [ cut here ]
> [0.859998] trying to bind memory to uninitialized GART !
> [0.860003] WARNING: CPU: 13 PID: 235 at
> drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c:254
> amdgpu_gart_bind+0x29/0x40 [amdgpu]
> [0.860099] Modules linked in: amdgpu(+) drm_ttm_helper ttm
> gpu_sched i2c_algo_bit drm_kms_helper syscopyarea hid_sensor_hub
> sysfillrect mfd_core sysimgblt hid_generic fb_sys_fops cec xhci_pci
> xhci_hcd nvme drm r8169 nvme_core psmouse crc32c_intel realtek
> amd_sfh usbcore i2c_hid_acpi mdio_devres t10_pi crc_t10dif i2c_hid
> i2c_piix4 crct10dif_generic libphy crct10dif_common hid backlight
> i2c_designware_platform i2c_designware_core
> [0.860113] CPU: 13 PID: 235 Comm: systemd-udevd Not tainted 5.13.0+
> #15
> [0.860115] Hardware name: Micro-Star International Co., Ltd. Alpha
> 15 B5EEK/MS-158L, BIOS E158LAMS.107 11/10/2021
> [0.860116] RIP: 0010:amdgpu_gart_bind+0x29/0x40 [amdgpu]
> [0.860210] Code: 00 80 bf 34 25 00 00 00 74 14 4c 8b 8f 20 25 00 00
> 4d 85 c9 74 05 e9 16 ff ff ff 31 c0 c3 48 c7 c7 08 06 7d c0 e8 8e cc 31
> e2 <0f> 0b b8 ea ff ff ff c3 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40
> [0.860212] RSP: 0018:bb9e80b6f968 EFLAGS: 00010286
> [0.860213] RAX:  RBX: 0067 RCX:
> a3080968
> [0.860214] RDX:  RSI: efff RDI:
> a3028960
> [0.860215] RBP: 947c91e49a80 R08:  R09:
> bb9e80b6f798
> [0.860215] R10: bb9e80b6f790 R11: a30989a8 R12:
> 
> [0.860216] R13: 947c8a74 R14: 947c8a74 R15:
> 
> [0.860216] FS:  7f60a3c918c0() GS:947f5e94()
> knlGS:
> [0.860217] CS:  0010 DS:  ES:  CR0: 80050033
> [0.860218] CR2: 7f60a4213480 CR3: 000135ee2000 CR4:
> 00550ee0
> [0.860218] PKRU: 5554
> [0.860219] Call Trace:
> [0.860221]  amdgpu_ttm_gart_bind+0x74/0xc0 [amdgpu]
> [0.860305]  amdgpu_ttm_alloc_gart+0x13e/0x190 [amdgpu]
> [0.860385]  amdgpu_bo_create_reserved.part.0+0xf3/0x1b0 [amdgpu]
> [0.860465]  ? amdgpu_ttm_debugfs_init+0x110/0x110 [amdgpu]
> [0.860554]  amdgpu_bo_create_kernel+0x36/0xa0 [amdgpu]
> [0.860641]  amdgpu_ttm_init.cold+0x167/0x181 [amdgpu]
> [0.860784]  gmc_v10_0_sw_init+0x2d7/0x430 [amdgpu]
> [0.860889]  amdgpu_device_init.cold+0x147f/0x1ad7 [amdgpu]
> [0.861007]  ? acpi_ns_get_node+0x4a/0x55
> [0.861011]  ? acpi_get_handle+0x89/0xb2
> [0.861012]  amdgpu_driver_load_kms+0x55/0x290 [amdgpu]
> [0.861098]  amdgpu_pci_probe+0x181/0x250 [amdgpu]
> [0.861188]  pci_device_probe+0xcd/0x140
> [0.861191]  really_probe+0xed/0x460
> [0.861193]  driver_probe_device+0xe3/0x150
> [0.861195]  device_driver_attach+0x9c/0xb0
> [0.861196]  __driver_attach+0x8a/0x150
> [0.861197]  ? device_driver_attach+0xb0/0xb0
> [0.861198]  ? device_driver_attach+0xb0/0xb0
> [0.861198]  bus_for_each_dev+0x73/0xb0
> [0.861200]  bus_add_driver+0x121/0x1e0
> [0.861201]  driver_register+0x8a/0xe0
> [0.861202]  ? 0xc1117000
> [0.861203]  do_one_initcall+0x47/0x180
> [0.861205]  ? do_init_module+0x19/0x230
> [0.861208]  ? kmem_cache_alloc+0x182/0x260
> [0.861210]  do_init_module+0x51/0x230
> [0.861211]  __do_sys_finit_module+0xb1/0x110
> [0.861213]  do_syscall_64+0x40/0xb0
> [0.861216]  entry_SYSCALL_64_after_hwframe+0x44/0xae
> [0.861218] RIP: 0033:0x7f60a4149679
> [0.861220] Code: 48 8d 3d 9a a1 0c 00 0f 05 eb a5 66 0f 1f 44 00 00
> 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f
> 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d c7 57 0c 00 f7 d8 64 89 01 48
> [0.861221] RSP: 002b:7ffe25f17ea8 EFLAGS: 0246 ORIG_RAX:
> 0139
> [0.861223] RAX: ffda RBX: 56004a10a660 RCX:
> 7f60a4149679
> [0.861224] RDX:  RSI: 7f60a42e9eed RDI:
> 0016
> [0.861224] RBP: 0002 R08:  R09:
> 56004a105980
> [0.861225] R10: 0016 R11: 0246 R12:
> 7f60a42e9eed
> [0.861225] R13:  R14: 56004a0efdd0 R15:
> 56004a10a660
> [0.861226] ---[ end trace 0319f26df48f8ef0 ]---
> [0.861228] [drm:amdgpu_ttm_gart_bind [amdgpu]] *ERROR* f

RE: [PATCH] drm/amdgpu: improve debug VRAM access performance using sdma

2022-01-12 Thread Kim, Jonathan
[Public]

Thanks Christian.  I've already merged based on Felix's review.
I'll send your suggested cleanup for review out soon.

Jon

> -Original Message-
> From: Koenig, Christian 
> Sent: January 12, 2022 2:33 AM
> To: Kim, Jonathan ; amd-
> g...@lists.freedesktop.org
> Cc: Kuehling, Felix 
> Subject: Re: [PATCH] drm/amdgpu: improve debug VRAM access
> performance using sdma
>
> Am 04.01.22 um 20:12 schrieb Jonathan Kim:
> > For better performance during VRAM access for debugged processes, do
> > read/write copies over SDMA.
> >
> > In order to fulfill post mortem debugging on a broken device, fallback
> > to stable MMIO access when gpu recovery is disabled or when job
> > submission time outs are set to max.  Failed SDMA access should
> > automatically fall back to MMIO access.
> >
> > Use a pre-allocated GTT bounce buffer pre-mapped into GART to avoid
> > page-table updates and TLB flushes on access.
> >
> > Signed-off-by: Jonathan Kim 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 78
> +
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  5 +-
> >   2 files changed, 82 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > index 367abed1d6e6..512df4c09772 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > @@ -48,6 +48,7 @@
> >   #include 
> >
> >   #include 
> > +#include 
> >
> >   #include "amdgpu.h"
> >   #include "amdgpu_object.h"
> > @@ -1429,6 +1430,70 @@ static void
> amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
> > }
> >   }
> >
> > +static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object
> *bo,
> > +   unsigned long offset, void *buf, int
> len, int write) {
> > +   struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
> > +   struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
> > +   struct amdgpu_job *job;
> > +   struct dma_fence *fence;
> > +   uint64_t src_addr, dst_addr;
> > +   unsigned int num_dw;
> > +   int r, idx;
> > +
> > +   if (len != PAGE_SIZE)
> > +   return -EINVAL;
> > +
> > +   if (!adev->mman.sdma_access_ptr)
> > +   return -EACCES;
> > +
> > +   r = drm_dev_enter(adev_to_drm(adev), &idx);
> > +   if (r)
> > +   return r;
> > +
> > +   if (write)
> > +   memcpy(adev->mman.sdma_access_ptr, buf, len);
> > +
> > +   num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
> > +   r = amdgpu_job_alloc_with_ib(adev, num_dw * 4,
> AMDGPU_IB_POOL_DELAYED, &job);
> > +   if (r)
> > +   goto out;
> > +
> > +   src_addr = write ? amdgpu_bo_gpu_offset(adev-
> >mman.sdma_access_bo) :
> > +   amdgpu_bo_gpu_offset(abo);
> > +   dst_addr = write ? amdgpu_bo_gpu_offset(abo) :
> > +   amdgpu_bo_gpu_offset(adev-
> >mman.sdma_access_bo);
>
> I suggest to write this as
>
> src_addr = a;
> dst_addr = b;
> if (write)
>  swap(src_addr, dst_addr);
>
> This way we are not duplicating getting the different offsets.
>
> > +   amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
> > +PAGE_SIZE, false);
> > +
> > +   amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job-
> >ibs[0]);
> > +   WARN_ON(job->ibs[0].length_dw > num_dw);
> > +
> > +   r = amdgpu_job_submit(job, &adev->mman.entity,
> AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
> > +   if (r) {
> > +   amdgpu_job_free(job);
> > +   goto out;
> > +   }
> > +
> > +   if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
> > +   r = -ETIMEDOUT;
> > +   dma_fence_put(fence);
> > +
> > +   if (!(r || write))
> > +   memcpy(buf, adev->mman.sdma_access_ptr, len);
> > +out:
> > +   drm_dev_exit(idx);
> > +   return r;
> > +}
> > +
> > +static inline bool amdgpu_ttm_allow_post_mortem_debug(struct
> > +amdgpu_device *adev) {
> > +   return amdgpu_gpu_recovery == 0 ||
> > +   adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
> > +   adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
> > +   adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT ||
> > +   adev->video_timeout == MAX_SCHEDULE_TI

RE: [PATCH] drm/amdkfd: correct sdma queue number in kfd device init (v2)

2021-12-20 Thread Kim, Jonathan



> -Original Message-
> From: Sider, Graham 
> Sent: December 20, 2021 1:19 AM
> To: Kim, Jonathan ; Chen, Guchun
> ; amd-gfx@lists.freedesktop.org; Deucher,
> Alexander ; Kuehling, Felix
> 
> Subject: RE: [PATCH] drm/amdkfd: correct sdma queue number in kfd
> device init (v2)
> 
> [Public]
> 
> > -Original Message-
> > From: Kim, Jonathan 
> > Sent: Monday, December 20, 2021 12:44 AM
> > To: Chen, Guchun ; amd-
> > g...@lists.freedesktop.org; Deucher, Alexander
> > ; Sider, Graham
> ;
> > Kuehling, Felix 
> > Subject: RE: [PATCH] drm/amdkfd: correct sdma queue number in kfd
> > device init (v2)
> >
> > [AMD Official Use Only]
> >
> > > -Original Message-
> > > From: Chen, Guchun 
> > > Sent: December 19, 2021 10:09 PM
> > > To: amd-gfx@lists.freedesktop.org; Deucher, Alexander
> > > ; Sider, Graham
> > ;
> > > Kuehling, Felix ; Kim, Jonathan
> > > 
> > > Cc: Chen, Guchun 
> > > Subject: [PATCH] drm/amdkfd: correct sdma queue number in kfd
> device
> > > init (v2)
> > >
> > > sdma queue number is not correct like on vega20, this patch promises
> > > the
> >
> > I think you've also fixed Vega12 and Raven (they were being set to 8
> > before rather than 2).  No need to mention this in your description,
> > just double checking.
> >
> 
> I believe it was only Vega20 that was being set incorrectly. The condition
> was:
> 
>   sdma_version >= IP_VERSION(4, 0, 0)  &&
>   sdma_version <= IP_VERSION(4, 2, 0))
> 
> which encapsulates Vega12 and Raven setting sdma_queues_per_engine to
> 2, but also accidently encapsulates Vega20.

Ah right.  It was a range check before. 

Thanks,

Jon

> 
> > > setting keeps the same after code refactor.
> > > Additionally, improve code to use switch case to list IP version to
> > > complete kfd device_info structure filling.
> > > This keeps consistency with the IP parse code in amdgpu_discovery.c.
> > >
> > > v2: use dev_warn for the default switch case;
> > > set default sdma queue per engine(8) and IH handler to v9.
> > > (Jonathan)
> > >
> > > Fixes: a9e2c4dc6cc4("drm/amdkfd: add kfd_device_info_init function")
> > > Signed-off-by: Guchun Chen 
> >
> > Other than the missing checks for Raven when setting the interrupt
> > class (see inline comments and reference kgd2kfd_probe in
> > kfd_device.c) and one nit-pick inline, this looks good to me.
> >
> > With those fixed, this patch is
> > Reviewed-by: Jonathan Kim 
> >
> 
> Other than Jon's comments, this patch is also
> 
> Reviewed-by: Graham Sider 
> 
> > > ---
> > >  drivers/gpu/drm/amd/amdkfd/kfd_device.c | 77
> > > ++---
> > >  1 file changed, 68 insertions(+), 9 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > > b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > > index facc28f58c1f..36406a261203 100644
> > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > > @@ -59,11 +59,75 @@ static void kfd_gtt_sa_fini(struct kfd_dev
> > > *kfd);
> > >
> > >  static int kfd_resume(struct kfd_dev *kfd);
> > >
> > > +static void kfd_device_info_set_sdma_queue_num(struct kfd_dev
> *kfd)
> > {
> > > + uint32_t sdma_version = kfd->adev-
> > > >ip_versions[SDMA0_HWIP][0];
> > > +
> > > + switch (sdma_version) {
> >
> > Please pull in the indentation for all cases to line up with the switch 
> > block.
> >
> > > + case IP_VERSION(4, 0, 0):/* VEGA10 */
> > > + case IP_VERSION(4, 0, 1):/* VEGA12 */
> > > + case IP_VERSION(4, 1, 0):/* RAVEN */
> > > + case IP_VERSION(4, 1, 1):/* RAVEN */
> >
> > As mentioned, you've also fixed Vega12 & Raven here I presume since
> > afaik, they're based off Vega10?
> >
> + case IP_VERSION(4, 1, 2):/* RENOIR */
> > > + case IP_VERSION(5, 2, 1):/* VANGOGH */
> > > + case IP_VERSION(5, 2, 3):/* YELLOW_CARP */
> > > + kfd->device_info.num_sdma_queues_per_engine =
> > > 2;
> > > + break;
> > > + case IP_VERSION(4, 2, 0):/* VEGA20 */
> > > + case IP_VERSION(4, 2, 2):/* AR

RE: [PATCH] drm/amdkfd: correct sdma queue number in kfd device init (v2)

2021-12-19 Thread Kim, Jonathan
[AMD Official Use Only]

> -Original Message-
> From: Chen, Guchun 
> Sent: December 19, 2021 10:09 PM
> To: amd-gfx@lists.freedesktop.org; Deucher, Alexander
> ; Sider, Graham
> ; Kuehling, Felix ;
> Kim, Jonathan 
> Cc: Chen, Guchun 
> Subject: [PATCH] drm/amdkfd: correct sdma queue number in kfd device
> init (v2)
>
> sdma queue number is not correct like on vega20, this patch promises the

I think you've also fixed Vega12 and Raven (they were being set to 8 before 
rather than 2).  No need to mention this in your description, just double 
checking.

> setting keeps the same after code refactor.
> Additionally, improve code to use switch case to list IP version to complete
> kfd device_info structure filling.
> This keeps consistency with the IP parse code in amdgpu_discovery.c.
>
> v2: use dev_warn for the default switch case;
> set default sdma queue per engine(8) and IH handler to v9. (Jonathan)
>
> Fixes: a9e2c4dc6cc4("drm/amdkfd: add kfd_device_info_init function")
> Signed-off-by: Guchun Chen 

Other than the missing checks for Raven when setting the interrupt class (see 
inline comments and reference kgd2kfd_probe in kfd_device.c) and one nit-pick 
inline, this looks good to me.

With those fixed, this patch is
Reviewed-by: Jonathan Kim 

> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c | 77
> ++---
>  1 file changed, 68 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index facc28f58c1f..36406a261203 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -59,11 +59,75 @@ static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
>
>  static int kfd_resume(struct kfd_dev *kfd);
>
> +static void kfd_device_info_set_sdma_queue_num(struct kfd_dev *kfd) {
> + uint32_t sdma_version = kfd->adev-
> >ip_versions[SDMA0_HWIP][0];
> +
> + switch (sdma_version) {

Please pull in the indentation for all cases to line up with the switch block.

> + case IP_VERSION(4, 0, 0):/* VEGA10 */
> + case IP_VERSION(4, 0, 1):/* VEGA12 */
> + case IP_VERSION(4, 1, 0):/* RAVEN */
> + case IP_VERSION(4, 1, 1):/* RAVEN */

As mentioned, you've also fixed Vega12 & Raven here I presume since afaik, 
they're based off Vega10?

> + case IP_VERSION(4, 1, 2):/* RENIOR */
> + case IP_VERSION(5, 2, 1):/* VANGOGH */
> + case IP_VERSION(5, 2, 3):/* YELLOW_CARP */
> + kfd->device_info.num_sdma_queues_per_engine =
> 2;
> + break;
> + case IP_VERSION(4, 2, 0):/* VEGA20 */
> + case IP_VERSION(4, 2, 2):/* ARCTUTUS */
> + case IP_VERSION(4, 4, 0):/* ALDEBARAN */
> + case IP_VERSION(5, 0, 0):/* NAVI10 */
> + case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */
> + case IP_VERSION(5, 0, 2):/* NAVI14 */
> + case IP_VERSION(5, 0, 5):/* NAVI12 */
> + case IP_VERSION(5, 2, 0):/* SIENNA_CICHLID */
> + case IP_VERSION(5, 2, 2):/* NAVY_FLOUDER */
> + case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */
> + kfd->device_info.num_sdma_queues_per_engine =
> 8;
> + break;
> + default:
> + dev_warn(kfd_device,
> + "Default sdma queue per engine(8) is set due
> to "
> + "mismatch of sdma ip
> block(SDMA_HWIP:0x%x).\n",
> +sdma_version);
> + kfd->device_info.num_sdma_queues_per_engine =
> 8;
> + }
> +}
> +
> +static void kfd_device_info_set_event_interrupt_class(struct kfd_dev
> +*kfd) {
> + uint32_t gc_version = KFD_GC_VERSION(kfd);
> +
> + switch (gc_version) {
> + case IP_VERSION(9, 0, 1): /* VEGA10 */

Missing check for case IP_VERSION(9, 1, 0): /* RAVEN */

> + case IP_VERSION(9, 2, 1): /* VEGA12 */

Missing check for case IP_VERSION(9, 2, 2): /* RAVEN */

Thanks,

Jon

> + case IP_VERSION(9, 3, 0): /* RENOIR */
> + case IP_VERSION(9, 4, 0): /* VEGA20 */
> + case IP_VERSION(9, 4, 1): /* ARCTURUS */
> + case IP_VERSION(9, 4, 2): /* ALDEBARAN */
> + case IP_VERSION(10, 3, 1): /* VANGOGH */
> + case IP_VERSION(10, 3, 3): /* YELLOW_CARP */
> + case IP_VERSION(10, 1, 3): /* CYAN_SKILLFISH */
> + case IP_VERSION(10, 1, 10): /* NAVI10 */
> + case IP_VERSION(10, 1, 2): /* NAVI12 */
> + case IP_VERSION(10, 1, 1): /* NAVI14 */
> + case IP_VERSION(10, 3, 0): /* SIENNA_CICHLID */
> + c

RE: [PATCH] drm/amdkfd: correct sdma queue number in kfd device init

2021-12-17 Thread Kim, Jonathan



> -Original Message-
> From: Sider, Graham 
> Sent: December 17, 2021 10:06 AM
> To: Chen, Guchun ; amd-
> g...@lists.freedesktop.org; Deucher, Alexander
> ; Kuehling, Felix
> ; Kim, Jonathan 
> Subject: RE: [PATCH] drm/amdkfd: correct sdma queue number in kfd
> device init
> 
> [Public]
> 
> > -Original Message-
> > From: Chen, Guchun 
> > Sent: Friday, December 17, 2021 9:31 AM
> > To: amd-gfx@lists.freedesktop.org; Deucher, Alexander
> > ; Sider, Graham
> ;
> > Kuehling, Felix ; Kim, Jonathan
> > 
> > Cc: Chen, Guchun 
> > Subject: [PATCH] drm/amdkfd: correct sdma queue number in kfd device
> > init
> >
> > sdma queue number is not correct like on vega20, this patch promises
> > the setting keeps the same after code refactor.
> > Additionally, improve code to use switch case to list IP version to
> > complete kfd device_info structure filling.
> > This keeps consistency with the IP parse code in amdgpu_discovery.c.
> >
> > Fixes: a9e2c4dc6cc4("drm/amdkfd: add kfd_device_info_init function")
> > Signed-off-by: Guchun Chen 
> > ---
> >  drivers/gpu/drm/amd/amdkfd/kfd_device.c | 74
> > ++---
> >  1 file changed, 65 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > index facc28f58c1f..e50bf992f298 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > @@ -59,11 +59,72 @@ static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
> >
> >  static int kfd_resume(struct kfd_dev *kfd);
> >
> > +static void kfd_device_info_set_sdma_queue_num(struct kfd_dev *kfd)
> {
> > +   uint32_t sdma_version = kfd->adev-
> >ip_versions[SDMA0_HWIP][0];
> > +
> > +   switch (sdma_version) {
> > +   case IP_VERSION(4, 0, 0):/* VEGA10 */
> > +   case IP_VERSION(4, 0, 1):/* VEGA12 */
> > +   case IP_VERSION(4, 1, 0):/* RAVEN */
> > +   case IP_VERSION(4, 1, 1):/* RAVEN */
> > +   case IP_VERSION(4, 1, 2):/* RENIOR */
> > +   case IP_VERSION(5, 2, 1):/* VANGOGH */
> > +   case IP_VERSION(5, 2, 3):/* YELLOW_CARP */
> > +   kfd->device_info.num_sdma_queues_per_engine =
> > 2;
> > +   break;
> > +   case IP_VERSION(4, 2, 0):/* VEGA20 */
> 
> Thanks for spotting this Guchun. My previous patch should have used a "<"
> instead of a "<=" on IP_VERSION(4, 2, 0).
> 
> > +   case IP_VERSION(4, 2, 2):/* ARCTUTUS */
> > +   case IP_VERSION(4, 4, 0):/* ALDEBARAN */
> > +   case IP_VERSION(5, 0, 0):/* NAVI10 */
> > +   case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */
> > +   case IP_VERSION(5, 0, 2):/* NAVI14 */
> > +   case IP_VERSION(5, 0, 5):/* NAVI12 */
> > +   case IP_VERSION(5, 2, 0):/* SIENNA_CICHLID */
> > +   case IP_VERSION(5, 2, 2):/* NAVY_FLOUDER */
> > +   case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */
> > +   kfd->device_info.num_sdma_queues_per_engine =
> > 8;
> > +   break;
> > +   default:
> > +   dev_err(kfd_device,
> > +   "Failed to find sdma ip
> > blocks(SDMA_HWIP:0x%x) in %s\n",
> > +sdma_version, __func__);
> > +   }
> > +}
> > +
> > +static void kfd_device_info_set_event_interrupt_class(struct kfd_dev
> > +*kfd) {
> > +   uint32_t gc_version = KFD_GC_VERSION(kfd);
> > +
> > +   switch (gc_version) {
> > +   case IP_VERSION(9, 0, 1): /* VEGA10 */
> > +   case IP_VERSION(9, 2, 1): /* VEGA12 */
> > +   case IP_VERSION(9, 3, 0): /* RENOIR */
> > +   case IP_VERSION(9, 4, 0): /* VEGA20 */
> > +   case IP_VERSION(9, 4, 1): /* ARCTURUS */
> > +   case IP_VERSION(9, 4, 2): /* ALDEBARAN */
> > +   case IP_VERSION(10, 3, 1): /* VANGOGH */
> > +   case IP_VERSION(10, 3, 3): /* YELLOW_CARP */
> > +   case IP_VERSION(10, 1, 3): /* CYAN_SKILLFISH */
> > +   case IP_VERSION(10, 1, 10): /* NAVI10 */
> > +   case IP_VERSION(10, 1, 2): /* NAVI12 */
> > +   case IP_VERSION(10, 1, 1): /* NAVI14 */
> > +   case IP_VERSION(10, 3, 0): /* SIENNA_CICHLID */
> > +   case IP_VERSION(10, 3, 2): /* NAVY_FLOUNDER */
> > +   case IP_VERSION(10, 3, 4): /* DIMGREY_CAVEFISH */
> > +   case IP_VERSION(10, 3, 5): /* BEIGE_GOBY */
> > +   kfd->device_inf

RE: [PATCH] drm/amdkfd: correct sdma queue number in kfd device init

2021-12-17 Thread Kim, Jonathan
[AMD Official Use Only]

Are safeguards required for KFD interrupt initialization to fail gracefully in 
the event of a non-assignment?
Same would apply when KGD forwards interrupts to the KFD (although the KFD 
device reference might not exist at this point if the above comment is handled 
well so a check may not apply in this case).

Also should the dev_errs mention what it's failing to do rather than just 
reporting that it could not find the HW IP block?
In the case of non-assignment of sdma queues per engine, it still seems like 
the KFD could move forward but the user wouldn't know what the context of the 
dev_err was.

Thanks,

Jon

> -Original Message-
> From: Chen, Guchun 
> Sent: December 17, 2021 9:31 AM
> To: amd-gfx@lists.freedesktop.org; Deucher, Alexander
> ; Sider, Graham
> ; Kuehling, Felix ;
> Kim, Jonathan 
> Cc: Chen, Guchun 
> Subject: [PATCH] drm/amdkfd: correct sdma queue number in kfd device
> init
>
> sdma queue number is not correct like on vega20, this patch promises the
> setting keeps the same after code refactor.
> Additionally, improve code to use switch case to list IP version to complete
> kfd device_info structure filling.
> This keeps consistency with the IP parse code in amdgpu_discovery.c.
>
> Fixes: a9e2c4dc6cc4("drm/amdkfd: add kfd_device_info_init function")
> Signed-off-by: Guchun Chen 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c | 74
> ++---
>  1 file changed, 65 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index facc28f58c1f..e50bf992f298 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -59,11 +59,72 @@ static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
>
>  static int kfd_resume(struct kfd_dev *kfd);
>
> +static void kfd_device_info_set_sdma_queue_num(struct kfd_dev *kfd) {
> + uint32_t sdma_version = kfd->adev-
> >ip_versions[SDMA0_HWIP][0];
> +
> + switch (sdma_version) {
> + case IP_VERSION(4, 0, 0):/* VEGA10 */
> + case IP_VERSION(4, 0, 1):/* VEGA12 */
> + case IP_VERSION(4, 1, 0):/* RAVEN */
> + case IP_VERSION(4, 1, 1):/* RAVEN */
> + case IP_VERSION(4, 1, 2):/* RENIOR */
> + case IP_VERSION(5, 2, 1):/* VANGOGH */
> + case IP_VERSION(5, 2, 3):/* YELLOW_CARP */
> + kfd->device_info.num_sdma_queues_per_engine =
> 2;
> + break;
> + case IP_VERSION(4, 2, 0):/* VEGA20 */
> + case IP_VERSION(4, 2, 2):/* ARCTUTUS */
> + case IP_VERSION(4, 4, 0):/* ALDEBARAN */
> + case IP_VERSION(5, 0, 0):/* NAVI10 */
> + case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */
> + case IP_VERSION(5, 0, 2):/* NAVI14 */
> + case IP_VERSION(5, 0, 5):/* NAVI12 */
> + case IP_VERSION(5, 2, 0):/* SIENNA_CICHLID */
> + case IP_VERSION(5, 2, 2):/* NAVY_FLOUDER */
> + case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */
> + kfd->device_info.num_sdma_queues_per_engine =
> 8;
> + break;
> + default:
> + dev_err(kfd_device,
> + "Failed to find sdma ip
> blocks(SDMA_HWIP:0x%x) in %s\n",
> +sdma_version, __func__);
> + }
> +}
> +
> +static void kfd_device_info_set_event_interrupt_class(struct kfd_dev
> +*kfd) {
> + uint32_t gc_version = KFD_GC_VERSION(kfd);
> +
> + switch (gc_version) {
> + case IP_VERSION(9, 0, 1): /* VEGA10 */
> + case IP_VERSION(9, 2, 1): /* VEGA12 */
> + case IP_VERSION(9, 3, 0): /* RENOIR */
> + case IP_VERSION(9, 4, 0): /* VEGA20 */
> + case IP_VERSION(9, 4, 1): /* ARCTURUS */
> + case IP_VERSION(9, 4, 2): /* ALDEBARAN */
> + case IP_VERSION(10, 3, 1): /* VANGOGH */
> + case IP_VERSION(10, 3, 3): /* YELLOW_CARP */
> + case IP_VERSION(10, 1, 3): /* CYAN_SKILLFISH */
> + case IP_VERSION(10, 1, 10): /* NAVI10 */
> + case IP_VERSION(10, 1, 2): /* NAVI12 */
> + case IP_VERSION(10, 1, 1): /* NAVI14 */
> + case IP_VERSION(10, 3, 0): /* SIENNA_CICHLID */
> + case IP_VERSION(10, 3, 2): /* NAVY_FLOUNDER */
> + case IP_VERSION(10, 3, 4): /* DIMGREY_CAVEFISH */
> + case IP_VERSION(10, 3, 5): /* BEIGE_GOBY */
> + kfd->device_info.event_interrupt_class =
> &event_interrupt_class_v9;
> + break;
> + default:
> + dev_err(kfd_device, "Failed to find gc ip
> blocks(GC_HWIP:0x%x) in %s\n",
> +   

RE: [PATCH] drm/amdkfd: add Navi2x to GWS init conditions

2021-12-09 Thread Kim, Jonathan
[AMD Official Use Only]

> -Original Message-
> From: Sider, Graham 
> Sent: December 9, 2021 1:33 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kim, Jonathan ; Kuehling, Felix
> ; Sider, Graham 
> Subject: [PATCH] drm/amdkfd: add Navi2x to GWS init conditions
>
> Initialize GWS on Navi2x with mec2_fw_version >= 0x42.
>
> Signed-off-by: Graham Sider 

Reviewed-and-tested-by: Jonathan Kim 

> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index facc28f58c1f..67dd94b0b9a7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -368,7 +368,10 @@ static int kfd_gws_init(struct kfd_dev *kfd)
>   (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)
>   && kfd->mec2_fw_version >= 0x30)   ||
>   (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)
> - && kfd->mec2_fw_version >= 0x28
> + && kfd->mec2_fw_version >= 0x28)   ||
> + (KFD_GC_VERSION(kfd) >= IP_VERSION(10, 3, 0)
> + && KFD_GC_VERSION(kfd) <= IP_VERSION(10, 3, 5)
> + && kfd->mec2_fw_version >= 0x42
>   ret = amdgpu_amdkfd_alloc_gws(kfd->adev,
>   kfd->adev->gds.gws_size, &kfd->gws);
>
> --
> 2.25.1



RE: [PATCH v2 1/1] drm/amdkfd: Add sysfs bitfields and enums to uAPI

2021-11-04 Thread Kim, Jonathan
[AMD Official Use Only]

> -Original Message-
> From: amd-gfx  On Behalf Of Felix
> Kuehling
> Sent: September 13, 2021 5:23 PM
> To: amd-gfx@lists.freedesktop.org
> Subject: [PATCH v2 1/1] drm/amdkfd: Add sysfs bitfields and enums to
> uAPI
>
> [CAUTION: External Email]
>
> These bits are de-facto part of the uAPI, so declare them in a uAPI header.
>
> The corresponding bit-fields and enums in user mode are defined in
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgit
> hub.com%2FRadeonOpenCompute%2FROCT-Thunk-
> Interface%2Fblob%2Fmaster%2Finclude%2Fhsakmttypes.h&data=04%
> 7C01%7Cjonathan.kim%40amd.com%7C60c91f7b30794bf670c808d976fcc
> 000%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637671650
> 194006492%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQI
> joiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=WQU
> JKcixWV0gxBYSxUzZx05nAtyYjoRNz7oJO%2BnjsLA%3D&reserved=0
>
> HSA_CAP_...   -> HSA_CAPABILITY
> HSA_MEM_HEAP_TYPE_... -> HSA_HEAPTYPE
> HSA_MEM_FLAGS_... -> HSA_MEMORYPROPERTY
> HSA_CACHE_TYPE_...-> HsaCacheType
> HSA_IOLINK_TYPE_...   -> HSA_IOLINKTYPE
> HSA_IOLINK_FLAGS_...  -> HSA_LINKPROPERTY
>
> Signed-off-by: Felix Kuehling 

Reviewed-by: Jonathan Kim 

> ---
>  MAINTAINERS   |   1 +
>  drivers/gpu/drm/amd/amdkfd/kfd_topology.h |  46 +
>  include/uapi/linux/kfd_sysfs.h| 108 ++
>  3 files changed, 110 insertions(+), 45 deletions(-)  create mode 100644
> include/uapi/linux/kfd_sysfs.h
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 84cd16694640..7554ec928ee2 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -930,6 +930,7 @@ F:
> drivers/gpu/drm/amd/include/kgd_kfd_interface.h
>  F: drivers/gpu/drm/amd/include/v9_structs.h
>  F: drivers/gpu/drm/amd/include/vi_structs.h
>  F: include/uapi/linux/kfd_ioctl.h
> +F: include/uapi/linux/kfd_sysfs.h
>
>  AMD SPI DRIVER
>  M: Sanjay R Mehta 
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
> b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
> index a8db017c9b8e..f0cc59d2fd5d 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
> @@ -25,38 +25,11 @@
>
>  #include 
>  #include 
> +#include 
>  #include "kfd_crat.h"
>
>  #define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 32
>
> -#define HSA_CAP_HOT_PLUGGABLE  0x0001
> -#define HSA_CAP_ATS_PRESENT0x0002
> -#define HSA_CAP_SHARED_WITH_GRAPHICS   0x0004
> -#define HSA_CAP_QUEUE_SIZE_POW20x0008
> -#define HSA_CAP_QUEUE_SIZE_32BIT   0x0010
> -#define HSA_CAP_QUEUE_IDLE_EVENT   0x0020
> -#define HSA_CAP_VA_LIMIT   0x0040
> -#define HSA_CAP_WATCH_POINTS_SUPPORTED 0x0080
> -#define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK0x0f00
> -#define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT   8
> -#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK   0x3000
> -#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT  12
> -
> -#define HSA_CAP_DOORBELL_TYPE_PRE_1_0  0x0
> -#define HSA_CAP_DOORBELL_TYPE_1_0  0x1
> -#define HSA_CAP_DOORBELL_TYPE_2_0  0x2
> -#define HSA_CAP_AQL_QUEUE_DOUBLE_MAP   0x4000
> -
> -#define HSA_CAP_RESERVED_WAS_SRAM_EDCSUPPORTED 0x0008
> /* Old buggy user mode depends on this being 0 */
> -#define HSA_CAP_MEM_EDCSUPPORTED   0x0010
> -#define HSA_CAP_RASEVENTNOTIFY 0x0020
> -#define HSA_CAP_ASIC_REVISION_MASK 0x03c0
> -#define HSA_CAP_ASIC_REVISION_SHIFT22
> -#define HSA_CAP_SRAM_EDCSUPPORTED  0x0400
> -#define HSA_CAP_SVMAPI_SUPPORTED   0x0800
> -#define HSA_CAP_FLAGS_COHERENTHOSTACCESS   0x1000
> -#define HSA_CAP_RESERVED   0xe00f8000
> -
>  struct kfd_node_properties {
> uint64_t hive_id;
> uint32_t cpu_cores_count;
> @@ -93,17 +66,6 @@ struct kfd_node_properties {
> char name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
>  };
>
> -#define HSA_MEM_HEAP_TYPE_SYSTEM   0
> -#define HSA_MEM_HEAP_TYPE_FB_PUBLIC1
> -#define HSA_MEM_HEAP_TYPE_FB_PRIVATE   2
> -#define HSA_MEM_HEAP_TYPE_GPU_GDS  3
> -#define HSA_MEM_HEAP_TYPE_GPU_LDS  4
> -#define HSA_MEM_HEAP_TYPE_GPU_SCRATCH  5
> -
> -#define HSA_MEM_FLAGS_HOT_PLUGGABLE0x0001
> -#define HSA_MEM_FLAGS_NON_VOLATILE 0x0002
> -#define HSA_MEM_FLAGS_RESERVED 0xfffc
> -
>  struct kfd_mem_properties {
> struct list_headlist;
> uint32_theap_type;
> @@ -116,12 +78,6 @@ struct kfd_mem_properties {
> struct attributeattr;
>  };
>
> -#define HSA_CACHE_TYPE_DATA0x0001
> -#define HSA_CACHE_TYPE_INSTRUCTION 0x0002
> -#define HSA_CACHE_TYPE_CPU 0x0004
> -#define HSA_CACHE_TYPE_HSACU   0x0008
> -#define 

RE: [PATCH] drm/amdkfd: drop process ref count when xnack disable

2021-09-01 Thread Kim, Jonathan
[Public]

I wouldn’t know if it was another bug elsewhere.
From what I was seeing, the leak was coming from !p->xnack_enable on the 
svm_range_restore_pages call.

If it helps, I saw this on Aldebaran where a shader does some bad memory access 
on purpose on a debugged ptraced child process.
The vm fault prompt pops up in dmesgs and a stale KFD process appends per run 
without this fix.
I’m just assuming at this point that the IV retry bit is set but I never 
confirmed that.

Thanks,

Jon
From: Yang, Philip 
Sent: Wednesday, September 1, 2021 12:30 PM
To: Kim, Jonathan ; Yang, Philip ; 
Sierra Guiza, Alejandro (Alex) ; 
amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdkfd: drop process ref count when xnack disable



On 2021-09-01 9:45 a.m., Kim, Jonathan wrote:

[AMD Official Use Only]

We were seeing process leaks on a couple of machines running certain tests that 
triggered vm faults on purpose.
I think svm_range_restore_pages gets called unconditionally on vm fault 
handling (unless the retry interrupt payload bit is supposed to be clear with 
xnack off)?


yes, with xnack off, sh_mem_config retry should be off, retry bit is supposed 
to be clear in fault interrupt vector, we should not try to recover vm fault, 
just report the vm fault back to application and evict user queues. Maybe it is 
another bug that causes p->xnack_enabled and sh_mem_config retry to mismatch under a 
specific condition?

Regards,

Philip
Either way, this patch prevents the process leaks we were seeing and is also:
Reviewed-by: Jonathan Kim <mailto:jonathan@amd.com>

Thanks,

Jon


From: amd-gfx 
<mailto:amd-gfx-boun...@lists.freedesktop.org>
 On Behalf Of philip yang
Sent: Wednesday, September 1, 2021 7:30 AM
To: Sierra Guiza, Alejandro (Alex) 
<mailto:alex.sie...@amd.com>; 
amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH] drm/amdkfd: drop process ref count when xnack disable

[CAUTION: External Email]


On 2021-08-31 10:41 p.m., Alex Sierra wrote:

During svm restore pages interrupt handler, kfd_process ref count was

never dropped when xnack was disabled. Therefore, the object was never

released.

Good catch, but if xnack is off, we should not get here to recover fault.

The fix looks good to me.

Reviewed-by: Philip Yang <mailto:philip.y...@amd.com>



Signed-off-by: Alex Sierra <mailto:alex.sie...@amd.com>

---

 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 3 ++-

 1 file changed, 2 insertions(+), 1 deletion(-)



diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c

index 8f9b5b53dab5..110c46cd7fac 100644

--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c

+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c

@@ -2484,7 +2484,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,

 }

 if (!p->xnack_enabled) {

pr_debug("XNACK not enabled for pasid 0x%x\n", pasid);

-   return -EFAULT;

+   r = -EFAULT;

+   goto out;

 }

 svms = &p->svms;




RE: [PATCH] drm/amdkfd: drop process ref count when xnack disable

2021-09-01 Thread Kim, Jonathan
[AMD Official Use Only]

We were seeing process leaks on a couple of machines running certain tests that 
triggered vm faults on purpose.
I think svm_range_restore_pages gets called unconditionally on vm fault 
handling (unless the retry interrupt payload bit is supposed to be clear with 
xnack off)?

Either way, this patch prevents the process leaks we were seeing and is also:
Reviewed-by: Jonathan Kim 

Thanks,

Jon


From: amd-gfx  On Behalf Of philip yang
Sent: Wednesday, September 1, 2021 7:30 AM
To: Sierra Guiza, Alejandro (Alex) ; 
amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdkfd: drop process ref count when xnack disable

[CAUTION: External Email]


On 2021-08-31 10:41 p.m., Alex Sierra wrote:

During svm restore pages interrupt handler, kfd_process ref count was

never dropped when xnack was disabled. Therefore, the object was never

released.

Good catch, but if xnack is off, we should not get here to recover fault.

The fix looks good to me.

Reviewed-by: Philip Yang 



Signed-off-by: Alex Sierra 

---

 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 3 ++-

 1 file changed, 2 insertions(+), 1 deletion(-)



diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c

index 8f9b5b53dab5..110c46cd7fac 100644

--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c

+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c

@@ -2484,7 +2484,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,

 }

 if (!p->xnack_enabled) {

pr_debug("XNACK not enabled for pasid 0x%x\n", pasid);

-   return -EFAULT;

+   r = -EFAULT;

+   goto out;

 }

 svms = &p->svms;




RE: [PATCH 2/3] drm/amdkfd: report xgmi bandwidth between direct peers to the kfd

2021-07-19 Thread Kim, Jonathan
[AMD Official Use Only]

> -Original Message-
> From: Kuehling, Felix 
> Sent: Saturday, July 17, 2021 1:47 AM
> To: Kim, Jonathan ; amd-
> g...@lists.freedesktop.org
> Subject: Re: [PATCH 2/3] drm/amdkfd: report xgmi bandwidth between
> direct peers to the kfd
>
> Am 2021-07-16 um 12:43 p.m. schrieb Jonathan Kim:
> > Report the min/max bandwidth in megabytes to the kfd for direct xgmi
> > connections only.
>
> By "direct XGMI connections", you mean this doesn't work for links with
> more than one hop? Will that spew out DRM_ERROR messages for such links?
> Then it's probably better to downgrade that to an INFO.

No DRM_ERROR only happens if psp fails on invoke.
I've added footnotes to the description and code to clear this up.
Non-adjacent peers return num_links as 0 since the indirect route is unknown and 
linkage is asymmetrical.

>
>
> >
> > v2: change reporting from num links to bandwidth
> >
> > Signed-off-by: Jonathan Kim 
>
> This patch is OK to provide bandwidth information on Aldebaran. What can
> we do on older GPUs? Can we assume num_links = 1? Or maybe have some
> hard-coded numbers depending on the number of nodes in the hive?

We could assume num_links = 1 but that wouldn't represent certain non-Aldebaran 
setups well.
For non-Aldebaran min/max bandwidth, we may be able to get away with setting 
non-zero values on non-adjacent peers since setup is symmetrical to date but 
that may raise questions on why Aldebaran indirect min/max-bandwidth is 0.  For 
consistency, we'd have to use num_hops then to check directness.
Maybe it's worth making a bid to the FW team to support all other chips moving 
forward  ...

Thanks,

Jon

>
> Either way, patch 1 and 2 are
>
> Reviewed-by: Felix Kuehling 
>
>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 23
> > ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  1 +
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   | 12 +++
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h   |  2 ++
> >  drivers/gpu/drm/amd/amdkfd/kfd_crat.c  | 12 +++
> >  5 files changed, 50 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > index bfab2f9fdd17..3978578a1c49 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > @@ -553,6 +553,29 @@ uint8_t
> amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev
> *s
> > return  (uint8_t)ret;
> >  }
> >
> > +int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct kgd_dev *dst,
> > +struct kgd_dev *src, bool is_min) {
> > +   struct amdgpu_device *adev = (struct amdgpu_device *)dst,
> *peer_adev;
> > +   int num_links;
> > +
> > +   if (adev->asic_type != CHIP_ALDEBARAN)
> > +   return 0;
> > +
> > +   if (src)
> > +   peer_adev = (struct amdgpu_device *)src;
> > +
> > +   num_links = is_min ? 1 : amdgpu_xgmi_get_num_links(adev,
> peer_adev);
> > +   if (num_links < 0) {
> > +   DRM_ERROR("amdgpu: failed to get xgmi num links between
> node %d and %d. ret = %d\n",
> > +   adev->gmc.xgmi.physical_node_id,
> > +   peer_adev->gmc.xgmi.physical_node_id, num_links);
> > +   num_links = 0;
> > +   }
> > +
> > +   /* Aldebaran xGMI DPM is defeatured so assume x16 x 25Gbps for
> bandwidth. */
> > +   return (num_links * 16 * 25000)/BITS_PER_BYTE; }
> > +
> >  uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev
> *kgd)
> > {
> > struct amdgpu_device *adev = (struct amdgpu_device *)kgd; diff --git
> > a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > index 81264517d532..e12fccb2d2c4 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > @@ -226,6 +226,7 @@ uint32_t amdgpu_amdkfd_get_num_gws(struct
> kgd_dev
> > *kgd);  uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd);
> > int amdgpu_amdkfd_get_noretry(struct kgd_dev *kgd);  uint8_t
> > amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct
> kgd_dev
> > *src);
> > +int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct kgd_dev *dst,
> > +struct kgd_dev *src, bool is_min);
> >
> >  /* Read user wptr from a specified user address space with page fault
> >   * disabled. The memory must be pinned and mapped to the hardware
> > when diff --git a/drivers/gpu/drm/amd

RE: [PATCH 2/3] drm/amdkfd: report xgmi bandwidth between direct peers to the kfd

2021-07-19 Thread Kim, Jonathan
[AMD Official Use Only]

> -Original Message-
> From: Lazar, Lijo 
> Sent: Monday, July 19, 2021 3:22 AM
> To: Kim, Jonathan ; amd-
> g...@lists.freedesktop.org
> Cc: Kuehling, Felix 
> Subject: Re: [PATCH 2/3] drm/amdkfd: report xgmi bandwidth between
> direct peers to the kfd
>
>
>
> On 7/16/2021 10:13 PM, Jonathan Kim wrote:
> > Report the min/max bandwidth in megabytes to the kfd for direct xgmi
> > connections only.
> >
> > v2: change reporting from num links to bandwidth
> >
> > Signed-off-by: Jonathan Kim 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 23
> ++
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  1 +
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   | 12 +++
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h   |  2 ++
> >   drivers/gpu/drm/amd/amdkfd/kfd_crat.c  | 12 +++
> >   5 files changed, 50 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > index bfab2f9fdd17..3978578a1c49 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > @@ -553,6 +553,29 @@ uint8_t
> amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev
> *s
> > return  (uint8_t)ret;
> >   }
> >
> > +int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct kgd_dev *dst,
> > +struct kgd_dev *src, bool is_min) {
> > +   struct amdgpu_device *adev = (struct amdgpu_device *)dst,
> *peer_adev;
> > +   int num_links;
> > +
> > +   if (adev->asic_type != CHIP_ALDEBARAN)
> > +   return 0;
> > +
> > +   if (src)
> > +   peer_adev = (struct amdgpu_device *)src;
> > +
> > +   num_links = is_min ? 1 : amdgpu_xgmi_get_num_links(adev,
> peer_adev);
> > +   if (num_links < 0) {
> > +   DRM_ERROR("amdgpu: failed to get xgmi num links between
> node %d and %d. ret = %d\n",
> > +   adev->gmc.xgmi.physical_node_id,
> > +   peer_adev->gmc.xgmi.physical_node_id, num_links);
> > +   num_links = 0;
> > +   }
> > +
> > +   /* Aldebaran xGMI DPM is defeatured so assume x16 x 25Gbps for
> bandwidth. */
> > +   return (num_links * 16 * 25000)/BITS_PER_BYTE;
>
> Instead of having ASIC family checks and bandwidth info in interface
> functions, better to have this info come from base layer (amdgpu_xgmi or
> xgmi ip). That will help to handle other ASICs.

Ok.  We can revisit this as a follow up.  Maybe the full solution is a link 
width/speed support mask analogous to pcie.

Thanks,

Jon

>
> Thanks,
> Lijo
>
> >   uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev
> *kgd)
> >   {
> > struct amdgpu_device *adev = (struct amdgpu_device *)kgd; diff
> > --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > index 81264517d532..e12fccb2d2c4 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > @@ -226,6 +226,7 @@ uint32_t amdgpu_amdkfd_get_num_gws(struct
> kgd_dev *kgd);
> >   uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd);
> >   int amdgpu_amdkfd_get_noretry(struct kgd_dev *kgd);
> >   uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst,
> > struct kgd_dev *src);
> > +int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct kgd_dev *dst,
> > +struct kgd_dev *src, bool is_min);
> >
> >   /* Read user wptr from a specified user address space with page fault
> >* disabled. The memory must be pinned and mapped to the hardware
> > when diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> > index 8567d5d77346..258cf86b32f6 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> > @@ -486,6 +486,18 @@ int amdgpu_xgmi_get_hops_count(struct
> amdgpu_device *adev,
> > return  -EINVAL;
> >   }
> >
> > +int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev,
> > +   struct amdgpu_device *peer_adev)
> > +{
> > +   struct psp_xgmi_topology_info *top = &adev-
> >psp.xgmi_context.top_info;
> > +   int i;
> > +
> > +   for (i = 0 ; i < top->num_nodes; ++i)
> > +   if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
> > +   return top->nodes[i].num_links;
> > +   return  -EINVAL;
> > +}
>

RE: [PATCH 3/3] drm/amdkfd: report pcie bandwidth to the kfd

2021-07-19 Thread Kim, Jonathan
[AMD Official Use Only]

> -Original Message-
> From: Kuehling, Felix 
> Sent: Saturday, July 17, 2021 1:37 AM
> To: Kim, Jonathan ; amd-
> g...@lists.freedesktop.org
> Subject: Re: [PATCH 3/3] drm/amdkfd: report pcie bandwidth to the kfd
>
> Am 2021-07-16 um 12:43 p.m. schrieb Jonathan Kim:
> > Similar to xGMI reporting the min/max bandwidth between direct peers,
> > PCIe will report the min/max bandwidth to the KFD.
> >
> > v2: change to bandwidth
> >
> > Signed-off-by: Jonathan Kim 
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 61
> > ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  1 +
> >  drivers/gpu/drm/amd/amdkfd/kfd_crat.c  |  4 ++
> >  3 files changed, 66 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > index 3978578a1c49..b7db52f1a9d1 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > @@ -21,6 +21,7 @@
> >   */
> >
> >  #include "amdgpu_amdkfd.h"
> > +#include "amd_pcie.h"
> >  #include "amd_shared.h"
> >
> >  #include "amdgpu.h"
> > @@ -576,6 +577,66 @@ int
> amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct kgd_dev *dst, struct
> kgd_dev
> > return (num_links * 16 * 25000)/BITS_PER_BYTE;  }
> >
> > +int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct kgd_dev *dev,
> bool
> > +is_min) {
> > +   struct amdgpu_device *adev = (struct amdgpu_device *)dev;
> > +   int num_lanes_shift = is_min ? ffs(adev->pm.pcie_mlw_mask >>
> > +
>   CAIL_PCIE_LINK_WIDTH_SUPPORT_SHIFT) - 1 :
> > +   fls(adev->pm.pcie_mlw_mask >>
> > +
>   CAIL_PCIE_LINK_WIDTH_SUPPORT_SHIFT) - 1;
> > +   int gen_speed_shift = is_min ? ffs(adev->pm.pcie_gen_mask >>
> > +
>   CAIL_PCIE_LINK_SPEED_SUPPORT_SHIFT) - 1 :
> > +   fls(adev->pm.pcie_gen_mask >>
> > +
>   CAIL_PCIE_LINK_SPEED_SUPPORT_SHIFT) - 1;
>
> The shifting is not necessary because you undo it below. I think this would
> do the trick and be more readable:
>
>   int num_lanes_shift = (is_min ? ffs(adev->pm.pcie_mlw_mask) :
>   fls(adev->pm.pcie_mlw_mask)) - 1;
>   int gen_speed_shift = (is_min ? ffs(adev->pm.pcie_gen_mask) :
>   fls(adev->pm.pcie_gen_mask)) - 1;

Ok, thanks for the review and suggestion.  I've adjusted your suggestion by
masking pcie_gen_mask with CAIL_PCIE_LINK_SPEED_SUPPORT_MASK, since the mask
sets some lower bits that are not speed-related.

Thanks,

Jon

>   uint32_t num_lanes_mask = 1 << num_lanes_shift;
>   uint32_t gen_speed_mask = 1 << gen_speed_shift;
>
> With that fixed, this patch is
>
> Reviewed-by: Felix Kuehling 
>
>
> > +   uint32_t num_lanes_mask = (1UL << num_lanes_shift) <<
> CAIL_PCIE_LINK_WIDTH_SUPPORT_SHIFT;
> > +   uint32_t gen_speed_mask = (1UL << gen_speed_shift) <<
> CAIL_PCIE_LINK_SPEED_SUPPORT_SHIFT;
> > +   int num_lanes_factor = 0, gen_speed_mbits_factor = 0;
> > +
> > +   switch (num_lanes_mask) {
> > +   case CAIL_PCIE_LINK_WIDTH_SUPPORT_X1:
> > +   num_lanes_factor = 1;
> > +   break;
> > +   case CAIL_PCIE_LINK_WIDTH_SUPPORT_X2:
> > +   num_lanes_factor = 2;
> > +   break;
> > +   case CAIL_PCIE_LINK_WIDTH_SUPPORT_X4:
> > +   num_lanes_factor = 4;
> > +   break;
> > +   case CAIL_PCIE_LINK_WIDTH_SUPPORT_X8:
> > +   num_lanes_factor = 8;
> > +   break;
> > +   case CAIL_PCIE_LINK_WIDTH_SUPPORT_X12:
> > +   num_lanes_factor = 12;
> > +   break;
> > +   case CAIL_PCIE_LINK_WIDTH_SUPPORT_X16:
> > +   num_lanes_factor = 16;
> > +   break;
> > +   case CAIL_PCIE_LINK_WIDTH_SUPPORT_X32:
> > +   num_lanes_factor = 32;
> > +   break;
> > +   }
> > +
> > +   switch (gen_speed_mask) {
> > +   case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1:
> > +   gen_speed_mbits_factor = 2500;
> > +   break;
> > +   case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2:
> > +   gen_speed_mbits_factor = 5000;
> > +   break;
> > +   case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3:
> > +   gen_speed_mbits_factor = 8000;
> > +   break;
> > +   case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4:
> > +   g

RE: [PATCH 3/4] drm/amdkfd: report pcie bandwidth as number of lanes

2021-06-28 Thread Kim, Jonathan
[AMD Official Use Only]

Ping on series.
Note Patch 4 can be dropped.  Runtime doesn't require an extra flag to 
determine direct connections.

Thanks,

Jon

> -Original Message-
> From: Kim, Jonathan 
> Sent: Monday, June 21, 2021 3:24 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Zhang, Hawking ; Kuehling, Felix
> ; Kim, Jonathan 
> Subject: [PATCH 3/4] drm/amdkfd: report pcie bandwidth as number of lanes
>
> Similar to xGMI reporting the min/max bandwidth as the number of links
> between peers, PCIe will report the min/max bandwidth as the number of
> supported lanes.
>
> Signed-off-by: Jonathan Kim 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 24
> ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  3 +++
>  drivers/gpu/drm/amd/amdkfd/kfd_crat.c  |  3 +++
>  3 files changed, 30 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index c84989eda8eb..99c662b70519 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -568,6 +568,30 @@ uint8_t
> amdgpu_amdkfd_get_xgmi_num_links(struct kgd_dev *dst, struct kgd_dev
> *sr
>   return  (uint8_t)ret;
>  }
>
> +uint32_t amdgpu_amdkfd_get_pcie_min_lanes(struct kgd_dev *dev) {
> + struct amdgpu_device *adev = (struct amdgpu_device *)dev;
> + int min_lane_shift = ffs(adev->pm.pcie_mlw_mask >>
> + CAIL_PCIE_LINK_WIDTH_SUPPORT_SHIFT) - 1;
> +
> + if (min_lane_shift < 0)
> + return 0;
> +
> + return 1UL << min_lane_shift;
> +}
> +
> +uint32_t amdgpu_amdkfd_get_pcie_max_lanes(struct kgd_dev *dev) {
> + struct amdgpu_device *adev = (struct amdgpu_device *)dev;
> + int max_lane_shift = fls(adev->pm.pcie_mlw_mask >>
> + CAIL_PCIE_LINK_WIDTH_SUPPORT_SHIFT) - 1;
> +
> + if (max_lane_shift < 0)
> + return 0;
> +
> + return 1UL << max_lane_shift;
> +}
> +
>  uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev
> *kgd)  {
>   struct amdgpu_device *adev = (struct amdgpu_device *)kgd; diff --git
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 20e4bfce62be..88322c72a43d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -31,6 +31,7 @@
>  #include 
>  #include 
>  #include 
> +#include "amd_pcie.h"
>  #include "amdgpu_sync.h"
>  #include "amdgpu_vm.h"
>
> @@ -227,6 +228,8 @@ uint32_t amdgpu_amdkfd_get_asic_rev_id(struct
> kgd_dev *kgd);  int amdgpu_amdkfd_get_noretry(struct kgd_dev *kgd);
> uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct
> kgd_dev *src);  uint8_t amdgpu_amdkfd_get_xgmi_num_links(struct kgd_dev
> *dst, struct kgd_dev *src);
> +uint32_t amdgpu_amdkfd_get_pcie_min_lanes(struct kgd_dev *dev);
> +uint32_t amdgpu_amdkfd_get_pcie_max_lanes(struct kgd_dev *dev);
>
>  /* Read user wptr from a specified user address space with page fault
>   * disabled. The memory must be pinned and mapped to the hardware
> when diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> index 75047b77649b..f70d69035fe7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> @@ -1036,6 +1036,7 @@ static int kfd_parse_subtype_iolink(struct
> crat_subtype_iolink *iolink,
>   props->max_latency = iolink->maximum_latency;
>   props->min_bandwidth = iolink-
> >minimum_bandwidth;
>   props->max_bandwidth = iolink-
> >maximum_bandwidth;
> +
>   props->rec_transfer_size =
>   iolink->recommended_transfer_size;
>
> @@ -1993,6 +1994,8 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int
> *avail_size,
>   sub_type_hdr->maximum_bandwidth = 1;
>   } else {
>   sub_type_hdr->io_interface_type =
> CRAT_IOLINK_TYPE_PCIEXPRESS;
> + sub_type_hdr->minimum_bandwidth =
> amdgpu_amdkfd_get_pcie_min_lanes(kdev->kgd);
> + sub_type_hdr->maximum_bandwidth =
> +amdgpu_amdkfd_get_pcie_max_lanes(kdev->kgd);
>   }
>
>   sub_type_hdr->proximity_domain_from = proximity_domain;
> --
> 2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


  1   2   >