Re: [PATCH v3 2/3] drm/amdkfd: Enable GFX11 usermode queue oversubscription

2022-06-16 Thread Christian König

Hi Philip,

Am 16.06.22 um 00:41 schrieb philip yang:

[SNIP]

+    ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+    if (ret) {
+    pr_err("Failed to pin bo. ret %d\n", ret);
+    goto err_pin_bo_failed;
+    }


Oh! Is that something we do for every MQD? When yes that here is 
pretty

much a NAK.

We can't do this or create a trivial deny of service attack against 
the kernel

driver.

Regards,
Christian.

Hi Christian, could you elaborate on this? Right now this is only 
being used to pin the queue wptr BO.


Well is this wptr BO per process, per queue or global?

amdgpu_bo_pin() is only allowed if we pin global resources, otherwise 
I have to reject that.


wptr BO is per queue, allocated as queue structure, 1 page size on 
system memory.




Yeah, I was hoping for this explanation as well. My status was still 
that the WPTR and RPTR are part of the ring buffer.


We should add a check that we really only pin a buffer with 1 page size 
here, then that should be ok.


Regards,
Christian.

KFD limit number of queues globally, max_queues = 127; /* HWS limit 
*/, so this will pin max 508KB and take max 127 GART page mapping.


wptr is updated by app and read by HWS, if we don't pin wptr, we have 
to evict queue when wptr bo is moved on system memory, then update 
GART mapping and restore queue.


Regards,

Philip



Regards,
Christian.



Best,
Graham


+
+    ret = amdgpu_ttm_alloc_gart(&bo->tbo);
+    if (ret) {
+    pr_err("Failed to bind bo to GART. ret %d\n", ret);
+    goto err_map_bo_gart_failed;
+    }
+
+    amdgpu_amdkfd_remove_eviction_fence(
+    bo, bo->kfd_bo->process_info->eviction_fence);
+ list_del_init(&bo->kfd_bo->validate_list.head);
+
+    amdgpu_bo_unreserve(bo);
+
+    bo = amdgpu_bo_ref(bo);
+
+    return 0;
+
+err_map_bo_gart_failed:
+    amdgpu_bo_unpin(bo);
+err_pin_bo_failed:
+    amdgpu_bo_unreserve(bo);
+err_reserve_bo_failed:
+
+    return ret;
+}
+
   int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct

amdgpu_device *adev,

   struct kgd_mem *mem, void **kptr, uint64_t *size)
   {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index e9766e165c38..1789ed8b79f5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -289,6 +289,7 @@ static int kfd_ioctl_create_queue(struct file 
*filep,

struct kfd_process *p,

   struct kfd_process_device *pdd;
   struct queue_properties q_properties;
   uint32_t doorbell_offset_in_process = 0;
+    struct amdgpu_bo *wptr_bo = NULL;

   memset(&q_properties, 0, sizeof(struct queue_properties));

@@ -316,12 +317,41 @@ static int kfd_ioctl_create_queue(struct file

*filep, struct kfd_process *p,

   goto err_bind_process;
   }

+    /* Starting with GFX11, wptr BOs must be mapped to GART for MES

to determine work

+ * on unmapped queues for usermode queue oversubscription (no

aggregated doorbell)

+ */
+    if (dev->shared_resources.enable_mes && (dev->adev-
mes.sched_version & 0xff) >= 3) {
+    struct amdgpu_bo_va_mapping *wptr_mapping;
+    struct amdgpu_vm *wptr_vm;
+
+    wptr_vm = drm_priv_to_vm(pdd->drm_priv);
+    err = amdgpu_bo_reserve(wptr_vm->root.bo, false);
+    if (err)
+    goto err_wptr_map_gart;
+
+    wptr_mapping = amdgpu_vm_bo_lookup_mapping(
+    wptr_vm, args->write_pointer_address >>

PAGE_SHIFT);

+ amdgpu_bo_unreserve(wptr_vm->root.bo);
+    if (!wptr_mapping) {
+    pr_err("Failed to lookup wptr bo\n");
+    err = -EINVAL;
+    goto err_wptr_map_gart;
+    }
+
+    wptr_bo = wptr_mapping->bo_va->base.bo;
+    err = amdgpu_amdkfd_map_gtt_bo_to_gart(dev->adev,

wptr_bo);

+    if (err) {
+    pr_err("Failed to map wptr bo to GART\n");
+    goto err_wptr_map_gart;
+    }
+    }
+
   pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
   p->pasid,
   dev->id);

-    err = pqm_create_queue(&p->pqm, dev, filep, &q_properties,

&queue_id, NULL, NULL, NULL,

- &doorbell_offset_in_process);
+    err = pqm_create_queue(&p->pqm, dev, filep, &q_properties,

&queue_id, wptr_bo,

+    NULL, NULL, NULL, &doorbell_offset_in_process);
   if (err != 0)
   goto err_create_queue;

@@ -354,6 +384,9 @@ static int kfd_ioctl_create_queue(struct file 
*filep,

struct kfd_process *p,

   return 0;

   err_create_queue:
+    if (wptr_bo)
+    amdgpu_amdkfd_free_gtt_mem(dev->adev, wptr_bo);
+err_wptr_map_gart:
   err_bind_process:
   err_pdd:
   mutex_unlock(&p->mutex);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index b39d89c52887..d8de2fbdfc7d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -208,6 +208,7 @@ static int add_queue_mes(struct

device_que

Re: [PATCH v3 2/3] drm/amdkfd: Enable GFX11 usermode queue oversubscription

2022-06-16 Thread Alex Deucher
On Wed, Jun 15, 2022 at 6:41 PM philip yang  wrote:
>
>
> On 2022-06-15 10:06, Christian König wrote:
>
> Am 15.06.22 um 15:17 schrieb Sider, Graham:
>
> [AMD Official Use Only - General]
>
> -Original Message-
> From: Koenig, Christian 
> Sent: Wednesday, June 15, 2022 3:29 AM
> To: Sider, Graham ; amd-
> g...@lists.freedesktop.org
> Cc: Joshi, Mukul ; Kuehling, Felix
> ; Yang, Philip 
> Subject: Re: [PATCH v3 2/3] drm/amdkfd: Enable GFX11 usermode queue
> oversubscription
>
>
>
> Am 13.06.22 um 17:20 schrieb Graham Sider:
>
> Starting with GFX11, MES requires wptr BOs to be GTT allocated/mapped
> to GART for usermode queues in order to support oversubscription. In
> the case that work is submitted to an unmapped queue, MES must have a
> GART wptr address to determine whether the queue should be mapped.
>
> This change is accompanied with changes in MES and is applicable for
> MES_VERSION >= 3.
>
> v2:
> - Update MES_VERSION check from 2 to 3.
> v3:
> - Use amdgpu_vm_bo_lookup_mapping for wptr_bo mapping lookup
> - Move wptr_bo refcount increment to
>
> amdgpu_amdkfd_map_gtt_bo_to_gart
>
> - Remove list_del_init from amdgpu_amdkfd_map_gtt_bo_to_gart
> - Cleanup/fix create_queue wptr_bo error handling
>
> Signed-off-by: Graham Sider 
> ---
>drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  1 +
>.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 49
>
> +++
>
>drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 37 +-
>.../drm/amd/amdkfd/kfd_device_queue_manager.c |  9 +++-
>.../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  |  2 +
>drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  3 ++
>.../amd/amdkfd/kfd_process_queue_manager.c| 17 +--
>7 files changed, 110 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 429b16ba10bf..dba26d1e3be9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -301,6 +301,7 @@ int
>
> amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device
> *adev,
>
>struct kgd_mem *mem, void **kptr, uint64_t *size);
>void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct
>
> amdgpu_device *adev,
>
>struct kgd_mem *mem);
> +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev,
> +struct amdgpu_bo *bo);
>
>int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
>struct dma_fence **ef);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index efab923056f4..888d08128a94 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -2030,6 +2030,55 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
>return ret;
>}
>
> +/**
> + * amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and
>
> increment
>
> +reference count
> + * @adev: Device to which allocated BO belongs
> + * @bo: Buffer object to be mapped
> + *
> + * Before return, bo reference count is incremented. To release the
> +reference and unpin/
> + * unmap the BO, call amdgpu_amdkfd_free_gtt_mem.
> + */
> +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev,
> +struct amdgpu_bo *bo) {
> +int ret;
> +
> +ret = amdgpu_bo_reserve(bo, true);
> +if (ret) {
> +pr_err("Failed to reserve bo. ret %d\n", ret);
> +goto err_reserve_bo_failed;
> +}
> +
> +ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
> +if (ret) {
> +pr_err("Failed to pin bo. ret %d\n", ret);
> +goto err_pin_bo_failed;
> +}
>
>
> Oh! Is that something we do for every MQD? When yes that here is pretty
> much a NAK.
>
> We can't do this or create a trivial deny of service attack against the kernel
> driver.
>
> Regards,
> Christian.
>
> Hi Christian, could you elaborate on this? Right now this is only being used 
> to pin the queue wptr BO.
>
>
> Well is this wptr BO per process, per queue or global?
>
> amdgpu_bo_pin() is only allowed if we pin global resources, otherwise I have 
> to reject that.
>
> wptr BO is per queue, allocated as queue structure, 1 page size on system 
> memory.
>
> KFD limit number of queues globally, max_queues = 127; /* HWS limit */, so 
> this will pin max 508KB and take max 127 GART page mapping.
>
> wptr is updated by app and read by HWS, if we don't pin wptr, we have to 
> evict queue when wptr bo is mov

Re: [PATCH v3 2/3] drm/amdkfd: Enable GFX11 usermode queue oversubscription

2022-06-15 Thread philip yang

  


On 2022-06-15 10:06, Christian König
  wrote:

Am
  15.06.22 um 15:17 schrieb Sider, Graham:
  
  [AMD Official Use Only - General]


-Original Message-
  
  From: Koenig, Christian 
  
  Sent: Wednesday, June 15, 2022 3:29 AM
  
  To: Sider, Graham ; amd-
  
  g...@lists.freedesktop.org
  
  Cc: Joshi, Mukul ; Kuehling, Felix
  
  ; Yang, Philip
  
  
  Subject: Re: [PATCH v3 2/3] drm/amdkfd: Enable GFX11 usermode
  queue
  
  oversubscription
  
  
  
  
  Am 13.06.22 um 17:20 schrieb Graham Sider:
  
  Starting with GFX11, MES requires wptr
BOs to be GTT allocated/mapped

to GART for usermode queues in order to support
oversubscription. In

the case that work is submitted to an unmapped queue, MES
must have a

GART wptr address to determine whether the queue should be
mapped.


This change is accompanied with changes in MES and is
applicable for

MES_VERSION >= 3.


v2:

- Update MES_VERSION check from 2 to 3.

v3:

- Use amdgpu_vm_bo_lookup_mapping for wptr_bo mapping lookup

- Move wptr_bo refcount increment to

  
  amdgpu_amdkfd_map_gtt_bo_to_gart
  
  - Remove list_del_init from
amdgpu_amdkfd_map_gtt_bo_to_gart

- Cleanup/fix create_queue wptr_bo error handling


Signed-off-by: Graham Sider 

---

   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  1 +

   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 49

  
  +++
  
    
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 37
+-

   .../drm/amd/amdkfd/kfd_device_queue_manager.c |  9 +++-

   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  |  2 +

   drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  3 ++

   .../amd/amdkfd/kfd_process_queue_manager.c    | 17
+--

   7 files changed, 110 insertions(+), 8 deletions(-)


diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

index 429b16ba10bf..dba26d1e3be9 100644

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

@@ -301,6 +301,7 @@ int

  
  amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device
  
  *adev,
  
     struct kgd_mem *mem, void
**kptr, uint64_t *size);

   void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct

  
  amdgpu_device *adev,
  
     struct kgd_mem *mem);

+int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device
*adev,

+struct amdgpu_bo *bo);


   int amdgpu_amdkfd_gpuvm_restore_process_bos(void
*process_info,

   struct dma_fence **ef);

diff --git
a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

index efab923056f4..888d08128a94 100644

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

@@ -2030,6 +2030,55 @@ int amdgpu_amdkfd_gpuvm_sync_memory(

   return ret;

   }


+/**

+ * amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and

  
  increment
  
  +reference count

+ * @adev: Device to which allocated BO belongs

+ * @bo: Buffer object to be mapped

+ *

+ * Before return, bo reference count is incremented. To
release the

+reference and un

Re: [PATCH v3 2/3] drm/amdkfd: Enable GFX11 usermode queue oversubscription

2022-06-15 Thread Christian König

Am 15.06.22 um 15:17 schrieb Sider, Graham:

[AMD Official Use Only - General]


-Original Message-
From: Koenig, Christian 
Sent: Wednesday, June 15, 2022 3:29 AM
To: Sider, Graham ; amd-
g...@lists.freedesktop.org
Cc: Joshi, Mukul ; Kuehling, Felix
; Yang, Philip 
Subject: Re: [PATCH v3 2/3] drm/amdkfd: Enable GFX11 usermode queue
oversubscription



Am 13.06.22 um 17:20 schrieb Graham Sider:

Starting with GFX11, MES requires wptr BOs to be GTT allocated/mapped
to GART for usermode queues in order to support oversubscription. In
the case that work is submitted to an unmapped queue, MES must have a
GART wptr address to determine whether the queue should be mapped.

This change is accompanied with changes in MES and is applicable for
MES_VERSION >= 3.

v2:
- Update MES_VERSION check from 2 to 3.
v3:
- Use amdgpu_vm_bo_lookup_mapping for wptr_bo mapping lookup
- Move wptr_bo refcount increment to

amdgpu_amdkfd_map_gtt_bo_to_gart

- Remove list_del_init from amdgpu_amdkfd_map_gtt_bo_to_gart
- Cleanup/fix create_queue wptr_bo error handling

Signed-off-by: Graham Sider 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  1 +
   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 49

+++

   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 37 +-
   .../drm/amd/amdkfd/kfd_device_queue_manager.c |  9 +++-
   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  |  2 +
   drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  3 ++
   .../amd/amdkfd/kfd_process_queue_manager.c| 17 +--
   7 files changed, 110 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 429b16ba10bf..dba26d1e3be9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -301,6 +301,7 @@ int

amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device
*adev,

struct kgd_mem *mem, void **kptr, uint64_t *size);
   void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct

amdgpu_device *adev,

struct kgd_mem *mem);
+int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev,
+struct amdgpu_bo *bo);

   int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
struct dma_fence **ef);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index efab923056f4..888d08128a94 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2030,6 +2030,55 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
return ret;
   }

+/**
+ * amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and

increment

+reference count
+ * @adev: Device to which allocated BO belongs
+ * @bo: Buffer object to be mapped
+ *
+ * Before return, bo reference count is incremented. To release the
+reference and unpin/
+ * unmap the BO, call amdgpu_amdkfd_free_gtt_mem.
+ */
+int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev,
+struct amdgpu_bo *bo) {
+   int ret;
+
+   ret = amdgpu_bo_reserve(bo, true);
+   if (ret) {
+   pr_err("Failed to reserve bo. ret %d\n", ret);
+   goto err_reserve_bo_failed;
+   }
+
+   ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+   if (ret) {
+   pr_err("Failed to pin bo. ret %d\n", ret);
+   goto err_pin_bo_failed;
+   }


Oh! Is that something we do for every MQD? When yes that here is pretty
much a NAK.

We can't do this or create a trivial deny of service attack against the kernel
driver.

Regards,
Christian.


Hi Christian, could you elaborate on this? Right now this is only being used to 
pin the queue wptr BO.


Well is this wptr BO per process, per queue or global?

amdgpu_bo_pin() is only allowed if we pin global resources, otherwise I 
have to reject that.


Regards,
Christian.



Best,
Graham


+
+   ret = amdgpu_ttm_alloc_gart(&bo->tbo);
+   if (ret) {
+   pr_err("Failed to bind bo to GART. ret %d\n", ret);
+   goto err_map_bo_gart_failed;
+   }
+
+   amdgpu_amdkfd_remove_eviction_fence(
+   bo, bo->kfd_bo->process_info->eviction_fence);
+   list_del_init(&bo->kfd_bo->validate_list.head);
+
+   amdgpu_bo_unreserve(bo);
+
+   bo = amdgpu_bo_ref(bo);
+
+   return 0;
+
+err_map_bo_gart_failed:
+   amdgpu_bo_unpin(bo);
+err_pin_bo_failed:
+   amdgpu_bo_unreserve(bo);
+err_reserve_bo_failed:
+
+   return ret;
+}
+
   int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct

amdgpu_device *adev,

struct kgd_mem *mem, void **kptr, uint64_t *size)
   {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index e9766e165c38..1789ed8b79f5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers

RE: [PATCH v3 2/3] drm/amdkfd: Enable GFX11 usermode queue oversubscription

2022-06-15 Thread Sider, Graham
[AMD Official Use Only - General]

> -Original Message-
> From: Koenig, Christian 
> Sent: Wednesday, June 15, 2022 3:29 AM
> To: Sider, Graham ; amd-
> g...@lists.freedesktop.org
> Cc: Joshi, Mukul ; Kuehling, Felix
> ; Yang, Philip 
> Subject: Re: [PATCH v3 2/3] drm/amdkfd: Enable GFX11 usermode queue
> oversubscription
> 
> 
> 
> Am 13.06.22 um 17:20 schrieb Graham Sider:
> > Starting with GFX11, MES requires wptr BOs to be GTT allocated/mapped
> > to GART for usermode queues in order to support oversubscription. In
> > the case that work is submitted to an unmapped queue, MES must have a
> > GART wptr address to determine whether the queue should be mapped.
> >
> > This change is accompanied with changes in MES and is applicable for
> > MES_VERSION >= 3.
> >
> > v2:
> > - Update MES_VERSION check from 2 to 3.
> > v3:
> > - Use amdgpu_vm_bo_lookup_mapping for wptr_bo mapping lookup
> > - Move wptr_bo refcount increment to
> amdgpu_amdkfd_map_gtt_bo_to_gart
> > - Remove list_del_init from amdgpu_amdkfd_map_gtt_bo_to_gart
> > - Cleanup/fix create_queue wptr_bo error handling
> >
> > Signed-off-by: Graham Sider 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  1 +
> >   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 49
> +++
> >   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 37 +-
> >   .../drm/amd/amdkfd/kfd_device_queue_manager.c |  9 +++-
> >   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  |  2 +
> >   drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  3 ++
> >   .../amd/amdkfd/kfd_process_queue_manager.c| 17 +--
> >   7 files changed, 110 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > index 429b16ba10bf..dba26d1e3be9 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > @@ -301,6 +301,7 @@ int
> amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device
> *adev,
> > struct kgd_mem *mem, void **kptr, uint64_t *size);
> >   void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct
> amdgpu_device *adev,
> > struct kgd_mem *mem);
> > +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev,
> > +struct amdgpu_bo *bo);
> >
> >   int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
> > struct dma_fence **ef);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > index efab923056f4..888d08128a94 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > @@ -2030,6 +2030,55 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
> > return ret;
> >   }
> >
> > +/**
> > + * amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and
> increment
> > +reference count
> > + * @adev: Device to which allocated BO belongs
> > + * @bo: Buffer object to be mapped
> > + *
> > + * Before return, bo reference count is incremented. To release the
> > +reference and unpin/
> > + * unmap the BO, call amdgpu_amdkfd_free_gtt_mem.
> > + */
> > +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev,
> > +struct amdgpu_bo *bo) {
> > +   int ret;
> > +
> > +   ret = amdgpu_bo_reserve(bo, true);
> > +   if (ret) {
> > +   pr_err("Failed to reserve bo. ret %d\n", ret);
> > +   goto err_reserve_bo_failed;
> > +   }
> > +
> > +   ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
> > +   if (ret) {
> > +   pr_err("Failed to pin bo. ret %d\n", ret);
> > +   goto err_pin_bo_failed;
> > +   }
> 
> 
> Oh! Is that something we do for every MQD? When yes that here is pretty
> much a NAK.
> 
> We can't do this or create a trivial deny of service attack against the kernel
> driver.
> 
> Regards,
> Christian.
> 

Hi Christian, could you elaborate on this? Right now this is only being used to 
pin the queue wptr BO.

Best,
Graham

> > +
> > +   ret = amdgpu_ttm_alloc_gart(&bo->tbo);
> > +   if (ret) {
> > +   pr_err("Failed to bind bo to GART. ret %d\n", ret);
> > +   goto err_map_bo_gart_failed;
> > +   }
> > +
> > +   amdgpu_amdkfd_remove_eviction_fence(
> > +   bo, bo->kfd_bo->process_info->eviction_fence);
> > 

Re: [PATCH v3 2/3] drm/amdkfd: Enable GFX11 usermode queue oversubscription

2022-06-15 Thread Christian König




Am 13.06.22 um 17:20 schrieb Graham Sider:

Starting with GFX11, MES requires wptr BOs to be GTT allocated/mapped to
GART for usermode queues in order to support oversubscription. In the
case that work is submitted to an unmapped queue, MES must have a GART
wptr address to determine whether the queue should be mapped.

This change is accompanied with changes in MES and is applicable for
MES_VERSION >= 3.

v2:
- Update MES_VERSION check from 2 to 3.
v3:
- Use amdgpu_vm_bo_lookup_mapping for wptr_bo mapping lookup
- Move wptr_bo refcount increment to amdgpu_amdkfd_map_gtt_bo_to_gart
- Remove list_del_init from amdgpu_amdkfd_map_gtt_bo_to_gart
- Cleanup/fix create_queue wptr_bo error handling

Signed-off-by: Graham Sider 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  1 +
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 49 +++
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 37 +-
  .../drm/amd/amdkfd/kfd_device_queue_manager.c |  9 +++-
  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  |  2 +
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  3 ++
  .../amd/amdkfd/kfd_process_queue_manager.c| 17 +--
  7 files changed, 110 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 429b16ba10bf..dba26d1e3be9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -301,6 +301,7 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct 
amdgpu_device *adev,
struct kgd_mem *mem, void **kptr, uint64_t *size);
  void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct amdgpu_device *adev,
struct kgd_mem *mem);
+int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct 
amdgpu_bo *bo);
  
  int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,

struct dma_fence **ef);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index efab923056f4..888d08128a94 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2030,6 +2030,55 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
return ret;
  }
  
+/**

+ * amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and increment reference 
count
+ * @adev: Device to which allocated BO belongs
+ * @bo: Buffer object to be mapped
+ *
+ * Before return, bo reference count is incremented. To release the reference 
and unpin/
+ * unmap the BO, call amdgpu_amdkfd_free_gtt_mem.
+ */
+int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct 
amdgpu_bo *bo)
+{
+   int ret;
+
+   ret = amdgpu_bo_reserve(bo, true);
+   if (ret) {
+   pr_err("Failed to reserve bo. ret %d\n", ret);
+   goto err_reserve_bo_failed;
+   }
+
+   ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+   if (ret) {
+   pr_err("Failed to pin bo. ret %d\n", ret);
+   goto err_pin_bo_failed;
+   }



Oh! Is that something we do for every MQD? When yes that here is pretty 
much a NAK.


We can't do this or create a trivial deny of service attack against the 
kernel driver.


Regards,
Christian.


+
+   ret = amdgpu_ttm_alloc_gart(&bo->tbo);
+   if (ret) {
+   pr_err("Failed to bind bo to GART. ret %d\n", ret);
+   goto err_map_bo_gart_failed;
+   }
+
+   amdgpu_amdkfd_remove_eviction_fence(
+   bo, bo->kfd_bo->process_info->eviction_fence);
+   list_del_init(&bo->kfd_bo->validate_list.head);
+
+   amdgpu_bo_unreserve(bo);
+
+   bo = amdgpu_bo_ref(bo);
+
+   return 0;
+
+err_map_bo_gart_failed:
+   amdgpu_bo_unpin(bo);
+err_pin_bo_failed:
+   amdgpu_bo_unreserve(bo);
+err_reserve_bo_failed:
+
+   return ret;
+}
+
  int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev,
struct kgd_mem *mem, void **kptr, uint64_t *size)
  {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index e9766e165c38..1789ed8b79f5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -289,6 +289,7 @@ static int kfd_ioctl_create_queue(struct file *filep, 
struct kfd_process *p,
struct kfd_process_device *pdd;
struct queue_properties q_properties;
uint32_t doorbell_offset_in_process = 0;
+   struct amdgpu_bo *wptr_bo = NULL;
  
  	memset(&q_properties, 0, sizeof(struct queue_properties));
  
@@ -316,12 +317,41 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,

goto err_bind_process;
}
  
+	/* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work

+* on unmapped queues for usermode queue oversubscription (no 
aggregated doorbell)
+*/
+   if (d

RE: [PATCH v3 2/3] drm/amdkfd: Enable GFX11 usermode queue oversubscription

2022-06-14 Thread Sider, Graham
[AMD Official Use Only - General]

>> From: Yang, Philip  
>> Sent: Tuesday, June 14, 2022 2:22 PM
>> To: Sider, Graham ; amd-gfx@lists.freedesktop.org
>> Cc: Kuehling, Felix ; Joshi, Mukul 
>> ; Yang, Philip 
>> Subject: Re: [PATCH v3 2/3] drm/amdkfd: Enable GFX11 usermode queue 
>> oversubscription
>>
>>
>> On 2022-06-13 11:20, Graham Sider wrote:
>> Starting with GFX11, MES requires wptr BOs to be GTT allocated/mapped to
>> GART for usermode queues in order to support oversubscription. In the
>> case that work is submitted to an unmapped queue, MES must have a GART
>> wptr address to determine whether the queue should be mapped.
>>
>> This change is accompanied with changes in MES and is applicable for
>> MES_VERSION >= 3.
>>
>> v2:
>> - Update MES_VERSION check from 2 to 3.
>> v3:
>> - Use amdgpu_vm_bo_lookup_mapping for wptr_bo mapping lookup
>> - Move wptr_bo refcount increment to amdgpu_amdkfd_map_gtt_bo_to_gart
>> - Remove list_del_init from amdgpu_amdkfd_map_gtt_bo_to_gart
>> - Cleanup/fix create_queue wptr_bo error handling
>> Two nit-pick below, with those fixed, this patch is
>> Reviewed-by: Philip Yangmailto:philip.y...@amd.com
>>
>> Signed-off-by: Graham Sider mailto:graham.si...@amd.com
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  1 +
>> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 49 +++
>>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 37 +-
>> .../drm/amd/amdkfd/kfd_device_queue_manager.c |  9 +++-
>> .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  |  2 +
>>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  3 ++
>> .../amd/amdkfd/kfd_process_queue_manager.c| 17 +--
>>  7 files changed, 110 insertions(+), 8 deletions(-)
>> 
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> index 429b16ba10bf..dba26d1e3be9 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> @@ -301,6 +301,7 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct 
>> amdgpu_device *adev,
>>  struct kgd_mem *mem, void **kptr, uint64_t *size);
>>  void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct amdgpu_device 
>> *adev,
>>  struct kgd_mem *mem);
>> +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct 
>> amdgpu_bo *bo);
>>  
>>  int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
>>  struct dma_fence **ef);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> index efab923056f4..888d08128a94 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> @@ -2030,6 +2030,55 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
>>  return ret;
>>  }
>>  
>> +/**
>> + * amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and increment 
>> reference count
>> + * @adev: Device to which allocated BO belongs
>> + * @bo: Buffer object to be mapped
>> + *
>> + * Before return, bo reference count is incremented. To release the 
>> reference and unpin/
>> + * unmap the BO, call amdgpu_amdkfd_free_gtt_mem.
>> + */
>> +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct 
>> amdgpu_bo *bo)
>> +{
>> +int ret;
>> +
>> +ret = amdgpu_bo_reserve(bo, true);
>> +if (ret) {
>> +pr_err("Failed to reserve bo. ret %d\n", ret);
>> +goto err_reserve_bo_failed;
>> +}
>> +
>> +ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
>> +if (ret) {
>> +pr_err("Failed to pin bo. ret %d\n", ret);
>> +goto err_pin_bo_failed;
>> +}
>> +
>> +ret = amdgpu_ttm_alloc_gart(&bo->tbo);
>> +if (ret) {
>> +pr_err("Failed to bind bo to GART. ret %d\n", ret);
>> +goto err_map_bo_gart_failed;
>> +}
>> +
>> +amdgpu_amdkfd_remove_eviction_fence(
>> +bo, bo->kfd_bo->process_info->eviction_fence);
>> +list_del_init(&bo->kfd_bo->validate_list.head);
>
> pinned bo should keep in validate_list as PDB/PTB may move and update, please 
> remove list_del_init here.
>

Thought I deleted this line - good catch.

>> +
>> +amdgpu_bo_unreserve(bo);
>> +
>> +bo =

Re: [PATCH v3 2/3] drm/amdkfd: Enable GFX11 usermode queue oversubscription

2022-06-14 Thread philip yang

  


On 2022-06-13 11:20, Graham Sider
  wrote:


  Starting with GFX11, MES requires wptr BOs to be GTT allocated/mapped to
GART for usermode queues in order to support oversubscription. In the
case that work is submitted to an unmapped queue, MES must have a GART
wptr address to determine whether the queue should be mapped.

This change is accompanied with changes in MES and is applicable for
MES_VERSION >= 3.

v2:
- Update MES_VERSION check from 2 to 3.
v3:
- Use amdgpu_vm_bo_lookup_mapping for wptr_bo mapping lookup
- Move wptr_bo refcount increment to amdgpu_amdkfd_map_gtt_bo_to_gart
- Remove list_del_init from amdgpu_amdkfd_map_gtt_bo_to_gart
- Cleanup/fix create_queue wptr_bo error handling


Two nit-pick below, with those fixed, this patch is
Reviewed-by: Philip Yang


  
Signed-off-by: Graham Sider 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  1 +
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 49 +++
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 37 +-
 .../drm/amd/amdkfd/kfd_device_queue_manager.c |  9 +++-
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  |  2 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  3 ++
 .../amd/amdkfd/kfd_process_queue_manager.c| 17 +--
 7 files changed, 110 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 429b16ba10bf..dba26d1e3be9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -301,6 +301,7 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev,
 		struct kgd_mem *mem, void **kptr, uint64_t *size);
 void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct amdgpu_device *adev,
 		struct kgd_mem *mem);
+int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo);
 
 int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
 	struct dma_fence **ef);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index efab923056f4..888d08128a94 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2030,6 +2030,55 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
 	return ret;
 }
 
+/**
+ * amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and increment reference count
+ * @adev: Device to which allocated BO belongs
+ * @bo: Buffer object to be mapped
+ *
+ * Before return, bo reference count is incremented. To release the reference and unpin/
+ * unmap the BO, call amdgpu_amdkfd_free_gtt_mem.
+ */
+int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo)
+{
+	int ret;
+
+	ret = amdgpu_bo_reserve(bo, true);
+	if (ret) {
+		pr_err("Failed to reserve bo. ret %d\n", ret);
+		goto err_reserve_bo_failed;
+	}
+
+	ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+	if (ret) {
+		pr_err("Failed to pin bo. ret %d\n", ret);
+		goto err_pin_bo_failed;
+	}
+
+	ret = amdgpu_ttm_alloc_gart(&bo->tbo);
+	if (ret) {
+		pr_err("Failed to bind bo to GART. ret %d\n", ret);
+		goto err_map_bo_gart_failed;
+	}
+
+	amdgpu_amdkfd_remove_eviction_fence(
+		bo, bo->kfd_bo->process_info->eviction_fence);
+	list_del_init(&bo->kfd_bo->validate_list.head);

pinned bo should keep in validate_list as PDB/PTB may move and
update, please remove list_del_init here.

  
+
+	amdgpu_bo_unreserve(bo);
+
+	bo = amdgpu_bo_ref(bo);
+
+	return 0;
+
+err_map_bo_gart_failed:
+	amdgpu_bo_unpin(bo);
+err_pin_bo_failed:
+	amdgpu_bo_unreserve(bo);
+err_reserve_bo_failed:
+
+	return ret;
+}
+
 int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev,
 		struct kgd_mem *mem, void **kptr, uint64_t *size)
 {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index e9766e165c38..1789ed8b79f5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -289,6 +289,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
 	struct kfd_process_device *pdd;
 	struct queue_properties q_properties;
 	uint32_t doorbell_offset_in_process = 0;
+	struct amdgpu_bo *wptr_bo = NULL;
 
 	memset(&q_properties, 0, sizeof(struct queue_properties));
 
@@ -316,12 +317,41 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
 		goto err_bind_process;
 	}
 
+	/* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work
+	 * on unmapped queues for usermode queue oversubscription (no aggregated doorbell)
+	 */
+	if (dev->shared_resources.enable_mes && (dev->adev->mes.sched_version & 0xff) >= 3) {

Should we check ip version for GFX11 only? Because GFX10 set
  enable_mes, and may set adev->mes.sched_version later as well.

Regards,
Philip


  
+		struct amdgpu_bo_va_mapping *wptr_m