Re: [PATCH 5/6] drm/amdgpu: Implement userqueue signal/wait IOCTL functions

2023-02-27 Thread Arunpravin Paneer Selvam




On 2/27/2023 6:53 PM, Christian König wrote:

Hi Arun,

Am 27.02.23 um 14:20 schrieb Arunpravin Paneer Selvam:

Hi Christian,


On 2/27/2023 6:29 PM, Christian König wrote:

Am 26.02.23 um 17:54 schrieb Arunpravin Paneer Selvam:

This patch introduces new IOCTL for userqueue secure semaphore.

The signal IOCTL called from userspace application creates a drm
syncobj and array of bo GEM handles and passed in as parameter to
the driver to install the fence into it.

The wait IOCTL gets an array of drm syncobjs, finds the fences
attached to the drm syncobjs and obtains the array of
memory_address/fence_value combinations which are returned to
userspace.

v2: Worked on review comments from Christian for the following
 modifications

 - Install fence into GEM BO object.
 - Lock all BO's using the dma resv subsystem
 - Reorder the sequence in signal IOCTL function.
 - Get write pointer from the shadow wptr
 - use userq_fence to fetch the va/value in wait IOCTL.

Signed-off-by: Arunpravin Paneer Selvam 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |   3 +
  .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   | 258 
++

  .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h   |   6 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c |   1 +
  5 files changed, 270 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index 1c3eba2d0390..255d73795493 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -964,6 +964,8 @@ struct amdgpu_device {
  struct amdgpu_mes   mes;
  struct amdgpu_mqd mqds[AMDGPU_HW_IP_NUM];
  +    struct amdgpu_userq_mgr *userq_mgr;
+
  /* df */
  struct amdgpu_df    df;
  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

index 6b7ac1ebd04c..66a7304fabe3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2752,6 +2752,9 @@ const struct drm_ioctl_desc 
amdgpu_ioctls_kms[] = {
  DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, 
amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
  DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
  DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_DOORBELL_RING, 
amdgpu_userq_doorbell_ring_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+    DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, 
amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+    DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),

+
  };
    static const struct drm_driver amdgpu_kms_driver = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c

index 609a7328e9a6..26fd1d4f758a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -249,3 +249,261 @@ static const struct dma_fence_ops 
amdgpu_userq_fence_ops = {

  .signaled = amdgpu_userq_fence_signaled,
  .release = amdgpu_userq_fence_release,
  };
+
+static int amdgpu_userq_fence_read_wptr(struct drm_file *filp,
+    struct amdgpu_usermode_queue *queue,
+    u64 *wptr)
+{
+    struct amdgpu_fpriv *fpriv = filp->driver_priv;
+    struct amdgpu_bo_va_mapping *mapping;
+    struct amdgpu_vm *vm = &fpriv->vm;
+    struct amdgpu_bo *bo;
+    u64 *ptr;
+    int r;
+
+    mapping = amdgpu_vm_bo_lookup_mapping(vm, queue->wptr_gpu_addr 
>> PAGE_SHIFT);

+    if (!mapping)
+    return -EINVAL;
+
+    bo = mapping->bo_va->base.bo;
+    r = amdgpu_bo_kmap(bo, (void **)&ptr);


Oh, that's not something you can do that easily.

The BO must be reserved (locked) first if you want to call 
amdgpu_bo_kmap() on it.

sure, I will take care



+    if (r) {
+    DRM_ERROR("Failed mapping the userqueue wptr bo");
+    return r;
+    }
+
+    *wptr = le64_to_cpu(*ptr);
+
+    return 0;
+}
+
+int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
+  struct drm_file *filp)
+{
+    struct drm_amdgpu_userq_signal *args = data;
+    struct amdgpu_device *adev = drm_to_adev(dev);
+    struct amdgpu_userq_mgr *userq_mgr = adev->userq_mgr;
+    struct amdgpu_usermode_queue *queue;
+    struct drm_syncobj *syncobj = NULL;
+    struct drm_gem_object **gobj;
+    u64 num_bo_handles, wptr;
+    struct dma_fence *fence;
+    u32 *bo_handles;
+    bool shared;
+    int r, i;
+
+    /* Retrieve the user queue */
+    queue = idr_find(&userq_mgr->userq_idr, args->queue_id);
+    if (!queue)
+    return -ENOENT;
+
+    r = amdgpu_userq_fence_read_wptr(filp, queue, &wptr);
+    if (r)
+    return -EINVAL;
+
+    /* Find Syncobj if any */
+    syncobj = drm_syncobj_find(filp, args->handle);
+
+    /* Array of bo handles */
+    num_bo_handles = args->num_bo_handles;
+    bo_handles = kmalloc_array(num_bo_handles, 
sizeof(*bo_ha

Re: [PATCH 5/6] drm/amdgpu: Implement userqueue signal/wait IOCTL functions

2023-02-27 Thread Christian König

Hi Arun,

Am 27.02.23 um 14:20 schrieb Arunpravin Paneer Selvam:

Hi Christian,


On 2/27/2023 6:29 PM, Christian König wrote:

Am 26.02.23 um 17:54 schrieb Arunpravin Paneer Selvam:

This patch introduces new IOCTL for userqueue secure semaphore.

The signal IOCTL called from userspace application creates a drm
syncobj and array of bo GEM handles and passed in as parameter to
the driver to install the fence into it.

The wait IOCTL gets an array of drm syncobjs, finds the fences
attached to the drm syncobjs and obtains the array of
memory_address/fence_value combinations which are returned to
userspace.

v2: Worked on review comments from Christian for the following
 modifications

 - Install fence into GEM BO object.
 - Lock all BO's using the dma resv subsystem
 - Reorder the sequence in signal IOCTL function.
 - Get write pointer from the shadow wptr
 - use userq_fence to fetch the va/value in wait IOCTL.

Signed-off-by: Arunpravin Paneer Selvam 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |   3 +
  .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   | 258 
++

  .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h   |   6 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c |   1 +
  5 files changed, 270 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index 1c3eba2d0390..255d73795493 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -964,6 +964,8 @@ struct amdgpu_device {
  struct amdgpu_mes   mes;
  struct amdgpu_mqd   mqds[AMDGPU_HW_IP_NUM];
  +    struct amdgpu_userq_mgr *userq_mgr;
+
  /* df */
  struct amdgpu_df    df;
  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

index 6b7ac1ebd04c..66a7304fabe3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2752,6 +2752,9 @@ const struct drm_ioctl_desc 
amdgpu_ioctls_kms[] = {
  DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, 
amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
  DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
  DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_DOORBELL_RING, 
amdgpu_userq_doorbell_ring_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+    DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, 
amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+    DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),

+
  };
    static const struct drm_driver amdgpu_kms_driver = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c

index 609a7328e9a6..26fd1d4f758a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -249,3 +249,261 @@ static const struct dma_fence_ops 
amdgpu_userq_fence_ops = {

  .signaled = amdgpu_userq_fence_signaled,
  .release = amdgpu_userq_fence_release,
  };
+
+static int amdgpu_userq_fence_read_wptr(struct drm_file *filp,
+    struct amdgpu_usermode_queue *queue,
+    u64 *wptr)
+{
+    struct amdgpu_fpriv *fpriv = filp->driver_priv;
+    struct amdgpu_bo_va_mapping *mapping;
+    struct amdgpu_vm *vm = &fpriv->vm;
+    struct amdgpu_bo *bo;
+    u64 *ptr;
+    int r;
+
+    mapping = amdgpu_vm_bo_lookup_mapping(vm, queue->wptr_gpu_addr 
>> PAGE_SHIFT);

+    if (!mapping)
+    return -EINVAL;
+
+    bo = mapping->bo_va->base.bo;
+    r = amdgpu_bo_kmap(bo, (void **)&ptr);


Oh, that's not something you can do that easily.

The BO must be reserved (locked) first if you want to call 
amdgpu_bo_kmap() on it.

sure, I will take care



+    if (r) {
+    DRM_ERROR("Failed mapping the userqueue wptr bo");
+    return r;
+    }
+
+    *wptr = le64_to_cpu(*ptr);
+
+    return 0;
+}
+
+int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
+  struct drm_file *filp)
+{
+    struct drm_amdgpu_userq_signal *args = data;
+    struct amdgpu_device *adev = drm_to_adev(dev);
+    struct amdgpu_userq_mgr *userq_mgr = adev->userq_mgr;
+    struct amdgpu_usermode_queue *queue;
+    struct drm_syncobj *syncobj = NULL;
+    struct drm_gem_object **gobj;
+    u64 num_bo_handles, wptr;
+    struct dma_fence *fence;
+    u32 *bo_handles;
+    bool shared;
+    int r, i;
+
+    /* Retrieve the user queue */
+    queue = idr_find(&userq_mgr->userq_idr, args->queue_id);
+    if (!queue)
+    return -ENOENT;
+
+    r = amdgpu_userq_fence_read_wptr(filp, queue, &wptr);
+    if (r)
+    return -EINVAL;
+
+    /* Find Syncobj if any */
+    syncobj = drm_syncobj_find(filp, args->handle);
+
+    /* Array of bo handles */
+    num_bo_handles = args->num_bo_handles;
+    bo_handles = kmalloc_array(num_bo_handles, sizeof(*bo_handles), 
GFP_KERNEL);

+    if (bo_h

Re: [PATCH 5/6] drm/amdgpu: Implement userqueue signal/wait IOCTL functions

2023-02-27 Thread Arunpravin Paneer Selvam

Hi Christian,


On 2/27/2023 6:29 PM, Christian König wrote:

Am 26.02.23 um 17:54 schrieb Arunpravin Paneer Selvam:

This patch introduces new IOCTL for userqueue secure semaphore.

The signal IOCTL called from userspace application creates a drm
syncobj and array of bo GEM handles and passed in as parameter to
the driver to install the fence into it.

The wait IOCTL gets an array of drm syncobjs, finds the fences
attached to the drm syncobjs and obtains the array of
memory_address/fence_value combinations which are returned to
userspace.

v2: Worked on review comments from Christian for the following
 modifications

 - Install fence into GEM BO object.
 - Lock all BO's using the dma resv subsystem
 - Reorder the sequence in signal IOCTL function.
 - Get write pointer from the shadow wptr
 - use userq_fence to fetch the va/value in wait IOCTL.

Signed-off-by: Arunpravin Paneer Selvam 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |   3 +
  .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   | 258 ++
  .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h   |   6 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c |   1 +
  5 files changed, 270 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index 1c3eba2d0390..255d73795493 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -964,6 +964,8 @@ struct amdgpu_device {
  struct amdgpu_mes   mes;
  struct amdgpu_mqd   mqds[AMDGPU_HW_IP_NUM];
  +    struct amdgpu_userq_mgr *userq_mgr;
+
  /* df */
  struct amdgpu_df    df;
  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

index 6b7ac1ebd04c..66a7304fabe3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2752,6 +2752,9 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] 
= {
  DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
  DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
  DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_DOORBELL_RING, 
amdgpu_userq_doorbell_ring_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+    DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, 
amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+    DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),

+
  };
    static const struct drm_driver amdgpu_kms_driver = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c

index 609a7328e9a6..26fd1d4f758a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -249,3 +249,261 @@ static const struct dma_fence_ops 
amdgpu_userq_fence_ops = {

  .signaled = amdgpu_userq_fence_signaled,
  .release = amdgpu_userq_fence_release,
  };
+
+static int amdgpu_userq_fence_read_wptr(struct drm_file *filp,
+    struct amdgpu_usermode_queue *queue,
+    u64 *wptr)
+{
+    struct amdgpu_fpriv *fpriv = filp->driver_priv;
+    struct amdgpu_bo_va_mapping *mapping;
+    struct amdgpu_vm *vm = &fpriv->vm;
+    struct amdgpu_bo *bo;
+    u64 *ptr;
+    int r;
+
+    mapping = amdgpu_vm_bo_lookup_mapping(vm, queue->wptr_gpu_addr 
>> PAGE_SHIFT);

+    if (!mapping)
+    return -EINVAL;
+
+    bo = mapping->bo_va->base.bo;
+    r = amdgpu_bo_kmap(bo, (void **)&ptr);


Oh, that's not something you can do that easily.

The BO must be reserved (locked) first if you want to call 
amdgpu_bo_kmap() on it.

sure, I will take care



+    if (r) {
+    DRM_ERROR("Failed mapping the userqueue wptr bo");
+    return r;
+    }
+
+    *wptr = le64_to_cpu(*ptr);
+
+    return 0;
+}
+
+int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
+  struct drm_file *filp)
+{
+    struct drm_amdgpu_userq_signal *args = data;
+    struct amdgpu_device *adev = drm_to_adev(dev);
+    struct amdgpu_userq_mgr *userq_mgr = adev->userq_mgr;
+    struct amdgpu_usermode_queue *queue;
+    struct drm_syncobj *syncobj = NULL;
+    struct drm_gem_object **gobj;
+    u64 num_bo_handles, wptr;
+    struct dma_fence *fence;
+    u32 *bo_handles;
+    bool shared;
+    int r, i;
+
+    /* Retrieve the user queue */
+    queue = idr_find(&userq_mgr->userq_idr, args->queue_id);
+    if (!queue)
+    return -ENOENT;
+
+    r = amdgpu_userq_fence_read_wptr(filp, queue, &wptr);
+    if (r)
+    return -EINVAL;
+
+    /* Find Syncobj if any */
+    syncobj = drm_syncobj_find(filp, args->handle);
+
+    /* Array of bo handles */
+    num_bo_handles = args->num_bo_handles;
+    bo_handles = kmalloc_array(num_bo_handles, sizeof(*bo_handles), 
GFP_KERNEL);

+    if (bo_handles == NULL)
+    return -ENOMEM;
+
+    if (copy_from_user(b

Re: [PATCH 5/6] drm/amdgpu: Implement userqueue signal/wait IOCTL functions

2023-02-27 Thread Christian König

Am 26.02.23 um 17:54 schrieb Arunpravin Paneer Selvam:

This patch introduces new IOCTL for userqueue secure semaphore.

The signal IOCTL called from userspace application creates a drm
syncobj and array of bo GEM handles and passed in as parameter to
the driver to install the fence into it.

The wait IOCTL gets an array of drm syncobjs, finds the fences
attached to the drm syncobjs and obtains the array of
memory_address/fence_value combinations which are returned to
userspace.

v2: Worked on review comments from Christian for the following
 modifications

 - Install fence into GEM BO object.
 - Lock all BO's using the dma resv subsystem
 - Reorder the sequence in signal IOCTL function.
 - Get write pointer from the shadow wptr
 - use userq_fence to fetch the va/value in wait IOCTL.

Signed-off-by: Arunpravin Paneer Selvam 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |   3 +
  .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   | 258 ++
  .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h   |   6 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c |   1 +
  5 files changed, 270 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 1c3eba2d0390..255d73795493 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -964,6 +964,8 @@ struct amdgpu_device {
struct amdgpu_mes   mes;
struct amdgpu_mqd   mqds[AMDGPU_HW_IP_NUM];
  
+	struct amdgpu_userq_mgr *userq_mgr;

+
/* df */
struct amdgpu_df    df;
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

index 6b7ac1ebd04c..66a7304fabe3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2752,6 +2752,9 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_DOORBELL_RING, 
amdgpu_userq_doorbell_ring_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
+
  };
  
  static const struct drm_driver amdgpu_kms_driver = {

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
index 609a7328e9a6..26fd1d4f758a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -249,3 +249,261 @@ static const struct dma_fence_ops amdgpu_userq_fence_ops 
= {
.signaled = amdgpu_userq_fence_signaled,
.release = amdgpu_userq_fence_release,
  };
+
+static int amdgpu_userq_fence_read_wptr(struct drm_file *filp,
+   struct amdgpu_usermode_queue *queue,
+   u64 *wptr)
+{
+   struct amdgpu_fpriv *fpriv = filp->driver_priv;
+   struct amdgpu_bo_va_mapping *mapping;
+   struct amdgpu_vm *vm = &fpriv->vm;
+   struct amdgpu_bo *bo;
+   u64 *ptr;
+   int r;
+
+   mapping = amdgpu_vm_bo_lookup_mapping(vm, queue->wptr_gpu_addr >> 
PAGE_SHIFT);
+   if (!mapping)
+   return -EINVAL;
+
+   bo = mapping->bo_va->base.bo;
+   r = amdgpu_bo_kmap(bo, (void **)&ptr);


Oh, that's not something you can do that easily.

The BO must be reserved (locked) first if you want to call 
amdgpu_bo_kmap() on it.



+   if (r) {
+   DRM_ERROR("Failed mapping the userqueue wptr bo");
+   return r;
+   }
+
+   *wptr = le64_to_cpu(*ptr);
+
+   return 0;
+}
+
+int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+   struct drm_amdgpu_userq_signal *args = data;
+   struct amdgpu_device *adev = drm_to_adev(dev);
+   struct amdgpu_userq_mgr *userq_mgr = adev->userq_mgr;
+   struct amdgpu_usermode_queue *queue;
+   struct drm_syncobj *syncobj = NULL;
+   struct drm_gem_object **gobj;
+   u64 num_bo_handles, wptr;
+   struct dma_fence *fence;
+   u32 *bo_handles;
+   bool shared;
+   int r, i;
+
+   /* Retrieve the user queue */
+   queue = idr_find(&userq_mgr->userq_idr, args->queue_id);
+   if (!queue)
+   return -ENOENT;
+
+   r = amdgpu_userq_fence_read_wptr(filp, queue, &wptr);
+   if (r)
+   return -EINVAL;
+
+   /* Find Syncobj if any */
+   syncobj = drm_syncobj_find(filp, args->handle);
+
+   /* Array of bo handles */
+   num_bo_handles = args->num_bo_handles;
+   bo_handles = kmalloc_array(num_bo_handle

[PATCH 5/6] drm/amdgpu: Implement userqueue signal/wait IOCTL functions

2023-02-26 Thread Arunpravin Paneer Selvam
This patch introduces new IOCTL for userqueue secure semaphore.

The signal IOCTL called from userspace application creates a drm
syncobj and array of bo GEM handles and passed in as parameter to
the driver to install the fence into it.

The wait IOCTL gets an array of drm syncobjs, finds the fences
attached to the drm syncobjs and obtains the array of
memory_address/fence_value combinations which are returned to
userspace.

v2: Worked on review comments from Christian for the following
modifications

- Install fence into GEM BO object.
- Lock all BO's using the dma resv subsystem
- Reorder the sequence in signal IOCTL function.
- Get write pointer from the shadow wptr
- use userq_fence to fetch the va/value in wait IOCTL.

Signed-off-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |   3 +
 .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   | 258 ++
 .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h   |   6 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c |   1 +
 5 files changed, 270 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 1c3eba2d0390..255d73795493 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -964,6 +964,8 @@ struct amdgpu_device {
struct amdgpu_mes   mes;
struct amdgpu_mqd   mqds[AMDGPU_HW_IP_NUM];
 
+   struct amdgpu_userq_mgr *userq_mgr;
+
/* df */
struct amdgpu_df    df;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6b7ac1ebd04c..66a7304fabe3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2752,6 +2752,9 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_DOORBELL_RING, 
amdgpu_userq_doorbell_ring_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
+
 };
 
 static const struct drm_driver amdgpu_kms_driver = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
index 609a7328e9a6..26fd1d4f758a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -249,3 +249,261 @@ static const struct dma_fence_ops amdgpu_userq_fence_ops 
= {
.signaled = amdgpu_userq_fence_signaled,
.release = amdgpu_userq_fence_release,
 };
+
+static int amdgpu_userq_fence_read_wptr(struct drm_file *filp,
+   struct amdgpu_usermode_queue *queue,
+   u64 *wptr)
+{
+   struct amdgpu_fpriv *fpriv = filp->driver_priv;
+   struct amdgpu_bo_va_mapping *mapping;
+   struct amdgpu_vm *vm = &fpriv->vm;
+   struct amdgpu_bo *bo;
+   u64 *ptr;
+   int r;
+
+   mapping = amdgpu_vm_bo_lookup_mapping(vm, queue->wptr_gpu_addr >> 
PAGE_SHIFT);
+   if (!mapping)
+   return -EINVAL;
+
+   bo = mapping->bo_va->base.bo;
+   r = amdgpu_bo_kmap(bo, (void **)&ptr);
+   if (r) {
+   DRM_ERROR("Failed mapping the userqueue wptr bo");
+   return r;
+   }
+
+   *wptr = le64_to_cpu(*ptr);
+
+   return 0;
+}
+
+int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+   struct drm_amdgpu_userq_signal *args = data;
+   struct amdgpu_device *adev = drm_to_adev(dev);
+   struct amdgpu_userq_mgr *userq_mgr = adev->userq_mgr;
+   struct amdgpu_usermode_queue *queue;
+   struct drm_syncobj *syncobj = NULL;
+   struct drm_gem_object **gobj;
+   u64 num_bo_handles, wptr;
+   struct dma_fence *fence;
+   u32 *bo_handles;
+   bool shared;
+   int r, i;
+
+   /* Retrieve the user queue */
+   queue = idr_find(&userq_mgr->userq_idr, args->queue_id);
+   if (!queue)
+   return -ENOENT;
+
+   r = amdgpu_userq_fence_read_wptr(filp, queue, &wptr);
+   if (r)
+   return -EINVAL;
+
+   /* Find Syncobj if any */
+   syncobj = drm_syncobj_find(filp, args->handle);
+
+   /* Array of bo handles */
+   num_bo_handles = args->num_bo_handles;
+   bo_handles = kmalloc_array(num_bo_handles, sizeof(*bo_handles), 
GFP_KERNEL);
+   if (bo_handles == NULL)
+   return -ENOMEM;
+
+   if (copy_from_user(bo_handles, u64_to_user_ptr(args->bo_handles_array),
+