With more thinking for you performance reason, Can we go further more not to create temp bo list at all? directly add them into parser->validated list?

In fact, if bo array is very long, then overhead of bo list creation in CS is considerable, which will double iterate all BOs compared to original.

From UMD perspective, they don't create bo list for every CS, they could use old created bo_list for next several CS, if there is a new bo, they just add it.

Thanks,
David Zhou
On 2018年07月12日 12:02, zhoucm1 wrote:


On 2018年07月12日 11:09, Zhou, David(ChunMing) wrote:
Hi Andrey,

Could you add compatibility flag or increase kms driver version? So that user space can keep old path when using new one.
Sorry for noise, it's already at end of path.

Regards,
David Zhou

Regards,
David Zhou

-----Original Message-----
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of zhoucm1
Sent: Thursday, July 12, 2018 10:31 AM
To: Grodzovsky, Andrey <andrey.grodzov...@amd.com>; amd-gfx@lists.freedesktop.org Cc: Olsak, Marek <marek.ol...@amd.com>; Koenig, Christian <christian.koe...@amd.com> Subject: Re: [PATCH v2] drm/amdgpu: Allow to create BO lists in CS ioctl v2



On 2018年07月12日 04:57, Andrey Grodzovsky wrote:
This change is to support MESA performace optimization.
Modify CS IOCTL to allow its input as command buffer and an array of
buffer handles to create a temporay bo list and then destroy it when
IOCTL completes.
This saves on calling for BO_LIST create and destry IOCTLs in MESA and
by this improves performance.

v2: Avoid inserting the temp list into idr struct.

Signed-off-by: Andrey Grodzovsky <andrey.grodzov...@amd.com>
---
   drivers/gpu/drm/amd/amdgpu/amdgpu.h         | 11 ++++
   drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 86 ++++++++++++++++++-----------
   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      | 51 +++++++++++++++--
   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |  3 +-
   include/uapi/drm/amdgpu_drm.h               |  1 +
   5 files changed, 114 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 8eaba0f..9b472b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -732,6 +732,17 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
                    struct list_head *validated);
   void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
   void amdgpu_bo_list_free(struct amdgpu_bo_list *list);
+int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
+                      struct drm_amdgpu_bo_list_entry **info_param);
+
+int amdgpu_bo_list_create(struct amdgpu_device *adev,
+                 struct drm_file *filp,
+                 struct drm_amdgpu_bo_list_entry *info,
+                 unsigned num_entries,
+                 int *id,
+                 struct amdgpu_bo_list **list);
+
+void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id);
      /*
    * GFX stuff
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 92be7f6..14c7c59 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -55,11 +55,12 @@ static void amdgpu_bo_list_release_rcu(struct kref *ref)
       kfree_rcu(list, rhead);
   }
   -static int amdgpu_bo_list_create(struct amdgpu_device *adev,
+int amdgpu_bo_list_create(struct amdgpu_device *adev,
                    struct drm_file *filp,
                    struct drm_amdgpu_bo_list_entry *info,
                    unsigned num_entries,
-                 int *id)
+                 int *id,
+                 struct amdgpu_bo_list **list_out)
   {
       int r;
       struct amdgpu_fpriv *fpriv = filp->driver_priv; @@ -78,20 +79,25 @@
static int amdgpu_bo_list_create(struct amdgpu_device *adev,
           return r;
       }
   +    if (id) {
       /* idr alloc should be called only after initialization of bo list. */
-    mutex_lock(&fpriv->bo_list_lock);
-    r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
-    mutex_unlock(&fpriv->bo_list_lock);
-    if (r < 0) {
-        amdgpu_bo_list_free(list);
-        return r;
+        mutex_lock(&fpriv->bo_list_lock);
+        r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
+        mutex_unlock(&fpriv->bo_list_lock);
+        if (r < 0) {
+            amdgpu_bo_list_free(list);
+            return r;
+        }
+        *id = r;
       }
-    *id = r;
+
+    if (list_out)
+        *list_out = list;
          return 0;
   }
   -static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int
id)
+void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
   {
       struct amdgpu_bo_list *list;
   @@ -263,53 +269,68 @@ void amdgpu_bo_list_free(struct amdgpu_bo_list *list)
       kfree(list);
   }
   -int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
-                struct drm_file *filp)
+int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
+                      struct drm_amdgpu_bo_list_entry **info_param)
   {
-    const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);
-
-    struct amdgpu_device *adev = dev->dev_private;
-    struct amdgpu_fpriv *fpriv = filp->driver_priv;
-    union drm_amdgpu_bo_list *args = data;
-    uint32_t handle = args->in.list_handle;
-    const void __user *uptr = u64_to_user_ptr(args->in.bo_info_ptr);
-
       struct drm_amdgpu_bo_list_entry *info;
-    struct amdgpu_bo_list *list;
-
       int r;
+    const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr);
+    const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);
   -    info = kvmalloc_array(args->in.bo_number,
+    info = kvmalloc_array(in->bo_number,
                    sizeof(struct drm_amdgpu_bo_list_entry), GFP_KERNEL);
       if (!info)
           return -ENOMEM;
          /* copy the handle array from userspace to a kernel buffer */
       r = -EFAULT;
-    if (likely(info_size == args->in.bo_info_size)) {
-        unsigned long bytes = args->in.bo_number *
-            args->in.bo_info_size;
+    if (likely(info_size == in->bo_info_size)) {
+        unsigned long bytes = in->bo_number *
+            in->bo_info_size;
              if (copy_from_user(info, uptr, bytes))
               goto error_free;
          } else {
-        unsigned long bytes = min(args->in.bo_info_size, info_size);
+        unsigned long bytes = min(in->bo_info_size, info_size);
           unsigned i;
   -        memset(info, 0, args->in.bo_number * info_size);
-        for (i = 0; i < args->in.bo_number; ++i) {
+        memset(info, 0, in->bo_number * info_size);
+        for (i = 0; i < in->bo_number; ++i) {
               if (copy_from_user(&info[i], uptr, bytes))
                   goto error_free;
   -            uptr += args->in.bo_info_size;
+            uptr += in->bo_info_size;
           }
       }
   +    *info_param = info;
+    return 0;
+
+error_free:
+    kvfree(info);
+    return r;
+}
+
+int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
+                struct drm_file *filp)
+{
+    struct amdgpu_device *adev = dev->dev_private;
+    struct amdgpu_fpriv *fpriv = filp->driver_priv;
+    union drm_amdgpu_bo_list *args = data;
+    uint32_t handle = args->in.list_handle;
+    struct drm_amdgpu_bo_list_entry *info = NULL;
+    struct amdgpu_bo_list *list;
+    int r;
+
+    r = amdgpu_bo_create_list_entry_array(&args->in, &info);
+    if (r)
+        goto error_free;
+
       switch (args->in.operation) {
       case AMDGPU_BO_LIST_OP_CREATE:
           r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number,
-                      &handle);
+                      &handle, NULL);
           if (r)
               goto error_free;
           break;
@@ -345,6 +366,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
       return 0;
      error_free:
-    kvfree(info);
+    if (info)
+        kvfree(info);
       return r;
   }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 9881a1e..30026b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -66,11 +66,34 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
       return 0;
   }
   -static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void
*data)
+static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
+                      struct drm_amdgpu_bo_list_in *data) {
+    int r;
+    struct drm_amdgpu_bo_list_entry *info = NULL;
+
+    r = amdgpu_bo_create_list_entry_array(data, &info);
+    if (r)
+        return r;
+
+    r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number, NULL, &p->bo_list);
+    if (r)
+        goto error_free;
+
+    kvfree(info);
+    return 0;
+
+error_free:
+    if (info)
+        kvfree(info);
+
+    return r;
+}
+
+static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union
+drm_amdgpu_cs *cs)
   {
       struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
       struct amdgpu_vm *vm = &fpriv->vm;
-    union drm_amdgpu_cs *cs = data;
       uint64_t *chunk_array_user;
       uint64_t *chunk_array;
       unsigned size, num_ibs = 0;
@@ -164,6 +187,19 @@ static int amdgpu_cs_parser_init(struct
amdgpu_cs_parser *p, void *data)
                  break;
   +        case AMDGPU_CHUNK_ID_BO_HANDLES:
+            size = sizeof(struct drm_amdgpu_bo_list_in);
+            if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
+                ret = -EINVAL;
+                goto free_partial_kdata;
+            }
+
+            ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
+            if (ret)
+                goto free_partial_kdata;
+
+            break;
+
           case AMDGPU_CHUNK_ID_DEPENDENCIES:
           case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
           case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
@@ -534,7 +570,12 @@ static int amdgpu_cs_parser_bos(struct
amdgpu_cs_parser *p,
          INIT_LIST_HEAD(&p->validated);
   -    p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
+    /* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
+    if (!p->bo_list)
+        p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
+    else
+        mutex_lock(&p->bo_list->lock);
+
       if (p->bo_list) {
           amdgpu_bo_list_get_list(p->bo_list, &p->validated);
           if (p->bo_list->first_userptr != p->bo_list->num_entries) @@
-747,7 +788,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
    * used by parsing context.
    **/
   static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
-                  bool backoff)
+                  bool backoff, int id)
Don't need it after you get bo_list without idr.

   {
       unsigned i;
   @@ -1277,7 +1318,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
       r = amdgpu_cs_submit(&parser, cs);
      out:
-    amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
+    amdgpu_cs_parser_fini(&parser, r, reserved_buffers,
+cs->in.bo_list_handle);
Don't need it after you get bo_list without idr.

Otherwise it looks really good to me, Reviewed-by: Chunming Zhou <david1.z...@amd.com>

       return r;
   }
   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 06aede1..529500c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -69,9 +69,10 @@
    * - 3.24.0 - Add high priority compute support for gfx9
    * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
    * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
+ * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.
    */
   #define KMS_DRIVER_MAJOR    3
-#define KMS_DRIVER_MINOR    26
+#define KMS_DRIVER_MINOR    27
   #define KMS_DRIVER_PATCHLEVEL    0
      int amdgpu_vram_limit = 0;
diff --git a/include/uapi/drm/amdgpu_drm.h
b/include/uapi/drm/amdgpu_drm.h index 09741ba..94444ee 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -519,6 +519,7 @@ struct drm_amdgpu_gem_va {
   #define AMDGPU_CHUNK_ID_DEPENDENCIES    0x03
   #define AMDGPU_CHUNK_ID_SYNCOBJ_IN      0x04
   #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT     0x05
+#define AMDGPU_CHUNK_ID_BO_HANDLES      0x06
      struct drm_amdgpu_cs_chunk {
       __u32        chunk_id;
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to