KFD buffer objects do not have a GEM handle associated with them, so they
cannot be used directly with libdrm to initiate a system DMA (sDMA)
operation to speed up checkpoint and restore. Export them as dmabuf
objects instead, and use the libdrm helper (amdgpu_bo_import) to further
process the sDMA command submissions.

With sDMA, we see a huge improvement in checkpoint and restore operations
compared to the generic PCI-based access via the host data path.

Suggested-by: Felix Kuehling <felix.kuehl...@amd.com>
Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhard...@amd.com>
Signed-off-by: David Yat Sin <david.yat...@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 71 +++++++++++++++++++++++-
 1 file changed, 69 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 20652d488cde..178b0ccfb286 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -35,6 +35,7 @@
 #include <linux/mman.h>
 #include <linux/ptrace.h>
 #include <linux/dma-buf.h>
+#include <linux/fdtable.h>
 #include <asm/processor.h>
 #include "kfd_priv.h"
 #include "kfd_device_queue_manager.h"
@@ -43,6 +44,7 @@
 #include "amdgpu_amdkfd.h"
 #include "kfd_smi_events.h"
 #include "amdgpu_object.h"
+#include "amdgpu_dma_buf.h"
 
 static long kfd_ioctl(struct file *, unsigned int, unsigned long);
 static int kfd_open(struct inode *, struct file *);
@@ -1932,6 +1934,33 @@ uint64_t get_process_num_bos(struct kfd_process *p)
        return num_of_bos;
 }
 
+static int criu_get_prime_handle(struct drm_gem_object *gobj, int flags,
+                                     u32 *shared_fd)
+{
+       struct dma_buf *dmabuf;
+       int ret;
+
+       dmabuf = amdgpu_gem_prime_export(gobj, flags);
+       if (IS_ERR(dmabuf)) {
+               ret = PTR_ERR(dmabuf);
+               pr_err("dmabuf export failed for the BO\n");
+               return ret;
+       }
+
+       ret = dma_buf_fd(dmabuf, flags);
+       if (ret < 0) {
+               pr_err("dmabuf create fd failed, ret:%d\n", ret);
+               goto out_free_dmabuf;
+       }
+
+       *shared_fd = ret;
+       return 0;
+
+out_free_dmabuf:
+       dma_buf_put(dmabuf);
+       return ret;
+}
+
 static int criu_checkpoint_bos(struct kfd_process *p,
                               uint32_t num_bos,
                               uint8_t __user *user_bos,
@@ -1992,6 +2021,14 @@ static int criu_checkpoint_bos(struct kfd_process *p,
                                        goto exit;
                                }
                        }
+                       if (bo_bucket->alloc_flags & 
KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+                               ret = 
criu_get_prime_handle(&dumper_bo->tbo.base,
+                                               bo_bucket->alloc_flags &
+                                               
KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0,
+                                               &bo_bucket->dmabuf_fd);
+                               if (ret)
+                                       goto exit;
+                       }
                        if (bo_bucket->alloc_flags & 
KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)
                                bo_bucket->offset = KFD_MMAP_TYPE_DOORBELL |
                                        KFD_MMAP_GPU_ID(pdd->dev->id);
@@ -2031,6 +2068,10 @@ static int criu_checkpoint_bos(struct kfd_process *p,
        *priv_offset += num_bos * sizeof(*bo_privs);
 
 exit:
+       while (ret && bo_index--) {
+               if (bo_buckets[bo_index].alloc_flags & 
KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
+                       close_fd(bo_buckets[bo_index].dmabuf_fd);
+       }
 
        kvfree(bo_buckets);
        kvfree(bo_privs);
@@ -2131,16 +2172,28 @@ static int criu_checkpoint(struct file *filep,
                ret = kfd_criu_checkpoint_queues(p, (uint8_t __user 
*)args->priv_data,
                                                 &priv_offset);
                if (ret)
-                       goto exit_unlock;
+                       goto close_bo_fds;
 
                ret = kfd_criu_checkpoint_events(p, (uint8_t __user 
*)args->priv_data,
                                                 &priv_offset);
                if (ret)
-                       goto exit_unlock;
+                       goto close_bo_fds;
 
                /* TODO: Dump SVM-Ranges */
        }
 
+close_bo_fds:
+       if (ret) {
+               /* If IOCTL returns err, user assumes all FDs opened in 
criu_dump_bos are closed */
+               uint32_t i;
+               struct kfd_criu_bo_bucket *bo_buckets = (struct 
kfd_criu_bo_bucket *) args->bos;
+
+               for (i = 0; i < num_bos; i++) {
+                       if (bo_buckets[i].alloc_flags & 
KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
+                               close_fd(bo_buckets[i].dmabuf_fd);
+               }
+       }
+
 exit_unlock:
        mutex_unlock(&p->mutex);
        if (ret)
@@ -2335,6 +2388,7 @@ static int criu_restore_bos(struct kfd_process *p,
                struct kfd_criu_bo_priv_data *bo_priv;
                struct kfd_dev *dev;
                struct kfd_process_device *pdd;
+               struct kgd_mem *kgd_mem;
                void *mem;
                u64 offset;
                int idr_handle;
@@ -2479,6 +2533,15 @@ static int criu_restore_bos(struct kfd_process *p,
                }
 
                pr_debug("map memory was successful for the BO\n");
+               /* create the dmabuf object and export the bo */
+               kgd_mem = (struct kgd_mem *)mem;
+               if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+                       ret = criu_get_prime_handle(&kgd_mem->bo->tbo.base,
+                                                   DRM_RDWR,
+                                                   &bo_bucket->dmabuf_fd);
+                       if (ret)
+                               goto exit;
+               }
        } /* done */
 
        if (flush_tlbs) {
@@ -2506,6 +2569,10 @@ static int criu_restore_bos(struct kfd_process *p,
                ret = -EFAULT;
 
 exit:
+       while (ret && i--) {
+               if (bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
+                       close_fd(bo_buckets[i].dmabuf_fd);
+       }
        kvfree(bo_buckets);
        kvfree(bo_privs);
        return ret;
-- 
2.17.1

Reply via email to