Am 2021-10-15 um 2:54 a.m. schrieb Lang Yu:
> Currently, all kfd BOs use same destruction routine. But pinned
> BOs are not unpinned properly. Separate them from general routine.
>
> v2 (Felix):
> Add safeguard to prevent user space from freeing signal BO.
> Kunmap signal BO in the event of setting event page error.
> Just kunmap signal BO to avoid duplicating the code.
>
> Signed-off-by: Lang Yu <lang...@amd.com>

Reviewed-by: Felix Kuehling <felix.kuehl...@amd.com>


> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |   2 +
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  10 ++
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      |  31 +++--
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |   3 +
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c      | 110 +++++++++++++-----
>  5 files changed, 119 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 69de31754907..751557af09bb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -279,6 +279,8 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
>               struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);
>  int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
>               struct kgd_mem *mem, void **kptr, uint64_t *size);
> +void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_dev *kgd, 
> struct kgd_mem *mem);
> +
>  int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
>                                           struct dma_fence **ef);
>  int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index cdf46bd0d8d5..4969763c2e47 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -1871,6 +1871,16 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct 
> kgd_dev *kgd,
>       return ret;
>  }
>  
> +void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_dev *kgd, 
> struct kgd_mem *mem)
> +{
> +     struct amdgpu_bo *bo = mem->bo;
> +
> +     amdgpu_bo_reserve(bo, true);
> +     amdgpu_bo_kunmap(bo);
> +     amdgpu_bo_unpin(bo);
> +     amdgpu_bo_unreserve(bo);
> +}
> +
>  int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
>                                             struct kfd_vm_fault_info *mem)
>  {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index f1e7edeb4e6b..9317a2e238d0 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1011,11 +1011,6 @@ static int kfd_ioctl_create_event(struct file *filp, 
> struct kfd_process *p,
>               void *mem, *kern_addr;
>               uint64_t size;
>  
> -             if (p->signal_page) {
> -                     pr_err("Event page is already set\n");
> -                     return -EINVAL;
> -             }
> -
>               kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
>               if (!kfd) {
>                       pr_err("Getting device by id failed in %s\n", __func__);
> @@ -1023,6 +1018,13 @@ static int kfd_ioctl_create_event(struct file *filp, 
> struct kfd_process *p,
>               }
>  
>               mutex_lock(&p->mutex);
> +
> +             if (p->signal_page) {
> +                     pr_err("Event page is already set\n");
> +                     err = -EINVAL;
> +                     goto out_unlock;
> +             }
> +
>               pdd = kfd_bind_process_to_device(kfd, p);
>               if (IS_ERR(pdd)) {
>                       err = PTR_ERR(pdd);
> @@ -1037,20 +1039,24 @@ static int kfd_ioctl_create_event(struct file *filp, 
> struct kfd_process *p,
>                       err = -EINVAL;
>                       goto out_unlock;
>               }
> -             mutex_unlock(&p->mutex);
>  
>               err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd,
>                                               mem, &kern_addr, &size);
>               if (err) {
>                       pr_err("Failed to map event page to kernel\n");
> -                     return err;
> +                     goto out_unlock;
>               }
>  
>               err = kfd_event_page_set(p, kern_addr, size);
>               if (err) {
>                       pr_err("Failed to set event page\n");
> -                     return err;
> +                     amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kfd->kgd, 
> mem);
> +                     goto out_unlock;
>               }
> +
> +             p->signal_handle = args->event_page_offset;
> +
> +             mutex_unlock(&p->mutex);
>       }
>  
>       err = kfd_event_create(filp, p, args->event_type,
> @@ -1368,6 +1374,15 @@ static int kfd_ioctl_free_memory_of_gpu(struct file 
> *filep,
>               return -EINVAL;
>  
>       mutex_lock(&p->mutex);
> +     /*
> +      * Safeguard to prevent user space from freeing signal BO.
> +      * It will be freed at process termination.
> +      */
> +     if (p->signal_handle && (p->signal_handle == args->handle)) {
> +             pr_err("Free signal BO is not allowed\n");
> +             ret = -EPERM;
> +             goto err_unlock;
> +     }
>  
>       pdd = kfd_get_process_device_data(dev, p);
>       if (!pdd) {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 6d8f9bb2d905..30f08f1606bb 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -608,12 +608,14 @@ struct qcm_process_device {
>       uint32_t sh_hidden_private_base;
>  
>       /* CWSR memory */
> +     struct kgd_mem *cwsr_mem;
>       void *cwsr_kaddr;
>       uint64_t cwsr_base;
>       uint64_t tba_addr;
>       uint64_t tma_addr;
>  
>       /* IB memory */
> +     struct kgd_mem *ib_mem;
>       uint64_t ib_base;
>       void *ib_kaddr;
>  
> @@ -808,6 +810,7 @@ struct kfd_process {
>       /* Event ID allocator and lookup */
>       struct idr event_idr;
>       /* Event page */
> +     u64 signal_handle;
>       struct kfd_signal_page *signal_page;
>       size_t signal_mapped_size;
>       size_t signal_event_count;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 21ec8a18cad2..26fc716a92c2 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -72,6 +72,8 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, 
> struct file *filep);
>  static void evict_process_worker(struct work_struct *work);
>  static void restore_process_worker(struct work_struct *work);
>  
> +static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device 
> *pdd);
> +
>  struct kfd_procfs_tree {
>       struct kobject *kobj;
>  };
> @@ -685,10 +687,15 @@ void kfd_process_destroy_wq(void)
>  }
>  
>  static void kfd_process_free_gpuvm(struct kgd_mem *mem,
> -                     struct kfd_process_device *pdd)
> +                     struct kfd_process_device *pdd, void *kptr)
>  {
>       struct kfd_dev *dev = pdd->dev;
>  
> +     if (kptr) {
> +             amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(dev->kgd, mem);
> +             kptr = NULL;
> +     }
> +
>       amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->drm_priv);
>       amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, pdd->drm_priv,
>                                              NULL);
> @@ -702,63 +709,46 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem,
>   */
>  static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
>                                  uint64_t gpu_va, uint32_t size,
> -                                uint32_t flags, void **kptr)
> +                                uint32_t flags, struct kgd_mem **mem, void 
> **kptr)
>  {
>       struct kfd_dev *kdev = pdd->dev;
> -     struct kgd_mem *mem = NULL;
> -     int handle;
>       int err;
>  
>       err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
> -                                              pdd->drm_priv, &mem, NULL, 
> flags);
> +                                              pdd->drm_priv, mem, NULL, 
> flags);
>       if (err)
>               goto err_alloc_mem;
>  
> -     err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem,
> +     err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, *mem,
>                       pdd->drm_priv, NULL);
>       if (err)
>               goto err_map_mem;
>  
> -     err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, mem, true);
> +     err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, *mem, true);
>       if (err) {
>               pr_debug("Sync memory failed, wait interrupted by user 
> signal\n");
>               goto sync_memory_failed;
>       }
>  
> -     /* Create an obj handle so kfd_process_device_remove_obj_handle
> -      * will take care of the bo removal when the process finishes.
> -      * We do not need to take p->mutex, because the process is just
> -      * created and the ioctls have not had the chance to run.
> -      */
> -     handle = kfd_process_device_create_obj_handle(pdd, mem);
> -
> -     if (handle < 0) {
> -             err = handle;
> -             goto free_gpuvm;
> -     }
> -
>       if (kptr) {
>               err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->kgd,
> -                             (struct kgd_mem *)mem, kptr, NULL);
> +                             (struct kgd_mem *)*mem, kptr, NULL);
>               if (err) {
>                       pr_debug("Map GTT BO to kernel failed\n");
> -                     goto free_obj_handle;
> +                     goto sync_memory_failed;
>               }
>       }
>  
>       return err;
>  
> -free_obj_handle:
> -     kfd_process_device_remove_obj_handle(pdd, handle);
> -free_gpuvm:
>  sync_memory_failed:
> -     kfd_process_free_gpuvm(mem, pdd);
> -     return err;
> +     amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kdev->kgd, *mem, 
> pdd->drm_priv);
>  
>  err_map_mem:
> -     amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, pdd->drm_priv,
> +     amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, *mem, pdd->drm_priv,
>                                              NULL);
>  err_alloc_mem:
> +     *mem = NULL;
>       *kptr = NULL;
>       return err;
>  }
> @@ -776,6 +766,7 @@ static int kfd_process_device_reserve_ib_mem(struct 
> kfd_process_device *pdd)
>                       KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
>                       KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE |
>                       KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
> +     struct kgd_mem *mem;
>       void *kaddr;
>       int ret;
>  
> @@ -784,15 +775,26 @@ static int kfd_process_device_reserve_ib_mem(struct 
> kfd_process_device *pdd)
>  
>       /* ib_base is only set for dGPU */
>       ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
> -                                   &kaddr);
> +                                   &mem, &kaddr);
>       if (ret)
>               return ret;
>  
> +     qpd->ib_mem = mem;
>       qpd->ib_kaddr = kaddr;
>  
>       return 0;
>  }
>  
> +static void kfd_process_device_destroy_ib_mem(struct kfd_process_device *pdd)
> +{
> +     struct qcm_process_device *qpd = &pdd->qpd;
> +
> +     if (!qpd->ib_kaddr || !qpd->ib_base)
> +             return;
> +
> +     kfd_process_free_gpuvm(qpd->ib_mem, pdd, qpd->ib_kaddr);
> +}
> +
>  struct kfd_process *kfd_create_process(struct file *filep)
>  {
>       struct kfd_process *process;
> @@ -947,6 +949,38 @@ static void kfd_process_device_free_bos(struct 
> kfd_process_device *pdd)
>       }
>  }
>  
> +/*
> + * Just kunmap and unpin signal BO here. It will be freed in
> + * kfd_process_free_outstanding_kfd_bos()
> + */
> +static void kfd_process_kunmap_signal_bo(struct kfd_process *p)
> +{
> +     struct kfd_process_device *pdd;
> +     struct kfd_dev *kdev;
> +     void *mem;
> +     int i;
> +
> +     kdev = kfd_device_by_id(GET_GPU_ID(p->signal_handle));
> +     if (!kdev)
> +             return;
> +
> +     mutex_lock(&p->mutex);
> +
> +     pdd = kfd_get_process_device_data(kdev, p);
> +     if (!pdd)
> +             goto out;
> +
> +     mem = kfd_process_device_translate_handle(
> +             pdd, GET_IDR_HANDLE(p->signal_handle));
> +     if (!mem)
> +             goto out;
> +
> +     amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kdev->kgd, mem);
> +
> +out:
> +     mutex_unlock(&p->mutex);
> +}
> +
>  static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
>  {
>       int i;
> @@ -965,6 +999,9 @@ static void kfd_process_destroy_pdds(struct kfd_process 
> *p)
>               pr_debug("Releasing pdd (topology id %d) for process (pasid 
> 0x%x)\n",
>                               pdd->dev->id, p->pasid);
>  
> +             kfd_process_device_destroy_cwsr_dgpu(pdd);
> +             kfd_process_device_destroy_ib_mem(pdd);
> +
>               if (pdd->drm_file) {
>                       amdgpu_amdkfd_gpuvm_release_process_vm(
>                                       pdd->dev->kgd, pdd->drm_priv);
> @@ -1049,9 +1086,11 @@ static void kfd_process_wq_release(struct work_struct 
> *work)
>  {
>       struct kfd_process *p = container_of(work, struct kfd_process,
>                                            release_work);
> +
>       kfd_process_remove_sysfs(p);
>       kfd_iommu_unbind_process(p);
>  
> +     kfd_process_kunmap_signal_bo(p);
>       kfd_process_free_outstanding_kfd_bos(p);
>       svm_range_list_fini(p);
>  
> @@ -1198,6 +1237,7 @@ static int kfd_process_device_init_cwsr_dgpu(struct 
> kfd_process_device *pdd)
>       uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT
>                       | KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE
>                       | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
> +     struct kgd_mem *mem;
>       void *kaddr;
>       int ret;
>  
> @@ -1206,10 +1246,11 @@ static int kfd_process_device_init_cwsr_dgpu(struct 
> kfd_process_device *pdd)
>  
>       /* cwsr_base is only set for dGPU */
>       ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base,
> -                                   KFD_CWSR_TBA_TMA_SIZE, flags, &kaddr);
> +                                   KFD_CWSR_TBA_TMA_SIZE, flags, &mem, 
> &kaddr);
>       if (ret)
>               return ret;
>  
> +     qpd->cwsr_mem = mem;
>       qpd->cwsr_kaddr = kaddr;
>       qpd->tba_addr = qpd->cwsr_base;
>  
> @@ -1222,6 +1263,17 @@ static int kfd_process_device_init_cwsr_dgpu(struct 
> kfd_process_device *pdd)
>       return 0;
>  }
>  
> +static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device 
> *pdd)
> +{
> +     struct kfd_dev *dev = pdd->dev;
> +     struct qcm_process_device *qpd = &pdd->qpd;
> +
> +     if (!dev->cwsr_enabled || !qpd->cwsr_kaddr || !qpd->cwsr_base)
> +             return;
> +
> +     kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, qpd->cwsr_kaddr);
> +}
> +
>  void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
>                                 uint64_t tba_addr,
>                                 uint64_t tma_addr)

Reply via email to