Am 2020-09-01 um 8:21 p.m. schrieb Mukul Joshi:
> Move doorbell allocation for a process into kfd device and
> allocate doorbell space in each PDD during process creation.
> Currently, KFD manages its own doorbell space but for some
> devices, amdgpu would allocate the complete doorbell
> space instead of leaving a chunk of doorbell space for KFD to
> manage. In a system with mix of such devices, KFD would need
> to request process doorbell space based on the type of device,
> either from amdgpu or from its own doorbell space.
>
> Signed-off-by: Mukul Joshi <mukul.jo...@amd.com>

Two nit-picks inline. With those fixed, the patch is

Reviewed-by: Felix Kuehling <felix.kuehl...@amd.com>


> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      | 30 +++++++++------
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c       |  3 ++
>  .../drm/amd/amdkfd/kfd_device_queue_manager.c |  3 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c     | 37 ++++++++++---------
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h         | 17 ++++++---
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c      | 21 ++++++-----
>  6 files changed, 64 insertions(+), 47 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index b7b16adb0615..b23caa78328b 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1290,18 +1290,6 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file 
> *filep,
>               return -EINVAL;
>       }
>  
> -     if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
> -             if (args->size != kfd_doorbell_process_slice(dev))
> -                     return -EINVAL;
> -             offset = kfd_get_process_doorbells(dev, p);
> -     } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
> -             if (args->size != PAGE_SIZE)
> -                     return -EINVAL;
> -             offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
> -             if (!offset)
> -                     return -ENOMEM;
> -     }
> -
>       mutex_lock(&p->mutex);
>  
>       pdd = kfd_bind_process_to_device(dev, p);
> @@ -1310,6 +1298,24 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file 
> *filep,
>               goto err_unlock;
>       }
>  
> +     if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
> +             if (args->size != kfd_doorbell_process_slice(dev)) {
> +                     err = -EINVAL;
> +                     goto err_unlock;
> +             }
> +             offset = kfd_get_process_doorbells(dev, pdd);
> +     } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
> +             if (args->size != PAGE_SIZE) {
> +                     err = -EINVAL;
> +                     goto err_unlock;
> +             }
> +             offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
> +             if (!offset) {
> +                     err = -ENOMEM;
> +                     goto err_unlock;
> +             }
> +     }
> +
>       err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>               dev->kgd, args->va_addr, args->size,
>               pdd->vm, (struct kgd_mem **) &mem, &offset,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 0e71a0543f98..a857282f3d09 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -583,6 +583,8 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
>  
>       atomic_set(&kfd->sram_ecc_flag, 0);
>  
> +     ida_init(&kfd->doorbell_ida);
> +
>       return kfd;
>  }
>  
> @@ -798,6 +800,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
>               kfd_interrupt_exit(kfd);
>               kfd_topology_remove_device(kfd);
>               kfd_doorbell_fini(kfd);
> +             ida_destroy(&kfd->doorbell_ida);
>               kfd_gtt_sa_fini(kfd);
>               amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
>               if (kfd->gws)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 560adc57a050..b9d1359c6fe0 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -191,9 +191,8 @@ static int allocate_doorbell(struct qcm_process_device 
> *qpd, struct queue *q)
>       }
>  
>       q->properties.doorbell_off =
> -             kfd_get_doorbell_dw_offset_in_bar(dev, q->process,
> +             kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd),
>                                         q->doorbell_id);
> -
>       return 0;
>  }
>  
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> index 8e0c00b9555e..5946bfb6b75c 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> @@ -31,9 +31,6 @@
>   * kernel queues using the first doorbell page reserved for the kernel.
>   */
>  
> -static DEFINE_IDA(doorbell_ida);
> -static unsigned int max_doorbell_slices;
> -
>  /*
>   * Each device exposes a doorbell aperture, a PCI MMIO aperture that
>   * receives 32-bit writes that are passed to queues as wptr values.
> @@ -84,9 +81,9 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
>       else
>               return -ENOSPC;
>  
> -     if (!max_doorbell_slices ||
> -         doorbell_process_limit < max_doorbell_slices)
> -             max_doorbell_slices = doorbell_process_limit;
> +     if (!kfd->max_doorbell_slices ||
> +         doorbell_process_limit < kfd->max_doorbell_slices)
> +             kfd->max_doorbell_slices = doorbell_process_limit;
>  
>       kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
>                               doorbell_start_offset;
> @@ -130,6 +127,7 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct 
> kfd_process *process,
>                     struct vm_area_struct *vma)
>  {
>       phys_addr_t address;
> +     struct kfd_process_device *pdd;
>  
>       /*
>        * For simplicitly we only allow mapping of the entire doorbell
> @@ -138,9 +136,12 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct 
> kfd_process *process,
>       if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev))
>               return -EINVAL;
>  
> -     /* Calculate physical address of doorbell */
> -     address = kfd_get_process_doorbells(dev, process);
> +     pdd = kfd_get_process_device_data(dev, process);
> +     if (!pdd)
> +             return -EINVAL;
>  
> +     /* Calculate physical address of doorbell */
> +     address = kfd_get_process_doorbells(dev, pdd);
>       vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
>                               VM_DONTDUMP | VM_PFNMAP;
>  
> @@ -226,7 +227,7 @@ void write_kernel_doorbell64(void __iomem *db, u64 value)
>  }
>  
>  unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
> -                                     struct kfd_process *process,
> +                                     struct kfd_process_device *pdd,
>                                       unsigned int doorbell_id)
>  {
>       /*
> @@ -236,7 +237,7 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct 
> kfd_dev *kfd,
>        * units regardless of the ASIC-dependent doorbell size.
>        */
>       return kfd->doorbell_base_dw_offset +
> -             process->doorbell_index
> +             pdd->doorbell_index
>               * kfd_doorbell_process_slice(kfd) / sizeof(u32) +
>               doorbell_id * kfd->device_info->doorbell_size / sizeof(u32);
>  }
> @@ -252,24 +253,24 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
>  }
>  
>  phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
> -                                     struct kfd_process *process)
> +                                   struct kfd_process_device *pdd)
>  {
>       return dev->doorbell_base +
> -             process->doorbell_index * kfd_doorbell_process_slice(dev);
> +             pdd->doorbell_index * kfd_doorbell_process_slice(dev);

If you already have a pdd parameter, the dev parameter is redundant.
Just use pdd->dev.


>  }
>  
> -int kfd_alloc_process_doorbells(struct kfd_process *process)
> +int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int 
> *doorbell_index)
>  {
> -     int r = ida_simple_get(&doorbell_ida, 1, max_doorbell_slices,
> +     int r = ida_simple_get(&kfd->doorbell_ida, 1, kfd->max_doorbell_slices,
>                               GFP_KERNEL);
>       if (r > 0)
> -             process->doorbell_index = r;
> +             *doorbell_index = r;
>  
>       return r;
>  }
>  
> -void kfd_free_process_doorbells(struct kfd_process *process)
> +void kfd_free_process_doorbells(struct kfd_dev *kfd, unsigned int 
> *doorbell_index)

Doorbell index doesn't need to be a pointer here, because it's an input
parameter. Just pass the index by value.

Regards,
  Felix


>  {
> -     if (process->doorbell_index)
> -             ida_simple_remove(&doorbell_ida, process->doorbell_index);
> +     if (*doorbell_index)
> +             ida_simple_remove(&kfd->doorbell_ida, *doorbell_index);
>  }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 023629f28495..93b51de14aa0 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -314,6 +314,9 @@ struct kfd_dev {
>       spinlock_t smi_lock;
>  
>       uint32_t reset_seq_num;
> +
> +     struct ida doorbell_ida;
> +     unsigned int max_doorbell_slices;
>  };
>  
>  enum kfd_mempool {
> @@ -692,6 +695,8 @@ struct kfd_process_device {
>       uint64_t sdma_past_activity_counter;
>       struct attribute attr_sdma;
>       char sdma_filename[MAX_SYSFS_FILENAME_LEN];
> +
> +     unsigned int doorbell_index;
>  };
>  
>  #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
> @@ -729,7 +734,6 @@ struct kfd_process {
>       struct mmu_notifier mmu_notifier;
>  
>       uint16_t pasid;
> -     unsigned int doorbell_index;
>  
>       /*
>        * List of kfd_process_device structures,
> @@ -862,13 +866,14 @@ u32 read_kernel_doorbell(u32 __iomem *db);
>  void write_kernel_doorbell(void __iomem *db, u32 value);
>  void write_kernel_doorbell64(void __iomem *db, u64 value);
>  unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
> -                                     struct kfd_process *process,
> +                                     struct kfd_process_device *pdd,
>                                       unsigned int doorbell_id);
>  phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
> -                                     struct kfd_process *process);
> -int kfd_alloc_process_doorbells(struct kfd_process *process);
> -void kfd_free_process_doorbells(struct kfd_process *process);
> -
> +                                   struct kfd_process_device *pdd);
> +int kfd_alloc_process_doorbells(struct kfd_dev *kfd,
> +                             unsigned int *doorbell_index);
> +void kfd_free_process_doorbells(struct kfd_dev *kfd,
> +                             unsigned int *doorbell_index);
>  /* GTT Sub-Allocator */
>  
>  int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index a0e12a79ab7d..28bbdebb4703 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -781,6 +781,8 @@ static void kfd_process_destroy_pdds(struct kfd_process 
> *p)
>               kfree(pdd->qpd.doorbell_bitmap);
>               idr_destroy(&pdd->alloc_idr);
>  
> +             kfd_free_process_doorbells(pdd->dev, &pdd->doorbell_index);
> +
>               /*
>                * before destroying pdd, make sure to report availability
>                * for auto suspend
> @@ -833,8 +835,6 @@ static void kfd_process_wq_release(struct work_struct 
> *work)
>       kfd_event_free_process(p);
>  
>       kfd_pasid_free(p->pasid);
> -     kfd_free_process_doorbells(p);
> -
>       mutex_destroy(&p->mutex);
>  
>       put_task_struct(p->lead_thread);
> @@ -1012,9 +1012,6 @@ static struct kfd_process *create_process(const struct 
> task_struct *thread)
>       if (process->pasid == 0)
>               goto err_alloc_pasid;
>  
> -     if (kfd_alloc_process_doorbells(process) < 0)
> -             goto err_alloc_doorbells;
> -
>       err = pqm_init(&process->pqm, process);
>       if (err != 0)
>               goto err_process_pqm_init;
> @@ -1042,8 +1039,6 @@ static struct kfd_process *create_process(const struct 
> task_struct *thread)
>  err_init_apertures:
>       pqm_uninit(&process->pqm);
>  err_process_pqm_init:
> -     kfd_free_process_doorbells(process);
> -err_alloc_doorbells:
>       kfd_pasid_free(process->pasid);
>  err_alloc_pasid:
>       mutex_destroy(&process->mutex);
> @@ -1106,10 +1101,14 @@ struct kfd_process_device 
> *kfd_create_process_device_data(struct kfd_dev *dev,
>       if (!pdd)
>               return NULL;
>  
> +     if (kfd_alloc_process_doorbells(dev, &pdd->doorbell_index) < 0) {
> +             pr_err("Failed to alloc doorbell for pdd\n");
> +             goto err_free_pdd;
> +     }
> +
>       if (init_doorbell_bitmap(&pdd->qpd, dev)) {
>               pr_err("Failed to init doorbell for process\n");
> -             kfree(pdd);
> -             return NULL;
> +             goto err_free_pdd;
>       }
>  
>       pdd->dev = dev;
> @@ -1131,6 +1130,10 @@ struct kfd_process_device 
> *kfd_create_process_device_data(struct kfd_dev *dev,
>       idr_init(&pdd->alloc_idr);
>  
>       return pdd;
> +
> +err_free_pdd:
> +     kfree(pdd);
> +     return NULL;
>  }
>  
>  /**
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to