Am 2020-09-01 um 8:21 p.m. schrieb Mukul Joshi: > Move doorbell allocation for a process into kfd device and > allocate doorbell space in each PDD during process creation. > Currently, KFD manages its own doorbell space but for some > devices, amdgpu would allocate the complete doorbell > space instead of leaving a chunk of doorbell space for KFD to > manage. In a system with mix of such devices, KFD would need > to request process doorbell space based on the type of device, > either from amdgpu or from its own doorbell space. > > Signed-off-by: Mukul Joshi <mukul.jo...@amd.com>
Two nit-picks inline. With those fixed, the patch is Reviewed-by: Felix Kuehling <felix.kuehl...@amd.com> > --- > drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 30 +++++++++------ > drivers/gpu/drm/amd/amdkfd/kfd_device.c | 3 ++ > .../drm/amd/amdkfd/kfd_device_queue_manager.c | 3 +- > drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 37 ++++++++++--------- > drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 17 ++++++--- > drivers/gpu/drm/amd/amdkfd/kfd_process.c | 21 ++++++----- > 6 files changed, 64 insertions(+), 47 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > index b7b16adb0615..b23caa78328b 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > @@ -1290,18 +1290,6 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file > *filep, > return -EINVAL; > } > > - if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) { > - if (args->size != kfd_doorbell_process_slice(dev)) > - return -EINVAL; > - offset = kfd_get_process_doorbells(dev, p); > - } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) { > - if (args->size != PAGE_SIZE) > - return -EINVAL; > - offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd); > - if (!offset) > - return -ENOMEM; > - } > - > mutex_lock(&p->mutex); > > pdd = kfd_bind_process_to_device(dev, p); > @@ -1310,6 +1298,24 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file > *filep, > goto err_unlock; > } > > + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) { > + if (args->size != kfd_doorbell_process_slice(dev)) { > + err = -EINVAL; > + goto err_unlock; > + } > + offset = kfd_get_process_doorbells(dev, pdd); > + } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) { > + if (args->size != PAGE_SIZE) { > + err = -EINVAL; > + goto err_unlock; > + } > + offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd); > + if (!offset) { > + err = -ENOMEM; > + goto err_unlock; > + } > + } > + > err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( > dev->kgd, args->va_addr, args->size, > pdd->vm, (struct kgd_mem **) &mem, &offset, > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c > b/drivers/gpu/drm/amd/amdkfd/kfd_device.c > index 0e71a0543f98..a857282f3d09 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c > @@ -583,6 +583,8 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, > > atomic_set(&kfd->sram_ecc_flag, 0); > > + ida_init(&kfd->doorbell_ida); > + > return kfd; > } > > @@ -798,6 +800,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) > kfd_interrupt_exit(kfd); > kfd_topology_remove_device(kfd); > kfd_doorbell_fini(kfd); > + ida_destroy(&kfd->doorbell_ida); > kfd_gtt_sa_fini(kfd); > amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); > if (kfd->gws) > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > index 560adc57a050..b9d1359c6fe0 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > @@ -191,9 +191,8 @@ static int allocate_doorbell(struct qcm_process_device > *qpd, struct queue *q) > } > > q->properties.doorbell_off = > - kfd_get_doorbell_dw_offset_in_bar(dev, q->process, > + kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd), > q->doorbell_id); > - > return 0; > } > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c > b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c > index 8e0c00b9555e..5946bfb6b75c 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c > @@ -31,9 +31,6 @@ > * kernel queues using the first doorbell page reserved for the kernel. > */ > > -static DEFINE_IDA(doorbell_ida); > -static unsigned int max_doorbell_slices; > - > /* > * Each device exposes a doorbell aperture, a PCI MMIO aperture that > * receives 32-bit writes that are passed to queues as wptr values. > @@ -84,9 +81,9 @@ int kfd_doorbell_init(struct kfd_dev *kfd) > else > return -ENOSPC; > > - if (!max_doorbell_slices || > - doorbell_process_limit < max_doorbell_slices) > - max_doorbell_slices = doorbell_process_limit; > + if (!kfd->max_doorbell_slices || > + doorbell_process_limit < kfd->max_doorbell_slices) > + kfd->max_doorbell_slices = doorbell_process_limit; > > kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address + > doorbell_start_offset; > @@ -130,6 +127,7 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct > kfd_process *process, > struct vm_area_struct *vma) > { > phys_addr_t address; > + struct kfd_process_device *pdd; > > /* > * For simplicitly we only allow mapping of the entire doorbell > @@ -138,9 +136,12 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct > kfd_process *process, > if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev)) > return -EINVAL; > > - /* Calculate physical address of doorbell */ > - address = kfd_get_process_doorbells(dev, process); > + pdd = kfd_get_process_device_data(dev, process); > + if (!pdd) > + return -EINVAL; > > + /* Calculate physical address of doorbell */ > + address = kfd_get_process_doorbells(dev, pdd); > vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE | > VM_DONTDUMP | VM_PFNMAP; > > @@ -226,7 +227,7 @@ void write_kernel_doorbell64(void __iomem *db, u64 value) > } > > unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd, > - struct kfd_process *process, > + struct kfd_process_device *pdd, > unsigned int doorbell_id) > { > /* > @@ -236,7 +237,7 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct > kfd_dev *kfd, > * units regardless of the ASIC-dependent doorbell size. > */ > return kfd->doorbell_base_dw_offset + > - process->doorbell_index > + pdd->doorbell_index > * kfd_doorbell_process_slice(kfd) / sizeof(u32) + > doorbell_id * kfd->device_info->doorbell_size / sizeof(u32); > } > @@ -252,24 +253,24 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd) > } > > phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev, > - struct kfd_process *process) > + struct kfd_process_device *pdd) > { > return dev->doorbell_base + > - process->doorbell_index * kfd_doorbell_process_slice(dev); > + pdd->doorbell_index * kfd_doorbell_process_slice(dev); If you already have a pdd parameter, the dev parameter is redundant. Just use pdd->dev. > } > > -int kfd_alloc_process_doorbells(struct kfd_process *process) > +int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int > *doorbell_index) > { > - int r = ida_simple_get(&doorbell_ida, 1, max_doorbell_slices, > + int r = ida_simple_get(&kfd->doorbell_ida, 1, kfd->max_doorbell_slices, > GFP_KERNEL); > if (r > 0) > - process->doorbell_index = r; > + *doorbell_index = r; > > return r; > } > > -void kfd_free_process_doorbells(struct kfd_process *process) > +void kfd_free_process_doorbells(struct kfd_dev *kfd, unsigned int > *doorbell_index) Doorbell index doesn't need to be a pointer here, because it's an input parameter. Just pass the index by value. Regards, Felix > { > - if (process->doorbell_index) > - ida_simple_remove(&doorbell_ida, process->doorbell_index); > + if (*doorbell_index) > + ida_simple_remove(&kfd->doorbell_ida, *doorbell_index); > } > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > index 023629f28495..93b51de14aa0 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > @@ -314,6 +314,9 @@ struct kfd_dev { > spinlock_t smi_lock; > > uint32_t reset_seq_num; > + > + struct ida doorbell_ida; > + unsigned int max_doorbell_slices; > }; > > enum kfd_mempool { > @@ -692,6 +695,8 @@ struct kfd_process_device { > uint64_t sdma_past_activity_counter; > struct attribute attr_sdma; > char sdma_filename[MAX_SYSFS_FILENAME_LEN]; > + > + unsigned int doorbell_index; > }; > > #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) > @@ -729,7 +734,6 @@ struct kfd_process { > struct mmu_notifier mmu_notifier; > > uint16_t pasid; > - unsigned int doorbell_index; > > /* > * List of kfd_process_device structures, > @@ -862,13 +866,14 @@ u32 read_kernel_doorbell(u32 __iomem *db); > void write_kernel_doorbell(void __iomem *db, u32 value); > void write_kernel_doorbell64(void __iomem *db, u64 value); > unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd, > - struct kfd_process *process, > + struct kfd_process_device *pdd, > unsigned int doorbell_id); > phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev, > - struct kfd_process *process); > -int kfd_alloc_process_doorbells(struct kfd_process *process); > -void kfd_free_process_doorbells(struct kfd_process *process); > - > + struct kfd_process_device *pdd); > +int kfd_alloc_process_doorbells(struct kfd_dev *kfd, > + unsigned int *doorbell_index); > +void kfd_free_process_doorbells(struct kfd_dev *kfd, > + unsigned int *doorbell_index); > /* GTT Sub-Allocator */ > > int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c > b/drivers/gpu/drm/amd/amdkfd/kfd_process.c > index a0e12a79ab7d..28bbdebb4703 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c > @@ -781,6 +781,8 @@ static void kfd_process_destroy_pdds(struct kfd_process > *p) > kfree(pdd->qpd.doorbell_bitmap); > idr_destroy(&pdd->alloc_idr); > > + kfd_free_process_doorbells(pdd->dev, &pdd->doorbell_index); > + > /* > * before destroying pdd, make sure to report availability > * for auto suspend > @@ -833,8 +835,6 @@ static void kfd_process_wq_release(struct work_struct > *work) > kfd_event_free_process(p); > > kfd_pasid_free(p->pasid); > - kfd_free_process_doorbells(p); > - > mutex_destroy(&p->mutex); > > put_task_struct(p->lead_thread); > @@ -1012,9 +1012,6 @@ static struct kfd_process *create_process(const struct > task_struct *thread) > if (process->pasid == 0) > goto err_alloc_pasid; > > - if (kfd_alloc_process_doorbells(process) < 0) > - goto err_alloc_doorbells; > - > err = pqm_init(&process->pqm, process); > if (err != 0) > goto err_process_pqm_init; > @@ -1042,8 +1039,6 @@ static struct kfd_process *create_process(const struct > task_struct *thread) > err_init_apertures: > pqm_uninit(&process->pqm); > err_process_pqm_init: > - kfd_free_process_doorbells(process); > -err_alloc_doorbells: > kfd_pasid_free(process->pasid); > err_alloc_pasid: > mutex_destroy(&process->mutex); > @@ -1106,10 +1101,14 @@ struct kfd_process_device > *kfd_create_process_device_data(struct kfd_dev *dev, > if (!pdd) > return NULL; > > + if (kfd_alloc_process_doorbells(dev, &pdd->doorbell_index) < 0) { > + pr_err("Failed to alloc doorbell for pdd\n"); > + goto err_free_pdd; > + } > + > if (init_doorbell_bitmap(&pdd->qpd, dev)) { > pr_err("Failed to init doorbell for process\n"); > - kfree(pdd); > - return NULL; > + goto err_free_pdd; > } > > pdd->dev = dev; > @@ -1131,6 +1130,10 @@ struct kfd_process_device > *kfd_create_process_device_data(struct kfd_dev *dev, > idr_init(&pdd->alloc_idr); > > return pdd; > + > +err_free_pdd: > + kfree(pdd); > + return NULL; > } > > /** _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx