[PATCH v4] drm/amdkfd: Change kfd/svm page fault drain handling
From: Xiaogang Chen When an app unmaps vm ranges (munmap), kfd/svm starts draining pending page faults and does not handle any incoming page fault of this process until a deferred work item gets executed by the default system wq. The time period of "not handling page faults" can be long and is unpredictable. That is adverse to kfd performance on page fault recovery. This patch uses the time stamp of an incoming page fault to decide whether to drop or handle the page fault. When an app unmaps vm ranges, kfd records each gpu device's ih ring current time stamp. These time stamps are used in the kfd page fault recovery routine. Any page fault that happens on unmapped ranges after the unmap event is an app bug that accesses a vm range after unmap. It is not the driver's job to cover that. By using the time stamp of a page fault, there is no need to drain page faults in the deferred work. So, the time period during which kfd does not handle page faults is reduced and can be controlled. Signed-off-by: Xiaogang.Chen --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 3 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 4 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 +- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 128 +++-- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 2 +- 7 files changed, 68 insertions(+), 78 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 3abfa66d72a2..d90b7ea3f020 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2763,7 +2763,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) * shouldn't be reported any more. 
*/ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, - u32 vmid, u32 node_id, uint64_t addr, + u32 vmid, u32 node_id, uint64_t addr, uint64_t ts, bool write_fault) { bool is_compute_context = false; @@ -2789,7 +2789,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, addr /= AMDGPU_GPU_PAGE_SIZE; if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid, - node_id, addr, write_fault)) { + node_id, addr, ts, write_fault)) { amdgpu_bo_unref(); return true; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 312a408b80d3..1d6a1381ede9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -548,7 +548,7 @@ amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm); void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info); bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, - u32 vmid, u32 node_id, uint64_t addr, + u32 vmid, u32 node_id, uint64_t addr, uint64_t ts, bool write_fault); void amdgpu_vm_set_task_info(struct amdgpu_vm *vm); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index d933e19e0cf5..3596cc2ee7e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -132,7 +132,8 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, /* Try to handle the recoverable page faults by filling page * tables */ - if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr, write_fault)) + if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr, + entry->timestamp, write_fault)) return 1; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 350f6b6676f1..ac08d9424feb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -595,7 +595,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, cam_index = entry->src_data[2] 
& 0x3ff; ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id, -addr, write_fault); +addr, entry->timestamp, write_fault); WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index); if (ret) return 1; @@ -618,7 +618,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, * tables */ if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id, - addr, write_fault)) +
[PATCH v3] drm/amdkfd: Change kfd/svm page fault drain handling
From: Xiaogang Chen When an app unmaps vm ranges (munmap), kfd/svm starts draining pending page faults and does not handle any incoming page fault of this process until a deferred work item gets executed by the default system wq. The time period of "not handling page faults" can be long and is unpredictable. That is adverse to kfd performance on page fault recovery. This patch uses the time stamp of an incoming page fault to decide whether to drop or handle the page fault. When an app unmaps vm ranges, kfd records each gpu device's ih ring current time stamp. These time stamps are used in the kfd page fault recovery routine. Any page fault that happens on unmapped ranges after the unmap event is an app bug that accesses a vm range after unmap. It is not the driver's job to cover that. By using the time stamp of a page fault, there is no need to drain page faults in the deferred work. So, the time period during which kfd does not handle page faults is reduced and can be controlled. Signed-off-by: Xiaogang.Chen --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 3 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 4 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 +- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 102 - drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 2 +- 7 files changed, 79 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 3abfa66d72a2..d90b7ea3f020 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2763,7 +2763,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) * shouldn't be reported any more. 
*/ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, - u32 vmid, u32 node_id, uint64_t addr, + u32 vmid, u32 node_id, uint64_t addr, uint64_t ts, bool write_fault) { bool is_compute_context = false; @@ -2789,7 +2789,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, addr /= AMDGPU_GPU_PAGE_SIZE; if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid, - node_id, addr, write_fault)) { + node_id, addr, ts, write_fault)) { amdgpu_bo_unref(); return true; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 312a408b80d3..1d6a1381ede9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -548,7 +548,7 @@ amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm); void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info); bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, - u32 vmid, u32 node_id, uint64_t addr, + u32 vmid, u32 node_id, uint64_t addr, uint64_t ts, bool write_fault); void amdgpu_vm_set_task_info(struct amdgpu_vm *vm); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index d933e19e0cf5..3596cc2ee7e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -132,7 +132,8 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, /* Try to handle the recoverable page faults by filling page * tables */ - if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr, write_fault)) + if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr, + entry->timestamp, write_fault)) return 1; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 350f6b6676f1..ac08d9424feb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -595,7 +595,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, cam_index = entry->src_data[2] 
& 0x3ff; ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id, -addr, write_fault); +addr, entry->timestamp, write_fault); WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index); if (ret) return 1; @@ -618,7 +618,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, * tables */ if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id, - addr, write_fault)) +
[PATCH v2] drm/amdkfd: Change kfd/svm page fault drain handling
From: Xiaogang Chen When an app unmaps vm ranges (munmap), kfd/svm starts draining pending page faults and does not handle any incoming page fault of this process until a deferred work item gets executed by the default system wq. The time period of "not handling page faults" can be long and is unpredictable. That is adverse to kfd performance on page fault recovery. This patch uses the time stamp of an incoming page fault to decide whether to drop or handle the page fault. When an app unmaps vm ranges, kfd records each gpu device's ih ring current time stamp. These time stamps are used in the kfd page fault recovery routine. Any page fault that happens on unmapped ranges after the unmap event is an app bug that accesses a vm range after unmap. It is not the driver's job to cover that. By using the time stamp of a page fault, there is no need to drain page faults in the deferred work. So, the time period during which kfd does not handle page faults is reduced and can be controlled. Signed-off-by: Xiaogang.Chen --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 3 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 4 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 +- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 111 + drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 2 +- 7 files changed, 88 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 3abfa66d72a2..d90b7ea3f020 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2763,7 +2763,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) * shouldn't be reported any more. 
*/ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, - u32 vmid, u32 node_id, uint64_t addr, + u32 vmid, u32 node_id, uint64_t addr, uint64_t ts, bool write_fault) { bool is_compute_context = false; @@ -2789,7 +2789,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, addr /= AMDGPU_GPU_PAGE_SIZE; if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid, - node_id, addr, write_fault)) { + node_id, addr, ts, write_fault)) { amdgpu_bo_unref(); return true; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 312a408b80d3..1d6a1381ede9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -548,7 +548,7 @@ amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm); void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info); bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, - u32 vmid, u32 node_id, uint64_t addr, + u32 vmid, u32 node_id, uint64_t addr, uint64_t ts, bool write_fault); void amdgpu_vm_set_task_info(struct amdgpu_vm *vm); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index d933e19e0cf5..5cceaba6e5c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -132,7 +132,8 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, /* Try to handle the recoverable page faults by filling page * tables */ - if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr, write_fault)) + if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr, + entry->timestamp, write_fault)) return 1; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 350f6b6676f1..ac08d9424feb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -595,7 +595,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, cam_index = entry->src_data[2] 
& 0x3ff; ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id, -addr, write_fault); +addr, entry->timestamp, write_fault); WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index); if (ret) return 1; @@ -618,7 +618,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, * tables */ if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id, - addr, write_fault)) +
[PATCH] drm/amdkfd: change kfd/svm page fault drain handling
From: Xiaogang Chen When an app unmaps vm ranges (munmap), kfd/svm starts draining pending page faults and does not handle any incoming page fault of this process until a deferred work item gets executed by the default system wq. The time period of "no page fault handling" is unpredictable. That adversely affects kfd performance on page fault recovery. This patch drains pending page faults just before the gpu vm range is unmapped by the app, reducing the time period during which kfd does not handle page faults. Any page fault that happens on unmapped ranges after the pending page faults are drained is an app bug that accesses a vm range after unmap. It is not the driver's job to cover that. Signed-off-by: Xiaogang.Chen --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 36 ++-- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 407636a68814..83e694be143d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -2263,16 +2263,10 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms) { struct kfd_process_device *pdd; struct kfd_process *p; - int drain; uint32_t i; p = container_of(svms, struct kfd_process, svms); -restart: - drain = atomic_read(>drain_pagefaults); - if (!drain) - return; - for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) { pdd = p->pdds[i]; if (!pdd) @@ -2292,8 +2286,6 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms) pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms); } - if (atomic_cmpxchg(>drain_pagefaults, drain, 0) != drain) - goto restart; } static void svm_range_deferred_list_work(struct work_struct *work) @@ -2315,17 +2307,8 @@ static void svm_range_deferred_list_work(struct work_struct *work) prange->start, prange->last, prange->work_item.op); mm = prange->work_item.mm; -retry: - mmap_write_lock(mm); - /* Checking for the need to drain retry faults must be inside -* mmap write lock to serialize with munmap notifiers. 
-*/ - if (unlikely(atomic_read(>drain_pagefaults))) { - mmap_write_unlock(mm); - svm_range_drain_retry_fault(svms); - goto retry; - } + mmap_write_lock(mm); /* Remove from deferred_list must be inside mmap write lock, for * two race cases: @@ -2455,11 +2438,17 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms, prange, prange->start, prange->last, start, last); - /* Make sure pending page faults are drained in the deferred worker -* before the range is freed to avoid straggler interrupts on -* unmapped memory causing "phantom faults". + /* before unmap pages from gpu drain pending page faults to avoid straggler +* interrupts on will-be unmapped memory causing "phantom faults" +* set drain_pagefaults to have page fault handler drop incoming +* page faults and svm_range_drain_retry_fault drain page fault enties +* untill current ts +* page faults on these unmapped pages after current ts are not faults that +* driver needs to drop, they are app bug that "access after unmap" */ - atomic_inc(>drain_pagefaults); + atomic_set(>drain_pagefaults, 1); + svm_range_drain_retry_fault(svms); + atomic_set(>drain_pagefaults, 0); unmap_parent = start <= prange->start && last >= prange->last; @@ -3174,8 +3163,9 @@ void svm_range_list_fini(struct kfd_process *p) * Ensure no retry fault comes in afterwards, as page fault handler will * not find kfd process and take mm lock to recover fault. */ - atomic_inc(>svms.drain_pagefaults); + atomic_set(>svms.drain_pagefaults, 1); svm_range_drain_retry_fault(>svms); + atomic_set(>svms.drain_pagefaults, 0); list_for_each_entry_safe(prange, next, >svms.list, list) { svm_range_unlink(prange); -- 2.25.1
[PATCH v2] drm/amdkfd: Correct svm prange overlapping handling at svm_range_set_attr ioctl
From: Xiaogang Chen When a user adds a new vm range that overlaps with existing svm pranges, kfd currently creates a cloned prange and splits it, then replaces the original prange with it. That destroys the original prange's locks, and the cloned prange's locks do not inherit the original prange's lock contexts. This may cause issues if code still needs to use these locks. In general we should keep using the original prange, update its internal data that changed during the split, then free the cloned prange. This patch changes the vm range overlap handling so that it does not remove existing pranges; instead it updates them for the split and keeps their locks alive. Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 112 --- 1 file changed, 82 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 407636a68814..a66b8c96ee14 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1967,7 +1967,8 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm, return r; } -static struct svm_range *svm_range_clone(struct svm_range *old) +/* create a prange that has same range/size/addr etc info as old */ +static struct svm_range *svm_range_duplicate(struct svm_range *old) { struct svm_range *new; @@ -1999,6 +2000,25 @@ static struct svm_range *svm_range_clone(struct svm_range *old) return new; } +/* copy range/size/addr info from src to dst prange */ +static void svm_range_copy(struct svm_range *dst, struct svm_range *src) +{ + dst->npages = src->npages; + dst->start = src->start; + dst->last = src->last; + + dst->vram_pages = src->vram_pages; + dst->offset = src->offset; + + for (int i = 0; i < MAX_GPU_INSTANCE; i++) { + if (!src->dma_addr[i]) + continue; + +memcpy(dst->dma_addr[i], src->dma_addr[i], + src->npages * sizeof(*src->dma_addr[i])); + } +} + void svm_range_set_max_pages(struct amdgpu_device *adev) { uint64_t max_pages; @@ -2057,20 +2077,19 @@ svm_range_split_new(struct 
svm_range_list *svms, uint64_t start, uint64_t last, * @attrs: array of attributes * @update_list: output, the ranges need validate and update GPU mapping * @insert_list: output, the ranges need insert to svms - * @remove_list: output, the ranges are replaced and need remove from svms * @remap_list: output, remap unaligned svm ranges * * Check if the virtual address range has overlap with any existing ranges, * split partly overlapping ranges and add new ranges in the gaps. All changes * should be applied to the range_list and interval tree transactionally. If * any range split or allocation fails, the entire update fails. Therefore any - * existing overlapping svm_ranges are cloned and the original svm_ranges left + * existing overlapping svm_ranges are duplicated and the original svm_ranges left * unchanged. * - * If the transaction succeeds, the caller can update and insert clones and - * new ranges, then free the originals. + * If the transaction succeeds, the caller can update and insert split ranges and + * new ranges. * - * Otherwise the caller can free the clones and new ranges, while the old + * Otherwise the caller can free the duplicated and new ranges, while the old * svm_ranges remain unchanged. 
* * Context: Process context, caller must hold svms->lock @@ -2082,7 +2101,7 @@ static int svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs, struct list_head *update_list, struct list_head *insert_list, - struct list_head *remove_list, struct list_head *remap_list) + struct list_head *remap_list) { unsigned long last = start + size - 1UL; struct svm_range_list *svms = >svms; @@ -2090,13 +2109,14 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, struct svm_range *prange; struct svm_range *tmp; struct list_head new_list; + struct list_head modify_list; int r = 0; pr_debug("svms 0x%p [0x%llx 0x%lx]\n", >svms, start, last); INIT_LIST_HEAD(update_list); INIT_LIST_HEAD(insert_list); - INIT_LIST_HEAD(remove_list); + INIT_LIST_HEAD(_list); INIT_LIST_HEAD(_list); INIT_LIST_HEAD(remap_list); @@ -2117,35 +2137,41 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, /* nothing to do */ } else if (node->start < start || node->last > last) { /* node intersects the update range and its attributes -* will change. Clone and split it, apply updates only +
[PATCH] drm/amdkfd: Correct svm prange overlapping handling at svm_range_set_attr ioctl
From: Xiaogang Chen When a user adds a new vm range that overlaps with existing svm pranges, kfd currently clones a new prange and removes the existing pranges, including all data associated with them. This is not necessary. We can handle the overlap on the existing pranges directly, which simplifies the kfd code. Also, when an existing prange is removed, its locks get destroyed. This may cause issues if code still uses these locks. And the locks of a cloned prange do not inherit the context of the locks that got removed. This patch neither removes existing pranges nor clones new pranges, and keeps the locks of pranges alive. Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 89 1 file changed, 12 insertions(+), 77 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 407636a68814..a8fcace6f9a2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -904,23 +904,6 @@ svm_range_copy_array(void *psrc, size_t size, uint64_t num_elements, return (void *)dst; } -static int -svm_range_copy_dma_addrs(struct svm_range *dst, struct svm_range *src) -{ - int i; - - for (i = 0; i < MAX_GPU_INSTANCE; i++) { - if (!src->dma_addr[i]) - continue; - dst->dma_addr[i] = svm_range_copy_array(src->dma_addr[i], - sizeof(*src->dma_addr[i]), src->npages, 0, NULL); - if (!dst->dma_addr[i]) - return -ENOMEM; - } - - return 0; -} - static int svm_range_split_array(void *ppnew, void *ppold, size_t size, uint64_t old_start, uint64_t old_n, @@ -1967,38 +1950,6 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm, return r; } -static struct svm_range *svm_range_clone(struct svm_range *old) -{ - struct svm_range *new; - - new = svm_range_new(old->svms, old->start, old->last, false); - if (!new) - return NULL; - if (svm_range_copy_dma_addrs(new, old)) { - svm_range_free(new, false); - return NULL; - } - if (old->svm_bo) { - new->ttm_res = old->ttm_res; - new->offset = old->offset; - new->svm_bo = 
svm_range_bo_ref(old->svm_bo); - spin_lock(>svm_bo->list_lock); - list_add(>svm_bo_list, >svm_bo->range_list); - spin_unlock(>svm_bo->list_lock); - } - new->flags = old->flags; - new->preferred_loc = old->preferred_loc; - new->prefetch_loc = old->prefetch_loc; - new->actual_loc = old->actual_loc; - new->granularity = old->granularity; - new->mapped_to_gpu = old->mapped_to_gpu; - new->vram_pages = old->vram_pages; - bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE); - bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE); - - return new; -} - void svm_range_set_max_pages(struct amdgpu_device *adev) { uint64_t max_pages; @@ -2057,7 +2008,6 @@ svm_range_split_new(struct svm_range_list *svms, uint64_t start, uint64_t last, * @attrs: array of attributes * @update_list: output, the ranges need validate and update GPU mapping * @insert_list: output, the ranges need insert to svms - * @remove_list: output, the ranges are replaced and need remove from svms * @remap_list: output, remap unaligned svm ranges * * Check if the virtual address range has overlap with any existing ranges, @@ -2082,7 +2032,7 @@ static int svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs, struct list_head *update_list, struct list_head *insert_list, - struct list_head *remove_list, struct list_head *remap_list) + struct list_head *remap_list) { unsigned long last = start + size - 1UL; struct svm_range_list *svms = >svms; @@ -2096,7 +2046,6 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, INIT_LIST_HEAD(update_list); INIT_LIST_HEAD(insert_list); - INIT_LIST_HEAD(remove_list); INIT_LIST_HEAD(_list); INIT_LIST_HEAD(remap_list); @@ -2117,20 +2066,11 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, /* nothing to do */ } else if (node->start < start || node->last > last) { /* node intersects the update range and its attributes -* will change. 
Clone and split it, apply updates only +* will change. Split it, apply updates only * to the overlapping part */ -
[PATCH] drm/amdkfd: Update mm interval notifier tree without acquiring mm's mmap lock
From: Xiaogang Chen The current kfd/svm driver acquires mm's mmap write lock before updating mm->notifier_subscriptions->itree. This tree is already protected by mm->notifier_subscriptions->lock in the mmu notifier. Updates to each process's mm interval tree from different kernel components go through the mmu interval notifier, where they are serialized. This patch removes the mmap write lock acquisition in the kfd/svm driver when it needs to update the process mm interval tree. It reduces the chance of deadlock or warnings from lockdep and simplifies the driver code. In addition, the patch adjusts the granularity of some locks to reduce the number of locks the driver holds at the same time, which also reduces the chance of deadlock or warnings from lockdep. Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 3 +- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 6 +- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 181 +++ drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 2 +- 4 files changed, 122 insertions(+), 70 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index fdf171ad4a3c..b52588ded567 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1078,9 +1078,8 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, /* Flush pending deferred work to avoid racing with deferred actions * from previous memory map changes (e.g. munmap). 
*/ - svm_range_list_lock_and_flush_work(>svms, current->mm); + svm_range_list_flush_work(>svms); mutex_lock(>svms.lock); - mmap_write_unlock(current->mm); if (interval_tree_iter_first(>svms.objects, args->va_addr >> PAGE_SHIFT, (args->va_addr + args->size - 1) >> PAGE_SHIFT)) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 8ee3d07ffbdf..eb46643d96b2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -969,10 +969,12 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) mutex_lock(>svms.lock); prange = svm_range_from_addr(>svms, addr, NULL); + + mutex_unlock(>svms.lock); if (!prange) { pr_debug("failed get range svms 0x%p addr 0x%lx\n", >svms, addr); r = -EFAULT; - goto out_unlock_svms; + goto out_unref_process; } mutex_lock(>migrate_mutex); @@ -993,8 +995,6 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) out_unlock_prange: mutex_unlock(>migrate_mutex); -out_unlock_svms: - mutex_unlock(>svms.lock); out_unref_process: pr_debug("CPU fault svms 0x%p address 0x%lx done\n", >svms, addr); kfd_unref_process(p); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 407636a68814..46f81c1215d9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -106,12 +106,31 @@ static void svm_range_unlink(struct svm_range *prange) } static void -svm_range_add_notifier_locked(struct mm_struct *mm, struct svm_range *prange) +svm_range_add_notifier(struct mm_struct *mm, struct svm_range *prange, bool locked) { pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange, prange->start, prange->last); - mmu_interval_notifier_insert_locked(>notifier, mm, + /* mm->notifier_subscriptions should have been setup for this process +* ex: during kfd process creation +*/ + WARN_ON_ONCE(!mm->notifier_subscriptions); + + /* not necessary hold mmap lock to update mm interval notifier 
tree as +* opeations on mm->notifier_subscriptions->itree are serialized by +* mm->notifier_subscriptions->lock +*/ + if (locked) { + /* if mmap write lock has been hold use lock version to udpate +* mm interval notifier tree +*/ + mmu_interval_notifier_insert_locked(>notifier, mm, + prange->start << PAGE_SHIFT, + prange->npages << PAGE_SHIFT, + _range_mn_ops); + } else + /* use no-mmap-lock version to update mm interval notifier tree */ + mmu_interval_notifier_insert(>notifier, mm, prange->start << PAGE_SHIFT, prange->npages << PAGE_SHIFT, _range_mn_ops); @@ -895,6 +914,7 @@ svm_range_copy_array(void *psrc, size_t size, uint64_t num_elements, *vram_pages =
[PATCH] drm/kfd: Correct pinned buffer handling at kfd restore and validate process
From: Xiaogang Chen This reverts 8a774fe912ff09e39c2d3a3589c729330113f388 "drm/amdgpu: avoid restore process run into dead loop", since whether a buffer is pinned is unrelated to whether it needs mapping. Also skip buffer validation in the kfd driver if the buffer has been pinned. Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 3314821e4cf3..80018738bd1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -415,6 +415,10 @@ static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain, "Called with userptr BO")) return -EINVAL; + /* bo has been pined, not need validate it */ + if (bo->tbo.pin_count) + return 0; + amdgpu_bo_placement_from_domain(bo, domain); ret = ttm_bo_validate(>tbo, >placement, ); @@ -2736,7 +2740,7 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i /* keep mem without hmm range at userptr_inval_list */ if (!mem->range) -continue; + continue; /* Only check mem with hmm range associated */ valid = amdgpu_ttm_tt_get_user_pages_done( @@ -2981,9 +2985,6 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * if (!attachment->is_mapped) continue; - if (attachment->bo_va->base.bo->tbo.pin_count) - continue; - kfd_mem_dmaunmap_attachment(mem, attachment); ret = update_gpuvm_pte(mem, attachment, _obj); if (ret) { -- 2.25.1
[PATCH] amd/kfd: cancel work items at ih_wq in kfd_interrupt_exit
From: Xiaogang Chen When the kfd/amdgpu driver is tearing down, it cannot handle callbacks from ih_wq. If there are still work items left, cancel them instead of flushing, which would wait until they have been served. Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c index 9b6b6e882593..1847b9290a84 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c @@ -98,11 +98,12 @@ void kfd_interrupt_exit(struct kfd_node *node) spin_unlock_irqrestore(>interrupt_lock, flags); /* -* flush_work ensures that there are no outstanding -* work-queue items that will access interrupt_ring. New work items +* cancel work items still at ih_wq ensures that there are no outstanding +* work-queue items that will access interrupt_ring. At this stage kfd/amd +* driver is tearing down, cannot handle call back from wq. New work itemst * can't be created because we stopped interrupt handling above. */ - flush_workqueue(node->ih_wq); + cancel_work(>interrupt_work); destroy_workqueue(node->ih_wq); -- 2.25.1
[PATCH v4] drm/amdkfd: Use partial hmm page walk during buffer validation in SVM
From: Xiaogang Chen v2: -not need calculate vram page number for new registered svm range, only do it for split vram pages. v3: -use dma address to calculate vram page number of split svm range; use migrate_vma from hmm to calculate page number that migrate to vram. v4: -combine calculating of vram page number of split svm range and page dma address copy in same loop if original svm range includes vram pages. SVM uses hmm page walk to valid buffer before map to gpu vm. After have partial migration/mapping do validation on same vm range as migration/map do instead of whole svm range that can be very large. This change is expected to improve svm code performance. Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 35 --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 79 +++- 2 files changed, 48 insertions(+), 66 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index b854cbf06dce..3fb8e59acfbf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -260,19 +260,6 @@ static void svm_migrate_put_sys_page(unsigned long addr) put_page(page); } -static unsigned long svm_migrate_successful_pages(struct migrate_vma *migrate) -{ - unsigned long cpages = 0; - unsigned long i; - - for (i = 0; i < migrate->npages; i++) { - if (migrate->src[i] & MIGRATE_PFN_VALID && - migrate->src[i] & MIGRATE_PFN_MIGRATE) - cpages++; - } - return cpages; -} - static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate) { unsigned long upages = 0; @@ -402,6 +389,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, struct dma_fence *mfence = NULL; struct migrate_vma migrate = { 0 }; unsigned long cpages = 0; + unsigned long mpages = 0; dma_addr_t *scratch; void *buf; int r = -ENOMEM; @@ -450,12 +438,13 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, r = svm_migrate_copy_to_vram(node, prange, , , 
scratch, ttm_res_offset); migrate_vma_pages(); - pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n", - svm_migrate_successful_pages(), cpages, migrate.npages); - svm_migrate_copy_done(adev, mfence); migrate_vma_finalize(); + mpages = cpages - svm_migrate_unsuccessful_pages(); + pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n", +mpages, cpages, migrate.npages); + kfd_smi_event_migration_end(node, p->lead_thread->pid, start >> PAGE_SHIFT, end >> PAGE_SHIFT, 0, node->id, trigger); @@ -465,12 +454,12 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, out_free: kvfree(buf); out: - if (!r && cpages) { + if (!r && mpages) { pdd = svm_range_get_pdd_by_node(prange, node); if (pdd) - WRITE_ONCE(pdd->page_in, pdd->page_in + cpages); + WRITE_ONCE(pdd->page_in, pdd->page_in + mpages); - return cpages; + return mpages; } return r; } @@ -498,7 +487,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, struct vm_area_struct *vma; uint64_t ttm_res_offset; struct kfd_node *node; - unsigned long cpages = 0; + unsigned long mpages = 0; long r = 0; if (start_mgr < prange->start || last_mgr > prange->last) { @@ -540,15 +529,15 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, pr_debug("failed %ld to migrate\n", r); break; } else { - cpages += r; + mpages += r; } ttm_res_offset += next - addr; addr = next; } - if (cpages) { + if (mpages) { prange->actual_loc = best_loc; - prange->vram_pages = prange->vram_pages + cpages; + prange->vram_pages = prange->vram_pages + mpages; } else if (!prange->actual_loc) { /* if no page migrated and all pages from prange are at * sys ram drop svm_bo got from svm_range_vram_node_new diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 2834fb351818..61e363e388f8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -158,13 +158,12 @@ svm_is_valid_dma_mapping_addr(struct device *dev, dma_addr_t 
dma_addr) static int svm
[PATCH v3] drm/amdkfd: Use partial hmm page walk during buffer validation in SVM
From: Xiaogang Chen v2: -not need calculate vram page number for new registered svm range, only do it for split vram pages. v3: -use dma address to calculate vram page number of split svm range; use migrate_vma from hmm to calculate page number that migrate to vram. SVM uses hmm page walk to valid buffer before map to gpu vm. After have partial migration/mapping do validation on same vm range as migration/map do instead of whole svm range that can be very large. This change is expected to improve svm code performance. Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 35 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 72 2 files changed, 48 insertions(+), 59 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index b854cbf06dce..34376184c37c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -260,19 +260,6 @@ static void svm_migrate_put_sys_page(unsigned long addr) put_page(page); } -static unsigned long svm_migrate_successful_pages(struct migrate_vma *migrate) -{ - unsigned long cpages = 0; - unsigned long i; - - for (i = 0; i < migrate->npages; i++) { - if (migrate->src[i] & MIGRATE_PFN_VALID && - migrate->src[i] & MIGRATE_PFN_MIGRATE) - cpages++; - } - return cpages; -} - static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate) { unsigned long upages = 0; @@ -402,6 +389,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, struct dma_fence *mfence = NULL; struct migrate_vma migrate = { 0 }; unsigned long cpages = 0; + unsigned long mpages = 0; dma_addr_t *scratch; void *buf; int r = -ENOMEM; @@ -450,12 +438,13 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, r = svm_migrate_copy_to_vram(node, prange, , , scratch, ttm_res_offset); migrate_vma_pages(); - pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n", - svm_migrate_successful_pages(), cpages, 
migrate.npages); - svm_migrate_copy_done(adev, mfence); migrate_vma_finalize(); + mpages = cpages - svm_migrate_unsuccessful_pages(); + pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n", + mpages, cpages, migrate.npages); + kfd_smi_event_migration_end(node, p->lead_thread->pid, start >> PAGE_SHIFT, end >> PAGE_SHIFT, 0, node->id, trigger); @@ -465,12 +454,12 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, out_free: kvfree(buf); out: - if (!r && cpages) { + if (!r && mpages) { pdd = svm_range_get_pdd_by_node(prange, node); if (pdd) - WRITE_ONCE(pdd->page_in, pdd->page_in + cpages); + WRITE_ONCE(pdd->page_in, pdd->page_in + mpages); - return cpages; + return mpages; } return r; } @@ -498,7 +487,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, struct vm_area_struct *vma; uint64_t ttm_res_offset; struct kfd_node *node; - unsigned long cpages = 0; + unsigned long mpages = 0; long r = 0; if (start_mgr < prange->start || last_mgr > prange->last) { @@ -540,15 +529,15 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, pr_debug("failed %ld to migrate\n", r); break; } else { - cpages += r; + mpages += r; } ttm_res_offset += next - addr; addr = next; } - if (cpages) { + if (mpages) { prange->actual_loc = best_loc; - prange->vram_pages = prange->vram_pages + cpages; + prange->vram_pages = prange->vram_pages + mpages; } else if (!prange->actual_loc) { /* if no page migrated and all pages from prange are at * sys ram drop svm_bo got from svm_range_vram_node_new diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 2834fb351818..8d012ca82cd6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -158,13 +158,12 @@ svm_is_valid_dma_mapping_addr(struct device *dev, dma_addr_t dma_addr) static int svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, unsigned long offset, unsigned long npages, -
[PATCH v2] drm/amdkfd: Use partial hmm page walk during buffer validation in SVM
From: Xiaogang Chen v2: -not need calculate vram page number for new registered svm range, only do it for split vram pages. SVM uses hmm page walk to valid buffer before map to gpu vm. After have partial migration/mapping do validation on same vm range as migration/map do instead of whole svm range that can be very large. This change is expected to improve svm code performance. Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 149 --- 1 file changed, 109 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 2834fb351818..2f14cd1a3416 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -158,13 +158,12 @@ svm_is_valid_dma_mapping_addr(struct device *dev, dma_addr_t dma_addr) static int svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, unsigned long offset, unsigned long npages, - unsigned long *hmm_pfns, uint32_t gpuidx, uint64_t *vram_pages) + unsigned long *hmm_pfns, uint32_t gpuidx) { enum dma_data_direction dir = DMA_BIDIRECTIONAL; dma_addr_t *addr = prange->dma_addr[gpuidx]; struct device *dev = adev->dev; struct page *page; - uint64_t vram_pages_dev; int i, r; if (!addr) { @@ -174,7 +173,6 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, prange->dma_addr[gpuidx] = addr; } - vram_pages_dev = 0; addr += offset; for (i = 0; i < npages; i++) { if (svm_is_valid_dma_mapping_addr(dev, addr[i])) @@ -184,7 +182,6 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, if (is_zone_device_page(page)) { struct amdgpu_device *bo_adev = prange->svm_bo->node->adev; - vram_pages_dev++; addr[i] = (hmm_pfns[i] << PAGE_SHIFT) + bo_adev->vm_manager.vram_base_offset - bo_adev->kfd.pgmap.range.start; @@ -201,14 +198,14 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, pr_debug_ratelimited("dma mapping 0x%llx for page addr 0x%lx\n", addr[i] 
>> PAGE_SHIFT, page_to_pfn(page)); } - *vram_pages = vram_pages_dev; + return 0; } static int svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap, unsigned long offset, unsigned long npages, - unsigned long *hmm_pfns, uint64_t *vram_pages) + unsigned long *hmm_pfns) { struct kfd_process *p; uint32_t gpuidx; @@ -227,7 +224,7 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap, } r = svm_range_dma_map_dev(pdd->dev->adev, prange, offset, npages, - hmm_pfns, gpuidx, vram_pages); + hmm_pfns, gpuidx); if (r) break; } @@ -982,11 +979,6 @@ svm_range_split_nodes(struct svm_range *new, struct svm_range *old, new->svm_bo = svm_range_bo_ref(old->svm_bo); new->ttm_res = old->ttm_res; - /* set new's vram_pages as old range's now, the acurate vram_pages -* will be updated during mapping -*/ - new->vram_pages = min(old->vram_pages, new->npages); - spin_lock(>svm_bo->list_lock); list_add(>svm_bo_list, >svm_bo->range_list); spin_unlock(>svm_bo->list_lock); @@ -1107,9 +1099,9 @@ svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last, static int svm_range_split_tail(struct svm_range *prange, uint64_t new_last, -struct list_head *insert_list, struct list_head *remap_list) +struct list_head *insert_list, struct list_head *remap_list, +struct svm_range *tail) { - struct svm_range *tail; int r = svm_range_split(prange, prange->start, new_last, ); if (!r) { @@ -1122,9 +1114,9 @@ svm_range_split_tail(struct svm_range *prange, uint64_t new_last, static int svm_range_split_head(struct svm_range *prange, uint64_t new_start, -struct list_head *insert_list, struct list_head *remap_list) +struct list_head *insert_list, struct list_head *remap_list, +struct svm_range *head) { - struct svm_range *head; int r = svm_range_split(prange, new_start, prange->last, ); if (!r) { @@ -1573,7 +1565,6 @@ static int svm_range_validate_and_map(struct mm_struct *mm, struct svm_validate_context *ctx; u
[PATCH] drm/amdkfd: Use partial hmm page walk during buffer validation in SVM
From: Xiaogang Chen SVM uses hmm page walk to valid buffer before map to gpu vm. After have partial migration/mapping do validation on same vm range as migration/map do instead of whole svm range that can be very large. This change is expected to improve svm code performance. Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 123 +++ 1 file changed, 89 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 2834fb351818..f670d5f6bcdf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -158,13 +158,12 @@ svm_is_valid_dma_mapping_addr(struct device *dev, dma_addr_t dma_addr) static int svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, unsigned long offset, unsigned long npages, - unsigned long *hmm_pfns, uint32_t gpuidx, uint64_t *vram_pages) + unsigned long *hmm_pfns, uint32_t gpuidx) { enum dma_data_direction dir = DMA_BIDIRECTIONAL; dma_addr_t *addr = prange->dma_addr[gpuidx]; struct device *dev = adev->dev; struct page *page; - uint64_t vram_pages_dev; int i, r; if (!addr) { @@ -174,7 +173,6 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, prange->dma_addr[gpuidx] = addr; } - vram_pages_dev = 0; addr += offset; for (i = 0; i < npages; i++) { if (svm_is_valid_dma_mapping_addr(dev, addr[i])) @@ -184,7 +182,6 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, if (is_zone_device_page(page)) { struct amdgpu_device *bo_adev = prange->svm_bo->node->adev; - vram_pages_dev++; addr[i] = (hmm_pfns[i] << PAGE_SHIFT) + bo_adev->vm_manager.vram_base_offset - bo_adev->kfd.pgmap.range.start; @@ -201,14 +198,14 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, pr_debug_ratelimited("dma mapping 0x%llx for page addr 0x%lx\n", addr[i] >> PAGE_SHIFT, page_to_pfn(page)); } - *vram_pages = vram_pages_dev; + return 0; } static int 
svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap, unsigned long offset, unsigned long npages, - unsigned long *hmm_pfns, uint64_t *vram_pages) + unsigned long *hmm_pfns) { struct kfd_process *p; uint32_t gpuidx; @@ -227,7 +224,7 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap, } r = svm_range_dma_map_dev(pdd->dev->adev, prange, offset, npages, - hmm_pfns, gpuidx, vram_pages); + hmm_pfns, gpuidx); if (r) break; } @@ -982,11 +979,6 @@ svm_range_split_nodes(struct svm_range *new, struct svm_range *old, new->svm_bo = svm_range_bo_ref(old->svm_bo); new->ttm_res = old->ttm_res; - /* set new's vram_pages as old range's now, the acurate vram_pages -* will be updated during mapping -*/ - new->vram_pages = min(old->vram_pages, new->npages); - spin_lock(>svm_bo->list_lock); list_add(>svm_bo_list, >svm_bo->range_list); spin_unlock(>svm_bo->list_lock); @@ -1573,7 +1565,6 @@ static int svm_range_validate_and_map(struct mm_struct *mm, struct svm_validate_context *ctx; unsigned long start, end, addr; struct kfd_process *p; - uint64_t vram_pages; void *owner; int32_t idx; int r = 0; @@ -1642,15 +1633,13 @@ static int svm_range_validate_and_map(struct mm_struct *mm, } } - vram_pages = 0; - start = prange->start << PAGE_SHIFT; - end = (prange->last + 1) << PAGE_SHIFT; + start = map_start << PAGE_SHIFT; + end = (map_last + 1) << PAGE_SHIFT; for (addr = start; !r && addr < end; ) { struct hmm_range *hmm_range; unsigned long map_start_vma; unsigned long map_last_vma; struct vm_area_struct *vma; - uint64_t vram_pages_vma; unsigned long next = 0; unsigned long offset; unsigned long npages; @@ -1677,13 +1666,11 @@ static int svm_range_validate_and_map(struct mm_struct *mm, } if (!r) { - offset = (addr - start) >> PAGE_SHIFT; + of
[PATCH] drm/amdkfd: Use partial migrations/mapping for GPU/CPU page faults in SVM
From: Xiaogang Chen This patch implements partial migration/mapping for gpu/cpu page faults in SVM according to migration granularity (default 2MB). A svm range may include pages from both system ram and vram of one gpu now. These changes are expected to improve migration performance and reduce mmu callback and TLB flush workloads. Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 152 +++- drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 6 +- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 176 +++ drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 9 +- 4 files changed, 183 insertions(+), 160 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 6c25dab051d5..67df1b46f292 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -442,10 +442,10 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, goto out_free; } if (cpages != npages) - pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n", + pr_debug("partial migration, 0x%lx/0x%llx pages collected\n", cpages, npages); else - pr_debug("0x%lx pages migrated\n", cpages); + pr_debug("0x%lx pages collected\n", cpages); r = svm_migrate_copy_to_vram(node, prange, , , scratch, ttm_res_offset); migrate_vma_pages(); @@ -479,6 +479,8 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, * svm_migrate_ram_to_vram - migrate svm range from system to device * @prange: range structure * @best_loc: the device to migrate to + * @start_mgr: start page to migrate + * @last_mgr: last page to migrate * @mm: the process mm structure * @trigger: reason of migration * @@ -489,6 +491,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, */ static int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, + unsigned long start_mgr, unsigned long last_mgr, struct mm_struct *mm, uint32_t trigger) { unsigned long addr, start, end; @@ -498,30 +501,37 @@
svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, unsigned long cpages = 0; long r = 0; - if (prange->actual_loc == best_loc) { - pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n", -prange->svms, prange->start, prange->last, best_loc); + if (!best_loc) { + pr_debug("svms 0x%p [0x%lx 0x%lx] migrate to sys ram\n", + prange->svms, start_mgr, last_mgr); return 0; } + if (start_mgr < prange->start || last_mgr > prange->last) { + pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n", +start_mgr, last_mgr, prange->start, prange->last); + return -EFAULT; + } + node = svm_range_get_node_by_id(prange, best_loc); if (!node) { pr_debug("failed to get kfd node by id 0x%x\n", best_loc); return -ENODEV; } - pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms, -prange->start, prange->last, best_loc); + pr_debug("svms 0x%p [0x%lx 0x%lx] in [0x%lx 0x%lx] to gpu 0x%x\n", + prange->svms, start_mgr, last_mgr, prange->start, prange->last, + best_loc); - start = prange->start << PAGE_SHIFT; - end = (prange->last + 1) << PAGE_SHIFT; + start = start_mgr << PAGE_SHIFT; + end = (last_mgr + 1) << PAGE_SHIFT; r = svm_range_vram_node_new(node, prange, true); if (r) { dev_dbg(node->adev->dev, "fail %ld to alloc vram\n", r); return r; } - ttm_res_offset = prange->offset << PAGE_SHIFT; + ttm_res_offset = (start_mgr - prange->start + prange->offset) << PAGE_SHIFT; for (addr = start; addr < end;) { unsigned long next; @@ -544,8 +554,11 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, if (cpages) { prange->actual_loc = best_loc; - svm_range_dma_unmap(prange); - } else { + prange->vram_pages = prange->vram_pages + cpages; + } else if (!prange->actual_loc) { + /* if no page migrated and all pages from prange are at +* sys ram drop svm_bo got from svm_range_vram_node_new +*/ svm_range_vram_node_free(prange); } @@ -663,9 +676,8 @@ svm_migrate_copy
[PATCH v3] drm/amdkfd: Use partial mapping in GPU page faults
From: Xiaogang Chen After partial migration to recover GPU page fault this patch does GPU vm space mapping for same page range that got migrated instead of mapping all pages of svm range in which the page fault happened. Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 29 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 54af7a2b29f8..3a71d04779b1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1619,6 +1619,7 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx) * 5. Release page table (and SVM BO) reservation */ static int svm_range_validate_and_map(struct mm_struct *mm, + unsigned long map_start, unsigned long map_last, struct svm_range *prange, int32_t gpuidx, bool intr, bool wait, bool flush_tlb) { @@ -1699,6 +1700,8 @@ static int svm_range_validate_and_map(struct mm_struct *mm, end = (prange->last + 1) << PAGE_SHIFT; for (addr = start; !r && addr < end; ) { struct hmm_range *hmm_range; + unsigned long map_start_vma; + unsigned long map_last_vma; struct vm_area_struct *vma; uint64_t vram_pages_vma; unsigned long next = 0; @@ -1747,9 +1750,16 @@ static int svm_range_validate_and_map(struct mm_struct *mm, r = -EAGAIN; } - if (!r) - r = svm_range_map_to_gpus(prange, offset, npages, readonly, - ctx->bitmap, wait, flush_tlb); + if (!r) { + map_start_vma = max(map_start, prange->start + offset); + map_last_vma = min(map_last, prange->start + offset + npages - 1); + if (map_start_vma <= map_last_vma) { + offset = map_start_vma - prange->start; + npages = map_last_vma - map_start_vma + 1; + r = svm_range_map_to_gpus(prange, offset, npages, readonly, + ctx->bitmap, wait, flush_tlb); + } + } if (!r && next == end) prange->mapped_to_gpu = true; @@ -1855,8 +1865,8 @@ static void svm_range_restore_work(struct work_struct *work) */ mutex_lock(>migrate_mutex); - r =
svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, - false, true, false); + r = svm_range_validate_and_map(mm, prange->start, prange->last, prange, + MAX_GPU_INSTANCE, false, true, false); if (r) pr_debug("failed %d to map 0x%lx to gpus\n", r, prange->start); @@ -3069,6 +3079,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr, write_fault, timestamp); + start = prange->start; + last = prange->last; if (prange->actual_loc != 0 || best_loc != 0) { migration = true; /* Align migration range start and size to granularity size */ @@ -3102,10 +3114,11 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, } } - r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false); + r = svm_range_validate_and_map(mm, start, last, prange, gpuidx, false, + false, false); if (r) pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n", -r, svms, prange->start, prange->last); +r, svms, start, last); kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr, migration); @@ -3650,7 +3663,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm, flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu; - r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, + r = svm_range_validate_and_map(mm, prange->start, prange->last, prange, MAX_GPU_INSTANCE, true, true, flush_tlb); if (r) pr_debug("failed %d to map svm range\n", r); -- 2.25.1
[PATCH v2] drm/amdkfd: Use partial mapping in GPU page fault recovery
From: Xiaogang Chen After partial migration to recover GPU page fault this patch does GPU vm space mapping for same page range that got migrated instead of mapping all pages of svm range in which the page fault happened. Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 27 +++ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 54af7a2b29f8..58f0506d5221 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1619,6 +1619,7 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx) * 5. Release page table (and SVM BO) reservation */ static int svm_range_validate_and_map(struct mm_struct *mm, + unsigned long map_start, unsigned long map_last, struct svm_range *prange, int32_t gpuidx, bool intr, bool wait, bool flush_tlb) { @@ -1747,9 +1748,16 @@ static int svm_range_validate_and_map(struct mm_struct *mm, r = -EAGAIN; } - if (!r) - r = svm_range_map_to_gpus(prange, offset, npages, readonly, - ctx->bitmap, wait, flush_tlb); + if (!r) { + map_start = max(map_start, prange->start + offset); + map_last = min(map_last, prange->start + offset + npages - 1); + if (map_start <= map_last) { + offset = map_start - prange->start; + npages = map_last - map_start + 1; + r = svm_range_map_to_gpus(prange, offset, npages, readonly, + ctx->bitmap, wait, flush_tlb); + } + } if (!r && next == end) prange->mapped_to_gpu = true; @@ -1855,8 +1863,8 @@ static void svm_range_restore_work(struct work_struct *work) */ mutex_lock(>migrate_mutex); - r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, - false, true, false); + r = svm_range_validate_and_map(mm, prange->start, prange->last, prange, + MAX_GPU_INSTANCE, false, true, false); if (r) pr_debug("failed %d to map 0x%lx to gpus\n", r, prange->start); @@ -3069,6 +3077,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, 
kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr, write_fault, timestamp); + start = prange->start; + last = prange->last; if (prange->actual_loc != 0 || best_loc != 0) { migration = true; /* Align migration range start and size to granularity size */ @@ -3102,10 +3112,11 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, } } - r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false); + r = svm_range_validate_and_map(mm, start, last, prange, gpuidx, false, + false, false); if (r) pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n", -r, svms, prange->start, prange->last); +r, svms, start, last); kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr, migration); @@ -3650,7 +3661,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm, flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu; - r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, + r = svm_range_validate_and_map(mm, prange->start, prange->last, prange, MAX_GPU_INSTANCE, true, true, flush_tlb); if (r) pr_debug("failed %d to map svm range\n", r); -- 2.25.1
[PATCH] drm/amdkfd: Use partial mapping in GPU page fault recovery
From: Xiaogang Chen After partial migration to recover GPU page fault this patch does GPU vm space mapping for same page range that got migrated instead of mapping all pages of svm range in which the page fault happened. Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 33 +--- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 54af7a2b29f8..81dbcc8a4ccc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1619,6 +1619,7 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx) * 5. Release page table (and SVM BO) reservation */ static int svm_range_validate_and_map(struct mm_struct *mm, + unsigned long map_start, unsigned long map_last, struct svm_range *prange, int32_t gpuidx, bool intr, bool wait, bool flush_tlb) { @@ -1630,6 +1631,12 @@ static int svm_range_validate_and_map(struct mm_struct *mm, int32_t idx; int r = 0; + if (map_start < prange->start || map_last > prange->last) { + pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n", +map_start, map_last, prange->start, prange->last); + return -EFAULT; + } + ctx = kzalloc(sizeof(struct svm_validate_context), GFP_KERNEL); if (!ctx) return -ENOMEM; @@ -1747,9 +1754,16 @@ static int svm_range_validate_and_map(struct mm_struct *mm, r = -EAGAIN; } - if (!r) - r = svm_range_map_to_gpus(prange, offset, npages, readonly, - ctx->bitmap, wait, flush_tlb); + if (!r) { + map_start = max(map_start, prange->start + offset); + map_last = min(map_last, prange->start + offset + npages); + if (map_start <= map_last) { + offset = map_start - prange->start; + npages = map_last - map_start + 1; + r = svm_range_map_to_gpus(prange, offset, npages, readonly, + ctx->bitmap, wait, flush_tlb); + } + } if (!r && next == end) prange->mapped_to_gpu = true; @@ -1855,8 +1869,8 @@ static void svm_range_restore_work(struct work_struct *work) */ 
mutex_lock(>migrate_mutex); - r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, - false, true, false); + r = svm_range_validate_and_map(mm, prange->start, prange->last, prange, + MAX_GPU_INSTANCE, false, true, false); if (r) pr_debug("failed %d to map 0x%lx to gpus\n", r, prange->start); @@ -3069,6 +3083,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr, write_fault, timestamp); + start = prange->start; + last = prange->last; if (prange->actual_loc != 0 || best_loc != 0) { migration = true; /* Align migration range start and size to granularity size */ @@ -3102,10 +3118,11 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, } } - r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false); + r = svm_range_validate_and_map(mm, start, last, prange, gpuidx, false, + false, false); if (r) pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n", -r, svms, prange->start, prange->last); +r, svms, start, last); kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr, migration); @@ -3650,7 +3667,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm, flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu; - r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, + r = svm_range_validate_and_map(mm, prange->start, prange->last, prange, MAX_GPU_INSTANCE, true, true, flush_tlb); if (r) pr_debug("failed %d to map svm range\n", r); -- 2.25.1
[PATCH v2] drm/amdgpu: Correctly use bo_va->ref_count in compute VMs
From: Xiaogang Chen This is needed to correctly handle BOs imported into compute VM from gfx. Both kfd and gfx should use same bo_va and set bo_va->ref_count correctly when map the Bos into same VM, otherwise we may trigger kernel general protection when iterate mappings over bo_va's valids or invalids list. Signed-off-by: Felix Kuehling Signed-off-by: Xiaogang Chen Acked-by: Christian König Reviewed-by: Ramesh Errabolu Tested-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 12 ++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index a15e59abe70a..c1ec93cc50ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -832,6 +832,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, uint64_t va = mem->va; struct kfd_mem_attachment *attachment[2] = {NULL, NULL}; struct amdgpu_bo *bo[2] = {NULL, NULL}; + struct amdgpu_bo_va *bo_va; bool same_hive = false; int i, ret; @@ -919,7 +920,13 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, pr_debug("Unable to reserve BO during memory attach"); goto unwind; } - attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]); + bo_va = amdgpu_vm_bo_find(vm, bo[i]); + if (!bo_va) + bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]); + else + ++bo_va->ref_count; + attachment[i]->bo_va = bo_va; + amdgpu_bo_unreserve(bo[i]); if (unlikely(!attachment[i]->bo_va)) { ret = -ENOMEM; @@ -943,7 +950,8 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, continue; if (attachment[i]->bo_va) { amdgpu_bo_reserve(bo[i], true); - amdgpu_vm_bo_del(adev, attachment[i]->bo_va); + if (--attachment[i]->bo_va->ref_count == 0) + amdgpu_vm_bo_del(adev, attachment[i]->bo_va); amdgpu_bo_unreserve(bo[i]); list_del([i]->list); } -- 2.25.1
[PATCH] Find bo_va before create it when map bo into compute VM
From: Xiaogang Chen This is needed to correctly handle BOs imported into a compute VM from gfx. Both kfd and gfx should use the same bo_va when mapping the BOs into the same VM, otherwise we may trigger a kernel general protection fault when iterating mappings from bo_va. Signed-off-by: Felix Kuehling Acked-by: Christian König Reviewed-by: Ramesh Errabolu Reviewed-By: Xiaogang Chen Tested-By: Xiaogang Chen --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 12 ++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index a15e59abe70a..c1ec93cc50ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -832,6 +832,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, uint64_t va = mem->va; struct kfd_mem_attachment *attachment[2] = {NULL, NULL}; struct amdgpu_bo *bo[2] = {NULL, NULL}; + struct amdgpu_bo_va *bo_va; bool same_hive = false; int i, ret; @@ -919,7 +920,13 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, pr_debug("Unable to reserve BO during memory attach"); goto unwind; } - attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]); + bo_va = amdgpu_vm_bo_find(vm, bo[i]); + if (!bo_va) + bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]); + else + ++bo_va->ref_count; + attachment[i]->bo_va = bo_va; + amdgpu_bo_unreserve(bo[i]); if (unlikely(!attachment[i]->bo_va)) { ret = -ENOMEM; @@ -943,7 +950,8 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, continue; if (attachment[i]->bo_va) { amdgpu_bo_reserve(bo[i], true); - amdgpu_vm_bo_del(adev, attachment[i]->bo_va); + if (--attachment[i]->bo_va->ref_count == 0) + amdgpu_vm_bo_del(adev, attachment[i]->bo_va); amdgpu_bo_unreserve(bo[i]); list_del(&attachment[i]->list); } -- 2.25.1
[PATCH v4] drm/amdkfd: Use partial migrations in GPU page faults
From: Xiaogang Chen This patch implements partial migration in gpu page fault according to migration granularity(default 2MB) and not split svm range in cpu page fault handling. A svm range may include pages from both system ram and vram of one gpu now. These chagnes are expected to improve migration performance and reduce mmu callback and TLB flush workloads. Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 156 +-- drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 6 +- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 83 +--- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 6 +- 4 files changed, 162 insertions(+), 89 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 6c25dab051d5..6a059e4aff86 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -442,10 +442,10 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, goto out_free; } if (cpages != npages) - pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n", + pr_debug("partial migration, 0x%lx/0x%llx pages collected\n", cpages, npages); else - pr_debug("0x%lx pages migrated\n", cpages); + pr_debug("0x%lx pages collected\n", cpages); r = svm_migrate_copy_to_vram(node, prange, , , scratch, ttm_res_offset); migrate_vma_pages(); @@ -479,6 +479,8 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, * svm_migrate_ram_to_vram - migrate svm range from system to device * @prange: range structure * @best_loc: the device to migrate to + * @start_mgr: start page to migrate + * @last_mgr: last page to migrate * @mm: the process mm structure * @trigger: reason of migration * @@ -489,6 +491,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, */ static int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, + unsigned long start_mgr, unsigned long last_mgr, struct mm_struct *mm, uint32_t trigger) { unsigned long addr, 
start, end; @@ -498,23 +501,30 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, unsigned long cpages = 0; long r = 0; - if (prange->actual_loc == best_loc) { - pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n", -prange->svms, prange->start, prange->last, best_loc); + if (!best_loc) { + pr_debug("svms 0x%p [0x%lx 0x%lx] migrate to sys ram\n", + prange->svms, start_mgr, last_mgr); return 0; } + if (start_mgr < prange->start || last_mgr > prange->last) { + pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n", +start_mgr, last_mgr, prange->start, prange->last); + return -EFAULT; + } + node = svm_range_get_node_by_id(prange, best_loc); if (!node) { pr_debug("failed to get kfd node by id 0x%x\n", best_loc); return -ENODEV; } - pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms, -prange->start, prange->last, best_loc); + pr_debug("svms 0x%p [0x%lx 0x%lx] in [0x%lx 0x%lx] to gpu 0x%x\n", + prange->svms, start_mgr, last_mgr, prange->start, prange->last, + best_loc); - start = prange->start << PAGE_SHIFT; - end = (prange->last + 1) << PAGE_SHIFT; + start = start_mgr << PAGE_SHIFT; + end = (last_mgr + 1) << PAGE_SHIFT; r = svm_range_vram_node_new(node, prange, true); if (r) { @@ -544,8 +554,11 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, if (cpages) { prange->actual_loc = best_loc; - svm_range_dma_unmap(prange); - } else { + prange->vram_pages = prange->vram_pages + cpages; + } else if (!prange->actual_loc) { + /* if no page migrated and all pages from prange are at +* sys ram drop svm_bo got from svm_range_vram_node_new +*/ svm_range_vram_node_free(prange); } @@ -663,19 +676,19 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, * Context: Process context, caller hold mmap read lock, prange->migrate_mutex * * Return: - * 0 - success with all pages migrated * negative values - indicate error - * positive values - partial migration, number of pages not migrated + * 
positive values or zero - num
[PATCH] drm/amdkfd: Fix a race condition of vram buffer unref in svm code
From: Xiaogang Chen The unref of prange->svm_bo can happen both in the mmu callback and in a callback after migration to system ram. Both are async calls running in different tasks. Serialize the svm_bo unref operation to avoid random "use-after-free". Signed-off-by: Xiaogang.Chen --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 9 + 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 70aa882636ab..8e246e848018 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -637,6 +637,15 @@ void svm_range_vram_node_free(struct svm_range *prange) { svm_range_bo_unref(prange->svm_bo); prange->ttm_res = NULL; + /* serialize prange->svm_bo unref */ + mutex_lock(&prange->lock); + /* prange->svm_bo has not been unref */ + if (prange->ttm_res) { + prange->ttm_res = NULL; + mutex_unlock(&prange->lock); + svm_range_bo_unref(prange->svm_bo); + } else + mutex_unlock(&prange->lock); } struct kfd_node * -- 2.25.1
[PATCH v3] drm/amdkfd: Use partial migrations in GPU page faults
From: Xiaogang Chen This patch implements partial migration in gpu page fault according to migration granularity(default 2MB) and not split svm range in cpu page fault handling. A svm range may include pages from both system ram and vram of one gpu now. These chagnes are expected to improve migration performance and reduce mmu callback and TLB flush workloads. Signed-off-by: xiaogang chen --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 156 +-- drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 6 +- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 104 +++ drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 6 +- 4 files changed, 178 insertions(+), 94 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 6c25dab051d5..e886f9ce40ac 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -442,10 +442,10 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, goto out_free; } if (cpages != npages) - pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n", + pr_debug("partial migration, 0x%lx/0x%llx pages collected\n", cpages, npages); else - pr_debug("0x%lx pages migrated\n", cpages); + pr_debug("0x%lx pages collected\n", cpages); r = svm_migrate_copy_to_vram(node, prange, , , scratch, ttm_res_offset); migrate_vma_pages(); @@ -479,6 +479,8 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, * svm_migrate_ram_to_vram - migrate svm range from system to device * @prange: range structure * @best_loc: the device to migrate to + * @start_mgr: start page to migrate + * @last_mgr: last page to migrate * @mm: the process mm structure * @trigger: reason of migration * @@ -489,6 +491,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, */ static int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, + unsigned long start_mgr, unsigned long last_mgr, struct mm_struct *mm, uint32_t trigger) { unsigned long addr, 
start, end; @@ -498,23 +501,30 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, unsigned long cpages = 0; long r = 0; - if (prange->actual_loc == best_loc) { - pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n", -prange->svms, prange->start, prange->last, best_loc); + if (!best_loc) { + pr_debug("svms 0x%p [0x%lx 0x%lx] migrate to sys ram\n", + prange->svms, start_mgr, last_mgr); return 0; } + if (start_mgr < prange->start || last_mgr > prange->last) { + pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n", +start_mgr, last_mgr, prange->start, prange->last); + return -EFAULT; + } + node = svm_range_get_node_by_id(prange, best_loc); if (!node) { pr_debug("failed to get kfd node by id 0x%x\n", best_loc); return -ENODEV; } - pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms, -prange->start, prange->last, best_loc); + pr_debug("svms 0x%p [0x%lx 0x%lx] in [0x%lx 0x%lx] to gpu 0x%x\n", + prange->svms, start_mgr, last_mgr, prange->start, prange->last, + best_loc); - start = prange->start << PAGE_SHIFT; - end = (prange->last + 1) << PAGE_SHIFT; + start = start_mgr << PAGE_SHIFT; + end = (last_mgr + 1) << PAGE_SHIFT; r = svm_range_vram_node_new(node, prange, true); if (r) { @@ -544,8 +554,11 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, if (cpages) { prange->actual_loc = best_loc; - svm_range_dma_unmap(prange); - } else { + prange->vram_pages = prange->vram_pages + cpages; + } else if (!prange->actual_loc) { + /* if no page migrated and all pages from prange are at +* sys ram drop svm_bo got from svm_range_vram_node_new +*/ svm_range_vram_node_free(prange); } @@ -663,19 +676,19 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, * Context: Process context, caller hold mmap read lock, prange->migrate_mutex * * Return: - * 0 - success with all pages migrated * negative values - indicate error - * positive values - partial migration, number of pages not migrated + * 
positive values or zero -
[PATCH v2] drm/amdkfd: fix some race conditions in vram buffer alloc/free of svm code
From: Xiaogang Chen This patch fixes: 1: The ref count of prange's svm_bo is decreased by an async call from hmm. When waiting for the svm_bo of prange to be released we should also wait for prange->svm_bo to become NULL, otherwise prange->svm_bo may be set to NULL after a new vram buffer has been allocated. 2: While waiting in a while loop for the svm_bo of prange to be released, we should reschedule the current task to give other tasks an opportunity to run, especially the workqueue task that handles the svm_bo ref release, otherwise we may trigger a soft lockup. Signed-off-by: Xiaogang.Chen --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index bed0f8bf83c7..164cd77af62d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -502,11 +502,11 @@ svm_range_validate_svm_bo(struct kfd_node *node, struct svm_range *prange) /* We need a new svm_bo. Spin-loop to wait for concurrent * svm_range_bo_release to finish removing this range from -* its range list. After this, it is safe to reuse the -* svm_bo pointer and svm_bo_list head. +* its range list and set prange->svm_bo to null. After this, +* it is safe to reuse the svm_bo pointer and svm_bo_list head. */ - while (!list_empty_careful(&prange->svm_bo_list)) - ; + while (!list_empty_careful(&prange->svm_bo_list) || prange->svm_bo) + cond_resched(); return false; } -- 2.25.1
[PATCH] drm/amdkfd: fix some race conditions in vram buffer alloc/free of svm code
From: Xiaogang Chen This patch fixes: 1: The ref count of prange's svm_bo is decreased by an async call from hmm. When waiting for the svm_bo of prange to be released we should also wait for prange->svm_bo to become NULL, otherwise prange->svm_bo may be set to NULL after a new vram buffer has been allocated. 2: While waiting in a while loop for the svm_bo of prange to be released, we should schedule the current task to give other tasks an opportunity to run, especially the workqueue task that handles the svm_bo ref release, otherwise we may trigger a soft lockup. Signed-off-by: Xiaogang.Chen --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index bed0f8bf83c7..1074a4aedf57 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -502,11 +502,11 @@ svm_range_validate_svm_bo(struct kfd_node *node, struct svm_range *prange) /* We need a new svm_bo. Spin-loop to wait for concurrent * svm_range_bo_release to finish removing this range from -* its range list. After this, it is safe to reuse the -* svm_bo pointer and svm_bo_list head. +* its range list and set prange->svm_bo to null. After this, +* it is safe to reuse the svm_bo pointer and svm_bo_list head. */ - while (!list_empty_careful(&prange->svm_bo_list)) - ; + while (!list_empty_careful(&prange->svm_bo_list) || prange->svm_bo) + schedule(); return false; } -- 2.25.1
[PATCH] drm/amdkfd: Separate dma unmap and free of dma address array operations
From: Xiaogang Chen We do not need free dma address array of svm_range each time we do dma unmap for pages in svm_range as we can reuse the same array. Only free it when free svm_range. Seperate these two operations and use them accordinly. Signed-off-by: Xiaogang.Chen --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 6 +++--- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 23 --- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 4 ++-- 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 192b0d106413..6c25dab051d5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -460,7 +460,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, start >> PAGE_SHIFT, end >> PAGE_SHIFT, 0, node->id, trigger); - svm_range_dma_unmap(adev->dev, scratch, 0, npages); + svm_range_dma_unmap_dev(adev->dev, scratch, 0, npages); out_free: kvfree(buf); @@ -544,7 +544,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, if (cpages) { prange->actual_loc = best_loc; - svm_range_free_dma_mappings(prange, true); + svm_range_dma_unmap(prange); } else { svm_range_vram_node_free(prange); } @@ -745,7 +745,7 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange, start >> PAGE_SHIFT, end >> PAGE_SHIFT, node->id, 0, trigger); - svm_range_dma_unmap(adev->dev, scratch, 0, npages); + svm_range_dma_unmap_dev(adev->dev, scratch, 0, npages); out_free: kvfree(buf); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 5d7ba7dbf6ce..bed0f8bf83c7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -229,7 +229,7 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap, return r; } -void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, +void svm_range_dma_unmap_dev(struct device *dev, dma_addr_t *dma_addr, unsigned 
long offset, unsigned long npages) { enum dma_data_direction dir = DMA_BIDIRECTIONAL; @@ -247,7 +247,7 @@ void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, } } -void svm_range_free_dma_mappings(struct svm_range *prange, bool unmap_dma) +void svm_range_dma_unmap(struct svm_range *prange) { struct kfd_process_device *pdd; dma_addr_t *dma_addr; @@ -268,10 +268,8 @@ void svm_range_free_dma_mappings(struct svm_range *prange, bool unmap_dma) continue; } dev = >dev->adev->pdev->dev; - if (unmap_dma) - svm_range_dma_unmap(dev, dma_addr, 0, prange->npages); - kvfree(dma_addr); - prange->dma_addr[gpuidx] = NULL; + + svm_range_dma_unmap_dev(dev, dma_addr, 0, prange->npages); } } @@ -279,18 +277,29 @@ static void svm_range_free(struct svm_range *prange, bool do_unmap) { uint64_t size = (prange->last - prange->start + 1) << PAGE_SHIFT; struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms); + uint32_t gpuidx; pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange, prange->start, prange->last); svm_range_vram_node_free(prange); - svm_range_free_dma_mappings(prange, do_unmap); + if (do_unmap) + svm_range_dma_unmap(prange); if (do_unmap && !p->xnack_enabled) { pr_debug("unreserve prange 0x%p size: 0x%llx\n", prange, size); amdgpu_amdkfd_unreserve_mem_limit(NULL, size, KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0); } + + /* free dma_addr array for each gpu */ + for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE; gpuidx++) { + if (prange->dma_addr[gpuidx]) { + kvfree(prange->dma_addr[gpuidx]); + prange->dma_addr[gpuidx] = NULL; + } + } + mutex_destroy(>lock); mutex_destroy(>migrate_mutex); kfree(prange); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 9e668eeefb32..78bfb83cd0c0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -181,9 +181,9 @@ void svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange, struct mm
[PATCH v2] drm/amdkfd: Use partial migrations in GPU page faults
From: Xiaogang Chen This patch implements partial migration in gpu page fault according to migration granularity(default 2MB) and not split svm range in cpu page fault handling. A svm range may include pages from both system ram and vram of one gpu now. These chagnes are expected to improve migration performance and reduce mmu callback and TLB flush workloads. Signed-off-by: xiaogang chen --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 151 ++- drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 6 +- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 88 ++--- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 7 +- 4 files changed, 171 insertions(+), 81 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 7d82c7da223a..653a2edbaba4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -445,7 +445,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n", cpages, npages); else - pr_debug("0x%lx pages migrated\n", cpages); + pr_debug("0x%lx pages collected\n", cpages); r = svm_migrate_copy_to_vram(node, prange, , , scratch, ttm_res_offset); migrate_vma_pages(); @@ -479,6 +479,8 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, * svm_migrate_ram_to_vram - migrate svm range from system to device * @prange: range structure * @best_loc: the device to migrate to + * @start_mgr: start page to migrate + * @last_mgr: last page to migrate * @mm: the process mm structure * @trigger: reason of migration * @@ -489,6 +491,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, */ static int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, + unsigned long start_mgr, unsigned long last_mgr, struct mm_struct *mm, uint32_t trigger) { unsigned long addr, start, end; @@ -498,23 +501,30 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, unsigned 
long cpages = 0; long r = 0; - if (prange->actual_loc == best_loc) { - pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n", -prange->svms, prange->start, prange->last, best_loc); + if (!best_loc) { + pr_debug("request svms 0x%p [0x%lx 0x%lx] migrate to sys ram\n", +prange->svms, start_mgr, last_mgr); return 0; } + if (start_mgr < prange->start || last_mgr > prange->last) { + pr_debug("migration range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n", +start_mgr, last_mgr, prange->start, prange->last); + return -EFAULT; + } + node = svm_range_get_node_by_id(prange, best_loc); if (!node) { pr_debug("failed to get kfd node by id 0x%x\n", best_loc); return -ENODEV; } - pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms, -prange->start, prange->last, best_loc); + pr_debug("svms 0x%p [0x%lx 0x%lx] in [0x%lx 0x%lx] to gpu 0x%x\n", +prange->svms, start_mgr, last_mgr, prange->start, prange->last, +best_loc); - start = prange->start << PAGE_SHIFT; - end = (prange->last + 1) << PAGE_SHIFT; + start = start_mgr << PAGE_SHIFT; + end = (last_mgr + 1) << PAGE_SHIFT; r = svm_range_vram_node_new(node, prange, true); if (r) { @@ -544,8 +554,11 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, if (cpages) { prange->actual_loc = best_loc; - svm_range_free_dma_mappings(prange, true); - } else { + prange->vram_pages = prange->vram_pages + cpages; + } else if (!prange->actual_loc) { + /* if no page migrated and all pages from prange are at +* sys ram drop svm_bo got from svm_range_vram_node_new +*/ svm_range_vram_node_free(prange); } @@ -670,7 +683,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, static long svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange, struct vm_area_struct *vma, uint64_t start, uint64_t end, - uint32_t trigger, struct page *fault_page) + uint32_t trigger, struct page *fault_page, unsigned long *mpages) { struct kfd_process *p = container_of(prange->svms, struct
[PATCH] drm/amdkfd: Use partial migrations in GPU page faults
From: Xiaogang Chen This patch implements partial migration in gpu page fault according to migration granularity(default 2MB) and not split svm range in cpu page fault handling. Now a svm range may have pages from both system ram and vram of one gpu. These chagnes are expected to improve migration performance and reduce mmu callback and TLB flush workloads. Signed-off-by: xiaogang chen --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 153 +++ drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 6 +- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 87 - drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 7 +- 4 files changed, 162 insertions(+), 91 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 7d82c7da223a..5a3aa80a1834 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -479,6 +479,8 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, * svm_migrate_ram_to_vram - migrate svm range from system to device * @prange: range structure * @best_loc: the device to migrate to + * @start_mgr: start page to migrate + * @last_mgr: last page to migrate * @mm: the process mm structure * @trigger: reason of migration * @@ -489,6 +491,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, */ static int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, + unsigned long start_mgr, unsigned long last_mgr, struct mm_struct *mm, uint32_t trigger) { unsigned long addr, start, end; @@ -498,9 +501,9 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, unsigned long cpages = 0; long r = 0; - if (prange->actual_loc == best_loc) { - pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n", -prange->svms, prange->start, prange->last, best_loc); + if (!best_loc) { + pr_debug("request svms 0x%p [0x%lx 0x%lx] migrate to sys ram\n", +prange->svms, start_mgr, last_mgr); return 0; } @@ -513,8 +516,8 @@ 
svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms, prange->start, prange->last, best_loc); - start = prange->start << PAGE_SHIFT; - end = (prange->last + 1) << PAGE_SHIFT; + start = start_mgr << PAGE_SHIFT; + end = (last_mgr + 1) << PAGE_SHIFT; r = svm_range_vram_node_new(node, prange, true); if (r) { @@ -544,10 +547,12 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, if (cpages) { prange->actual_loc = best_loc; - svm_range_free_dma_mappings(prange, true); - } else { + /* only free dma mapping in the migrated range */ + svm_range_free_dma_mappings(prange, true, start_mgr - prange->start, +last_mgr - start_mgr + 1); + } else if (!prange->actual_loc) + /* if all pages from prange are at sys ram */ svm_range_vram_node_free(prange); - } return r < 0 ? r : 0; } @@ -762,6 +767,8 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange, * svm_migrate_vram_to_ram - migrate svm range from device to system * @prange: range structure * @mm: process mm, use current->mm if NULL + * @start_mgr: start page need be migrated to sys ram + * @last_mgr: last page need be migrated to sys ram * @trigger: reason of migration * @fault_page: is from vmf->page, svm_migrate_to_ram(), this is CPU page fault callback * @@ -771,7 +778,8 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange, * 0 - OK, otherwise error code */ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm, - uint32_t trigger, struct page *fault_page) + unsigned long start_mgr, unsigned long last_mgr, + uint32_t trigger, struct page *fault_page) { struct kfd_node *node; struct vm_area_struct *vma; @@ -781,23 +789,30 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm, unsigned long upages = 0; long r = 0; + /* this pragne has no any vram page to migrate to sys ram */ if (!prange->actual_loc) { pr_debug("[0x%lx 0x%lx] already migrated to 
ram\n", prange->start, prange->last); return 0; } + if (start_mgr < prange->start || last_mgr > prange->last) { + pr_debug(&
[PATCH] drm/amdgpu: have bos for PDs/PTS cpu accessible when kfd uses cpu to update vm
From: Xiaogang Chen When kfd uses cpu to update vm iterates all current PDs/PTs bos, adds AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED flag and kmap them to kernel virtual address space before kfd updates the vm that was created by gfx. Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 11 - drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h| 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 28 +++ 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 291977b93b1d..dedf1bf44dc6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2278,17 +2278,14 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) if (r) goto unreserve_bo; + r = amdgpu_vm_pt_cpu_access_root(adev, vm); + if (r) + goto unreserve_bo; + vm->update_funcs = _vm_cpu_funcs; } else { vm->update_funcs = _vm_sdma_funcs; } - /* -* Make sure root PD gets mapped. As vm_update_mode could be changed -* when turning a GFX VM into a compute VM. 
-*/ - r = vm->update_funcs->map_table(to_amdgpu_bo_vm(vm->root.bo)); - if (r) - goto unreserve_bo; dma_fence_put(vm->last_update); vm->last_update = dma_fence_get_stub(); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 9c85d494f2a2..9b3e75de7c5c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -491,6 +491,8 @@ void amdgpu_vm_pt_free_work(struct work_struct *work); void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m); #endif +int amdgpu_vm_pt_cpu_access_root(struct amdgpu_device *adev, struct amdgpu_vm *vm); + /** * amdgpu_vm_tlb_seq - return tlb flush sequence number * @vm: the amdgpu_vm structure to query diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index dea1a64be44d..a08742191b7d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -1044,3 +1044,31 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, return 0; } + +/** + * amdgpu_vm_pt_cpu_access_root - have bo of root PD cpu accessible + * @adev: amdgpu device structure + * @vm: amdgpu vm structure + * + * make root page directory and everything below it cpu accessible. + */ +int amdgpu_vm_pt_cpu_access_root(struct amdgpu_device *adev, struct amdgpu_vm *vm) +{ + struct amdgpu_vm_pt_cursor cursor; + struct amdgpu_vm_bo_base *entry; + int r; + struct amdgpu_bo_vm *bo; + + for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) { + + if (entry->bo) { + bo = to_amdgpu_bo_vm(entry->bo); + entry->bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + r = amdgpu_vm_cpu_funcs.map_table(bo); + if (r) + return r; + } + } + + return 0; +} -- 2.25.1
[PATCH] drm/amdgpu: remove vm sanity check from amdgpu_vm_make_compute
From: Xiaogang Chen Since we allow kfd and graphics to operate on the same GPU VM for interoperation between them, the GPU VM may already have been used by graphics vm operations before kfd turns it into a compute VM. Remove the vm clean check at amdgpu_vm_make_compute. Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index eff73c428b12..291977b93b1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2245,16 +2245,16 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) if (r) return r; - /* Sanity checks */ - if (!amdgpu_vm_pt_is_root_clean(adev, vm)) { - r = -EINVAL; - goto unreserve_bo; - } - /* Check if PD needs to be reinitialized and do it before * changing any other state, in case it fails. */ if (pte_support_ats != vm->pte_support_ats) { + /* Sanity checks */ + if (!amdgpu_vm_pt_is_root_clean(adev, vm)) { + r = -EINVAL; + goto unreserve_bo; + } + vm->pte_support_ats = pte_support_ats; r = amdgpu_vm_pt_clear(adev, vm, to_amdgpu_bo_vm(vm->root.bo), false); -- 2.25.1
[PATCH] drm/amdgpu: remove vm sanity check from amdgpu_vm_make_compute
From: Xiaogang Chen Since we allow kfd and graphics to operate on the same GPU VM for interoperation between them, the GPU VM may already have been used by graphics vm operations before kfd turns a GFX VM into a compute VM. Remove the vm clean check at amdgpu_vm_make_compute. Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index eff73c428b12..33f05297ab7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2246,7 +2246,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) return r; /* Sanity checks */ - if (!amdgpu_vm_pt_is_root_clean(adev, vm)) { + if (pte_support_ats && !amdgpu_vm_pt_is_root_clean(adev, vm)) { r = -EINVAL; goto unreserve_bo; } -- 2.25.1
[PATCH] drm/amdkfd: Fix an issue at userptr buffer validation process.
From: Xiaogang Chen amdgpu_ttm_tt_get_user_pages can fail (-EFAULT). If it fails, mem has no hmm range or user_pages associated with it. Keep it on process_info->userptr_inval_list and keep mem->invalid marked until a following scheduled attempt can validate it. Signed-off-by: Xiaogang Chen --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 28 ++- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 7b1f5933ebaa..fad5183baf80 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2444,7 +2444,9 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, ret = -EAGAIN; goto unlock_out; } - mem->invalid = 0; +/* set mem valid if mem has hmm range associated */ + if (mem->range) + mem->invalid = 0; } unlock_out: @@ -2576,16 +2578,28 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i list_for_each_entry_safe(mem, tmp_mem, &process_info->userptr_inval_list, validate_list.head) { - bool valid = amdgpu_ttm_tt_get_user_pages_done( - mem->bo->tbo.ttm, mem->range); + /* Only check mem with hmm range associated */ + bool valid; - mem->range = NULL; - if (!valid) { - WARN(!mem->invalid, "Invalid BO not marked invalid"); + if (mem->range) { + valid = amdgpu_ttm_tt_get_user_pages_done( + mem->bo->tbo.ttm, mem->range); + + mem->range = NULL; + if (!valid) { + WARN(!mem->invalid, "Invalid BO not marked invalid"); + ret = -EAGAIN; + continue; + } + } else + /* keep mem without hmm range at userptr_inval_list */ + continue; + + if (mem->invalid) { + WARN(1, "Valid BO is marked invalid"); ret = -EAGAIN; continue; } - WARN(mem->invalid, "Valid BO is marked invalid"); list_move_tail(&mem->validate_list.head, &process_info->userptr_valid_list); -- 2.25.1
[PATCH] drm/amdkfd: Fix some issues at userptr buffer validation process.
From: Xiaogang Chen Notice userptr buffer restore process has following issues: 1: amdgpu_ttm_tt_get_user_pages can fail(-EFAULT). If it failed we should not set it valid(mem->invalid = 0). In this case mem has no associated hmm range or user_pages associated. 2: mmu notifier can happen concurrently and update mem->range->notifier->invalidate_seq, but not mem->range->notifier_seq. That causes mem->range->notifier_seq stale when mem is in process_info->userptr_inval_list and amdgpu_amdkfd_restore_userptr_worker got interrupted. At next rescheduled next attempt we use stale mem->range->notifier_seq to compare with mem->range->notifier->invalidate_seq. Signed-off-by: Xiaogang Chen --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 45 +++ 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 7b1f5933ebaa..6881f1b0844c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2444,7 +2444,9 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, ret = -EAGAIN; goto unlock_out; } - mem->invalid = 0; +/* set mem valid if mem has hmm range associated */ + if (mem->range) + mem->invalid = 0; } unlock_out: @@ -2576,16 +2578,28 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i list_for_each_entry_safe(mem, tmp_mem, _info->userptr_inval_list, validate_list.head) { - bool valid = amdgpu_ttm_tt_get_user_pages_done( - mem->bo->tbo.ttm, mem->range); + /* Only check mem with hmm range associated */ + bool valid; - mem->range = NULL; - if (!valid) { - WARN(!mem->invalid, "Invalid BO not marked invalid"); + if (mem->range) { + valid = amdgpu_ttm_tt_get_user_pages_done( + mem->bo->tbo.ttm, mem->range); + + mem->range = NULL; + if (!valid) { + WARN(!mem->invalid, "Invalid BO not marked invalid"); + ret = -EAGAIN; + continue; + } + } else + /* keep mem 
without hmm range at userptr_inval_list */ + continue; + + if (mem->invalid) { + WARN(1, "Valid BO is marked invalid"); ret = -EAGAIN; continue; } - WARN(mem->invalid, "Valid BO is marked invalid"); list_move_tail(>validate_list.head, _info->userptr_valid_list); @@ -2644,8 +2658,23 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) * reference counting inside KFD will handle this case. */ mutex_lock(_info->notifier_lock); - if (process_info->evicted_bos != evicted_bos) + if (process_info->evicted_bos != evicted_bos) { + /* mmu notifier interrupted amdgpu_amdkfd_restore_userptr_worker +* before reschedule next attempt update stale mem->range->notifier_seq +* inside userptr_inval_list +*/ + struct kgd_mem *mem, *tmp_mem; + + list_for_each_entry_safe(mem, tmp_mem, + _info->userptr_inval_list, + validate_list.head) { + + if (mem->range) + mem->range->notifier_seq = mem->range->notifier->invalidate_seq; + } + goto unlock_notifier_out; + } if (confirm_valid_user_pages_locked(process_info)) { WARN(1, "User pages unexpectedly invalid"); -- 2.25.1
[PATCH] drm/amdkfd: Change WARN to pr_debug when same userptr BOs got invalidated by mmu.
From: Xiaogang Chen During KFD restore evicted userptr BOs mmu invalidate callback may invalidate same userptr BOs that have been just restored. When KFD restore process detects it KFD will reschedule another validation process. It is not an error. Change WARN to pr_debug, not put the BOs at userptr_valid_list, let next scheduled delayed work validate them again. Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 13 ++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 7b1f5933ebaa..d0c224703278 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2581,11 +2581,18 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i mem->range = NULL; if (!valid) { - WARN(!mem->invalid, "Invalid BO not marked invalid"); + if (!mem->invalid) + pr_debug("Invalid BO not marked invalid\n"); + + ret = -EAGAIN; + continue; + } + + if (mem->invalid) { + pr_debug("Valid BO is marked invalid\n"); ret = -EAGAIN; continue; } - WARN(mem->invalid, "Valid BO is marked invalid"); list_move_tail(>validate_list.head, _info->userptr_valid_list); @@ -2648,7 +2655,7 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) goto unlock_notifier_out; if (confirm_valid_user_pages_locked(process_info)) { - WARN(1, "User pages unexpectedly invalid"); + pr_debug("User pages unexpectedly invalid, reschedule another attempt\n"); goto unlock_notifier_out; } -- 2.25.1
[PATCH] drm/amdkfd: fix warnings in kfd_migrate.c
From: Xiaogang Chen drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_migrate.c: In function ‘svm_migrate_copy_to_vram’: drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_migrate.c:393:1: warning: label ‘out’ defined but not used [-Wunused-label] 393 | out: | ^~~ drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_migrate.c:525:29: note: format string is defined here drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_migrate.c:40:22: warning: format ‘%d’ expects argument of type ‘int’, but argument 4 has type ‘long int’ [-Wformat=] 40 | #define dev_fmt(fmt) "kfd_migrate: " fmt 525 | dev_dbg(adev->dev, "fail %d to alloc vram\n", r); Fixes: b0b7d79469d9 ("drm/amdkfd: Get prange->offset after svm_range_vram_node_new") Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 734b4eeb0f3e..6a7dd6574646 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -390,7 +390,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, migrate->dst[i + 3] = 0; } #endif -out: + return r; } @@ -522,7 +522,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, r = svm_range_vram_node_new(adev, prange, true); if (r) { - dev_dbg(adev->dev, "fail %d to alloc vram\n", r); + dev_dbg(adev->dev, "fail %ld to alloc vram\n", r); return r; } ttm_res_offset = prange->offset << PAGE_SHIFT; -- 2.25.1
[PATCH] drm/amdkfd: Get prange->offset after svm_range_vram_node_new
From: Xiaogang Chen During migration to vram prange->offset is valid after vram buffer is located, either use old one or allocate a new one. Move svm_range_vram_node_new before migrate for each vma to get valid prange->offset. Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index fd54a00e7229..15791490c23e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -310,12 +310,6 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, src = scratch; dst = (uint64_t *)(scratch + npages); - r = svm_range_vram_node_new(adev, prange, true); - if (r) { - dev_dbg(adev->dev, "fail %d to alloc vram\n", r); - goto out; - } - amdgpu_res_first(prange->ttm_res, ttm_res_offset, npages << PAGE_SHIFT, ); for (i = j = 0; i < npages; i++) { @@ -525,6 +519,12 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, start = prange->start << PAGE_SHIFT; end = (prange->last + 1) << PAGE_SHIFT; + + r = svm_range_vram_node_new(adev, prange, true); + if (r) { + dev_dbg(adev->dev, "fail %d to alloc vram\n", r); + return r; + } ttm_res_offset = prange->offset << PAGE_SHIFT; for (addr = start; addr < end;) { -- 2.25.1
[PATCH v2] drm/amdkfd: Cal vram offset in TTM resource for each svm_migrate_copy_to_vram
From: Xiaogang Chen svm_migrate_ram_to_vram migrates a prange from sys ram to vram. The prange may cross multiple vma. Need remember current dst vram offset in the TTM resource for each migration. Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 17 ++--- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 1c625433ff30..373cd7b0e1ca 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -294,7 +294,7 @@ static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate) static int svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, struct migrate_vma *migrate, struct dma_fence **mfence, -dma_addr_t *scratch) +dma_addr_t *scratch, uint64_t ttm_res_offset) { uint64_t npages = migrate->npages; struct device *dev = adev->dev; @@ -304,8 +304,8 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, uint64_t i, j; int r; - pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start, -prange->last); + pr_debug("svms 0x%p [0x%lx 0x%lx 0x%lx]\n", prange->svms, prange->start, +prange->last, ttm_res_offset); src = scratch; dst = (uint64_t *)(scratch + npages); @@ -316,7 +316,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, goto out; } - amdgpu_res_first(prange->ttm_res, prange->offset << PAGE_SHIFT, + amdgpu_res_first(prange->ttm_res, ttm_res_offset, npages << PAGE_SHIFT, ); for (i = j = 0; i < npages; i++) { struct page *spage; @@ -403,7 +403,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, static long svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, struct vm_area_struct *vma, uint64_t start, - uint64_t end, uint32_t trigger) + uint64_t end, uint32_t trigger, uint64_t ttm_res_offset) { struct kfd_process *p = 
container_of(prange->svms, struct kfd_process, svms); uint64_t npages = (end - start) >> PAGE_SHIFT; @@ -456,7 +456,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, else pr_debug("0x%lx pages migrated\n", cpages); - r = svm_migrate_copy_to_vram(adev, prange, , , scratch); + r = svm_migrate_copy_to_vram(adev, prange, , , scratch, ttm_res_offset); migrate_vma_pages(); pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n", @@ -504,6 +504,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, unsigned long addr, start, end; struct vm_area_struct *vma; struct amdgpu_device *adev; + uint64_t ttm_res_offset; unsigned long cpages = 0; long r = 0; @@ -524,6 +525,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, start = prange->start << PAGE_SHIFT; end = (prange->last + 1) << PAGE_SHIFT; + ttm_res_offset = prange->offset << PAGE_SHIFT; for (addr = start; addr < end;) { unsigned long next; @@ -533,13 +535,14 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, break; next = min(vma->vm_end, end); - r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger); + r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger, ttm_res_offset); if (r < 0) { pr_debug("failed %ld to migrate\n", r); break; } else { cpages += r; } + ttm_res_offset += next - addr; addr = next; } -- 2.25.1
[PATCH] drm/amdkfd: Cal vram offset in page for each svm_migrate_copy_to_vram
From: Xiaogang Chen svm_migrate_ram_to_vram migrate a prange from sys ram to vram. The prange may cross multiple vma. Need remember current dst vram offset in page for each migration. Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 17 ++--- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 1c625433ff30..60664e0cbc1c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -294,7 +294,7 @@ static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate) static int svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, struct migrate_vma *migrate, struct dma_fence **mfence, -dma_addr_t *scratch) +dma_addr_t *scratch, uint64_t *cur_dst) { uint64_t npages = migrate->npages; struct device *dev = adev->dev; @@ -304,8 +304,8 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, uint64_t i, j; int r; - pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start, -prange->last); + pr_debug("svms 0x%p [0x%lx 0x%lx 0x%lx]\n", prange->svms, prange->start, +prange->last, *cur_dst); src = scratch; dst = (uint64_t *)(scratch + npages); @@ -316,7 +316,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, goto out; } - amdgpu_res_first(prange->ttm_res, prange->offset << PAGE_SHIFT, + amdgpu_res_first(prange->ttm_res, *cur_dst << PAGE_SHIFT, npages << PAGE_SHIFT, ); for (i = j = 0; i < npages; i++) { struct page *spage; @@ -381,6 +381,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, migrate->dst[i] = 0; } } + *cur_dst = *cur_dst + i; #ifdef DEBUG_FORCE_MIXED_DOMAINS for (i = 0, j = 0; i < npages; i += 4, j++) { @@ -403,7 +404,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, static long svm_migrate_vma_to_vram(struct amdgpu_device *adev, 
struct svm_range *prange, struct vm_area_struct *vma, uint64_t start, - uint64_t end, uint32_t trigger) + uint64_t end, uint32_t trigger, uint64_t *cur_dst) { struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms); uint64_t npages = (end - start) >> PAGE_SHIFT; @@ -456,7 +457,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, else pr_debug("0x%lx pages migrated\n", cpages); - r = svm_migrate_copy_to_vram(adev, prange, , , scratch); + r = svm_migrate_copy_to_vram(adev, prange, , , scratch, cur_dst); migrate_vma_pages(); pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n", @@ -504,6 +505,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, unsigned long addr, start, end; struct vm_area_struct *vma; struct amdgpu_device *adev; + uint64_t cur_dst; unsigned long cpages = 0; long r = 0; @@ -524,6 +526,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, start = prange->start << PAGE_SHIFT; end = (prange->last + 1) << PAGE_SHIFT; + cur_dst = prange->offset; for (addr = start; addr < end;) { unsigned long next; @@ -533,7 +536,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, break; next = min(vma->vm_end, end); - r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger); + r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger, _dst); if (r < 0) { pr_debug("failed %ld to migrate\n", r); break; -- 2.25.1
[PATCH v3] drm/amdkfd: Prevent user space using both svm and kfd api to register same user buffer
From: Xiaogang Chen When xnack is on user space can use svm page restore to set a vm range without setup it first, then use regular api to register. Currently kfd api and svm are not interoperable. We already have check on that, but for user buffer the mapping address is not same as buffer cpu virtual address. Add checking on that to avoid error propagate to hmm. Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 14 ++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index f79b8e964140..072fa4fbd27f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1065,6 +1065,20 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, mutex_unlock(>svms.lock); return -EADDRINUSE; } + + /* When register user buffer check if it has been registered by svm by +* buffer cpu virtual address. +*/ + if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) && + interval_tree_iter_first(>svms.objects, +args->mmap_offset >> PAGE_SHIFT, +(args->mmap_offset + args->size - 1) >> PAGE_SHIFT)) { + pr_err("User Buffer Address: 0x%llx already allocated by SVM\n", + args->mmap_offset); + mutex_unlock(>svms.lock); + return -EADDRINUSE; + } + mutex_unlock(>svms.lock); #endif mutex_lock(>mutex); -- 2.25.1
[PATCH v2] drm/amdkfd: Prevent user space using both svm and kfd api to register same user buffer
From: Xiaogang Chen When xnack is on user space can use svm page restore to set a vm range without setup it first, then use regular api to register. Currently kfd api and svm are not interoperable. We already have check on that, but for user buffer the mapping address is not same as buffer cpu virtual address. Add checking on that to avoid error propagate to hmm. --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 15 +++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index f79b8e964140..6d9cf860d2da 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1065,6 +1065,21 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, mutex_unlock(>svms.lock); return -EADDRINUSE; } + + /* When register user buffer check if it has been registered by svm by +* buffer cpu virtual address. +*/ + if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) && + interval_tree_iter_first(>svms.objects, + args->mmap_offset >> PAGE_SHIFT, + (args->mmap_offset + args->size - 1) >> PAGE_SHIFT)) { + + pr_err("User Buffer Address: 0x%llx already allocated by SVM\n", + args->mmap_offset); + mutex_unlock(>svms.lock); + return -EADDRINUSE; + } + mutex_unlock(>svms.lock); #endif mutex_lock(>mutex); -- 2.25.1
[PATCH] drm/amdkfd: Prevent user space using both svm and kfd api to register same user buffer
From: Xiaogang Chen When xnack is on user space can use svm page restore to set a vm range without setup it first, then use regular api to register. Currently kfd api and svm are not interoperable. We already have check on that, but for user buffer the mapping address is not same as buffer cpu virtual address. Add checking on that to avoid error propagate to hmm. Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 17 + 1 file changed, 17 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index f79b8e964140..cb7acb0b9b52 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1065,6 +1065,23 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, mutex_unlock(>svms.lock); return -EADDRINUSE; } + + /* When register user buffer check if it has been registered by svm by +* buffer cpu virtual address. +*/ + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { + + if (interval_tree_iter_first(>svms.objects, + untagged_addr(args->mmap_offset) >> PAGE_SHIFT, + (untagged_addr(args->mmap_offset) + args->size - 1) >> PAGE_SHIFT)) { + + pr_err("User Buffer Address: 0x%llx already allocated by SVM\n", + untagged_addr(args->mmap_offset)); + mutex_unlock(>svms.lock); + return -EADDRINUSE; + } + + } mutex_unlock(>svms.lock); #endif mutex_lock(>mutex); -- 2.25.1
[PATCH] drm/amdkfd: Remove skipping userptr buffer mapping when mmu notifier marks it as invalid
From: Xiaogang Chen mmu notifier does not always hold mm->sem during call back. That causes a race condition between kfd userptr buffer mapping and mmu notifier which leads to gpu shader or SDMA accessing userptr buffer before it has been mapped to gpu VM. Always map userptr buffer to avoid that, though it may make some userptr buffers mapped two times. Suggested-by: Felix Kuehling Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 10 -- 1 file changed, 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index da9d475d7ef2..ba72a910d0d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1907,16 +1907,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( */ mutex_lock(>process_info->lock); - /* Lock mmap-sem. If we find an invalid userptr BO, we can be -* sure that the MMU notifier is no longer running -* concurrently and the queues are actually stopped -*/ - if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { - mmap_write_lock(current->mm); - is_invalid_userptr = atomic_read(>invalid); - mmap_write_unlock(current->mm); - } - mutex_lock(>lock); domain = mem->domain; -- 2.25.1
[PATCH v2] drm/amdgpu: config HDP_MISC_CNTL.READ_BUFFER_WATERMARK to fix applications running across multiple GPU config hang.
From: Xiaogang Chen Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c | 3 +++ drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_4_0_sh_mask.h | 2 ++ 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index d7811e0327cb..02400d97a95c 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -146,6 +146,9 @@ static void hdp_v4_0_init_registers(struct amdgpu_device *adev) WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1); + if (adev->ip_versions[HDP_HWIP][0] == IP_VERSION(4, 4, 0)) + WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, READ_BUFFER_WATERMARK, 2); + WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8)); WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40)); } diff --git a/drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_4_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_4_0_sh_mask.h index 25e28691d62d..65c91b0102e4 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_4_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_4_0_sh_mask.h @@ -104,6 +104,7 @@ #define HDP_MISC_CNTL__OUTSTANDING_WRITE_COUNT_1024__SHIFT 0x5 #define HDP_MISC_CNTL__MULTIPLE_READS__SHIFT 0x6 #define HDP_MISC_CNTL__SIMULTANEOUS_READS_WRITES__SHIFT0xb +#define HDP_MISC_CNTL__READ_BUFFER_WATERMARK__SHIFT 0xe #define HDP_MISC_CNTL__FED_ENABLE__SHIFT 0x15 #define HDP_MISC_CNTL__SYSHUB_CHANNEL_PRIORITY__SHIFT 0x17 #define HDP_MISC_CNTL__MMHUB_WRBURST_ENABLE__SHIFT 0x18 @@ -118,6 +119,7 @@ #define HDP_MISC_CNTL__OUTSTANDING_WRITE_COUNT_1024_MASK 0x0020L #define HDP_MISC_CNTL__MULTIPLE_READS_MASK 0x0040L #define HDP_MISC_CNTL__SIMULTANEOUS_READS_WRITES_MASK 0x0800L +#define HDP_MISC_CNTL__READ_BUFFER_WATERMARK_MASK 0xc000L #define HDP_MISC_CNTL__FED_ENABLE_MASK 0x0020L #define HDP_MISC_CNTL__SYSHUB_CHANNEL_PRIORITY_MASK0x0080L #define HDP_MISC_CNTL__MMHUB_WRBURST_ENABLE_MASK 0x0100L -- 2.25.1
[PATCH] drm/amdgpu: config HDP_MISC_CNTL.READ_BUFFER_WATERMARK to fix applications running across multiple GPU config hang.
From: Xiaogang Chen Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c | 1 + drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_4_0_sh_mask.h | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index d7811e0327cb..aa2c7c3f721f 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -145,6 +145,7 @@ static void hdp_v4_0_init_registers(struct amdgpu_device *adev) } WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1); + WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, READ_BUFFER_WATERMARK, 2); WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8)); WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40)); diff --git a/drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_4_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_4_0_sh_mask.h index 25e28691d62d..65c91b0102e4 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_4_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_4_0_sh_mask.h @@ -104,6 +104,7 @@ #define HDP_MISC_CNTL__OUTSTANDING_WRITE_COUNT_1024__SHIFT 0x5 #define HDP_MISC_CNTL__MULTIPLE_READS__SHIFT 0x6 #define HDP_MISC_CNTL__SIMULTANEOUS_READS_WRITES__SHIFT0xb +#define HDP_MISC_CNTL__READ_BUFFER_WATERMARK__SHIFT 0xe #define HDP_MISC_CNTL__FED_ENABLE__SHIFT 0x15 #define HDP_MISC_CNTL__SYSHUB_CHANNEL_PRIORITY__SHIFT 0x17 #define HDP_MISC_CNTL__MMHUB_WRBURST_ENABLE__SHIFT 0x18 @@ -118,6 +119,7 @@ #define HDP_MISC_CNTL__OUTSTANDING_WRITE_COUNT_1024_MASK 0x0020L #define HDP_MISC_CNTL__MULTIPLE_READS_MASK 0x0040L #define HDP_MISC_CNTL__SIMULTANEOUS_READS_WRITES_MASK 0x0800L +#define HDP_MISC_CNTL__READ_BUFFER_WATERMARK_MASK 0xc000L #define HDP_MISC_CNTL__FED_ENABLE_MASK 0x0020L #define HDP_MISC_CNTL__SYSHUB_CHANNEL_PRIORITY_MASK0x0080L #define HDP_MISC_CNTL__MMHUB_WRBURST_ENABLE_MASK 0x0100L -- 2.25.1
[PATCH] drm/amdkfd: explicitly create/destroy queue attributes under /sys
From: Xiaogang Chen When application is about finish it destroys queues it has created by an ioctl. Driver deletes queue entry(/sys/class/kfd/kfd/proc/pid/queues/queueid/) which is directory including this queue all attributes. Low level kernel code deletes all attributes under this directory. The lock from kernel is on queue entry, not its attributes. At meantime another user space application can read the attributes. There is possibility that the application can hold/read the attributes while kernel is deleting the queue entry, cause the application have invalid memory access, then killed by kernel. Driver changes: explicitly create/destroy each attribute for each queue, let kernel put lock on each attribute too. Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h| 3 +++ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 33 +++- 2 files changed, 13 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 0c3f911e3bf4..045da300749e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -546,6 +546,9 @@ struct queue { /* procfs */ struct kobject kobj; + struct attribute attr_guid; + struct attribute attr_size; + struct attribute attr_type; }; enum KFD_MQD_TYPE { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 9158f9754a24..04a5638f9196 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -73,6 +73,8 @@ static void evict_process_worker(struct work_struct *work); static void restore_process_worker(struct work_struct *work); static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device *pdd); +static void kfd_sysfs_create_file(struct kobject *kobj, struct attribute *attr, + char *name); struct kfd_procfs_tree { struct kobject *kobj; @@ -441,35 +443,12 @@ static ssize_t kfd_sysfs_counters_show(struct kobject *kobj, return 0; } 
-static struct attribute attr_queue_size = { - .name = "size", - .mode = KFD_SYSFS_FILE_MODE -}; - -static struct attribute attr_queue_type = { - .name = "type", - .mode = KFD_SYSFS_FILE_MODE -}; - -static struct attribute attr_queue_gpuid = { - .name = "gpuid", - .mode = KFD_SYSFS_FILE_MODE -}; - -static struct attribute *procfs_queue_attrs[] = { - _queue_size, - _queue_type, - _queue_gpuid, - NULL -}; - static const struct sysfs_ops procfs_queue_ops = { .show = kfd_procfs_queue_show, }; static struct kobj_type procfs_queue_type = { .sysfs_ops = _queue_ops, - .default_attrs = procfs_queue_attrs, }; static const struct sysfs_ops procfs_stats_ops = { @@ -511,6 +490,10 @@ int kfd_procfs_add_queue(struct queue *q) return ret; } + kfd_sysfs_create_file(>kobj, >attr_guid, "guid"); + kfd_sysfs_create_file(>kobj, >attr_size, "size"); + kfd_sysfs_create_file(>kobj, >attr_type, "type"); + return 0; } @@ -655,6 +638,10 @@ void kfd_procfs_del_queue(struct queue *q) if (!q) return; + sysfs_remove_file(>kobj, >attr_guid); + sysfs_remove_file(>kobj, >attr_size); + sysfs_remove_file(>kobj, >attr_type); + kobject_del(>kobj); kobject_put(>kobj); } -- 2.25.1
[PATCH 2/2] drm/amdgpu/display: buffer INTERRUPT_LOW_IRQ_CONTEXT interrupt work
From: Xiaogang Chen amdgpu DM handles INTERRUPT_LOW_IRQ_CONTEXT interrupt(hpd, hpd_rx) by using work queue and uses single work_struct. If previous interrupt has not been handled new interrupts(same type) will be discarded and driver just sends "amdgpu_dm_irq_schedule_work FAILED" message out. If some important hpd, hpd_rx related interrupts are missed by driver the hot (un)plug devices may cause system hang or unstable, such as system resumes from S3 sleep with mst device connected. This patch dynamically allocates new amdgpu_dm_irq_handler_data for new interrupts if previous INTERRUPT_LOW_IRQ_CONTEXT interrupt work has not been handled. So the new interrupt works can be queued to the same workqueue_struct, instead discard the new interrupts. All allocated amdgpu_dm_irq_handler_data are put into a single linked list and will be reused after. Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 14 +-- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c | 114 ++--- 2 files changed, 80 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index c9d82b9..730e540 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -69,18 +69,6 @@ struct common_irq_params { }; /** - * struct irq_list_head - Linked-list for low context IRQ handlers. - * - * @head: The list_head within handler_data - * @work: A work_struct containing the deferred handler work - */ -struct irq_list_head { - struct list_head head; - /* In case this interrupt needs post-processing, 'work' will be queued*/ - struct work_struct work; -}; - -/** * struct dm_compressor_info - Buffer info used by frame buffer compression * @cpu_addr: MMIO cpu addr * @bo_ptr: Pointer to the buffer object @@ -270,7 +258,7 @@ struct amdgpu_display_manager { * Note that handlers are called in the same order as they were * registered (FIFO). 
*/ - struct irq_list_head irq_handler_list_low_tab[DAL_IRQ_SOURCES_NUMBER]; + struct list_head irq_handler_list_low_tab[DAL_IRQ_SOURCES_NUMBER]; /** * @irq_handler_list_high_tab: diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c index 3577785..ada344a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c @@ -82,6 +82,7 @@ struct amdgpu_dm_irq_handler_data { struct amdgpu_display_manager *dm; /* DAL irq source which registered for this interrupt. */ enum dc_irq_source irq_source; + struct work_struct work; }; #define DM_IRQ_TABLE_LOCK(adev, flags) \ @@ -111,20 +112,10 @@ static void init_handler_common_data(struct amdgpu_dm_irq_handler_data *hcd, */ static void dm_irq_work_func(struct work_struct *work) { - struct irq_list_head *irq_list_head = - container_of(work, struct irq_list_head, work); - struct list_head *handler_list = _list_head->head; - struct amdgpu_dm_irq_handler_data *handler_data; - - list_for_each_entry(handler_data, handler_list, list) { - DRM_DEBUG_KMS("DM_IRQ: work_func: for dal_src=%d\n", - handler_data->irq_source); + struct amdgpu_dm_irq_handler_data *handler_data = +container_of(work, struct amdgpu_dm_irq_handler_data, work); - DRM_DEBUG_KMS("DM_IRQ: schedule_work: for dal_src=%d\n", - handler_data->irq_source); - - handler_data->handler(handler_data->handler_arg); - } + handler_data->handler(handler_data->handler_arg); /* Call a DAL subcomponent which registered for interrupt notification * at INTERRUPT_LOW_IRQ_CONTEXT. 
@@ -156,7 +147,7 @@ static struct list_head *remove_irq_handler(struct amdgpu_device *adev, break; case INTERRUPT_LOW_IRQ_CONTEXT: default: - hnd_list = >dm.irq_handler_list_low_tab[irq_source].head; + hnd_list = >dm.irq_handler_list_low_tab[irq_source]; break; } @@ -287,7 +278,8 @@ void *amdgpu_dm_irq_register_interrupt(struct amdgpu_device *adev, break; case INTERRUPT_LOW_IRQ_CONTEXT: default: - hnd_list = >dm.irq_handler_list_low_tab[irq_source].head; + hnd_list = >dm.irq_handler_list_low_tab[irq_source]; + INIT_WORK(_data->work, dm_irq_work_func); break; } @@ -369,7 +361,7 @@ void amdgpu_dm_irq_unregister_interrupt(struct amdgpu_device *adev, int amdgpu_dm_irq_init(struct amdgpu_device *adev) { int src; - struct irq_list_head *lh; + struct list_head *
[PATCH 1/2] drm: distinguish return value of drm_dp_check_and_send_link_address.
From: Xiaogang Chen drm_dp_check_and_send_link_address discovers the MST device topology. It can return both positive and negative values. A positive return value means no error was found. A negative return value means an error was found, such as a NAK, a timeout, etc. The following drm_kms_helper_hotplug_event should be called only when drm_dp_check_and_send_link_address returns a positive value. Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/drm_dp_mst_topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 17dbed0..3ef5206 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -2650,7 +2650,7 @@ static void drm_dp_mst_link_probe_work(struct work_struct *work) drm_dp_mst_topology_put_mstb(mstb); mutex_unlock(>probe_lock); - if (ret) + if (ret > 0) drm_kms_helper_hotplug_event(dev); } -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx