This patch is

Reviewed-by: Felix Kuehling <felix.kuehl...@amd.com>

Regards,
  Felix
On 3/12/2019 9:17 PM, Yang, Philip wrote:
> A userptr may cross two VMAs if the forked child process (one that does
> not call exec after fork) mallocs a buffer, frees it, and then mallocs a
> larger buffer: the kernel creates the new VMA adjacent to the old VMA
> that was cloned from the parent process, so some pages of the userptr
> fall in the first VMA and the rest in the second.
>
> HMM expects a range to cover only one VMA, so loop over all VMAs in the
> address range and create multiple ranges to handle this case. See
> is_mergeable_anon_vma in mm/mmap.c for details on why the two anonymous
> VMAs cannot be merged.
>
> Change-Id: I0ca8c77e28deabccc139906f9ffee04b7e383314
> Signed-off-by: Philip Yang <philip.y...@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 126 +++++++++++++++++-------
>  1 file changed, 91 insertions(+), 35 deletions(-)
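For anyone trying to picture the failure mode, the sequence above can be
sketched from user space roughly as follows. This is a minimal
illustrative sketch, not a tested reproducer: register_userptr() is a
hypothetical stand-in for whatever path hands the buffer to the driver,
the sizes are arbitrary, and whether free()/malloc() actually unmaps and
remaps here depends on the C library's allocator. The point is only that
the second, larger allocation can land in a fresh VMA directly adjacent
to one cloned from the parent, and since the two anonymous VMAs have
different anon_vmas they cannot be merged, so a single userptr
registration then spans both.

    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>

    /* Hypothetical stand-in for the real registration path (e.g. the
     * amdgpu userptr ioctl); a no-op here so the sketch is
     * self-contained. */
    static int register_userptr(void *addr, size_t size)
    {
            (void)addr;
            (void)size;
            return 0;
    }

    int main(void)
    {
            /* Large enough that the allocator typically mmap()s it,
             * giving it its own VMA. */
            void *buf = malloc(1 << 20);

            if (fork() == 0) {
                    /* Child, no exec: buf's VMA was cloned from the
                     * parent. */
                    free(buf);

                    /* The larger replacement may occupy a new VMA
                     * adjacent to the cloned one, and the two anonymous
                     * VMAs cannot merge. */
                    buf = malloc(2 << 20);
                    memset(buf, 0, 2 << 20);
                    register_userptr(buf, 2 << 20); /* crosses two VMAs */
            }
            return 0;
    }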
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index c1240bf243ba..c14198737dcd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -711,7 +711,8 @@ struct amdgpu_ttm_tt {
>          struct task_struct      *usertask;
>          uint32_t                userflags;
>  #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
> -        struct hmm_range        range;
> +        struct hmm_range        *ranges;
> +        int                     nr_ranges;
>  #endif
>  };
>
> @@ -723,62 +724,108 @@ struct amdgpu_ttm_tt {
>   * once afterwards to stop HMM tracking
>   */
>  #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
> +
> +/* Support Userptr pages cross max 16 vmas */
> +#define MAX_NR_VMAS     (16)
> +
>  int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>  {
>          struct amdgpu_ttm_tt *gtt = (void *)ttm;
>          struct mm_struct *mm = gtt->usertask->mm;
> -        unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
> -        struct hmm_range *range = &gtt->range;
> -        int r = 0, i;
> +        unsigned long start = gtt->userptr;
> +        unsigned long end = start + ttm->num_pages * PAGE_SIZE;
> +        struct hmm_range *ranges;
> +        struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS];
> +        uint64_t *pfns, f;
> +        int r = 0, i, nr_pages;
>
>          if (!mm) /* Happens during process shutdown */
>                  return -ESRCH;
>
> -        amdgpu_hmm_init_range(range);
> -
>          down_read(&mm->mmap_sem);
>
> -        range->vma = find_vma(mm, gtt->userptr);
> -        if (!range_in_vma(range->vma, gtt->userptr, end))
> -                r = -EFAULT;
> -        else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
> -                range->vma->vm_file)
> +        /* user pages may cross multiple VMAs */
> +        gtt->nr_ranges = 0;
> +        do {
> +                unsigned long vm_start;
> +
> +                if (gtt->nr_ranges >= MAX_NR_VMAS) {
> +                        DRM_ERROR("Too many VMAs in userptr range\n");
> +                        r = -EFAULT;
> +                        goto out;
> +                }
> +
> +                vm_start = vma ? vma->vm_end : start;
> +                vma = find_vma(mm, vm_start);
> +                if (unlikely(!vma || vm_start < vma->vm_start)) {
> +                        r = -EFAULT;
> +                        goto out;
> +                }
> +                vmas[gtt->nr_ranges++] = vma;
> +        } while (end > vma->vm_end);
> +
> +        DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n",
> +                start, gtt->nr_ranges, ttm->num_pages);
> +
> +        if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
> +                vmas[0]->vm_file)) {
>                  r = -EPERM;
> -        if (r)
>                  goto out;
> +        }
>
> -        range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
> -                                     GFP_KERNEL);
> -        if (range->pfns == NULL) {
> +        ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL);
> +        if (unlikely(!ranges)) {
>                  r = -ENOMEM;
>                  goto out;
>          }
> -        range->start = gtt->userptr;
> -        range->end = end;
>
> -        range->pfns[0] = range->flags[HMM_PFN_VALID];
> -        range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ?
> -                0 : range->flags[HMM_PFN_WRITE];
> -        for (i = 1; i < ttm->num_pages; i++)
> -                range->pfns[i] = range->pfns[0];
> +        pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL);
> +        if (unlikely(!pfns)) {
> +                r = -ENOMEM;
> +                goto out_free_ranges;
> +        }
> +
> +        for (i = 0; i < gtt->nr_ranges; i++)
> +                amdgpu_hmm_init_range(&ranges[i]);
> +
> +        f = ranges[0].flags[HMM_PFN_VALID];
> +        f |= amdgpu_ttm_tt_is_readonly(ttm) ?
> +                0 : ranges[0].flags[HMM_PFN_WRITE];
> +        memset64(pfns, f, ttm->num_pages);
> +
> +        for (nr_pages = 0, i = 0; i < gtt->nr_ranges; i++) {
> +                ranges[i].vma = vmas[i];
> +                ranges[i].start = max(start, vmas[i]->vm_start);
> +                ranges[i].end = min(end, vmas[i]->vm_end);
> +                ranges[i].pfns = pfns + nr_pages;
> +                nr_pages += (ranges[i].end - ranges[i].start) / PAGE_SIZE;
> +
> +                r = hmm_vma_fault(&ranges[i], true);
> +                if (unlikely(r))
> +                        break;
> +        }
> +        if (unlikely(r)) {
> +                while (i--)
> +                        hmm_vma_range_done(&ranges[i]);
>
> -        /* This may trigger page table update */
> -        r = hmm_vma_fault(range, true);
> -        if (r)
>                  goto out_free_pfns;
> +        }
>
>          up_read(&mm->mmap_sem);
>
>          for (i = 0; i < ttm->num_pages; i++)
> -                pages[i] = hmm_pfn_to_page(range, range->pfns[i]);
> +                pages[i] = hmm_pfn_to_page(&ranges[0], pfns[i]);
> +        gtt->ranges = ranges;
>
>          return 0;
>
>  out_free_pfns:
> -        kvfree(range->pfns);
> -        range->pfns = NULL;
> +        kvfree(pfns);
> +out_free_ranges:
> +        kvfree(ranges);
>  out:
>          up_read(&mm->mmap_sem);
> +
>          return r;
>  }
>
> @@ -792,15 +839,23 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
>  {
>          struct amdgpu_ttm_tt *gtt = (void *)ttm;
>          bool r = false;
> +        int i;
>
>          if (!gtt || !gtt->userptr)
>                  return false;
>
> -        WARN_ONCE(!gtt->range.pfns, "No user pages to check\n");
> -        if (gtt->range.pfns) {
> -                r = hmm_vma_range_done(&gtt->range);
> -                kvfree(gtt->range.pfns);
> -                gtt->range.pfns = NULL;
> +        DRM_DEBUG_DRIVER("user_pages_done 0x%llx nr_ranges %d pages 0x%lx\n",
> +                gtt->userptr, gtt->nr_ranges, ttm->num_pages);
> +
> +        WARN_ONCE(!gtt->ranges || !gtt->ranges[0].pfns,
> +                "No user pages to check\n");
> +
> +        if (gtt->ranges) {
> +                for (i = 0; i < gtt->nr_ranges; i++)
> +                        r |= hmm_vma_range_done(&gtt->ranges[i]);
> +                kvfree(gtt->ranges[0].pfns);
> +                kvfree(gtt->ranges);
> +                gtt->ranges = NULL;
>          }
>
>          return r;
> @@ -884,8 +939,9 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
>                  sg_free_table(ttm->sg);
>
>  #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
> -        if (gtt->range.pfns &&
> -            ttm->pages[0] == hmm_pfn_to_page(&gtt->range, gtt->range.pfns[0]))
> +        if (gtt->ranges &&
> +            ttm->pages[0] == hmm_pfn_to_page(&gtt->ranges[0],
> +                                             gtt->ranges[0].pfns[0]))
>                  WARN_ONCE(1, "Missing get_user_page_done\n");
>  #endif
>  }
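A side note on the new faulting loop: the clamp-and-slice arithmetic is
easy to check in isolation. Below is a self-contained sketch using plain
C stand-ins for the interval fields of vm_area_struct and hmm_range (the
struct and function names here are mine, not the kernel's). Each
sub-range is clamped to its VMA exactly as the patch does with
max()/min(), and the shared pfns array is handed out in consecutive
slices.

    #include <assert.h>

    #define PAGE_SIZE 4096UL

    /* Hypothetical stand-ins for the interval fields of
     * vm_area_struct and hmm_range. */
    struct vma_iv { unsigned long vm_start, vm_end; };
    struct range_iv { unsigned long start, end, *pfns; };

    /* Clamp [start, end) to each covering VMA and hand out consecutive
     * slices of the shared pfns array; returns the total pages covered.
     * Same arithmetic as the loop in amdgpu_ttm_tt_get_user_pages(). */
    static unsigned long split_ranges(unsigned long start, unsigned long end,
                                      const struct vma_iv *vmas, int nr,
                                      unsigned long *pfns,
                                      struct range_iv *out)
    {
            unsigned long nr_pages = 0;
            int i;

            for (i = 0; i < nr; i++) {
                    out[i].start = start > vmas[i].vm_start ?
                                   start : vmas[i].vm_start;
                    out[i].end = end < vmas[i].vm_end ?
                                 end : vmas[i].vm_end;
                    out[i].pfns = pfns + nr_pages;
                    nr_pages += (out[i].end - out[i].start) / PAGE_SIZE;
            }
            return nr_pages;
    }

    int main(void)
    {
            /* A userptr [0x1000, 0x6000) crossing two adjacent VMAs. */
            const struct vma_iv vmas[2] = {
                    { 0x0000, 0x3000 }, { 0x3000, 0x8000 }
            };
            struct range_iv out[2];
            unsigned long pfns[5];

            assert(split_ranges(0x1000, 0x6000, vmas, 2, pfns, out) == 5);
            assert(out[0].start == 0x1000 && out[0].end == 0x3000);
            assert(out[1].start == 0x3000 && out[1].end == 0x6000);
            assert(out[1].pfns == pfns + 2); /* slice 2 starts after 2 pages */
            return 0;
    }

Keeping one shared pfns array is what lets the later pages[] fill loop
and the hmm_pfn_to_page() walk stay linear, unchanged apart from taking
ranges[0].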