[+Jerome]

On 2019-06-03 7:20 p.m., Yang, Philip wrote:
>
> On 2019-06-03 5:02 p.m., Kuehling, Felix wrote:
>> On 2019-06-03 2:44 p.m., Yang, Philip wrote:
>>> HMM provides new APIs and helps in kernel 5.2-rc1 to simplify driver
>>> path. The old hmm APIs are deprecated and will be removed in future.
>>>
>>> Below are changes in driver:
>>>
>>> 1. Change hmm_vma_fault to hmm_range_register and hmm_range_fault which
>>> supports range with multiple vmas, remove the multiple vmas handle path
>>> and data structure.
>>> 2. Change hmm_vma_range_done to hmm_range_unregister.
>>> 3. Use default flags to avoid pre-fill pfn arrays.
>>> 4. Use new hmm_device_ helpers.
>>>
>>> v2: retry if hmm_range_fault returns -EAGAIN
>>> v3: define MAX_RETRY_HMM_RANGE_FAULT, skip drop mmap_sem if retry
>> I think dropping mmap_sem for retry was correct in v2 of this patch. Now
>> you will try to take the mmap_sem multiple times without dropping it if
>> you retry.
>>
>> We don't need to hold the mmap_sem during hmm_range_wait_until_valid.
>>
> hmm_range_fault will drop the mmap_sem internally before returning
> -EAGAIN, I think it does this on purpose to avoid upper level call retry
> whithout release mmap_sem in between.

Good catch.

Looks like the documentation of hmm_range_fault is outdated. It says 
-EAGAIN can only be returned if block==false, and that mmap_sem is not 
dropped if block==true. Both of these statements are no longer true.

The example code in Documentation/vm/hmm.rst drops the mmap_sem 
explicitly after hmm_range_snapshot returned -EAGAIN. It seems that 
hmm_range_snapshot behaves subtly differently from hmm_range_fault with 
respect to dropping the mmap_sem. That's very confusing and annoying.

Regards,
   Felix


>   I find v2 was incorrect after
> reading hmm code again today. I should mention details of this change in
> my commit, see more below.
>
> Philip
>
>> See more below ...
>>
>>
>>> Change-Id: I1a00f88459f3b96c9536933e9a17eb1ebaa78113
>>> Signed-off-by: Philip Yang <philip.y...@amd.com>
>>> ---
>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c  |   1 -
>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 149 ++++++++++--------------
>>>     2 files changed, 64 insertions(+), 86 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>> index 41ccee49a224..e0bb47ce570b 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>> @@ -519,7 +519,6 @@ void amdgpu_hmm_init_range(struct hmm_range *range)
>>>                     range->flags = hmm_range_flags;
>>>                     range->values = hmm_range_values;
>>>                     range->pfn_shift = PAGE_SHIFT;
>>> -           range->pfns = NULL;
>>>                     INIT_LIST_HEAD(&range->list);
>>>             }
>>>     }
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> index eb4b85061c7e..9de6a2f5e572 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> @@ -711,8 +711,7 @@ struct amdgpu_ttm_tt {
>>>             struct task_struct      *usertask;
>>>             uint32_t                userflags;
>>>     #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
>>> -   struct hmm_range        *ranges;
>>> -   int                     nr_ranges;
>>> +   struct hmm_range        *range;
>>>     #endif
>>>     };
>>>     
>>> @@ -725,57 +724,36 @@ struct amdgpu_ttm_tt {
>>>      */
>>>     #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
>>>     
>>> -/* Support Userptr pages cross max 16 vmas */
>>> -#define MAX_NR_VMAS        (16)
>>> +#define MAX_RETRY_HMM_RANGE_FAULT  16
>>>     
>>>     int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page 
>>> **pages)
>>>     {
>>>             struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>>             struct mm_struct *mm = gtt->usertask->mm;
>>>             unsigned long start = gtt->userptr;
>>> -   unsigned long end = start + ttm->num_pages * PAGE_SIZE;
>>> -   struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS];
>>> -   struct hmm_range *ranges;
>>> -   unsigned long nr_pages, i;
>>> -   uint64_t *pfns, f;
>>> +   struct vm_area_struct *vma;
>>> +   struct hmm_range *range;
>>> +   unsigned long i;
>>> +   uint64_t *pfns;
>>> +   int retry = 0;
>>>             int r = 0;
>>>     
>>>             if (!mm) /* Happens during process shutdown */
>>>                     return -ESRCH;
>>>     
>>> -   down_read(&mm->mmap_sem);
>>> -
>>> -   /* user pages may cross multiple VMAs */
>>> -   gtt->nr_ranges = 0;
>>> -   do {
>>> -           unsigned long vm_start;
>>> -
>>> -           if (gtt->nr_ranges >= MAX_NR_VMAS) {
>>> -                   DRM_ERROR("Too many VMAs in userptr range\n");
>>> -                   r = -EFAULT;
>>> -                   goto out;
>>> -           }
>>> -
>>> -           vm_start = vma ? vma->vm_end : start;
>>> -           vma = find_vma(mm, vm_start);
>>> -           if (unlikely(!vma || vm_start < vma->vm_start)) {
>>> -                   r = -EFAULT;
>>> -                   goto out;
>>> -           }
>>> -           vmas[gtt->nr_ranges++] = vma;
>>> -   } while (end > vma->vm_end);
>>> -
>>> -   DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n",
>>> -           start, gtt->nr_ranges, ttm->num_pages);
>>> -
>>> +   vma = find_vma(mm, start);
>>> +   if (unlikely(!vma || start < vma->vm_start)) {
>>> +           r = -EFAULT;
>>> +           goto out;
>>> +   }
>>>             if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
>>> -           vmas[0]->vm_file)) {
>>> +           vma->vm_file)) {
>>>                     r = -EPERM;
>>>                     goto out;
>>>             }
>>>     
>>> -   ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL);
>>> -   if (unlikely(!ranges)) {
>>> +   range = kzalloc(sizeof(*range), GFP_KERNEL);
>>> +   if (unlikely(!range)) {
>>>                     r = -ENOMEM;
>>>                     goto out;
>>>             }
>>> @@ -786,61 +764,62 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, 
>>> struct page **pages)
>>>                     goto out_free_ranges;
>>>             }
>>>     
>>> -   for (i = 0; i < gtt->nr_ranges; i++)
>>> -           amdgpu_hmm_init_range(&ranges[i]);
>>> +   amdgpu_hmm_init_range(range);
>>> +   range->default_flags = range->flags[HMM_PFN_VALID];
>>> +   range->default_flags |= amdgpu_ttm_tt_is_readonly(ttm) ?
>>> +                           0 : range->flags[HMM_PFN_WRITE];
>>> +   range->pfn_flags_mask = 0;
>>> +   range->pfns = pfns;
>>> +   hmm_range_register(range, mm, start,
>>> +                      start + ttm->num_pages * PAGE_SIZE, PAGE_SHIFT);
>>>     
>>> -   f = ranges[0].flags[HMM_PFN_VALID];
>>> -   f |= amdgpu_ttm_tt_is_readonly(ttm) ?
>>> -                           0 : ranges[0].flags[HMM_PFN_WRITE];
>>> -   memset64(pfns, f, ttm->num_pages);
>>> +retry:
>>> +   /*
>>> +    * Just wait for range to be valid, safe to ignore return value as we
>>> +    * will use the return value of hmm_range_fault() below under the
>>> +    * mmap_sem to ascertain the validity of the range.
>>> +    */
>>> +   hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT);
>>>     
>>> -   for (nr_pages = 0, i = 0; i < gtt->nr_ranges; i++) {
>>> -           ranges[i].vma = vmas[i];
>>> -           ranges[i].start = max(start, vmas[i]->vm_start);
>>> -           ranges[i].end = min(end, vmas[i]->vm_end);
>>> -           ranges[i].pfns = pfns + nr_pages;
>>> -           nr_pages += (ranges[i].end - ranges[i].start) / PAGE_SIZE;
>>> +   down_read(&mm->mmap_sem);
>> If you take the mmap_sem here, you have to drop it before goto retry.
>> Otherwise you'll end up taking the mmap_sem multiple times. It's not
>> fatal immediately because you can take multiple read locks. But if you
>> don't drop all the read locks, no writer will ever be able to take a
>> write lock again.
>>
> mmap_sem dropped by hmm_range_fault before returning -EAGAIN, have to
> take it again.
>>>     
>>> -           r = hmm_vma_fault(&ranges[i], true);
>>> -           if (unlikely(r))
>>> -                   break;
>>> -   }
>>> -   if (unlikely(r)) {
>>> -           while (i--)
>>> -                   hmm_vma_range_done(&ranges[i]);
>>> +   r = hmm_range_fault(range, true);
>>> +   if (unlikely(r < 0)) {
>>> +           if (likely(r == -EAGAIN)) {
>>> +                   if (retry++ < MAX_RETRY_HMM_RANGE_FAULT)
>>> +                           goto retry;
>> You're still holding the mmap_sem here.
>>
>>
>>> +                   else
>>> +                           pr_err("Retry hmm fault too many times\n");
>>> +           }
>>>     
>>>                     goto out_free_pfns;
>>>             }
>>>     
>>> -   up_read(&mm->mmap_sem);
>>> -
>>>             for (i = 0; i < ttm->num_pages; i++) {
>>> -           pages[i] = hmm_pfn_to_page(&ranges[0], pfns[i]);
>>> -           if (!pages[i]) {
>>> +           pages[i] = hmm_device_entry_to_page(range, pfns[i]);
>>> +           if (unlikely(!pages[i])) {
>>>                             pr_err("Page fault failed for pfn[%lu] = 
>>> 0x%llx\n",
>>>                                    i, pfns[i]);
>>> -                   goto out_invalid_pfn;
>>> +                   r = -ENOMEM;
>>> +
>>> +                   goto out_free_pfns;
>>>                     }
>>>             }
>>> -   gtt->ranges = ranges;
>>> +
>>> +   up_read(&mm->mmap_sem);
>> Why do you drop the mmap_sem so late? Does the loop above require it? I
>> don't believe it does.
>>
> Yes, I can drop the mmap_sem before the loop, just wanted to remove one
> line of duplicate up_read call.
>
>> Regards,
>>      Felix
>>
>>
>>> +
>>> +   gtt->range = range;
>>>     
>>>             return 0;
>>>     
>>>     out_free_pfns:
>>> +   up_read(&mm->mmap_sem);
>>> +   hmm_range_unregister(range);
>>>             kvfree(pfns);
>>>     out_free_ranges:
>>> -   kvfree(ranges);
>>> +   kfree(range);
>>>     out:
>>> -   up_read(&mm->mmap_sem);
>>> -
>>>             return r;
>>> -
>>> -out_invalid_pfn:
>>> -   for (i = 0; i < gtt->nr_ranges; i++)
>>> -           hmm_vma_range_done(&ranges[i]);
>>> -   kvfree(pfns);
>>> -   kvfree(ranges);
>>> -   return -ENOMEM;
>>>     }
>>>     
>>>     /**
>>> @@ -853,23 +832,23 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt 
>>> *ttm)
>>>     {
>>>             struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>>             bool r = false;
>>> -   int i;
>>>     
>>>             if (!gtt || !gtt->userptr)
>>>                     return false;
>>>     
>>> -   DRM_DEBUG_DRIVER("user_pages_done 0x%llx nr_ranges %d pages 0x%lx\n",
>>> -           gtt->userptr, gtt->nr_ranges, ttm->num_pages);
>>> +   DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%lx\n",
>>> +           gtt->userptr, ttm->num_pages);
>>>     
>>> -   WARN_ONCE(!gtt->ranges || !gtt->ranges[0].pfns,
>>> +   WARN_ONCE(!gtt->range || !gtt->range->pfns,
>>>                     "No user pages to check\n");
>>>     
>>> -   if (gtt->ranges) {
>>> -           for (i = 0; i < gtt->nr_ranges; i++)
>>> -                   r |= hmm_vma_range_done(&gtt->ranges[i]);
>>> -           kvfree(gtt->ranges[0].pfns);
>>> -           kvfree(gtt->ranges);
>>> -           gtt->ranges = NULL;
>>> +   if (gtt->range) {
>>> +           r = hmm_range_valid(gtt->range);
>>> +           hmm_range_unregister(gtt->range);
>>> +
>>> +           kvfree(gtt->range->pfns);
>>> +           kfree(gtt->range);
>>> +           gtt->range = NULL;
>>>             }
>>>     
>>>             return r;
>>> @@ -953,9 +932,9 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt 
>>> *ttm)
>>>             sg_free_table(ttm->sg);
>>>     
>>>     #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
>>> -   if (gtt->ranges &&
>>> -       ttm->pages[0] == hmm_pfn_to_page(&gtt->ranges[0],
>>> -                                        gtt->ranges[0].pfns[0]))
>>> +   if (gtt->range &&
>>> +       ttm->pages[0] == hmm_device_entry_to_page(gtt->range,
>>> +                                                 gtt->range->pfns[0]))
>>>                     WARN_ONCE(1, "Missing get_user_page_done\n");
>>>     #endif
>>>     }
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to