Re: [PATCH 2/3] drm/amdgpu: support userptr cross VMAs case with HMM v3

2019-03-13 Thread Christian König

Acked-by: Christian König 

But I have the strong feeling that we sooner or later need to rewrite 
the whole stuff from scratch.


Especially the struct amdgpu_ttm_tt structure now has a lot of 
superfluous stuff left.


Christian.

Am 13.03.19 um 02:47 schrieb Kuehling, Felix:

This patch is Reviewed-by: Felix Kuehling 

Regards,
    Felix

On 3/12/2019 9:17 PM, Yang, Philip wrote:

userptr may cross two VMAs if the forked child process (not call exec
after fork) malloc buffer, then free it, and then malloc larger size
buf, kernel will create new VMA adjacent to old VMA which was cloned
from parent process, some pages of userptr are in the first VMA, the
rest pages are in the second VMA.

HMM expects range only have one VMA, loop over all VMAs in the address
range, create multiple ranges to handle this case. See
is_mergeable_anon_vma in mm/mmap.c for details.

Change-Id: I0ca8c77e28deabccc139906f9ffee04b7e383314
Signed-off-by: Philip Yang 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 126 +---
   1 file changed, 91 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c1240bf243ba..c14198737dcd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -711,7 +711,8 @@ struct amdgpu_ttm_tt {
struct task_struct  *usertask;
uint32_tuserflags;
   #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
-   struct hmm_rangerange;
+   struct hmm_range*ranges;
+   int nr_ranges;
   #endif
   };
   
@@ -723,62 +724,108 @@ struct amdgpu_ttm_tt {

* once afterwards to stop HMM tracking
*/
   #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
+
+/* Support Userptr pages cross max 16 vmas */
+#define MAX_NR_VMAS(16)
+
   int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
   {
struct amdgpu_ttm_tt *gtt = (void *)ttm;
struct mm_struct *mm = gtt->usertask->mm;
-   unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
-   struct hmm_range *range = &gtt->range;
-   int r = 0, i;
+   unsigned long start = gtt->userptr;
+   unsigned long end = start + ttm->num_pages * PAGE_SIZE;
+   struct hmm_range *ranges;
+   struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS];
+   uint64_t *pfns, f;
+   int r = 0, i, nr_pages;
   
   	if (!mm) /* Happens during process shutdown */

return -ESRCH;
   
-	amdgpu_hmm_init_range(range);

-
	down_read(&mm->mmap_sem);
   
-	range->vma = find_vma(mm, gtt->userptr);

-   if (!range_in_vma(range->vma, gtt->userptr, end))
-   r = -EFAULT;
-   else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
-   range->vma->vm_file)
+   /* user pages may cross multiple VMAs */
+   gtt->nr_ranges = 0;
+   do {
+   unsigned long vm_start;
+
+   if (gtt->nr_ranges >= MAX_NR_VMAS) {
+   DRM_ERROR("Too many VMAs in userptr range\n");
+   r = -EFAULT;
+   goto out;
+   }
+
+   vm_start = vma ? vma->vm_end : start;
+   vma = find_vma(mm, vm_start);
+   if (unlikely(!vma || vm_start < vma->vm_start)) {
+   r = -EFAULT;
+   goto out;
+   }
+   vmas[gtt->nr_ranges++] = vma;
+   } while (end > vma->vm_end);
+
+   DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n",
+   start, gtt->nr_ranges, ttm->num_pages);
+
+   if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
+   vmas[0]->vm_file)) {
r = -EPERM;
-   if (r)
goto out;
+   }
   
-	range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),

-GFP_KERNEL);
-   if (range->pfns == NULL) {
+   ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL);
+   if (unlikely(!ranges)) {
r = -ENOMEM;
goto out;
}
-   range->start = gtt->userptr;
-   range->end = end;
   
-	range->pfns[0] = range->flags[HMM_PFN_VALID];

-   range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ?
-   0 : range->flags[HMM_PFN_WRITE];
-   for (i = 1; i < ttm->num_pages; i++)
-   range->pfns[i] = range->pfns[0];
+   pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL);
+   if (unlikely(!pfns)) {
+   r = -ENOMEM;
+   goto out_free_ranges;
+   }
+
+   for (i = 0; i < gtt->nr_ranges; i++)
+   amdgpu_hmm_init_range(&ranges[i]);
+
+   f = ranges[0].flags[HMM_PFN_VALID];
+   f |= amdgpu_ttm_tt_is_readonly(ttm) ?
+   0 : ranges[0].flags[HMM_PFN_WRITE];
+   memset64(pfns, f, ttm->num_pages);
+
+   for (nr_pages = 0, i = 

Re: [PATCH 2/3] drm/amdgpu: support userptr cross VMAs case with HMM v3

2019-03-12 Thread Kuehling, Felix
This patch is Reviewed-by: Felix Kuehling 

Regards,
   Felix

On 3/12/2019 9:17 PM, Yang, Philip wrote:
> userptr may cross two VMAs if the forked child process (not call exec
> after fork) malloc buffer, then free it, and then malloc larger size
> buf, kernel will create new VMA adjacent to old VMA which was cloned
> from parent process, some pages of userptr are in the first VMA, the
> rest pages are in the second VMA.
>
> HMM expects range only have one VMA, loop over all VMAs in the address
> range, create multiple ranges to handle this case. See
> is_mergeable_anon_vma in mm/mmap.c for details.
>
> Change-Id: I0ca8c77e28deabccc139906f9ffee04b7e383314
> Signed-off-by: Philip Yang 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 126 +---
>   1 file changed, 91 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index c1240bf243ba..c14198737dcd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -711,7 +711,8 @@ struct amdgpu_ttm_tt {
>   struct task_struct  *usertask;
>   uint32_tuserflags;
>   #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
> - struct hmm_rangerange;
> + struct hmm_range*ranges;
> + int nr_ranges;
>   #endif
>   };
>   
> @@ -723,62 +724,108 @@ struct amdgpu_ttm_tt {
>* once afterwards to stop HMM tracking
>*/
>   #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
> +
> +/* Support Userptr pages cross max 16 vmas */
> +#define MAX_NR_VMAS  (16)
> +
>   int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>   {
>   struct amdgpu_ttm_tt *gtt = (void *)ttm;
>   struct mm_struct *mm = gtt->usertask->mm;
> - unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
> - struct hmm_range *range = &gtt->range;
> - int r = 0, i;
> + unsigned long start = gtt->userptr;
> + unsigned long end = start + ttm->num_pages * PAGE_SIZE;
> + struct hmm_range *ranges;
> + struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS];
> + uint64_t *pfns, f;
> + int r = 0, i, nr_pages;
>   
>   if (!mm) /* Happens during process shutdown */
>   return -ESRCH;
>   
> - amdgpu_hmm_init_range(range);
> -
>   down_read(&mm->mmap_sem);
>   
> - range->vma = find_vma(mm, gtt->userptr);
> - if (!range_in_vma(range->vma, gtt->userptr, end))
> - r = -EFAULT;
> - else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
> - range->vma->vm_file)
> + /* user pages may cross multiple VMAs */
> + gtt->nr_ranges = 0;
> + do {
> + unsigned long vm_start;
> +
> + if (gtt->nr_ranges >= MAX_NR_VMAS) {
> + DRM_ERROR("Too many VMAs in userptr range\n");
> + r = -EFAULT;
> + goto out;
> + }
> +
> + vm_start = vma ? vma->vm_end : start;
> + vma = find_vma(mm, vm_start);
> + if (unlikely(!vma || vm_start < vma->vm_start)) {
> + r = -EFAULT;
> + goto out;
> + }
> + vmas[gtt->nr_ranges++] = vma;
> + } while (end > vma->vm_end);
> +
> + DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n",
> + start, gtt->nr_ranges, ttm->num_pages);
> +
> + if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
> + vmas[0]->vm_file)) {
>   r = -EPERM;
> - if (r)
>   goto out;
> + }
>   
> - range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
> -  GFP_KERNEL);
> - if (range->pfns == NULL) {
> + ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL);
> + if (unlikely(!ranges)) {
>   r = -ENOMEM;
>   goto out;
>   }
> - range->start = gtt->userptr;
> - range->end = end;
>   
> - range->pfns[0] = range->flags[HMM_PFN_VALID];
> - range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ?
> - 0 : range->flags[HMM_PFN_WRITE];
> - for (i = 1; i < ttm->num_pages; i++)
> - range->pfns[i] = range->pfns[0];
> + pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL);
> + if (unlikely(!pfns)) {
> + r = -ENOMEM;
> + goto out_free_ranges;
> + }
> +
> + for (i = 0; i < gtt->nr_ranges; i++)
> + amdgpu_hmm_init_range(&ranges[i]);
> +
> + f = ranges[0].flags[HMM_PFN_VALID];
> + f |= amdgpu_ttm_tt_is_readonly(ttm) ?
> + 0 : ranges[0].flags[HMM_PFN_WRITE];
> + memset64(pfns, f, ttm->num_pages);
> +
> + for (nr_pages = 0, i = 0; i < gtt->nr_ranges; i++) {
> + ranges[i].vma = vmas[i];
> + ranges[i].start = max(start, vmas[i]->vm_start);
> + ranges[i].end = min(end, 

Re: [PATCH 2/3] drm/amdgpu: support userptr cross VMAs case with HMM v2

2019-03-12 Thread Yang, Philip
Hi Felix,

Submitted v3 to fix the potential problems with invalid userptr.

Philip

On 2019-03-12 3:30 p.m., Kuehling, Felix wrote:
> See one comment inline. There are still some potential problems that
> you're not catching.
> 
> On 2019-03-06 9:42 p.m., Yang, Philip wrote:
>> userptr may cross two VMAs if the forked child process (not call exec
>> after fork) malloc buffer, then free it, and then malloc larger size
>> buf, kernel will create new VMA adjacent to old VMA which was cloned
>> from parent process, some pages of userptr are in the first VMA, the
>> rest pages are in the second VMA.
>>
>> HMM expects range only have one VMA, loop over all VMAs in the address
>> range, create multiple ranges to handle this case. See
>> is_mergeable_anon_vma in mm/mmap.c for details.
>>
>> Change-Id: I0ca8c77e28deabccc139906f9ffee04b7e383314
>> Signed-off-by: Philip Yang 
>> ---
>>drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 123 +---
>>1 file changed, 88 insertions(+), 35 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index 7cc0ba24369d..802bec7ef917 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -711,7 +711,8 @@ struct amdgpu_ttm_tt {
>>  struct task_struct  *usertask;
>>  uint32_tuserflags;
>>#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
>> -struct hmm_rangerange;
>> +struct hmm_range*ranges;
>> +int nr_ranges;
>>#endif
>>};
>>
>> @@ -723,62 +724,105 @@ struct amdgpu_ttm_tt {
>> * once afterwards to stop HMM tracking
>> */
>>#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
>> +
>> +/* Support Userptr pages cross max 16 vmas */
>> +#define MAX_NR_VMAS (16)
>> +
>>int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>>{
>>  struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>  struct mm_struct *mm = gtt->usertask->mm;
>> -unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
>> -struct hmm_range *range = &gtt->range;
>> -int r = 0, i;
>> +unsigned long start = gtt->userptr;
>> +unsigned long end = start + ttm->num_pages * PAGE_SIZE;
>> +struct hmm_range *ranges;
>> +struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS];
>> +uint64_t *pfns, f;
>> +int r = 0, i, nr_pages;
>>
>>  if (!mm) /* Happens during process shutdown */
>>  return -ESRCH;
>>
>> -amdgpu_hmm_init_range(range);
>> -
>>  down_read(&mm->mmap_sem);
>>
>> -range->vma = find_vma(mm, gtt->userptr);
>> -if (!range_in_vma(range->vma, gtt->userptr, end))
>> -r = -EFAULT;
>> -else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
>> -range->vma->vm_file)
>> +/* user pages may cross multiple VMAs */
>> +gtt->nr_ranges = 0;
>> +do {
>> +if (gtt->nr_ranges >= MAX_NR_VMAS) {
>> +DRM_ERROR("Too many VMAs in userptr range\n");
>> +r = -EFAULT;
>> +goto out;
>> +}
>> +
>> +vma = find_vma(mm, vma ? vma->vm_end : start);
> 
> You need a check here that vma->vm_start <= the requested start address.
> Otherwise you can end up with gaps in your userptr mapping that don't
> have valid pages.
> 
> Regards,
>     Felix
> 
> 
>> +if (unlikely(!vma)) {
>> +r = -EFAULT;
>> +goto out;
>> +}
>> +vmas[gtt->nr_ranges++] = vma;
>> +} while (end > vma->vm_end);
>> +DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n",
>> +start, gtt->nr_ranges, ttm->num_pages);
>> +
>> +if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
>> +vmas[0]->vm_file)) {
>>  r = -EPERM;
>> -if (r)
>>  goto out;
>> +}
>>
>> -range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
>> - GFP_KERNEL);
>> -if (range->pfns == NULL) {
>> +ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL);
>> +if (unlikely(!ranges)) {
>>  r = -ENOMEM;
>>  goto out;
>>  }
>> -range->start = gtt->userptr;
>> -range->end = end;
>>
>> -range->pfns[0] = range->flags[HMM_PFN_VALID];
>> -range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ?
>> -0 : range->flags[HMM_PFN_WRITE];
>> -for (i = 1; i < ttm->num_pages; i++)
>> -range->pfns[i] = range->pfns[0];
>> +pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL);
>> +if (unlikely(!pfns)) {
>> +r = -ENOMEM;
>> +goto out_free_ranges;
>> +}
>> +
>> +for (i = 0; i < gtt->nr_ranges; i++)
>> +amdgpu_hmm_init_range(&ranges[i]);
>> +
>> +f = ranges[0].flags[HMM_PFN_VALID];
>> +f |= amdgpu_ttm_tt_is_readonly(ttm) ?
>> + 

[PATCH 2/3] drm/amdgpu: support userptr cross VMAs case with HMM v3

2019-03-12 Thread Yang, Philip
userptr may cross two VMAs if the forked child process (not call exec
after fork) malloc buffer, then free it, and then malloc larger size
buf, kernel will create new VMA adjacent to old VMA which was cloned
from parent process, some pages of userptr are in the first VMA, the
rest pages are in the second VMA.

HMM expects range only have one VMA, loop over all VMAs in the address
range, create multiple ranges to handle this case. See
is_mergeable_anon_vma in mm/mmap.c for details.

Change-Id: I0ca8c77e28deabccc139906f9ffee04b7e383314
Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 126 +---
 1 file changed, 91 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c1240bf243ba..c14198737dcd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -711,7 +711,8 @@ struct amdgpu_ttm_tt {
struct task_struct  *usertask;
uint32_tuserflags;
 #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
-   struct hmm_rangerange;
+   struct hmm_range*ranges;
+   int nr_ranges;
 #endif
 };
 
@@ -723,62 +724,108 @@ struct amdgpu_ttm_tt {
  * once afterwards to stop HMM tracking
  */
 #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
+
+/* Support Userptr pages cross max 16 vmas */
+#define MAX_NR_VMAS(16)
+
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
struct amdgpu_ttm_tt *gtt = (void *)ttm;
struct mm_struct *mm = gtt->usertask->mm;
-   unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
-   struct hmm_range *range = &gtt->range;
-   int r = 0, i;
+   unsigned long start = gtt->userptr;
+   unsigned long end = start + ttm->num_pages * PAGE_SIZE;
+   struct hmm_range *ranges;
+   struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS];
+   uint64_t *pfns, f;
+   int r = 0, i, nr_pages;
 
if (!mm) /* Happens during process shutdown */
return -ESRCH;
 
-   amdgpu_hmm_init_range(range);
-
	down_read(&mm->mmap_sem);
 
-   range->vma = find_vma(mm, gtt->userptr);
-   if (!range_in_vma(range->vma, gtt->userptr, end))
-   r = -EFAULT;
-   else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
-   range->vma->vm_file)
+   /* user pages may cross multiple VMAs */
+   gtt->nr_ranges = 0;
+   do {
+   unsigned long vm_start;
+
+   if (gtt->nr_ranges >= MAX_NR_VMAS) {
+   DRM_ERROR("Too many VMAs in userptr range\n");
+   r = -EFAULT;
+   goto out;
+   }
+
+   vm_start = vma ? vma->vm_end : start;
+   vma = find_vma(mm, vm_start);
+   if (unlikely(!vma || vm_start < vma->vm_start)) {
+   r = -EFAULT;
+   goto out;
+   }
+   vmas[gtt->nr_ranges++] = vma;
+   } while (end > vma->vm_end);
+
+   DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n",
+   start, gtt->nr_ranges, ttm->num_pages);
+
+   if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
+   vmas[0]->vm_file)) {
r = -EPERM;
-   if (r)
goto out;
+   }
 
-   range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
-GFP_KERNEL);
-   if (range->pfns == NULL) {
+   ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL);
+   if (unlikely(!ranges)) {
r = -ENOMEM;
goto out;
}
-   range->start = gtt->userptr;
-   range->end = end;
 
-   range->pfns[0] = range->flags[HMM_PFN_VALID];
-   range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ?
-   0 : range->flags[HMM_PFN_WRITE];
-   for (i = 1; i < ttm->num_pages; i++)
-   range->pfns[i] = range->pfns[0];
+   pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL);
+   if (unlikely(!pfns)) {
+   r = -ENOMEM;
+   goto out_free_ranges;
+   }
+
+   for (i = 0; i < gtt->nr_ranges; i++)
+   amdgpu_hmm_init_range(&ranges[i]);
+
+   f = ranges[0].flags[HMM_PFN_VALID];
+   f |= amdgpu_ttm_tt_is_readonly(ttm) ?
+   0 : ranges[0].flags[HMM_PFN_WRITE];
+   memset64(pfns, f, ttm->num_pages);
+
+   for (nr_pages = 0, i = 0; i < gtt->nr_ranges; i++) {
+   ranges[i].vma = vmas[i];
+   ranges[i].start = max(start, vmas[i]->vm_start);
+   ranges[i].end = min(end, vmas[i]->vm_end);
+   ranges[i].pfns = pfns + nr_pages;
+   nr_pages += (ranges[i].end - ranges[i].start) / PAGE_SIZE;
+
+   r = hmm_vma_fault(&ranges[i], true);
+   if 

Re: [PATCH 2/3] drm/amdgpu: support userptr cross VMAs case with HMM v2

2019-03-12 Thread Kuehling, Felix
See one comment inline. There are still some potential problems that 
you're not catching.

On 2019-03-06 9:42 p.m., Yang, Philip wrote:
> userptr may cross two VMAs if the forked child process (not call exec
> after fork) malloc buffer, then free it, and then malloc larger size
> buf, kernel will create new VMA adjacent to old VMA which was cloned
> from parent process, some pages of userptr are in the first VMA, the
> rest pages are in the second VMA.
>
> HMM expects range only have one VMA, loop over all VMAs in the address
> range, create multiple ranges to handle this case. See
> is_mergeable_anon_vma in mm/mmap.c for details.
>
> Change-Id: I0ca8c77e28deabccc139906f9ffee04b7e383314
> Signed-off-by: Philip Yang 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 123 +---
>   1 file changed, 88 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 7cc0ba24369d..802bec7ef917 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -711,7 +711,8 @@ struct amdgpu_ttm_tt {
>   struct task_struct  *usertask;
>   uint32_tuserflags;
>   #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
> - struct hmm_rangerange;
> + struct hmm_range*ranges;
> + int nr_ranges;
>   #endif
>   };
>   
> @@ -723,62 +724,105 @@ struct amdgpu_ttm_tt {
>* once afterwards to stop HMM tracking
>*/
>   #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
> +
> +/* Support Userptr pages cross max 16 vmas */
> +#define MAX_NR_VMAS  (16)
> +
>   int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>   {
>   struct amdgpu_ttm_tt *gtt = (void *)ttm;
>   struct mm_struct *mm = gtt->usertask->mm;
> - unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
> - struct hmm_range *range = &gtt->range;
> - int r = 0, i;
> + unsigned long start = gtt->userptr;
> + unsigned long end = start + ttm->num_pages * PAGE_SIZE;
> + struct hmm_range *ranges;
> + struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS];
> + uint64_t *pfns, f;
> + int r = 0, i, nr_pages;
>   
>   if (!mm) /* Happens during process shutdown */
>   return -ESRCH;
>   
> - amdgpu_hmm_init_range(range);
> -
>   down_read(&mm->mmap_sem);
>   
> - range->vma = find_vma(mm, gtt->userptr);
> - if (!range_in_vma(range->vma, gtt->userptr, end))
> - r = -EFAULT;
> - else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
> - range->vma->vm_file)
> + /* user pages may cross multiple VMAs */
> + gtt->nr_ranges = 0;
> + do {
> + if (gtt->nr_ranges >= MAX_NR_VMAS) {
> + DRM_ERROR("Too many VMAs in userptr range\n");
> + r = -EFAULT;
> + goto out;
> + }
> +
> + vma = find_vma(mm, vma ? vma->vm_end : start);

You need a check here that vma->vm_start <= the requested start address. 
Otherwise you can end up with gaps in your userptr mapping that don't 
have valid pages.

Regards,
   Felix


> + if (unlikely(!vma)) {
> + r = -EFAULT;
> + goto out;
> + }
> + vmas[gtt->nr_ranges++] = vma;
> + } while (end > vma->vm_end);
> + DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n",
> + start, gtt->nr_ranges, ttm->num_pages);
> +
> + if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
> + vmas[0]->vm_file)) {
>   r = -EPERM;
> - if (r)
>   goto out;
> + }
>   
> - range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
> -  GFP_KERNEL);
> - if (range->pfns == NULL) {
> + ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL);
> + if (unlikely(!ranges)) {
>   r = -ENOMEM;
>   goto out;
>   }
> - range->start = gtt->userptr;
> - range->end = end;
>   
> - range->pfns[0] = range->flags[HMM_PFN_VALID];
> - range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ?
> - 0 : range->flags[HMM_PFN_WRITE];
> - for (i = 1; i < ttm->num_pages; i++)
> - range->pfns[i] = range->pfns[0];
> + pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL);
> + if (unlikely(!pfns)) {
> + r = -ENOMEM;
> + goto out_free_ranges;
> + }
> +
> + for (i = 0; i < gtt->nr_ranges; i++)
> + amdgpu_hmm_init_range(&ranges[i]);
> +
> + f = ranges[0].flags[HMM_PFN_VALID];
> + f |= amdgpu_ttm_tt_is_readonly(ttm) ?
> + 0 : ranges[0].flags[HMM_PFN_WRITE];
> + memset64(pfns, f, ttm->num_pages);
> +
> + for (nr_pages = 0, i = 0; i < gtt->nr_ranges; i++) {
> + ranges[i].vma = vmas[i];
> +   

[PATCH 2/3] drm/amdgpu: support userptr cross VMAs case with HMM v2

2019-03-06 Thread Yang, Philip
userptr may cross two VMAs if the forked child process (not call exec
after fork) malloc buffer, then free it, and then malloc larger size
buf, kernel will create new VMA adjacent to old VMA which was cloned
from parent process, some pages of userptr are in the first VMA, the
rest pages are in the second VMA.

HMM expects range only have one VMA, loop over all VMAs in the address
range, create multiple ranges to handle this case. See
is_mergeable_anon_vma in mm/mmap.c for details.

Change-Id: I0ca8c77e28deabccc139906f9ffee04b7e383314
Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 123 +---
 1 file changed, 88 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 7cc0ba24369d..802bec7ef917 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -711,7 +711,8 @@ struct amdgpu_ttm_tt {
struct task_struct  *usertask;
uint32_tuserflags;
 #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
-   struct hmm_rangerange;
+   struct hmm_range*ranges;
+   int nr_ranges;
 #endif
 };
 
@@ -723,62 +724,105 @@ struct amdgpu_ttm_tt {
  * once afterwards to stop HMM tracking
  */
 #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
+
+/* Support Userptr pages cross max 16 vmas */
+#define MAX_NR_VMAS(16)
+
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
struct amdgpu_ttm_tt *gtt = (void *)ttm;
struct mm_struct *mm = gtt->usertask->mm;
-   unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
-   struct hmm_range *range = &gtt->range;
-   int r = 0, i;
+   unsigned long start = gtt->userptr;
+   unsigned long end = start + ttm->num_pages * PAGE_SIZE;
+   struct hmm_range *ranges;
+   struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS];
+   uint64_t *pfns, f;
+   int r = 0, i, nr_pages;
 
if (!mm) /* Happens during process shutdown */
return -ESRCH;
 
-   amdgpu_hmm_init_range(range);
-
	down_read(&mm->mmap_sem);
 
-   range->vma = find_vma(mm, gtt->userptr);
-   if (!range_in_vma(range->vma, gtt->userptr, end))
-   r = -EFAULT;
-   else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
-   range->vma->vm_file)
+   /* user pages may cross multiple VMAs */
+   gtt->nr_ranges = 0;
+   do {
+   if (gtt->nr_ranges >= MAX_NR_VMAS) {
+   DRM_ERROR("Too many VMAs in userptr range\n");
+   r = -EFAULT;
+   goto out;
+   }
+
+   vma = find_vma(mm, vma ? vma->vm_end : start);
+   if (unlikely(!vma)) {
+   r = -EFAULT;
+   goto out;
+   }
+   vmas[gtt->nr_ranges++] = vma;
+   } while (end > vma->vm_end);
+
+   DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n",
+   start, gtt->nr_ranges, ttm->num_pages);
+
+   if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
+   vmas[0]->vm_file)) {
r = -EPERM;
-   if (r)
goto out;
+   }
 
-   range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
-GFP_KERNEL);
-   if (range->pfns == NULL) {
+   ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL);
+   if (unlikely(!ranges)) {
r = -ENOMEM;
goto out;
}
-   range->start = gtt->userptr;
-   range->end = end;
 
-   range->pfns[0] = range->flags[HMM_PFN_VALID];
-   range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ?
-   0 : range->flags[HMM_PFN_WRITE];
-   for (i = 1; i < ttm->num_pages; i++)
-   range->pfns[i] = range->pfns[0];
+   pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL);
+   if (unlikely(!pfns)) {
+   r = -ENOMEM;
+   goto out_free_ranges;
+   }
+
+   for (i = 0; i < gtt->nr_ranges; i++)
+   amdgpu_hmm_init_range(&ranges[i]);
+
+   f = ranges[0].flags[HMM_PFN_VALID];
+   f |= amdgpu_ttm_tt_is_readonly(ttm) ?
+   0 : ranges[0].flags[HMM_PFN_WRITE];
+   memset64(pfns, f, ttm->num_pages);
+
+   for (nr_pages = 0, i = 0; i < gtt->nr_ranges; i++) {
+   ranges[i].vma = vmas[i];
+   ranges[i].start = max(start, vmas[i]->vm_start);
+   ranges[i].end = min(end, vmas[i]->vm_end);
+   ranges[i].pfns = pfns + nr_pages;
+   nr_pages += (ranges[i].end - ranges[i].start) / PAGE_SIZE;
+
+   r = hmm_vma_fault(&ranges[i], true);
+   if (unlikely(r))
+   break;
+   }
+   if (unlikely(r)) {
+   while (i--)
+ 

Re: [PATCH 2/3] drm/amdgpu: support userptr cross VMAs case with HMM

2019-03-06 Thread Yang, Philip
I will submit v2 to fix those issues. Some comments inline...

On 2019-03-06 3:11 p.m., Kuehling, Felix wrote:
> Some comments inline ...
> 
> On 3/5/2019 1:09 PM, Yang, Philip wrote:
>> userptr may cross two VMAs if the forked child process (not call exec
>> after fork) malloc buffer, then free it, and then malloc larger size
>> buf, kernel will create new VMA adjacent to old VMA which was cloned
>> from parent process, some pages of userptr are in the first VMA, the
>> rest pages are in the second VMA.
>>
>> HMM expects range only have one VMA, loop over all VMAs in the address
>> range, create multiple ranges to handle this case. See
>> is_mergeable_anon_vma in mm/mmap.c for details.
>>
>> Change-Id: I0ca8c77e28deabccc139906f9ffee04b7e383314
>> Signed-off-by: Philip Yang 
>> ---
>>drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 122 +---
>>1 file changed, 87 insertions(+), 35 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index cd0ccfbbcb84..173bf4db5994 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -711,7 +711,8 @@ struct amdgpu_ttm_tt {
>>  struct task_struct  *usertask;
>>  uint32_tuserflags;
>>#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
>> -struct hmm_rangerange;
>> +struct hmm_range*ranges;
>> +int nr_ranges;
>>#endif
>>};
>>
>> @@ -723,62 +724,104 @@ struct amdgpu_ttm_tt {
>> * once afterwards to stop HMM tracking
>> */
>>#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
>> +
>> +/* Support Userptr pages cross max 16 vmas */
>> +#define MAX_NR_VMAS (16)
>> +
>>int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>>{
>>  struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>  struct mm_struct *mm = gtt->usertask->mm;
>> -unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
>> -struct hmm_range *range = &gtt->range;
>> -int r = 0, i;
>> +unsigned long start = gtt->userptr;
>> +unsigned long end = start + ttm->num_pages * PAGE_SIZE;
>> +struct hmm_range *ranges;
>> +struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS];
>> +uint64_t *pfns, f;
>> +int r = 0, i, nr_pages;
>>
>>  if (!mm) /* Happens during process shutdown */
>>  return -ESRCH;
>>
>> -amdgpu_hmm_init_range(range);
>> -
>>  down_read(&mm->mmap_sem);
>>
>> -range->vma = find_vma(mm, gtt->userptr);
>> -if (!range_in_vma(range->vma, gtt->userptr, end))
>> -r = -EFAULT;
>> -else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
>> -range->vma->vm_file)
>> +/* user pages may cross multiple VMAs */
>> +gtt->nr_ranges = 0;
>> +do {
>> +vma = find_vma(mm, vma ? vma->vm_end : start);
>> +if (unlikely(!vma)) {
>> +r = -EFAULT;
>> +goto out;
>> +}
>> +vmas[gtt->nr_ranges++] = vma;
>> +if (gtt->nr_ranges >= MAX_NR_VMAS) {
> 
> This will lead to a failure when you have exactly 16 VMAs. If you move
> the check to the start of the loop, it will only trigger when you exceed
> the limit not just after you reach it.
> 
Ok
> 
>> +DRM_ERROR("invalid userptr range\n");
> 
> The userptr range is not really invalid. It only exceeds some artificial
> limitation in this code. A message like "Too many VMAs in userptr range"
> would be more appropriate.
> 
Ok
> 
>> +r = -EFAULT;
>> +goto out;
>> +}
>> +} while (end > vma->vm_end);
>> +
>> +DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n",
>> +start, gtt->nr_ranges, ttm->num_pages);
>> +
>> +if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
>> +vmas[0]->vm_file)) {
>>  r = -EPERM;
>> -if (r)
>>  goto out;
>> +}
>>
>> -range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
>> - GFP_KERNEL);
>> -if (range->pfns == NULL) {
>> +ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL);
>> +if (unlikely(!ranges)) {
>>  r = -ENOMEM;
>>  goto out;
>>  }
>> -range->start = gtt->userptr;
>> -range->end = end;
>>
>> -range->pfns[0] = range->flags[HMM_PFN_VALID];
>> -range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ?
>> -0 : range->flags[HMM_PFN_WRITE];
>> -for (i = 1; i < ttm->num_pages; i++)
>> -range->pfns[i] = range->pfns[0];
>> +pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL);
>> +if (unlikely(!pfns)) {
>> +r = -ENOMEM;
>> +goto out_free_ranges;
>> +}
>> +
>> +for (i = 0; i < gtt->nr_ranges; i++)
>> +amdgpu_hmm_init_range(&ranges[i]);
>> +
>> +f = 

Re: [PATCH 2/3] drm/amdgpu: support userptr cross VMAs case with HMM

2019-03-06 Thread Kuehling, Felix
Some comments inline ...

On 3/5/2019 1:09 PM, Yang, Philip wrote:
> userptr may cross two VMAs if the forked child process (not call exec
> after fork) malloc buffer, then free it, and then malloc larger size
> buf, kernel will create new VMA adjacent to old VMA which was cloned
> from parent process, some pages of userptr are in the first VMA, the
> rest pages are in the second VMA.
>
> HMM expects range only have one VMA, loop over all VMAs in the address
> range, create multiple ranges to handle this case. See
> is_mergeable_anon_vma in mm/mmap.c for details.
>
> Change-Id: I0ca8c77e28deabccc139906f9ffee04b7e383314
> Signed-off-by: Philip Yang 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 122 +---
>   1 file changed, 87 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index cd0ccfbbcb84..173bf4db5994 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -711,7 +711,8 @@ struct amdgpu_ttm_tt {
>   struct task_struct  *usertask;
>   uint32_tuserflags;
>   #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
> - struct hmm_rangerange;
> + struct hmm_range*ranges;
> + int nr_ranges;
>   #endif
>   };
>   
> @@ -723,62 +724,104 @@ struct amdgpu_ttm_tt {
>* once afterwards to stop HMM tracking
>*/
>   #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
> +
> +/* Support Userptr pages cross max 16 vmas */
> +#define MAX_NR_VMAS  (16)
> +
>   int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>   {
>   struct amdgpu_ttm_tt *gtt = (void *)ttm;
>   struct mm_struct *mm = gtt->usertask->mm;
> - unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
> - struct hmm_range *range = &gtt->range;
> - int r = 0, i;
> + unsigned long start = gtt->userptr;
> + unsigned long end = start + ttm->num_pages * PAGE_SIZE;
> + struct hmm_range *ranges;
> + struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS];
> + uint64_t *pfns, f;
> + int r = 0, i, nr_pages;
>   
>   if (!mm) /* Happens during process shutdown */
>   return -ESRCH;
>   
> - amdgpu_hmm_init_range(range);
> -
>   down_read(&mm->mmap_sem);
>   
> - range->vma = find_vma(mm, gtt->userptr);
> - if (!range_in_vma(range->vma, gtt->userptr, end))
> - r = -EFAULT;
> - else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
> - range->vma->vm_file)
> + /* user pages may cross multiple VMAs */
> + gtt->nr_ranges = 0;
> + do {
> + vma = find_vma(mm, vma ? vma->vm_end : start);
> + if (unlikely(!vma)) {
> + r = -EFAULT;
> + goto out;
> + }
> + vmas[gtt->nr_ranges++] = vma;
> + if (gtt->nr_ranges >= MAX_NR_VMAS) {

This will lead to a failure when you have exactly 16 VMAs. If you move 
the check to the start of the loop, it will only trigger when you exceed 
the limit not just after you reach it.


> + DRM_ERROR("invalid userptr range\n");

The userptr range is not really invalid. It only exceeds some artificial 
limitation in this code. A message like "Too many VMAs in userptr range" 
would be more appropriate.


> + r = -EFAULT;
> + goto out;
> + }
> + } while (end > vma->vm_end);
> +
> + DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n",
> + start, gtt->nr_ranges, ttm->num_pages);
> +
> + if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
> + vmas[0]->vm_file)) {
>   r = -EPERM;
> - if (r)
>   goto out;
> + }
>   
> - range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
> -  GFP_KERNEL);
> - if (range->pfns == NULL) {
> + ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL);
> + if (unlikely(!ranges)) {
>   r = -ENOMEM;
>   goto out;
>   }
> - range->start = gtt->userptr;
> - range->end = end;
>   
> - range->pfns[0] = range->flags[HMM_PFN_VALID];
> - range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ?
> - 0 : range->flags[HMM_PFN_WRITE];
> - for (i = 1; i < ttm->num_pages; i++)
> - range->pfns[i] = range->pfns[0];
> + pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL);
> + if (unlikely(!pfns)) {
> + r = -ENOMEM;
> + goto out_free_ranges;
> + }
> +
> + for (i = 0; i < gtt->nr_ranges; i++)
> + amdgpu_hmm_init_range(&ranges[i]);
> +
> + f = ranges[0].flags[HMM_PFN_VALID];
> + f |= amdgpu_ttm_tt_is_readonly(ttm) ?
> + 0 : ranges[0].flags[HMM_PFN_WRITE];
> + memset64(pfns, f, ttm->num_pages);
> +
> + 

[PATCH 2/3] drm/amdgpu: support userptr cross VMAs case with HMM

2019-03-05 Thread Yang, Philip
userptr may cross two VMAs if the forked child process (not call exec
after fork) malloc buffer, then free it, and then malloc larger size
buf, kernel will create new VMA adjacent to old VMA which was cloned
from parent process, some pages of userptr are in the first VMA, the
rest pages are in the second VMA.

HMM expects a range to have only one VMA, so loop over all VMAs in the
address range and create multiple ranges to handle this case. See
is_mergeable_anon_vma in mm/mmap.c for details.

Change-Id: I0ca8c77e28deabccc139906f9ffee04b7e383314
Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 122 +---
 1 file changed, 87 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index cd0ccfbbcb84..173bf4db5994 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -711,7 +711,8 @@ struct amdgpu_ttm_tt {
struct task_struct  *usertask;
uint32_tuserflags;
 #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
-   struct hmm_rangerange;
+   struct hmm_range*ranges;
+   int nr_ranges;
 #endif
 };
 
@@ -723,62 +724,104 @@ struct amdgpu_ttm_tt {
  * once afterwards to stop HMM tracking
  */
 #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
+
+/* Support Userptr pages cross max 16 vmas */
+#define MAX_NR_VMAS(16)
+
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
struct amdgpu_ttm_tt *gtt = (void *)ttm;
struct mm_struct *mm = gtt->usertask->mm;
-   unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
-   struct hmm_range *range = &gtt->range;
-   int r = 0, i;
+   unsigned long start = gtt->userptr;
+   unsigned long end = start + ttm->num_pages * PAGE_SIZE;
+   struct hmm_range *ranges;
+   struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS];
+   uint64_t *pfns, f;
+   int r = 0, i, nr_pages;
 
if (!mm) /* Happens during process shutdown */
return -ESRCH;
 
-   amdgpu_hmm_init_range(range);
-
	down_read(&mm->mmap_sem);
 
-   range->vma = find_vma(mm, gtt->userptr);
-   if (!range_in_vma(range->vma, gtt->userptr, end))
-   r = -EFAULT;
-   else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
-   range->vma->vm_file)
+   /* user pages may cross multiple VMAs */
+   gtt->nr_ranges = 0;
+   do {
+   vma = find_vma(mm, vma ? vma->vm_end : start);
+   if (unlikely(!vma)) {
+   r = -EFAULT;
+   goto out;
+   }
+   vmas[gtt->nr_ranges++] = vma;
+   if (gtt->nr_ranges >= MAX_NR_VMAS) {
+   DRM_ERROR("invalid userptr range\n");
+   r = -EFAULT;
+   goto out;
+   }
+   } while (end > vma->vm_end);
+
+   DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n",
+   start, gtt->nr_ranges, ttm->num_pages);
+
+   if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
+   vmas[0]->vm_file)) {
r = -EPERM;
-   if (r)
goto out;
+   }
 
-   range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
-GFP_KERNEL);
-   if (range->pfns == NULL) {
+   ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL);
+   if (unlikely(!ranges)) {
r = -ENOMEM;
goto out;
}
-   range->start = gtt->userptr;
-   range->end = end;
 
-   range->pfns[0] = range->flags[HMM_PFN_VALID];
-   range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ?
-   0 : range->flags[HMM_PFN_WRITE];
-   for (i = 1; i < ttm->num_pages; i++)
-   range->pfns[i] = range->pfns[0];
+   pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL);
+   if (unlikely(!pfns)) {
+   r = -ENOMEM;
+   goto out_free_ranges;
+   }
+
+   for (i = 0; i < gtt->nr_ranges; i++)
+   amdgpu_hmm_init_range(&ranges[i]);
+
+   f = ranges[0].flags[HMM_PFN_VALID];
+   f |= amdgpu_ttm_tt_is_readonly(ttm) ?
+   0 : ranges[0].flags[HMM_PFN_WRITE];
+   memset64(pfns, f, ttm->num_pages);
+
+   for (nr_pages = 0, i = 0; i < gtt->nr_ranges; i++) {
+   ranges[i].vma = vmas[i];
+   ranges[i].start = max(start, vmas[i]->vm_start);
+   ranges[i].end = min(end, vmas[i]->vm_end);
+   ranges[i].pfns = pfns + nr_pages;
+   nr_pages += (ranges[i].end - ranges[i].start) / PAGE_SIZE;
+
+   r = hmm_vma_fault(&ranges[i], true);
+   if (unlikely(r))
+   break;
+   }
+   if (unlikely(r)) {
+   while (i--)
+