Re: [PATCH] drm/amdkfd: svm ranges creation for unregistered memory

2021-04-22 Thread Felix Kuehling


Am 2021-04-22 um 10:47 a.m. schrieb Alex Sierra:
> SVM ranges are created for unregistered memory, triggered
> by page faults. These ranges are migrated/mapped to
> GPU VRAM memory.
>
> Signed-off-by: Alex Sierra 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 107 ++-
>  1 file changed, 104 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index 45dd055118eb..44ff643e3c32 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -274,7 +274,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, 
> uint64_t start,
>   INIT_LIST_HEAD(&prange->deferred_list);
>   INIT_LIST_HEAD(&prange->child_list);
>   atomic_set(&prange->invalid, 0);
> - prange->validate_timestamp = ktime_to_us(ktime_get());
> + prange->validate_timestamp = 0;
>   mutex_init(&prange->migrate_mutex);
>   mutex_init(&prange->lock);
>   svm_range_set_default_attributes(&prange->preferred_loc,
> @@ -2179,6 +2179,86 @@ svm_range_best_restore_location(struct svm_range 
> *prange,
>  
>   return -1;
>  }
> +static int
> +svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
> + unsigned long *start, unsigned long *last)
> +{
> + struct vm_area_struct *vma;
> + struct interval_tree_node *node;
> + unsigned long start_limit, end_limit;
> +
> + vma = find_vma(p->mm, addr << PAGE_SHIFT);
> + if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
> + pr_debug("VMA does not exist in address [0x%llx]\n", addr);
> + return -EFAULT;
> + }
> + start_limit = max(vma->vm_start >> PAGE_SHIFT,
> +   (unsigned long)ALIGN_DOWN(addr, 2UL << 8));
> + end_limit = min(vma->vm_end >> PAGE_SHIFT,
> + (unsigned long)ALIGN(addr + 1, 2UL << 8));
> + /* First range that starts after the fault address */
> + node = interval_tree_iter_first(&p->svms.objects, addr + 1, ULONG_MAX);
> + if (node) {
> + end_limit = min(end_limit, node->start);
> + /* Last range that ends before the fault address */
> + node = container_of(rb_prev(&node->rb),
> + struct interval_tree_node, rb);
> + } else {
> + /* Last range must end before addr because
> +  * there was no range after addr
> +  */
> + node = container_of(rb_last(&p->svms.objects.rb_root),
> + struct interval_tree_node, rb);
> + }
> + if (node) {
> + if (node->last >= addr) {
> + WARN(1, "Overlap with prev node and page fault addr\n");
> + return -EFAULT;
> + }
> + start_limit = max(start_limit, node->last + 1);
> + }
> +
> + *start = start_limit;
> + *last = end_limit - 1;
> +
> + pr_debug("vma start: 0x%lx start: 0x%lx vma end: 0x%lx last: 0x%lx\n",
> +   vma->vm_start >> PAGE_SHIFT, *start,
> +   vma->vm_end >> PAGE_SHIFT, *last);
> +
> + return 0;
> +
> +}
> +static struct
> +svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
> + struct kfd_process *p,
> + struct mm_struct *mm,
> + int64_t addr)
> +{
> + struct svm_range *prange = NULL;
> + unsigned long start, last;
> + uint32_t gpuid, gpuidx;
> +
> + if (svm_range_get_range_boundaries(p, addr, &start, &last))
> + return NULL;
> +
> + prange = svm_range_new(&p->svms, start, last);
> + if (!prange) {
> + pr_debug("Failed to create prange in address [0x%llx]\n", 
> addr);
> + return NULL;
> + }
> + if (kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpuidx)) {
> + pr_debug("failed to get gpuid from kgd\n");
> + svm_range_free(prange);
> + return NULL;
> + }
> + prange->preferred_loc = gpuid;
> + prange->actual_loc = 0;
> + /* Guarantee prange is migrated */
> + svm_range_add_to_svms(prange);
> + svm_range_add_notifier_locked(mm, prange);
> +
> + return prange;
> +}
>  
>  /* svm_range_skip_recover - decide if prange can be recovered
>   * @prange: svm range structure
> @@ -2228,6 +2308,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
> unsigned int pasid,
>   struct kfd_process *p;
>   uint64_t timestamp;
>   int32_t best_loc, gpuidx;
> + bool write_locked = false;
>   int r = 0;
>  
>   p = kfd_lookup_process_by_pasid(pasid);
> @@ -2251,14 +2332,34 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
> unsigned int pasid,
>   }
>  
>   mmap_read_lock(mm);
> +retry_write_locked:
>   mutex_lock(&svms->lock);
>   prange = svm_range_from_addr(svms, addr, NULL);
>   if (!prange) {
>   pr_debug("failed to find prange svms 0x%p 

Re: [PATCH] drm/amdkfd: svm ranges creation for unregistered memory

2021-04-22 Thread philip yang

  


On 2021-04-22 9:20 a.m., Felix Kuehling
  wrote:


  Am 2021-04-22 um 9:08 a.m. schrieb philip yang:

  


On 2021-04-20 9:25 p.m., Felix Kuehling wrote:
@@ -2251,14 +2330,34 @@ svm_range_restore_pages(struct amdgpu_device
*adev, unsigned int pasid,


  

   	}
 
 	mmap_read_lock(mm);
+retry_write_locked:
 	mutex_lock(>lock);
 	prange = svm_range_from_addr(svms, addr, NULL);
 	if (!prange) {
 		pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
 			 svms, addr);
-		r = -EFAULT;
-		goto out_unlock_svms;
+		if (!write_locked) {
+			/* Need the write lock to create new range with MMU notifier.
+			 * Also flush pending deferred work to make sure the interval
+			 * tree is up to date before we add a new range
+			 */
+			mutex_unlock(>lock);
+			mmap_read_unlock(mm);
+			svm_range_list_lock_and_flush_work(svms, mm);


  
  I think this can deadlock with a deferred worker trying to drain
interrupts (Philip's patch series). If we cannot flush deferred work
here, we need to be more careful creating new ranges to make sure they
don't conflict with added deferred or child ranges.



It's impossible to have a deadlock between the deferred worker and
interrupt draining, because draining interrupts waits for restore_pages
without taking any lock, and restore_pages flushes deferred work without
taking any lock either.


  
  The deadlock does not come from holding or waiting for locks. It comes
from the worker waiting for interrupts to drain and the interrupt
handler waiting for the worker to finish with flush_work in
svm_range_list_lock_and_flush_work. If both are waiting for each other,
neither can make progress and you have a deadlock.


yes, you are right, I can repro the deadlock after changing the
  kfdtest. We cannot flush deferred work here.
Regards,
Philip


  
Regards,
  Felix



  
Regards,

Philip



  Regards,
  Felix



  

  +			write_locked = true;
+			goto retry_write_locked;
+		}
+		prange = svm_range_create_unregistered_range(adev, p, mm, addr);
+		if (!prange) {
+			pr_debug("failed to create unregistered range svms 0x%p address [0x%llx]\n",
+			svms, addr);
+			mmap_write_downgrade(mm);
+			r = -EFAULT;
+			goto out_unlock_svms;
+		}
 	}
+	if (write_locked)
+		mmap_write_downgrade(mm);
 
 	mutex_lock(>migrate_mutex);
 


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

  

  

  

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdkfd: svm ranges creation for unregistered memory

2021-04-22 Thread Alex Sierra
SVM ranges are created for unregistered memory, triggered
by page faults. These ranges are migrated/mapped to
GPU VRAM memory.

Signed-off-by: Alex Sierra 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 107 ++-
 1 file changed, 104 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 45dd055118eb..44ff643e3c32 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -274,7 +274,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, 
uint64_t start,
INIT_LIST_HEAD(&prange->deferred_list);
INIT_LIST_HEAD(&prange->child_list);
atomic_set(&prange->invalid, 0);
-   prange->validate_timestamp = ktime_to_us(ktime_get());
+   prange->validate_timestamp = 0;
mutex_init(&prange->migrate_mutex);
mutex_init(&prange->lock);
svm_range_set_default_attributes(&prange->preferred_loc,
@@ -2179,6 +2179,86 @@ svm_range_best_restore_location(struct svm_range *prange,
 
return -1;
 }
+static int
+svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
+   unsigned long *start, unsigned long *last)
+{
+   struct vm_area_struct *vma;
+   struct interval_tree_node *node;
+   unsigned long start_limit, end_limit;
+
+   vma = find_vma(p->mm, addr << PAGE_SHIFT);
+   if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
+   pr_debug("VMA does not exist in address [0x%llx]\n", addr);
+   return -EFAULT;
+   }
+   start_limit = max(vma->vm_start >> PAGE_SHIFT,
+ (unsigned long)ALIGN_DOWN(addr, 2UL << 8));
+   end_limit = min(vma->vm_end >> PAGE_SHIFT,
+   (unsigned long)ALIGN(addr + 1, 2UL << 8));
+   /* First range that starts after the fault address */
+   node = interval_tree_iter_first(&p->svms.objects, addr + 1, ULONG_MAX);
+   if (node) {
+   end_limit = min(end_limit, node->start);
+   /* Last range that ends before the fault address */
+   node = container_of(rb_prev(&node->rb),
+   struct interval_tree_node, rb);
+   } else {
+   /* Last range must end before addr because
+* there was no range after addr
+*/
+   node = container_of(rb_last(&p->svms.objects.rb_root),
+   struct interval_tree_node, rb);
+   }
+   if (node) {
+   if (node->last >= addr) {
+   WARN(1, "Overlap with prev node and page fault addr\n");
+   return -EFAULT;
+   }
+   start_limit = max(start_limit, node->last + 1);
+   }
+
+   *start = start_limit;
+   *last = end_limit - 1;
+
+   pr_debug("vma start: 0x%lx start: 0x%lx vma end: 0x%lx last: 0x%lx\n",
+ vma->vm_start >> PAGE_SHIFT, *start,
+ vma->vm_end >> PAGE_SHIFT, *last);
+
+   return 0;
+
+}
+static struct
+svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
+   struct kfd_process *p,
+   struct mm_struct *mm,
+   int64_t addr)
+{
+   struct svm_range *prange = NULL;
+   unsigned long start, last;
+   uint32_t gpuid, gpuidx;
+
+   if (svm_range_get_range_boundaries(p, addr, &start, &last))
+   return NULL;
+
+   prange = svm_range_new(&p->svms, start, last);
+   if (!prange) {
+   pr_debug("Failed to create prange in address [0x%llx]\n", 
addr);
+   return NULL;
+   }
+   if (kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpuidx)) {
+   pr_debug("failed to get gpuid from kgd\n");
+   svm_range_free(prange);
+   return NULL;
+   }
+   prange->preferred_loc = gpuid;
+   prange->actual_loc = 0;
+   /* Guarantee prange is migrated */
+   svm_range_add_to_svms(prange);
+   svm_range_add_notifier_locked(mm, prange);
+
+   return prange;
+}
 
 /* svm_range_skip_recover - decide if prange can be recovered
  * @prange: svm range structure
@@ -2228,6 +2308,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,
struct kfd_process *p;
uint64_t timestamp;
int32_t best_loc, gpuidx;
+   bool write_locked = false;
int r = 0;
 
p = kfd_lookup_process_by_pasid(pasid);
@@ -2251,14 +2332,34 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,
}
 
mmap_read_lock(mm);
+retry_write_locked:
mutex_lock(&svms->lock);
prange = svm_range_from_addr(svms, addr, NULL);
if (!prange) {
pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
 svms, addr);
-   r = -EFAULT;
-   goto out_unlock_svms;
+   if 

Re: [PATCH] drm/amdkfd: svm ranges creation for unregistered memory

2021-04-22 Thread Felix Kuehling
Am 2021-04-22 um 9:08 a.m. schrieb philip yang:
>
>
> On 2021-04-20 9:25 p.m., Felix Kuehling wrote:
> @@ -2251,14 +2330,34 @@ svm_range_restore_pages(struct amdgpu_device
> *adev, unsigned int pasid,
}
  
mmap_read_lock(mm);
 +retry_write_locked:
mutex_lock(>lock);
prange = svm_range_from_addr(svms, addr, NULL);
if (!prange) {
pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
 svms, addr);
 -  r = -EFAULT;
 -  goto out_unlock_svms;
 +  if (!write_locked) {
 +  /* Need the write lock to create new range with MMU 
 notifier.
 +   * Also flush pending deferred work to make sure the 
 interval
 +   * tree is up to date before we add a new range
 +   */
 +  mutex_unlock(>lock);
 +  mmap_read_unlock(mm);
 +  svm_range_list_lock_and_flush_work(svms, mm);
>> I think this can deadlock with a deferred worker trying to drain
>> interrupts (Philip's patch series). If we cannot flush deferred work
>> here, we need to be more careful creating new ranges to make sure they
>> don't conflict with added deferred or child ranges.
>
> It's impossible to have deadlock with deferred worker to drain
> interrupts, because drain interrupt wait for restore_pages without
> taking any lock, and restore_pages flush deferred work without taking
> any lock too.
>
The deadlock does not come from holding or waiting for locks. It comes
from the worker waiting for interrupts to drain and the interrupt
handler waiting for the worker to finish with flush_work in
svm_range_list_lock_and_flush_work. If both are waiting for each other,
neither can make progress and you have a deadlock.

Regards,
  Felix


> Regards,
>
> Philip
>
>> Regards,
>>   Felix
>>
>>
 +  write_locked = true;
 +  goto retry_write_locked;
 +  }
 +  prange = svm_range_create_unregistered_range(adev, p, mm, addr);
 +  if (!prange) {
 +  pr_debug("failed to create unregistered range svms 0x%p 
 address [0x%llx]\n",
 +  svms, addr);
 +  mmap_write_downgrade(mm);
 +  r = -EFAULT;
 +  goto out_unlock_svms;
 +  }
}
 +  if (write_locked)
 +  mmap_write_downgrade(mm);
  
mutex_lock(>migrate_mutex);
  
>>> ___
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdkfd: svm ranges creation for unregistered memory

2021-04-22 Thread philip yang

  


On 2021-04-20 9:25 p.m., Felix Kuehling
  wrote:


  
Am 2021-04-20 um 8:45 p.m. schrieb Felix Kuehling:

  
Am 2021-04-19 um 9:52 p.m. schrieb Alex Sierra:


  SVM ranges are created for unregistered memory, triggered
by page faults. These ranges are migrated/mapped to
GPU VRAM memory.

Signed-off-by: Alex Sierra 


This looks generally good to me. One more nit-pick inline in addition to
Philip's comments. And one question.

  
  
I found another potential deadlock. See inline. [+Philip]



  




  ---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 103 ++-
 1 file changed, 101 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 45dd055118eb..a8a92c533cf7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -2179,6 +2179,84 @@ svm_range_best_restore_location(struct svm_range *prange,
 
 	return -1;
 }
+static int
+svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
+unsigned long *start, unsigned long *last)
+{
+	struct vm_area_struct *vma;
+	struct interval_tree_node *node;
+	unsigned long start_limit, end_limit;
+
+	vma = find_vma(p->mm, addr);
+	if (!vma || addr < vma->vm_start) {
+		pr_debug("VMA does not exist in address [0x%llx]\n", addr);
+		return -EFAULT;
+	}
+	start_limit = max(vma->vm_start,
+			(unsigned long)ALIGN_DOWN(addr, 2UL << 20)) >> PAGE_SHIFT;
+	end_limit = min(vma->vm_end,
+			(unsigned long)ALIGN(addr + 1, 2UL << 20)) >> PAGE_SHIFT;
+	/* First range that starts after the fault address */
+	node = interval_tree_iter_first(>svms.objects, (addr >> PAGE_SHIFT) + 1, ULONG_MAX);
+	if (node) {
+		end_limit = min(end_limit, node->start);
+		/* Last range that ends before the fault address */
+		node = container_of(rb_prev(>rb), struct interval_tree_node, rb);
+	} else {
+		/* Last range must end before addr because there was no range after addr */
+		node = container_of(rb_last(>svms.objects.rb_root),
+struct interval_tree_node, rb);
+	}
+	if (node)
+		start_limit = max(start_limit, node->last + 1);
+
+	*start = start_limit;
+	*last = end_limit - 1;
+
+	pr_debug("vma start: %lx start: %lx vma end: %lx last: %lx\n",
+		  vma->vm_start >> PAGE_SHIFT, *start,
+		  vma->vm_end >> PAGE_SHIFT, *last);
+
+	return 0;
+
+}
+static struct
+svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
+		struct kfd_process *p,
+		struct mm_struct *mm,
+		int64_t addr)
+{
+	struct svm_range *prange = NULL;
+	struct svm_range_list *svms;
+	unsigned long start, last;
+	uint32_t gpuid, gpuidx;
+
+	if (svm_range_get_range_boundaries(p, addr << PAGE_SHIFT,
+	   , ))
+		return NULL;
+
+	svms = >svms;
+	prange = svm_range_new(>svms, start, last);
+	if (!prange) {
+		pr_debug("Failed to create prange in address [0x%llx]\\n", addr);
+		goto out;


You can just return here, since you're not doing any cleanup at the out:
label.




  +	}
+	if (kfd_process_gpuid_from_kgd(p, adev, , )) {
+		pr_debug("failed to get gpuid from kgd\n");
+		svm_range_free(prange);
+		prange = NULL;
+		goto out;


Just return.




  +	}
+	prange->preferred_loc = gpuid;
+	prange->actual_loc = 0;
+	/* Guarantee prange is migrated */
+	prange->validate_timestamp -= AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING;


Is this really specific to svm_range_create_unregistered_range? Or
should we always do this in svm_range_new to guarantee that new ranges
can get validated?

  

It's good idea to set prange->validate_timestamp to 0 in
svm_range_new, then we don't need the special handle here, and
restore_page will recover range to update page table without waiting
for AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING for new range,
AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING is used to skip duplicate retry
fault on different pages of same range.

  

Regards,
  Felix




  +	svm_range_add_to_svms(prange);
+	svm_range_add_notifier_locked(mm, prange);
+
+out:
+	return prange;
+}
 
 /* svm_range_skip_recover - decide if prange can be recovered
  * @prange: svm range structure
@@ -2228,6 +2306,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 	struct kfd_process *p;
 	uint64_t timestamp;
 	int32_t best_loc, gpuidx;
+	bool write_locked = false;
 	int r = 0;
 
 	p = kfd_lookup_process_by_pasid(pasid);
@@ -2251,14 +2330,34 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 	}
 
 	mmap_read_lock(mm);
+retry_write_locked:
 	mutex_lock(>lock);
 	prange = svm_range_from_addr(svms, addr, NULL);
 	if (!prange) {
 		pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
 			 svms, addr);
-		r = -EFAULT;
-		goto out_unlock_svms;
+		if (!write_locked) {
+			/* Need the write lock to 

Re: [PATCH] drm/amdkfd: svm ranges creation for unregistered memory

2021-04-20 Thread Felix Kuehling

Am 2021-04-20 um 8:45 p.m. schrieb Felix Kuehling:
> Am 2021-04-19 um 9:52 p.m. schrieb Alex Sierra:
>> SVM ranges are created for unregistered memory, triggered
>> by page faults. These ranges are migrated/mapped to
>> GPU VRAM memory.
>>
>> Signed-off-by: Alex Sierra 
> This looks generally good to me. One more nit-pick inline in addition to
> Philip's comments. And one question.

I found another potential deadlock. See inline. [+Philip]


>
>
>> ---
>>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 103 ++-
>>  1 file changed, 101 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> index 45dd055118eb..a8a92c533cf7 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> @@ -2179,6 +2179,84 @@ svm_range_best_restore_location(struct svm_range 
>> *prange,
>>  
>>  return -1;
>>  }
>> +static int
>> +svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
>> +unsigned long *start, unsigned long *last)
>> +{
>> +struct vm_area_struct *vma;
>> +struct interval_tree_node *node;
>> +unsigned long start_limit, end_limit;
>> +
>> +vma = find_vma(p->mm, addr);
>> +if (!vma || addr < vma->vm_start) {
>> +pr_debug("VMA does not exist in address [0x%llx]\n", addr);
>> +return -EFAULT;
>> +}
>> +start_limit = max(vma->vm_start,
>> +(unsigned long)ALIGN_DOWN(addr, 2UL << 20)) >> 
>> PAGE_SHIFT;
>> +end_limit = min(vma->vm_end,
>> +(unsigned long)ALIGN(addr + 1, 2UL << 20)) >> 
>> PAGE_SHIFT;
>> +/* First range that starts after the fault address */
>> +node = interval_tree_iter_first(>svms.objects, (addr >> PAGE_SHIFT) 
>> + 1, ULONG_MAX);
>> +if (node) {
>> +end_limit = min(end_limit, node->start);
>> +/* Last range that ends before the fault address */
>> +node = container_of(rb_prev(>rb), struct 
>> interval_tree_node, rb);
>> +} else {
>> +/* Last range must end before addr because there was no range 
>> after addr */
>> +node = container_of(rb_last(>svms.objects.rb_root),
>> +struct interval_tree_node, rb);
>> +}
>> +if (node)
>> +start_limit = max(start_limit, node->last + 1);
>> +
>> +*start = start_limit;
>> +*last = end_limit - 1;
>> +
>> +pr_debug("vma start: %lx start: %lx vma end: %lx last: %lx\n",
>> +  vma->vm_start >> PAGE_SHIFT, *start,
>> +  vma->vm_end >> PAGE_SHIFT, *last);
>> +
>> +return 0;
>> +
>> +}
>> +static struct
>> +svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
>> +struct kfd_process *p,
>> +struct mm_struct *mm,
>> +int64_t addr)
>> +{
>> +struct svm_range *prange = NULL;
>> +struct svm_range_list *svms;
>> +unsigned long start, last;
>> +uint32_t gpuid, gpuidx;
>> +
>> +if (svm_range_get_range_boundaries(p, addr << PAGE_SHIFT,
>> +   , ))
>> +return NULL;
>> +
>> +svms = >svms;
>> +prange = svm_range_new(>svms, start, last);
>> +if (!prange) {
>> +pr_debug("Failed to create prange in address [0x%llx]\\n", 
>> addr);
>> +goto out;
> You can just return here, since you're not doing any cleanup at the out:
> label.
>
>
>> +}
>> +if (kfd_process_gpuid_from_kgd(p, adev, , )) {
>> +pr_debug("failed to get gpuid from kgd\n");
>> +svm_range_free(prange);
>> +prange = NULL;
>> +goto out;
> Just return.
>
>
>> +}
>> +prange->preferred_loc = gpuid;
>> +prange->actual_loc = 0;
>> +/* Guarantee prange is migrated */
>> +prange->validate_timestamp -= AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING;
> Is this really specific to svm_range_create_unregistered_range? Or
> should we always do this in svm_range_new to guarantee that new ranges
> can get validated?
>
> Regards,
>   Felix
>
>
>> +svm_range_add_to_svms(prange);
>> +svm_range_add_notifier_locked(mm, prange);
>> +
>> +out:
>> +return prange;
>> +}
>>  
>>  /* svm_range_skip_recover - decide if prange can be recovered
>>   * @prange: svm range structure
>> @@ -2228,6 +2306,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
>> unsigned int pasid,
>>  struct kfd_process *p;
>>  uint64_t timestamp;
>>  int32_t best_loc, gpuidx;
>> +bool write_locked = false;
>>  int r = 0;
>>  
>>  p = kfd_lookup_process_by_pasid(pasid);
>> @@ -2251,14 +2330,34 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
>> unsigned int pasid,
>>  }
>>  
>>  mmap_read_lock(mm);
>> +retry_write_locked:
>>  mutex_lock(>lock);
>>  prange = 

Re: [PATCH] drm/amdkfd: svm ranges creation for unregistered memory

2021-04-20 Thread Felix Kuehling
Am 2021-04-19 um 9:52 p.m. schrieb Alex Sierra:
> SVM ranges are created for unregistered memory, triggered
> by page faults. These ranges are migrated/mapped to
> GPU VRAM memory.
>
> Signed-off-by: Alex Sierra 

This looks generally good to me. One more nit-pick inline in addition to
Philip's comments. And one question.


> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 103 ++-
>  1 file changed, 101 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index 45dd055118eb..a8a92c533cf7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -2179,6 +2179,84 @@ svm_range_best_restore_location(struct svm_range 
> *prange,
>  
>   return -1;
>  }
> +static int
> +svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
> + unsigned long *start, unsigned long *last)
> +{
> + struct vm_area_struct *vma;
> + struct interval_tree_node *node;
> + unsigned long start_limit, end_limit;
> +
> + vma = find_vma(p->mm, addr);
> + if (!vma || addr < vma->vm_start) {
> + pr_debug("VMA does not exist in address [0x%llx]\n", addr);
> + return -EFAULT;
> + }
> + start_limit = max(vma->vm_start,
> + (unsigned long)ALIGN_DOWN(addr, 2UL << 20)) >> 
> PAGE_SHIFT;
> + end_limit = min(vma->vm_end,
> + (unsigned long)ALIGN(addr + 1, 2UL << 20)) >> 
> PAGE_SHIFT;
> + /* First range that starts after the fault address */
> + node = interval_tree_iter_first(>svms.objects, (addr >> PAGE_SHIFT) 
> + 1, ULONG_MAX);
> + if (node) {
> + end_limit = min(end_limit, node->start);
> + /* Last range that ends before the fault address */
> + node = container_of(rb_prev(>rb), struct 
> interval_tree_node, rb);
> + } else {
> + /* Last range must end before addr because there was no range 
> after addr */
> + node = container_of(rb_last(>svms.objects.rb_root),
> + struct interval_tree_node, rb);
> + }
> + if (node)
> + start_limit = max(start_limit, node->last + 1);
> +
> + *start = start_limit;
> + *last = end_limit - 1;
> +
> + pr_debug("vma start: %lx start: %lx vma end: %lx last: %lx\n",
> +   vma->vm_start >> PAGE_SHIFT, *start,
> +   vma->vm_end >> PAGE_SHIFT, *last);
> +
> + return 0;
> +
> +}
> +static struct
> +svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
> + struct kfd_process *p,
> + struct mm_struct *mm,
> + int64_t addr)
> +{
> + struct svm_range *prange = NULL;
> + struct svm_range_list *svms;
> + unsigned long start, last;
> + uint32_t gpuid, gpuidx;
> +
> + if (svm_range_get_range_boundaries(p, addr << PAGE_SHIFT,
> +, ))
> + return NULL;
> +
> + svms = >svms;
> + prange = svm_range_new(>svms, start, last);
> + if (!prange) {
> + pr_debug("Failed to create prange in address [0x%llx]\\n", 
> addr);
> + goto out;

You can just return here, since you're not doing any cleanup at the out:
label.


> + }
> + if (kfd_process_gpuid_from_kgd(p, adev, , )) {
> + pr_debug("failed to get gpuid from kgd\n");
> + svm_range_free(prange);
> + prange = NULL;
> + goto out;

Just return.


> + }
> + prange->preferred_loc = gpuid;
> + prange->actual_loc = 0;
> + /* Guarantee prange is migrated */
> + prange->validate_timestamp -= AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING;

Is this really specific to svm_range_create_unregistered_range? Or
should we always do this in svm_range_new to guarantee that new ranges
can get validated?

Regards,
  Felix


> + svm_range_add_to_svms(prange);
> + svm_range_add_notifier_locked(mm, prange);
> +
> +out:
> + return prange;
> +}
>  
>  /* svm_range_skip_recover - decide if prange can be recovered
>   * @prange: svm range structure
> @@ -2228,6 +2306,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
> unsigned int pasid,
>   struct kfd_process *p;
>   uint64_t timestamp;
>   int32_t best_loc, gpuidx;
> + bool write_locked = false;
>   int r = 0;
>  
>   p = kfd_lookup_process_by_pasid(pasid);
> @@ -2251,14 +2330,34 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
> unsigned int pasid,
>   }
>  
>   mmap_read_lock(mm);
> +retry_write_locked:
>   mutex_lock(>lock);
>   prange = svm_range_from_addr(svms, addr, NULL);
>   if (!prange) {
>   pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
>svms, addr);
> - r = 

Re: [PATCH] drm/amdkfd: svm ranges creation for unregistered memory

2021-04-20 Thread philip yang

  


On 2021-04-19 9:52 p.m., Alex Sierra
  wrote:


  SVM ranges are created for unregistered memory, triggered
by page faults. These ranges are migrated/mapped to
GPU VRAM memory.

Signed-off-by: Alex Sierra 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 103 ++-
 1 file changed, 101 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 45dd055118eb..a8a92c533cf7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -2179,6 +2179,84 @@ svm_range_best_restore_location(struct svm_range *prange,
 
 	return -1;
 }
+static int
+svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
+unsigned long *start, unsigned long *last)
+{
+	struct vm_area_struct *vma;
+	struct interval_tree_node *node;
+	unsigned long start_limit, end_limit;
+
+	vma = find_vma(p->mm, addr);
+	if (!vma || addr < vma->vm_start) {
+		pr_debug("VMA does not exist in address [0x%llx]\n", addr);
+		return -EFAULT;
+	}
+	start_limit = max(vma->vm_start,
+			(unsigned long)ALIGN_DOWN(addr, 2UL << 20)) >> PAGE_SHIFT;

line over 80 columns

  
+	end_limit = min(vma->vm_end,
+			(unsigned long)ALIGN(addr + 1, 2UL << 20)) >> PAGE_SHIFT;

line over 80 columns

  
+	/* First range that starts after the fault address */
+	node = interval_tree_iter_first(>svms.objects, (addr >> PAGE_SHIFT) + 1, ULONG_MAX);

line over 80 columns

  
+	if (node) {
+		end_limit = min(end_limit, node->start);
+		/* Last range that ends before the fault address */
+		node = container_of(rb_prev(>rb), struct interval_tree_node, rb);

line over 80 columns

  
+	} else {
+		/* Last range must end before addr because there was no range after addr */

line over 80 columns

  
+		node = container_of(rb_last(>svms.objects.rb_root),
+struct interval_tree_node, rb);
+	}
+	if (node)
+		start_limit = max(start_limit, node->last + 1);
+
+	*start = start_limit;
+	*last = end_limit - 1;
+
+	pr_debug("vma start: %lx start: %lx vma end: %lx last: %lx\n",

use 0x%lx, to show address as hex

  
+		  vma->vm_start >> PAGE_SHIFT, *start,
+		  vma->vm_end >> PAGE_SHIFT, *last);
+
+	return 0;
+
+}
+static struct
+svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
+		struct kfd_process *p,
+		struct mm_struct *mm,
+		int64_t addr)
+{
+	struct svm_range *prange = NULL;
+	struct svm_range_list *svms;
+	unsigned long start, last;
+	uint32_t gpuid, gpuidx;
+
+	if (svm_range_get_range_boundaries(p, addr << PAGE_SHIFT,

We use pfn address inside svm code, and do address PAGE_SHIFT
to/from kernel functions, pass addr here.

  
+	   , )) 



  
+		return NULL;
+
+	svms = >svms;

svms is not used, can be removed.

  
+	prange = svm_range_new(>svms, start, last);
+	if (!prange) {
+		pr_debug("Failed to create prange in address [0x%llx]\\n", addr);
+		goto out;
+	}
+	if (kfd_process_gpuid_from_kgd(p, adev, , )) {
+		pr_debug("failed to get gpuid from kgd\n");
+		svm_range_free(prange);
+		prange = NULL;
+		goto out;
+	}
+	prange->preferred_loc = gpuid;
+	prange->actual_loc = 0;
+	/* Guarantee prange is migrated */
+	prange->validate_timestamp -= AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING;
+	svm_range_add_to_svms(prange);
+	svm_range_add_notifier_locked(mm, prange);
+
+out:
+	return prange;
+}
 
 /* svm_range_skip_recover - decide if prange can be recovered
  * @prange: svm range structure
@@ -2228,6 +2306,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 	struct kfd_process *p;
 	uint64_t timestamp;
 	int32_t best_loc, gpuidx;
+	bool write_locked = false;
 	int r = 0;
 
 	p = kfd_lookup_process_by_pasid(pasid);
@@ -2251,14 +2330,34 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 	}
 
 	mmap_read_lock(mm);
+retry_write_locked:
 	mutex_lock(>lock);
 	prange = svm_range_from_addr(svms, addr, NULL);
 	if (!prange) {
 		pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
 			 svms, addr);
-		r = -EFAULT;
-		goto out_unlock_svms;
+		if (!write_locked) {
+			/* Need the write lock to create new range with MMU notifier.
+			 * Also flush pending deferred work to make sure the interval

line over 80 columns

  
+			 * tree is up to date before we add a new range
+			 */
+			mutex_unlock(>lock);
+			mmap_read_unlock(mm);
+			svm_range_list_lock_and_flush_work(svms, mm);
+			write_locked = true;
+			goto retry_write_locked;
+		}
+		prange = svm_range_create_unregistered_range(adev, p, mm, addr);
+		if (!prange) {
+			pr_debug("failed to create unregistered range svms 0x%p address [0x%llx]\n",

line over 80 columns

  +			svms, addr);

indent
Regards,
Philip


  
+			mmap_write_downgrade(mm);
+			r = -EFAULT;
+			goto 

[PATCH] drm/amdkfd: svm ranges creation for unregistered memory

2021-04-19 Thread Alex Sierra
SVM ranges are created for unregistered memory, triggered
by page faults. These ranges are migrated/mapped to
GPU VRAM memory.

Signed-off-by: Alex Sierra 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 103 ++-
 1 file changed, 101 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 45dd055118eb..a8a92c533cf7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -2179,6 +2179,84 @@ svm_range_best_restore_location(struct svm_range *prange,
 
return -1;
 }
+static int
+svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
+			       unsigned long *start, unsigned long *last)
+{
+	struct vm_area_struct *vma;
+	struct interval_tree_node *node;
+	unsigned long start_limit, end_limit;
+
+	vma = find_vma(p->mm, addr);
+	if (!vma || addr < vma->vm_start) {
+		pr_debug("VMA does not exist in address [0x%llx]\n", addr);
+		return -EFAULT;
+	}
+	start_limit = max(vma->vm_start,
+			(unsigned long)ALIGN_DOWN(addr, 2UL << 20)) >> PAGE_SHIFT;
+	end_limit = min(vma->vm_end,
+			(unsigned long)ALIGN(addr + 1, 2UL << 20)) >> PAGE_SHIFT;
+	/* First range that starts after the fault address */
+	node = interval_tree_iter_first(&p->svms.objects,
+					(addr >> PAGE_SHIFT) + 1, ULONG_MAX);
+	if (node) {
+		end_limit = min(end_limit, node->start);
+		/* Last range that ends before the fault address */
+		node = container_of(rb_prev(&node->rb),
+				    struct interval_tree_node, rb);
+	} else {
+		/* Last range must end before addr because there was no range
+		 * after addr
+		 */
+		node = container_of(rb_last(&p->svms.objects.rb_root),
+				    struct interval_tree_node, rb);
+	}
+	if (node)
+		start_limit = max(start_limit, node->last + 1);
+
+	*start = start_limit;
+	*last = end_limit - 1;
+
+	pr_debug("vma start: %lx start: %lx vma end: %lx last: %lx\n",
+		 vma->vm_start >> PAGE_SHIFT, *start,
+		 vma->vm_end >> PAGE_SHIFT, *last);
+
+	return 0;
+
+}
+static struct
+svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
+					       struct kfd_process *p,
+					       struct mm_struct *mm,
+					       int64_t addr)
+{
+	struct svm_range *prange = NULL;
+	struct svm_range_list *svms;
+	unsigned long start, last;
+	uint32_t gpuid, gpuidx;
+
+	if (svm_range_get_range_boundaries(p, addr << PAGE_SHIFT,
+					   &start, &last))
+		return NULL;
+
+	svms = &p->svms;
+	prange = svm_range_new(&p->svms, start, last);
+	if (!prange) {
+		pr_debug("Failed to create prange in address [0x%llx]\\n", addr);
+		goto out;
+	}
+	if (kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpuidx)) {
+		pr_debug("failed to get gpuid from kgd\n");
+		svm_range_free(prange);
+		prange = NULL;
+		goto out;
+	}
+	prange->preferred_loc = gpuid;
+	prange->actual_loc = 0;
+	/* Gurantee prange is migrate it */
+	prange->validate_timestamp -= AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING;
+	svm_range_add_to_svms(prange);
+	svm_range_add_notifier_locked(mm, prange);
+
+out:
+	return prange;
+}
 
 /* svm_range_skip_recover - decide if prange can be recovered
  * @prange: svm range structure
@@ -2228,6 +2306,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,
struct kfd_process *p;
uint64_t timestamp;
int32_t best_loc, gpuidx;
+   bool write_locked = false;
int r = 0;
 
p = kfd_lookup_process_by_pasid(pasid);
@@ -2251,14 +2330,34 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,
}
 
 	mmap_read_lock(mm);
+retry_write_locked:
 	mutex_lock(&svms->lock);
 	prange = svm_range_from_addr(svms, addr, NULL);
 	if (!prange) {
 		pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
 			 svms, addr);
-		r = -EFAULT;
-		goto out_unlock_svms;
+		if (!write_locked) {
+			/* Need the write lock to create new range with MMU notifier.
+			 * Also flush pending deferred work to make sure the interval
+			 * tree is up to date before we add a new range
+			 */
+			mutex_unlock(&svms->lock);
+			mmap_read_unlock(mm);
+			svm_range_list_lock_and_flush_work(svms, mm);
+  

Re: [PATCH] drm/amdkfd: svm ranges creation for unregistered memory

2021-04-19 Thread Felix Kuehling
Am 2021-04-19 um 1:24 p.m. schrieb Alex Sierra:
> SVM ranges are created for unregistered memory, triggered
> by page faults. These ranges are migrated/mapped to
> GPU VRAM memory.
>
> Signed-off-by: Alex Sierra 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 85 +++-
>  1 file changed, 82 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index 45dd055118eb..4cbbfba01cae 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -2179,6 +2179,79 @@ svm_range_best_restore_location(struct svm_range 
> *prange,
>  
>   return -1;
>  }
> +static int
> +svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
> + unsigned long *start, unsigned long *end)
> +{
> + struct vm_area_struct *vma;
> + unsigned long start_limit, end_limit;
> +
> + vma = find_vma(p->mm, addr);
> + if (!vma) {

This check is not correct. Look for other examples of find_vma in the
driver. It's possible that find_vma returns the first VMA that starts
after the specified address. The condition usually used after find_vma
is something like

if (!vma || addr < vma->vm_start)
return -EFAULT;


> + pr_debug("VMA does not exist in address [0x%llx]\n", addr);
> + return -1;

Return a proper error code, i.e. -EFAULT;


> + }
> + start_limit = max(vma->vm_start,
> + (unsigned long)ALIGN_DOWN(addr, 2UL << 20)) >> 
> PAGE_SHIFT;
> + addr >>= PAGE_SHIFT;
> + *start = addr;
> +
> + while (*start > start_limit &&
> + !interval_tree_iter_first(>svms.objects, *start - 1, *start 
> - 1))
> + *start -= 1;

This loop doesn't really make sense. Calling interval_tree_iter_first in
a loop is weird. It would typically be called before a loop. In the loop
you'd call interval_tree_iter_next. But in this case you shouldn't need
a loop at all because you're just looking for one specific range.
Interval trees are supposed to make this more efficient than a linear
search.

I think what you want to do here is to find the last prange that ends
before addr. Something like this:

start_limit = max(vma->vm_start,
		  (unsigned long)ALIGN_DOWN(addr, 2UL << 20)) >> PAGE_SHIFT;
end_limit = min(vma->end,
		(unsigned long)ALIGN(addr + 1, 2UL << 20)) >> PAGE_SHIFT;
/* First range that starts after the fault address */
node = interval_tree_first(&p->svms.objects, (addr >> PAGE_SHIFT) + 1,
			   ULONG_MAX);
if (node) {
	end_limit = min(end_limit, node->start);
	/* Last range that ends before the fault address */
	node = container_of(rb_prev(&node->rb), struct interval_tree_node, rb);
} else {
	/* Last range must end before addr because there was no range
	 * after addr */
	node = container_of(rb_last(&p->svms.objects.rb_root),
			    struct interval_tree_node, rb);
}
if (node)
	start_limit = max(start_limit, node->last + 1);


*start = start_limit;
*last = end_limit - 1;


> +
> + end_limit = min(vma->vm_end >> PAGE_SHIFT,
> + (*start + 0x200)) - 1;
> +
> + *end = addr;
> +
> + while (*end < end_limit &&
> + !interval_tree_iter_first(>svms.objects, *end + 1, *end + 1))
> + *end += 1;

See above. My code snippet already calculates both the start and end
without requiring any loops.


> + pr_debug("vma start: %lx start: %lx vma end: %lx end: %lx\n",
> +   vma->vm_start >> PAGE_SHIFT, *start,
> +   vma->vm_end >> PAGE_SHIFT, *end);
> +
> + return 0;
> +
> +}
> +static struct
> +svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
> + struct kfd_process *p,
> + struct mm_struct *mm,
> + int64_t addr)
> +{
> + struct svm_range *prange = NULL;
> + struct svm_range_list *svms;
> + unsigned long start, end;

Rename "end" to "last". "end" is typically used for an exclusive end
address (just outside the range). "last" is typically used for an
inclusive end address (the last address still inside the range). You're
using an inclusive end address, so this should be called "last" to avoid
confusion.


> + uint32_t gpuid, gpuidx;
> +
> + if (svm_range_get_range_boundaries(p, addr << PAGE_SHIFT,
> +, ))
> + return NULL;
> +
> + svms = >svms;
> + prange = svm_range_new(>svms, start, end);
> + if (!prange) {
> + pr_debug("Failed to create prange in address [0x%llx]\\n", 
> addr);
> + goto out;
> + }
> + if (kfd_process_gpuid_from_kgd(p, adev, , )) {
> 

[PATCH] drm/amdkfd: svm ranges creation for unregistered memory

2021-04-19 Thread Alex Sierra
SVM ranges are created for unregistered memory, triggered
by page faults. These ranges are migrated/mapped to
GPU VRAM memory.

Signed-off-by: Alex Sierra 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 85 +++-
 1 file changed, 82 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 45dd055118eb..4cbbfba01cae 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -2179,6 +2179,79 @@ svm_range_best_restore_location(struct svm_range *prange,
 
return -1;
 }
+static int
+svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
+			       unsigned long *start, unsigned long *end)
+{
+	struct vm_area_struct *vma;
+	unsigned long start_limit, end_limit;
+
+	vma = find_vma(p->mm, addr);
+	if (!vma) {
+		pr_debug("VMA does not exist in address [0x%llx]\n", addr);
+		return -1;
+	}
+	start_limit = max(vma->vm_start,
+			(unsigned long)ALIGN_DOWN(addr, 2UL << 20)) >> PAGE_SHIFT;
+	addr >>= PAGE_SHIFT;
+	*start = addr;
+
+	while (*start > start_limit &&
+		!interval_tree_iter_first(&p->svms.objects, *start - 1, *start - 1))
+		*start -= 1;
+
+	end_limit = min(vma->vm_end >> PAGE_SHIFT,
+			(*start + 0x200)) - 1;
+
+	*end = addr;
+
+	while (*end < end_limit &&
+		!interval_tree_iter_first(&p->svms.objects, *end + 1, *end + 1))
+		*end += 1;
+	pr_debug("vma start: %lx start: %lx vma end: %lx end: %lx\n",
+		 vma->vm_start >> PAGE_SHIFT, *start,
+		 vma->vm_end >> PAGE_SHIFT, *end);
+
+	return 0;
+
+}
+static struct
+svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
+					       struct kfd_process *p,
+					       struct mm_struct *mm,
+					       int64_t addr)
+{
+	struct svm_range *prange = NULL;
+	struct svm_range_list *svms;
+	unsigned long start, end;
+	uint32_t gpuid, gpuidx;
+
+	if (svm_range_get_range_boundaries(p, addr << PAGE_SHIFT,
+					   &start, &end))
+		return NULL;
+
+	svms = &p->svms;
+	prange = svm_range_new(&p->svms, start, end);
+	if (!prange) {
+		pr_debug("Failed to create prange in address [0x%llx]\\n", addr);
+		goto out;
+	}
+	if (kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpuidx)) {
+		pr_debug("failed to get gpuid from kgd\n");
+		svm_range_free(prange);
+		prange = NULL;
+		goto out;
+	}
+	prange->preferred_loc = gpuid;
+	prange->actual_loc = 0;
+	/* Gurantee prange is migrate it */
+	prange->validate_timestamp -= AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING;
+	svm_range_add_to_svms(prange);
+	svm_range_add_notifier_locked(mm, prange);
+
+out:
+	return prange;
+}
 
 /* svm_range_skip_recover - decide if prange can be recovered
  * @prange: svm range structure
@@ -2250,15 +2323,21 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,
goto out;
}
 
-	mmap_read_lock(mm);
+	mmap_write_lock(mm);
 	mutex_lock(&svms->lock);
 	prange = svm_range_from_addr(svms, addr, NULL);
 	if (!prange) {
 		pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
 			 svms, addr);
-		r = -EFAULT;
-		goto out_unlock_svms;
+		prange = svm_range_create_unregistered_range(adev, p, mm, addr);
+		if (!prange) {
+			pr_debug("failed to create unregisterd range svms 0x%p address [0x%llx]\n",
+				svms, addr);
+			mmap_write_downgrade(mm);
+			goto out_unlock_svms;
+		}
 	}
+	mmap_write_downgrade(mm);
 
 	mutex_lock(&prange->migrate_mutex);
 
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx