Re: [PATCH] drm/amdkfd: Use partial mapping in GPU page fault recovery

2023-10-19 Thread Chen, Xiaogang



On 10/19/2023 2:40 PM, Philip Yang wrote:



On 2023-10-19 12:20, Chen, Xiaogang wrote:


On 10/19/2023 11:08 AM, Philip Yang wrote:



On 2023-10-19 10:24, Xiaogang.Chen wrote:

From: Xiaogang Chen

After partial migration to recover a GPU page fault, this patch does GPU vm
space mapping only for the page range that got migrated, instead of mapping
all pages of the svm range in which the page fault happened.

Signed-off-by: Xiaogang Chen
---
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 33 +++++++++++++++++++++++++--------
  1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 54af7a2b29f8..81dbcc8a4ccc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1619,6 +1619,7 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
   * 5. Release page table (and SVM BO) reservation
   */
  static int svm_range_validate_and_map(struct mm_struct *mm,
+  unsigned long map_start, unsigned long map_last,
    struct svm_range *prange, int32_t gpuidx,
    bool intr, bool wait, bool flush_tlb)
  {
@@ -1630,6 +1631,12 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
  int32_t idx;
  int r = 0;
  +    if (map_start < prange->start || map_last > prange->last) {
This is not needed, as this case should never happen, and you also 
use max/min to limit map_start, map_last below.

This was just a sanity check, I can remove it.

+    pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
+ map_start, map_last, prange->start, prange->last);
+    return -EFAULT;
+    }
+
  ctx = kzalloc(sizeof(struct svm_validate_context), GFP_KERNEL);
  if (!ctx)
  return -ENOMEM;
@@ -1747,9 +1754,16 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
  r = -EAGAIN;
  }
  -    if (!r)
-    r = svm_range_map_to_gpus(prange, offset, npages, readonly,
-  ctx->bitmap, wait, flush_tlb);
+    if (!r) {
+    map_start = max(map_start, prange->start + offset);
+    map_last = min(map_last, prange->start + offset + npages);

This should be moved outside the for loop; otherwise 
amdgpu_hmm_range_get_pages and svm_range_dma_map still work on the 
entire prange. The prange->vram_pages update logic would then need to 
be changed accordingly.


We need to use the hmm function to get the vram page count for the whole 
range, because we do not know how many vram pages remain after a partial 
migration; that is how we know whether the vram bo can be released.


ok, migrate-to-vram and migrate-to-ram already have the vram pages 
updated; this is needed for the split prange. We could update 
prange->vram_pages when splitting a prange; that can be done in another 
patch.


map_last is inclusive,

+    map_last = min(map_last, prange->start + offset + npages - 1);



ok, will update it.
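For illustration, here is a minimal standalone userspace sketch of the 
inclusive-range arithmetic discussed above (not kfd_svm.c code; names and 
values are made up). It shows why the min() needs the "- 1" when map_last 
is inclusive, and that the clamped window holds map_last - map_start + 1 pages.

/*
 * Standalone sketch, not driver code: page indices are inclusive on both
 * ends, so a chunk of `npages` pages starting at range_start + offset ends
 * at range_start + offset + npages - 1, and a clamped window
 * [map_start, map_last] contains map_last - map_start + 1 pages.
 */
#include <stdio.h>

static unsigned long min_ul(unsigned long a, unsigned long b) { return a < b ? a : b; }
static unsigned long max_ul(unsigned long a, unsigned long b) { return a > b ? a : b; }

int main(void)
{
	unsigned long range_start = 0x100;                  /* like prange->start */
	unsigned long offset = 8, npages = 16;              /* current chunk: [0x108, 0x117] */
	unsigned long map_start = 0x104, map_last = 0x10b;  /* migrated pages, inclusive */

	map_start = max_ul(map_start, range_start + offset);
	/* "- 1" because map_last is inclusive; without it the window is one page too long */
	map_last = min_ul(map_last, range_start + offset + npages - 1);

	if (map_start <= map_last)
		printf("map [0x%lx 0x%lx], %lu pages\n",
		       map_start, map_last, map_last - map_start + 1);
	return 0;
}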

Regards

Xiaogang


Regards,

Philip



Regards

Xiaogang


Regards,

Philip


+    if (map_start <= map_last) {
+    offset = map_start - prange->start;
+    npages = map_last - map_start + 1;
+    r = svm_range_map_to_gpus(prange, offset, npages, readonly,
+  ctx->bitmap, wait, flush_tlb);
+    }
+    }
    if (!r && next == end)
  prange->mapped_to_gpu = true;
@@ -1855,8 +1869,8 @@ static void svm_range_restore_work(struct work_struct *work)
   */
  mutex_lock(&prange->migrate_mutex);
  -    r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
-   false, true, false);
+    r = svm_range_validate_and_map(mm, prange->start, prange->last, prange,
+   MAX_GPU_INSTANCE, false, true, false);
  if (r)
  pr_debug("failed %d to map 0x%lx to gpus\n", r,
   prange->start);
@@ -3069,6 +3083,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
  kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr,
 write_fault, timestamp);
  +    start = prange->start;
+    last = prange->last;
  if (prange->actual_loc != 0 || best_loc != 0) {
  migration = true;
  /* Align migration range start and size to granularity size */
@@ -3102,10 +3118,11 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
  }
  }
  -    r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false);
+    r = svm_range_validate_and_map(mm, start, last, prange, gpuidx, false,
+   false, false);
  if (r)
  pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
- r, svms, prange->start, prange->last);
+ r, svms, start, last);
  kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr,
   migration);


[PATCH] drm/amdkfd: Use partial mapping in GPU page fault recovery

2023-10-19 Thread Xiaogang.Chen
From: Xiaogang Chen 

After partial migration to recover a GPU page fault, this patch does GPU vm
space mapping only for the page range that got migrated, instead of mapping
all pages of the svm range in which the page fault happened.

Signed-off-by: Xiaogang Chen
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 54af7a2b29f8..81dbcc8a4ccc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1619,6 +1619,7 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
  * 5. Release page table (and SVM BO) reservation
  */
 static int svm_range_validate_and_map(struct mm_struct *mm,
+ unsigned long map_start, unsigned long map_last,
  struct svm_range *prange, int32_t gpuidx,
  bool intr, bool wait, bool flush_tlb)
 {
@@ -1630,6 +1631,12 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
int32_t idx;
int r = 0;
 
+   if (map_start < prange->start || map_last > prange->last) {
+   pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
+map_start, map_last, prange->start, prange->last);
+   return -EFAULT;
+   }
+
ctx = kzalloc(sizeof(struct svm_validate_context), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
@@ -1747,9 +1754,16 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
r = -EAGAIN;
}
 
-   if (!r)
-   r = svm_range_map_to_gpus(prange, offset, npages, readonly,
- ctx->bitmap, wait, flush_tlb);
+   if (!r) {
+   map_start = max(map_start, prange->start + offset);
+   map_last = min(map_last, prange->start + offset + npages);
+   if (map_start <= map_last) {
+   offset = map_start - prange->start;
+   npages = map_last - map_start + 1;
+   r = svm_range_map_to_gpus(prange, offset, npages, readonly,
+ ctx->bitmap, wait, flush_tlb);
+   }
+   }
 
if (!r && next == end)
prange->mapped_to_gpu = true;
@@ -1855,8 +1869,8 @@ static void svm_range_restore_work(struct work_struct *work)
 */
mutex_lock(&prange->migrate_mutex);
 
-   r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
-  false, true, false);
+   r = svm_range_validate_and_map(mm, prange->start, prange->last, prange,
+  MAX_GPU_INSTANCE, false, true, false);
if (r)
pr_debug("failed %d to map 0x%lx to gpus\n", r,
 prange->start);
@@ -3069,6 +3083,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr,
   write_fault, timestamp);
 
+   start = prange->start;
+   last = prange->last;
if (prange->actual_loc != 0 || best_loc != 0) {
migration = true;
/* Align migration range start and size to granularity size */
@@ -3102,10 +3118,11 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
}
}
 
-   r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false);
+   r = svm_range_validate_and_map(mm, start, last, prange, gpuidx, false,
+  false, false);
if (r)
pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
-r, svms, prange->start, prange->last);
+r, svms, start, last);
 
kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr,
 migration);
@@ -3650,7 +3667,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
 
flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu;
 
-   r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
+   r = svm_range_validate_and_map(mm, prange->start, prange->last, prange, MAX_GPU_INSTANCE,
   true, true, flush_tlb);
if (r)
pr_debug("failed %d to map svm range\n", r);
-- 
2.25.1
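
As a follow-up note on the patch above: in svm_range_restore_pages the narrowed
[start, last] window passed to svm_range_validate_and_map comes from expanding the
faulting page address to a granularity-aligned window and clamping it to the range
bounds. The sketch below is a rough standalone illustration of that idea under
stated assumptions, not the driver code; fault_window(), range_start/range_last and
granularity_order are made-up names.

/*
 * Standalone sketch (assumption, not kfd_svm.c): expand a faulting page
 * index to a granularity-aligned window and clamp it to the range bounds.
 * A window like this is what the patch maps instead of the whole range.
 */
#include <stdio.h>

static void fault_window(unsigned long addr, unsigned int granularity_order,
			 unsigned long range_start, unsigned long range_last,
			 unsigned long *start, unsigned long *last)
{
	unsigned long size = 1UL << granularity_order;   /* pages per granule */
	unsigned long s = addr & ~(size - 1);            /* align down to granule */
	unsigned long l = s + size - 1;                  /* inclusive end of granule */

	*start = s > range_start ? s : range_start;      /* clamp to range start */
	*last = l < range_last ? l : range_last;         /* clamp to range end (inclusive) */
}

int main(void)
{
	unsigned long start, last;

	/* fault at page 0x1234 inside range [0x1200, 0x15ff], 512-page granularity */
	fault_window(0x1234, 9, 0x1200, 0x15ff, &start, &last);
	printf("window [0x%lx 0x%lx]\n", start, last);    /* prints [0x1200 0x13ff] */
	return 0;
}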