Re: [PATCH] drm/amdkfd: Use partial mapping in GPU page fault recovery
On 10/19/2023 2:40 PM, Philip Yang wrote:

On 2023-10-19 12:20, Chen, Xiaogang wrote:

On 10/19/2023 11:08 AM, Philip Yang wrote:

On 2023-10-19 10:24, Xiaogang.Chen wrote:

From: Xiaogang Chen

After partial migration to recover a GPU page fault, this patch maps the
GPU VM space only for the page range that got migrated, instead of mapping
all pages of the svm range in which the page fault happened.

Signed-off-by: Xiaogang Chen
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 54af7a2b29f8..81dbcc8a4ccc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1619,6 +1619,7 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
  * 5. Release page table (and SVM BO) reservation
  */
 static int svm_range_validate_and_map(struct mm_struct *mm,
+                                      unsigned long map_start, unsigned long map_last,
                                       struct svm_range *prange, int32_t gpuidx,
                                       bool intr, bool wait, bool flush_tlb)
 {
@@ -1630,6 +1631,12 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
         int32_t idx;
         int r = 0;
 
+        if (map_start < prange->start || map_last > prange->last) {

This is not needed, as this case should never happen, and you also use
max/min to limit map_start, map_last below.

This was just a sanity check, I can remove it.

+                pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
+                         map_start, map_last, prange->start, prange->last);
+                return -EFAULT;
+        }
+
         ctx = kzalloc(sizeof(struct svm_validate_context), GFP_KERNEL);
         if (!ctx)
                 return -ENOMEM;
@@ -1747,9 +1754,16 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
                         r = -EAGAIN;
                 }
 
-                if (!r)
-                        r = svm_range_map_to_gpus(prange, offset, npages, readonly,
-                                                  ctx->bitmap, wait, flush_tlb);
+                if (!r) {
+                        map_start = max(map_start, prange->start + offset);
+                        map_last = min(map_last, prange->start + offset + npages);

This should be moved outside the for loop; otherwise amdgpu_hmm_range_get_pages
and svm_range_dma_map still work on the entire prange, and then the
prange->vram_pages update logic should be changed accordingly.

We need to use the hmm function to get the vram page count on the whole
range, as we do not know how many vram pages remain after partial migration;
then we know whether the vram bo can be released.

ok, migrate to vram and migrate to ram have the vram pages updated already;
this is needed for the split prange. We could update prange->vram_pages when
splitting prange; this can be done in another patch.

map_last is inclusive:

+                        map_last = min(map_last, prange->start + offset + npages - 1);

ok, will update it.
Regards

Xiaogang

Regards,
Philip

Regards

Xiaogang

Regards,
Philip

+                        if (map_start <= map_last) {
+                                offset = map_start - prange->start;
+                                npages = map_last - map_start + 1;
+                                r = svm_range_map_to_gpus(prange, offset, npages, readonly,
+                                                          ctx->bitmap, wait, flush_tlb);
+                        }
+                }
 
                 if (!r && next == end)
                         prange->mapped_to_gpu = true;
@@ -1855,8 +1869,8 @@ static void svm_range_restore_work(struct work_struct *work)
                  */
                 mutex_lock(&prange->migrate_mutex);
 
-                r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
-                                               false, true, false);
+                r = svm_range_validate_and_map(mm, prange->start, prange->last, prange,
+                                               MAX_GPU_INSTANCE, false, true, false);
                 if (r)
                         pr_debug("failed %d to map 0x%lx to gpus\n", r,
                                  prange->start);
@@ -3069,6 +3083,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
         kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr,
                                        write_fault, timestamp);
 
+        start = prange->start;
+        last = prange->last;
         if (prange->actual_loc != 0 || best_loc != 0) {
                 migration = true;
                 /* Align migration range start and size to granularity size */
@@ -3102,10 +3118,11 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
                 }
         }
 
-        r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false);
+        r = svm_range_validate_and_map(mm, start, last, prange, gpuidx, false,
+                                       false, false);
         if (r)
                 pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
-                         r, svms, prange->start, prange->last);
+                         r, svms, start, last);
 
         kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr,
                                      migration);
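For reference, the svm_range_map_to_gpus hunk with the inclusive map_last
clamp folded in would look roughly as follows. This is only a sketch against
the patch as posted in this thread, not the actual follow-up revision:

        if (!r) {
                /* Clamp the requested mapping window to the chunk that was
                 * just validated.  map_last is inclusive, hence the "- 1".
                 */
                map_start = max(map_start, prange->start + offset);
                map_last = min(map_last, prange->start + offset + npages - 1);
                if (map_start <= map_last) {
                        offset = map_start - prange->start;
                        npages = map_last - map_start + 1;
                        r = svm_range_map_to_gpus(prange, offset, npages, readonly,
                                                  ctx->bitmap, wait, flush_tlb);
                }
        }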
Re: [PATCH] drm/amdkfd: Use partial mapping in GPU page fault recovery
On 2023-10-19 12:20, Chen, Xiaogang wrote:

On 10/19/2023 11:08 AM, Philip Yang wrote:

On 2023-10-19 10:24, Xiaogang.Chen wrote:

From: Xiaogang Chen

After partial migration to recover a GPU page fault, this patch maps the
GPU VM space only for the page range that got migrated, instead of mapping
all pages of the svm range in which the page fault happened.

Signed-off-by: Xiaogang Chen
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 54af7a2b29f8..81dbcc8a4ccc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1619,6 +1619,7 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
  * 5. Release page table (and SVM BO) reservation
  */
 static int svm_range_validate_and_map(struct mm_struct *mm,
+                                      unsigned long map_start, unsigned long map_last,
                                       struct svm_range *prange, int32_t gpuidx,
                                       bool intr, bool wait, bool flush_tlb)
 {
@@ -1630,6 +1631,12 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
         int32_t idx;
         int r = 0;
 
+        if (map_start < prange->start || map_last > prange->last) {

This is not needed, as this case should never happen, and you also use
max/min to limit map_start, map_last below.

This was just a sanity check, I can remove it.

+                pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
+                         map_start, map_last, prange->start, prange->last);
+                return -EFAULT;
+        }
+
         ctx = kzalloc(sizeof(struct svm_validate_context), GFP_KERNEL);
         if (!ctx)
                 return -ENOMEM;
@@ -1747,9 +1754,16 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
                         r = -EAGAIN;
                 }
 
-                if (!r)
-                        r = svm_range_map_to_gpus(prange, offset, npages, readonly,
-                                                  ctx->bitmap, wait, flush_tlb);
+                if (!r) {
+                        map_start = max(map_start, prange->start + offset);
+                        map_last = min(map_last, prange->start + offset + npages);

This should be moved outside the for loop; otherwise amdgpu_hmm_range_get_pages
and svm_range_dma_map still work on the entire prange, and then the
prange->vram_pages update logic should be changed accordingly.

We need to use the hmm function to get the vram page count on the whole
range, as we do not know how many vram pages remain after partial migration;
then we know whether the vram bo can be released.

ok, migrate to vram and migrate to ram have the vram pages updated already;
this is needed for the split prange. We could update prange->vram_pages when
splitting prange; this can be done in another patch.

map_last is inclusive:

+                        map_last = min(map_last, prange->start + offset + npages - 1);

Regards,
Philip

Regards

Xiaogang

Regards,
Philip

+                        if (map_start <= map_last) {
+                                offset = map_start - prange->start;
+                                npages = map_last - map_start + 1;
+                                r = svm_range_map_to_gpus(prange, offset, npages, readonly,
+                                                          ctx->bitmap, wait, flush_tlb);
+                        }
+                }
 
                 if (!r && next == end)
                         prange->mapped_to_gpu = true;
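To illustrate why the "- 1" matters (hypothetical numbers, not from the
thread): if prange->start is 0x100, offset is 0 and npages is 0x10, the chunk
just validated covers pages 0x100 through 0x10f. Clamping with
prange->start + offset + npages lets map_last reach 0x110, one page past the
chunk, while prange->start + offset + npages - 1 caps it at 0x10f, the last
page that is actually valid to map.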
Re: [PATCH] drm/amdkfd: Use partial mapping in GPU page fault recovery
On 10/19/2023 11:08 AM, Philip Yang wrote:

On 2023-10-19 10:24, Xiaogang.Chen wrote:

From: Xiaogang Chen

After partial migration to recover a GPU page fault, this patch maps the
GPU VM space only for the page range that got migrated, instead of mapping
all pages of the svm range in which the page fault happened.

Signed-off-by: Xiaogang Chen
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 54af7a2b29f8..81dbcc8a4ccc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1619,6 +1619,7 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
  * 5. Release page table (and SVM BO) reservation
  */
 static int svm_range_validate_and_map(struct mm_struct *mm,
+                                      unsigned long map_start, unsigned long map_last,
                                       struct svm_range *prange, int32_t gpuidx,
                                       bool intr, bool wait, bool flush_tlb)
 {
@@ -1630,6 +1631,12 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
         int32_t idx;
         int r = 0;
 
+        if (map_start < prange->start || map_last > prange->last) {

This is not needed, as this case should never happen, and you also use
max/min to limit map_start, map_last below.

This was just a sanity check, I can remove it.

+                pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
+                         map_start, map_last, prange->start, prange->last);
+                return -EFAULT;
+        }
+
         ctx = kzalloc(sizeof(struct svm_validate_context), GFP_KERNEL);
         if (!ctx)
                 return -ENOMEM;
@@ -1747,9 +1754,16 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
                         r = -EAGAIN;
                 }
 
-                if (!r)
-                        r = svm_range_map_to_gpus(prange, offset, npages, readonly,
-                                                  ctx->bitmap, wait, flush_tlb);
+                if (!r) {
+                        map_start = max(map_start, prange->start + offset);
+                        map_last = min(map_last, prange->start + offset + npages);

This should be moved outside the for loop; otherwise amdgpu_hmm_range_get_pages
and svm_range_dma_map still work on the entire prange, and then the
prange->vram_pages update logic should be changed accordingly.

We need to use the hmm function to get the vram page count on the whole
range, as we do not know how many vram pages remain after partial migration;
then we know whether the vram bo can be released.
Regards

Xiaogang

Regards,
Philip

+                        if (map_start <= map_last) {
+                                offset = map_start - prange->start;
+                                npages = map_last - map_start + 1;
+                                r = svm_range_map_to_gpus(prange, offset, npages, readonly,
+                                                          ctx->bitmap, wait, flush_tlb);
+                        }
+                }
 
                 if (!r && next == end)
                         prange->mapped_to_gpu = true;
@@ -1855,8 +1869,8 @@ static void svm_range_restore_work(struct work_struct *work)
                  */
                 mutex_lock(&prange->migrate_mutex);
 
-                r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
-                                               false, true, false);
+                r = svm_range_validate_and_map(mm, prange->start, prange->last, prange,
+                                               MAX_GPU_INSTANCE, false, true, false);
                 if (r)
                         pr_debug("failed %d to map 0x%lx to gpus\n", r,
                                  prange->start);
@@ -3069,6 +3083,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
         kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr,
                                        write_fault, timestamp);
 
+        start = prange->start;
+        last = prange->last;
         if (prange->actual_loc != 0 || best_loc != 0) {
                 migration = true;
                 /* Align migration range start and size to granularity size */
@@ -3102,10 +3118,11 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
                 }
         }
 
-        r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false);
+        r = svm_range_validate_and_map(mm, start, last, prange, gpuidx, false,
+                                       false, false);
         if (r)
                 pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
-                         r, svms, prange->start, prange->last);
+                         r, svms, start, last);
 
         kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr,
                                      migration);
Re: [PATCH] drm/amdkfd: Use partial mapping in GPU page fault recovery
On 2023-10-19 10:24, Xiaogang.Chen wrote:

From: Xiaogang Chen

After partial migration to recover a GPU page fault, this patch maps the
GPU VM space only for the page range that got migrated, instead of mapping
all pages of the svm range in which the page fault happened.

Signed-off-by: Xiaogang Chen
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 54af7a2b29f8..81dbcc8a4ccc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1619,6 +1619,7 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
  * 5. Release page table (and SVM BO) reservation
  */
 static int svm_range_validate_and_map(struct mm_struct *mm,
+                                      unsigned long map_start, unsigned long map_last,
                                       struct svm_range *prange, int32_t gpuidx,
                                       bool intr, bool wait, bool flush_tlb)
 {
@@ -1630,6 +1631,12 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
         int32_t idx;
         int r = 0;
 
+        if (map_start < prange->start || map_last > prange->last) {

This is not needed, as this case should never happen, and you also use
max/min to limit map_start, map_last below.

+                pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
+                         map_start, map_last, prange->start, prange->last);
+                return -EFAULT;
+        }
+
         ctx = kzalloc(sizeof(struct svm_validate_context), GFP_KERNEL);
         if (!ctx)
                 return -ENOMEM;
@@ -1747,9 +1754,16 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
                         r = -EAGAIN;
                 }
 
-                if (!r)
-                        r = svm_range_map_to_gpus(prange, offset, npages, readonly,
-                                                  ctx->bitmap, wait, flush_tlb);
+                if (!r) {
+                        map_start = max(map_start, prange->start + offset);
+                        map_last = min(map_last, prange->start + offset + npages);

This should be moved outside the for loop; otherwise amdgpu_hmm_range_get_pages
and svm_range_dma_map still work on the entire prange, and then the
prange->vram_pages update logic should be changed accordingly.
Regards,
Philip

+                        if (map_start <= map_last) {
+                                offset = map_start - prange->start;
+                                npages = map_last - map_start + 1;
+                                r = svm_range_map_to_gpus(prange, offset, npages, readonly,
+                                                          ctx->bitmap, wait, flush_tlb);
+                        }
+                }
 
                 if (!r && next == end)
                         prange->mapped_to_gpu = true;
@@ -1855,8 +1869,8 @@ static void svm_range_restore_work(struct work_struct *work)
                  */
                 mutex_lock(&prange->migrate_mutex);
 
-                r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
-                                               false, true, false);
+                r = svm_range_validate_and_map(mm, prange->start, prange->last, prange,
+                                               MAX_GPU_INSTANCE, false, true, false);
                 if (r)
                         pr_debug("failed %d to map 0x%lx to gpus\n", r,
                                  prange->start);
@@ -3069,6 +3083,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
         kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr,
                                        write_fault, timestamp);
 
+        start = prange->start;
+        last = prange->last;
         if (prange->actual_loc != 0 || best_loc != 0) {
                 migration = true;
                 /* Align migration range start and size to granularity size */
@@ -3102,10 +3118,11 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
                 }
         }
 
-        r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false);
+        r = svm_range_validate_and_map(mm, start, last, prange, gpuidx, false,
+                                       false, false);
         if (r)
                 pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
-                         r, svms, prange->start, prange->last);
+                         r, svms, start, last);
 
         kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr,
                                      migration);
@@ -3650,7 +3667,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
                 flush_tlb = !migrated && update_mapping &&
                             prange->mapped_to_gpu;
 
-                r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
+                r = svm_range_validate_and_map(mm, prange->start, prange->last, prange, MAX_GPU_INSTANCE,
                                                true, true, flush_tlb);
                 if (r)
                         pr_debug("failed %d to map svm range\n", r);
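To make the range-clamping idea discussed above concrete, here is a small
standalone illustration (user-space C with made-up helper and variable names,
not kernel code) of intersecting an inclusive page range [map_start, map_last]
with a validated chunk:

        #include <stdio.h>

        /* Standalone illustration, not kernel code: intersect the inclusive
         * requested range [map_start, map_last] with a validated chunk that
         * starts at chunk_start and spans npages pages.  Returns the number
         * of pages to map and stores the clamped start page, or returns 0
         * when the two ranges do not overlap.
         */
        static unsigned long clamp_to_chunk(unsigned long map_start,
                                            unsigned long map_last,
                                            unsigned long chunk_start,
                                            unsigned long npages,
                                            unsigned long *out_start)
        {
                unsigned long chunk_last = chunk_start + npages - 1;    /* inclusive */
                unsigned long s = map_start > chunk_start ? map_start : chunk_start;
                unsigned long l = map_last < chunk_last ? map_last : chunk_last;

                if (s > l)
                        return 0;
                *out_start = s;
                return l - s + 1;
        }

        int main(void)
        {
                unsigned long start = 0, n;

                /* Fault window 0x105..0x108 inside a validated chunk 0x100..0x10f. */
                n = clamp_to_chunk(0x105, 0x108, 0x100, 0x10, &start);
                printf("map %lu pages starting at page 0x%lx\n", n, start); /* 4 pages at 0x105 */
                return 0;
        }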
[PATCH] drm/amdkfd: Use partial mapping in GPU page fault recovery
From: Xiaogang Chen

After partial migration to recover a GPU page fault, this patch maps the
GPU VM space only for the page range that got migrated, instead of mapping
all pages of the svm range in which the page fault happened.

Signed-off-by: Xiaogang Chen
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 54af7a2b29f8..81dbcc8a4ccc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1619,6 +1619,7 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
  * 5. Release page table (and SVM BO) reservation
  */
 static int svm_range_validate_and_map(struct mm_struct *mm,
+                                      unsigned long map_start, unsigned long map_last,
                                       struct svm_range *prange, int32_t gpuidx,
                                       bool intr, bool wait, bool flush_tlb)
 {
@@ -1630,6 +1631,12 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
         int32_t idx;
         int r = 0;
 
+        if (map_start < prange->start || map_last > prange->last) {
+                pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
+                         map_start, map_last, prange->start, prange->last);
+                return -EFAULT;
+        }
+
         ctx = kzalloc(sizeof(struct svm_validate_context), GFP_KERNEL);
         if (!ctx)
                 return -ENOMEM;
@@ -1747,9 +1754,16 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
                         r = -EAGAIN;
                 }
 
-                if (!r)
-                        r = svm_range_map_to_gpus(prange, offset, npages, readonly,
-                                                  ctx->bitmap, wait, flush_tlb);
+                if (!r) {
+                        map_start = max(map_start, prange->start + offset);
+                        map_last = min(map_last, prange->start + offset + npages);
+                        if (map_start <= map_last) {
+                                offset = map_start - prange->start;
+                                npages = map_last - map_start + 1;
+                                r = svm_range_map_to_gpus(prange, offset, npages, readonly,
+                                                          ctx->bitmap, wait, flush_tlb);
+                        }
+                }
 
                 if (!r && next == end)
                         prange->mapped_to_gpu = true;
@@ -1855,8 +1869,8 @@ static void svm_range_restore_work(struct work_struct *work)
                  */
                 mutex_lock(&prange->migrate_mutex);
 
-                r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
-                                               false, true, false);
+                r = svm_range_validate_and_map(mm, prange->start, prange->last, prange,
+                                               MAX_GPU_INSTANCE, false, true, false);
                 if (r)
                         pr_debug("failed %d to map 0x%lx to gpus\n", r,
                                  prange->start);
@@ -3069,6 +3083,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
         kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr,
                                        write_fault, timestamp);
 
+        start = prange->start;
+        last = prange->last;
         if (prange->actual_loc != 0 || best_loc != 0) {
                 migration = true;
                 /* Align migration range start and size to granularity size */
@@ -3102,10 +3118,11 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
                 }
         }
 
-        r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false);
+        r = svm_range_validate_and_map(mm, start, last, prange, gpuidx, false,
+                                       false, false);
         if (r)
                 pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
-                         r, svms, prange->start, prange->last);
+                         r, svms, start, last);
 
         kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr,
                                      migration);
@@ -3650,7 +3667,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
                 flush_tlb = !migrated && update_mapping &&
                             prange->mapped_to_gpu;
 
-                r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
+                r = svm_range_validate_and_map(mm, prange->start, prange->last, prange, MAX_GPU_INSTANCE,
                                                true, true, flush_tlb);
                 if (r)
                         pr_debug("failed %d to map svm range\n", r);
-- 
2.25.1