Am 2021-04-18 um 1:35 p.m. schrieb Philip Yang:
> A retry fault interrupt may be pending in the IH ring after the GPU page
> table is updated to recover the vm fault, because each page of the range
> generates a retry fault interrupt. There is a race if the application
> unmaps the range to remove and free it first, and then the retry fault
> work restore_pages handles the retry fault interrupt: because the range
> cannot be found, the vm fault cannot be recovered, and an incorrect GPU
> vm fault is reported to the application.
>
> Before unmapping to remove and free the range, drain retry fault
> interrupts from IH ring1 to ensure no retry fault arrives after the
> range is removed.
>
> When draining retry fault interrupts, skip recovery for a range that is
> on the deferred list to be removed, or for a child range, which was
> split off by unmap and is not added to svms and has no interval
> notifier.
>
> Signed-off-by: Philip Yang <philip.y...@amd.com>
The series looks good to me. But the skip-recover changes and the
-EAGAIN handling in svm_range_restore_pages should be a separate patch.

Also, when we defer processing an interrupt (skip-recover or r ==
-EAGAIN) and wait for a subsequent retry interrupt, we may want to
remove that fault address from the gmc->fault_ring that's used by
amdgpu_gmc_filter_faults to filter out repeated page faults on the same
address. In the future we would also have to remove those addresses from
the IH CAM.

Regards,
  Felix



> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 75 +++++++++++++++++++++++++++-
>  1 file changed, 74 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index 0e0b4ffd20ab..45dd055118eb 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -1402,11 +1402,13 @@ static int svm_range_validate_and_map(struct 
> mm_struct *mm,
>       svm_range_lock(prange);
>       if (!prange->actual_loc) {
>               if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
> +                     pr_debug("hmm update the range, need validate again\n");
>                       r = -EAGAIN;
>                       goto unlock_out;
>               }
>       }
>       if (!list_empty(&prange->child_list)) {
> +             pr_debug("range split by unmap in parallel, validate again\n");
>               r = -EAGAIN;
>               goto unlock_out;
>       }
> @@ -1828,6 +1830,28 @@ svm_range_handle_list_op(struct svm_range_list *svms, 
> struct svm_range *prange)
>       }
>  }
>  
> +static void svm_range_drain_retry_fault(struct svm_range_list *svms)
> +{
> +     struct kfd_process_device *pdd;
> +     struct amdgpu_device *adev;
> +     struct kfd_process *p;
> +     uint32_t i;
> +
> +     p = container_of(svms, struct kfd_process, svms);
> +
> +     for (i = 0; i < p->n_pdds; i++) {
> +             pdd = p->pdds[i];
> +             if (!pdd)
> +                     continue;
> +
> +             pr_debug("drain retry fault gpu %d svms %p\n", i, svms);
> +             adev = (struct amdgpu_device *)pdd->dev->kgd;
> +
> +             amdgpu_ih_wait_on_checkpoint_process(adev, &adev->irq.ih1);
> +             pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
> +     }
> +}
> +
>  static void svm_range_deferred_list_work(struct work_struct *work)
>  {
>       struct svm_range_list *svms;
> @@ -1845,6 +1869,10 @@ static void svm_range_deferred_list_work(struct 
> work_struct *work)
>               pr_debug("prange 0x%p [0x%lx 0x%lx] op %d\n", prange,
>                        prange->start, prange->last, prange->work_item.op);
>  
> +             /* Make sure no stale retry fault coming after range is freed */
> +             if (prange->work_item.op == SVM_OP_UNMAP_RANGE)
> +                     svm_range_drain_retry_fault(prange->svms);
> +
>               mm = prange->work_item.mm;
>               mmap_write_lock(mm);
>               mutex_lock(&svms->lock);
> @@ -2152,6 +2180,44 @@ svm_range_best_restore_location(struct svm_range 
> *prange,
>       return -1;
>  }
>  
> +/* svm_range_skip_recover - decide if prange can be recovered
> + * @prange: svm range structure
> + *
> + * GPU vm retry fault handle skip recover the range for cases:
> + * 1. prange is on deferred list to be removed after unmap, it is stale 
> fault,
> + *    deferred list work will drain the stale fault before free the prange.
> + * 2. prange is on deferred list to add interval notifier after split, or
> + * 3. prange is child range, it is split from parent prange, recover later
> + *    after interval notifier is added.
> + *
> + * Return: true to skip recover, false to recover
> + */
> +static bool svm_range_skip_recover(struct svm_range *prange)
> +{
> +     struct svm_range_list *svms = prange->svms;
> +
> +     spin_lock(&svms->deferred_list_lock);
> +     if (list_empty(&prange->deferred_list) &&
> +         list_empty(&prange->child_list)) {
> +             spin_unlock(&svms->deferred_list_lock);
> +             return false;
> +     }
> +     spin_unlock(&svms->deferred_list_lock);
> +
> +     if (prange->work_item.op == SVM_OP_UNMAP_RANGE) {
> +             pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] unmapped\n",
> +                      svms, prange, prange->start, prange->last);
> +             return true;
> +     }
> +     if (prange->work_item.op == SVM_OP_ADD_RANGE_AND_MAP ||
> +         prange->work_item.op == SVM_OP_ADD_RANGE) {
> +             pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] not added yet\n",
> +                      svms, prange, prange->start, prange->last);
> +             return true;
> +     }
> +     return false;
> +}
> +
>  int
>  svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>                       uint64_t addr)
> @@ -2187,7 +2253,6 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
> unsigned int pasid,
>       mmap_read_lock(mm);
>       mutex_lock(&svms->lock);
>       prange = svm_range_from_addr(svms, addr, NULL);
> -
>       if (!prange) {
>               pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
>                        svms, addr);
> @@ -2196,6 +2261,10 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
> unsigned int pasid,
>       }
>  
>       mutex_lock(&prange->migrate_mutex);
> +
> +     if (svm_range_skip_recover(prange))
> +             goto out_unlock_range;
> +
>       timestamp = ktime_to_us(ktime_get()) - prange->validate_timestamp;
>       /* skip duplicate vm fault on different pages of same range */
>       if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) {
> @@ -2254,6 +2323,10 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
> unsigned int pasid,
>  out:
>       kfd_unref_process(p);
>  
> +     if (r == -EAGAIN) {
> +             pr_debug("recover vm fault later\n");
> +             r = 0;
> +     }
>       return r;
>  }
>  
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to