I realized that most of the code in gmc_v9_0_prescreen_iv is not
actually hardware-specific. If it were not prescreening, but using an
amdgpu_iv_entry that was already parsed, I think it could just be a
generic function for processing retry faults:

  * looking up the VM of a fault
  * storing retry faults in a per-VM fifo
  * dropping faults that have already been seen

In other words, it's just a generic top half interrupt handler for retry
faults while the bottom half (worker thread) would use the per-VM FIFOs
to handle those pending retry faults.

Regards,
  Felix


On 2018-09-26 09:53 AM, Christian König wrote:
> The GMC/VM subsystem is causing the faults, so move the handling here as
> well.
>
> Signed-off-by: Christian König <christian.koe...@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 59 +++++++++++++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 69 ----------------------------------
>  2 files changed, 59 insertions(+), 69 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 729a2c230f91..f8d69ab85fc3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -244,6 +244,62 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
>       return 0;
>  }
>  
> +/**
> + * gmc_v9_0_prescreen_iv - prescreen an interrupt vector
> + *
> + * @adev: amdgpu_device pointer
> + *
> + * Returns true if the interrupt vector should be further processed.
> + */
> +static bool gmc_v9_0_prescreen_iv(struct amdgpu_device *adev,
> +                               struct amdgpu_iv_entry *entry,
> +                               uint64_t addr)
> +{
> +     struct amdgpu_vm *vm;
> +     u64 key;
> +     int r;
> +
> +     /* No PASID, can't identify faulting process */
> +     if (!entry->pasid)
> +             return true;
> +
> +     /* Not a retry fault */
> +     if (!(entry->src_data[1] & 0x80))
> +             return true;
> +
> +     /* Track retry faults in per-VM fault FIFO. */
> +     spin_lock(&adev->vm_manager.pasid_lock);
> +     vm = idr_find(&adev->vm_manager.pasid_idr, entry->pasid);
> +     if (!vm) {
> +             /* VM not found, process it normally */
> +             spin_unlock(&adev->vm_manager.pasid_lock);
> +             return true;
> +     }
> +
> +     key = AMDGPU_VM_FAULT(entry->pasid, addr);
> +     r = amdgpu_vm_add_fault(vm->fault_hash, key);
> +
> +     /* Hash table is full or the fault is already being processed,
> +      * ignore further page faults
> +      */
> +     if (r != 0) {
> +             spin_unlock(&adev->vm_manager.pasid_lock);
> +             return false;
> +     }
> +     /* No locking required with single writer and single reader */
> +     r = kfifo_put(&vm->faults, key);
> +     if (!r) {
> +             /* FIFO is full. Ignore it until there is space */
> +             amdgpu_vm_clear_fault(vm->fault_hash, key);
> +             spin_unlock(&adev->vm_manager.pasid_lock);
> +             return false;
> +     }
> +
> +     spin_unlock(&adev->vm_manager.pasid_lock);
> +     /* It's the first fault for this address, process it normally */
> +     return true;
> +}
> +
>  static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
>                               struct amdgpu_irq_src *source,
>                               struct amdgpu_iv_entry *entry)
> @@ -255,6 +311,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
>       addr = (u64)entry->src_data[0] << 12;
>       addr |= ((u64)entry->src_data[1] & 0xf) << 44;
>  
> +     if (!gmc_v9_0_prescreen_iv(adev, entry, addr))
> +             return 1;
> +
>       if (!amdgpu_sriov_vf(adev)) {
>               status = RREG32(hub->vm_l2_pro_fault_status);
>               WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
> diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
> index 0f50bef87163..0f68a0cd1fbf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
> @@ -228,76 +228,7 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev)
>   */
>  static bool vega10_ih_prescreen_iv(struct amdgpu_device *adev)
>  {
> -     u32 ring_index = adev->irq.ih.rptr >> 2;
> -     u32 dw0, dw3, dw4, dw5;
> -     u16 pasid;
> -     u64 addr, key;
> -     struct amdgpu_vm *vm;
> -     int r;
> -
> -     dw0 = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]);
> -     dw3 = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]);
> -     dw4 = le32_to_cpu(adev->irq.ih.ring[ring_index + 4]);
> -     dw5 = le32_to_cpu(adev->irq.ih.ring[ring_index + 5]);
> -
> -     /* Filter retry page faults, let only the first one pass. If
> -      * there are too many outstanding faults, ignore them until
> -      * some faults get cleared.
> -      */
> -     switch (dw0 & 0xff) {
> -     case SOC15_IH_CLIENTID_VMC:
> -     case SOC15_IH_CLIENTID_UTCL2:
> -             break;
> -     default:
> -             /* Not a VM fault */
> -             return true;
> -     }
> -
> -     pasid = dw3 & 0xffff;
> -     /* No PASID, can't identify faulting process */
> -     if (!pasid)
> -             return true;
> -
> -     /* Not a retry fault */
> -     if (!(dw5 & 0x80))
> -             return true;
> -
> -     /* Track retry faults in per-VM fault FIFO. */
> -     spin_lock(&adev->vm_manager.pasid_lock);
> -     vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
> -     addr = ((u64)(dw5 & 0xf) << 44) | ((u64)dw4 << 12);
> -     key = AMDGPU_VM_FAULT(pasid, addr);
> -     if (!vm) {
> -             /* VM not found, process it normally */
> -             spin_unlock(&adev->vm_manager.pasid_lock);
> -             return true;
> -     } else {
> -             r = amdgpu_vm_add_fault(vm->fault_hash, key);
> -
> -             /* Hash table is full or the fault is already being processed,
> -              * ignore further page faults
> -              */
> -             if (r != 0) {
> -                     spin_unlock(&adev->vm_manager.pasid_lock);
> -                     goto ignore_iv;
> -             }
> -     }
> -     /* No locking required with single writer and single reader */
> -     r = kfifo_put(&vm->faults, key);
> -     if (!r) {
> -             /* FIFO is full. Ignore it until there is space */
> -             amdgpu_vm_clear_fault(vm->fault_hash, key);
> -             spin_unlock(&adev->vm_manager.pasid_lock);
> -             goto ignore_iv;
> -     }
> -
> -     spin_unlock(&adev->vm_manager.pasid_lock);
> -     /* It's the first fault for this address, process it normally */
>       return true;
> -
> -ignore_iv:
> -     adev->irq.ih.rptr += 32;
> -     return false;
>  }
>  
>  /**

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to