[PATCH 9/9] drm/amdgpu: add graceful VM fault handling v2

2019-09-04 Thread Christian König
Next step towards HMM support. For now just silence the retry fault and
optionally redirect the request to the dummy page.

v2: make sure the VM is not destroyed while we handle the fault.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 74 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  2 +
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  4 ++
 3 files changed, 80 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 951608fc1925..410d89966a66 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -3142,3 +3142,77 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
}
}
 }
+
+/**
+ * amdgpu_vm_handle_fault - graceful handling of VM faults.
+ * @adev: amdgpu device pointer
+ * @pasid: PASID of the VM
+ * @addr: Address of the fault
+ *
+ * Try to gracefully handle a VM fault. Return true if the fault was handled 
and
+ * shouldn't be reported any more.
+ */
+bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
+   uint64_t addr)
+{
+   struct amdgpu_ring *ring = &adev->sdma.instance[0].page;
+   struct amdgpu_bo *root;
+   uint64_t value, flags;
+   struct amdgpu_vm *vm;
+   long r;
+
+   if (!ring->sched.ready)
+   return false;
+
+   spin_lock(&adev->vm_manager.pasid_lock);
+   vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
+   if (vm)
+   root = amdgpu_bo_ref(vm->root.base.bo);
+   else
+   root = NULL;
+   spin_unlock(&adev->vm_manager.pasid_lock);
+
+   if (!root)
+   return false;
+
+   r = amdgpu_bo_reserve(root, true);
+   if (r)
+   goto error_unref;
+
+   spin_lock(&adev->vm_manager.pasid_lock);
+   vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
+   spin_unlock(&adev->vm_manager.pasid_lock);
+
+   if (!vm || vm->root.base.bo != root)
+   goto error_unlock;
+
+   addr /= AMDGPU_GPU_PAGE_SIZE;
+   flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
+   AMDGPU_PTE_SYSTEM;
+
+   if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
+   /* Redirect the access to the dummy page */
+   value = adev->dummy_page_addr;
+   flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE |
+   AMDGPU_PTE_WRITEABLE;
+   } else {
+   value = 0;
+   }
+
+   r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr + 1,
+   flags, value, NULL, NULL);
+   if (r)
+   goto error_unlock;
+
+   r = amdgpu_vm_update_pdes(adev, vm, true);
+
+error_unlock:
+   amdgpu_bo_unreserve(root);
+   if (r < 0)
+   DRM_ERROR("Can't handle page fault (%ld)\n", r);
+
+error_unref:
+   amdgpu_bo_unref(&root);
+
+   return false;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 0a97dc839f3b..4dbbe1b6b413 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -413,6 +413,8 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device 
*adev);
 
 void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,
 struct amdgpu_task_info *task_info);
+bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
+   uint64_t addr);
 
 void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 9d15679df6e0..15a1ce51befa 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -353,6 +353,10 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device 
*adev,
}
 
/* If it's the first fault for this address, process it normally */
+   if (retry_fault && !in_interrupt() &&
+   amdgpu_vm_handle_fault(adev, entry->pasid, addr))
+   return 1; /* This also prevents sending it to KFD */
+
if (!amdgpu_sriov_vf(adev)) {
/*
 * Issue a dummy read to wait for the status register to
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 9/9] drm/amdgpu: add graceful VM fault handling v2

2019-09-04 Thread Yang, Philip
This series looks nice and clear for me, two questions embedded below.

Are we going to use dedicated sdma page queue for direct VM update path 
during a fault?

Thanks,
Philip

On 2019-09-04 11:02 a.m., Christian König wrote:
> Next step towards HMM support. For now just silence the retry fault and
> optionally redirect the request to the dummy page.
> 
> v2: make sure the VM is not destroyed while we handle the fault.
> 
> Signed-off-by: Christian König 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 74 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  2 +
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  4 ++
>   3 files changed, 80 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 951608fc1925..410d89966a66 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -3142,3 +3142,77 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
>   }
>   }
>   }
> +
> +/**
> + * amdgpu_vm_handle_fault - graceful handling of VM faults.
> + * @adev: amdgpu device pointer
> + * @pasid: PASID of the VM
> + * @addr: Address of the fault
> + *
> + * Try to gracefully handle a VM fault. Return true if the fault was handled 
> and
> + * shouldn't be reported any more.
> + */
> +bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
> + uint64_t addr)
> +{
> + struct amdgpu_ring *ring = &adev->sdma.instance[0].page;
> + struct amdgpu_bo *root;
> + uint64_t value, flags;
> + struct amdgpu_vm *vm;
> + long r;
> +
> + if (!ring->sched.ready)
> + return false;
> +
> + spin_lock(&adev->vm_manager.pasid_lock);
> + vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
> + if (vm)
> + root = amdgpu_bo_ref(vm->root.base.bo);
> + else
> + root = NULL;
> + spin_unlock(&adev->vm_manager.pasid_lock);
> +
> + if (!root)
> + return false;
> +
> + r = amdgpu_bo_reserve(root, true);
> + if (r)
> + goto error_unref;
> +
> + spin_lock(&adev->vm_manager.pasid_lock);
> + vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
> + spin_unlock(&adev->vm_manager.pasid_lock);
> +
Here get vm from pasid second time, and check if PD bo is changed, is 
this to handle vm fault race with vm destory?

> + if (!vm || vm->root.base.bo != root)
> + goto error_unlock;
> +
> + addr /= AMDGPU_GPU_PAGE_SIZE;
> + flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
> + AMDGPU_PTE_SYSTEM;
> +
> + if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
> + /* Redirect the access to the dummy page */
> + value = adev->dummy_page_addr;
> + flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE |
> + AMDGPU_PTE_WRITEABLE;
> + } else {
> + value = 0;
> + }
> +
> + r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr + 1,
> + flags, value, NULL, NULL);
> + if (r)
> + goto error_unlock;
> +
After fault address redirect to dummy page, will the fault recover and 
retry continue to execute? Is this dangerous to update PTE to use system 
memory address 0?

> + r = amdgpu_vm_update_pdes(adev, vm, true);
> +
> +error_unlock:
> + amdgpu_bo_unreserve(root);
> + if (r < 0)
> + DRM_ERROR("Can't handle page fault (%ld)\n", r);
> +
> +error_unref:
> + amdgpu_bo_unref(&root);
> +
> + return false;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 0a97dc839f3b..4dbbe1b6b413 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -413,6 +413,8 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device 
> *adev);
>   
>   void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,
>struct amdgpu_task_info *task_info);
> +bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
> + uint64_t addr);
>   
>   void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 9d15679df6e0..15a1ce51befa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -353,6 +353,10 @@ static int gmc_v9_0_process_interrupt(struct 
> amdgpu_device *adev,
>   }
>   
>   /* If it's the first fault for this address, process it normally */
> + if (retry_fault && !in_interrupt() &&
> + amdgpu_vm_handle_fault(adev, entry->pasid, addr))
> + return 1; /* This also prevents sending it to KFD */
> +
>   if (!amdgpu_sriov_vf(adev)) {
>   /*
>* Issue a dummy read to wait for the status regis

Re: [PATCH 9/9] drm/amdgpu: add graceful VM fault handling v2

2019-09-04 Thread Kuehling, Felix
On 2019-09-04 11:02 a.m., Christian König wrote:
> Next step towards HMM support. For now just silence the retry fault and
> optionally redirect the request to the dummy page.
>
> v2: make sure the VM is not destroyed while we handle the fault.
>
> Signed-off-by: Christian König 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 74 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  2 +
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  4 ++
>   3 files changed, 80 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 951608fc1925..410d89966a66 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -3142,3 +3142,77 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
>   }
>   }
>   }
> +
> +/**
> + * amdgpu_vm_handle_fault - graceful handling of VM faults.
> + * @adev: amdgpu device pointer
> + * @pasid: PASID of the VM
> + * @addr: Address of the fault
> + *
> + * Try to gracefully handle a VM fault. Return true if the fault was handled 
> and
> + * shouldn't be reported any more.
> + */
> +bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
> + uint64_t addr)
> +{
> + struct amdgpu_ring *ring = &adev->sdma.instance[0].page;
> + struct amdgpu_bo *root;
> + uint64_t value, flags;
> + struct amdgpu_vm *vm;
> + long r;
> +
> + if (!ring->sched.ready)
> + return false;
> +
> + spin_lock(&adev->vm_manager.pasid_lock);
> + vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
> + if (vm)
> + root = amdgpu_bo_ref(vm->root.base.bo);
> + else
> + root = NULL;
> + spin_unlock(&adev->vm_manager.pasid_lock);
> +
> + if (!root)
> + return false;
> +
> + r = amdgpu_bo_reserve(root, true);
> + if (r)
> + goto error_unref;
> +
> + spin_lock(&adev->vm_manager.pasid_lock);
> + vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
> + spin_unlock(&adev->vm_manager.pasid_lock);

I think this deserves a comment. If I understand it correctly, you're 
looking up the vm twice so that you have the VM root reservation to 
protect against user-after-free. Otherwise the vm pointer is only valid 
as long as you're holding the spin-lock.


> +
> + if (!vm || vm->root.base.bo != root)

The check of vm->root.base.bo should probably still be under the 
spin_lock. Because you're not sure yet it's the right VM, you can't rely 
on the reservation here to prevent use-after-free.


> + goto error_unlock;
> +
> + addr /= AMDGPU_GPU_PAGE_SIZE;
> + flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
> + AMDGPU_PTE_SYSTEM;
> +
> + if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
> + /* Redirect the access to the dummy page */
> + value = adev->dummy_page_addr;
> + flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE |
> + AMDGPU_PTE_WRITEABLE;
> + } else {
> + value = 0;
> + }
> +
> + r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr + 1,
> + flags, value, NULL, NULL);
> + if (r)
> + goto error_unlock;
> +
> + r = amdgpu_vm_update_pdes(adev, vm, true);
> +
> +error_unlock:
> + amdgpu_bo_unreserve(root);
> + if (r < 0)
> + DRM_ERROR("Can't handle page fault (%ld)\n", r);
> +
> +error_unref:
> + amdgpu_bo_unref(&root);
> +
> + return false;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 0a97dc839f3b..4dbbe1b6b413 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -413,6 +413,8 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device 
> *adev);
>   
>   void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,
>struct amdgpu_task_info *task_info);
> +bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
> + uint64_t addr);
>   
>   void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 9d15679df6e0..15a1ce51befa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -353,6 +353,10 @@ static int gmc_v9_0_process_interrupt(struct 
> amdgpu_device *adev,
>   }
>   
>   /* If it's the first fault for this address, process it normally */
> + if (retry_fault && !in_interrupt() &&
> + amdgpu_vm_handle_fault(adev, entry->pasid, addr))
> + return 1; /* This also prevents sending it to KFD */

The !in_interrupt() is meant to only do this on the rerouted interrupt 
ring that's handled by a worker function?

Looks like amdgpu_vm_han

Re: [PATCH 9/9] drm/amdgpu: add graceful VM fault handling v2

2019-09-09 Thread Christian König

Am 04.09.19 um 22:12 schrieb Yang, Philip:

This series looks nice and clear for me, two questions embedded below.

Are we going to use dedicated sdma page queue for direct VM update path
during a fault?

Thanks,
Philip

On 2019-09-04 11:02 a.m., Christian König wrote:

Next step towards HMM support. For now just silence the retry fault and
optionally redirect the request to the dummy page.

v2: make sure the VM is not destroyed while we handle the fault.

Signed-off-by: Christian König 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 74 ++
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  2 +
   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  4 ++
   3 files changed, 80 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 951608fc1925..410d89966a66 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -3142,3 +3142,77 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
}
}
   }
+
+/**
+ * amdgpu_vm_handle_fault - graceful handling of VM faults.
+ * @adev: amdgpu device pointer
+ * @pasid: PASID of the VM
+ * @addr: Address of the fault
+ *
+ * Try to gracefully handle a VM fault. Return true if the fault was handled 
and
+ * shouldn't be reported any more.
+ */
+bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
+   uint64_t addr)
+{
+   struct amdgpu_ring *ring = &adev->sdma.instance[0].page;
+   struct amdgpu_bo *root;
+   uint64_t value, flags;
+   struct amdgpu_vm *vm;
+   long r;
+
+   if (!ring->sched.ready)
+   return false;
+
+   spin_lock(&adev->vm_manager.pasid_lock);
+   vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
+   if (vm)
+   root = amdgpu_bo_ref(vm->root.base.bo);
+   else
+   root = NULL;
+   spin_unlock(&adev->vm_manager.pasid_lock);
+
+   if (!root)
+   return false;
+
+   r = amdgpu_bo_reserve(root, true);
+   if (r)
+   goto error_unref;
+
+   spin_lock(&adev->vm_manager.pasid_lock);
+   vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
+   spin_unlock(&adev->vm_manager.pasid_lock);
+

Here get vm from pasid second time, and check if PD bo is changed, is
this to handle vm fault race with vm destory?


Yes, exactly.




+   if (!vm || vm->root.base.bo != root)
+   goto error_unlock;
+
+   addr /= AMDGPU_GPU_PAGE_SIZE;
+   flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
+   AMDGPU_PTE_SYSTEM;
+
+   if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
+   /* Redirect the access to the dummy page */
+   value = adev->dummy_page_addr;
+   flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE |
+   AMDGPU_PTE_WRITEABLE;
+   } else {
+   value = 0;
+   }
+
+   r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr + 1,
+   flags, value, NULL, NULL);
+   if (r)
+   goto error_unlock;
+

After fault address redirect to dummy page, will the fault recover and
retry continue to execute?


Yes, the read/write operation will just retry and use the value from the 
dummy page instead.



Is this dangerous to update PTE to use system
memory address 0?


What are you talking about? The dummy page is a page allocate by TTM 
where we redirect faulty accesses to.


Regards,
Christian.




+   r = amdgpu_vm_update_pdes(adev, vm, true);
+
+error_unlock:
+   amdgpu_bo_unreserve(root);
+   if (r < 0)
+   DRM_ERROR("Can't handle page fault (%ld)\n", r);
+
+error_unref:
+   amdgpu_bo_unref(&root);
+
+   return false;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 0a97dc839f3b..4dbbe1b6b413 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -413,6 +413,8 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device 
*adev);
   
   void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,

 struct amdgpu_task_info *task_info);
+bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
+   uint64_t addr);
   
   void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
   
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

index 9d15679df6e0..15a1ce51befa 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -353,6 +353,10 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device 
*adev,
}
   
   	/* If it's the first fault for this address, process it normally */

+   if (retry_fault && !in_interrupt() &&
+   amdgpu_vm_handle_fault(adev, entry->pasid, addr))
+   return 1; /*

Re: [PATCH 9/9] drm/amdgpu: add graceful VM fault handling v2

2019-09-09 Thread Christian König

Am 05.09.19 um 00:47 schrieb Kuehling, Felix:

On 2019-09-04 11:02 a.m., Christian König wrote:

Next step towards HMM support. For now just silence the retry fault and
optionally redirect the request to the dummy page.

v2: make sure the VM is not destroyed while we handle the fault.

Signed-off-by: Christian König 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 74 ++
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  2 +
   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  4 ++
   3 files changed, 80 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 951608fc1925..410d89966a66 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -3142,3 +3142,77 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
}
}
   }
+
+/**
+ * amdgpu_vm_handle_fault - graceful handling of VM faults.
+ * @adev: amdgpu device pointer
+ * @pasid: PASID of the VM
+ * @addr: Address of the fault
+ *
+ * Try to gracefully handle a VM fault. Return true if the fault was handled 
and
+ * shouldn't be reported any more.
+ */
+bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
+   uint64_t addr)
+{
+   struct amdgpu_ring *ring = &adev->sdma.instance[0].page;
+   struct amdgpu_bo *root;
+   uint64_t value, flags;
+   struct amdgpu_vm *vm;
+   long r;
+
+   if (!ring->sched.ready)
+   return false;
+
+   spin_lock(&adev->vm_manager.pasid_lock);
+   vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
+   if (vm)
+   root = amdgpu_bo_ref(vm->root.base.bo);
+   else
+   root = NULL;
+   spin_unlock(&adev->vm_manager.pasid_lock);
+
+   if (!root)
+   return false;
+
+   r = amdgpu_bo_reserve(root, true);
+   if (r)
+   goto error_unref;
+
+   spin_lock(&adev->vm_manager.pasid_lock);
+   vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
+   spin_unlock(&adev->vm_manager.pasid_lock);

I think this deserves a comment. If I understand it correctly, you're
looking up the vm twice so that you have the VM root reservation to
protect against user-after-free. Otherwise the vm pointer is only valid
as long as you're holding the spin-lock.



+
+   if (!vm || vm->root.base.bo != root)

The check of vm->root.base.bo should probably still be under the
spin_lock. Because you're not sure yet it's the right VM, you can't rely
on the reservation here to prevent use-after-free.


Good point, going to fix that.





+   goto error_unlock;
+
+   addr /= AMDGPU_GPU_PAGE_SIZE;
+   flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
+   AMDGPU_PTE_SYSTEM;
+
+   if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
+   /* Redirect the access to the dummy page */
+   value = adev->dummy_page_addr;
+   flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE |
+   AMDGPU_PTE_WRITEABLE;
+   } else {
+   value = 0;
+   }
+
+   r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr + 1,
+   flags, value, NULL, NULL);
+   if (r)
+   goto error_unlock;
+
+   r = amdgpu_vm_update_pdes(adev, vm, true);
+
+error_unlock:
+   amdgpu_bo_unreserve(root);
+   if (r < 0)
+   DRM_ERROR("Can't handle page fault (%ld)\n", r);
+
+error_unref:
+   amdgpu_bo_unref(&root);
+
+   return false;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 0a97dc839f3b..4dbbe1b6b413 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -413,6 +413,8 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device 
*adev);
   
   void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,

 struct amdgpu_task_info *task_info);
+bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
+   uint64_t addr);
   
   void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
   
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

index 9d15679df6e0..15a1ce51befa 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -353,6 +353,10 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device 
*adev,
}
   
   	/* If it's the first fault for this address, process it normally */

+   if (retry_fault && !in_interrupt() &&
+   amdgpu_vm_handle_fault(adev, entry->pasid, addr))
+   return 1; /* This also prevents sending it to KFD */

The !in_interrupt() is meant to only do this on the rerouted interrupt
ring that's handled by a worker function?


Yes, exactly. But I plan to add a workaround where the CPU redirects 

Re: [PATCH 9/9] drm/amdgpu: add graceful VM fault handling v2

2019-09-09 Thread Yang, Philip


On 2019-09-09 8:03 a.m., Christian König wrote:
> Am 04.09.19 um 22:12 schrieb Yang, Philip:
>> This series looks nice and clear for me, two questions embedded below.
>>
>> Are we going to use dedicated sdma page queue for direct VM update path
>> during a fault?
>>
>> Thanks,
>> Philip
>>
>> On 2019-09-04 11:02 a.m., Christian König wrote:
>>> Next step towards HMM support. For now just silence the retry fault and
>>> optionally redirect the request to the dummy page.
>>>
>>> v2: make sure the VM is not destroyed while we handle the fault.
>>>
>>> Signed-off-by: Christian König 
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 74 
>>> ++
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  2 +
>>>    drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  4 ++
>>>    3 files changed, 80 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index 951608fc1925..410d89966a66 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -3142,3 +3142,77 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm 
>>> *vm)
>>>    }
>>>    }
>>>    }
>>> +
>>> +/**
>>> + * amdgpu_vm_handle_fault - graceful handling of VM faults.
>>> + * @adev: amdgpu device pointer
>>> + * @pasid: PASID of the VM
>>> + * @addr: Address of the fault
>>> + *
>>> + * Try to gracefully handle a VM fault. Return true if the fault was 
>>> handled and
>>> + * shouldn't be reported any more.
>>> + */
>>> +bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int 
>>> pasid,
>>> +    uint64_t addr)
>>> +{
>>> +    struct amdgpu_ring *ring = &adev->sdma.instance[0].page;
>>> +    struct amdgpu_bo *root;
>>> +    uint64_t value, flags;
>>> +    struct amdgpu_vm *vm;
>>> +    long r;
>>> +
>>> +    if (!ring->sched.ready)
>>> +    return false;
>>> +
>>> +    spin_lock(&adev->vm_manager.pasid_lock);
>>> +    vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>>> +    if (vm)
>>> +    root = amdgpu_bo_ref(vm->root.base.bo);
>>> +    else
>>> +    root = NULL;
>>> +    spin_unlock(&adev->vm_manager.pasid_lock);
>>> +
>>> +    if (!root)
>>> +    return false;
>>> +
>>> +    r = amdgpu_bo_reserve(root, true);
>>> +    if (r)
>>> +    goto error_unref;
>>> +
>>> +    spin_lock(&adev->vm_manager.pasid_lock);
>>> +    vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>>> +    spin_unlock(&adev->vm_manager.pasid_lock);
>>> +
>> Here get vm from pasid second time, and check if PD bo is changed, is
>> this to handle vm fault race with vm destory?
> 
> Yes, exactly.
> 
>>
>>> +    if (!vm || vm->root.base.bo != root)
>>> +    goto error_unlock;
>>> +
>>> +    addr /= AMDGPU_GPU_PAGE_SIZE;
>>> +    flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
>>> +    AMDGPU_PTE_SYSTEM;
>>> +
>>> +    if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
>>> +    /* Redirect the access to the dummy page */
>>> +    value = adev->dummy_page_addr;
>>> +    flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE |
>>> +    AMDGPU_PTE_WRITEABLE;
>>> +    } else {
>>> +    value = 0;
>>> +    }
>>> +
>>> +    r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr 
>>> + 1,
>>> +    flags, value, NULL, NULL);
>>> +    if (r)
>>> +    goto error_unlock;
>>> +
>> After fault address redirect to dummy page, will the fault recover and
>> retry continue to execute?
> 
> Yes, the read/write operation will just retry and use the value from the 
> dummy page instead.
> 
>> Is this dangerous to update PTE to use system
>> memory address 0?
> 
> What are you talking about? The dummy page is a page allocate by TTM 
> where we redirect faulty accesses to.
> 
For amdgpu_vm_fault_stop equals to AMDGPU_VM_FAULT_STOP_FIRST/ALWAYS 
case, value is 0, this will redirect to system memory 0. Maybe redirect 
is only needed for AMDGPU_VM_FAULT_STOP_NEVER?

Regards,
Philip

> Regards,
> Christian.
> 
>>
>>> +    r = amdgpu_vm_update_pdes(adev, vm, true);
>>> +
>>> +error_unlock:
>>> +    amdgpu_bo_unreserve(root);
>>> +    if (r < 0)
>>> +    DRM_ERROR("Can't handle page fault (%ld)\n", r);
>>> +
>>> +error_unref:
>>> +    amdgpu_bo_unref(&root);
>>> +
>>> +    return false;
>>> +}
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> index 0a97dc839f3b..4dbbe1b6b413 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> @@ -413,6 +413,8 @@ void amdgpu_vm_check_compute_bug(struct 
>>> amdgpu_device *adev);
>>>    void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned 
>>> int pasid,
>>>     struct amdgpu_task_info *task_info);
>>> +bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int 
>>> pasid,
>>> +    uint64_t addr);
>>>    void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
>>> diff --git

RE: [PATCH 9/9] drm/amdgpu: add graceful VM fault handling v2

2019-09-09 Thread Zeng, Oak
Is looking up vm twice necessary? I think we are in interrupt context, is it 
possible that the user space application can be switched in between? My 
understanding is, if user space application is can't kick in during interrupt 
handling, application shouldn't have chance to exit (then their vm being 
destroyed).

Regards,
Oak

-Original Message-
From: amd-gfx  On Behalf Of Christian 
König
Sent: Monday, September 9, 2019 8:08 AM
To: Kuehling, Felix ; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 9/9] drm/amdgpu: add graceful VM fault handling v2

Am 05.09.19 um 00:47 schrieb Kuehling, Felix:
> On 2019-09-04 11:02 a.m., Christian König wrote:
>> Next step towards HMM support. For now just silence the retry fault 
>> and optionally redirect the request to the dummy page.
>>
>> v2: make sure the VM is not destroyed while we handle the fault.
>>
>> Signed-off-by: Christian König 
>> ---
>>drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 74 ++
>>drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  2 +
>>drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  4 ++
>>3 files changed, 80 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 951608fc1925..410d89966a66 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -3142,3 +3142,77 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
>>  }
>>  }
>>}
>> +
>> +/**
>> + * amdgpu_vm_handle_fault - graceful handling of VM faults.
>> + * @adev: amdgpu device pointer
>> + * @pasid: PASID of the VM
>> + * @addr: Address of the fault
>> + *
>> + * Try to gracefully handle a VM fault. Return true if the fault was 
>> +handled and
>> + * shouldn't be reported any more.
>> + */
>> +bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
>> +uint64_t addr)
>> +{
>> +struct amdgpu_ring *ring = &adev->sdma.instance[0].page;
>> +struct amdgpu_bo *root;
>> +uint64_t value, flags;
>> +struct amdgpu_vm *vm;
>> +long r;
>> +
>> +if (!ring->sched.ready)
>> +return false;
>> +
>> +spin_lock(&adev->vm_manager.pasid_lock);
>> +vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>> +if (vm)
>> +root = amdgpu_bo_ref(vm->root.base.bo);
>> +else
>> +root = NULL;
>> +spin_unlock(&adev->vm_manager.pasid_lock);
>> +
>> +if (!root)
>> +return false;
>> +
>> +r = amdgpu_bo_reserve(root, true);
>> +if (r)
>> +goto error_unref;
>> +
>> +spin_lock(&adev->vm_manager.pasid_lock);
>> +vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>> +spin_unlock(&adev->vm_manager.pasid_lock);
> I think this deserves a comment. If I understand it correctly, you're 
> looking up the vm twice so that you have the VM root reservation to 
> protect against user-after-free. Otherwise the vm pointer is only 
> valid as long as you're holding the spin-lock.
>
>
>> +
>> +if (!vm || vm->root.base.bo != root)
> The check of vm->root.base.bo should probably still be under the 
> spin_lock. Because you're not sure yet it's the right VM, you can't 
> rely on the reservation here to prevent use-after-free.

Good point, going to fix that.

>
>
>> +goto error_unlock;
>> +
>> +addr /= AMDGPU_GPU_PAGE_SIZE;
>> +flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
>> +AMDGPU_PTE_SYSTEM;
>> +
>> +if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
>> +/* Redirect the access to the dummy page */
>> +value = adev->dummy_page_addr;
>> +flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE |
>> +AMDGPU_PTE_WRITEABLE;
>> +} else {
>> +value = 0;
>> +}
>> +
>> +r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr + 1,
>> +flags, value, NULL, NULL);
>> +if (r)
>> +goto error_unlock;
>> +
>> +r = amdgpu_vm_update_pdes(adev, vm, true);
>> +
>> +error_unlock:
>> +amdgpu_bo_unreserve(root);
>> +if (r < 0)
>> +DRM_ERROR("Can't handle page fault (%ld)\n", r);
>> +
>> +error_unref:
>> +amdgpu_bo_u

Re: [PATCH 9/9] drm/amdgpu: add graceful VM fault handling v2

2019-09-09 Thread Koenig, Christian
Well first of all we are not in interrupt context here, this is handled 
by a work item or otherwise we couldn't do all the locking.

But even in interrupt context another CPU can easily destroy the VM when 
we just handle a stale fault or the process was killed.

So this extra double checking is strictly necessary.

Regards,
Christian.

Am 09.09.19 um 16:08 schrieb Zeng, Oak:
> Is looking up vm twice necessary? I think we are in interrupt context, is it 
> possible that the user space application can be switched in between? My 
> understanding is, if user space application is can't kick in during interrupt 
> handling, application shouldn't have chance to exit (then their vm being 
> destroyed).
>
> Regards,
> Oak
>
> -Original Message-
> From: amd-gfx  On Behalf Of Christian 
> König
> Sent: Monday, September 9, 2019 8:08 AM
> To: Kuehling, Felix ; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 9/9] drm/amdgpu: add graceful VM fault handling v2
>
> Am 05.09.19 um 00:47 schrieb Kuehling, Felix:
>> On 2019-09-04 11:02 a.m., Christian König wrote:
>>> Next step towards HMM support. For now just silence the retry fault
>>> and optionally redirect the request to the dummy page.
>>>
>>> v2: make sure the VM is not destroyed while we handle the fault.
>>>
>>> Signed-off-by: Christian König 
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 74 ++
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  2 +
>>> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  4 ++
>>> 3 files changed, 80 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index 951608fc1925..410d89966a66 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -3142,3 +3142,77 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
>>> }
>>> }
>>> }
>>> +
>>> +/**
>>> + * amdgpu_vm_handle_fault - graceful handling of VM faults.
>>> + * @adev: amdgpu device pointer
>>> + * @pasid: PASID of the VM
>>> + * @addr: Address of the fault
>>> + *
>>> + * Try to gracefully handle a VM fault. Return true if the fault was
>>> +handled and
>>> + * shouldn't be reported any more.
>>> + */
>>> +bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
>>> +   uint64_t addr)
>>> +{
>>> +   struct amdgpu_ring *ring = &adev->sdma.instance[0].page;
>>> +   struct amdgpu_bo *root;
>>> +   uint64_t value, flags;
>>> +   struct amdgpu_vm *vm;
>>> +   long r;
>>> +
>>> +   if (!ring->sched.ready)
>>> +   return false;
>>> +
>>> +   spin_lock(&adev->vm_manager.pasid_lock);
>>> +   vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>>> +   if (vm)
>>> +   root = amdgpu_bo_ref(vm->root.base.bo);
>>> +   else
>>> +   root = NULL;
>>> +   spin_unlock(&adev->vm_manager.pasid_lock);
>>> +
>>> +   if (!root)
>>> +   return false;
>>> +
>>> +   r = amdgpu_bo_reserve(root, true);
>>> +   if (r)
>>> +   goto error_unref;
>>> +
>>> +   spin_lock(&adev->vm_manager.pasid_lock);
>>> +   vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>>> +   spin_unlock(&adev->vm_manager.pasid_lock);
>> I think this deserves a comment. If I understand it correctly, you're
>> looking up the vm twice so that you have the VM root reservation to
>> protect against user-after-free. Otherwise the vm pointer is only
>> valid as long as you're holding the spin-lock.
>>
>>
>>> +
>>> +   if (!vm || vm->root.base.bo != root)
>> The check of vm->root.base.bo should probably still be under the
>> spin_lock. Because you're not sure yet it's the right VM, you can't
>> rely on the reservation here to prevent use-after-free.
> Good point, going to fix that.
>
>>
>>> +   goto error_unlock;
>>> +
>>> +   addr /= AMDGPU_GPU_PAGE_SIZE;
>>> +   flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
>>> +   AMDGPU_PTE_SYSTEM;
>>> +
>>> +   if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
>>> +   /* Redirect the access to the dummy page */
>>> + 

Re: [PATCH 9/9] drm/amdgpu: add graceful VM fault handling v2

2019-09-09 Thread Koenig, Christian
Am 09.09.19 um 15:58 schrieb Yang, Philip:
>
> On 2019-09-09 8:03 a.m., Christian König wrote:
>> Am 04.09.19 um 22:12 schrieb Yang, Philip:
>>> This series looks nice and clear for me, two questions embedded below.
>>>
>>> Are we going to use dedicated sdma page queue for direct VM update path
>>> during a fault?
>>>
>>> Thanks,
>>> Philip
>>>
>>> On 2019-09-04 11:02 a.m., Christian König wrote:
 Next step towards HMM support. For now just silence the retry fault and
 optionally redirect the request to the dummy page.

 v2: make sure the VM is not destroyed while we handle the fault.

 Signed-off-by: Christian König 
 ---
     drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 74
 ++
     drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  2 +
     drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  4 ++
     3 files changed, 80 insertions(+)

 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
 b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
 index 951608fc1925..410d89966a66 100644
 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
 @@ -3142,3 +3142,77 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm
 *vm)
     }
     }
     }
 +
 +/**
 + * amdgpu_vm_handle_fault - graceful handling of VM faults.
 + * @adev: amdgpu device pointer
 + * @pasid: PASID of the VM
 + * @addr: Address of the fault
 + *
 + * Try to gracefully handle a VM fault. Return true if the fault was
 handled and
 + * shouldn't be reported any more.
 + */
 +bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int
 pasid,
 +    uint64_t addr)
 +{
 +    struct amdgpu_ring *ring = &adev->sdma.instance[0].page;
 +    struct amdgpu_bo *root;
 +    uint64_t value, flags;
 +    struct amdgpu_vm *vm;
 +    long r;
 +
 +    if (!ring->sched.ready)
 +    return false;
 +
 +    spin_lock(&adev->vm_manager.pasid_lock);
 +    vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
 +    if (vm)
 +    root = amdgpu_bo_ref(vm->root.base.bo);
 +    else
 +    root = NULL;
 +    spin_unlock(&adev->vm_manager.pasid_lock);
 +
 +    if (!root)
 +    return false;
 +
 +    r = amdgpu_bo_reserve(root, true);
 +    if (r)
 +    goto error_unref;
 +
 +    spin_lock(&adev->vm_manager.pasid_lock);
 +    vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
 +    spin_unlock(&adev->vm_manager.pasid_lock);
 +
>>> Here get vm from pasid second time, and check if PD bo is changed, is
>>> this to handle vm fault race with vm destory?
>> Yes, exactly.
>>
 +    if (!vm || vm->root.base.bo != root)
 +    goto error_unlock;
 +
 +    addr /= AMDGPU_GPU_PAGE_SIZE;
 +    flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
 +    AMDGPU_PTE_SYSTEM;
 +
 +    if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
 +    /* Redirect the access to the dummy page */
 +    value = adev->dummy_page_addr;
 +    flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE |
 +    AMDGPU_PTE_WRITEABLE;
 +    } else {
 +    value = 0;
 +    }
 +
 +    r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr
 + 1,
 +    flags, value, NULL, NULL);
 +    if (r)
 +    goto error_unlock;
 +
>>> After fault address redirect to dummy page, will the fault recover and
>>> retry continue to execute?
>> Yes, the read/write operation will just retry and use the value from the
>> dummy page instead.
>>
>>> Is this dangerous to update PTE to use system
>>> memory address 0?
>> What are you talking about? The dummy page is a page allocate by TTM
>> where we redirect faulty accesses to.
>>
> For amdgpu_vm_fault_stop equals to AMDGPU_VM_FAULT_STOP_FIRST/ALWAYS
> case, value is 0, this will redirect to system memory 0. Maybe redirect
> is only needed for AMDGPU_VM_FAULT_STOP_NEVER?

The value 0 doesn't redirect to system memory, it results in a silence 
retry when neither the R nor the W bit is set in a PTE.

Regards,
Christian.

>
> Regards,
> Philip
>
>> Regards,
>> Christian.
>>
 +    r = amdgpu_vm_update_pdes(adev, vm, true);
 +
 +error_unlock:
 +    amdgpu_bo_unreserve(root);
 +    if (r < 0)
 +    DRM_ERROR("Can't handle page fault (%ld)\n", r);
 +
 +error_unref:
 +    amdgpu_bo_unref(&root);
 +
 +    return false;
 +}
 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
 b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
 index 0a97dc839f3b..4dbbe1b6b413 100644
 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
 @@ -413,6 +413,8 @@ void amdgpu_vm_check_compute_bug(struct
 amdgpu_device *adev);
     void amdgpu_vm_get_

RE: [PATCH 9/9] drm/amdgpu: add graceful VM fault handling v2

2019-09-10 Thread Zeng, Oak


Regards,
Oak

-Original Message-
From: Koenig, Christian  
Sent: Monday, September 9, 2019 1:14 PM
To: Zeng, Oak ; Kuehling, Felix ; 
amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 9/9] drm/amdgpu: add graceful VM fault handling v2

> Well first of all we are not in interrupt context here, this is handled by a 
> work item or otherwise we couldn't do all the locking.

This is called from amdgpu_irq_handler. I think this is interrupt context. This 
is also the reason why we use spin lock instead of other sleepable lock like a 
semaphore.

> But even in interrupt context another CPU can easily destroy the VM when we 
> just handle a stale fault or the process was killed.

Agree with this point.

So this extra double checking is strictly necessary.

Regards,
Christian.

Am 09.09.19 um 16:08 schrieb Zeng, Oak:
> Is looking up vm twice necessary? I think we are in interrupt context, is it 
> possible that the user space application can be switched in between? My 
> understanding is, if user space application is can't kick in during interrupt 
> handling, application shouldn't have chance to exit (then their vm being 
> destroyed).
>
> Regards,
> Oak
>
> -Original Message-
> From: amd-gfx  On Behalf Of 
> Christian König
> Sent: Monday, September 9, 2019 8:08 AM
> To: Kuehling, Felix ; 
> amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 9/9] drm/amdgpu: add graceful VM fault handling v2
>
> Am 05.09.19 um 00:47 schrieb Kuehling, Felix:
>> On 2019-09-04 11:02 a.m., Christian König wrote:
>>> Next step towards HMM support. For now just silence the retry fault 
>>> and optionally redirect the request to the dummy page.
>>>
>>> v2: make sure the VM is not destroyed while we handle the fault.
>>>
>>> Signed-off-by: Christian König 
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 74 ++
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  2 +
>>> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  4 ++
>>> 3 files changed, 80 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index 951608fc1925..410d89966a66 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -3142,3 +3142,77 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
>>> }
>>> }
>>> }
>>> +
>>> +/**
>>> + * amdgpu_vm_handle_fault - graceful handling of VM faults.
>>> + * @adev: amdgpu device pointer
>>> + * @pasid: PASID of the VM
>>> + * @addr: Address of the fault
>>> + *
>>> + * Try to gracefully handle a VM fault. Return true if the fault 
>>> +was handled and
>>> + * shouldn't be reported any more.
>>> + */
>>> +bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
>>> +   uint64_t addr)
>>> +{
>>> +   struct amdgpu_ring *ring = &adev->sdma.instance[0].page;
>>> +   struct amdgpu_bo *root;
>>> +   uint64_t value, flags;
>>> +   struct amdgpu_vm *vm;
>>> +   long r;
>>> +
>>> +   if (!ring->sched.ready)
>>> +   return false;
>>> +
>>> +   spin_lock(&adev->vm_manager.pasid_lock);
>>> +   vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>>> +   if (vm)
>>> +   root = amdgpu_bo_ref(vm->root.base.bo);
>>> +   else
>>> +   root = NULL;
>>> +   spin_unlock(&adev->vm_manager.pasid_lock);
>>> +
>>> +   if (!root)
>>> +   return false;
>>> +
>>> +   r = amdgpu_bo_reserve(root, true);
>>> +   if (r)
>>> +   goto error_unref;
>>> +
>>> +   spin_lock(&adev->vm_manager.pasid_lock);
>>> +   vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>>> +   spin_unlock(&adev->vm_manager.pasid_lock);
>> I think this deserves a comment. If I understand it correctly, you're 
>> looking up the vm twice so that you have the VM root reservation to 
>> protect against user-after-free. Otherwise the vm pointer is only 
>> valid as long as you're holding the spin-lock.
>>
>>
>>> +
>>> +   if (!vm || vm->root.base.bo != root)
>> The check of vm->root.base.bo should probably still be under the 
>> spin_lock. Because you're not sure yet it's the right VM, you can't 
>> rely on the reservatio

Re: [PATCH 9/9] drm/amdgpu: add graceful VM fault handling v2

2019-09-11 Thread Koenig, Christian
Am 10.09.19 um 15:30 schrieb Zeng, Oak:
>
> Regards,
> Oak
>
> -Original Message-
> From: Koenig, Christian 
> Sent: Monday, September 9, 2019 1:14 PM
> To: Zeng, Oak ; Kuehling, Felix ; 
> amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 9/9] drm/amdgpu: add graceful VM fault handling v2
>
>> Well first of all we are not in interrupt context here, this is handled by a 
>> work item or otherwise we couldn't do all the locking.
> This is called from amdgpu_irq_handler. I think this is interrupt context.

No, that's incorrect. Only the primary interrupt ring is handled in 
interrupt context. Ring 1 and 2 are handled with a work item.

>   This is also the reason why we use spin lock instead of other sleepable 
> lock like a semaphore.

Well we do have a sleep-able lock here to make it possible to update 
page tables and directories.

That's also the reason why we protect the call with in !in_interrupt().

Regards,
Christian.

>
>> But even in interrupt context another CPU can easily destroy the VM when we 
>> just handle a stale fault or the process was killed.
> Agree with this point.
>
> So this extra double checking is strictly necessary.
>
> Regards,
> Christian.
>
> Am 09.09.19 um 16:08 schrieb Zeng, Oak:
>> Is looking up vm twice necessary? I think we are in interrupt context, is it 
>> possible that the user space application can be switched in between? My 
>> understanding is, if user space application is can't kick in during 
>> interrupt handling, application shouldn't have chance to exit (then their vm 
>> being destroyed).
>>
>> Regards,
>> Oak
>>
>> -Original Message-----
>> From: amd-gfx  On Behalf Of
>> Christian König
>> Sent: Monday, September 9, 2019 8:08 AM
>> To: Kuehling, Felix ;
>> amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH 9/9] drm/amdgpu: add graceful VM fault handling v2
>>
>> Am 05.09.19 um 00:47 schrieb Kuehling, Felix:
>>> On 2019-09-04 11:02 a.m., Christian König wrote:
>>>> Next step towards HMM support. For now just silence the retry fault
>>>> and optionally redirect the request to the dummy page.
>>>>
>>>> v2: make sure the VM is not destroyed while we handle the fault.
>>>>
>>>> Signed-off-by: Christian König 
>>>> ---
>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 74 ++
>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  2 +
>>>>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  4 ++
>>>>  3 files changed, 80 insertions(+)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> index 951608fc1925..410d89966a66 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> @@ -3142,3 +3142,77 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
>>>>}
>>>>}
>>>>  }
>>>> +
>>>> +/**
>>>> + * amdgpu_vm_handle_fault - graceful handling of VM faults.
>>>> + * @adev: amdgpu device pointer
>>>> + * @pasid: PASID of the VM
>>>> + * @addr: Address of the fault
>>>> + *
>>>> + * Try to gracefully handle a VM fault. Return true if the fault
>>>> +was handled and
>>>> + * shouldn't be reported any more.
>>>> + */
>>>> +bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int 
>>>> pasid,
>>>> +  uint64_t addr)
>>>> +{
>>>> +  struct amdgpu_ring *ring = &adev->sdma.instance[0].page;
>>>> +  struct amdgpu_bo *root;
>>>> +  uint64_t value, flags;
>>>> +  struct amdgpu_vm *vm;
>>>> +  long r;
>>>> +
>>>> +  if (!ring->sched.ready)
>>>> +  return false;
>>>> +
>>>> +  spin_lock(&adev->vm_manager.pasid_lock);
>>>> +  vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>>>> +  if (vm)
>>>> +  root = amdgpu_bo_ref(vm->root.base.bo);
>>>> +  else
>>>> +  root = NULL;
>>>> +  spin_unlock(&adev->vm_manager.pasid_lock);
>>>> +
>>>> +  if (!root)
>>>> +  return false;
>>>> +
>>>> +  r = amdgpu_bo_reserve(root, true);
>>>> +  if (r)
>>>> +  goto er