Re: [PATCH V2 1/2] drm/amdgpu: Acquire ttm locks for dmaunmap

2023-10-16 Thread Felix Kuehling



On 2023-10-16 10:49, David Francis wrote:

dmaunmap can call ttm_bo_validate, which expects the
ttm dma_resv to be held.

Acquire the locks in amdgpu_amdkfd_gpuvm_dmaunmap_mem.

Because the dmaunmap step can now fail, the unmap ioctl UAPI
needs two new arguments. n_dmaunmap_success tracks the number
of devices that have completed dmaunmap. If a device fails
to dmaunmap due to a signal interrupt, n_dmaunmap_bos tracks
the number of bos on that device that were successfully
dmaunmapped.

This failure can also cause the sync_memory step of the ioctl
to be repeated; it is idempotent, so this should not cause any issues.

Signed-off-by: David Francis 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  2 +-
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 23 +++
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 14 +--
  include/uapi/linux/kfd_ioctl.h|  2 ++
  4 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 3ad8dc523b42..781642871900 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -302,7 +302,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct 
amdgpu_device *adev,
  struct kgd_mem *mem, void *drm_priv);
  int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
struct amdgpu_device *adev, struct kgd_mem *mem, void 
*drm_priv);
-void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv);
+int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv, 
uint32_t *num_bos);
  int amdgpu_amdkfd_gpuvm_sync_memory(
struct amdgpu_device *adev, struct kgd_mem *mem, bool intr);
  int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index a15e59abe70a..cbd6032f3d39 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2094,21 +2094,36 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
return ret;
  }
  
-void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv)

+int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv, 
uint32_t *num_bos)
  {
struct kfd_mem_attachment *entry;
struct amdgpu_vm *vm;
+   int ret;
+   int i = 0;
  
  	vm = drm_priv_to_vm(drm_priv);
  
  	mutex_lock(>lock);
  
  	list_for_each_entry(entry, >attachments, list) {

-   if (entry->bo_va->base.vm == vm)
-   kfd_mem_dmaunmap_attachment(mem, entry);
-   }
+   if (i >= *num_bos) {
+   ret = amdgpu_bo_reserve(entry->bo_va->base.bo, false);
+   if (ret) {
+   *num_bos = i;
+   goto out;
+   }
+
+   if (entry->bo_va->base.vm == vm)
+   kfd_mem_dmaunmap_attachment(mem, entry);
  
+			amdgpu_bo_unreserve(entry->bo_va->base.bo);

+   }
+   i++;
+   }
+   *num_bos = 0;
+out:
mutex_unlock(>lock);
+   return ret;
  }
  
  int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 06988cf1db51..a944e255de4a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1379,6 +1379,10 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file 
*filep,
pr_debug("n_success exceeds n_devices\n");
return -EINVAL;
}
+   if (args->n_dmaunmap_success > args->n_devices) {
+   pr_debug("n_dmaunmap_success exceeds n_devices\n");
+   return -EINVAL;
+   }
  
  	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),

GFP_KERNEL);
@@ -1434,7 +1438,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file 
*filep,
}
  
  	/* Flush TLBs after waiting for the page table updates to complete */

-   for (i = 0; i < args->n_devices; i++) {
+   for (i = args->n_dmaunmap_success; i < args->n_devices; i++) {
peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
if (WARN_ON_ONCE(!peer_pdd))
continue;
@@ -1442,7 +1446,12 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file 
*filep,
kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
  
  		/* Remove dma mapping after tlb flush to avoid IO_PAGE_FAULT */

-   amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv);
+   err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv, 
>n_dmaunmap_bos);
+   if (err) {
+   

[PATCH V2 1/2] drm/amdgpu: Acquire ttm locks for dmaunmap

2023-10-16 Thread David Francis
dmaunmap can call ttm_bo_validate, which expects the
ttm dma_resv to be held.

Acquire the locks in amdgpu_amdkfd_gpuvm_dmaunmap_mem.

Because the dmaunmap step can now fail, the unmap ioctl UAPI
needs two new arguments. n_dmaunmap_success tracks the number
of devices that have completed dmaunmap. If a device fails
to dmaunmap due to a signal interrupt, n_dmaunmap_bos tracks
the number of bos on that device that were successfully
dmaunmapped.

This failure can also cause the sync_memory step of the ioctl
to be repeated; it is idempotent, so this should not cause any issues.

Signed-off-by: David Francis 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  2 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 23 +++
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 14 +--
 include/uapi/linux/kfd_ioctl.h|  2 ++
 4 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 3ad8dc523b42..781642871900 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -302,7 +302,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct 
amdgpu_device *adev,
  struct kgd_mem *mem, void *drm_priv);
 int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
struct amdgpu_device *adev, struct kgd_mem *mem, void 
*drm_priv);
-void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv);
+int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv, 
uint32_t *num_bos);
 int amdgpu_amdkfd_gpuvm_sync_memory(
struct amdgpu_device *adev, struct kgd_mem *mem, bool intr);
 int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index a15e59abe70a..cbd6032f3d39 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2094,21 +2094,36 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
return ret;
 }
 
-void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv)
+int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv, 
uint32_t *num_bos)
 {
struct kfd_mem_attachment *entry;
struct amdgpu_vm *vm;
+   int ret;
+   int i = 0;
 
vm = drm_priv_to_vm(drm_priv);
 
mutex_lock(>lock);
 
list_for_each_entry(entry, >attachments, list) {
-   if (entry->bo_va->base.vm == vm)
-   kfd_mem_dmaunmap_attachment(mem, entry);
-   }
+   if (i >= *num_bos) {
+   ret = amdgpu_bo_reserve(entry->bo_va->base.bo, false);
+   if (ret) {
+   *num_bos = i;
+   goto out;
+   }
+
+   if (entry->bo_va->base.vm == vm)
+   kfd_mem_dmaunmap_attachment(mem, entry);
 
+   amdgpu_bo_unreserve(entry->bo_va->base.bo);
+   }
+   i++;
+   }
+   *num_bos = 0;
+out:
mutex_unlock(>lock);
+   return ret;
 }
 
 int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 06988cf1db51..a944e255de4a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1379,6 +1379,10 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file 
*filep,
pr_debug("n_success exceeds n_devices\n");
return -EINVAL;
}
+   if (args->n_dmaunmap_success > args->n_devices) {
+   pr_debug("n_dmaunmap_success exceeds n_devices\n");
+   return -EINVAL;
+   }
 
devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
GFP_KERNEL);
@@ -1434,7 +1438,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file 
*filep,
}
 
/* Flush TLBs after waiting for the page table updates to complete */
-   for (i = 0; i < args->n_devices; i++) {
+   for (i = args->n_dmaunmap_success; i < args->n_devices; i++) {
peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
if (WARN_ON_ONCE(!peer_pdd))
continue;
@@ -1442,7 +1446,12 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file 
*filep,
kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
 
/* Remove dma mapping after tlb flush to avoid IO_PAGE_FAULT */
-   amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv);
+   err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv, 
>n_dmaunmap_bos);
+   if (err) {
+   pr_debug("DMA