Re: [PATCH v6 13/16] drm/amdgpu: Fix hang on device removal.
Am 10.05.21 um 18:36 schrieb Andrey Grodzovsky: If removing while commands in flight you cannot wait to flush the HW fences on a ring since the device is gone. Signed-off-by: Andrey Grodzovsky --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 16 ++-- 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 1ffb36bd0b19..fa03702ecbfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -36,6 +36,7 @@ #include #include +#include #include "amdgpu.h" #include "amdgpu_trace.h" @@ -525,8 +526,7 @@ int amdgpu_fence_driver_init(struct amdgpu_device *adev) */ void amdgpu_fence_driver_fini_hw(struct amdgpu_device *adev) { - unsigned i, j; - int r; + int i, r; Is j not used here any more? Christian. for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; @@ -535,11 +535,15 @@ void amdgpu_fence_driver_fini_hw(struct amdgpu_device *adev) continue; if (!ring->no_scheduler) drm_sched_fini(&ring->sched); - r = amdgpu_fence_wait_empty(ring); - if (r) { - /* no need to trigger GPU reset as we are unloading */ + /* You can't wait for HW to signal if it's gone */ + if (!drm_dev_is_unplugged(&adev->ddev)) + r = amdgpu_fence_wait_empty(ring); + else + r = -ENODEV; + /* no need to trigger GPU reset as we are unloading */ + if (r) amdgpu_fence_driver_force_completion(ring); - } + if (ring->fence_drv.irq_src) amdgpu_irq_put(adev, ring->fence_drv.irq_src, ring->fence_drv.irq_type);
[PATCH v6 13/16] drm/amdgpu: Fix hang on device removal.
If removing while commands in flight you cannot wait to flush the HW fences on a ring since the device is gone. Signed-off-by: Andrey Grodzovsky --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 16 ++-- 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 1ffb36bd0b19..fa03702ecbfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -36,6 +36,7 @@ #include #include +#include #include "amdgpu.h" #include "amdgpu_trace.h" @@ -525,8 +526,7 @@ int amdgpu_fence_driver_init(struct amdgpu_device *adev) */ void amdgpu_fence_driver_fini_hw(struct amdgpu_device *adev) { - unsigned i, j; - int r; + int i, r; for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; @@ -535,11 +535,15 @@ void amdgpu_fence_driver_fini_hw(struct amdgpu_device *adev) continue; if (!ring->no_scheduler) drm_sched_fini(&ring->sched); - r = amdgpu_fence_wait_empty(ring); - if (r) { - /* no need to trigger GPU reset as we are unloading */ + /* You can't wait for HW to signal if it's gone */ + if (!drm_dev_is_unplugged(&adev->ddev)) + r = amdgpu_fence_wait_empty(ring); + else + r = -ENODEV; + /* no need to trigger GPU reset as we are unloading */ + if (r) amdgpu_fence_driver_force_completion(ring); - } + if (ring->fence_drv.irq_src) amdgpu_irq_put(adev, ring->fence_drv.irq_src, ring->fence_drv.irq_type); -- 2.25.1