cancel_work() is not backported to all custom kernels. Add a workaround that
skips execution of already-queued recovery jobs if the device has already
been reset.

Signed-off-by: Lijo Lazar <lijo.la...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  5 +++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c  |  9 +++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h  | 16 ++++++++++++++++
 3 files changed, 30 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index bebc73c6822c..c66524e2a56a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5411,6 +5411,8 @@ static inline void 
amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
 {
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 
+       amdgpu_reset_domain_clear_pending(adev->reset_domain);
+
 #if defined(CONFIG_DEBUG_FS)
        if (!amdgpu_sriov_vf(adev))
                cancel_work(&adev->reset_work);
@@ -5452,6 +5454,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
        bool audio_suspended = false;
        bool gpu_reset_for_dev_remove = false;
 
+       if (amdgpu_reset_domain_in_drain_mode(adev->reset_domain))
+               return 0;
+
        gpu_reset_for_dev_remove =
                        test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, 
&reset_context->flags) &&
                                test_bit(AMDGPU_NEED_FULL_RESET, 
&reset_context->flags);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index 4baa300121d8..3ece7267d6ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -120,6 +120,14 @@ void amdgpu_reset_destroy_reset_domain(struct kref *ref)
        kvfree(reset_domain);
 }
 
+static void amdgpu_reset_domain_cancel_all_work(struct work_struct *work)
+{
+       struct amdgpu_reset_domain *reset_domain =
+               container_of(work, struct amdgpu_reset_domain, clear);
+
+       reset_domain->drain = false;
+}
+
 struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum 
amdgpu_reset_domain_type type,
                                                             char *wq_name)
 {
@@ -142,6 +150,7 @@ struct amdgpu_reset_domain 
*amdgpu_reset_create_reset_domain(enum amdgpu_reset_d
 
        }
 
+       INIT_WORK(&reset_domain->clear, amdgpu_reset_domain_cancel_all_work);
        atomic_set(&reset_domain->in_gpu_reset, 0);
        atomic_set(&reset_domain->reset_res, 0);
        init_rwsem(&reset_domain->sem);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index b0335a1c5e90..70059eea7e2f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -87,6 +87,8 @@ struct amdgpu_reset_domain {
        struct rw_semaphore sem;
        atomic_t in_gpu_reset;
        atomic_t reset_res;
+       struct work_struct clear;
+       bool drain;
 };
 
 #ifdef CONFIG_DEV_COREDUMP
@@ -137,6 +139,20 @@ static inline bool amdgpu_reset_domain_schedule(struct 
amdgpu_reset_domain *doma
        return queue_work(domain->wq, work);
 }
 
+static inline void amdgpu_reset_domain_clear_pending(struct 
amdgpu_reset_domain *domain)
+{
+       domain->drain = true;
+       /* Queue one more work item on the domain queue. Until that work item
+        * has finished, the domain is in drain mode.
+        */
+       queue_work(domain->wq, &domain->clear);
+}
+
+static inline bool amdgpu_reset_domain_in_drain_mode(struct 
amdgpu_reset_domain *domain)
+{
+       return domain->drain;
+}
+
 void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain);
 
 void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain 
*reset_domain);
-- 
2.25.1

Reply via email to