To avoid resetting the GPU repeatedly, clear the reset flag of
every message queued in the poison FIFO before the first
GPU reset.

Signed-off-by: YiPeng Chai <yipeng.c...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 59 ++++++++++++++++++++++++-
 1 file changed, 58 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 1e6e06009577..7dfb2e548d70 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2768,6 +2768,49 @@ static int amdgpu_ras_get_poison_req(struct amdgpu_device *adev,
 
        return kfifo_get(&con->poison_fifo, poison_msg);
 }
+
+/*
+ * Take each message out of the poison fifo (as many as kfifo_len() reports on
+ * entry) and queue it again. For non-UMC messages the reset flags are OR-ed
+ * into *cached_reset and zeroed in the re-queued copy; UMC ones go back as is.
+ */
+static void amdgpu_ras_clear_poison_fifo_msg_reset_flag(struct amdgpu_device *adev,
+                       uint32_t *cached_reset)
+{
+       struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+       uint32_t pending = kfifo_len(&con->poison_fifo);
+       uint32_t merged_reset = 0;
+       struct ras_poison_msg entry;
+       int idx;
+
+       for (idx = 0; idx < pending; idx++) {
+               /* Nothing was dequeued, so there is nothing to re-queue. */
+               if (!kfifo_get(&con->poison_fifo, &entry))
+                       continue;
+
+               if (entry.block != AMDGPU_RAS_BLOCK__UMC) {
+                       merged_reset |= entry.reset;
+                       entry.reset = 0;
+               }
+
+               /* Queue the (possibly modified) message again. */
+               if (!kfifo_put(&con->poison_fifo, entry))
+                       dev_info(adev->dev, "Poison fifo drop message!\n");
+       }
+       *cached_reset = merged_reset;
+}
+
+/* Dequeue and discard poison fifo messages until kfifo_get() returns 0. */
+static void amdgpu_ras_clear_poison_fifo(struct amdgpu_device *adev)
+{
+       struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+       struct ras_poison_msg discarded;
+
+       /* Only the dequeue side effect matters; the payload is ignored. */
+       while (kfifo_get(&con->poison_fifo, &discarded)) {
+               /* nothing to do with the message */
+       }
+}
 #endif
 
 #ifdef HAVE_RADIX_TREE_ITER_DELETE
@@ -2886,9 +2929,23 @@ static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev,
                poison_msg->pasid_fn(adev, pasid, poison_msg->data);
 
        if (reset) {
+               uint32_t fifo_cached_reset = 0;
+
                flush_delayed_work(&con->page_retirement_dwork);
 
-               con->gpu_reset_flags |= reset;
+               amdgpu_ras_clear_poison_fifo_msg_reset_flag(adev, 
&fifo_cached_reset);
+
+               reset |= fifo_cached_reset;
+
+               if (reset & AMDGPU_RAS_GPU_RESET_MODE1_RESET) {
+                       con->gpu_reset_flags |= 
AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+                       amdgpu_ras_clear_poison_fifo(adev);
+               } else if (reset & AMDGPU_RAS_GPU_RESET_MODE2_RESET) {
+                       con->gpu_reset_flags |= 
AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+               } else {
+                       con->gpu_reset_flags |= reset;
+               }
+
                amdgpu_ras_reset_gpu(adev);
        }
 
-- 
2.34.1

Reply via email to