Fix an issue where the RAS controller interrupt can no longer be triggered
after the first NBIF uncorrectable error. The error count is also stored in
the NBIF RAS object so that it can be queried.

Change-Id: Iba482c169fdff3e9c390072c0289a622a522133c
Signed-off-by: Le Ma <le...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index 25231d6..9a3a65a 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -52,6 +52,9 @@
 #define BIF_MMSCH1_DOORBELL_RANGE__OFFSET_MASK          0x00000FFCL
 #define BIF_MMSCH1_DOORBELL_RANGE__SIZE_MASK            0x001F0000L
 
+static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev,
+                                       void *ras_error_status);
+
 static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev)
 {
        WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
@@ -314,6 +317,7 @@ static void nbio_v7_4_init_registers(struct amdgpu_device 
*adev)
 static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct 
amdgpu_device *adev)
 {
        uint32_t bif_doorbell_intr_cntl;
+       struct ras_manager *obj = amdgpu_ras_find_obj(adev, adev->nbio.ras_if);
 
        bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL);
        if (REG_GET_FIELD(bif_doorbell_intr_cntl,
@@ -324,6 +328,12 @@ static void 
nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device
                                                RAS_CNTLR_INTERRUPT_CLEAR, 1);
                WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, 
bif_doorbell_intr_cntl);
 
+               /*
+                * clear error status after ras_controller_intr according to
+                * hw team and count ue number for query
+                */
+               nbio_v7_4_query_ras_error_count(adev, &obj->err_data);
+
                DRM_WARN("RAS controller interrupt triggered by NBIF error\n");
 
                /* ras_controller_int is dedicated for nbif ras error,
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to