[PATCH] drm/amdgpu: add umc ras functions for navi31

2022-07-04 Thread yipechai
Add UMC RAS functions for navi31:
1. Add the driver and ASIC register definitions for the new UMC IP.
2. Support querying the UMC RAS error counter.
3. Support UMC RAS UE (uncorrectable error) address remapping.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/Makefile   |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h   |   7 +
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c|  27 +-
 drivers/gpu/drm/amd/amdgpu/umc_v8_10.c| 362 ++
 drivers/gpu/drm/amd/amdgpu/umc_v8_10.h|  62 +++
 .../include/asic_reg/umc/umc_8_10_0_offset.h  |  33 ++
 .../include/asic_reg/umc/umc_8_10_0_sh_mask.h |  94 +
 7 files changed, 585 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/umc_v8_10.h
 create mode 100644 drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_10_0_offset.h
 create mode 100644 
drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_10_0_sh_mask.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index a87e42c2c8dc..c7d0cd15b5ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -93,7 +93,7 @@ amdgpu-y += \
 
 # add UMC block
 amdgpu-y += \
-   umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o
+   umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o
 
 # add IH block
 amdgpu-y += \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index 2ec6698aa1fe..8cc4618bd7fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -41,6 +41,9 @@
 #define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < 
adev->umc.channel_inst_num; (ch_inst)++)
 #define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) 
LOOP_UMC_CH_INST((ch_inst))
 
+#define LOOP_UMC_MCD_INST(mcd_inst) \
+   for ((mcd_inst) = 0; (mcd_inst) < adev->umc.mcd_inst_num; 
(mcd_inst)++)
+
 struct amdgpu_umc_ras {
struct amdgpu_ras_block_object ras_block;
void (*err_cnt_init)(struct amdgpu_device *adev);
@@ -62,6 +65,10 @@ struct amdgpu_umc {
uint32_t channel_inst_num;
/* number of umc instance with memory map register access */
uint32_t umc_inst_num;
+
+   /*number of mcd instance with memory map register access*/
+   uint32_t mcd_inst_num;
+
/* UMC regiser per channel offset */
uint32_t channel_offs;
/* channel index table of interleaved memory */
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index e098b69d6321..c2a745bce068 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -25,7 +25,7 @@
 #include "amdgpu.h"
 #include "amdgpu_atomfirmware.h"
 #include "gmc_v11_0.h"
-#include "umc_v8_7.h"
+#include "umc_v8_10.h"
 #include "athub/athub_3_0_0_sh_mask.h"
 #include "athub/athub_3_0_0_offset.h"
 #include "oss/osssys_6_0_0_offset.h"
@@ -586,11 +586,36 @@ static void gmc_v11_0_set_umc_funcs(struct amdgpu_device 
*adev)
 {
switch (adev->ip_versions[UMC_HWIP][0]) {
case IP_VERSION(8, 10, 0):
+   adev->umc.max_ras_err_cnt_per_query = 
UMC_V8_10_TOTAL_CHANNEL_NUM;
+   adev->umc.channel_inst_num = UMC_V8_10_CHANNEL_INSTANCE_NUM;
+   adev->umc.mcd_inst_num = UMC_V8_10_MCD_INSTANCE_NUM;
+   adev->umc.umc_inst_num = UMC_V8_10_UMC_INSTANCE_NUM;
+   adev->umc.channel_offs = UMC_V8_10_PER_CHANNEL_OFFSET;
+   adev->umc.channel_idx_tbl = _v8_10_channel_idx_tbl[0][0][0];
+   adev->umc.ras = _v8_10_ras;
+   break;
case IP_VERSION(8, 11, 0):
break;
default:
break;
}
+
+   if (adev->umc.ras) {
+   amdgpu_ras_register_ras_block(adev, >umc.ras->ras_block);
+
+   strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc");
+   adev->umc.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC;
+   adev->umc.ras->ras_block.ras_comm.type = 
AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+   adev->umc.ras_if = >umc.ras->ras_block.ras_comm;
+
+   /* If don't define special ras_late_init function, use default 
ras_late_init */
+   if (!adev->umc.ras->ras_block.ras_late_init)
+   adev->umc.ras->ras_block.ras_late_init = 
amdgpu_umc_ras_late_init;
+
+   /* If not defined special ras_cb function, use default ras_cb */
+   if (!adev->umc.ras->ras_block.ras_cb)
+   adev->umc.ras->ras_block.ras_cb = 
amdgpu_umc_process_ras_data_cb;
+   }
 }
 
 
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c 
b/drivers/gpu/drm/amd/amdgpu/umc_v8_

[PATCH] drm/amdgpu: Support AMDGPU RAS debugfs poll interface

2022-03-29 Thread yipechai
Some AMDGPU RAS debugfs operations like UE injection
can cause gpu reset. Before doing the next debugfs
operation, the application should call poll to check
if the gpu has finished recovering.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 38 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h |  6 
 2 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 4bbed76b79c8..337e3e247a45 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -452,6 +452,12 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file 
*f,
 
/* data.inject.address is offset instead of absolute gpu 
address */
ret = amdgpu_ras_error_inject(adev, );
+
+   if (!ret && (data.head.type == 
AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE)) {
+   struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+   con->ras_ue_injected = 1;
+   }
break;
default:
ret = -EINVAL;
@@ -464,6 +470,30 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file 
*f,
return size;
 }
 
+/**
+ * DOC: Support AMDGPU RAS debugfs poll interface
+ *
+ * Some AMDGPU RAS debugfs operations like UE injection
+ * can cause gpu reset. Before doing the next debugfs
+ * operation, the application should call poll to check
+ * if gpu is in recovering status.
+ */
+static __poll_t amdgpu_ras_debugfs_ctrl_poll(struct file *f, struct 
poll_table_struct *wait)
+{
+   struct amdgpu_device *adev = (struct amdgpu_device 
*)file_inode(f)->i_private;
+   struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+   __poll_t mask = 0;
+
+   /* For UE injection, wait for gpu to finish recovery */
+   if (con->ras_ue_injected)
+   poll_wait(f, >gpu_ready_wait_wq, wait);
+
+   if (!atomic_read(>in_recovery))
+   mask = EPOLLIN | EPOLLRDNORM;
+
+   return mask;
+}
+
 /**
  * DOC: AMDGPU RAS debugfs EEPROM table reset interface
  *
@@ -503,6 +533,7 @@ static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file 
*f,
 
 static const struct file_operations amdgpu_ras_debugfs_ctrl_ops = {
.owner = THIS_MODULE,
+   .poll = amdgpu_ras_debugfs_ctrl_poll,
.read = NULL,
.write = amdgpu_ras_debugfs_ctrl_write,
.llseek = default_llseek
@@ -1837,6 +1868,11 @@ static void amdgpu_ras_do_recovery(struct work_struct 
*work)
if (amdgpu_device_should_recover_gpu(ras->adev))
amdgpu_device_gpu_recover(ras->adev, NULL);
atomic_set(>in_recovery, 0);
+
+   if (ras->ras_ue_injected) {
+   ras->ras_ue_injected = 0;
+   wake_up_all(>gpu_ready_wait_wq);
+   }
 }
 
 /* alloc/realloc bps array */
@@ -2279,7 +2315,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
INIT_DELAYED_WORK(>ras_counte_delay_work, amdgpu_ras_counte_dw);
atomic_set(>ras_ce_count, 0);
atomic_set(>ras_ue_count, 0);
-
+   init_waitqueue_head(>gpu_ready_wait_wq);
con->objs = (struct ras_manager *)(con + 1);
 
amdgpu_ras_set_context(adev, con);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 606df8869b89..aea6bbb71501 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -379,6 +379,12 @@ struct amdgpu_ras {
 
/* Indicates smu whether need update bad channel info */
bool update_channel_flag;
+
+   /* UE injection flag */
+   uint32_t  ras_ue_injected;
+
+   /* Waiting for gpu ready work queue */
+   wait_queue_head_t gpu_ready_wait_wq;
 };
 
 struct ras_fs_data {
-- 
2.25.1



[PATCH] drm/amdgpu: fixed the warnings reported by kernel test robot

2022-03-13 Thread yipechai
The reported warnings are as follows:
  1.warning:no-previous-prototype-for-amdgpu_hdp_ras_fini.
  2.warning:no-previous-prototype-for-amdgpu_mmhub_ras_fini.

amdgpu_hdp_ras_fini and amdgpu_mmhub_ras_fini are unused
in the code, and they are the only functions in amdgpu_hdp.c
and amdgpu_mmhub.c respectively. After removing these two
functions, both amdgpu_hdp.c and amdgpu_mmhub.c are empty, so
these two files can be deleted to fix the warnings.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/Makefile   |  4 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c   | 30 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c | 30 ---
 3 files changed, 2 insertions(+), 62 deletions(-)
 delete mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
 delete mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 5dfe08cb045e..40e2c6e2df79 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -53,11 +53,11 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \
amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \
-   amdgpu_debugfs.o amdgpu_ids.o amdgpu_gmc.o amdgpu_mmhub.o \
+   amdgpu_debugfs.o amdgpu_ids.o amdgpu_gmc.o \
amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
-   amdgpu_fw_attestation.o amdgpu_securedisplay.o amdgpu_hdp.o \
+   amdgpu_fw_attestation.o amdgpu_securedisplay.o \
amdgpu_eeprom.o amdgpu_mca.o
 
 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
deleted file mode 100644
index 3f3d92e16c2e..
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright 2021 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include "amdgpu.h"
-#include "amdgpu_ras.h"
-
-void amdgpu_hdp_ras_fini(struct amdgpu_device *adev, struct ras_common_if 
*ras_block)
-{
-
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
deleted file mode 100644
index 8f2fa247d605..
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright 2019 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include "amdgpu.h"
-#include "amdgpu_ras.h"
-
-void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev, struct ras_common_if 
*ras_block)
-{
-
-}
-- 
2.25.1



[PATCH V2] drm/amdgpu: Move common initialization operations of each ras block to one function

2022-03-07 Thread yipechai
Define amdgpu_ras_sw_init function to initialize all ras blocks.

V2: Modify error debugging information.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   6 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c|   2 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c| 143 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h|   1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  |  21 ---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c |  16 ---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  28 
 drivers/gpu/drm/amd/amdgpu/mca_v3_0.c  |   6 -
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c |  17 ---
 9 files changed, 148 insertions(+), 92 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 6113ddc765a7..0c83eb69dad5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2402,6 +2402,12 @@ static int amdgpu_device_ip_init(struct amdgpu_device 
*adev)
}
}
 
+   r = amdgpu_ras_sw_init(adev);
+   if (r) {
+   DRM_ERROR("amdgpu_ras_sw_init failed (%d).\n", r);
+   goto init_failed;
+   }
+
if (amdgpu_sriov_vf(adev))
amdgpu_virt_init_data_exchange(adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index ab75e189bc0b..544241f357b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -440,8 +440,6 @@ int amdgpu_gmc_ras_early_init(struct amdgpu_device *adev)
 {
if (!adev->gmc.xgmi.connected_to_cpu) {
adev->gmc.xgmi.ras = _ras;
-   amdgpu_ras_register_ras_block(adev, 
>gmc.xgmi.ras->ras_block);
-   adev->gmc.xgmi.ras_if = >gmc.xgmi.ras->ras_block.ras_comm;
}
 
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index d3875618ebf5..89075ab9e82e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2299,8 +2299,6 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
case CHIP_ALDEBARAN:
if (!adev->gmc.xgmi.connected_to_cpu) {
adev->nbio.ras = _v7_4_ras;
-   amdgpu_ras_register_ras_block(adev, 
>nbio.ras->ras_block);
-   adev->nbio.ras_if = >nbio.ras->ras_block.ras_comm;
}
break;
default:
@@ -2533,6 +2531,147 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev)
amdgpu_ras_disable_all_features(adev, 1);
 }
 
+int amdgpu_ras_sw_init(struct amdgpu_device *adev)
+{
+   int err = 0;
+
+   if (!amdgpu_ras_asic_supported(adev))
+   return 0;
+
+   if (adev->nbio.ras) {
+   err = amdgpu_ras_register_ras_block(adev, 
>nbio.ras->ras_block);
+   if (err) {
+   dev_err(adev->dev, "Failed to register nbio ras 
block!\n");
+   return err;
+   }
+   adev->nbio.ras_if = >nbio.ras->ras_block.ras_comm;
+   }
+
+   if (adev->gmc.xgmi.ras) {
+   err = amdgpu_ras_register_ras_block(adev, 
>gmc.xgmi.ras->ras_block);
+   if (err) {
+   dev_err(adev->dev, "Failed to register xgmi ras 
block!\n");
+   return err;
+   }
+   adev->gmc.xgmi.ras_if = >gmc.xgmi.ras->ras_block.ras_comm;
+   }
+
+   if (adev->gfx.ras) {
+   err = amdgpu_ras_register_ras_block(adev, 
>gfx.ras->ras_block);
+   if (err) {
+   dev_err(adev->dev, "Failed to register gfx ras 
block!\n");
+   return err;
+   }
+
+   strcpy(adev->gfx.ras->ras_block.ras_comm.name, "gfx");
+   adev->gfx.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX;
+   adev->gfx.ras->ras_block.ras_comm.type = 
AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+   adev->gfx.ras_if = >gfx.ras->ras_block.ras_comm;
+
+   /* If not define special ras_late_init function, use gfx 
default ras_late_init */
+   if (!adev->gfx.ras->ras_block.ras_late_init)
+   adev->gfx.ras->ras_block.ras_late_init = 
amdgpu_gfx_ras_late_init;
+
+   /* If not defined special ras_cb function, use default ras_cb */
+   if (!adev->gfx.ras->ras_block.ras_cb)
+   adev->gfx.ras->ras_block.ras_cb = 
amdgpu_gfx_process_ras_data_cb;
+   }
+
+   if (adev->umc.ras) {
+   err = amdgpu_ras_register_ras_block(adev, 
>umc.ras->ras_block);
+   if (err) {
+

[PATCH] drm/amdgpu: Move common initialization operations of each ras block to one function

2022-03-01 Thread yipechai
Define amdgpu_ras_sw_init function to initialize all ras blocks.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   6 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c|   2 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c| 143 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h|   1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  |  21 ---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c |  16 ---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  28 
 drivers/gpu/drm/amd/amdgpu/mca_v3_0.c  |   6 -
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c |  17 ---
 9 files changed, 148 insertions(+), 92 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 6113ddc765a7..72550e9f6058 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2402,6 +2402,12 @@ static int amdgpu_device_ip_init(struct amdgpu_device 
*adev)
}
}
 
+   r = amdgpu_ras_sw_init(adev);
+   if (r) {
+   DRM_ERROR("amdgpu_ras_early_init failed (%d).\n", r);
+   goto init_failed;
+   }
+
if (amdgpu_sriov_vf(adev))
amdgpu_virt_init_data_exchange(adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index ab75e189bc0b..544241f357b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -440,8 +440,6 @@ int amdgpu_gmc_ras_early_init(struct amdgpu_device *adev)
 {
if (!adev->gmc.xgmi.connected_to_cpu) {
adev->gmc.xgmi.ras = _ras;
-   amdgpu_ras_register_ras_block(adev, 
>gmc.xgmi.ras->ras_block);
-   adev->gmc.xgmi.ras_if = >gmc.xgmi.ras->ras_block.ras_comm;
}
 
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index d3875618ebf5..89075ab9e82e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2299,8 +2299,6 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
case CHIP_ALDEBARAN:
if (!adev->gmc.xgmi.connected_to_cpu) {
adev->nbio.ras = _v7_4_ras;
-   amdgpu_ras_register_ras_block(adev, 
>nbio.ras->ras_block);
-   adev->nbio.ras_if = >nbio.ras->ras_block.ras_comm;
}
break;
default:
@@ -2533,6 +2531,147 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev)
amdgpu_ras_disable_all_features(adev, 1);
 }
 
+int amdgpu_ras_sw_init(struct amdgpu_device *adev)
+{
+   int err = 0;
+
+   if (!amdgpu_ras_asic_supported(adev))
+   return 0;
+
+   if (adev->nbio.ras) {
+   err = amdgpu_ras_register_ras_block(adev, 
>nbio.ras->ras_block);
+   if (err) {
+   dev_err(adev->dev, "Failed to register nbio ras 
block!\n");
+   return err;
+   }
+   adev->nbio.ras_if = >nbio.ras->ras_block.ras_comm;
+   }
+
+   if (adev->gmc.xgmi.ras) {
+   err = amdgpu_ras_register_ras_block(adev, 
>gmc.xgmi.ras->ras_block);
+   if (err) {
+   dev_err(adev->dev, "Failed to register xgmi ras 
block!\n");
+   return err;
+   }
+   adev->gmc.xgmi.ras_if = >gmc.xgmi.ras->ras_block.ras_comm;
+   }
+
+   if (adev->gfx.ras) {
+   err = amdgpu_ras_register_ras_block(adev, 
>gfx.ras->ras_block);
+   if (err) {
+   dev_err(adev->dev, "Failed to register gfx ras 
block!\n");
+   return err;
+   }
+
+   strcpy(adev->gfx.ras->ras_block.ras_comm.name, "gfx");
+   adev->gfx.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX;
+   adev->gfx.ras->ras_block.ras_comm.type = 
AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+   adev->gfx.ras_if = >gfx.ras->ras_block.ras_comm;
+
+   /* If not define special ras_late_init function, use gfx 
default ras_late_init */
+   if (!adev->gfx.ras->ras_block.ras_late_init)
+   adev->gfx.ras->ras_block.ras_late_init = 
amdgpu_gfx_ras_late_init;
+
+   /* If not defined special ras_cb function, use default ras_cb */
+   if (!adev->gfx.ras->ras_block.ras_cb)
+   adev->gfx.ras->ras_block.ras_cb = 
amdgpu_gfx_process_ras_data_cb;
+   }
+
+   if (adev->umc.ras) {
+   err = amdgpu_ras_register_ras_block(adev, 
>umc.ras->ras_block);
+   if (err) {
+   dev_err(ad

[PATCH] drm/amdgpu: Fixed warning reported by kernel test robot

2022-02-21 Thread yipechai
Fix the warnings reported by the kernel test robot:
1. warning: no previous prototype for function
'amdgpu_ras_block_late_init_default'.
2. warning: variable 'ras_obj' is used uninitialized whenever '||' condition is
true.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index e5874df3c9ca..a73567ea03d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2400,7 +2400,7 @@ bool amdgpu_ras_is_poison_mode_supported(struct 
amdgpu_device *adev)
 int amdgpu_ras_block_late_init(struct amdgpu_device *adev,
 struct ras_common_if *ras_block)
 {
-   struct amdgpu_ras_block_object *ras_obj;
+   struct amdgpu_ras_block_object *ras_obj = NULL;
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
unsigned long ue_count, ce_count;
int r;
@@ -2456,7 +2456,7 @@ int amdgpu_ras_block_late_init(struct amdgpu_device *adev,
return r;
 }
 
-int amdgpu_ras_block_late_init_default(struct amdgpu_device *adev,
+static int amdgpu_ras_block_late_init_default(struct amdgpu_device *adev,
 struct ras_common_if *ras_block)
 {
return amdgpu_ras_block_late_init(adev, ras_block);
-- 
2.25.1



[PATCH 12/12] drm/amdgpu: Remove redundant .ras_fini initialization in some ras blocks

2022-02-21 Thread yipechai
1. Define amdgpu_ras_block_late_fini_default in amdgpu_ras.c as
   the common .ras_fini function, which is called when the
   .ras_fini of a ras block isn't initialized.
2. Remove the code that uses amdgpu_ras_block_late_fini to
   initialize .ras_fini in ras blocks.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c  | 8 
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 1 -
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 4 
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   | 4 
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 8 
 drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c| 1 -
 drivers/gpu/drm/amd/amdgpu/mca_v3_0.c| 3 ---
 drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c   | 1 -
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   | 4 
 9 files changed, 8 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 846f51b0c013..17f7c0259115 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2477,6 +2477,12 @@ void amdgpu_ras_block_late_fini(struct amdgpu_device 
*adev,
amdgpu_ras_interrupt_remove_handler(adev, ras_block);
 }
 
+void amdgpu_ras_block_late_fini_default(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block)
+{
+   return amdgpu_ras_block_late_fini(adev, ras_block);
+}
+
 /* do some init work after IP late init as dependence.
  * and it runs in resume/gpu reset/booting up cases.
  */
@@ -2586,6 +2592,8 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
if (amdgpu_ras_is_supported(adev, obj->ras_comm.block) 
&&
obj->ras_fini)
obj->ras_fini(adev, >ras_comm);
+   else
+   amdgpu_ras_block_late_fini_default(adev, 
>ras_comm);
}
 
/* Clear ras blocks from ras_list and free ras block list node 
*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index c04d98e3bd9d..8abdab6869e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -950,6 +950,5 @@ struct amdgpu_xgmi_ras xgmi_ras = {
},
.hw_ops = _ras_hw_ops,
.ras_late_init = amdgpu_xgmi_ras_late_init,
-   .ras_fini = amdgpu_ras_block_late_fini,
},
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index f0cc073e6bb0..8def7f630d4c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2204,10 +2204,6 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device 
*adev)
if (!adev->gfx.ras->ras_block.ras_late_init)
adev->gfx.ras->ras_block.ras_late_init = 
amdgpu_gfx_ras_late_init;
 
-   /* If not define special ras_fini function, use gfx default 
ras_fini */
-   if (!adev->gfx.ras->ras_block.ras_fini)
-   adev->gfx.ras->ras_block.ras_fini = 
amdgpu_ras_block_late_fini;
-
/* If not defined special ras_cb function, use default ras_cb */
if (!adev->gfx.ras->ras_block.ras_cb)
adev->gfx.ras->ras_block.ras_cb = 
amdgpu_gfx_process_ras_data_cb;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index c8ad824328d7..d9353bb99314 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -681,10 +681,6 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device 
*adev)
if (!adev->umc.ras->ras_block.ras_late_init)
adev->umc.ras->ras_block.ras_late_init = 
amdgpu_umc_ras_late_init;
 
-   /* If don't define special ras_fini function, use default 
ras_fini */
-   if (!adev->umc.ras->ras_block.ras_fini)
-   adev->umc.ras->ras_block.ras_fini = 
amdgpu_ras_block_late_fini;
-
/* If not defined special ras_cb function, use default ras_cb */
if (!adev->umc.ras->ras_block.ras_cb)
adev->umc.ras->ras_block.ras_cb = 
amdgpu_umc_process_ras_data_cb;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index b01767d78153..4c3483fbe613 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1241,10 +1241,6 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device 
*adev)
if (!adev->umc.ras->ras_block.ras_late_init)
adev->umc.ras->ras_block.ras_late_init = 
amdgpu_umc_ras_late_init;
 
-   /* If don't define special ras_fini function, use default 
ras_fini */
- 

[PATCH 09/12] drm/amdgpu: Remove redundant calls of amdgpu_ras_block_late_fini in hdp ras block

2022-02-21 Thread yipechai
Remove redundant calls of amdgpu_ras_block_late_fini in hdp ras block.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c | 4 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h | 1 -
 drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c   | 2 +-
 3 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
index 5595f903c17a..3f3d92e16c2e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
@@ -26,7 +26,5 @@
 
 void amdgpu_hdp_ras_fini(struct amdgpu_device *adev, struct ras_common_if 
*ras_block)
 {
-   if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP) &&
-   ras_block)
-   amdgpu_ras_block_late_fini(adev, ras_block);
+
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
index c05cd992ef8a..9181c7bef7c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
@@ -44,5 +44,4 @@ struct amdgpu_hdp {
 };
 
 int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev, struct ras_common_if 
*ras_block);
-void amdgpu_hdp_ras_fini(struct amdgpu_device *adev, struct ras_common_if 
*ras_block);
 #endif /* __AMDGPU_HDP_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
index d7811e0327cb..86c166ae794a 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
@@ -163,7 +163,7 @@ struct amdgpu_hdp_ras hdp_v4_0_ras = {
.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
},
.hw_ops = _v4_0_ras_hw_ops,
-   .ras_fini = amdgpu_hdp_ras_fini,
+   .ras_fini = amdgpu_ras_block_late_fini,
},
 };
 
-- 
2.25.1



[PATCH 11/12] drm/amdgpu: Remove redundant calls of amdgpu_ras_block_late_fini in mca ras block

2022-02-21 Thread yipechai
Remove redundant calls of amdgpu_ras_block_late_fini in mca ras block.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c |  6 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h |  3 ---
 drivers/gpu/drm/amd/amdgpu/mca_v3_0.c   | 21 +++--
 3 files changed, 3 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
index e2607d9f5cf4..51c2a82e2fa4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
@@ -70,9 +70,3 @@ void amdgpu_mca_query_ras_error_count(struct amdgpu_device 
*adev,
 
amdgpu_mca_reset_error_count(adev, mc_status_addr);
 }
-
-void amdgpu_mca_ras_fini(struct amdgpu_device *adev,
-struct amdgpu_mca_ras *mca_dev)
-{
-   amdgpu_ras_block_late_fini(adev, mca_dev->ras_if);
-}
\ No newline at end of file
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
index 15e1a1efeb4f..7ce16d16e34b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
@@ -56,7 +56,4 @@ void amdgpu_mca_query_ras_error_count(struct amdgpu_device 
*adev,
  uint64_t mc_status_addr,
  void *ras_error_status);
 
-void amdgpu_mca_ras_fini(struct amdgpu_device *adev,
-struct amdgpu_mca_ras *mca_dev);
-
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
index 02c50be19d3b..5ce6778a821d 100644
--- a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
@@ -37,11 +37,6 @@ static void mca_v3_0_mp0_query_ras_error_count(struct 
amdgpu_device *adev,
 ras_error_status);
 }
 
-static void mca_v3_0_mp0_ras_fini(struct amdgpu_device *adev, struct 
ras_common_if *ras_block)
-{
-   amdgpu_mca_ras_fini(adev, >mca.mp0);
-}
-
 static int mca_v3_0_ras_block_match(struct amdgpu_ras_block_object *block_obj,
enum amdgpu_ras_block block, uint32_t 
sub_block_index)
 {
@@ -71,7 +66,7 @@ struct amdgpu_mca_ras_block mca_v3_0_mp0_ras = {
},
.hw_ops = _v3_0_mp0_hw_ops,
.ras_block_match = mca_v3_0_ras_block_match,
-   .ras_fini = mca_v3_0_mp0_ras_fini,
+   .ras_fini = amdgpu_ras_block_late_fini,
},
 };
 
@@ -83,11 +78,6 @@ static void mca_v3_0_mp1_query_ras_error_count(struct 
amdgpu_device *adev,
 ras_error_status);
 }
 
-static void mca_v3_0_mp1_ras_fini(struct amdgpu_device *adev, struct 
ras_common_if *ras_block)
-{
-   amdgpu_mca_ras_fini(adev, >mca.mp1);
-}
-
 const struct amdgpu_ras_block_hw_ops mca_v3_0_mp1_hw_ops = {
.query_ras_error_count = mca_v3_0_mp1_query_ras_error_count,
.query_ras_error_address = NULL,
@@ -103,7 +93,7 @@ struct amdgpu_mca_ras_block mca_v3_0_mp1_ras = {
},
.hw_ops = _v3_0_mp1_hw_ops,
.ras_block_match = mca_v3_0_ras_block_match,
-   .ras_fini = mca_v3_0_mp1_ras_fini,
+   .ras_fini = amdgpu_ras_block_late_fini,
},
 };
 
@@ -115,11 +105,6 @@ static void mca_v3_0_mpio_query_ras_error_count(struct 
amdgpu_device *adev,
 ras_error_status);
 }
 
-static void mca_v3_0_mpio_ras_fini(struct amdgpu_device *adev, struct 
ras_common_if *ras_block)
-{
-   amdgpu_mca_ras_fini(adev, >mca.mpio);
-}
-
 const struct amdgpu_ras_block_hw_ops mca_v3_0_mpio_hw_ops = {
.query_ras_error_count = mca_v3_0_mpio_query_ras_error_count,
.query_ras_error_address = NULL,
@@ -135,7 +120,7 @@ struct amdgpu_mca_ras_block mca_v3_0_mpio_ras = {
},
.hw_ops = _v3_0_mpio_hw_ops,
.ras_block_match = mca_v3_0_ras_block_match,
-   .ras_fini = mca_v3_0_mpio_ras_fini,
+   .ras_fini = amdgpu_ras_block_late_fini,
},
 };
 
-- 
2.25.1



[PATCH 10/12] drm/amdgpu: Remove redundant calls of amdgpu_ras_block_late_fini in sdma ras block

2022-02-21 Thread yipechai
Remove the redundant amdgpu_sdma_ras_fini wrapper from the sdma ras block and
use amdgpu_ras_block_late_fini directly as the default .ras_fini function.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 7 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 1 -
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   | 2 +-
 3 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 1df8de84386d..e1835fd4b237 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -111,13 +111,6 @@ int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev,
return r;
 }
 
-void amdgpu_sdma_ras_fini(struct amdgpu_device *adev, struct ras_common_if 
*ras_block)
-{
-   if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA) &&
-   ras_block)
-   amdgpu_ras_block_late_fini(adev, ras_block);
-}
-
 int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev,
void *err_data,
struct amdgpu_iv_entry *entry)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 34ec60dfe5e8..53ac3ebae8d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -118,7 +118,6 @@ int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring 
*ring, uint32_t *index);
 uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, unsigned vmid);
 int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev,
  struct ras_common_if *ras_block);
-void amdgpu_sdma_ras_fini(struct amdgpu_device *adev, struct ras_common_if 
*ras_block);
 int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev,
void *err_data,
struct amdgpu_iv_entry *entry);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index d1d40bbb2892..206acb6a5b20 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -2824,7 +2824,7 @@ static void sdma_v4_0_set_ras_funcs(struct amdgpu_device 
*adev)
 
/* If don't define special ras_fini function, use default 
ras_fini */
if (!adev->sdma.ras->ras_block.ras_fini)
-   adev->sdma.ras->ras_block.ras_fini = 
amdgpu_sdma_ras_fini;
+   adev->sdma.ras->ras_block.ras_fini = 
amdgpu_ras_block_late_fini;
 
/* If not defined special ras_cb function, use default ras_cb */
if (!adev->sdma.ras->ras_block.ras_cb)
-- 
2.25.1



[PATCH 05/12] drm/amdgpu: Remove redundant calls of amdgpu_ras_block_late_fini in mmhub ras block

2022-02-21 Thread yipechai
Remove redundant calls of amdgpu_ras_block_late_fini in mmhub ras block.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c | 4 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h | 1 -
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 2 +-
 3 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
index e7c3b8fff868..8f2fa247d605 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
@@ -26,7 +26,5 @@
 
 void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev, struct ras_common_if 
*ras_block)
 {
-   if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB) &&
-   ras_block)
-   amdgpu_ras_block_late_fini(adev, ras_block);
+
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
index 253f047379cf..9f1540f0ebf9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
@@ -47,6 +47,5 @@ struct amdgpu_mmhub {
struct amdgpu_mmhub_ras  *ras;
 };
 
-void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev, struct ras_common_if 
*ras_block);
 #endif
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 412e44af1608..2f1b092c53b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1293,7 +1293,7 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct 
amdgpu_device *adev)
 
/* If don't define special ras_fini function, use default 
ras_fini */
if (!adev->mmhub.ras->ras_block.ras_fini)
-   adev->mmhub.ras->ras_block.ras_fini = 
amdgpu_mmhub_ras_fini;
+   adev->mmhub.ras->ras_block.ras_fini = 
amdgpu_ras_block_late_fini;
}
 }
 
-- 
2.25.1



[PATCH 08/12] drm/amdgpu: Remove redundant calls of amdgpu_ras_block_late_fini in xgmi ras block

2022-02-21 Thread yipechai
Remove redundant calls of amdgpu_ras_block_late_fini in xgmi ras block.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 9 +
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 7d18c5d22e10..c04d98e3bd9d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -743,13 +743,6 @@ static int amdgpu_xgmi_ras_late_init(struct amdgpu_device 
*adev, struct ras_comm
return amdgpu_ras_block_late_init(adev, ras_block);
 }
 
-static void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev, struct 
ras_common_if *ras_block)
-{
-   if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL) &&
-   ras_block)
-   amdgpu_ras_block_late_fini(adev, ras_block);
-}
-
 uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev,
   uint64_t addr)
 {
@@ -957,6 +950,6 @@ struct amdgpu_xgmi_ras xgmi_ras = {
},
.hw_ops = _ras_hw_ops,
.ras_late_init = amdgpu_xgmi_ras_late_init,
-   .ras_fini = amdgpu_xgmi_ras_fini,
+   .ras_fini = amdgpu_ras_block_late_fini,
},
 };
-- 
2.25.1



[PATCH 06/12] drm/amdgpu: Remove redundant calls of amdgpu_ras_block_late_fini in nbio ras block

2022-02-21 Thread yipechai
Remove redundant calls of amdgpu_ras_block_late_fini in nbio ras block.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c | 7 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h | 1 -
 drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c   | 2 +-
 3 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
index 54a5a15272c1..37d779b8e4a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
@@ -43,10 +43,3 @@ int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, 
struct ras_common_if *
amdgpu_ras_block_late_fini(adev, ras_block);
return r;
 }
-
-void amdgpu_nbio_ras_fini(struct amdgpu_device *adev, struct ras_common_if 
*ras_block)
-{
-   if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF) &&
-   ras_block)
-   amdgpu_ras_block_late_fini(adev, ras_block);
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
index 3222e1cae134..3d13e601fc35 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
@@ -105,5 +105,4 @@ struct amdgpu_nbio {
 };
 
 int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if 
*ras_block);
-void amdgpu_nbio_ras_fini(struct amdgpu_device *adev, struct ras_common_if 
*ras_block);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index 14768570c298..485fd9239cd5 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -671,7 +671,7 @@ struct amdgpu_nbio_ras nbio_v7_4_ras = {
},
.hw_ops = _v7_4_ras_hw_ops,
.ras_late_init = amdgpu_nbio_ras_late_init,
-   .ras_fini = amdgpu_nbio_ras_fini,
+   .ras_fini = amdgpu_ras_block_late_fini,
},
.handle_ras_controller_intr_no_bifring = 
nbio_v7_4_handle_ras_controller_intr_no_bifring,
.handle_ras_err_event_athub_intr_no_bifring = 
nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring,
-- 
2.25.1



[PATCH 07/12] drm/amdgpu: Remove redundant calls of amdgpu_ras_block_late_fini in umc ras block

2022-02-21 Thread yipechai
Remove redundant calls of amdgpu_ras_block_late_fini in umc ras block.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 7 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 1 -
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 2 +-
 4 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index 2623a2d30703..85da6cbaf3b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -162,13 +162,6 @@ int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, 
struct ras_common_if *r
return r;
 }
 
-void amdgpu_umc_ras_fini(struct amdgpu_device *adev, struct ras_common_if 
*ras_block)
-{
-   if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) &&
-   ras_block)
-   amdgpu_ras_block_late_fini(adev, ras_block);
-}
-
 int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index c8deba8dacb5..2ec6698aa1fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -73,7 +73,6 @@ struct amdgpu_umc {
 };
 
 int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if 
*ras_block);
-void amdgpu_umc_ras_fini(struct amdgpu_device *adev, struct ras_common_if 
*ras_block);
 int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
void *ras_error_status,
bool reset);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index e7add2020d48..c8ad824328d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -683,7 +683,7 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device 
*adev)
 
/* If don't define special ras_fini function, use default 
ras_fini */
if (!adev->umc.ras->ras_block.ras_fini)
-   adev->umc.ras->ras_block.ras_fini = 
amdgpu_umc_ras_fini;
+   adev->umc.ras->ras_block.ras_fini = 
amdgpu_ras_block_late_fini;
 
/* If not defined special ras_cb function, use default ras_cb */
if (!adev->umc.ras->ras_block.ras_cb)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 2f1b092c53b0..b01767d78153 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1243,7 +1243,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device 
*adev)
 
/* If don't define special ras_fini function, use default 
ras_fini */
if (!adev->umc.ras->ras_block.ras_fini)
-   adev->umc.ras->ras_block.ras_fini = 
amdgpu_umc_ras_fini;
+   adev->umc.ras->ras_block.ras_fini = 
amdgpu_ras_block_late_fini;
 
/* If not defined special ras_cb function, use default ras_cb */
if (!adev->umc.ras->ras_block.ras_cb)
-- 
2.25.1



[PATCH 04/12] drm/amdgpu: Remove redundant calls of amdgpu_ras_block_late_fini in gfx ras block

2022-02-21 Thread yipechai
Remove redundant calls of amdgpu_ras_block_late_fini in gfx ras block.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 7 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 -
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 2 +-
 3 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 40f7e29aa9ca..8fe939976224 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -644,13 +644,6 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, 
struct ras_common_if *r
return r;
 }
 
-void amdgpu_gfx_ras_fini(struct amdgpu_device *adev, struct ras_common_if 
*ras_block)
-{
-   if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
-   ras_block)
-   amdgpu_ras_block_late_fini(adev, ras_block);
-}
-
 int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
void *err_data,
struct amdgpu_iv_entry *entry)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index f7c50ab4589c..dcb3c7871c73 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -387,7 +387,6 @@ bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device 
*adev, int me,
 void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
 int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value);
 int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if 
*ras_block);
-void amdgpu_gfx_ras_fini(struct amdgpu_device *adev, struct ras_common_if 
*ras_block);
 int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
void *err_data,
struct amdgpu_iv_entry *entry);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index dc6e6fe6c978..f0cc073e6bb0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2206,7 +2206,7 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device 
*adev)
 
/* If not define special ras_fini function, use gfx default 
ras_fini */
if (!adev->gfx.ras->ras_block.ras_fini)
-   adev->gfx.ras->ras_block.ras_fini = amdgpu_gfx_ras_fini;
+   adev->gfx.ras->ras_block.ras_fini = 
amdgpu_ras_block_late_fini;
 
/* If not defined special ras_cb function, use default ras_cb */
if (!adev->gfx.ras->ras_block.ras_cb)
-- 
2.25.1



[PATCH 03/12] drm/amdgpu: centrally calls the .ras_fini function of all ras blocks

2022-02-21 Thread yipechai
Call the .ras_fini function of every registered ras block centrally from
amdgpu_ras_fini instead of from each IP block's own teardown path.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 10 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 22 --
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   |  3 ---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c  |  4 
 drivers/gpu/drm/amd/amdgpu/soc15.c  |  3 ---
 5 files changed, 16 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 4823c42e0e02..ab75e189bc0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -454,17 +454,7 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
 
 void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
 {
-   if (adev->umc.ras && adev->umc.ras->ras_block.ras_fini)
-   adev->umc.ras->ras_block.ras_fini(adev, adev->umc.ras_if);
 
-   if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ras_fini)
-   adev->mmhub.ras->ras_block.ras_fini(adev, adev->mmhub.ras_if);
-
-   if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_fini)
-   adev->gmc.xgmi.ras->ras_block.ras_fini(adev, 
adev->gmc.xgmi.ras_if);
-
-   if (adev->hdp.ras && adev->hdp.ras->ras_block.ras_fini)
-   adev->hdp.ras->ras_block.ras_fini(adev, adev->hdp.ras_if);
 }
 
/*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index e5874df3c9ca..846f51b0c013 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2572,11 +2572,27 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
 int amdgpu_ras_fini(struct amdgpu_device *adev)
 {
struct amdgpu_ras_block_list *ras_node, *tmp;
+   struct amdgpu_ras_block_object *obj;
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 
if (!adev->ras_enabled || !con)
return 0;
 
+
+   list_for_each_entry_safe(ras_node, tmp, >ras_list, node) {
+
+   if (ras_node->ras_obj) {
+   obj = ras_node->ras_obj;
+   if (amdgpu_ras_is_supported(adev, obj->ras_comm.block) 
&&
+   obj->ras_fini)
+   obj->ras_fini(adev, &obj->ras_comm);
+   }
+
+   /* Clear ras blocks from ras_list and free ras block list node 
*/
+   list_del(&ras_node->node);
+   kfree(ras_node);
+   }
+
amdgpu_ras_fs_fini(adev);
amdgpu_ras_interrupt_remove_all(adev);
 
@@ -2590,12 +2606,6 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
amdgpu_ras_set_context(adev, NULL);
kfree(con);
 
-   /* Clear ras blocks from ras_list and free ras block list node */
-   list_for_each_entry_safe(ras_node, tmp, >ras_list, node) {
-   list_del(_node->node);
-   kfree(ras_node);
-   }
-
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index e8446967a4d4..dc6e6fe6c978 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2432,9 +2432,6 @@ static int gfx_v9_0_sw_fini(void *handle)
int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-   if (adev->gfx.ras && adev->gfx.ras->ras_block.ras_fini)
-   adev->gfx.ras->ras_block.ras_fini(adev, adev->gfx.ras_if);
-
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(>gfx.gfx_ring[i]);
for (i = 0; i < adev->gfx.num_compute_rings; i++)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 222d25a0413a..d1d40bbb2892 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1995,10 +1995,6 @@ static int sdma_v4_0_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int i;
 
-   if (adev->sdma.ras && adev->sdma.ras->ras_block.hw_ops &&
-   adev->sdma.ras->ras_block.ras_fini)
-   adev->sdma.ras->ras_block.ras_fini(adev, adev->sdma.ras_if);
-
for (i = 0; i < adev->sdma.num_instances; i++) {
amdgpu_ring_fini(>sdma.instance[i].ring);
if (adev->sdma.has_page_queue)
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 0631ebd39db1..496c4a6e23ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -1214,9 +1214,6 @@ static int soc15_common_sw_fini(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-   

[PATCH 01/12] drm/amdgpu: Modify .ras_fini function pointer parameter

2022-02-21 Thread yipechai
Add a struct ras_common_if *ras_block parameter to the .ras_fini
function pointer so that the redundant intermediate wrapper calls
in some ras blocks can be removed.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c   | 8 
 drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c  | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h  | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c  | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h  | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c  | 2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/mca_v3_0.c | 6 +++---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c| 2 +-
 drivers/gpu/drm/amd/amdgpu/soc15.c| 2 +-
 19 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 52912b6bcb20..d020c4599433 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -644,7 +644,7 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, 
struct ras_common_if *r
return r;
 }
 
-void amdgpu_gfx_ras_fini(struct amdgpu_device *adev)
+void amdgpu_gfx_ras_fini(struct amdgpu_device *adev, struct ras_common_if 
*ras_block)
 {
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
adev->gfx.ras_if)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index ccca0a85b982..f7c50ab4589c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -387,7 +387,7 @@ bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device 
*adev, int me,
 void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
 int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value);
 int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if 
*ras_block);
-void amdgpu_gfx_ras_fini(struct amdgpu_device *adev);
+void amdgpu_gfx_ras_fini(struct amdgpu_device *adev, struct ras_common_if 
*ras_block);
 int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
void *err_data,
struct amdgpu_iv_entry *entry);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 26e31c53ed0d..5dcb341cae19 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -455,16 +455,16 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
 void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
 {
if (adev->umc.ras && adev->umc.ras->ras_block.ras_fini)
-   adev->umc.ras->ras_block.ras_fini(adev);
+   adev->umc.ras->ras_block.ras_fini(adev, NULL);
 
if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ras_fini)
-   adev->mmhub.ras->ras_block.ras_fini(adev);
+   adev->mmhub.ras->ras_block.ras_fini(adev, NULL);
 
if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_fini)
-   adev->gmc.xgmi.ras->ras_block.ras_fini(adev);
+   adev->gmc.xgmi.ras->ras_block.ras_fini(adev, NULL);
 
if (adev->hdp.ras && adev->hdp.ras->ras_block.ras_fini)
-   adev->hdp.ras->ras_block.ras_fini(adev);
+   adev->hdp.ras->ras_block.ras_fini(adev, NULL);
 }
 
/*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
index b7fbc114a175..0f224e21cd55 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
@@ -24,7 +24,7 @@
 #include "amdgpu.h"
 #include "amdgpu_ras.h"
 
-void amdgpu_hdp_ras_fini(struct amdgpu_device *adev)
+void amdgpu_hdp_ras_fini(struct amdgpu_device *adev, struct ras_common_if 
*ras_block)
 {
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP) &&
adev->hdp.ras_if)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
index aabd59aa5213..c05cd992ef8a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
@@ -44,5 +44,5 @@ struct amdgpu_hdp {
 };
 
 int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev, struct ras_common_if 
*ras_block);
-void amdgpu_hdp_ras_fini(struct amdgpu_device *adev);
+void amdgpu_hdp_ras_fini(struct amdgpu_device *adev, struct ras_common_if 
*ras_block);
 #endif /* __AMDGPU_HDP_H__ */

[PATCH 02/12] drm/amdgpu: Optimize xxx_ras_fini function of each ras block

2022-02-21 Thread yipechai
1. Pass the ras block instance member (e.g. adev->gfx.ras_if) as an
   argument at the top-level call site of xxx_ras_fini.
2. Inside each module, xxx_ras_fini and the functions it calls use
   only the ras_block parameter instead of ras block instance
   members.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   | 4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c   | 8 
 drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c   | 4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c | 4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c  | 4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c  | 4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c   | 4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c  | 4 ++--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c| 2 +-
 drivers/gpu/drm/amd/amdgpu/soc15.c| 2 +-
 11 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index d020c4599433..40f7e29aa9ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -647,8 +647,8 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, 
struct ras_common_if *r
 void amdgpu_gfx_ras_fini(struct amdgpu_device *adev, struct ras_common_if 
*ras_block)
 {
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
-   adev->gfx.ras_if)
-   amdgpu_ras_block_late_fini(adev, adev->gfx.ras_if);
+   ras_block)
+   amdgpu_ras_block_late_fini(adev, ras_block);
 }
 
 int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 5dcb341cae19..4823c42e0e02 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -455,16 +455,16 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
 void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
 {
if (adev->umc.ras && adev->umc.ras->ras_block.ras_fini)
-   adev->umc.ras->ras_block.ras_fini(adev, NULL);
+   adev->umc.ras->ras_block.ras_fini(adev, adev->umc.ras_if);
 
if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ras_fini)
-   adev->mmhub.ras->ras_block.ras_fini(adev, NULL);
+   adev->mmhub.ras->ras_block.ras_fini(adev, adev->mmhub.ras_if);
 
if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_fini)
-   adev->gmc.xgmi.ras->ras_block.ras_fini(adev, NULL);
+   adev->gmc.xgmi.ras->ras_block.ras_fini(adev, 
adev->gmc.xgmi.ras_if);
 
if (adev->hdp.ras && adev->hdp.ras->ras_block.ras_fini)
-   adev->hdp.ras->ras_block.ras_fini(adev, NULL);
+   adev->hdp.ras->ras_block.ras_fini(adev, adev->hdp.ras_if);
 }
 
/*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
index 0f224e21cd55..5595f903c17a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
@@ -27,6 +27,6 @@
 void amdgpu_hdp_ras_fini(struct amdgpu_device *adev, struct ras_common_if 
*ras_block)
 {
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP) &&
-   adev->hdp.ras_if)
-   amdgpu_ras_block_late_fini(adev, adev->hdp.ras_if);
+   ras_block)
+   amdgpu_ras_block_late_fini(adev, ras_block);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
index 6dfcedcc37fd..e7c3b8fff868 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
@@ -27,6 +27,6 @@
 void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev, struct ras_common_if 
*ras_block)
 {
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB) &&
-   adev->mmhub.ras_if)
-   amdgpu_ras_block_late_fini(adev, adev->mmhub.ras_if);
+   ras_block)
+   amdgpu_ras_block_late_fini(adev, ras_block);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
index 0de2fdf31eed..54a5a15272c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
@@ -47,6 +47,6 @@ int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, 
struct ras_common_if *
 void amdgpu_nbio_ras_fini(struct amdgpu_device *adev, struct ras_common_if 
*ras_block)
 {
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF) &&
-   adev->nbio.ras_if)
-   amdgpu_ras_block_late_fini(adev, adev->nbio.ras_if);
+   ras_block)
+   amdgpu_ras_block_late_fini(adev, ras_block);
 }
diff --git a/d

[PATCH V2 5/7] drm/amdgpu: Optimize xxx_ras_late_init function of each ras block

2022-02-16 Thread yipechai
1. Pass the ras block instance member as an argument at the top-level
   call site of xxx_ras_late_init instead of reading it inside the module.
2. Inside each module, xxx_ras_late_init and the functions it calls use
   only the ras_block parameter instead of ras block instance members.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c  | 6 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  | 4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c | 6 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 6 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c  | 6 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 2 +-
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   | 2 +-
 drivers/gpu/drm/amd/amdgpu/soc15.c   | 2 +-
 9 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index b7470ed7bc25..52912b6bcb20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -625,11 +625,11 @@ int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, 
uint32_t *value)
 int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if 
*ras_block)
 {
int r;
-   r = amdgpu_ras_block_late_init(adev, adev->gfx.ras_if);
+   r = amdgpu_ras_block_late_init(adev, ras_block);
if (r)
return r;
 
-   if (amdgpu_ras_is_supported(adev, adev->gfx.ras_if->block)) {
+   if (amdgpu_ras_is_supported(adev, ras_block->block)) {
if (!amdgpu_persistent_edc_harvesting_supported(adev))
amdgpu_ras_reset_error_status(adev, 
AMDGPU_RAS_BLOCK__GFX);
 
@@ -640,7 +640,7 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, 
struct ras_common_if *r
 
return 0;
 late_fini:
-   amdgpu_ras_block_late_fini(adev, adev->gfx.ras_if);
+   amdgpu_ras_block_late_fini(adev, ras_block);
return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index d42e05572db5..ebf4194b0699 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -452,7 +452,7 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
int r;
 
if (adev->umc.ras && adev->umc.ras->ras_block.ras_late_init) {
-   r = adev->umc.ras->ras_block.ras_late_init(adev, NULL);
+   r = adev->umc.ras->ras_block.ras_late_init(adev, 
adev->umc.ras_if);
if (r)
return r;
}
@@ -464,7 +464,7 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
}
 
if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_late_init) {
-   r = adev->gmc.xgmi.ras->ras_block.ras_late_init(adev, NULL);
+   r = adev->gmc.xgmi.ras->ras_block.ras_late_init(adev, 
adev->gmc.xgmi.ras_if);
if (r)
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
index 92fd4ffa7779..f09ad80f0772 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
@@ -25,11 +25,11 @@
 int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if 
*ras_block)
 {
int r;
-   r = amdgpu_ras_block_late_init(adev, adev->nbio.ras_if);
+   r = amdgpu_ras_block_late_init(adev, ras_block);
if (r)
return r;
 
-   if (amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) {
+   if (amdgpu_ras_is_supported(adev, ras_block->block)) {
r = amdgpu_irq_get(adev, >nbio.ras_controller_irq, 0);
if (r)
goto late_fini;
@@ -40,7 +40,7 @@ int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, 
struct ras_common_if *
 
return 0;
 late_fini:
-   amdgpu_ras_block_late_fini(adev, adev->nbio.ras_if);
+   amdgpu_ras_block_late_fini(adev, ras_block);
return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 594454dba4c1..3b5c43575aa3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -91,11 +91,11 @@ int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev,
 {
int r, i;
 
-   r = amdgpu_ras_block_late_init(adev, adev->sdma.ras_if);
+   r = amdgpu_ras_block_late_init(adev, ras_block);
if (r)
return r;
 
-   if (amdgpu_ras_is_supported(adev, adev->sdma.ras_if->block)) {
+   if (amdgpu_ras_is_supported(adev, ras_block->block)) {
for (i = 0; i < adev->sdma.num_instances; i++) {
r = amdgpu_irq_get(adev, >sdma.ecc_irq,
AMDGPU_SDMA_IRQ_INSTAN

[PATCH V2 7/7] drm/amdgpu: Remove redundant .ras_late_init initialization in some ras blocks

2022-02-16 Thread yipechai
1. Define amdgpu_ras_block_late_init_default in amdgpu_ras.c as the
   common .ras_late_init function; it is called when a ras block
   does not set its own .ras_late_init.
2. Remove the code in ras blocks that set .ras_late_init to
   amdgpu_ras_block_late_init.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 10 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   |  4 
 drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c   |  1 -
 drivers/gpu/drm/amd/amdgpu/mca_v3_0.c   |  3 ---
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index b5286a0d9c8a..35167a3ddf94 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2457,6 +2457,12 @@ int amdgpu_ras_block_late_init(struct amdgpu_device 
*adev,
return r;
 }
 
+int amdgpu_ras_block_late_init_default(struct amdgpu_device *adev,
+struct ras_common_if *ras_block)
+{
+   return amdgpu_ras_block_late_init(adev, ras_block);
+}
+
 /* helper function to remove ras fs node and interrupt handler */
 void amdgpu_ras_block_late_fini(struct amdgpu_device *adev,
  struct ras_common_if *ras_block)
@@ -2533,6 +2539,7 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
dev_warn(adev->dev, "Warning: abnormal ras list 
node.\n");
continue;
}
+
obj = node->ras_obj;
if (obj->ras_late_init) {
r = obj->ras_late_init(adev, &obj->ras_comm);
@@ -2541,7 +2548,8 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
obj->ras_comm.name, r);
return r;
}
-   }
+   } else
+   amdgpu_ras_block_late_init_default(adev, 
&obj->ras_comm);
}
 
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index b719d2c3003b..412e44af1608 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1291,10 +1291,6 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct 
amdgpu_device *adev)
adev->mmhub.ras->ras_block.ras_comm.type = 
AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
adev->mmhub.ras_if = &adev->mmhub.ras->ras_block.ras_comm;
 
-   /* If don't define special ras_late_init function, use default 
ras_late_init */
-   if (!adev->mmhub.ras->ras_block.ras_late_init)
-   adev->mmhub.ras->ras_block.ras_late_init = 
amdgpu_ras_block_late_init;
-
/* If don't define special ras_fini function, use default 
ras_fini */
if (!adev->mmhub.ras->ras_block.ras_fini)
adev->mmhub.ras->ras_block.ras_fini = 
amdgpu_mmhub_ras_fini;
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
index c9e931f046f7..d7811e0327cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
@@ -163,7 +163,6 @@ struct amdgpu_hdp_ras hdp_v4_0_ras = {
.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
},
.hw_ops = &hdp_v4_0_ras_hw_ops,
-   .ras_late_init = amdgpu_ras_block_late_init,
.ras_fini = amdgpu_hdp_ras_fini,
},
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
index 12d09a58b644..b4b36899f5c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
@@ -71,7 +71,6 @@ struct amdgpu_mca_ras_block mca_v3_0_mp0_ras = {
},
.hw_ops = &mca_v3_0_mp0_hw_ops,
.ras_block_match = mca_v3_0_ras_block_match,
-   .ras_late_init = amdgpu_ras_block_late_init,
.ras_fini = mca_v3_0_mp0_ras_fini,
},
 };
@@ -104,7 +103,6 @@ struct amdgpu_mca_ras_block mca_v3_0_mp1_ras = {
},
.hw_ops = &mca_v3_0_mp1_hw_ops,
.ras_block_match = mca_v3_0_ras_block_match,
-   .ras_late_init = amdgpu_ras_block_late_init,
.ras_fini = mca_v3_0_mp1_ras_fini,
},
 };
@@ -137,7 +135,6 @@ struct amdgpu_mca_ras_block mca_v3_0_mpio_ras = {
},
.hw_ops = &mca_v3_0_mpio_hw_ops,
.ras_block_match = mca_v3_0_ras_block_match,
-   .ras_late_init = amdgpu_ras_block_late_init,
.ras_fini = mca_v3_0_mpio_ras_fini,
},
 };
-- 
2.25.1



[PATCH V2 6/7] drm/amdgpu: define amdgpu_ras_late_init to call all ras blocks' .ras_late_init

2022-02-16 Thread yipechai
Define amdgpu_ras_late_init to call all ras blocks' .ras_late_init.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  6 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c| 44 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c| 25 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h|  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  |  6 ---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c |  5 +--
 drivers/gpu/drm/amd/amdgpu/soc15.c |  6 +--
 7 files changed, 34 insertions(+), 59 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a74a1b74a172..d90388dd5362 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2625,6 +2625,12 @@ static int amdgpu_device_ip_late_init(struct 
amdgpu_device *adev)
adev->ip_blocks[i].status.late_initialized = true;
}
 
+   r = amdgpu_ras_late_init(adev);
+   if (r) {
+   DRM_ERROR("amdgpu_ras_late_init failed %d", r);
+   return r;
+   }
+
amdgpu_ras_set_error_query_ready(adev, true);
 
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index ebf4194b0699..49dd81c0db2d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -449,50 +449,6 @@ int amdgpu_gmc_ras_early_init(struct amdgpu_device *adev)
 
 int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
 {
-   int r;
-
-   if (adev->umc.ras && adev->umc.ras->ras_block.ras_late_init) {
-   r = adev->umc.ras->ras_block.ras_late_init(adev, 
adev->umc.ras_if);
-   if (r)
-   return r;
-   }
-
-   if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ras_late_init) {
-   r = adev->mmhub.ras->ras_block.ras_late_init(adev, 
adev->mmhub.ras_if);
-   if (r)
-   return r;
-   }
-
-   if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_late_init) {
-   r = adev->gmc.xgmi.ras->ras_block.ras_late_init(adev, 
adev->gmc.xgmi.ras_if);
-   if (r)
-   return r;
-   }
-
-   if (adev->hdp.ras && adev->hdp.ras->ras_block.ras_late_init) {
-   r = adev->hdp.ras->ras_block.ras_late_init(adev, 
adev->hdp.ras_if);
-   if (r)
-   return r;
-   }
-
-   if (adev->mca.mp0.ras && adev->mca.mp0.ras->ras_block.ras_late_init) {
-   r = adev->mca.mp0.ras->ras_block.ras_late_init(adev, 
adev->mca.mp0.ras_if);
-   if (r)
-   return r;
-   }
-
-   if (adev->mca.mp1.ras && adev->mca.mp1.ras->ras_block.ras_late_init) {
-   r = adev->mca.mp1.ras->ras_block.ras_late_init(adev, 
adev->mca.mp1.ras_if);
-   if (r)
-   return r;
-   }
-
-   if (adev->mca.mpio.ras && adev->mca.mpio.ras->ras_block.ras_late_init) {
-   r = adev->mca.mpio.ras->ras_block.ras_late_init(adev, 
adev->mca.mpio.ras_if);
-   if (r)
-   return r;
-   }
-
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 1aff88fcea76..b5286a0d9c8a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2522,6 +2522,31 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev)
amdgpu_ras_disable_all_features(adev, 1);
 }
 
+int amdgpu_ras_late_init(struct amdgpu_device *adev)
+{
+   struct amdgpu_ras_block_list *node, *tmp;
+   struct amdgpu_ras_block_object *obj;
+   int r;
+
+   list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
+   if (!node->ras_obj) {
+   dev_warn(adev->dev, "Warning: abnormal ras list 
node.\n");
+   continue;
+   }
+   obj = node->ras_obj;
+   if (obj->ras_late_init) {
+   r = obj->ras_late_init(adev, &obj->ras_comm);
+   if (r) {
+   dev_err(adev->dev, "%s failed to execute 
ras_late_init! ret:%d\n",
+   obj->ras_comm.name, r);
+   return r;
+   }
+   }
+   }
+
+   return 0;
+}
+
 /* do some fini work before IP fini as dependence */
 int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 837d1b

[PATCH V2 4/7] drm/amdgpu: Remove redundant calls of ras_late_init in mca ras block

2022-02-16 Thread yipechai
Remove redundant calls of ras_late_init in mca ras block.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c |  6 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c |  6 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h |  3 ---
 drivers/gpu/drm/amd/amdgpu/mca_v3_0.c   | 21 +++--
 4 files changed, 6 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index f443d1e359ce..d42e05572db5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -476,19 +476,19 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
}
 
if (adev->mca.mp0.ras && adev->mca.mp0.ras->ras_block.ras_late_init) {
-   r = adev->mca.mp0.ras->ras_block.ras_late_init(adev, NULL);
+   r = adev->mca.mp0.ras->ras_block.ras_late_init(adev, 
adev->mca.mp0.ras_if);
if (r)
return r;
}
 
if (adev->mca.mp1.ras && adev->mca.mp1.ras->ras_block.ras_late_init) {
-   r = adev->mca.mp1.ras->ras_block.ras_late_init(adev, NULL);
+   r = adev->mca.mp1.ras->ras_block.ras_late_init(adev, 
adev->mca.mp1.ras_if);
if (r)
return r;
}
 
if (adev->mca.mpio.ras && adev->mca.mpio.ras->ras_block.ras_late_init) {
-   r = adev->mca.mpio.ras->ras_block.ras_late_init(adev, NULL);
+   r = adev->mca.mpio.ras->ras_block.ras_late_init(adev, 
adev->mca.mpio.ras_if);
if (r)
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
index 1c77fe7e9e68..e2607d9f5cf4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
@@ -71,12 +71,6 @@ void amdgpu_mca_query_ras_error_count(struct amdgpu_device 
*adev,
amdgpu_mca_reset_error_count(adev, mc_status_addr);
 }
 
-int amdgpu_mca_ras_late_init(struct amdgpu_device *adev,
-struct amdgpu_mca_ras *mca_dev)
-{
-   return amdgpu_ras_block_late_init(adev, mca_dev->ras_if);
-}
-
 void amdgpu_mca_ras_fini(struct amdgpu_device *adev,
 struct amdgpu_mca_ras *mca_dev)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
index be030c4031d2..15e1a1efeb4f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
@@ -56,9 +56,6 @@ void amdgpu_mca_query_ras_error_count(struct amdgpu_device 
*adev,
  uint64_t mc_status_addr,
  void *ras_error_status);
 
-int amdgpu_mca_ras_late_init(struct amdgpu_device *adev,
-struct amdgpu_mca_ras *mca_dev);
-
 void amdgpu_mca_ras_fini(struct amdgpu_device *adev,
 struct amdgpu_mca_ras *mca_dev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
index 72ce19acb8bb..12d09a58b644 100644
--- a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
@@ -37,11 +37,6 @@ static void mca_v3_0_mp0_query_ras_error_count(struct 
amdgpu_device *adev,
 ras_error_status);
 }
 
-static int mca_v3_0_mp0_ras_late_init(struct amdgpu_device *adev, struct 
ras_common_if *ras_block)
-{
-   return amdgpu_mca_ras_late_init(adev, &adev->mca.mp0);
-}
-
 static void mca_v3_0_mp0_ras_fini(struct amdgpu_device *adev)
 {
amdgpu_mca_ras_fini(adev, &adev->mca.mp0);
@@ -76,7 +71,7 @@ struct amdgpu_mca_ras_block mca_v3_0_mp0_ras = {
},
.hw_ops = &mca_v3_0_mp0_hw_ops,
.ras_block_match = mca_v3_0_ras_block_match,
-   .ras_late_init = mca_v3_0_mp0_ras_late_init,
+   .ras_late_init = amdgpu_ras_block_late_init,
.ras_fini = mca_v3_0_mp0_ras_fini,
},
 };
@@ -89,11 +84,6 @@ static void mca_v3_0_mp1_query_ras_error_count(struct 
amdgpu_device *adev,
 ras_error_status);
 }
 
-static int mca_v3_0_mp1_ras_late_init(struct amdgpu_device *adev, struct 
ras_common_if *ras_block)
-{
-   return amdgpu_mca_ras_late_init(adev, &adev->mca.mp1);
-}
-
 static void mca_v3_0_mp1_ras_fini(struct amdgpu_device *adev)
 {
amdgpu_mca_ras_fini(adev, &adev->mca.mp1);
@@ -114,7 +104,7 @@ struct amdgpu_mca_ras_block mca_v3_0_mp1_ras = {
},
.hw_ops = &mca_v3_0_mp1_hw_ops,
.ras_block_match = mca_v3_0_ras_block_match,
-   .ras_late_init = mca_v3_0_mp1_ras_late_init,
+   .ras_late_init = amdgpu_ras_block_late_init,
.ras_fini = mca_v3_0_mp1_ras_fini,

[PATCH V2 2/7] drm/amdgpu: Remove redundant calls of ras_late_init in hdp ras block

2022-02-16 Thread yipechai
Remove redundant calls of ras_late_init in hdp ras block.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c | 5 -
 drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c   | 2 +-
 3 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 956cc994ca7d..67a7d1cb89d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -470,7 +470,7 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
}
 
if (adev->hdp.ras && adev->hdp.ras->ras_block.ras_late_init) {
-   r = adev->hdp.ras->ras_block.ras_late_init(adev, NULL);
+   r = adev->hdp.ras->ras_block.ras_late_init(adev, 
adev->hdp.ras_if);
if (r)
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
index 70a096160998..b7fbc114a175 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
@@ -24,11 +24,6 @@
 #include "amdgpu.h"
 #include "amdgpu_ras.h"
 
-int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev, struct ras_common_if 
*ras_block)
-{
-   return amdgpu_ras_block_late_init(adev, adev->hdp.ras_if);
-}
-
 void amdgpu_hdp_ras_fini(struct amdgpu_device *adev)
 {
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP) &&
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
index a9ed4232cdeb..c9e931f046f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
@@ -163,7 +163,7 @@ struct amdgpu_hdp_ras hdp_v4_0_ras = {
.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
},
.hw_ops = &hdp_v4_0_ras_hw_ops,
-   .ras_late_init = amdgpu_hdp_ras_late_init,
+   .ras_late_init = amdgpu_ras_block_late_init,
.ras_fini = amdgpu_hdp_ras_fini,
},
 };
-- 
2.25.1



[PATCH V2 3/7] drm/amdgpu: Remove redundant calls of ras_late_init in mmhub ras block

2022-02-16 Thread yipechai
Remove redundant calls of ras_late_init in mmhub ras block.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c | 5 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h | 1 -
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 2 +-
 4 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 67a7d1cb89d6..f443d1e359ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -458,7 +458,7 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
}
 
if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ras_late_init) {
-   r = adev->mmhub.ras->ras_block.ras_late_init(adev, NULL);
+   r = adev->mmhub.ras->ras_block.ras_late_init(adev, 
adev->mmhub.ras_if);
if (r)
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
index ede98db8c126..42413813765a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
@@ -24,11 +24,6 @@
 #include "amdgpu.h"
 #include "amdgpu_ras.h"
 
-int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev, struct 
ras_common_if *ras_block)
-{
-   return amdgpu_ras_block_late_init(adev, adev->mmhub.ras_if);
-}
-
 void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev)
 {
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB) &&
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
index 75815106f2d5..240b26d9a388 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
@@ -47,7 +47,6 @@ struct amdgpu_mmhub {
struct amdgpu_mmhub_ras  *ras;
 };
 
-int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev, struct 
ras_common_if *ras_block);
 void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev);
 #endif
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 94095b965e2c..b719d2c3003b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1293,7 +1293,7 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct 
amdgpu_device *adev)
 
/* If don't define special ras_late_init function, use default 
ras_late_init */
if (!adev->mmhub.ras->ras_block.ras_late_init)
-   adev->mmhub.ras->ras_block.ras_late_init = 
amdgpu_mmhub_ras_late_init;
+   adev->mmhub.ras->ras_block.ras_late_init = 
amdgpu_ras_block_late_init;
 
/* If don't define special ras_fini function, use default 
ras_fini */
if (!adev->mmhub.ras->ras_block.ras_fini)
-- 
2.25.1



[PATCH V2 1/7] drm/amdgpu: Modify .ras_late_init function pointer parameter

2022-02-16 Thread yipechai
Modify .ras_late_init function pointer parameter so that
it can remove redundant intermediate calls in some ras blocks.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c  | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h  | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c  | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h  | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c  | 2 +-
 drivers/gpu/drm/amd/amdgpu/mca_v3_0.c | 6 +++---
 15 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index fe392108b5c2..b7470ed7bc25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -622,7 +622,7 @@ int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, 
uint32_t *value)
return r;
 }
 
-int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, void *ras_info)
+int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if 
*ras_block)
 {
int r;
r = amdgpu_ras_block_late_init(adev, adev->gfx.ras_if);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index f99eac544f6d..ccca0a85b982 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -386,7 +386,7 @@ bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device 
*adev, int me,
int pipe, int queue);
 void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
 int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value);
-int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, void *ras_info);
+int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if 
*ras_block);
 void amdgpu_gfx_ras_fini(struct amdgpu_device *adev);
 int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
void *err_data,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
index 21a5f884dd2a..70a096160998 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
@@ -24,7 +24,7 @@
 #include "amdgpu.h"
 #include "amdgpu_ras.h"
 
-int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev, void *ras_info)
+int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev, struct ras_common_if 
*ras_block)
 {
return amdgpu_ras_block_late_init(adev, adev->hdp.ras_if);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
index 4af2c2a322e7..aabd59aa5213 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
@@ -43,6 +43,6 @@ struct amdgpu_hdp {
struct amdgpu_hdp_ras   *ras;
 };
 
-int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev, void *ras_info);
+int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev, struct ras_common_if 
*ras_block);
 void amdgpu_hdp_ras_fini(struct amdgpu_device *adev);
 #endif /* __AMDGPU_HDP_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
index 2bdb4d8b7955..ede98db8c126 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
@@ -24,7 +24,7 @@
 #include "amdgpu.h"
 #include "amdgpu_ras.h"
 
-int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev, void *ras_info)
+int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev, struct 
ras_common_if *ras_block)
 {
return amdgpu_ras_block_late_init(adev, adev->mmhub.ras_if);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
index 7deda9a3b81e..75815106f2d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
@@ -47,7 +47,7 @@ struct amdgpu_mmhub {
struct amdgpu_mmhub_ras  *ras;
 };
 
-int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev, void *ras_info);
+int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev, struct 
ras_common_if *ras_block);
 void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev);
 #endif
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
index 89e61fdd3580..92fd4ffa7779 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
@@ -22,7 +22,7 @@
 #include "amdgpu.h"
 #include "amdgpu_ras.h"
 
-int amdgp

[PATCH 6/7] drm/amdgpu: define amdgpu_ras_late_init to call all ras blocks' .ras_late_init

2022-02-14 Thread yipechai
Define amdgpu_ras_late_init to call all ras blocks' .ras_late_init.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c| 44 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c| 18 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h|  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  |  6 ---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c |  5 +--
 drivers/gpu/drm/amd/amdgpu/soc15.c |  6 +--
 7 files changed, 23 insertions(+), 59 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a74a1b74a172..67ea23dbc618 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2625,6 +2625,8 @@ static int amdgpu_device_ip_late_init(struct 
amdgpu_device *adev)
adev->ip_blocks[i].status.late_initialized = true;
}
 
+   amdgpu_ras_late_init(adev);
+
amdgpu_ras_set_error_query_ready(adev, true);
 
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index ebf4194b0699..49dd81c0db2d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -449,50 +449,6 @@ int amdgpu_gmc_ras_early_init(struct amdgpu_device *adev)
 
 int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
 {
-   int r;
-
-   if (adev->umc.ras && adev->umc.ras->ras_block.ras_late_init) {
-   r = adev->umc.ras->ras_block.ras_late_init(adev, 
adev->umc.ras_if);
-   if (r)
-   return r;
-   }
-
-   if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ras_late_init) {
-   r = adev->mmhub.ras->ras_block.ras_late_init(adev, 
adev->mmhub.ras_if);
-   if (r)
-   return r;
-   }
-
-   if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_late_init) {
-   r = adev->gmc.xgmi.ras->ras_block.ras_late_init(adev, 
adev->gmc.xgmi.ras_if);
-   if (r)
-   return r;
-   }
-
-   if (adev->hdp.ras && adev->hdp.ras->ras_block.ras_late_init) {
-   r = adev->hdp.ras->ras_block.ras_late_init(adev, 
adev->hdp.ras_if);
-   if (r)
-   return r;
-   }
-
-   if (adev->mca.mp0.ras && adev->mca.mp0.ras->ras_block.ras_late_init) {
-   r = adev->mca.mp0.ras->ras_block.ras_late_init(adev, 
adev->mca.mp0.ras_if);
-   if (r)
-   return r;
-   }
-
-   if (adev->mca.mp1.ras && adev->mca.mp1.ras->ras_block.ras_late_init) {
-   r = adev->mca.mp1.ras->ras_block.ras_late_init(adev, 
adev->mca.mp1.ras_if);
-   if (r)
-   return r;
-   }
-
-   if (adev->mca.mpio.ras && adev->mca.mpio.ras->ras_block.ras_late_init) {
-   r = adev->mca.mpio.ras->ras_block.ras_late_init(adev, 
adev->mca.mpio.ras_if);
-   if (r)
-   return r;
-   }
-
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 1aff88fcea76..6cb1e5d126d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2522,6 +2522,24 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev)
amdgpu_ras_disable_all_features(adev, 1);
 }
 
+int amdgpu_ras_late_init(struct amdgpu_device *adev)
+{
+   struct amdgpu_ras_block_list *node, *tmp;
+   struct amdgpu_ras_block_object *obj;
+
+   list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
+   if (!node->ras_obj) {
+   dev_warn(adev->dev, "Warning: abnormal ras list 
node.\n");
+   continue;
+   }
+   obj = node->ras_obj;
+   if (obj->ras_late_init)
+   obj->ras_late_init(adev, &obj->ras_comm);
+   }
+
+   return 0;
+}
+
 /* do some fini work before IP fini as dependence */
 int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 837d1b79a9cb..143a83043d7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -595,6 +595,7 @@ amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) {
 
 /* called in ip_init and ip_fini */
 int amdgpu_ras_init(struct amdgpu_device *adev);
+int amdgpu_ras_late_init(struct amdgpu_device *adev);
 int amdgpu_ras_fini(struct amdgpu_device *adev);
 int amdgpu_ras_pre_fini(struct amdgpu_device *adev);
 
dif

[PATCH 7/7] drm/amdgpu: Remove redundant .ras_late_init initialization in some ras blocks

2022-02-14 Thread yipechai
1. Define amdgpu_ras_block_late_init_default in amdgpu_ras.c as
   .ras_late_init common function, which is called when
   .ras_late_init of ras block isn't initialized.
2. Remove the code of using amdgpu_ras_block_late_init to
   initialize .ras_late_init in ras blocks.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 8 
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 4 
 drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c   | 1 -
 drivers/gpu/drm/amd/amdgpu/mca_v3_0.c   | 3 ---
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 6cb1e5d126d7..ad37df6e50ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2457,6 +2457,12 @@ int amdgpu_ras_block_late_init(struct amdgpu_device 
*adev,
return r;
 }
 
+int amdgpu_ras_block_late_init_default(struct amdgpu_device *adev,
+struct ras_common_if *ras_block)
+{
+   return amdgpu_ras_block_late_init(adev, ras_block);
+}
+
 /* helper function to remove ras fs node and interrupt handler */
 void amdgpu_ras_block_late_fini(struct amdgpu_device *adev,
  struct ras_common_if *ras_block)
@@ -2535,6 +2541,8 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
obj = node->ras_obj;
if (obj->ras_late_init)
obj->ras_late_init(adev, &obj->ras_comm);
+   else
+   amdgpu_ras_block_late_init_default(adev, 
&obj->ras_comm);
}
 
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index b719d2c3003b..412e44af1608 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1291,10 +1291,6 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct 
amdgpu_device *adev)
adev->mmhub.ras->ras_block.ras_comm.type = 
AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
adev->mmhub.ras_if = &adev->mmhub.ras->ras_block.ras_comm;
 
-   /* If don't define special ras_late_init function, use default 
ras_late_init */
-   if (!adev->mmhub.ras->ras_block.ras_late_init)
-   adev->mmhub.ras->ras_block.ras_late_init = 
amdgpu_ras_block_late_init;
-
/* If don't define special ras_fini function, use default 
ras_fini */
if (!adev->mmhub.ras->ras_block.ras_fini)
adev->mmhub.ras->ras_block.ras_fini = 
amdgpu_mmhub_ras_fini;
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
index c9e931f046f7..d7811e0327cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
@@ -163,7 +163,6 @@ struct amdgpu_hdp_ras hdp_v4_0_ras = {
.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
},
.hw_ops = &hdp_v4_0_ras_hw_ops,
-   .ras_late_init = amdgpu_ras_block_late_init,
.ras_fini = amdgpu_hdp_ras_fini,
},
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
index 12d09a58b644..b4b36899f5c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
@@ -71,7 +71,6 @@ struct amdgpu_mca_ras_block mca_v3_0_mp0_ras = {
},
.hw_ops = &mca_v3_0_mp0_hw_ops,
.ras_block_match = mca_v3_0_ras_block_match,
-   .ras_late_init = amdgpu_ras_block_late_init,
.ras_fini = mca_v3_0_mp0_ras_fini,
},
 };
@@ -104,7 +103,6 @@ struct amdgpu_mca_ras_block mca_v3_0_mp1_ras = {
},
.hw_ops = &mca_v3_0_mp1_hw_ops,
.ras_block_match = mca_v3_0_ras_block_match,
-   .ras_late_init = amdgpu_ras_block_late_init,
.ras_fini = mca_v3_0_mp1_ras_fini,
},
 };
@@ -137,7 +135,6 @@ struct amdgpu_mca_ras_block mca_v3_0_mpio_ras = {
},
.hw_ops = &mca_v3_0_mpio_hw_ops,
.ras_block_match = mca_v3_0_ras_block_match,
-   .ras_late_init = amdgpu_ras_block_late_init,
.ras_fini = mca_v3_0_mpio_ras_fini,
},
 };
-- 
2.25.1



[PATCH 4/7] drm/amdgpu: Remove redundant calls of ras_late_init in mca ras block

2022-02-14 Thread yipechai
Remove redundant calls of ras_late_init in mca ras block.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c |  6 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c |  6 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h |  3 ---
 drivers/gpu/drm/amd/amdgpu/mca_v3_0.c   | 21 +++--
 4 files changed, 6 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index f443d1e359ce..d42e05572db5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -476,19 +476,19 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
}
 
if (adev->mca.mp0.ras && adev->mca.mp0.ras->ras_block.ras_late_init) {
-   r = adev->mca.mp0.ras->ras_block.ras_late_init(adev, NULL);
+   r = adev->mca.mp0.ras->ras_block.ras_late_init(adev, 
adev->mca.mp0.ras_if);
if (r)
return r;
}
 
if (adev->mca.mp1.ras && adev->mca.mp1.ras->ras_block.ras_late_init) {
-   r = adev->mca.mp1.ras->ras_block.ras_late_init(adev, NULL);
+   r = adev->mca.mp1.ras->ras_block.ras_late_init(adev, 
adev->mca.mp1.ras_if);
if (r)
return r;
}
 
if (adev->mca.mpio.ras && adev->mca.mpio.ras->ras_block.ras_late_init) {
-   r = adev->mca.mpio.ras->ras_block.ras_late_init(adev, NULL);
+   r = adev->mca.mpio.ras->ras_block.ras_late_init(adev, 
adev->mca.mpio.ras_if);
if (r)
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
index 1c77fe7e9e68..e2607d9f5cf4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
@@ -71,12 +71,6 @@ void amdgpu_mca_query_ras_error_count(struct amdgpu_device 
*adev,
amdgpu_mca_reset_error_count(adev, mc_status_addr);
 }
 
-int amdgpu_mca_ras_late_init(struct amdgpu_device *adev,
-struct amdgpu_mca_ras *mca_dev)
-{
-   return amdgpu_ras_block_late_init(adev, mca_dev->ras_if);
-}
-
 void amdgpu_mca_ras_fini(struct amdgpu_device *adev,
 struct amdgpu_mca_ras *mca_dev)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
index be030c4031d2..15e1a1efeb4f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
@@ -56,9 +56,6 @@ void amdgpu_mca_query_ras_error_count(struct amdgpu_device 
*adev,
  uint64_t mc_status_addr,
  void *ras_error_status);
 
-int amdgpu_mca_ras_late_init(struct amdgpu_device *adev,
-struct amdgpu_mca_ras *mca_dev);
-
 void amdgpu_mca_ras_fini(struct amdgpu_device *adev,
 struct amdgpu_mca_ras *mca_dev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
index 72ce19acb8bb..12d09a58b644 100644
--- a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
@@ -37,11 +37,6 @@ static void mca_v3_0_mp0_query_ras_error_count(struct 
amdgpu_device *adev,
 ras_error_status);
 }
 
-static int mca_v3_0_mp0_ras_late_init(struct amdgpu_device *adev, struct 
ras_common_if *ras_block)
-{
-   return amdgpu_mca_ras_late_init(adev, &adev->mca.mp0);
-}
-
 static void mca_v3_0_mp0_ras_fini(struct amdgpu_device *adev)
 {
amdgpu_mca_ras_fini(adev, &adev->mca.mp0);
@@ -76,7 +71,7 @@ struct amdgpu_mca_ras_block mca_v3_0_mp0_ras = {
},
.hw_ops = &mca_v3_0_mp0_hw_ops,
.ras_block_match = mca_v3_0_ras_block_match,
-   .ras_late_init = mca_v3_0_mp0_ras_late_init,
+   .ras_late_init = amdgpu_ras_block_late_init,
.ras_fini = mca_v3_0_mp0_ras_fini,
},
 };
@@ -89,11 +84,6 @@ static void mca_v3_0_mp1_query_ras_error_count(struct 
amdgpu_device *adev,
 ras_error_status);
 }
 
-static int mca_v3_0_mp1_ras_late_init(struct amdgpu_device *adev, struct 
ras_common_if *ras_block)
-{
-   return amdgpu_mca_ras_late_init(adev, &adev->mca.mp1);
-}
-
 static void mca_v3_0_mp1_ras_fini(struct amdgpu_device *adev)
 {
amdgpu_mca_ras_fini(adev, &adev->mca.mp1);
@@ -114,7 +104,7 @@ struct amdgpu_mca_ras_block mca_v3_0_mp1_ras = {
},
.hw_ops = &mca_v3_0_mp1_hw_ops,
.ras_block_match = mca_v3_0_ras_block_match,
-   .ras_late_init = mca_v3_0_mp1_ras_late_init,
+   .ras_late_init = amdgpu_ras_block_late_init,
.ras_fini = mca_v3_0_mp1_ras_fini,

[PATCH 5/7] drm/amdgpu: Optimize xxx_ras_late_init function of each ras block

2022-02-14 Thread yipechai
1. Pass the ras block instance member (adev->xxx.ras_if) as an argument
   at the top-level call site of xxx_ras_late_init instead of
   referencing it inside the module-internal function.
2. Module-internal functions now use only the parameters of
   xxx_ras_late_init rather than ras block instance members.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c  | 6 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  | 4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c | 6 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 6 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c  | 6 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 2 +-
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   | 2 +-
 drivers/gpu/drm/amd/amdgpu/soc15.c   | 2 +-
 9 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index b7470ed7bc25..52912b6bcb20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -625,11 +625,11 @@ int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, 
uint32_t *value)
 int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if 
*ras_block)
 {
int r;
-   r = amdgpu_ras_block_late_init(adev, adev->gfx.ras_if);
+   r = amdgpu_ras_block_late_init(adev, ras_block);
if (r)
return r;
 
-   if (amdgpu_ras_is_supported(adev, adev->gfx.ras_if->block)) {
+   if (amdgpu_ras_is_supported(adev, ras_block->block)) {
if (!amdgpu_persistent_edc_harvesting_supported(adev))
amdgpu_ras_reset_error_status(adev, 
AMDGPU_RAS_BLOCK__GFX);
 
@@ -640,7 +640,7 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, 
struct ras_common_if *r
 
return 0;
 late_fini:
-   amdgpu_ras_block_late_fini(adev, adev->gfx.ras_if);
+   amdgpu_ras_block_late_fini(adev, ras_block);
return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index d42e05572db5..ebf4194b0699 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -452,7 +452,7 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
int r;
 
if (adev->umc.ras && adev->umc.ras->ras_block.ras_late_init) {
-   r = adev->umc.ras->ras_block.ras_late_init(adev, NULL);
+   r = adev->umc.ras->ras_block.ras_late_init(adev, 
adev->umc.ras_if);
if (r)
return r;
}
@@ -464,7 +464,7 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
}
 
if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_late_init) {
-   r = adev->gmc.xgmi.ras->ras_block.ras_late_init(adev, NULL);
+   r = adev->gmc.xgmi.ras->ras_block.ras_late_init(adev, 
adev->gmc.xgmi.ras_if);
if (r)
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
index 92fd4ffa7779..f09ad80f0772 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
@@ -25,11 +25,11 @@
 int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if 
*ras_block)
 {
int r;
-   r = amdgpu_ras_block_late_init(adev, adev->nbio.ras_if);
+   r = amdgpu_ras_block_late_init(adev, ras_block);
if (r)
return r;
 
-   if (amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) {
+   if (amdgpu_ras_is_supported(adev, ras_block->block)) {
r = amdgpu_irq_get(adev, &adev->nbio.ras_controller_irq, 0);
if (r)
goto late_fini;
@@ -40,7 +40,7 @@ int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, 
struct ras_common_if *
 
return 0;
 late_fini:
-   amdgpu_ras_block_late_fini(adev, adev->nbio.ras_if);
+   amdgpu_ras_block_late_fini(adev, ras_block);
return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 594454dba4c1..3b5c43575aa3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -91,11 +91,11 @@ int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev,
 {
int r, i;
 
-   r = amdgpu_ras_block_late_init(adev, adev->sdma.ras_if);
+   r = amdgpu_ras_block_late_init(adev, ras_block);
if (r)
return r;
 
-   if (amdgpu_ras_is_supported(adev, adev->sdma.ras_if->block)) {
+   if (amdgpu_ras_is_supported(adev, ras_block->block)) {
for (i = 0; i < adev->sdma.num_instances; i++) {
r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq,
AMDGPU_SDMA_IRQ_INSTAN

[PATCH 1/7] drm/amdgpu: Modify .ras_late_init function pointer parameter

2022-02-14 Thread yipechai
Change the second parameter of the .ras_late_init function pointer from
'void *' to 'struct ras_common_if *' so that redundant intermediate
calls in some ras blocks can be removed.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c  | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h  | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c  | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h  | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c  | 2 +-
 drivers/gpu/drm/amd/amdgpu/mca_v3_0.c | 6 +++---
 15 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index fe392108b5c2..b7470ed7bc25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -622,7 +622,7 @@ int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, 
uint32_t *value)
return r;
 }
 
-int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, void *ras_info)
+int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if 
*ras_block)
 {
int r;
r = amdgpu_ras_block_late_init(adev, adev->gfx.ras_if);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index f99eac544f6d..ccca0a85b982 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -386,7 +386,7 @@ bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device 
*adev, int me,
int pipe, int queue);
 void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
 int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value);
-int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, void *ras_info);
+int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if 
*ras_block);
 void amdgpu_gfx_ras_fini(struct amdgpu_device *adev);
 int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
void *err_data,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
index 21a5f884dd2a..70a096160998 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
@@ -24,7 +24,7 @@
 #include "amdgpu.h"
 #include "amdgpu_ras.h"
 
-int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev, void *ras_info)
+int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev, struct ras_common_if 
*ras_block)
 {
return amdgpu_ras_block_late_init(adev, adev->hdp.ras_if);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
index 4af2c2a322e7..aabd59aa5213 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
@@ -43,6 +43,6 @@ struct amdgpu_hdp {
struct amdgpu_hdp_ras   *ras;
 };
 
-int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev, void *ras_info);
+int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev, struct ras_common_if 
*ras_block);
 void amdgpu_hdp_ras_fini(struct amdgpu_device *adev);
 #endif /* __AMDGPU_HDP_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
index 2bdb4d8b7955..ede98db8c126 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
@@ -24,7 +24,7 @@
 #include "amdgpu.h"
 #include "amdgpu_ras.h"
 
-int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev, void *ras_info)
+int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev, struct 
ras_common_if *ras_block)
 {
return amdgpu_ras_block_late_init(adev, adev->mmhub.ras_if);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
index 7deda9a3b81e..75815106f2d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
@@ -47,7 +47,7 @@ struct amdgpu_mmhub {
struct amdgpu_mmhub_ras  *ras;
 };
 
-int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev, void *ras_info);
+int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev, struct 
ras_common_if *ras_block);
 void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev);
 #endif
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
index 89e61fdd3580..92fd4ffa7779 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
@@ -22,7 +22,7 @@
 #include "amdgpu.h"
 #include "amdgpu_ras.h"
 
-int amdgp

[PATCH 3/7] drm/amdgpu: Remove redundant calls of ras_late_init in mmhub ras block

2022-02-14 Thread yipechai
Remove redundant calls of ras_late_init in mmhub ras block.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c | 5 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h | 1 -
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 2 +-
 4 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 67a7d1cb89d6..f443d1e359ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -458,7 +458,7 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
}
 
if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ras_late_init) {
-   r = adev->mmhub.ras->ras_block.ras_late_init(adev, NULL);
+   r = adev->mmhub.ras->ras_block.ras_late_init(adev, 
adev->mmhub.ras_if);
if (r)
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
index ede98db8c126..42413813765a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
@@ -24,11 +24,6 @@
 #include "amdgpu.h"
 #include "amdgpu_ras.h"
 
-int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev, struct 
ras_common_if *ras_block)
-{
-   return amdgpu_ras_block_late_init(adev, adev->mmhub.ras_if);
-}
-
 void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev)
 {
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB) &&
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
index 75815106f2d5..240b26d9a388 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
@@ -47,7 +47,6 @@ struct amdgpu_mmhub {
struct amdgpu_mmhub_ras  *ras;
 };
 
-int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev, struct 
ras_common_if *ras_block);
 void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev);
 #endif
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 94095b965e2c..b719d2c3003b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1293,7 +1293,7 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct 
amdgpu_device *adev)
 
/* If don't define special ras_late_init function, use default 
ras_late_init */
if (!adev->mmhub.ras->ras_block.ras_late_init)
-   adev->mmhub.ras->ras_block.ras_late_init = 
amdgpu_mmhub_ras_late_init;
+   adev->mmhub.ras->ras_block.ras_late_init = 
amdgpu_ras_block_late_init;
 
/* If don't define special ras_fini function, use default 
ras_fini */
if (!adev->mmhub.ras->ras_block.ras_fini)
-- 
2.25.1



[PATCH 2/7] drm/amdgpu: Remove redundant calls of ras_late_init in hdp ras block

2022-02-14 Thread yipechai
Remove redundant calls of ras_late_init in hdp ras block.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c | 5 -
 drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c   | 2 +-
 3 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 956cc994ca7d..67a7d1cb89d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -470,7 +470,7 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
}
 
if (adev->hdp.ras && adev->hdp.ras->ras_block.ras_late_init) {
-   r = adev->hdp.ras->ras_block.ras_late_init(adev, NULL);
+   r = adev->hdp.ras->ras_block.ras_late_init(adev, 
adev->hdp.ras_if);
if (r)
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
index 70a096160998..b7fbc114a175 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
@@ -24,11 +24,6 @@
 #include "amdgpu.h"
 #include "amdgpu_ras.h"
 
-int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev, struct ras_common_if 
*ras_block)
-{
-   return amdgpu_ras_block_late_init(adev, adev->hdp.ras_if);
-}
-
 void amdgpu_hdp_ras_fini(struct amdgpu_device *adev)
 {
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP) &&
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
index a9ed4232cdeb..c9e931f046f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
@@ -163,7 +163,7 @@ struct amdgpu_hdp_ras hdp_v4_0_ras = {
.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
},
.hw_ops = &hdp_v4_0_ras_hw_ops,
-   .ras_late_init = amdgpu_hdp_ras_late_init,
+   .ras_late_init = amdgpu_ras_block_late_init,
.ras_fini = amdgpu_hdp_ras_fini,
},
 };
-- 
2.25.1



[PATCH 11/11] drm/amdgpu: Merge amdgpu_ras_late_init/amdgpu_ras_late_fini to amdgpu_ras_block_late_init/amdgpu_ras_block_late_fini

2022-02-08 Thread yipechai
1. Merge amdgpu_ras_late_init into
   amdgpu_ras_block_late_init.
2. Remove amdgpu_ras_late_init since no ras block
   calls it any more.
3. Merge amdgpu_ras_late_fini into
   amdgpu_ras_block_late_fini.
4. Remove amdgpu_ras_late_fini since no ras block
   calls it any more.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 53 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h |  8 
 2 files changed, 11 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 9f8f596663ab..1aff88fcea76 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2397,11 +2397,10 @@ bool amdgpu_ras_is_poison_mode_supported(struct 
amdgpu_device *adev)
 }
 
 /* helper function to handle common stuff in ip late init phase */
-int amdgpu_ras_late_init(struct amdgpu_device *adev,
-struct ras_common_if *ras_block,
-struct ras_fs_if *fs_info,
-struct ras_ih_if *ih_info)
+int amdgpu_ras_block_late_init(struct amdgpu_device *adev,
+struct ras_common_if *ras_block)
 {
+   struct amdgpu_ras_block_object *ras_obj;
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
unsigned long ue_count, ce_count;
int r;
@@ -2429,7 +2428,8 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev,
if (adev->in_suspend || amdgpu_in_reset(adev))
return 0;
 
-   if (ih_info->cb) {
+   ras_obj = container_of(ras_block, struct amdgpu_ras_block_object, 
ras_comm);
+   if (ras_obj->ras_cb) {
r = amdgpu_ras_interrupt_add_handler(adev, ras_block);
if (r)
goto interrupt;
@@ -2450,57 +2450,26 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev,
 cleanup:
amdgpu_ras_sysfs_remove(adev, ras_block);
 sysfs:
-   if (ih_info->cb)
+   if (ras_obj->ras_cb)
amdgpu_ras_interrupt_remove_handler(adev, ras_block);
 interrupt:
amdgpu_ras_feature_enable(adev, ras_block, 0);
return r;
 }
 
-int amdgpu_ras_block_late_init(struct amdgpu_device *adev,
-   struct ras_common_if *ras_block)
-{
-   char sysfs_name[32];
-   struct ras_ih_if ih_info;
-   struct ras_fs_if fs_info;
-   struct amdgpu_ras_block_object *obj;
-
-   obj = container_of(ras_block, struct amdgpu_ras_block_object, ras_comm);
-   ih_info.cb = obj->ras_cb;
-   ih_info.head = *ras_block;
-   snprintf(sysfs_name, sizeof(sysfs_name), "%s_err_count", 
ras_block->name);
-   fs_info.sysfs_name = (const char *)sysfs_name;
-   fs_info.head = *ras_block;
-   return amdgpu_ras_late_init(adev, ras_block, &fs_info, &ih_info);
-}
-
 /* helper function to remove ras fs node and interrupt handler */
-void amdgpu_ras_late_fini(struct amdgpu_device *adev,
- struct ras_common_if *ras_block,
- struct ras_ih_if *ih_info)
-{
-   if (!ras_block || !ih_info)
-   return;
-
-   amdgpu_ras_sysfs_remove(adev, ras_block);
-   if (ih_info->cb)
-   amdgpu_ras_interrupt_remove_handler(adev, &ih_info->head);
-}
-
 void amdgpu_ras_block_late_fini(struct amdgpu_device *adev,
  struct ras_common_if *ras_block)
 {
-   struct ras_ih_if ih_info;
-   struct amdgpu_ras_block_object *obj;
-
+   struct amdgpu_ras_block_object *ras_obj;
if (!ras_block)
return;
 
-   obj = container_of(ras_block, struct amdgpu_ras_block_object, ras_comm);
-   ih_info.head = *ras_block;
-   ih_info.cb = obj->ras_cb;
+   amdgpu_ras_sysfs_remove(adev, ras_block);
 
-   amdgpu_ras_late_fini(adev, ras_block, &ih_info);
+   ras_obj = container_of(ras_block, struct amdgpu_ras_block_object, 
ras_comm);
+   if (ras_obj->ras_cb)
+   amdgpu_ras_interrupt_remove_handler(adev, ras_block);
 }
 
 /* do some init work after IP late init as dependence.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index ae8741ac526f..5de567c6a8f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -597,18 +597,10 @@ amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) {
 int amdgpu_ras_init(struct amdgpu_device *adev);
 int amdgpu_ras_fini(struct amdgpu_device *adev);
 int amdgpu_ras_pre_fini(struct amdgpu_device *adev);
-int amdgpu_ras_late_init(struct amdgpu_device *adev,
-struct ras_common_if *ras_block,
-struct ras_fs_if *fs_info,
-struct ras_ih_if *ih_info);
 
 int amdgpu_ras_block_late_init(struct amdgpu_device *adev,
struct ras_common_if *ras_block);
 
-void amdgpu_ras_late_fini(struct amdgpu_dev

[PATCH 08/11] drm/amdgpu: Optimize amdgpu_umc_ras_late_init/amdgpu_umc_ras_fini function code

2022-02-08 Thread yipechai
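Simplify amdgpu_umc_ras_late_init/amdgpu_umc_ras_fini: call
amdgpu_ras_block_late_init/amdgpu_ras_block_late_fini and point
adev->umc.ras_if at the ras_comm member of the umc ras block instead of
allocating and freeing a separate ras_common_if.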

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 44 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h |  4 +++
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   |  6 
 3 files changed, 16 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index ff7805beda38..9f1406e1a48a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -129,7 +129,7 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
return ret;
 }
 
-static int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
+int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
void *ras_error_status,
struct amdgpu_iv_entry *entry)
 {
@@ -139,36 +139,15 @@ static int amdgpu_umc_process_ras_data_cb(struct 
amdgpu_device *adev,
 int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_info)
 {
int r;
-   struct ras_fs_if fs_info = {
-   .sysfs_name = "umc_err_count",
-   };
-   struct ras_ih_if ih_info = {
-   .cb = amdgpu_umc_process_ras_data_cb,
-   };
 
-   if (!adev->umc.ras_if) {
-   adev->umc.ras_if =
-   kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
-   if (!adev->umc.ras_if)
-   return -ENOMEM;
-   adev->umc.ras_if->block = AMDGPU_RAS_BLOCK__UMC;
-   adev->umc.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
-   adev->umc.ras_if->sub_block_index = 0;
-   }
-   ih_info.head = fs_info.head = *adev->umc.ras_if;
-
-   r = amdgpu_ras_late_init(adev, adev->umc.ras_if,
-_info, _info);
+   r = amdgpu_ras_block_late_init(adev, adev->umc.ras_if);
if (r)
-   goto free;
+   return r;
 
if (amdgpu_ras_is_supported(adev, adev->umc.ras_if->block)) {
r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0);
if (r)
goto late_fini;
-   } else {
-   r = 0;
-   goto free;
}
 
/* ras init of specific umc version */
@@ -179,26 +158,15 @@ int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, 
void *ras_info)
return 0;
 
 late_fini:
-   amdgpu_ras_late_fini(adev, adev->umc.ras_if, _info);
-free:
-   kfree(adev->umc.ras_if);
-   adev->umc.ras_if = NULL;
+   amdgpu_ras_block_late_fini(adev, adev->umc.ras_if);
return r;
 }
 
 void amdgpu_umc_ras_fini(struct amdgpu_device *adev)
 {
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) &&
-   adev->umc.ras_if) {
-   struct ras_common_if *ras_if = adev->umc.ras_if;
-   struct ras_ih_if ih_info = {
-   .head = *ras_if,
-   .cb = amdgpu_umc_process_ras_data_cb,
-   };
-
-   amdgpu_ras_late_fini(adev, ras_if, _info);
-   kfree(ras_if);
-   }
+   adev->umc.ras_if)
+   amdgpu_ras_block_late_fini(adev, adev->umc.ras_if);
 }
 
 int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index 4db0526d0be4..ec15b3640399 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -85,4 +85,8 @@ void amdgpu_umc_fill_error_record(struct ras_err_data 
*err_data,
uint64_t retired_page,
uint32_t channel_index,
uint32_t umc_inst);
+
+int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
+   void *ras_error_status,
+   struct amdgpu_iv_entry *entry);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 15958fd45f64..94095b965e2c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1234,6 +1234,8 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device 
*adev)
 
strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc");
adev->umc.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC;
+   adev->umc.ras->ras_block.ras_comm.type = 
AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+   adev->umc.ras_if = &adev->umc.ras->ras_block.ras_comm;
 
/* If don't define special ras_late_init function, use default 
ras_late_init */
if (!adev->umc.ras->ras_block.ras_late_init)
@@ -1242,6 +1244,10 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device 
*adev)
/* If don't define special ras_fini functi

[PATCH 10/11] drm/amdgpu: Optimize operating sysfs and interrupt function interface in amdgpu_ras.c

2022-02-08 Thread yipechai
To avoid redundant struct conversions, change the sysfs and interrupt
helper functions to take a 'struct ras_common_if *' directly instead of
'struct ras_fs_if *' / 'struct ras_ih_if *'.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 37 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h |  6 ++--
 2 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 8a76a4e07659..9f8f596663ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1276,18 +1276,17 @@ static int amdgpu_ras_sysfs_remove_feature_node(struct 
amdgpu_device *adev)
 }
 
 int amdgpu_ras_sysfs_create(struct amdgpu_device *adev,
-   struct ras_fs_if *head)
+   struct ras_common_if *head)
 {
-   struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head->head);
+   struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
 
if (!obj || obj->attr_inuse)
return -EINVAL;
 
get_obj(obj);
 
-   memcpy(obj->fs_data.sysfs_name,
-   head->sysfs_name,
-   sizeof(obj->fs_data.sysfs_name));
+   snprintf(obj->fs_data.sysfs_name, sizeof(obj->fs_data.sysfs_name),
+   "%s_err_count", head->name);
 
obj->sysfs_attr = (struct device_attribute){
.attr = {
@@ -1594,9 +1593,9 @@ int amdgpu_ras_interrupt_dispatch(struct amdgpu_device 
*adev,
 }
 
 int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev,
-   struct ras_ih_if *info)
+   struct ras_common_if *head)
 {
-   struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
+   struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
struct ras_ih_data *data;
 
if (!obj)
@@ -1616,24 +1615,27 @@ int amdgpu_ras_interrupt_remove_handler(struct 
amdgpu_device *adev,
 }
 
 int amdgpu_ras_interrupt_add_handler(struct amdgpu_device *adev,
-   struct ras_ih_if *info)
+   struct ras_common_if *head)
 {
-   struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
+   struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
struct ras_ih_data *data;
+   struct amdgpu_ras_block_object *ras_obj;
 
if (!obj) {
/* in case we registe the IH before enable ras feature */
-   obj = amdgpu_ras_create_obj(adev, &info->head);
+   obj = amdgpu_ras_create_obj(adev, head);
if (!obj)
return -EINVAL;
} else
get_obj(obj);
 
+   ras_obj = container_of(head, struct amdgpu_ras_block_object, ras_comm);
+
data = &obj->ih_data;
/* add the callback.etc */
*data = (struct ras_ih_data) {
.inuse = 0,
-   .cb = info->cb,
+   .cb = ras_obj->ras_cb,
.element_size = sizeof(struct amdgpu_iv_entry),
.rptr = 0,
.wptr = 0,
@@ -1662,10 +1664,7 @@ static int amdgpu_ras_interrupt_remove_all(struct 
amdgpu_device *adev)
struct ras_manager *obj, *tmp;
 
list_for_each_entry_safe(obj, tmp, &con->head, node) {
-   struct ras_ih_if info = {
-   .head = obj->head,
-   };
-   amdgpu_ras_interrupt_remove_handler(adev, &info);
+   amdgpu_ras_interrupt_remove_handler(adev, &obj->head);
}
 
return 0;
@@ -2431,12 +2430,12 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev,
return 0;
 
if (ih_info->cb) {
-   r = amdgpu_ras_interrupt_add_handler(adev, ih_info);
+   r = amdgpu_ras_interrupt_add_handler(adev, ras_block);
if (r)
goto interrupt;
}
 
-   r = amdgpu_ras_sysfs_create(adev, fs_info);
+   r = amdgpu_ras_sysfs_create(adev, ras_block);
if (r)
goto sysfs;
 
@@ -2452,7 +2451,7 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev,
amdgpu_ras_sysfs_remove(adev, ras_block);
 sysfs:
if (ih_info->cb)
-   amdgpu_ras_interrupt_remove_handler(adev, ih_info);
+   amdgpu_ras_interrupt_remove_handler(adev, ras_block);
 interrupt:
amdgpu_ras_feature_enable(adev, ras_block, 0);
return r;
@@ -2485,7 +2484,7 @@ void amdgpu_ras_late_fini(struct amdgpu_device *adev,
 
amdgpu_ras_sysfs_remove(adev, ras_block);
if (ih_info->cb)
-   amdgpu_ras_interrupt_remove_handler(adev, ih_info);
+   amdgpu_ras_interrupt_remove_handler(adev, _info->head);
 }
 
 void amdgpu_ras_block_late_fini(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 8b94b556baf6..ae8741ac526f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@

[PATCH 07/11] drm/amdgpu: Optimize amdgpu_sdma_ras_late_init/amdgpu_sdma_ras_fini function code

2022-02-08 Thread yipechai
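Simplify amdgpu_sdma_ras_late_init/amdgpu_sdma_ras_fini: call
amdgpu_ras_block_late_init/amdgpu_ras_block_late_fini and point
adev->sdma.ras_if at the ras_comm member of the sdma ras block instead
of allocating and freeing a separate ras_common_if.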

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 46 +++-
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   | 12 ---
 2 files changed, 13 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 65debb65a5df..242a7b4dcad9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -90,28 +90,10 @@ int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev,
  void *ras_ih_info)
 {
int r, i;
-   struct ras_ih_if *ih_info = (struct ras_ih_if *)ras_ih_info;
-   struct ras_fs_if fs_info = {
-   .sysfs_name = "sdma_err_count",
-   };
-
-   if (!ih_info)
-   return -EINVAL;
 
-   if (!adev->sdma.ras_if) {
-   adev->sdma.ras_if = kmalloc(sizeof(struct ras_common_if), 
GFP_KERNEL);
-   if (!adev->sdma.ras_if)
-   return -ENOMEM;
-   adev->sdma.ras_if->block = AMDGPU_RAS_BLOCK__SDMA;
-   adev->sdma.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
-   adev->sdma.ras_if->sub_block_index = 0;
-   }
-   fs_info.head = ih_info->head = *adev->sdma.ras_if;
-
-   r = amdgpu_ras_late_init(adev, adev->sdma.ras_if,
-_info, ih_info);
+   r = amdgpu_ras_block_late_init(adev, adev->sdma.ras_if);
if (r)
-   goto free;
+   return r;
 
if (amdgpu_ras_is_supported(adev, adev->sdma.ras_if->block)) {
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -120,38 +102,20 @@ int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev,
if (r)
goto late_fini;
}
-   } else {
-   r = 0;
-   goto free;
}
 
return 0;
 
 late_fini:
-   amdgpu_ras_late_fini(adev, adev->sdma.ras_if, ih_info);
-free:
-   kfree(adev->sdma.ras_if);
-   adev->sdma.ras_if = NULL;
+   amdgpu_ras_block_late_fini(adev, adev->sdma.ras_if);
return r;
 }
 
 void amdgpu_sdma_ras_fini(struct amdgpu_device *adev)
 {
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA) &&
-   adev->sdma.ras_if) {
-   struct ras_common_if *ras_if = adev->sdma.ras_if;
-   struct ras_ih_if ih_info = {
-   .head = *ras_if,
-   /* the cb member will not be used by
-* amdgpu_ras_interrupt_remove_handler, init it only
-* to cheat the check in ras_late_fini
-*/
-   .cb = amdgpu_sdma_process_ras_data_cb,
-   };
-
-   amdgpu_ras_late_fini(adev, ras_if, _info);
-   kfree(ras_if);
-   }
+   adev->sdma.ras_if)
+   amdgpu_ras_block_late_fini(adev, adev->sdma.ras_if);
 }
 
 int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 8b0a8587dd36..82a31dfa8c21 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1885,9 +1885,6 @@ static int sdma_v4_0_process_ras_data_cb(struct 
amdgpu_device *adev,
 static int sdma_v4_0_late_init(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-   struct ras_ih_if ih_info = {
-   .cb = sdma_v4_0_process_ras_data_cb,
-   };
 
sdma_v4_0_setup_ulv(adev);
 
@@ -1898,7 +1895,7 @@ static int sdma_v4_0_late_init(void *handle)
}
 
if (adev->sdma.ras && adev->sdma.ras->ras_block.ras_late_init)
-   return adev->sdma.ras->ras_block.ras_late_init(adev, _info);
+   return adev->sdma.ras->ras_block.ras_late_init(adev, NULL);
else
return 0;
 }
@@ -2794,6 +2791,7 @@ const struct amdgpu_ras_block_hw_ops sdma_v4_0_ras_hw_ops 
= {
 static struct amdgpu_sdma_ras sdma_v4_0_ras = {
.ras_block = {
.hw_ops = &sdma_v4_0_ras_hw_ops,
+   .ras_cb = sdma_v4_0_process_ras_data_cb,
},
 };
 
@@ -2816,6 +2814,8 @@ static void sdma_v4_0_set_ras_funcs(struct amdgpu_device 
*adev)
 
strcpy(adev->sdma.ras->ras_block.ras_comm.name, "sdma");
adev->sdma.ras->ras_block.ras_comm.block = 
AMDGPU_RAS_BLOCK__SDMA;
+   adev->sdma.ras->ras_block.ras_comm.type = 
AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+   adev->sdma.ras_if = >sdma.ras->ras_block.ras_comm;
 
/* If don't define special ras_late_init f

[PATCH 09/11] drm/amdgpu: Optimize amdgpu_xgmi_ras_late_init/amdgpu_xgmi_ras_fini function code

2022-02-08 Thread yipechai
Optimize amdgpu_xgmi_ras_late_init/amdgpu_xgmi_ras_fini function code.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 40 +++-
 2 files changed, 6 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index d426de48d299..956cc994ca7d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -441,6 +441,7 @@ int amdgpu_gmc_ras_early_init(struct amdgpu_device *adev)
if (!adev->gmc.xgmi.connected_to_cpu) {
adev->gmc.xgmi.ras = _ras;
amdgpu_ras_register_ras_block(adev, 
>gmc.xgmi.ras->ras_block);
+   adev->gmc.xgmi.ras_if = >gmc.xgmi.ras->ras_block.ras_comm;
}
 
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 15707af89212..a785b1e088cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -734,51 +734,20 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
 
 static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, void 
*ras_info)
 {
-   int r;
-   struct ras_ih_if ih_info = {
-   .cb = NULL,
-   };
-   struct ras_fs_if fs_info = {
-   .sysfs_name = "xgmi_wafl_err_count",
-   };
-
if (!adev->gmc.xgmi.supported ||
adev->gmc.xgmi.num_physical_nodes == 0)
return 0;
 
adev->gmc.xgmi.ras->ras_block.hw_ops->reset_ras_error_count(adev);
 
-   if (!adev->gmc.xgmi.ras_if) {
-   adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), 
GFP_KERNEL);
-   if (!adev->gmc.xgmi.ras_if)
-   return -ENOMEM;
-   adev->gmc.xgmi.ras_if->block = AMDGPU_RAS_BLOCK__XGMI_WAFL;
-   adev->gmc.xgmi.ras_if->type = 
AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
-   adev->gmc.xgmi.ras_if->sub_block_index = 0;
-   }
-   ih_info.head = fs_info.head = *adev->gmc.xgmi.ras_if;
-   r = amdgpu_ras_late_init(adev, adev->gmc.xgmi.ras_if,
-_info, _info);
-   if (r || !amdgpu_ras_is_supported(adev, adev->gmc.xgmi.ras_if->block)) {
-   kfree(adev->gmc.xgmi.ras_if);
-   adev->gmc.xgmi.ras_if = NULL;
-   }
-
-   return r;
+   return amdgpu_ras_block_late_init(adev, adev->gmc.xgmi.ras_if);
 }
 
 static void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev)
 {
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL) &&
-   adev->gmc.xgmi.ras_if) {
-   struct ras_common_if *ras_if = adev->gmc.xgmi.ras_if;
-   struct ras_ih_if ih_info = {
-   .cb = NULL,
-   };
-
-   amdgpu_ras_late_fini(adev, ras_if, _info);
-   kfree(ras_if);
-   }
+   adev->gmc.xgmi.ras_if)
+   amdgpu_ras_block_late_fini(adev, adev->gmc.xgmi.ras_if);
 }
 
 uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev,
@@ -982,8 +951,9 @@ struct amdgpu_ras_block_hw_ops  xgmi_ras_hw_ops = {
 struct amdgpu_xgmi_ras xgmi_ras = {
.ras_block = {
.ras_comm = {
-   .name = "xgmi",
+   .name = "xgmi_wafl",
.block = AMDGPU_RAS_BLOCK__XGMI_WAFL,
+   .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
},
.hw_ops = _ras_hw_ops,
.ras_late_init = amdgpu_xgmi_ras_late_init,
-- 
2.25.1



[PATCH 06/11] drm/amdgpu: Optimize amdgpu_nbio_ras_late_init/amdgpu_nbio_ras_fini function code

2022-02-08 Thread yipechai
Optimize amdgpu_nbio_ras_late_init/amdgpu_nbio_ras_fini function code.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c | 40 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c  |  1 +
 drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c   |  1 +
 3 files changed, 7 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
index 6ace2e390e77..89e61fdd3580 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
@@ -25,26 +25,9 @@
 int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, void *ras_info)
 {
int r;
-   struct ras_ih_if ih_info = {
-   .cb = NULL,
-   };
-   struct ras_fs_if fs_info = {
-   .sysfs_name = "pcie_bif_err_count",
-   };
-
-   if (!adev->nbio.ras_if) {
-   adev->nbio.ras_if = kmalloc(sizeof(struct ras_common_if), 
GFP_KERNEL);
-   if (!adev->nbio.ras_if)
-   return -ENOMEM;
-   adev->nbio.ras_if->block = AMDGPU_RAS_BLOCK__PCIE_BIF;
-   adev->nbio.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
-   adev->nbio.ras_if->sub_block_index = 0;
-   }
-   ih_info.head = fs_info.head = *adev->nbio.ras_if;
-   r = amdgpu_ras_late_init(adev, adev->nbio.ras_if,
-_info, _info);
+   r = amdgpu_ras_block_late_init(adev, adev->nbio.ras_if);
if (r)
-   goto free;
+   return r;
 
if (amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) {
r = amdgpu_irq_get(adev, >nbio.ras_controller_irq, 0);
@@ -53,30 +36,17 @@ int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, 
void *ras_info)
r = amdgpu_irq_get(adev, >nbio.ras_err_event_athub_irq, 
0);
if (r)
goto late_fini;
-   } else {
-   r = 0;
-   goto free;
}
 
return 0;
 late_fini:
-   amdgpu_ras_late_fini(adev, adev->nbio.ras_if, _info);
-free:
-   kfree(adev->nbio.ras_if);
-   adev->nbio.ras_if = NULL;
+   amdgpu_ras_block_late_fini(adev, adev->nbio.ras_if);
return r;
 }
 
 void amdgpu_nbio_ras_fini(struct amdgpu_device *adev)
 {
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF) &&
-   adev->nbio.ras_if) {
-   struct ras_common_if *ras_if = adev->nbio.ras_if;
-   struct ras_ih_if ih_info = {
-   .cb = NULL,
-   };
-
-   amdgpu_ras_late_fini(adev, ras_if, _info);
-   kfree(ras_if);
-   }
+   adev->nbio.ras_if)
+   amdgpu_ras_block_late_fini(adev, adev->nbio.ras_if);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index b7aed19db7e9..8a76a4e07659 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2301,6 +2301,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
if (!adev->gmc.xgmi.connected_to_cpu) {
adev->nbio.ras = _v7_4_ras;
amdgpu_ras_register_ras_block(adev, 
>nbio.ras->ras_block);
+   adev->nbio.ras_if = >nbio.ras->ras_block.ras_comm;
}
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index c7cca87f1647..14768570c298 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -667,6 +667,7 @@ struct amdgpu_nbio_ras nbio_v7_4_ras = {
.ras_comm = {
.name = "pcie_bif",
.block = AMDGPU_RAS_BLOCK__PCIE_BIF,
+   .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
},
.hw_ops = _v7_4_ras_hw_ops,
.ras_late_init = amdgpu_nbio_ras_late_init,
-- 
2.25.1



[PATCH 05/11] drm/amdgpu: Optimize amdgpu_mmhub_ras_late_init/amdgpu_mmhub_ras_fini function code

2022-02-08 Thread yipechai
Optimize amdgpu_mmhub_ras_late_init/amdgpu_mmhub_ras_fini function code.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c | 37 ++-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  2 ++
 2 files changed, 5 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
index f9b5472a75d7..2bdb4d8b7955 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
@@ -26,43 +26,12 @@
 
 int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev, void *ras_info)
 {
-   int r;
-   struct ras_ih_if ih_info = {
-   .cb = NULL,
-   };
-   struct ras_fs_if fs_info = {
-   .sysfs_name = "mmhub_err_count",
-   };
-
-   if (!adev->mmhub.ras_if) {
-   adev->mmhub.ras_if = kmalloc(sizeof(struct ras_common_if), 
GFP_KERNEL);
-   if (!adev->mmhub.ras_if)
-   return -ENOMEM;
-   adev->mmhub.ras_if->block = AMDGPU_RAS_BLOCK__MMHUB;
-   adev->mmhub.ras_if->type = 
AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
-   adev->mmhub.ras_if->sub_block_index = 0;
-   }
-   ih_info.head = fs_info.head = *adev->mmhub.ras_if;
-   r = amdgpu_ras_late_init(adev, adev->mmhub.ras_if,
-_info, _info);
-   if (r || !amdgpu_ras_is_supported(adev, adev->mmhub.ras_if->block)) {
-   kfree(adev->mmhub.ras_if);
-   adev->mmhub.ras_if = NULL;
-   }
-
-   return r;
+   return amdgpu_ras_block_late_init(adev, adev->mmhub.ras_if);
 }
 
 void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev)
 {
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB) &&
-   adev->mmhub.ras_if) {
-   struct ras_common_if *ras_if = adev->mmhub.ras_if;
-   struct ras_ih_if ih_info = {
-   .cb = NULL,
-   };
-
-   amdgpu_ras_late_fini(adev, ras_if, _info);
-   kfree(ras_if);
-   }
+   adev->mmhub.ras_if)
+   amdgpu_ras_block_late_fini(adev, adev->mmhub.ras_if);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index b12fe6703f02..15958fd45f64 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1282,6 +1282,8 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct 
amdgpu_device *adev)
 
strcpy(adev->mmhub.ras->ras_block.ras_comm.name, "mmhub");
adev->mmhub.ras->ras_block.ras_comm.block = 
AMDGPU_RAS_BLOCK__MMHUB;
+   adev->mmhub.ras->ras_block.ras_comm.type = 
AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+   adev->mmhub.ras_if = >mmhub.ras->ras_block.ras_comm;
 
/* If don't define special ras_late_init function, use default 
ras_late_init */
if (!adev->mmhub.ras->ras_block.ras_late_init)
-- 
2.25.1



[PATCH 04/11] drm/amdgpu: Optimize amdgpu_mca_ras_late_init/amdgpu_mca_ras_fini function code

2022-02-08 Thread yipechai
Optimize amdgpu_mca_ras_late_init/amdgpu_mca_ras_fini function code.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c | 41 ++---
 drivers/gpu/drm/amd/amdgpu/mca_v3_0.c   |  6 
 2 files changed, 8 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
index ad057d6b2c77..1c77fe7e9e68 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
@@ -74,48 +74,11 @@ void amdgpu_mca_query_ras_error_count(struct amdgpu_device 
*adev,
 int amdgpu_mca_ras_late_init(struct amdgpu_device *adev,
 struct amdgpu_mca_ras *mca_dev)
 {
-   char sysfs_name[32] = {0};
-   int r;
-   struct ras_ih_if ih_info = {
-   .cb = NULL,
-   };
-   struct ras_fs_if fs_info= {
-   .sysfs_name = sysfs_name,
-   };
-
-   snprintf(sysfs_name, sizeof(sysfs_name), "%s_err_count",
-   mca_dev->ras->ras_block.ras_comm.name);
-
-   if (!mca_dev->ras_if) {
-   mca_dev->ras_if = kmalloc(sizeof(struct ras_common_if), 
GFP_KERNEL);
-   if (!mca_dev->ras_if)
-   return -ENOMEM;
-   mca_dev->ras_if->block = mca_dev->ras->ras_block.ras_comm.block;
-   mca_dev->ras_if->sub_block_index = 
mca_dev->ras->ras_block.ras_comm.sub_block_index;
-   mca_dev->ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
-   }
-   ih_info.head = fs_info.head = *mca_dev->ras_if;
-   r = amdgpu_ras_late_init(adev, mca_dev->ras_if,
-_info, _info);
-   if (r || !amdgpu_ras_is_supported(adev, mca_dev->ras_if->block)) {
-   kfree(mca_dev->ras_if);
-   mca_dev->ras_if = NULL;
-   }
-
-   return r;
+   return amdgpu_ras_block_late_init(adev, mca_dev->ras_if);
 }
 
 void amdgpu_mca_ras_fini(struct amdgpu_device *adev,
 struct amdgpu_mca_ras *mca_dev)
 {
-   struct ras_ih_if ih_info = {
-   .cb = NULL,
-   };
-
-   if (!mca_dev->ras_if)
-   return;
-
-   amdgpu_ras_late_fini(adev, mca_dev->ras_if, _info);
-   kfree(mca_dev->ras_if);
-   mca_dev->ras_if = NULL;
+   amdgpu_ras_block_late_fini(adev, mca_dev->ras_if);
 }
\ No newline at end of file
diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
index 386416378a82..a307f336f7ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
@@ -71,6 +71,7 @@ struct amdgpu_mca_ras_block mca_v3_0_mp0_ras = {
.ras_comm = {
.block = AMDGPU_RAS_BLOCK__MCA,
.sub_block_index = AMDGPU_RAS_MCA_BLOCK__MP0,
+   .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
.name = "mp0",
},
.hw_ops = _v3_0_mp0_hw_ops,
@@ -108,6 +109,7 @@ struct amdgpu_mca_ras_block mca_v3_0_mp1_ras = {
.ras_comm = {
.block = AMDGPU_RAS_BLOCK__MCA,
.sub_block_index = AMDGPU_RAS_MCA_BLOCK__MP1,
+   .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
.name = "mp1",
},
.hw_ops = _v3_0_mp1_hw_ops,
@@ -145,6 +147,7 @@ struct amdgpu_mca_ras_block mca_v3_0_mpio_ras = {
.ras_comm = {
.block = AMDGPU_RAS_BLOCK__MCA,
.sub_block_index = AMDGPU_RAS_MCA_BLOCK__MPIO,
+   .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
.name = "mpio",
},
.hw_ops = _v3_0_mpio_hw_ops,
@@ -165,6 +168,9 @@ static void mca_v3_0_init(struct amdgpu_device *adev)
amdgpu_ras_register_ras_block(adev, >mp0.ras->ras_block);
amdgpu_ras_register_ras_block(adev, >mp1.ras->ras_block);
amdgpu_ras_register_ras_block(adev, >mpio.ras->ras_block);
+   mca->mp0.ras_if = _v3_0_mp0_ras.ras_block.ras_comm;
+   mca->mp1.ras_if = _v3_0_mp1_ras.ras_block.ras_comm;
+   mca->mpio.ras_if = _v3_0_mpio_ras.ras_block.ras_comm;
 }
 
 const struct amdgpu_mca_funcs mca_v3_0_funcs = {
-- 
2.25.1



[PATCH 02/11] drm/amdgpu: Optimize amdgpu_gfx_ras_late_init/amdgpu_gfx_ras_fini function code

2022-02-08 Thread yipechai
Optimize amdgpu_gfx_ras_late_init/amdgpu_gfx_ras_fini function code.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 42 +++--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   |  6 
 2 files changed, 11 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 43004822ec6f..fe392108b5c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -625,26 +625,9 @@ int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, 
uint32_t *value)
 int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, void *ras_info)
 {
int r;
-   struct ras_fs_if fs_info = {
-   .sysfs_name = "gfx_err_count",
-   };
-   struct ras_ih_if ih_info = {
-   .cb = amdgpu_gfx_process_ras_data_cb,
-   };
-
-   if (!adev->gfx.ras_if) {
-   adev->gfx.ras_if = kmalloc(sizeof(struct ras_common_if), 
GFP_KERNEL);
-   if (!adev->gfx.ras_if)
-   return -ENOMEM;
-   adev->gfx.ras_if->block = AMDGPU_RAS_BLOCK__GFX;
-   adev->gfx.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
-   adev->gfx.ras_if->sub_block_index = 0;
-   }
-   fs_info.head = ih_info.head = *adev->gfx.ras_if;
-   r = amdgpu_ras_late_init(adev, adev->gfx.ras_if,
-_info, _info);
+   r = amdgpu_ras_block_late_init(adev, adev->gfx.ras_if);
if (r)
-   goto free;
+   return r;
 
if (amdgpu_ras_is_supported(adev, adev->gfx.ras_if->block)) {
if (!amdgpu_persistent_edc_harvesting_supported(adev))
@@ -653,34 +636,19 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, 
void *ras_info)
r = amdgpu_irq_get(adev, >gfx.cp_ecc_error_irq, 0);
if (r)
goto late_fini;
-   } else {
-   /* free gfx ras_if if ras is not supported */
-   r = 0;
-   goto free;
}
 
return 0;
 late_fini:
-   amdgpu_ras_late_fini(adev, adev->gfx.ras_if, _info);
-free:
-   kfree(adev->gfx.ras_if);
-   adev->gfx.ras_if = NULL;
+   amdgpu_ras_block_late_fini(adev, adev->gfx.ras_if);
return r;
 }
 
 void amdgpu_gfx_ras_fini(struct amdgpu_device *adev)
 {
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
-   adev->gfx.ras_if) {
-   struct ras_common_if *ras_if = adev->gfx.ras_if;
-   struct ras_ih_if ih_info = {
-   .head = *ras_if,
-   .cb = amdgpu_gfx_process_ras_data_cb,
-   };
-
-   amdgpu_ras_late_fini(adev, ras_if, _info);
-   kfree(ras_if);
-   }
+   adev->gfx.ras_if)
+   amdgpu_ras_block_late_fini(adev, adev->gfx.ras_if);
 }
 
 int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 0a291d2e5f91..0c3b3972c04e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2200,6 +2200,8 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device 
*adev)
 
strcpy(adev->gfx.ras->ras_block.ras_comm.name, "gfx");
adev->gfx.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX;
+   adev->gfx.ras->ras_block.ras_comm.type = 
AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+   adev->gfx.ras_if = >gfx.ras->ras_block.ras_comm;
 
/* If not define special ras_late_init function, use gfx 
default ras_late_init */
if (!adev->gfx.ras->ras_block.ras_late_init)
@@ -2208,6 +2210,10 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device 
*adev)
/* If not define special ras_fini function, use gfx default 
ras_fini */
if (!adev->gfx.ras->ras_block.ras_fini)
adev->gfx.ras->ras_block.ras_fini = amdgpu_gfx_ras_fini;
+
+   /* If not defined special ras_cb function, use default ras_cb */
+   if (!adev->gfx.ras->ras_block.ras_cb)
+   adev->gfx.ras->ras_block.ras_cb = 
amdgpu_gfx_process_ras_data_cb;
}
 
adev->gfx.config.gb_addr_config = gb_addr_config;
-- 
2.25.1



[PATCH 03/11] drm/amdgpu: Optimize amdgpu_hdp_ras_late_init/amdgpu_hdp_ras_fini function code

2022-02-08 Thread yipechai
Optimize amdgpu_hdp_ras_late_init/amdgpu_hdp_ras_fini function code.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c | 37 ++---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   |  1 +
 drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c   |  1 +
 3 files changed, 5 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
index 518966a26130..21a5f884dd2a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
@@ -26,43 +26,12 @@
 
 int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev, void *ras_info)
 {
-   int r;
-   struct ras_ih_if ih_info = {
-   .cb = NULL,
-   };
-   struct ras_fs_if fs_info = {
-   .sysfs_name = "hdp_err_count",
-   };
-
-   if (!adev->hdp.ras_if) {
-   adev->hdp.ras_if = kmalloc(sizeof(struct ras_common_if), 
GFP_KERNEL);
-   if (!adev->hdp.ras_if)
-   return -ENOMEM;
-   adev->hdp.ras_if->block = AMDGPU_RAS_BLOCK__HDP;
-   adev->hdp.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
-   adev->hdp.ras_if->sub_block_index = 0;
-   }
-   ih_info.head = fs_info.head = *adev->hdp.ras_if;
-   r = amdgpu_ras_late_init(adev, adev->hdp.ras_if,
-_info, _info);
-   if (r || !amdgpu_ras_is_supported(adev, adev->hdp.ras_if->block)) {
-   kfree(adev->hdp.ras_if);
-   adev->hdp.ras_if = NULL;
-   }
-
-   return r;
+   return amdgpu_ras_block_late_init(adev, adev->hdp.ras_if);
 }
 
 void amdgpu_hdp_ras_fini(struct amdgpu_device *adev)
 {
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP) &&
-   adev->hdp.ras_if) {
-   struct ras_common_if *ras_if = adev->hdp.ras_if;
-   struct ras_ih_if ih_info = {
-   .cb = NULL,
-   };
-
-   amdgpu_ras_late_fini(adev, ras_if, _info);
-   kfree(ras_if);
-   }
+   adev->hdp.ras_if)
+   amdgpu_ras_block_late_fini(adev, adev->hdp.ras_if);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index af873c99d5e4..b12fe6703f02 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1302,6 +1302,7 @@ static void gmc_v9_0_set_hdp_ras_funcs(struct 
amdgpu_device *adev)
 {
adev->hdp.ras = _v4_0_ras;
amdgpu_ras_register_ras_block(adev, >hdp.ras->ras_block);
+   adev->hdp.ras_if = >hdp.ras->ras_block.ras_comm;
 }
 
 static void gmc_v9_0_set_mca_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
index 503c292b321e..a9ed4232cdeb 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
@@ -160,6 +160,7 @@ struct amdgpu_hdp_ras hdp_v4_0_ras = {
.ras_comm = {
.name = "hdp",
.block = AMDGPU_RAS_BLOCK__HDP,
+   .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
},
.hw_ops = _v4_0_ras_hw_ops,
.ras_late_init = amdgpu_hdp_ras_late_init,
-- 
2.25.1



[PATCH 01/11] drm/amdgpu: Optimize xxx_ras_late_init/xxx_ras_late_fini for each ras block

2022-02-08 Thread yipechai
1. Define amdgpu_ras_block_late_init to create sysfs nodes
   and interrupt handlers.
2. Define amdgpu_ras_block_late_fini to remove sysfs nodes
   and interrupt handlers.
3. Replace the ras block variable members in struct
   amdgpu_ras_block_object with struct ras_common_if, which
   makes it easy to associate each ras block instance
   with each ras block functional interface.
4. Add .ras_cb to struct amdgpu_ras_block_object.
5. Change each ras block to fit the change to struct
   amdgpu_ras_block_object.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c  |  7 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c  | 35 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h  | 15 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c |  6 ++--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c|  4 +--
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   |  4 +--
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c|  8 +++---
 drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c|  6 ++--
 drivers/gpu/drm/amd/amdgpu/mca_v3_0.c| 28 +++
 drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c   |  6 ++--
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   |  4 +--
 11 files changed, 86 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
index 52a60c2316a2..ad057d6b2c77 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
@@ -83,14 +83,15 @@ int amdgpu_mca_ras_late_init(struct amdgpu_device *adev,
.sysfs_name = sysfs_name,
};
 
-   snprintf(sysfs_name, sizeof(sysfs_name), "%s_err_count", 
mca_dev->ras->ras_block.name);
+   snprintf(sysfs_name, sizeof(sysfs_name), "%s_err_count",
+   mca_dev->ras->ras_block.ras_comm.name);
 
if (!mca_dev->ras_if) {
mca_dev->ras_if = kmalloc(sizeof(struct ras_common_if), 
GFP_KERNEL);
if (!mca_dev->ras_if)
return -ENOMEM;
-   mca_dev->ras_if->block = mca_dev->ras->ras_block.block;
-   mca_dev->ras_if->sub_block_index = 
mca_dev->ras->ras_block.sub_block_index;
+   mca_dev->ras_if->block = mca_dev->ras->ras_block.ras_comm.block;
+   mca_dev->ras_if->sub_block_index = 
mca_dev->ras->ras_block.ras_comm.sub_block_index;
mca_dev->ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
}
ih_info.head = fs_info.head = *mca_dev->ras_if;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 5934326b9db3..b7aed19db7e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -877,7 +877,7 @@ static int amdgpu_ras_block_match_default(struct 
amdgpu_ras_block_object *block_
if (!block_obj)
return -EINVAL;
 
-   if (block_obj->block == block)
+   if (block_obj->ras_comm.block == block)
return 0;
 
return -EINVAL;
@@ -2457,6 +2457,23 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev,
return r;
 }
 
+int amdgpu_ras_block_late_init(struct amdgpu_device *adev,
+   struct ras_common_if *ras_block)
+{
+   char sysfs_name[32];
+   struct ras_ih_if ih_info;
+   struct ras_fs_if fs_info;
+   struct amdgpu_ras_block_object *obj;
+
+   obj = container_of(ras_block, struct amdgpu_ras_block_object, ras_comm);
+   ih_info.cb = obj->ras_cb;
+   ih_info.head = *ras_block;
+   snprintf(sysfs_name, sizeof(sysfs_name), "%s_err_count", 
ras_block->name);
+   fs_info.sysfs_name = (const char *)sysfs_name;
+   fs_info.head = *ras_block;
+   return amdgpu_ras_late_init(adev, ras_block, &fs_info, &ih_info);
+}
+
 /* helper function to remove ras fs node and interrupt handler */
 void amdgpu_ras_late_fini(struct amdgpu_device *adev,
  struct ras_common_if *ras_block,
@@ -2470,6 +2487,22 @@ void amdgpu_ras_late_fini(struct amdgpu_device *adev,
amdgpu_ras_interrupt_remove_handler(adev, ih_info);
 }
 
+void amdgpu_ras_block_late_fini(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block)
+{
+   struct ras_ih_if ih_info;
+   struct amdgpu_ras_block_object *obj;
+
+   if (!ras_block)
+   return;
+
+   obj = container_of(ras_block, struct amdgpu_ras_block_object, ras_comm);
+   ih_info.head = *ras_block;
+   ih_info.cb = obj->ras_cb;
+
+   amdgpu_ras_late_fini(adev, ras_block, &ih_info);
+}
+
 /* do some init work after IP late init as dependence.
  * and it runs in resume/gpu reset/booting up cases.
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index a55743b12d57..8b94b556baf6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ 

[PATCH V2 2/2] Revert "drm/amdgpu: Add judgement to avoid infinite loop"

2022-01-29 Thread yipechai
Commit 8583c8983f1b ("drm/amdgpu: Fixed the defect of
soft lock caused by infinite loop") has fixed this defect.

Revert the workaround commit 76641cbbf196 ("drm/amdgpu: Add
judgement to avoid infinite loop").

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 9b94c9c4960c..5558df3b21f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -886,7 +886,6 @@ static int amdgpu_ras_block_match_default(struct 
amdgpu_ras_block_object *block_
 static struct amdgpu_ras_block_object *amdgpu_ras_get_ras_block(struct 
amdgpu_device *adev,
enum amdgpu_ras_block block, uint32_t 
sub_block_index)
 {
-   int loop_cnt = 0;
struct amdgpu_ras_block_list *node, *tmp;
struct amdgpu_ras_block_object *obj;
 
@@ -910,9 +909,6 @@ static struct amdgpu_ras_block_object 
*amdgpu_ras_get_ras_block(struct amdgpu_de
if (amdgpu_ras_block_match_default(obj, block) == 0)
return obj;
}
-
-   if (++loop_cnt >= AMDGPU_RAS_BLOCK__LAST)
-   break;
}
 
return NULL;
-- 
2.25.1



[PATCH V2 1/2] drm/amdgpu: Fixed the defect of soft lock caused by infinite loop

2022-01-29 Thread yipechai
1. The infinite loop only occurs when multiple cards support
   ras functions.
2. For an explanation of the root cause, refer to commit 76641cbbf196
   ("drm/amdgpu: Add judgement to avoid infinite loop").
3. Create a new node to manage each unique ras instance to guarantee
   that each device's .ras_list is completely independent.
4. Fixes: commit 7a6b8ab3231b51 ("drm/amdgpu: Unify ras block
   interface for each ras block").
5. The soft lockup logs are as follows:
[  262.165690] CPU: 93 PID: 758 Comm: kworker/93:1 Tainted: G   OE 
5.13.0-27-generic #29~20.04.1-Ubuntu
[  262.165695] Hardware name: Supermicro AS -4124GS-TNR/H12DSG-O-CPU, BIOS 
T20200717143848 07/17/2020
[  262.165698] Workqueue: events amdgpu_ras_do_recovery [amdgpu]
[  262.165980] RIP: 0010:amdgpu_ras_get_ras_block+0x86/0xd0 [amdgpu]
[  262.166239] Code: 68 d8 4c 8d 71 d8 48 39 c3 74 54 49 8b 45 38 48 85 c0 74 
32 44 89 fa 44 89 e6 4c 89 ef e8 82 e4 9b dc 85 c0 74 3c 49 8b 46 28 <49> 8d 56 
28 4d 89 f5 48 83 e8 28 48 39 d3 74 25 49 89 c6 49 8b 45
[  262.166243] RSP: 0018:ac908fa87d80 EFLAGS: 0202
[  262.166247] RAX: c1394248 RBX: 91e4ab8d6e20 RCX: c1394248
[  262.166249] RDX: 91e4aa356e20 RSI: 000e RDI: 91e4ab8c
[  262.166252] RBP: ac908fa87da8 R08: 0007 R09: 0001
[  262.166254] R10: 91e4930b64ec R11:  R12: 000e
[  262.166256] R13: 91e4aa356df8 R14: c1394320 R15: 0003
[  262.166258] FS:  () GS:92238fb4() 
knlGS:
[  262.166261] CS:  0010 DS:  ES:  CR0: 80050033
[  262.166264] CR2: 0001004865d0 CR3: 00406d796000 CR4: 00350ee0
[  262.166267] Call Trace:
[  262.166272]  amdgpu_ras_do_recovery+0x130/0x290 [amdgpu]
[  262.166529]  ? psi_task_switch+0xd2/0x250
[  262.166537]  ? __switch_to+0x11d/0x460
[  262.166542]  ? __switch_to_asm+0x36/0x70
[  262.166549]  process_one_work+0x220/0x3c0
[  262.166556]  worker_thread+0x4d/0x3f0
[  262.166560]  ? process_one_work+0x3c0/0x3c0
[  262.166563]  kthread+0x12b/0x150
[  262.166568]  ? set_kthread_struct+0x40/0x40
[  262.166571]  ret_from_fork+0x22/0x30

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 37 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h |  3 --
 2 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 9d7c778c1a2d..9b94c9c4960c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -75,6 +75,13 @@ const char *ras_mca_block_string[] = {
"mca_iohc",
 };
 
+struct amdgpu_ras_block_list {
+   /* ras block link */
+   struct list_head node;
+
+   struct amdgpu_ras_block_object *ras_obj;
+};
+
 const char *get_ras_block_str(struct ras_common_if *ras_block)
 {
if (!ras_block)
@@ -880,7 +887,8 @@ static struct amdgpu_ras_block_object 
*amdgpu_ras_get_ras_block(struct amdgpu_de
enum amdgpu_ras_block block, uint32_t 
sub_block_index)
 {
int loop_cnt = 0;
-   struct amdgpu_ras_block_object *obj, *tmp;
+   struct amdgpu_ras_block_list *node, *tmp;
+   struct amdgpu_ras_block_object *obj;
 
if (block >= AMDGPU_RAS_BLOCK__LAST)
return NULL;
@@ -888,7 +896,13 @@ static struct amdgpu_ras_block_object 
*amdgpu_ras_get_ras_block(struct amdgpu_de
if (!amdgpu_ras_is_supported(adev, block))
return NULL;
 
-   list_for_each_entry_safe(obj, tmp, >ras_list, node) {
+   list_for_each_entry_safe(node, tmp, >ras_list, node) {
+   if (!node->ras_obj) {
+   dev_warn(adev->dev, "Warning: abnormal ras list 
node.\n");
+   continue;
+   }
+
+   obj = node->ras_obj;
if (obj->ras_block_match) {
if (obj->ras_block_match(obj, block, sub_block_index) 
== 0)
return obj;
@@ -2527,6 +2541,7 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
 
 int amdgpu_ras_fini(struct amdgpu_device *adev)
 {
+   struct amdgpu_ras_block_list *ras_node, *tmp;
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 
if (!adev->ras_enabled || !con)
@@ -2545,6 +2560,12 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
amdgpu_ras_set_context(adev, NULL);
kfree(con);
 
+   /* Clear ras blocks from ras_list and free ras block list node */
+   list_for_each_entry_safe(ras_node, tmp, >ras_list, node) {
+       list_del(&ras_node->node);
+   kfree(ras_node);
+   }
+
return 0;
 }
 
@@ -2754,14 +2775,22 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
 int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
 

[PATCH 2/2] Revert "drm/amdgpu: Add judgement to avoid infinite loop"

2022-01-29 Thread yipechai
Commit 8583c8983f1b ("drm/amdgpu: Fixed the defect of
soft lock caused by infinite loop") has fixed this defect.

Revert the workaround commit 76641cbbf196 ("drm/amdgpu: Add
judgement to avoid infinite loop").

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index b0aa67308c31..5a43a220e9fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -886,7 +886,6 @@ static int amdgpu_ras_block_match_default(struct 
amdgpu_ras_block_object *block_
 static struct amdgpu_ras_block_object *amdgpu_ras_get_ras_block(struct 
amdgpu_device *adev,
enum amdgpu_ras_block block, uint32_t 
sub_block_index)
 {
-   int loop_cnt = 0;
struct amdgpu_ras_block_list *node, *tmp;
struct amdgpu_ras_block_object *obj;
 
@@ -910,9 +909,6 @@ static struct amdgpu_ras_block_object 
*amdgpu_ras_get_ras_block(struct amdgpu_de
if (amdgpu_ras_block_match_default(obj, block) == 0)
return obj;
}
-
-   if (++loop_cnt >= AMDGPU_RAS_BLOCK__LAST)
-   break;
}
 
return NULL;
-- 
2.25.1



[PATCH 1/2] drm/amdgpu: Fixed the defect of soft lock caused by infinite loop

2022-01-29 Thread yipechai
1. The infinite loop only occurs on systems with multiple cards that
   support ras functions.
2. For an explanation of the root cause, refer to commit
   76641cbbf196523b5752c6cf68f86.
3. Create a new list node to manage each unique ras instance, to
   guarantee that each device's .ras_list is completely independent.
4. Fixes: 7a6b8ab3231b511915cb94cac1debabf093.
5. The soft lockup logs are as follows:
[  262.165690] CPU: 93 PID: 758 Comm: kworker/93:1 Tainted: G   OE 
5.13.0-27-generic #29~20.04.1-Ubuntu
[  262.165695] Hardware name: Supermicro AS -4124GS-TNR/H12DSG-O-CPU, BIOS 
T20200717143848 07/17/2020
[  262.165698] Workqueue: events amdgpu_ras_do_recovery [amdgpu]
[  262.165980] RIP: 0010:amdgpu_ras_get_ras_block+0x86/0xd0 [amdgpu]
[  262.166239] Code: 68 d8 4c 8d 71 d8 48 39 c3 74 54 49 8b 45 38 48 85 c0 74 
32 44 89 fa 44 89 e6 4c 89 ef e8 82 e4 9b dc 85 c0 74 3c 49 8b 46 28 <49> 8d 56 
28 4d 89 f5 48 83 e8 28 48 39 d3 74 25 49 89 c6 49 8b 45
[  262.166243] RSP: 0018:ac908fa87d80 EFLAGS: 0202
[  262.166247] RAX: c1394248 RBX: 91e4ab8d6e20 RCX: c1394248
[  262.166249] RDX: 91e4aa356e20 RSI: 000e RDI: 91e4ab8c
[  262.166252] RBP: ac908fa87da8 R08: 0007 R09: 0001
[  262.166254] R10: 91e4930b64ec R11:  R12: 000e
[  262.166256] R13: 91e4aa356df8 R14: c1394320 R15: 0003
[  262.166258] FS:  () GS:92238fb4() 
knlGS:
[  262.166261] CS:  0010 DS:  ES:  CR0: 80050033
[  262.166264] CR2: 0001004865d0 CR3: 00406d796000 CR4: 00350ee0
[  262.166267] Call Trace:
[  262.166272]  amdgpu_ras_do_recovery+0x130/0x290 [amdgpu]
[  262.166529]  ? psi_task_switch+0xd2/0x250
[  262.166537]  ? __switch_to+0x11d/0x460
[  262.166542]  ? __switch_to_asm+0x36/0x70
[  262.166549]  process_one_work+0x220/0x3c0
[  262.166556]  worker_thread+0x4d/0x3f0
[  262.166560]  ? process_one_work+0x3c0/0x3c0
[  262.166563]  kthread+0x12b/0x150
[  262.166568]  ? set_kthread_struct+0x40/0x40
[  262.166571]  ret_from_fork+0x22/0x30

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 37 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h |  3 --
 2 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 9d7c778c1a2d..b0aa67308c31 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -75,6 +75,13 @@ const char *ras_mca_block_string[] = {
"mca_iohc",
 };
 
+struct amdgpu_ras_block_list {
+   /* ras block link */
+   struct list_head node;
+
+   struct amdgpu_ras_block_object *ras_obj;
+};
+
 const char *get_ras_block_str(struct ras_common_if *ras_block)
 {
if (!ras_block)
@@ -880,7 +887,8 @@ static struct amdgpu_ras_block_object 
*amdgpu_ras_get_ras_block(struct amdgpu_de
enum amdgpu_ras_block block, uint32_t 
sub_block_index)
 {
int loop_cnt = 0;
-   struct amdgpu_ras_block_object *obj, *tmp;
+   struct amdgpu_ras_block_list *node, *tmp;
+   struct amdgpu_ras_block_object *obj;
 
if (block >= AMDGPU_RAS_BLOCK__LAST)
return NULL;
@@ -888,7 +896,13 @@ static struct amdgpu_ras_block_object 
*amdgpu_ras_get_ras_block(struct amdgpu_de
if (!amdgpu_ras_is_supported(adev, block))
return NULL;
 
-   list_for_each_entry_safe(obj, tmp, >ras_list, node) {
+   list_for_each_entry_safe(node, tmp, >ras_list, node) {
+   if (!node->ras_obj) {
+   DRM_ERROR("Warning: abnormal ras list node");
+   continue;
+   }
+
+   obj = node->ras_obj;
if (obj->ras_block_match) {
if (obj->ras_block_match(obj, block, sub_block_index) 
== 0)
return obj;
@@ -2527,6 +2541,7 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
 
 int amdgpu_ras_fini(struct amdgpu_device *adev)
 {
+   struct amdgpu_ras_block_list *ras_node, *tmp;
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 
if (!adev->ras_enabled || !con)
@@ -2545,6 +2560,12 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
amdgpu_ras_set_context(adev, NULL);
kfree(con);
 
+   /* Clear ras blocks from ras_list and free ras block list node */
+   list_for_each_entry_safe(ras_node, tmp, >ras_list, node) {
+   list_del(_node->node);
+   kfree(ras_node);
+   }
+
return 0;
 }
 
@@ -2754,14 +2775,22 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
 int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
struct amdgpu_ras_block_object *ras_block_obj)
 {
+   struct amdgpu_ras_block_list *ras_node;
if 

[PATCH] drm/amdgpu: Add judgement to avoid infinite loop

2022-01-28 Thread yipechai
1. The infinite loop causing the soft lockup occurs on systems with
   multiple amdgpu cards that support the ras feature.
2. This is a workaround patch. It is valid for multiple amdgpu cards of
   the same type.
3. The root cause is that each GPU card device has a separate .ras_list
   list head, but the instance and linked list node of each ras block
   are unique (shared by all devices). Each time a device is
   initialized, every ras instance adds its list node to that device's
   .ras_list again. As a result, only the .ras_list of the last
   initialized device is completely correct. The .ras_list->prev and
   .ras_list->next of a device initialized earlier still point to the
   correct ras instances, but the prev and next pointers of those ras
   instances point to the last initialized device's .ras_list instead
   of the .ras_list the traversal started from. When
   list_for_each_entry_safe is used to search for a non-existent ras
   node on a device other than the last one, the next pointer of the
   last ras instance never equals the starting .ras_list, so the loop
   cannot terminate and the program enters an infinite loop.
   BTW: Since the data and initialization process of each card are the
   same, the linked list between ras instances is not destroyed each
   time a device is initialized.
4. The soft lockup logs are as follows:
[  262.165690] CPU: 93 PID: 758 Comm: kworker/93:1 Tainted: G   OE 
5.13.0-27-generic #29~20.04.1-Ubuntu
[  262.165695] Hardware name: Supermicro AS -4124GS-TNR/H12DSG-O-CPU, BIOS 
T20200717143848 07/17/2020
[  262.165698] Workqueue: events amdgpu_ras_do_recovery [amdgpu]
[  262.165980] RIP: 0010:amdgpu_ras_get_ras_block+0x86/0xd0 [amdgpu]
[  262.166239] Code: 68 d8 4c 8d 71 d8 48 39 c3 74 54 49 8b 45 38 48 85 c0 74 
32 44 89 fa 44 89 e6 4c 89 ef e8 82 e4 9b dc 85 c0 74 3c 49 8b 46 28 <49> 8d 56 
28 4d 89 f5 48 83 e8 28 48 39 d3 74 25 49 89 c6 49 8b 45
[  262.166243] RSP: 0018:ac908fa87d80 EFLAGS: 0202
[  262.166247] RAX: c1394248 RBX: 91e4ab8d6e20 RCX: c1394248
[  262.166249] RDX: 91e4aa356e20 RSI: 000e RDI: 91e4ab8c
[  262.166252] RBP: ac908fa87da8 R08: 0007 R09: 0001
[  262.166254] R10: 91e4930b64ec R11:  R12: 000e
[  262.166256] R13: 91e4aa356df8 R14: c1394320 R15: 0003
[  262.166258] FS:  () GS:92238fb4() 
knlGS:
[  262.166261] CS:  0010 DS:  ES:  CR0: 80050033
[  262.166264] CR2: 0001004865d0 CR3: 00406d796000 CR4: 00350ee0
[  262.166267] Call Trace:
[  262.166272]  amdgpu_ras_do_recovery+0x130/0x290 [amdgpu]
[  262.166529]  ? psi_task_switch+0xd2/0x250
[  262.166537]  ? __switch_to+0x11d/0x460
[  262.166542]  ? __switch_to_asm+0x36/0x70
[  262.166549]  process_one_work+0x220/0x3c0
[  262.166556]  worker_thread+0x4d/0x3f0
[  262.166560]  ? process_one_work+0x3c0/0x3c0
[  262.166563]  kthread+0x12b/0x150
[  262.166568]  ? set_kthread_struct+0x40/0x40
[  262.166571]  ret_from_fork+0x22/0x30

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index d4e07d0acb66..3d533ef0783d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -884,6 +884,7 @@ static int amdgpu_ras_block_match_default(struct 
amdgpu_ras_block_object *block_
 static struct amdgpu_ras_block_object *amdgpu_ras_get_ras_block(struct 
amdgpu_device *adev,
enum amdgpu_ras_block block, uint32_t 
sub_block_index)
 {
+   int loop_cnt = 0;
struct amdgpu_ras_block_object *obj, *tmp;
 
if (block >= AMDGPU_RAS_BLOCK__LAST)
@@ -900,6 +901,9 @@ static struct amdgpu_ras_block_object 
*amdgpu_ras_get_ras_block(struct amdgpu_de
if (amdgpu_ras_block_match_default(obj, block) == 0)
return obj;
}
+
+   if (++loop_cnt >= AMDGPU_RAS_BLOCK__LAST)
+   break;
}
 
return NULL;
-- 
2.25.1



[PATCH V2 2/2] Revert "drm/amdgpu: No longer insert ras blocks into ras_list if it already exists in ras_list"

2022-01-19 Thread yipechai
This reverts commit 48e175f7476c6deb7ccf1f10d081322d52830a17.

Xgmi ras initialization has been moved from .late_init to .early_init,
which fixes the defect of amdgpu_ras_register_ras_block being called
repeatedly, so that commit is no longer needed and can be reverted.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 7 ---
 1 file changed, 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 7a1d2bac698e..c92383fe7834 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2765,19 +2765,12 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
 int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
struct amdgpu_ras_block_object *ras_block_obj)
 {
-   struct amdgpu_ras_block_object *obj, *tmp;
if (!adev || !ras_block_obj)
return -EINVAL;
 
if (!amdgpu_ras_asic_supported(adev))
return 0;
 
-   /* If the ras object is in ras_list, don't add it again */
-   list_for_each_entry_safe(obj, tmp, >ras_list, node) {
-   if (obj == ras_block_obj)
-   return 0;
-   }
-
INIT_LIST_HEAD(_block_obj->node);
list_add_tail(_block_obj->node, >ras_list);
 
-- 
2.25.1



[PATCH V2 1/2] drm/amdgpu: Move xgmi ras initialization from .late_init to .early_init

2022-01-19 Thread yipechai
Move xgmi ras initialization from .late_init to .early_init, so that
xgmi ras is initialized only once.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 15 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  5 +
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   |  5 +
 4 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 3483a82f5734..788c0257832d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -436,6 +436,16 @@ void amdgpu_gmc_filter_faults_remove(struct amdgpu_device 
*adev, uint64_t addr,
} while (fault->timestamp < tmp);
 }
 
+int amdgpu_gmc_ras_early_init(struct amdgpu_device *adev)
+{
+   if (!adev->gmc.xgmi.connected_to_cpu) {
+   adev->gmc.xgmi.ras = _ras;
+   amdgpu_ras_register_ras_block(adev, 
>gmc.xgmi.ras->ras_block);
+   }
+
+   return 0;
+}
+
 int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
 {
int r;
@@ -452,11 +462,6 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
return r;
}
 
-   if (!adev->gmc.xgmi.connected_to_cpu) {
-   adev->gmc.xgmi.ras = _ras;
-   amdgpu_ras_register_ras_block(adev, 
>gmc.xgmi.ras->ras_block);
-   }
-
if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_late_init) {
r = adev->gmc.xgmi.ras->ras_block.ras_late_init(adev, NULL);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 0001631cfedb..ac4c0e50b45c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -318,6 +318,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev,
  uint16_t pasid, uint64_t timestamp);
 void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
 uint16_t pasid);
+int amdgpu_gmc_ras_early_init(struct amdgpu_device *adev);
 int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
 void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
 int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 4f8d356f8432..7a6ad5d467b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -719,6 +719,7 @@ static void gmc_v10_0_set_gfxhub_funcs(struct amdgpu_device 
*adev)
 
 static int gmc_v10_0_early_init(void *handle)
 {
+   int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
gmc_v10_0_set_mmhub_funcs(adev);
@@ -734,6 +735,10 @@ static int gmc_v10_0_early_init(void *handle)
adev->gmc.private_aperture_end =
adev->gmc.private_aperture_start + (4ULL << 30) - 1;
 
+   r = amdgpu_gmc_ras_early_init(adev);
+   if (r)
+   return r;
+
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index c76ffd1a70cd..3cdd3d459d51 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1318,6 +1318,7 @@ static void gmc_v9_0_set_mca_funcs(struct amdgpu_device 
*adev)
 
 static int gmc_v9_0_early_init(void *handle)
 {
+   int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
/* ARCT and VEGA20 don't have XGMI defined in their IP discovery tables 
*/
@@ -1347,6 +1348,10 @@ static int gmc_v9_0_early_init(void *handle)
adev->gmc.private_aperture_end =
adev->gmc.private_aperture_start + (4ULL << 30) - 1;
 
+   r = amdgpu_gmc_ras_early_init(adev);
+   if (r)
+   return r;
+
return 0;
 }
 
-- 
2.25.1



[PATCH 2/2] Revert "drm/amdgpu: No longer insert ras blocks into ras_list if it already exists in ras_list"

2022-01-19 Thread yipechai
This reverts commit 48e175f7476c6deb7ccf1f10d081322d52830a17.

Xgmi ras initialization has been moved from .late_init to .early_init,
which fixes the defect of amdgpu_ras_register_ras_block being called
repeatedly, so that commit is no longer needed and can be reverted.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 7 ---
 1 file changed, 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 2b71611be388..e6b81543e104 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2775,19 +2775,12 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
 int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
struct amdgpu_ras_block_object *ras_block_obj)
 {
-   struct amdgpu_ras_block_object *obj, *tmp;
if (!adev || !ras_block_obj)
return -EINVAL;
 
if (!amdgpu_ras_asic_supported(adev))
return 0;
 
-   /* If the ras object is in ras_list, don't add it again */
-   list_for_each_entry_safe(obj, tmp, >ras_list, node) {
-   if (obj == ras_block_obj)
-   return 0;
-   }
-
INIT_LIST_HEAD(_block_obj->node);
list_add_tail(_block_obj->node, >ras_list);
 
-- 
2.25.1



[PATCH 1/2] drm/amdgpu: Move xgmi ras initialization from .late_init to early_init

2022-01-19 Thread yipechai
Move xgmi ras initialization from .late_init to .early_init, so that
xgmi ras is initialized only once.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c|  5 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c| 10 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h|  3 +++
 4 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 22f50aadf694..ece6397f81de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2189,6 +2189,8 @@ static int amdgpu_device_ip_early_init(struct 
amdgpu_device *adev)
}
}
 
+   amdgpu_ras_early_init(adev);
+
adev->cg_flags &= amdgpu_cg_mask;
adev->pg_flags &= amdgpu_pg_mask;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 3483a82f5734..d83eee1984c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -452,11 +452,6 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
return r;
}
 
-   if (!adev->gmc.xgmi.connected_to_cpu) {
-   adev->gmc.xgmi.ras = _ras;
-   amdgpu_ras_register_ras_block(adev, 
>gmc.xgmi.ras->ras_block);
-   }
-
if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_late_init) {
r = adev->gmc.xgmi.ras->ras_block.ras_late_init(adev, NULL);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 7a1d2bac698e..2b71611be388 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2238,6 +2238,16 @@ static void amdgpu_ras_counte_dw(struct work_struct 
*work)
pm_runtime_put_autosuspend(dev->dev);
 }
 
+int amdgpu_ras_early_init(struct amdgpu_device *adev)
+{
+   if (!adev->gmc.xgmi.connected_to_cpu) {
+   adev->gmc.xgmi.ras = _ras;
+   amdgpu_ras_register_ras_block(adev, 
>gmc.xgmi.ras->ras_block);
+   }
+
+   return 0;
+}
+
 int amdgpu_ras_init(struct amdgpu_device *adev)
 {
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index a51a281bd91a..7d99e3736ab9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -600,6 +600,9 @@ amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) {
}
 }
 
+/* called in ip_early_init */
+int amdgpu_ras_early_init(struct amdgpu_device *adev);
+
 /* called in ip_init and ip_fini */
 int amdgpu_ras_init(struct amdgpu_device *adev);
 int amdgpu_ras_fini(struct amdgpu_device *adev);
-- 
2.25.1



[PATCH 2/3] drm/amdgpu: Move xgmi ras initialization from .late_init to .early_init

2022-01-18 Thread yipechai
Move xgmi ras initialization from .late_init to .early_init, so that
xgmi ras is initialized only once.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 5 -
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 1 -
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 9 +
 3 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 3483a82f5734..d83eee1984c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -452,11 +452,6 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
return r;
}
 
-   if (!adev->gmc.xgmi.connected_to_cpu) {
-   adev->gmc.xgmi.ras = _ras;
-   amdgpu_ras_register_ras_block(adev, 
>gmc.xgmi.ras->ras_block);
-   }
-
if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_late_init) {
r = adev->gmc.xgmi.ras->ras_block.ras_late_init(adev, NULL);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 4f8d356f8432..5f9f82091000 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -716,7 +716,6 @@ static void gmc_v10_0_set_gfxhub_funcs(struct amdgpu_device 
*adev)
}
 }
 
-
 static int gmc_v10_0_early_init(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index c76ffd1a70cd..8d1b11368a7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1303,6 +1303,14 @@ static void gmc_v9_0_set_hdp_ras_funcs(struct 
amdgpu_device *adev)
amdgpu_ras_register_ras_block(adev, >hdp.ras->ras_block);
 }
 
+static void gmc_v9_0_set_xgmi_ras_funcs(struct amdgpu_device *adev)
+{
+   if (!adev->gmc.xgmi.connected_to_cpu) {
+   adev->gmc.xgmi.ras = _ras;
+   amdgpu_ras_register_ras_block(adev, 
>gmc.xgmi.ras->ras_block);
+   }
+}
+
 static void gmc_v9_0_set_mca_funcs(struct amdgpu_device *adev)
 {
/* is UMC the right IP to check for MCA?  Maybe DF? */
@@ -1339,6 +1347,7 @@ static int gmc_v9_0_early_init(void *handle)
gmc_v9_0_set_gfxhub_funcs(adev);
gmc_v9_0_set_hdp_ras_funcs(adev);
gmc_v9_0_set_mca_funcs(adev);
+   gmc_v9_0_set_xgmi_ras_funcs(adev);
 
adev->gmc.shared_aperture_start = 0x2000ULL;
adev->gmc.shared_aperture_end =
-- 
2.25.1



[PATCH 3/3] drm/amdgpu: Remove redundant code in gmc v10

2022-01-18 Thread yipechai
Gmc v10 doesn't support the ras function, so remove the redundant ras code from it.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 19 ---
 1 file changed, 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 5f9f82091000..a833ef130495 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -664,25 +664,10 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device 
*adev)
adev->umc.umc_inst_num = UMC_V8_7_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V8_7_PER_CHANNEL_OFFSET_SIENNA;
adev->umc.channel_idx_tbl = _v8_7_channel_idx_tbl[0][0];
-   adev->umc.ras = _v8_7_ras;
break;
default:
break;
}
-   if (adev->umc.ras) {
-   amdgpu_ras_register_ras_block(adev, >umc.ras->ras_block);
-
-   strcpy(adev->umc.ras->ras_block.name, "umc");
-   adev->umc.ras->ras_block.block = AMDGPU_RAS_BLOCK__UMC;
-
-   /* If don't define special ras_late_init function, use default 
ras_late_init */
-   if (!adev->umc.ras->ras_block.ras_late_init)
-   adev->umc.ras->ras_block.ras_late_init = 
amdgpu_umc_ras_late_init;
-
-   /* If don't define special ras_fini function, use default 
ras_fini */
-   if (!adev->umc.ras->ras_block.ras_fini)
-   adev->umc.ras->ras_block.ras_fini = 
amdgpu_umc_ras_fini;
-   }
 }
 
 
@@ -745,10 +730,6 @@ static int gmc_v10_0_late_init(void *handle)
if (r)
return r;
 
-   r = amdgpu_gmc_ras_late_init(adev);
-   if (r)
-   return r;
-
return amdgpu_irq_get(adev, >gmc.vm_fault, 0);
 }
 
-- 
2.25.1



[PATCH 1/3] drm/amdgpu: Remove repeated calls

2022-01-18 Thread yipechai
Remove the repeated amdgpu_ras_get_ras_block() call from the declaration
of block_obj in amdgpu_ras_error_status_query().

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 7a1d2bac698e..4992bc554c0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1704,9 +1704,7 @@ static void amdgpu_ras_log_on_err_counter(struct 
amdgpu_device *adev)
 static void amdgpu_ras_error_status_query(struct amdgpu_device *adev,
  struct ras_query_if *info)
 {
-   struct amdgpu_ras_block_object *block_obj = 
amdgpu_ras_get_ras_block(adev,
-   
info->head.block,
-   
info->head.sub_block_index);
+   struct amdgpu_ras_block_object *block_obj;
/*
 * Only two block need to query read/write
 * RspStatus at current state
-- 
2.25.1



[PATCH V2 5/5] drm/amdgpu: Fix the code style warnings in hdp xgmi mca and umc

2022-01-13 Thread yipechai
drm/amdgpu: Fix the code style warnings in hdp, xgmi, mca and umc:
1. WARNING: missing space after struct definition.
2. WARNING: please, no space before tabs.
3. WARNING: line length of xxx exceeds 100 columns.
4. ERROR: "foo* bar" should be "foo *bar".
5. ERROR: space required before the open parenthesis '('.
6. ERROR: space prohibited after that open parenthesis '('.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h  | 4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 3 ++-
 drivers/gpu/drm/amd/amdgpu/mca_v3_0.c| 7 ---
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
index c94a4b3c8d6d..4af2c2a322e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
@@ -24,7 +24,7 @@
 #define __AMDGPU_HDP_H__
 #include "amdgpu_ras.h"
 
-struct amdgpu_hdp_ras{
+struct amdgpu_hdp_ras {
struct amdgpu_ras_block_object ras_block;
 };
 
@@ -40,7 +40,7 @@ struct amdgpu_hdp_funcs {
 struct amdgpu_hdp {
struct ras_common_if*ras_if;
const struct amdgpu_hdp_funcs   *funcs;
-   struct amdgpu_hdp_ras   *ras;
+   struct amdgpu_hdp_ras   *ras;
 };
 
 int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev, void *ras_info);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 478457637d29..5929d6f528c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -950,7 +950,8 @@ static void amdgpu_xgmi_query_ras_error_count(struct 
amdgpu_device *adev,
 static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,  void 
*inject_if)
 {
int ret = 0;
-   struct ta_ras_trigger_error_input *block_info =  (struct 
ta_ras_trigger_error_input *)inject_if;
+   struct ta_ras_trigger_error_input *block_info =
+   (struct ta_ras_trigger_error_input *)inject_if;
 
if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
dev_warn(adev->dev, "Failed to disallow df cstate");
diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
index f51092041edc..68565262af9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
@@ -47,12 +47,13 @@ static void mca_v3_0_mp0_ras_fini(struct amdgpu_device 
*adev)
amdgpu_mca_ras_fini(adev, >mca.mp0);
 }
 
-static int mca_v3_0_ras_block_match(struct amdgpu_ras_block_object* block_obj, 
enum amdgpu_ras_block block, uint32_t sub_block_index)
+static int mca_v3_0_ras_block_match(struct amdgpu_ras_block_object *block_obj,
+   enum amdgpu_ras_block block, uint32_t 
sub_block_index)
 {
-   if(!block_obj)
+   if (!block_obj)
return -EINVAL;
 
-   if( (block_obj->block == block) &&
+   if ((block_obj->block == block) &&
(block_obj->sub_block_index == sub_block_index)) {
return 0;
}
-- 
2.25.1



[PATCH V2 4/5] drm/amdgpu: Fix the code style warnings in sdma

2022-01-13 Thread yipechai
Fix the code style warnings in sdma:
1. WARNING: Missing a blank line after declarations.
2. ERROR: that open brace { should be on the previous line.
3. WARNING: unnecessary whitespace before a quoted newline.
4. ERROR: space required after that ',' (ctx:VxV).

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 8 
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 3c1483dc113e..06a7ceda4c87 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -2766,10 +2766,10 @@ static int 
sdma_v4_0_query_ras_error_count_by_instance(struct amdgpu_device *ade
 static void sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev,  void 
*ras_error_status)
 {
int i = 0;
+
for (i = 0; i < adev->sdma.num_instances; i++) {
-   if (sdma_v4_0_query_ras_error_count_by_instance(adev, i, 
ras_error_status))
-   {
-   dev_err(adev->dev, "Query ras error count failed in 
SDMA%d \n", i);
+   if (sdma_v4_0_query_ras_error_count_by_instance(adev, i, 
ras_error_status)) {
+   dev_err(adev->dev, "Query ras error count failed in 
SDMA%d\n", i);
return;
}
}
@@ -2814,7 +2814,7 @@ static void sdma_v4_0_set_ras_funcs(struct amdgpu_device 
*adev)
if (adev->sdma.ras) {
amdgpu_ras_register_ras_block(adev, >sdma.ras->ras_block);
 
-   strcpy(adev->sdma.ras->ras_block.name,"sdma");
+   strcpy(adev->sdma.ras->ras_block.name, "sdma");
adev->sdma.ras->ras_block.block = AMDGPU_RAS_BLOCK__SDMA;
 
/* If don't define special ras_late_init function, use default 
ras_late_init */
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
index 5c1ba1116e5c..6f9895cdddb1 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
@@ -248,10 +248,10 @@ static void sdma_v4_4_reset_ras_error_count(struct 
amdgpu_device *adev)
 static void sdma_v4_4_query_ras_error_count(struct amdgpu_device *adev,  void 
*ras_error_status)
 {
int i = 0;
+
for (i = 0; i < adev->sdma.num_instances; i++) {
-   if (sdma_v4_4_query_ras_error_count_by_instance(adev, i, 
ras_error_status))
-   {
-   dev_err(adev->dev, "Query ras error count failed in 
SDMA%d \n", i);
+   if (sdma_v4_4_query_ras_error_count_by_instance(adev, i, 
ras_error_status)) {
+   dev_err(adev->dev, "Query ras error count failed in 
SDMA%d\n", i);
return;
}
}
-- 
2.25.1



[PATCH V2 3/5] drm/amdgpu: Fix the code style warnings in gmc

2022-01-13 Thread yipechai
Fix the code style warnings in gmc:
ERROR: space required after that ',' (ctx:VxV).

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 5ef4ad28ab26..4f8d356f8432 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -672,7 +672,7 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device 
*adev)
if (adev->umc.ras) {
amdgpu_ras_register_ras_block(adev, >umc.ras->ras_block);
 
-   strcpy(adev->umc.ras->ras_block.name,"umc");
+   strcpy(adev->umc.ras->ras_block.name, "umc");
adev->umc.ras->ras_block.block = AMDGPU_RAS_BLOCK__UMC;
 
/* If don't define special ras_late_init function, use default 
ras_late_init */
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 3965aae435f8..c76ffd1a70cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1231,7 +1231,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device 
*adev)
if (adev->umc.ras) {
amdgpu_ras_register_ras_block(adev, >umc.ras->ras_block);
 
-   strcpy(adev->umc.ras->ras_block.name,"umc");
+   strcpy(adev->umc.ras->ras_block.name, "umc");
adev->umc.ras->ras_block.block = AMDGPU_RAS_BLOCK__UMC;
 
/* If don't define special ras_late_init function, use default 
ras_late_init */
-- 
2.25.1



[PATCH V2 2/5] drm/amdgpu: Fix the code style warnings in gfx

2022-01-13 Thread yipechai
Fix the code style warnings in gfx:
1. WARNING: suspect code indent for conditional statements.
2. ERROR: spaces required around that '=' (ctx:WxV).

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 4 ++--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index d36a6bc62560..e12f9f5c3beb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2314,11 +2314,11 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device 
*adev)
 
/* If not define special ras_late_init function, use gfx 
default ras_late_init */
if (!adev->gfx.ras->ras_block.ras_late_init)
-   adev->gfx.ras->ras_block.ras_late_init = 
amdgpu_gfx_ras_late_init;
+   adev->gfx.ras->ras_block.ras_late_init = 
amdgpu_gfx_ras_late_init;
 
/* If not define special ras_fini function, use gfx default 
ras_fini */
if (!adev->gfx.ras->ras_block.ras_fini)
-   adev->gfx.ras->ras_block.ras_fini = 
amdgpu_gfx_ras_fini;
+   adev->gfx.ras->ras_block.ras_fini = amdgpu_gfx_ras_fini;
}
 
adev->gfx.config.gb_addr_config = gb_addr_config;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
index 7ec6243e015e..7653ebd0e67b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
@@ -1930,7 +1930,7 @@ static void gfx_v9_4_2_reset_sq_timeout_status(struct 
amdgpu_device *adev)
mutex_unlock(>grbm_idx_mutex);
 }
 
-struct amdgpu_ras_block_hw_ops  gfx_v9_4_2_ras_ops ={
+struct amdgpu_ras_block_hw_ops  gfx_v9_4_2_ras_ops = {
.ras_error_inject = _v9_4_2_ras_error_inject,
.query_ras_error_count = _v9_4_2_query_ras_error_count,
.reset_ras_error_count = _v9_4_2_reset_ras_error_count,
-- 
2.25.1



[PATCH V2 1/5] drm/amdgpu: Fix the code style warnings in amdgpu_ras

2022-01-13 Thread yipechai
Fix the code style warnings in amdgpu_ras:
1. ERROR: space required before the open parenthesis '('.
2. WARNING: line length of xxx exceeds 100 columns.
3. ERROR: "foo* bar" should be "foo *bar".
4. WARNING: unnecessary whitespace before a quoted newline.
5. WARNING: space prohibited before semicolon.
6. WARNING: suspect code indent for conditional statements.
7. WARNING: braces {} are not necessary for single statement blocks.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 41 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 10 +++---
 2 files changed, 30 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 0bb6b5354802..23502b2b0770 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -872,7 +872,7 @@ static int amdgpu_ras_enable_all_features(struct 
amdgpu_device *adev,
 static int amdgpu_ras_block_match_default(struct amdgpu_ras_block_object 
*block_obj,
enum amdgpu_ras_block block)
 {
-   if(!block_obj)
+   if (!block_obj)
return -EINVAL;
 
if (block_obj->block == block)
@@ -881,7 +881,7 @@ static int amdgpu_ras_block_match_default(struct 
amdgpu_ras_block_object *block_
return -EINVAL;
 }
 
-static struct amdgpu_ras_block_object* amdgpu_ras_get_ras_block(struct 
amdgpu_device *adev,
+static struct amdgpu_ras_block_object *amdgpu_ras_get_ras_block(struct 
amdgpu_device *adev,
enum amdgpu_ras_block block, uint32_t 
sub_block_index)
 {
struct amdgpu_ras_block_object *obj, *tmp;
@@ -941,7 +941,7 @@ static void amdgpu_ras_get_ecc_info(struct amdgpu_device 
*adev, struct ras_err_d
 int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
  struct ras_query_if *info)
 {
-   struct amdgpu_ras_block_object* block_obj = NULL;
+   struct amdgpu_ras_block_object *block_obj = NULL;
struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
struct ras_err_data err_data = {0, 0, 0, NULL};
 
@@ -953,7 +953,7 @@ int amdgpu_ras_query_error_status(struct amdgpu_device 
*adev,
} else {
block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, 0);
if (!block_obj || !block_obj->hw_ops)   {
-   dev_info(adev->dev, "%s doesn't config ras function \n",
+   dev_info(adev->dev, "%s doesn't config ras function.\n",
get_ras_block_str(&info->head));
return -EINVAL;
}
@@ -1023,13 +1023,14 @@ int amdgpu_ras_query_error_status(struct amdgpu_device 
*adev,
 int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
enum amdgpu_ras_block block)
 {
-   struct amdgpu_ras_block_object* block_obj = 
amdgpu_ras_get_ras_block(adev, block, 0);
+   struct amdgpu_ras_block_object *block_obj = 
amdgpu_ras_get_ras_block(adev, block, 0);
 
if (!amdgpu_ras_is_supported(adev, block))
return -EINVAL;
 
if (!block_obj || !block_obj->hw_ops)   {
-   dev_info(adev->dev, "%s doesn't config ras function \n", 
ras_block_str(block));
+   dev_info(adev->dev, "%s doesn't config ras function.\n",
+   ras_block_str(block));
return -EINVAL;
}
 
@@ -1066,7 +1067,8 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
return -EINVAL;
 
if (!block_obj || !block_obj->hw_ops)   {
-   dev_info(adev->dev, "%s doesn't config ras function \n", 
get_ras_block_str(>head));
+   dev_info(adev->dev, "%s doesn't config ras function.\n",
+   get_ras_block_str(>head));
return -EINVAL;
}
 
@@ -1702,19 +1704,25 @@ static void amdgpu_ras_log_on_err_counter(struct 
amdgpu_device *adev)
 static void amdgpu_ras_error_status_query(struct amdgpu_device *adev,
  struct ras_query_if *info)
 {
-   struct amdgpu_ras_block_object* block_obj = 
amdgpu_ras_get_ras_block(adev, info->head.block, info->head.sub_block_index);
+   struct amdgpu_ras_block_object *block_obj = 
amdgpu_ras_get_ras_block(adev,
+   
info->head.block,
+   
info->head.sub_block_index);
/*
 * Only two block need to query read/write
 * RspStatus at current state
 */
if ((info->head.block != AMDGPU_RAS_BLOCK__GFX) &&
(info->head.block != AMDGPU_RAS_BLOCK__MMHUB))
-  

[PATCH 4/5] drm/amdgpu: Fix the code style warnings in sdma

2022-01-13 Thread yipechai
Fix the code style warnings in sdma.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 5 ++---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c | 3 +--
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 3c1483dc113e..e3f00376a2b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -2767,8 +2767,7 @@ static void sdma_v4_0_query_ras_error_count(struct 
amdgpu_device *adev,  void *r
 {
int i = 0;
for (i = 0; i < adev->sdma.num_instances; i++) {
-   if (sdma_v4_0_query_ras_error_count_by_instance(adev, i, 
ras_error_status))
-   {
+   if (sdma_v4_0_query_ras_error_count_by_instance(adev, i, 
ras_error_status)) {
dev_err(adev->dev, "Query ras error count failed in 
SDMA%d \n", i);
return;
}
@@ -2814,7 +2813,7 @@ static void sdma_v4_0_set_ras_funcs(struct amdgpu_device 
*adev)
if (adev->sdma.ras) {
amdgpu_ras_register_ras_block(adev, &adev->sdma.ras->ras_block);
 
-   strcpy(adev->sdma.ras->ras_block.name,"sdma");
+   strcpy(adev->sdma.ras->ras_block.name, "sdma");
adev->sdma.ras->ras_block.block = AMDGPU_RAS_BLOCK__SDMA;
 
/* If don't define special ras_late_init function, use default 
ras_late_init */
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
index 5c1ba1116e5c..5c90f456ece1 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
@@ -249,8 +249,7 @@ static void sdma_v4_4_query_ras_error_count(struct 
amdgpu_device *adev,  void *r
 {
int i = 0;
for (i = 0; i < adev->sdma.num_instances; i++) {
-   if (sdma_v4_4_query_ras_error_count_by_instance(adev, i, 
ras_error_status))
-   {
+   if (sdma_v4_4_query_ras_error_count_by_instance(adev, i, 
ras_error_status)) {
dev_err(adev->dev, "Query ras error count failed in 
SDMA%d \n", i);
return;
}
-- 
2.25.1



[PATCH 5/5] drm/amdgpu: Fix the code style warnings in hdp xgmi mca and umc

2022-01-13 Thread yipechai
drm/amdgpu: Fix the code style warnings in hdp, xgmi, mca and umc.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h  | 4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 3 ++-
 drivers/gpu/drm/amd/amdgpu/mca_v3_0.c| 7 ---
 drivers/gpu/drm/amd/amdgpu/umc_v6_1.c| 3 ++-
 4 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
index c94a4b3c8d6d..8e9694a4d335 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
@@ -24,7 +24,7 @@
 #define __AMDGPU_HDP_H__
 #include "amdgpu_ras.h"
 
-struct amdgpu_hdp_ras{
+struct amdgpu_hdp_ras {
struct amdgpu_ras_block_object ras_block;
 };
 
@@ -40,7 +40,7 @@ struct amdgpu_hdp_funcs {
 struct amdgpu_hdp {
struct ras_common_if*ras_if;
const struct amdgpu_hdp_funcs   *funcs;
-   struct amdgpu_hdp_ras   *ras;
+   struct amdgpu_hdp_ras *ras;
 };
 
 int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev, void *ras_info);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 478457637d29..5929d6f528c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -950,7 +950,8 @@ static void amdgpu_xgmi_query_ras_error_count(struct 
amdgpu_device *adev,
 static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,  void 
*inject_if)
 {
int ret = 0;
-   struct ta_ras_trigger_error_input *block_info =  (struct 
ta_ras_trigger_error_input *)inject_if;
+   struct ta_ras_trigger_error_input *block_info =
+   (struct ta_ras_trigger_error_input *)inject_if;
 
if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
dev_warn(adev->dev, "Failed to disallow df cstate");
diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
index f51092041edc..68565262af9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
@@ -47,12 +47,13 @@ static void mca_v3_0_mp0_ras_fini(struct amdgpu_device 
*adev)
amdgpu_mca_ras_fini(adev, &adev->mca.mp0);
 }
 
-static int mca_v3_0_ras_block_match(struct amdgpu_ras_block_object* block_obj, 
enum amdgpu_ras_block block, uint32_t sub_block_index)
+static int mca_v3_0_ras_block_match(struct amdgpu_ras_block_object *block_obj,
+   enum amdgpu_ras_block block, uint32_t 
sub_block_index)
 {
-   if(!block_obj)
+   if (!block_obj)
return -EINVAL;
 
-   if( (block_obj->block == block) &&
+   if ((block_obj->block == block) &&
(block_obj->sub_block_index == sub_block_index)) {
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c 
b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
index 4776301972d4..c9891b7a05a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
@@ -475,4 +475,5 @@ struct amdgpu_umc_ras umc_v6_1_ras = {
.hw_ops = &umc_v6_1_ras_hw_ops,
},
.err_cnt_init = umc_v6_1_err_cnt_init,
-};
\ No newline at end of file
+};
+
-- 
2.25.1



[PATCH 3/5] drm/amdgpu: Fix the code style warnings in gmc

2022-01-13 Thread yipechai
Fix the code style warnings in gmc.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 5ef4ad28ab26..4f8d356f8432 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -672,7 +672,7 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device 
*adev)
if (adev->umc.ras) {
amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block);
 
-   strcpy(adev->umc.ras->ras_block.name,"umc");
+   strcpy(adev->umc.ras->ras_block.name, "umc");
adev->umc.ras->ras_block.block = AMDGPU_RAS_BLOCK__UMC;
 
/* If don't define special ras_late_init function, use default 
ras_late_init */
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 3965aae435f8..c76ffd1a70cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1231,7 +1231,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device 
*adev)
if (adev->umc.ras) {
amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block);
 
-   strcpy(adev->umc.ras->ras_block.name,"umc");
+   strcpy(adev->umc.ras->ras_block.name, "umc");
adev->umc.ras->ras_block.block = AMDGPU_RAS_BLOCK__UMC;
 
/* If don't define special ras_late_init function, use default 
ras_late_init */
-- 
2.25.1



[PATCH 2/5] drm/amdgpu: Fix the code style warnings in gfx

2022-01-13 Thread yipechai
Fix the code style warnings in gfx.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 4 ++--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index d36a6bc62560..e12f9f5c3beb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2314,11 +2314,11 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device 
*adev)
 
/* If not define special ras_late_init function, use gfx 
default ras_late_init */
if (!adev->gfx.ras->ras_block.ras_late_init)
-   adev->gfx.ras->ras_block.ras_late_init = 
amdgpu_gfx_ras_late_init;
+   adev->gfx.ras->ras_block.ras_late_init = 
amdgpu_gfx_ras_late_init;
 
/* If not define special ras_fini function, use gfx default 
ras_fini */
if (!adev->gfx.ras->ras_block.ras_fini)
-   adev->gfx.ras->ras_block.ras_fini = 
amdgpu_gfx_ras_fini;
+   adev->gfx.ras->ras_block.ras_fini = amdgpu_gfx_ras_fini;
}
 
adev->gfx.config.gb_addr_config = gb_addr_config;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
index 7ec6243e015e..7653ebd0e67b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
@@ -1930,7 +1930,7 @@ static void gfx_v9_4_2_reset_sq_timeout_status(struct 
amdgpu_device *adev)
mutex_unlock(&adev->grbm_idx_mutex);
 }
 
-struct amdgpu_ras_block_hw_ops  gfx_v9_4_2_ras_ops ={
+struct amdgpu_ras_block_hw_ops  gfx_v9_4_2_ras_ops = {
.ras_error_inject = &gfx_v9_4_2_ras_error_inject,
.query_ras_error_count = &gfx_v9_4_2_query_ras_error_count,
.reset_ras_error_count = &gfx_v9_4_2_reset_ras_error_count,
-- 
2.25.1



[PATCH 1/5] drm/amdgpu: Fix the code style warnings in amdgpu_ras

2022-01-13 Thread yipechai
Fix the code style warnings in amdgpu_ras.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 41 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 10 +++---
 2 files changed, 30 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 0bb6b5354802..23502b2b0770 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -872,7 +872,7 @@ static int amdgpu_ras_enable_all_features(struct 
amdgpu_device *adev,
 static int amdgpu_ras_block_match_default(struct amdgpu_ras_block_object 
*block_obj,
enum amdgpu_ras_block block)
 {
-   if(!block_obj)
+   if (!block_obj)
return -EINVAL;
 
if (block_obj->block == block)
@@ -881,7 +881,7 @@ static int amdgpu_ras_block_match_default(struct 
amdgpu_ras_block_object *block_
return -EINVAL;
 }
 
-static struct amdgpu_ras_block_object* amdgpu_ras_get_ras_block(struct 
amdgpu_device *adev,
+static struct amdgpu_ras_block_object *amdgpu_ras_get_ras_block(struct 
amdgpu_device *adev,
enum amdgpu_ras_block block, uint32_t 
sub_block_index)
 {
struct amdgpu_ras_block_object *obj, *tmp;
@@ -941,7 +941,7 @@ static void amdgpu_ras_get_ecc_info(struct amdgpu_device 
*adev, struct ras_err_d
 int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
  struct ras_query_if *info)
 {
-   struct amdgpu_ras_block_object* block_obj = NULL;
+   struct amdgpu_ras_block_object *block_obj = NULL;
struct ras_manager *obj = amdgpu_ras_find_obj(adev, >head);
struct ras_err_data err_data = {0, 0, 0, NULL};
 
@@ -953,7 +953,7 @@ int amdgpu_ras_query_error_status(struct amdgpu_device 
*adev,
} else {
block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, 0);
if (!block_obj || !block_obj->hw_ops)   {
-   dev_info(adev->dev, "%s doesn't config ras function \n",
+   dev_info(adev->dev, "%s doesn't config ras function.\n",
get_ras_block_str(>head));
return -EINVAL;
}
@@ -1023,13 +1023,14 @@ int amdgpu_ras_query_error_status(struct amdgpu_device 
*adev,
 int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
enum amdgpu_ras_block block)
 {
-   struct amdgpu_ras_block_object* block_obj = 
amdgpu_ras_get_ras_block(adev, block, 0);
+   struct amdgpu_ras_block_object *block_obj = 
amdgpu_ras_get_ras_block(adev, block, 0);
 
if (!amdgpu_ras_is_supported(adev, block))
return -EINVAL;
 
if (!block_obj || !block_obj->hw_ops)   {
-   dev_info(adev->dev, "%s doesn't config ras function \n", 
ras_block_str(block));
+   dev_info(adev->dev, "%s doesn't config ras function.\n",
+   ras_block_str(block));
return -EINVAL;
}
 
@@ -1066,7 +1067,8 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
return -EINVAL;
 
if (!block_obj || !block_obj->hw_ops)   {
-   dev_info(adev->dev, "%s doesn't config ras function \n", 
get_ras_block_str(>head));
+   dev_info(adev->dev, "%s doesn't config ras function.\n",
+   get_ras_block_str(>head));
return -EINVAL;
}
 
@@ -1702,19 +1704,25 @@ static void amdgpu_ras_log_on_err_counter(struct 
amdgpu_device *adev)
 static void amdgpu_ras_error_status_query(struct amdgpu_device *adev,
  struct ras_query_if *info)
 {
-   struct amdgpu_ras_block_object* block_obj = 
amdgpu_ras_get_ras_block(adev, info->head.block, info->head.sub_block_index);
+   struct amdgpu_ras_block_object *block_obj = 
amdgpu_ras_get_ras_block(adev,
+   
info->head.block,
+   
info->head.sub_block_index);
/*
 * Only two block need to query read/write
 * RspStatus at current state
 */
if ((info->head.block != AMDGPU_RAS_BLOCK__GFX) &&
(info->head.block != AMDGPU_RAS_BLOCK__MMHUB))
-   return ;
+   return;
+
+   block_obj = amdgpu_ras_get_ras_block(adev,
+   info->head.block,
+   info->head.sub_block_index);
 
-   block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, 
info->head.sub_block_index);
if (!block_obj || !block_obj->hw_ops) {
-   dev_info(adev->dev, "%s do

[PATCH V2 2/2] drm/amdgpu: To eliminate the warning of no previous prototype for 'amdgpu_ras_block_match_default'

2022-01-13 Thread yipechai
Declare 'amdgpu_ras_block_match_default' as static to eliminate the
'no previous prototype' compile warning.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 394a18e3c6af..7afeec4255bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -869,7 +869,8 @@ static int amdgpu_ras_enable_all_features(struct 
amdgpu_device *adev,
 }
 /* feature ctl end */
 
-int amdgpu_ras_block_match_default(struct amdgpu_ras_block_object* block_obj, 
enum amdgpu_ras_block block)
+static int amdgpu_ras_block_match_default(struct amdgpu_ras_block_object 
*block_obj,
+   enum amdgpu_ras_block block)
 {
if(!block_obj)
return -EINVAL;
-- 
2.25.1



[PATCH V2 1/2] drm/amdgpu: Use ARRAY_SIZE to get array length

2022-01-13 Thread yipechai
Use ARRAY_SIZE to get array length.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 23f4290b2fde..394a18e3c6af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -89,7 +89,8 @@ const char *get_ras_block_str(struct ras_common_if *ras_block)
return ras_block_string[ras_block->block];
 }
 
-#define ras_block_str(_BLOCK_)  (((_BLOCK_) < 
(sizeof(*ras_block_string)/sizeof(const char*))) ? ras_block_string[_BLOCK_] : 
"Out Of Range")
+#define ras_block_str(_BLOCK_) \
+   (((_BLOCK_) < ARRAY_SIZE(ras_block_string)) ? ras_block_string[_BLOCK_] 
: "Out Of Range")
 
 #define ras_err_str(i) (ras_error_string[ffs(i)])
 
-- 
2.25.1



[PATCH 3/3] drm/amdgpu: Adjust the code format

2022-01-12 Thread yipechai
Adjust the code format.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 7afeec4255bd..54d807b021fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2764,9 +2764,8 @@ int amdgpu_ras_register_ras_block(struct amdgpu_device 
*adev,
 
/* If the ras object is in ras_list, don't add it again */
list_for_each_entry_safe(obj, tmp, >ras_list, node) {
-   if (obj == ras_block_obj) {
+   if (obj == ras_block_obj)
return 0;
-   }
}
 
INIT_LIST_HEAD(_block_obj->node);
-- 
2.25.1



[PATCH 2/3] drm/amdgpu: Fix compile warnings

2022-01-12 Thread yipechai
Fix compile warnings.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 394a18e3c6af..7afeec4255bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -869,7 +869,8 @@ static int amdgpu_ras_enable_all_features(struct 
amdgpu_device *adev,
 }
 /* feature ctl end */
 
-int amdgpu_ras_block_match_default(struct amdgpu_ras_block_object* block_obj, 
enum amdgpu_ras_block block)
+static int amdgpu_ras_block_match_default(struct amdgpu_ras_block_object 
*block_obj,
+   enum amdgpu_ras_block block)
 {
if(!block_obj)
return -EINVAL;
-- 
2.25.1



[PATCH 1/3] drm/amdgpu: Use ARRAY_SIZE to get array length

2022-01-12 Thread yipechai
Use ARRAY_SIZE to get array length.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 23f4290b2fde..394a18e3c6af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -89,7 +89,8 @@ const char *get_ras_block_str(struct ras_common_if *ras_block)
return ras_block_string[ras_block->block];
 }
 
-#define ras_block_str(_BLOCK_)  (((_BLOCK_) < 
(sizeof(*ras_block_string)/sizeof(const char*))) ? ras_block_string[_BLOCK_] : 
"Out Of Range")
+#define ras_block_str(_BLOCK_) \
+   (((_BLOCK_) < ARRAY_SIZE(ras_block_string)) ? ras_block_string[_BLOCK_] 
: "Out Of Range")
 
 #define ras_err_str(i) (ras_error_string[ffs(i)])
 
-- 
2.25.1



[PATCH V2 2/2] drm/amdgpu: No longer insert ras blocks into ras_list if it already exists in ras_list

2022-01-12 Thread yipechai
Do not insert a ras block into ras_list if it already exists in ras_list.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 614ae8455c9f..d208fde509de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2754,12 +2754,20 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
 int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
struct amdgpu_ras_block_object* ras_block_obj)
 {
+   struct amdgpu_ras_block_object *obj, *tmp;
if (!adev || !ras_block_obj)
return -EINVAL;
 
if (!amdgpu_ras_asic_supported(adev))
return 0;
 
+   /* If the ras object is in ras_list, don't add it again */
+   list_for_each_entry_safe(obj, tmp, >ras_list, node) {
+   if (obj == ras_block_obj) {
+   return 0;
+   }
+   }
+
INIT_LIST_HEAD(_block_obj->node);
list_add_tail(_block_obj->node, >ras_list);
 
-- 
2.25.1



[PATCH V2 1/2] drm/amdgpu: Add ras supported check for register_ras_block

2022-01-12 Thread yipechai
Add ras supported check for register_ras_block.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index b1bedfd4febc..614ae8455c9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2757,6 +2757,9 @@ int amdgpu_ras_register_ras_block(struct amdgpu_device 
*adev,
if (!adev || !ras_block_obj)
return -EINVAL;
 
+   if (!amdgpu_ras_asic_supported(adev))
+   return 0;
+
INIT_LIST_HEAD(_block_obj->node);
list_add_tail(_block_obj->node, >ras_list);
 
-- 
2.25.1



[PATCH 2/2] drm/amdgpu: No longer insert ras blocks into ras_list if it already exists in ras_list

2022-01-11 Thread yipechai
Do not insert a ras block into ras_list if it already exists in ras_list.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 62be0b4909b3..e6d3bb4b56e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2754,9 +2754,17 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
 int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
struct amdgpu_ras_block_object* ras_block_obj)
 {
+   struct amdgpu_ras_block_object *obj, *tmp;
if (!adev || !amdgpu_ras_asic_supported(adev) || !ras_block_obj)
return -EINVAL;
 
+   /* If the ras object had been in ras_list, doesn't add it to ras_list 
again */
+   list_for_each_entry_safe(obj, tmp, >ras_list, node) {
+   if (obj == ras_block_obj) {
+   return 0;
+   }
+   }
+
INIT_LIST_HEAD(_block_obj->node);
list_add_tail(_block_obj->node, >ras_list);
 
-- 
2.25.1



[PATCH 1/2] drm/amdgpu: Add a filter condition to restrict the SW ras function to be registered only by asics whose hardware supports the ras function

2022-01-11 Thread yipechai
Add a filter condition to restrict the SW ras function to be registered only by 
asics whose hardware supports the ras function.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index b1bedfd4febc..62be0b4909b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2754,7 +2754,7 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
 int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
struct amdgpu_ras_block_object* ras_block_obj)
 {
-   if (!adev || !ras_block_obj)
+   if (!adev || !amdgpu_ras_asic_supported(adev) || !ras_block_obj)
return -EINVAL;
 
INIT_LIST_HEAD(_block_obj->node);
-- 
2.25.1



[PATCH V3 12/12] drm/amdgpu: Removed redundant ras code

2021-12-28 Thread yipechai
Removed redundant ras code.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 82 ++---
 1 file changed, 20 insertions(+), 62 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 21765e05b003..17de79be6d8f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -910,51 +910,23 @@ int amdgpu_ras_query_error_status(struct amdgpu_device 
*adev,
return -EINVAL;
 
block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, 0);
+   if (!block_obj || !block_obj->hw_ops)   {
+   dev_info(adev->dev, "%s doesn't config ras function \n",
+   get_ras_block_str(>head));
+   return -EINVAL;
+   }
 
-   switch (info->head.block) {
-   case AMDGPU_RAS_BLOCK__UMC:
-   if (!block_obj || !block_obj->hw_ops)   {
-   dev_info(adev->dev, "%s doesn't config ras function \n",
-   get_ras_block_str(>head));
-   return -EINVAL;
-   }
+   if (block_obj->hw_ops->query_ras_error_count)
+   block_obj->hw_ops->query_ras_error_count(adev, _data);
 
-   if (block_obj->hw_ops->query_ras_error_count)
-   block_obj->hw_ops->query_ras_error_count(adev, 
_data);
-   /* umc query_ras_error_address is also responsible for clearing
-* error status
-*/
-   if (block_obj->hw_ops->query_ras_error_address)
-   block_obj->hw_ops->query_ras_error_address(adev, 
_data);
-   break;
-   case AMDGPU_RAS_BLOCK__SDMA:
-   case AMDGPU_RAS_BLOCK__GFX:
-   case AMDGPU_RAS_BLOCK__MMHUB:
-   if (!block_obj || !block_obj->hw_ops)   {
-   dev_info(adev->dev, "%s doesn't config ras function \n",
-   get_ras_block_str(>head));
-   return -EINVAL;
-   }
-   if (block_obj->hw_ops->query_ras_error_count)
-   block_obj->hw_ops->query_ras_error_count(adev, 
_data);
+   if (info->head.block == AMDGPU_RAS_BLOCK__UMC)
+   block_obj->hw_ops->query_ras_error_address(adev, _data);
 
+   if ((info->head.block == AMDGPU_RAS_BLOCK__SDMA) ||
+   (info->head.block == AMDGPU_RAS_BLOCK__GFX) ||
+   (info->head.block == AMDGPU_RAS_BLOCK__MMHUB)) {
if (block_obj->hw_ops->query_ras_error_status)
block_obj->hw_ops->query_ras_error_status(adev);
-   break;
-   case AMDGPU_RAS_BLOCK__PCIE_BIF:
-   case AMDGPU_RAS_BLOCK__XGMI_WAFL:
-   case AMDGPU_RAS_BLOCK__HDP:
-   case AMDGPU_RAS_BLOCK__MCA:
-   if (!block_obj || !block_obj->hw_ops)   {
-   dev_info(adev->dev, "%s doesn't config ras function \n",
-   get_ras_block_str(>head));
-   return -EINVAL;
-   }
-   if (block_obj->hw_ops->query_ras_error_count)
-   block_obj->hw_ops->query_ras_error_count(adev, 
_data);
-   break;
-   default:
-   break;
}
 
obj->err_data.ue_count += err_data.ue_count;
@@ -1016,32 +988,18 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device 
*adev,
if (!amdgpu_ras_is_supported(adev, block))
return -EINVAL;
 
-   switch (block) {
-   case AMDGPU_RAS_BLOCK__GFX:
-   case AMDGPU_RAS_BLOCK__MMHUB:
-   if (!block_obj || !block_obj->hw_ops)   {
-   dev_info(adev->dev, "%s doesn't config ras function 
\n", ras_block_str(block));
-   return -EINVAL;
-   }
+   if (!block_obj || !block_obj->hw_ops)   {
+   dev_info(adev->dev, "%s doesn't config ras function \n", 
ras_block_str(block));
+   return -EINVAL;
+   }
 
-   if (block_obj->hw_ops->reset_ras_error_count)
-   block_obj->hw_ops->reset_ras_error_count(adev);
+   if (block_obj->hw_ops->reset_ras_error_count)
+   block_obj->hw_ops->reset_ras_error_count(adev);
 
+   if ((block == AMDGPU_RAS_BLOCK__GFX) ||
+   (block == AMDGPU_RAS_BLOCK__MMHUB)) {
if (block_obj->hw_ops->reset_ras_error_status)
block_obj->hw_ops->reset_ras_error_status(adev);
-   break;
-   case AMDGPU_RAS_BLOCK__SDMA:
-   case AMDGPU_RAS_BLOCK__HDP:
-   if (!block_obj || !block_obj->hw_ops)   {
- 

[PATCH V3 11/12] drm/amdgpu: Adjust error inject function code style in amdgpu_ras.c

2021-12-28 Thread yipechai
1. Move the xgmi-specific error inject function from amdgpu_ras.c to the
xgmi block.
2. Use psp_ras_trigger_error as the default error inject function in
amdgpu_ras.c: if a ras block does not define .ras_error_inject, the default
error inject function takes effect.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c  | 60 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 27 +++
 2 files changed, 39 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index edba3ee292c8..21765e05b003 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1047,32 +1047,6 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device 
*adev,
return 0;
 }
 
-/* Trigger XGMI/WAFL error */
-static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
-struct ta_ras_trigger_error_input *block_info)
-{
-   int ret;
-
-   if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
-   dev_warn(adev->dev, "Failed to disallow df cstate");
-
-   if (amdgpu_dpm_allow_xgmi_power_down(adev, false))
-   dev_warn(adev->dev, "Failed to disallow XGMI power down");
-
-   ret = psp_ras_trigger_error(>psp, block_info);
-
-   if (amdgpu_ras_intr_triggered())
-   return ret;
-
-   if (amdgpu_dpm_allow_xgmi_power_down(adev, true))
-   dev_warn(adev->dev, "Failed to allow XGMI power down");
-
-   if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
-   dev_warn(adev->dev, "Failed to allow df cstate");
-
-   return ret;
-}
-
 /* wrapper of psp_ras_trigger_error */
 int amdgpu_ras_error_inject(struct amdgpu_device *adev,
struct ras_inject_if *info)
@@ -1091,6 +1065,11 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
if (!obj)
return -EINVAL;
 
+   if (!block_obj || !block_obj->hw_ops)   {
+   dev_info(adev->dev, "%s doesn't config ras function \n", 
get_ras_block_str(>head));
+   return -EINVAL;
+   }
+
/* Calculate XGMI relative offset */
if (adev->gmc.xgmi.num_physical_nodes > 1) {
block_info.address =
@@ -1098,30 +1077,15 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
  block_info.address);
}
 
-   switch (info->head.block) {
-   case AMDGPU_RAS_BLOCK__GFX:
-   if (!block_obj || !block_obj->hw_ops)   {
-   dev_info(adev->dev, "%s doesn't config ras function 
\n", get_ras_block_str(>head));
-   return -EINVAL;
-   }
-
+   if (info->head.block == AMDGPU_RAS_BLOCK__GFX) {
if (block_obj->hw_ops->ras_error_inject)
ret = block_obj->hw_ops->ras_error_inject(adev, info);
-   break;
-   case AMDGPU_RAS_BLOCK__UMC:
-   case AMDGPU_RAS_BLOCK__SDMA:
-   case AMDGPU_RAS_BLOCK__MMHUB:
-   case AMDGPU_RAS_BLOCK__PCIE_BIF:
-   case AMDGPU_RAS_BLOCK__MCA:
-   ret = psp_ras_trigger_error(>psp, _info);
-   break;
-   case AMDGPU_RAS_BLOCK__XGMI_WAFL:
-   ret = amdgpu_ras_error_inject_xgmi(adev, _info);
-   break;
-   default:
-   dev_info(adev->dev, "%s error injection is not supported yet\n",
-get_ras_block_str(>head));
-   ret = -EINVAL;
+   } else {
+   /* If defined special ras_error_inject(e.g: xgmi), implement 
special ras_error_inject */
+   if (block_obj->hw_ops->ras_error_inject)
+   ret = block_obj->hw_ops->ras_error_inject(adev, 
_info);
+   else  /*If not defined .ras_error_inject, use default 
ras_error_inject*/
+   ret = psp_ras_trigger_error(>psp, _info);
}
 
if (ret)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index e3c6898c5d13..a2bd3a2abe72 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -940,9 +940,36 @@ static void amdgpu_xgmi_query_ras_error_count(struct 
amdgpu_device *adev,
err_data->ce_count += ce_cnt;
 }
 
+/* Trigger XGMI/WAFL error */
+static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,  void 
*inject_if)
+{
+   int ret = 0;;
+   struct ta_ras_trigger_error_input *block_info =  (struct 
ta_ras_trigger_error_input *)inject_if;
+
+   if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
+   dev_warn(adev->dev, "Failed to disallow df cstate");
+
+   if (amdgpu_dp

[PATCH V3 09/12] drm/amdgpu: Modify sdma block to fit for the unified ras block data and ops

2021-12-28 Thread yipechai
1.Modify sdma block to fit for the unified ras block data and ops.
2.Change amdgpu_sdma_ras_funcs to amdgpu_sdma_ras, and the corresponding 
variable name remove _funcs suffix.
3.Remove the const flag of sdma ras variable so that sdma ras block can be able 
to be inserted into amdgpu device ras block link list.
4.Invoke amdgpu_ras_register_ras_block function to register sdma ras block into 
amdgpu device ras block link list.
5.Remove the redundant code about sdma in amdgpu_ras.c after using the unified 
ras block.
6.Fill unified ras block .name .block .ras_late_init and .ras_fini for all of 
sdma versions. If .ras_late_init and .ras_fini had been defined by the selected 
sdma version, the defined functions will take effect; if not defined, default 
fill them with amdgpu_sdma_ras_late_init and amdgpu_sdma_ras_fini.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c  |  9 
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 12 ++---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   | 58 ++--
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c   | 25 --
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4.h   |  2 +-
 5 files changed, 71 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 5a8fccfdb0bb..4ca51f623751 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -953,12 +953,6 @@ int amdgpu_ras_query_error_status(struct amdgpu_device 
*adev,
block_obj->hw_ops->query_ras_error_address(adev, 
_data);
break;
case AMDGPU_RAS_BLOCK__SDMA:
-   if (adev->sdma.funcs->query_ras_error_count) {
-   for (i = 0; i < adev->sdma.num_instances; i++)
-   adev->sdma.funcs->query_ras_error_count(adev, i,
-   
_data);
-   }
-   break;
case AMDGPU_RAS_BLOCK__GFX:
case AMDGPU_RAS_BLOCK__MMHUB:
if (!block_obj || !block_obj->hw_ops)   {
@@ -1064,9 +1058,6 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device 
*adev,
block_obj->hw_ops->reset_ras_error_status(adev);
break;
case AMDGPU_RAS_BLOCK__SDMA:
-   if (adev->sdma.funcs->reset_ras_error_count)
-   adev->sdma.funcs->reset_ras_error_count(adev);
-   break;
case AMDGPU_RAS_BLOCK__HDP:
if (!block_obj || !block_obj->hw_ops)   {
dev_info(adev->dev, "%s doesn't config ras function 
\n", ras_block_str(block));
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index f8fb755e3aa6..eaee12ab6518 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -23,6 +23,7 @@
 
 #ifndef __AMDGPU_SDMA_H__
 #define __AMDGPU_SDMA_H__
+#include "amdgpu_ras.h"
 
 /* max number of IP instances */
 #define AMDGPU_MAX_SDMA_INSTANCES  8
@@ -50,13 +51,8 @@ struct amdgpu_sdma_instance {
boolburst_nop;
 };
 
-struct amdgpu_sdma_ras_funcs {
-   int (*ras_late_init)(struct amdgpu_device *adev,
-   void *ras_ih_info);
-   void (*ras_fini)(struct amdgpu_device *adev);
-   int (*query_ras_error_count)(struct amdgpu_device *adev,
-   uint32_t instance, void *ras_error_status);
-   void (*reset_ras_error_count)(struct amdgpu_device *adev);
+struct amdgpu_sdma_ras {
+   struct amdgpu_ras_block_object ras_block;
 };
 
 struct amdgpu_sdma {
@@ -73,7 +69,7 @@ struct amdgpu_sdma {
uint32_tsrbm_soft_reset;
boolhas_page_queue;
struct ras_common_if*ras_if;
-   const struct amdgpu_sdma_ras_funcs  *funcs;
+   struct amdgpu_sdma_ras  *ras;
 };
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 69c9e460c1eb..5500f93f6ecd 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1898,13 +1898,13 @@ static int sdma_v4_0_late_init(void *handle)
sdma_v4_0_setup_ulv(adev);
 
if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
-   if (adev->sdma.funcs &&
-   adev->sdma.funcs->reset_ras_error_count)
-   adev->sdma.funcs->reset_ras_error_count(adev);
+   if (adev->sdma.ras && adev->sdma.ras->ras_block.hw_ops &&
+   adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count)
+   
adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count(adev);
}
 
-   if (adev->sdma.funcs &&

[PATCH V3 10/12] drm/amdgpu: Modify mca block to fit for the unified ras block data and ops

2021-12-28 Thread yipechai
1.Modify mca block to fit for the unified ras block data and ops.
2.Define special .ras_block_match function for mca block to identify itself.
3.Change amdgpu_mca_ras_funcs to amdgpu_mca_ras_block(amdgpu_mca_ras had been 
used), and the corresponding variable name remove _funcs suffix.
4.Remove the const flag of mca ras variable so that mca ras block can be able 
to be inserted into amdgpu device ras block link list.
5.Invoke amdgpu_ras_register_ras_block function to register mca ras block into 
amdgpu device ras block link list.
6.Remove the redundant code about mca in amdgpu_ras.c after using the unified 
ras block.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 15 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c | 11 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h | 14 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 29 +
 drivers/gpu/drm/amd/amdgpu/mca_v3_0.c   | 85 ++---
 5 files changed, 78 insertions(+), 76 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 429d89188d94..b7306724898d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -463,23 +463,20 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
return r;
}
 
-   if (adev->mca.mp0.ras_funcs &&
-   adev->mca.mp0.ras_funcs->ras_late_init) {
-   r = adev->mca.mp0.ras_funcs->ras_late_init(adev);
+   if (adev->mca.mp0.ras && adev->mca.mp0.ras->ras_block.ras_late_init) {
+   r = adev->mca.mp0.ras->ras_block.ras_late_init(adev, NULL);
if (r)
return r;
}
 
-   if (adev->mca.mp1.ras_funcs &&
-   adev->mca.mp1.ras_funcs->ras_late_init) {
-   r = adev->mca.mp1.ras_funcs->ras_late_init(adev);
+   if (adev->mca.mp1.ras && adev->mca.mp1.ras->ras_block.ras_late_init) {
+   r = adev->mca.mp1.ras->ras_block.ras_late_init(adev, NULL);
if (r)
return r;
}
 
-   if (adev->mca.mpio.ras_funcs &&
-   adev->mca.mpio.ras_funcs->ras_late_init) {
-   r = adev->mca.mpio.ras_funcs->ras_late_init(adev);
+   if (adev->mca.mpio.ras && adev->mca.mpio.ras->ras_block.ras_late_init) {
+   r = adev->mca.mpio.ras->ras_block.ras_late_init(adev, NULL);
if (r)
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
index ce538f4819f9..52a60c2316a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
@@ -74,20 +74,23 @@ void amdgpu_mca_query_ras_error_count(struct amdgpu_device 
*adev,
 int amdgpu_mca_ras_late_init(struct amdgpu_device *adev,
 struct amdgpu_mca_ras *mca_dev)
 {
+   char sysfs_name[32] = {0};
int r;
struct ras_ih_if ih_info = {
.cb = NULL,
};
-   struct ras_fs_if fs_info = {
-   .sysfs_name = mca_dev->ras_funcs->sysfs_name,
+   struct ras_fs_if fs_info= {
+   .sysfs_name = sysfs_name,
};
 
+   snprintf(sysfs_name, sizeof(sysfs_name), "%s_err_count", 
mca_dev->ras->ras_block.name);
+
if (!mca_dev->ras_if) {
mca_dev->ras_if = kmalloc(sizeof(struct ras_common_if), 
GFP_KERNEL);
if (!mca_dev->ras_if)
return -ENOMEM;
-   mca_dev->ras_if->block = mca_dev->ras_funcs->ras_block;
-   mca_dev->ras_if->sub_block_index = 
mca_dev->ras_funcs->ras_sub_block;
+   mca_dev->ras_if->block = mca_dev->ras->ras_block.block;
+   mca_dev->ras_if->sub_block_index = 
mca_dev->ras->ras_block.sub_block_index;
mca_dev->ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
}
ih_info.head = fs_info.head = *mca_dev->ras_if;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
index c74bc7177066..be030c4031d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
@@ -21,21 +21,13 @@
 #ifndef __AMDGPU_MCA_H__
 #define __AMDGPU_MCA_H__
 
-struct amdgpu_mca_ras_funcs {
-   int (*ras_late_init)(struct amdgpu_device *adev);
-   void (*ras_fini)(struct amdgpu_device *adev);
-   void (*query_ras_error_count)(struct amdgpu_device *adev,
- void *ras_error_status);
-   void (*query_ras_error_address)(struct amdgpu_device *adev,
-   void *ras_error_status);
-   uint32_t ras_block;
-   

[PATCH V3 08/12] drm/amdgpu: Modify umc block to fit for the unified ras block data and ops

2021-12-28 Thread yipechai
1.Modify umc block to fit for the unified ras block data and ops.
2.Change amdgpu_umc_ras_funcs to amdgpu_umc_ras, and the corresponding variable 
name remove _funcs suffix.
3.Remove the const flag of umc ras variable so that umc ras block can be able 
to be inserted into amdgpu device ras block link list.
4.Invoke amdgpu_ras_register_ras_block function to register umc ras block into 
amdgpu device ras block link list.
5.Remove the redundant code about umc in amdgpu_ras.c after using the unified 
ras block.
6.Fill unified ras block .name .block .ras_late_init and .ras_fini for all of 
umc versions. If .ras_late_init and .ras_fini had been defined by the selected 
umc version, the defined functions will take effect; if not defined, default 
fill them with amdgpu_umc_ras_late_init and amdgpu_umc_ras_fini.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 10 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 22 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 20 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 14 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 16 +++-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 21 ++---
 drivers/gpu/drm/amd/amdgpu/umc_v6_1.c   | 12 
 drivers/gpu/drm/amd/amdgpu/umc_v6_1.h   |  2 +-
 drivers/gpu/drm/amd/amdgpu/umc_v6_7.c   | 12 
 drivers/gpu/drm/amd/amdgpu/umc_v6_7.h   |  2 +-
 drivers/gpu/drm/amd/amdgpu/umc_v8_7.c   | 12 
 drivers/gpu/drm/amd/amdgpu/umc_v8_7.h   |  2 +-
 12 files changed, 92 insertions(+), 53 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index b12da46c483a..429d89188d94 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -434,9 +434,8 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
 {
int r;
 
-   if (adev->umc.ras_funcs &&
-   adev->umc.ras_funcs->ras_late_init) {
-   r = adev->umc.ras_funcs->ras_late_init(adev);
+   if (adev->umc.ras && adev->umc.ras->ras_block.ras_late_init) {
+   r = adev->umc.ras->ras_block.ras_late_init(adev, NULL);
if (r)
return r;
}
@@ -490,9 +489,8 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
 
 void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
 {
-   if (adev->umc.ras_funcs &&
-   adev->umc.ras_funcs->ras_fini)
-   adev->umc.ras_funcs->ras_fini(adev);
+   if (adev->umc.ras && adev->umc.ras->ras_block.ras_fini)
+   adev->umc.ras->ras_block.ras_fini(adev);
 
if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ras_fini)
adev->mmhub.ras->ras_block.ras_fini(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 26bd9da31ffc..5a8fccfdb0bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -938,15 +938,19 @@ int amdgpu_ras_query_error_status(struct amdgpu_device 
*adev,
 
switch (info->head.block) {
case AMDGPU_RAS_BLOCK__UMC:
-   if (adev->umc.ras_funcs &&
-   adev->umc.ras_funcs->query_ras_error_count)
-   adev->umc.ras_funcs->query_ras_error_count(adev, 
_data);
+   if (!block_obj || !block_obj->hw_ops)   {
+   dev_info(adev->dev, "%s doesn't config ras function \n",
+   get_ras_block_str(>head));
+   return -EINVAL;
+   }
+
+   if (block_obj->hw_ops->query_ras_error_count)
+   block_obj->hw_ops->query_ras_error_count(adev, 
_data);
/* umc query_ras_error_address is also responsible for clearing
 * error status
 */
-   if (adev->umc.ras_funcs &&
-   adev->umc.ras_funcs->query_ras_error_address)
-   adev->umc.ras_funcs->query_ras_error_address(adev, 
_data);
+   if (block_obj->hw_ops->query_ras_error_address)
+   block_obj->hw_ops->query_ras_error_address(adev, 
_data);
break;
case AMDGPU_RAS_BLOCK__SDMA:
if (adev->sdma.funcs->query_ras_error_count) {
@@ -2374,12 +2378,12 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
/* Init poison supported flag, the default value is false */
if (adev->df.funcs &&
adev->df.funcs->query_ras_poison_mode &&
-   adev->umc.ras_funcs &&
-   adev->umc.ras_funcs->query_ras_poison_mode) {
+   adev->um

[PATCH V3 07/12] drm/amdgpu: Modify nbio block to fit for the unified ras block data and ops

2021-12-28 Thread yipechai
1.Modify nbio block to fit for the unified ras block data and ops.
2.Change amdgpu_nbio_ras_funcs to amdgpu_nbio_ras, and the corresponding 
variable name remove _funcs suffix.
3.Remove the const flag of nbio ras variable so that nbio ras block can be 
able to be inserted into amdgpu device ras block link list.
4.Invoke amdgpu_ras_register_ras_block function to register nbio ras block into 
amdgpu device ras block link list.
5.Remove the redundant code about nbio in amdgpu_ras.c after using the unified 
ras block.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c  | 12 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h | 11 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c  | 22 ++
 drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c   | 17 +
 drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h   |  2 +-
 drivers/gpu/drm/amd/amdgpu/soc15.c   | 18 --
 7 files changed, 43 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 5208b2dd176a..24feceb51289 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -208,13 +208,13 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg)
 * ack the interrupt if it is there
 */
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF)) {
-   if (adev->nbio.ras_funcs &&
-   adev->nbio.ras_funcs->handle_ras_controller_intr_no_bifring)
-   
adev->nbio.ras_funcs->handle_ras_controller_intr_no_bifring(adev);
+   if (adev->nbio.ras &&
+   adev->nbio.ras->handle_ras_controller_intr_no_bifring)
+   
adev->nbio.ras->handle_ras_controller_intr_no_bifring(adev);
 
-   if (adev->nbio.ras_funcs &&
-   
adev->nbio.ras_funcs->handle_ras_err_event_athub_intr_no_bifring)
-   
adev->nbio.ras_funcs->handle_ras_err_event_athub_intr_no_bifring(adev);
+   if (adev->nbio.ras &&
+   adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring)
+   
adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring(adev);
}
 
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
index 6201a5f4b4fa..f54c183f1b1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
@@ -22,7 +22,7 @@
 #include "amdgpu.h"
 #include "amdgpu_ras.h"
 
-int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev)
+int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, void *ras_info)
 {
int r;
struct ras_ih_if ih_info = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
index 843052205bd5..4afb76d3cd97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
@@ -47,15 +47,12 @@ struct nbio_hdp_flush_reg {
u32 ref_and_mask_sdma7;
 };
 
-struct amdgpu_nbio_ras_funcs {
+struct amdgpu_nbio_ras {
+   struct amdgpu_ras_block_object ras_block;
void (*handle_ras_controller_intr_no_bifring)(struct amdgpu_device 
*adev);
void (*handle_ras_err_event_athub_intr_no_bifring)(struct amdgpu_device 
*adev);
int (*init_ras_controller_interrupt)(struct amdgpu_device *adev);
int (*init_ras_err_event_athub_interrupt)(struct amdgpu_device *adev);
-   void (*query_ras_error_count)(struct amdgpu_device *adev,
- void *ras_error_status);
-   int (*ras_late_init)(struct amdgpu_device *adev);
-   void (*ras_fini)(struct amdgpu_device *adev);
 };
 
 struct amdgpu_nbio_funcs {
@@ -104,9 +101,9 @@ struct amdgpu_nbio {
struct amdgpu_irq_src ras_err_event_athub_irq;
struct ras_common_if *ras_if;
const struct amdgpu_nbio_funcs *funcs;
-   const struct amdgpu_nbio_ras_funcs *ras_funcs;
+   struct amdgpu_nbio_ras  *ras;
 };
 
-int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev);
+int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, void *ras_info);
 void amdgpu_nbio_ras_fini(struct amdgpu_device *adev);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index fa0ebd484c7e..26bd9da31ffc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -969,10 +969,6 @@ int amdgpu_ras_query_error_status(struct amdgpu_device 
*adev,
block_obj->hw_ops->query_ras_error_status(adev);
break;
case AMDGPU_RAS_BLOCK__PCIE_BIF:
-   if (adev->nbio.ras_funcs &&
-   adev->nbio.ras_fu

[PATCH V3 03/12] drm/amdgpu: Modify gfx block to fit for the unified ras block data and ops

2021-12-28 Thread yipechai
1.Modify gfx block to fit for the unified ras block data and ops.
2.Change amdgpu_gfx_ras_funcs to amdgpu_gfx_ras, and the corresponding variable 
name remove _funcs suffix.
3.Remove the const flag of gfx ras variable so that gfx ras block can be able 
to be inserted into amdgpu device ras block link list.
4.Invoke amdgpu_ras_register_ras_block function to register gfx ras block into 
amdgpu device ras block link list.
5.Remove the redundant code about gfx in amdgpu_ras.c after using the unified 
ras block.
6.Fill unified ras block .name .block .ras_late_init and .ras_fini for all of 
gfx versions. If .ras_late_init and .ras_fini had been defined by the selected 
gfx version, the defined functions will take effect; if not defined, default 
fill with amdgpu_gfx_ras_late_init and amdgpu_gfx_ras_fini.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c |  8 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 17 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 61 +++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 65 -
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c   | 24 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h   |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c | 25 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h |  2 +-
 8 files changed, 121 insertions(+), 83 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 1795d448c700..18e4106aa03b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -622,7 +622,7 @@ int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, 
uint32_t *value)
return r;
 }
 
-int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev)
+int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, void *ras_info)
 {
int r;
struct ras_fs_if fs_info = {
@@ -696,9 +696,9 @@ int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device 
*adev,
 */
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
-   if (adev->gfx.ras_funcs &&
-   adev->gfx.ras_funcs->query_ras_error_count)
-   adev->gfx.ras_funcs->query_ras_error_count(adev, 
err_data);
+   if (adev->gfx.ras && adev->gfx.ras->ras_block.hw_ops &&
+   adev->gfx.ras->ras_block.hw_ops->query_ras_error_count)
+   
adev->gfx.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
amdgpu_ras_reset_gpu(adev);
}
return AMDGPU_RAS_SUCCESS;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 6b78b4a0e182..35f2a724616e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -31,6 +31,7 @@
 #include "amdgpu_ring.h"
 #include "amdgpu_rlc.h"
 #include "soc15.h"
+#include "amdgpu_ras.h"
 
 /* GFX current status */
 #define AMDGPU_GFX_NORMAL_MODE 0xL
@@ -213,16 +214,8 @@ struct amdgpu_cu_info {
uint32_t bitmap[4][4];
 };
 
-struct amdgpu_gfx_ras_funcs {
-   int (*ras_late_init)(struct amdgpu_device *adev);
-   void (*ras_fini)(struct amdgpu_device *adev);
-   int (*ras_error_inject)(struct amdgpu_device *adev,
-   void *inject_if);
-   int (*query_ras_error_count)(struct amdgpu_device *adev,
-void *ras_error_status);
-   void (*reset_ras_error_count)(struct amdgpu_device *adev);
-   void (*query_ras_error_status)(struct amdgpu_device *adev);
-   void (*reset_ras_error_status)(struct amdgpu_device *adev);
+struct amdgpu_gfx_ras {
+   struct amdgpu_ras_block_object  ras_block;
void (*enable_watchdog_timer)(struct amdgpu_device *adev);
 };
 
@@ -348,7 +341,7 @@ struct amdgpu_gfx {
 
/*ras */
struct ras_common_if*ras_if;
-   const struct amdgpu_gfx_ras_funcs   *ras_funcs;
+   struct amdgpu_gfx_ras   *ras;
 };
 
 #define amdgpu_gfx_get_gpu_clock_counter(adev) 
(adev)->gfx.funcs->get_gpu_clock_counter((adev))
@@ -410,7 +403,7 @@ bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device 
*adev, int me,
int pipe, int queue);
 void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
 int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value);
-int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev);
+int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, void *ras_info);
 void amdgpu_gfx_ras_fini(struct amdgpu_device *adev);
 int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
void *err_data,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
ind

[PATCH V3 06/12] drm/amdgpu: Modify mmhub block to fit for the unified ras block data and ops

2021-12-28 Thread yipechai
1.Modify mmhub block to fit for the unified ras block data and ops.
2.Change amdgpu_mmhub_ras_funcs to amdgpu_mmhub_ras, and the corresponding 
variable name remove _funcs suffix.
3.Remove the const flag of mmhub ras variable so that mmhub ras block can be 
able to be inserted into amdgpu device ras block link list.
4.Invoke amdgpu_ras_register_ras_block function to register mmhub ras block 
into amdgpu device ras block link list.
5.Remove the redundant code about mmhub in amdgpu_ras.c after using the unified 
ras block.
6.Fill unified ras block .name .block .ras_late_init and .ras_fini for all of 
mmhub versions. If .ras_late_init and .ras_fini had been defined by the 
selected mmhub version, the defined functions will take effect; if not defined, 
default fill them with amdgpu_mmhub_ras_late_init and amdgpu_mmhub_ras_fini.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c| 10 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h  | 14 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c| 47 ++
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 28 ++---
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c| 10 +++--
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h|  2 +-
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c| 10 +++--
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.h|  2 +-
 drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c| 10 +++--
 drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h|  2 +-
 12 files changed, 74 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 0980396ee709..feb93880c63d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3377,9 +3377,9 @@ static void amdgpu_device_xgmi_reset_func(struct 
work_struct *__work)
if (adev->asic_reset_res)
goto fail;
 
-   if (adev->mmhub.ras_funcs &&
-   adev->mmhub.ras_funcs->reset_ras_error_count)
-   adev->mmhub.ras_funcs->reset_ras_error_count(adev);
+   if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
+   adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
+   
adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
} else {
 
task_barrier_full(>tb);
@@ -4705,9 +4705,9 @@ int amdgpu_do_asic_reset(struct list_head 
*device_list_handle,
 
if (!r && amdgpu_ras_intr_triggered()) {
list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
-   if (tmp_adev->mmhub.ras_funcs &&
-   tmp_adev->mmhub.ras_funcs->reset_ras_error_count)
-   
tmp_adev->mmhub.ras_funcs->reset_ras_error_count(tmp_adev);
+   if (tmp_adev->mmhub.ras && 
tmp_adev->mmhub.ras->ras_block.hw_ops &&
+   
tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
+   
tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
}
 
amdgpu_ras_intr_cleared();
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index b8902ad7e375..b12da46c483a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -441,9 +441,8 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
return r;
}
 
-   if (adev->mmhub.ras_funcs &&
-   adev->mmhub.ras_funcs->ras_late_init) {
-   r = adev->mmhub.ras_funcs->ras_late_init(adev);
+   if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ras_late_init) {
+   r = adev->mmhub.ras->ras_block.ras_late_init(adev, NULL);
if (r)
return r;
}
@@ -495,9 +494,8 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
adev->umc.ras_funcs->ras_fini)
adev->umc.ras_funcs->ras_fini(adev);
 
-   if (adev->mmhub.ras_funcs &&
-   adev->mmhub.ras_funcs->ras_fini)
-   adev->mmhub.ras_funcs->ras_fini(adev);
+   if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ras_fini)
+   adev->mmhub.ras->ras_block.ras_fini(adev);
 
if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_fini)
adev->gmc.xgmi.ras->ras_block.ras_fini(adev);
diff --git a/drivers/gpu/drm/amd/

[PATCH V3 05/12] drm/amdgpu: Modify hdp block to fit for the unified ras block data and ops

2021-12-28 Thread yipechai
1.Modify hdp block to fit for the unified ras block data and ops.
2.Change amdgpu_hdp_ras_funcs to amdgpu_hdp_ras, and the corresponding variable 
name remove _funcs suffix.
3.Remove the const flag of hdp ras variable so that hdp ras block can be able 
to be inserted into amdgpu device ras block link list.
4.Invoke amdgpu_ras_register_ras_block function to register hdp ras block into 
amdgpu device ras block link list.
5.Remove the redundant code about hdp in amdgpu_ras.c after using the unified 
ras block.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 10 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h | 13 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 16 
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   |  9 +
 drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c   | 14 +++---
 drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h   |  2 +-
 7 files changed, 35 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index af65ec46f783..b8902ad7e375 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -459,9 +459,8 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
return r;
}
 
-   if (adev->hdp.ras_funcs &&
-   adev->hdp.ras_funcs->ras_late_init) {
-   r = adev->hdp.ras_funcs->ras_late_init(adev);
+   if (adev->hdp.ras && adev->hdp.ras->ras_block.ras_late_init) {
+   r = adev->hdp.ras->ras_block.ras_late_init(adev, NULL);
if (r)
return r;
}
@@ -503,9 +502,8 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_fini)
adev->gmc.xgmi.ras->ras_block.ras_fini(adev);
 
-   if (adev->hdp.ras_funcs &&
-   adev->hdp.ras_funcs->ras_fini)
-   adev->hdp.ras_funcs->ras_fini(adev);
+   if (adev->hdp.ras && adev->hdp.ras->ras_block.ras_fini)
+   adev->hdp.ras->ras_block.ras_fini(adev);
 }
 
/*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
index 1d50d534d77c..5e6b57de3e1c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
@@ -24,7 +24,7 @@
 #include "amdgpu.h"
 #include "amdgpu_ras.h"
 
-int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev)
+int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev, void *ras_info)
 {
int r;
struct ras_ih_if ih_info = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
index 7ec99d591584..c94a4b3c8d6d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
@@ -22,13 +22,10 @@
  */
 #ifndef __AMDGPU_HDP_H__
 #define __AMDGPU_HDP_H__
+#include "amdgpu_ras.h"
 
-struct amdgpu_hdp_ras_funcs {
-   int (*ras_late_init)(struct amdgpu_device *adev);
-   void (*ras_fini)(struct amdgpu_device *adev);
-   void (*query_ras_error_count)(struct amdgpu_device *adev,
- void *ras_error_status);
-   void (*reset_ras_error_count)(struct amdgpu_device *adev);
+struct amdgpu_hdp_ras{
+   struct amdgpu_ras_block_object ras_block;
 };
 
 struct amdgpu_hdp_funcs {
@@ -43,9 +40,9 @@ struct amdgpu_hdp_funcs {
 struct amdgpu_hdp {
struct ras_common_if*ras_if;
const struct amdgpu_hdp_funcs   *funcs;
-   const struct amdgpu_hdp_ras_funcs   *ras_funcs;
+   struct amdgpu_hdp_ras   *ras;
 };
 
-int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev);
+int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev, void *ras_info);
 void amdgpu_hdp_ras_fini(struct amdgpu_device *adev);
 #endif /* __AMDGPU_HDP_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 8b0d11bb8186..ecb35d9994ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -982,6 +982,7 @@ int amdgpu_ras_query_error_status(struct amdgpu_device 
*adev,
adev->nbio.ras_funcs->query_ras_error_count(adev, 
_data);
break;
case AMDGPU_RAS_BLOCK__XGMI_WAFL:
+   case AMDGPU_RAS_BLOCK__HDP:
if (!block_obj || !block_obj->hw_ops)   {
dev_info(adev->dev, "%s doesn't config ras function \n",
get_ras_block_str(>head));
@@ -990,11 +991,6 @@ int amdgpu_ras_query_error_status(struct amdgpu_device 
*adev,
if (block_obj->hw_ops->query_ras_error_count)
block_

[PATCH V3 02/12] drm/amdgpu: Modify the compilation failed problem when other ras blocks' .h include amdgpu_ras.h

2021-12-28 Thread yipechai
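Fix the compilation failure that occurs when other ras blocks' .h files include 
amdgpu_ras.h: move the inline helpers amdgpu_ras_is_supported and 
amdgpu_ras_reset_gpu out of amdgpu_ras.h into amdgpu_ras.c and leave only their 
declarations in the header.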

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 22 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 23 ---
 2 files changed, 26 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 9dd698354e04..67a08629711c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2773,6 +2773,28 @@ static void amdgpu_register_bad_pages_mca_notifier(void)
 }
 }
 #endif
+
+/* check if ras is supported on block, say, sdma, gfx */
+int amdgpu_ras_is_supported(struct amdgpu_device *adev,
+   unsigned int block)
+{
+   struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+   if (block >= AMDGPU_RAS_BLOCK_COUNT)
+   return 0;
+   return ras && (adev->ras_enabled & (1 << block));
+}
+
+int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
+{
+   struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+   if (atomic_cmpxchg(>in_recovery, 0, 1) == 0)
+   schedule_work(>recovery_work);
+   return 0;
+}
+
+
 /* Register each ip ras block into amdgpu ras */
 int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
struct amdgpu_ras_block_object* ras_block_obj)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 9dbe8d49b891..3d7a45ec4d9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -513,16 +513,6 @@ struct amdgpu_ras_block_hw_ops {
 #define amdgpu_ras_get_context(adev)   ((adev)->psp.ras_context.ras)
 #define amdgpu_ras_set_context(adev, ras_con)  ((adev)->psp.ras_context.ras = 
(ras_con))
 
-/* check if ras is supported on block, say, sdma, gfx */
-static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev,
-   unsigned int block)
-{
-   struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
-
-   if (block >= AMDGPU_RAS_BLOCK_COUNT)
-   return 0;
-   return ras && (adev->ras_enabled & (1 << block));
-}
 
 int amdgpu_ras_recovery_init(struct amdgpu_device *adev);
 
@@ -539,15 +529,6 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
 
 int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev);
 
-static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
-{
-   struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
-
-   if (atomic_cmpxchg(>in_recovery, 0, 1) == 0)
-   schedule_work(>recovery_work);
-   return 0;
-}
-
 static inline enum ta_ras_block
 amdgpu_ras_block_to_ta(enum amdgpu_ras_block block) {
switch (block) {
@@ -679,5 +660,9 @@ const char *get_ras_block_str(struct ras_common_if 
*ras_block);
 
 bool amdgpu_ras_is_poison_mode_supported(struct amdgpu_device *adev);
 
+int amdgpu_ras_is_supported(struct amdgpu_device *adev,unsigned int 
block);
+
+int amdgpu_ras_reset_gpu(struct amdgpu_device *adev);
+
 int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, struct 
amdgpu_ras_block_object* ras_block_obj);
 #endif
-- 
2.25.1



[PATCH V3 04/12] drm/amdgpu: Modify xgmi block to fit for the unified ras block data and ops

2021-12-28 Thread yipechai
1. Modify the xgmi block to fit the unified ras block data and ops.
2. Change amdgpu_xgmi_ras_funcs to amdgpu_xgmi_ras, and remove the _funcs
suffix from the corresponding variable name.
3. Remove the const flag of the xgmi ras variable so that the xgmi ras block
can be inserted into the amdgpu device ras block linked list.
4. Invoke amdgpu_ras_register_ras_block to register the xgmi ras block into
the amdgpu device ras block linked list.
5. Remove the redundant xgmi code in amdgpu_ras.c after switching to the
unified ras block.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  | 16 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h  | 11 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c  | 10 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 26 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h |  4 ++--
 5 files changed, 37 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 83f26bca7dac..af65ec46f783 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -448,12 +448,13 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
return r;
}
 
-   if (!adev->gmc.xgmi.connected_to_cpu)
-   adev->gmc.xgmi.ras_funcs = _ras_funcs;
+   if (!adev->gmc.xgmi.connected_to_cpu) {
+   adev->gmc.xgmi.ras = _ras;
+   amdgpu_ras_register_ras_block(adev, 
>gmc.xgmi.ras->ras_block);
+   }
 
-   if (adev->gmc.xgmi.ras_funcs &&
-   adev->gmc.xgmi.ras_funcs->ras_late_init) {
-   r = adev->gmc.xgmi.ras_funcs->ras_late_init(adev);
+   if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_late_init) {
+   r = adev->gmc.xgmi.ras->ras_block.ras_late_init(adev, NULL);
if (r)
return r;
}
@@ -499,9 +500,8 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
adev->mmhub.ras_funcs->ras_fini)
adev->mmhub.ras_funcs->ras_fini(adev);
 
-   if (adev->gmc.xgmi.ras_funcs &&
-   adev->gmc.xgmi.ras_funcs->ras_fini)
-   adev->gmc.xgmi.ras_funcs->ras_fini(adev);
+   if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_fini)
+   adev->gmc.xgmi.ras->ras_block.ras_fini(adev);
 
if (adev->hdp.ras_funcs &&
adev->hdp.ras_funcs->ras_fini)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index e55201134a01..923db5ff5859 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -29,6 +29,7 @@
 #include 
 
 #include "amdgpu_irq.h"
+#include "amdgpu_ras.h"
 
 /* VA hole for 48bit addresses on Vega10 */
 #define AMDGPU_GMC_HOLE_START  0x8000ULL
@@ -135,12 +136,8 @@ struct amdgpu_gmc_funcs {
unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev);
 };
 
-struct amdgpu_xgmi_ras_funcs {
-   int (*ras_late_init)(struct amdgpu_device *adev);
-   void (*ras_fini)(struct amdgpu_device *adev);
-   int (*query_ras_error_count)(struct amdgpu_device *adev,
-void *ras_error_status);
-   void (*reset_ras_error_count)(struct amdgpu_device *adev);
+struct amdgpu_xgmi_ras {
+   struct amdgpu_ras_block_object ras_block;
 };
 
 struct amdgpu_xgmi {
@@ -159,7 +156,7 @@ struct amdgpu_xgmi {
struct ras_common_if *ras_if;
bool connected_to_cpu;
bool pending_reset;
-   const struct amdgpu_xgmi_ras_funcs *ras_funcs;
+   struct amdgpu_xgmi_ras *ras;
 };
 
 struct amdgpu_gmc {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 8ca1f294c202..8b0d11bb8186 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -982,9 +982,13 @@ int amdgpu_ras_query_error_status(struct amdgpu_device 
*adev,
adev->nbio.ras_funcs->query_ras_error_count(adev, 
_data);
break;
case AMDGPU_RAS_BLOCK__XGMI_WAFL:
-   if (adev->gmc.xgmi.ras_funcs &&
-   adev->gmc.xgmi.ras_funcs->query_ras_error_count)
-   adev->gmc.xgmi.ras_funcs->query_ras_error_count(adev, 
_data);
+   if (!block_obj || !block_obj->hw_ops)   {
+   dev_info(adev->dev, "%s doesn't config ras function \n",
+   get_ras_block_str(>head));
+   return -EINVAL;
+   }
+   if (block_obj->hw_ops->query_ras_error_count)
+   block_obj->hw_ops->query_ras_error_count(adev, 
_data);
brea

[PATCH V3 01/12] drm/amdgpu: Unify ras block interface for each ras block

2021-12-28 Thread yipechai
1. Define unified ops interface for each block.
2. Add a ras_block_match function pointer to the ops interface, so that each
ras block can provide a special match function to identify itself.
3. Add the new function amdgpu_ras_block_match_default. If a ras block doesn't
define .ras_block_match, amdgpu_ras_block_match_default is executed by default
to identify this ras block.
4. Define unified basic ras block data for each ras block.
5. Create dedicated amdgpu device ras block link list to manage all of the ras 
blocks.
6. Add amdgpu_ras_register_ras_block new function interface for each ras block 
to register itself to ras controlling block.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c| 46 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h| 28 +
 4 files changed, 78 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index db1505455761..eddf230856e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1151,6 +1151,8 @@ struct amdgpu_device {
boolbarrier_has_auto_waitcnt;
 
struct amdgpu_reset_control *reset_cntl;
+
+   struct list_headras_list;
 };
 
 static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 73ec46140d68..0980396ee709 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3578,6 +3578,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
INIT_LIST_HEAD(>reset_list);
 
+   INIT_LIST_HEAD(>ras_list);
+
INIT_DELAYED_WORK(>delayed_init_work,
  amdgpu_device_delayed_init_work_handler);
INIT_DELAYED_WORK(>gfx.gfx_off_delay_work,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 90f0db3b4f65..9dd698354e04 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -862,6 +862,40 @@ static int amdgpu_ras_enable_all_features(struct 
amdgpu_device *adev,
 }
 /* feature ctl end */
 
+int amdgpu_ras_block_match_default(struct amdgpu_ras_block_object* block_obj, 
enum amdgpu_ras_block block)
+{
+   if(!block_obj)
+   return -EINVAL;
+
+   if (block_obj->block == block)
+   return 0;
+
+   return -EINVAL;
+}
+
+static struct amdgpu_ras_block_object* amdgpu_ras_get_ras_block(struct 
amdgpu_device *adev,
+   enum amdgpu_ras_block block, uint32_t 
sub_block_index)
+{
+   struct amdgpu_ras_block_object *obj, *tmp;
+
+   if (block >= AMDGPU_RAS_BLOCK__LAST)
+   return NULL;
+
+   if (!amdgpu_ras_is_supported(adev, block))
+   return NULL;
+
+   list_for_each_entry_safe(obj, tmp, >ras_list, node) {
+   if (obj->ras_block_match) {
+   if (obj->ras_block_match(obj, block, sub_block_index) 
== 0)
+   return obj;
+   } else {
+   if (amdgpu_ras_block_match_default(obj, block) == 0)
+   return obj;
+   }
+   }
+
+   return NULL;
+}
 
 void amdgpu_ras_mca_query_error_status(struct amdgpu_device *adev,
   struct ras_common_if *ras_block,
@@ -2739,3 +2773,15 @@ static void amdgpu_register_bad_pages_mca_notifier(void)
 }
 }
 #endif
+/* Register each ip ras block into amdgpu ras */
+int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
+   struct amdgpu_ras_block_object* ras_block_obj)
+{
+   if (!adev || !ras_block_obj)
+   return -EINVAL;
+
+   INIT_LIST_HEAD(_block_obj->node);
+   list_add_tail(_block_obj->node, >ras_list);
+
+   return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index cdd0010a5389..9dbe8d49b891 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -469,6 +469,33 @@ struct ras_debug_if {
};
int op;
 };
+
+struct amdgpu_ras_block_object {
+   /* block name */
+   char name[32];
+
+   enum amdgpu_ras_block block;
+
+   uint32_t sub_block_index;
+
+   /* ras block link */
+   struct list_head node;
+
+   int (*ras_block_match)(struct amdgpu_ras_block_object *block_obj, enum 
amdgpu_ras_block block, uint32_t sub_block_index);
+   int (*ras_late_init)(struct amdgpu_device *adev, void *ras_info);
+   void (*ras_fini)(struct amdgpu_device *adev);
+   const struct amdgpu_ras_block_hw_ops *hw_ops;
+};
+
+struct amdgpu_ras_block_hw_ops {
+   int  (*ras_erro

[PATCH] drm/amdkfd: enable sdma ecc interrupt event can be handled by event_interrupt_wq_v9

2021-12-28 Thread yipechai
Allow the sdma ecc interrupt event to be handled by event_interrupt_wq_v9.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index d15fe087ba55..1d92e1b7f8d4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -220,6 +220,7 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
 */
return source_id == SOC15_INTSRC_CP_END_OF_PIPE ||
source_id == SOC15_INTSRC_SDMA_TRAP ||
+   source_id == SOC15_INTSRC_SDMA_ECC ||
source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG ||
source_id == SOC15_INTSRC_CP_BAD_OPCODE ||
KFD_IRQ_IS_FENCE(client_id, source_id) ||
-- 
2.25.1



[PATCH] drm/amdgpu: Remove the redundant code of psp bootloader functions

2021-12-08 Thread yipechai
The psp bootloader function code in psp_v13_0.c has already been optimized.
Following the code style of psp_v13_0.c, remove the redundant code from
psp_v11_0.c.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 79 ++
 1 file changed, 16 insertions(+), 63 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index de53ca061d88..a3e6aa17a8f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -284,13 +284,16 @@ static bool psp_v11_0_is_sos_alive(struct psp_context 
*psp)
return sol_reg != 0x0;
 }
 
-static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp)
+static int psp_v11_0_bootloader_load_component(struct psp_context  *psp,
+  struct psp_bin_desc  
*bin_desc,
+  enum psp_bootloader_cmd  bl_cmd)
 {
int ret;
uint32_t psp_gfxdrv_command_reg = 0;
struct amdgpu_device *adev = psp->adev;
+   uint32_t sol_reg;
 
-   /* Check tOS sign of life register to confirm sys driver and sOS
+   /* Check sOS sign of life register to confirm sys driver and sOS
 * are already been loaded.
 */
if (psp_v11_0_is_sos_alive(psp))
@@ -300,13 +303,13 @@ static int psp_v11_0_bootloader_load_kdb(struct 
psp_context *psp)
if (ret)
return ret;
 
-   /* Copy PSP KDB binary to memory */
-   psp_copy_fw(psp, psp->kdb.start_addr, psp->kdb.size_bytes);
+   /* Copy PSP System Driver binary to memory */
+   psp_copy_fw(psp, bin_desc->start_addr, bin_desc->size_bytes);
 
-   /* Provide the PSP KDB to bootloader */
+   /* Provide the sys driver to bootloader */
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
   (uint32_t)(psp->fw_pri_mc_addr >> 20));
-   psp_gfxdrv_command_reg = PSP_BL__LOAD_KEY_DATABASE;
+   psp_gfxdrv_command_reg = bl_cmd;
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
   psp_gfxdrv_command_reg);
 
@@ -315,69 +318,19 @@ static int psp_v11_0_bootloader_load_kdb(struct 
psp_context *psp)
return ret;
 }
 
-static int psp_v11_0_bootloader_load_spl(struct psp_context *psp)
+static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp)
 {
-   int ret;
-   uint32_t psp_gfxdrv_command_reg = 0;
-   struct amdgpu_device *adev = psp->adev;
-
-   /* Check tOS sign of life register to confirm sys driver and sOS
-* are already been loaded.
-*/
-   if (psp_v11_0_is_sos_alive(psp))
-   return 0;
-
-   ret = psp_v11_0_wait_for_bootloader(psp);
-   if (ret)
-   return ret;
-
-   /* Copy PSP SPL binary to memory */
-   psp_copy_fw(psp, psp->spl.start_addr, psp->spl.size_bytes);
-
-   /* Provide the PSP SPL to bootloader */
-   WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
-  (uint32_t)(psp->fw_pri_mc_addr >> 20));
-   psp_gfxdrv_command_reg = PSP_BL__LOAD_TOS_SPL_TABLE;
-   WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
-  psp_gfxdrv_command_reg);
-
-   ret = psp_v11_0_wait_for_bootloader(psp);
+   return psp_v11_0_bootloader_load_component(psp, >kdb, 
PSP_BL__LOAD_KEY_DATABASE);
+}
 
-   return ret;
+static int psp_v11_0_bootloader_load_spl(struct psp_context *psp)
+{
+   return psp_v11_0_bootloader_load_component(psp, >kdb, 
PSP_BL__LOAD_TOS_SPL_TABLE);
 }
 
 static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp)
 {
-   int ret;
-   uint32_t psp_gfxdrv_command_reg = 0;
-   struct amdgpu_device *adev = psp->adev;
-
-   /* Check sOS sign of life register to confirm sys driver and sOS
-* are already been loaded.
-*/
-   if (psp_v11_0_is_sos_alive(psp))
-   return 0;
-
-   ret = psp_v11_0_wait_for_bootloader(psp);
-   if (ret)
-   return ret;
-
-   /* Copy PSP System Driver binary to memory */
-   psp_copy_fw(psp, psp->sys.start_addr, psp->sys.size_bytes);
-
-   /* Provide the sys driver to bootloader */
-   WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
-  (uint32_t)(psp->fw_pri_mc_addr >> 20));
-   psp_gfxdrv_command_reg = PSP_BL__LOAD_SYSDRV;
-   WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
-  psp_gfxdrv_command_reg);
-
-   /* there might be handshake issue with hardware which needs delay */
-   mdelay(20);
-
-   ret = psp_v11_0_wait_for_bootloader(psp);
-
-   return ret;
+   return psp_v11_0_bootloader_load_component(psp, >kdb, 
PSP_BL__LOAD_SYSDRV);
 }
 
 static int psp_v11_0_bootloader_load_sos(struct psp_context *psp)
-- 
2.25.1



[PATCH V2 11/11] drm/amdgpu: Move error inject function from amdgpu_ras.c to each block

2021-12-01 Thread yipechai
Move each block error inject function from amdgpu_ras.c to each block.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c  | 62 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 28 +++
 drivers/gpu/drm/amd/amdgpu/mca_v3_0.c| 18 +++
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c  | 16 ++
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c  | 16 ++
 drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c  | 16 ++
 drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c   | 16 ++
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   | 16 ++
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c   | 16 ++
 drivers/gpu/drm/amd/amdgpu/umc_v6_1.c| 16 ++
 drivers/gpu/drm/amd/amdgpu/umc_v6_7.c| 16 ++
 drivers/gpu/drm/amd/amdgpu/umc_v8_7.c| 16 ++
 12 files changed, 201 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 2e38bd3d3d45..87b625d305c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1032,31 +1032,7 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device 
*adev,
return 0;
 }
 
-/* Trigger XGMI/WAFL error */
-static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
-struct ta_ras_trigger_error_input *block_info)
-{
-   int ret;
-
-   if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
-   dev_warn(adev->dev, "Failed to disallow df cstate");
 
-   if (amdgpu_dpm_allow_xgmi_power_down(adev, false))
-   dev_warn(adev->dev, "Failed to disallow XGMI power down");
-
-   ret = psp_ras_trigger_error(>psp, block_info);
-
-   if (amdgpu_ras_intr_triggered())
-   return ret;
-
-   if (amdgpu_dpm_allow_xgmi_power_down(adev, true))
-   dev_warn(adev->dev, "Failed to allow XGMI power down");
-
-   if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
-   dev_warn(adev->dev, "Failed to allow df cstate");
-
-   return ret;
-}
 
 /* wrapper of psp_ras_trigger_error */
 int amdgpu_ras_error_inject(struct amdgpu_device *adev,
@@ -1076,41 +1052,25 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
if (!obj)
return -EINVAL;
 
+   if (!block_obj || !block_obj->ops)  {
+   dev_info(adev->dev, "%s don't config ras function \n", 
get_ras_block_str(>head));
+   return -EINVAL;
+   }
+
/* Calculate XGMI relative offset */
if (adev->gmc.xgmi.num_physical_nodes > 1) {
-   block_info.address =
-   amdgpu_xgmi_get_relative_phy_addr(adev,
- block_info.address);
+   block_info.address =  amdgpu_xgmi_get_relative_phy_addr(adev, 
block_info.address);
}
 
-   switch (info->head.block) {
-   case AMDGPU_RAS_BLOCK__GFX:
-   if (!block_obj || !block_obj->ops)  {
-   dev_info(adev->dev, "%s don't config ras function \n", 
get_ras_block_str(>head));
-   return -EINVAL;
-   }
-   if (block_obj->ops->ras_error_inject)
+   if (block_obj->ops->ras_error_inject) {
+   if(info->head.block == AMDGPU_RAS_BLOCK__GFX)
ret = block_obj->ops->ras_error_inject(adev, info);
-   break;
-   case AMDGPU_RAS_BLOCK__UMC:
-   case AMDGPU_RAS_BLOCK__SDMA:
-   case AMDGPU_RAS_BLOCK__MMHUB:
-   case AMDGPU_RAS_BLOCK__PCIE_BIF:
-   case AMDGPU_RAS_BLOCK__MCA:
-   ret = psp_ras_trigger_error(>psp, _info);
-   break;
-   case AMDGPU_RAS_BLOCK__XGMI_WAFL:
-   ret = amdgpu_ras_error_inject_xgmi(adev, _info);
-   break;
-   default:
-   dev_info(adev->dev, "%s error injection is not supported yet\n",
-get_ras_block_str(>head));
-   ret = -EINVAL;
+   else
+   ret = block_obj->ops->ras_error_inject(adev, 
_info);
}
 
if (ret)
-   dev_err(adev->dev, "ras inject %s failed %d\n",
-   get_ras_block_str(>head), ret);
+   dev_err(adev->dev, "ras inject %s failed %d\n", 
get_ras_block_str(>head), ret);
 
return ret;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index da541c7b1ec2..298742afba99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -940,6 +940,33 @@ static void amdgpu_xgmi_query_ras_error_count(struct 
amdgpu_device *adev,
err_data->ce_count += ce_cnt;
 }
 
+/* Trigger XGMI/WAFL error */
+static int amdgpu_ras_er

[PATCH V2 10/11] drm/amdgpu: Modify mca block to fit for the unified ras block data and ops

2021-12-01 Thread yipechai
1. Modify the mca block to fit the unified ras block data and ops.
2. Implement the .ras_block_match function pointer for the mca block to identify itself.
3. Change amdgpu_mca_ras_funcs to amdgpu_mca_ras_block (the name amdgpu_mca_ras
is already in use), and remove the _funcs suffix from the corresponding variable name.
4. Remove the const flag of the mca ras variable so that the mca ras block can
be inserted into the amdgpu device ras block linked list.
5. Invoke amdgpu_ras_register_ras_block to register the mca ras block into
the amdgpu device ras block linked list.
6. Remove the redundant mca code in amdgpu_ras.c after switching to the
unified ras block.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 18 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c |  6 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h | 14 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 29 +--
 drivers/gpu/drm/amd/amdgpu/mca_v3_0.c   | 67 +++--
 5 files changed, 68 insertions(+), 66 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index ead143214448..065d98cc028f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -467,23 +467,23 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
return r;
}
 
-   if (adev->mca.mp0.ras_funcs &&
-   adev->mca.mp0.ras_funcs->ras_late_init) {
-   r = adev->mca.mp0.ras_funcs->ras_late_init(adev);
+   if (adev->mca.mp0.ras && adev->mca.mp0.ras->ras_block.ops &&
+   adev->mca.mp0.ras->ras_block.ops->ras_late_init) {
+   r = adev->mca.mp0.ras->ras_block.ops->ras_late_init(adev);
if (r)
return r;
}
 
-   if (adev->mca.mp1.ras_funcs &&
-   adev->mca.mp1.ras_funcs->ras_late_init) {
-   r = adev->mca.mp1.ras_funcs->ras_late_init(adev);
+   if (adev->mca.mp1.ras && adev->mca.mp1.ras->ras_block.ops &&
+   adev->mca.mp1.ras->ras_block.ops->ras_late_init) {
+   r = adev->mca.mp1.ras->ras_block.ops->ras_late_init(adev);
if (r)
return r;
}
 
-   if (adev->mca.mpio.ras_funcs &&
-   adev->mca.mpio.ras_funcs->ras_late_init) {
-   r = adev->mca.mpio.ras_funcs->ras_late_init(adev);
+   if (adev->mca.mpio.ras && adev->mca.mpio.ras->ras_block.ops &&
+   adev->mca.mpio.ras->ras_block.ops->ras_late_init) {
+   r = adev->mca.mpio.ras->ras_block.ops->ras_late_init(adev);
if (r)
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
index ce538f4819f9..86dbe485a644 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
@@ -79,15 +79,15 @@ int amdgpu_mca_ras_late_init(struct amdgpu_device *adev,
.cb = NULL,
};
struct ras_fs_if fs_info = {
-   .sysfs_name = mca_dev->ras_funcs->sysfs_name,
+   .sysfs_name = mca_dev->ras->ras_block.name,
};
 
if (!mca_dev->ras_if) {
mca_dev->ras_if = kmalloc(sizeof(struct ras_common_if), 
GFP_KERNEL);
if (!mca_dev->ras_if)
return -ENOMEM;
-   mca_dev->ras_if->block = mca_dev->ras_funcs->ras_block;
-   mca_dev->ras_if->sub_block_index = 
mca_dev->ras_funcs->ras_sub_block;
+   mca_dev->ras_if->block = mca_dev->ras->ras_block.block;
+   mca_dev->ras_if->sub_block_index = 
mca_dev->ras->ras_block.sub_block_index;
mca_dev->ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
}
ih_info.head = fs_info.head = *mca_dev->ras_if;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
index c74bc7177066..be030c4031d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
@@ -21,21 +21,13 @@
 #ifndef __AMDGPU_MCA_H__
 #define __AMDGPU_MCA_H__
 
-struct amdgpu_mca_ras_funcs {
-   int (*ras_late_init)(struct amdgpu_device *adev);
-   void (*ras_fini)(struct amdgpu_device *adev);
-   void (*query_ras_error_count)(struct amdgpu_device *adev,
- void *ras_error_status);
-   void (*query_ras_error_address)(struct amdgpu_device *adev,
-   void *ras_error_status);
-   uint32_t ras_block;
-   uint32_t ras_sub_block;
-   const char* sysfs_name;
+struct amdgpu_mca_ras_block {
+   struct amdgpu_ras_block_object ras_bloc

[PATCH V2 09/11] drm/amdgpu: Modify sdma block to fit for the unified ras block data and ops

2021-12-01 Thread yipechai
1. Modify the sdma block to fit the unified ras block data and ops.
2. Implement the .ras_block_match function pointer for the sdma block to identify itself.
3. Change amdgpu_sdma_ras_funcs to amdgpu_sdma_ras, and remove the _funcs
suffix from the corresponding variable name.
4. Remove the const flag of the sdma ras variable so that the sdma ras block
can be inserted into the amdgpu device ras block linked list.
5. Invoke amdgpu_ras_register_ras_block to register the sdma ras block into
the amdgpu device ras block linked list.
6. Remove the redundant sdma code in amdgpu_ras.c after switching to the
unified ras block.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c  |  9 
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 13 ++---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   | 61 +++-
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c   | 40 ++--
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4.h   |  2 +-
 5 files changed, 92 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 7d050afd7e2e..6a145d0e0032 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -939,12 +939,6 @@ int amdgpu_ras_query_error_status(struct amdgpu_device 
*adev,
block_obj->ops->query_ras_error_address(adev, 
_data);
break;
case AMDGPU_RAS_BLOCK__SDMA:
-   if (adev->sdma.funcs->query_ras_error_count) {
-   for (i = 0; i < adev->sdma.num_instances; i++)
-   adev->sdma.funcs->query_ras_error_count(adev, i,
-   
_data);
-   }
-   break;
case AMDGPU_RAS_BLOCK__GFX:
case AMDGPU_RAS_BLOCK__MMHUB:
if (!block_obj || !block_obj->ops)  {
@@ -1049,9 +1043,6 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device 
*adev,
block_obj->ops->reset_ras_error_status(adev);
break;
case AMDGPU_RAS_BLOCK__SDMA:
-   if (adev->sdma.funcs->reset_ras_error_count)
-   adev->sdma.funcs->reset_ras_error_count(adev);
-   break;
case AMDGPU_RAS_BLOCK__HDP:
if (!block_obj || !block_obj->ops)  {
dev_info(adev->dev, "%s don't config ras function \n", 
ras_block_str(block));
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index f8fb755e3aa6..a0761cf50ae0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -23,6 +23,7 @@
 
 #ifndef __AMDGPU_SDMA_H__
 #define __AMDGPU_SDMA_H__
+#include "amdgpu_ras.h"
 
 /* max number of IP instances */
 #define AMDGPU_MAX_SDMA_INSTANCES  8
@@ -50,13 +51,9 @@ struct amdgpu_sdma_instance {
boolburst_nop;
 };
 
-struct amdgpu_sdma_ras_funcs {
-   int (*ras_late_init)(struct amdgpu_device *adev,
-   void *ras_ih_info);
-   void (*ras_fini)(struct amdgpu_device *adev);
-   int (*query_ras_error_count)(struct amdgpu_device *adev,
-   uint32_t instance, void *ras_error_status);
-   void (*reset_ras_error_count)(struct amdgpu_device *adev);
+struct amdgpu_sdma_ras {
+   struct amdgpu_ras_block_object ras_block;
+   int (*sdma_ras_late_init)(struct amdgpu_device *adev, void 
*ras_ih_info);
 };
 
 struct amdgpu_sdma {
@@ -73,7 +70,7 @@ struct amdgpu_sdma {
uint32_tsrbm_soft_reset;
boolhas_page_queue;
struct ras_common_if*ras_if;
-   const struct amdgpu_sdma_ras_funcs  *funcs;
+   struct amdgpu_sdma_ras  *ras;
 };
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 69c9e460c1eb..30a651613776 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1898,13 +1898,13 @@ static int sdma_v4_0_late_init(void *handle)
sdma_v4_0_setup_ulv(adev);
 
if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
-   if (adev->sdma.funcs &&
-   adev->sdma.funcs->reset_ras_error_count)
-   adev->sdma.funcs->reset_ras_error_count(adev);
+   if (adev->sdma.ras && adev->sdma.ras->ras_block.ops &&
+   adev->sdma.ras->ras_block.ops->reset_ras_error_count)
+   
adev->sdma.ras->ras_block.ops->reset_ras_error_count(adev);
}
 
-   if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init)
-   return adev->sdma.funcs->ras_late_init(adev, _info);
+   if (adev->sdma.ras && adev->sd

[PATCH V2 08/11] drm/amdgpu: Modify umc block to fit for the unified ras block data and ops

2021-12-01 Thread yipechai
1. Modify the umc block to fit the unified ras block data and ops.
2. Implement the .ras_block_match function pointer for the umc block to identify itself.
3. Change amdgpu_umc_ras_funcs to amdgpu_umc_ras, and remove the _funcs suffix
from the corresponding variable name.
4. Remove the const flag of the umc ras variable so that the umc ras block can
be inserted into the amdgpu device ras block linked list.
5. Invoke amdgpu_ras_register_ras_block to register the umc ras block into
the amdgpu device ras block linked list.
6. Remove the redundant umc code in amdgpu_ras.c after switching to the
unified ras block.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 12 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 21 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 18 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 13 -
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  4 +++-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   |  9 ++---
 drivers/gpu/drm/amd/amdgpu/umc_v6_1.c   | 25 +++--
 drivers/gpu/drm/amd/amdgpu/umc_v6_1.h   |  2 +-
 drivers/gpu/drm/amd/amdgpu/umc_v6_7.c   | 23 ++-
 drivers/gpu/drm/amd/amdgpu/umc_v6_7.h   |  2 +-
 drivers/gpu/drm/amd/amdgpu/umc_v8_7.c   | 25 +++--
 drivers/gpu/drm/amd/amdgpu/umc_v8_7.h   |  2 +-
 12 files changed, 111 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 317b5e93a1f0..ead143214448 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -434,9 +434,9 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
 {
int r;
 
-   if (adev->umc.ras_funcs &&
-   adev->umc.ras_funcs->ras_late_init) {
-   r = adev->umc.ras_funcs->ras_late_init(adev);
+   if (adev->umc.ras && adev->umc.ras->ras_block.ops &&
+   adev->umc.ras->ras_block.ops->ras_late_init) {
+   r = adev->umc.ras->ras_block.ops->ras_late_init(adev);
if (r)
return r;
}
@@ -493,9 +493,9 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
 
 void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
 {
-   if (adev->umc.ras_funcs &&
-   adev->umc.ras_funcs->ras_fini)
-   adev->umc.ras_funcs->ras_fini(adev);
+   if (adev->umc.ras && adev->umc.ras->ras_block.ops &&
+   adev->umc.ras->ras_block.ops->ras_fini)
+   adev->umc.ras->ras_block.ops->ras_fini(adev);
 
if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ops &&
adev->mmhub.ras->ras_block.ops->ras_fini)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 273a550741e4..7d050afd7e2e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -925,15 +925,18 @@ int amdgpu_ras_query_error_status(struct amdgpu_device 
*adev,
 
switch (info->head.block) {
case AMDGPU_RAS_BLOCK__UMC:
-   if (adev->umc.ras_funcs &&
-   adev->umc.ras_funcs->query_ras_error_count)
-   adev->umc.ras_funcs->query_ras_error_count(adev, 
_data);
+   if (!block_obj || !block_obj->ops)  {
+   dev_info(adev->dev, "%s don't config ras function \n",
+   get_ras_block_str(>head));
+   return -EINVAL;
+   }
+   if (block_obj->ops->query_ras_error_count)
+   block_obj->ops->query_ras_error_count(adev, _data);
/* umc query_ras_error_address is also responsible for clearing
 * error status
 */
-   if (adev->umc.ras_funcs &&
-   adev->umc.ras_funcs->query_ras_error_address)
-   adev->umc.ras_funcs->query_ras_error_address(adev, 
_data);
+   if (block_obj->ops->query_ras_error_address)
+   block_obj->ops->query_ras_error_address(adev, 
_data);
break;
case AMDGPU_RAS_BLOCK__SDMA:
if (adev->sdma.funcs->query_ras_error_count) {
@@ -2359,12 +2362,12 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
/* Init poison supported flag, the default value is false */
if (adev->df.funcs &&
adev->df.funcs->query_ras_poison_mode &&
-   adev->umc.ras_funcs &&
-   adev->umc.ras_funcs->query_ras_poison_mode) {
+   adev->umc.ras && adev->umc.ras->ras_block.ops &&
+   adev->umc.ras->ras_block.

[PATCH V2 07/11] drm/amdgpu: Modify nbio block to fit for the unified ras block data and ops

2021-12-01 Thread yipechai
1. Modify nbio block to fit the unified ras block data and ops.
2. Implement .ras_block_match function pointer for nbio block to identify itself.
3. Change amdgpu_nbio_ras_funcs to amdgpu_nbio_ras, and remove the _funcs suffix 
from the corresponding variable name.
4. Remove the const flag of nbio ras variable so that nbio ras block can be 
inserted into amdgpu device ras block link list.
5. Invoke amdgpu_ras_register_ras_block function to register nbio ras block into 
amdgpu device ras block link list.
6. Remove the redundant code about nbio in amdgpu_ras.c after using the unified 
ras block.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c  | 12 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h |  9 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c  | 22 -
 drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c   | 30 
 drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h   |  2 +-
 drivers/gpu/drm/amd/amdgpu/soc15.c   | 20 
 6 files changed, 56 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 5208b2dd176a..24feceb51289 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -208,13 +208,13 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg)
 * ack the interrupt if it is there
 */
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF)) {
-   if (adev->nbio.ras_funcs &&
-   adev->nbio.ras_funcs->handle_ras_controller_intr_no_bifring)
-   
adev->nbio.ras_funcs->handle_ras_controller_intr_no_bifring(adev);
+   if (adev->nbio.ras &&
+   adev->nbio.ras->handle_ras_controller_intr_no_bifring)
+   
adev->nbio.ras->handle_ras_controller_intr_no_bifring(adev);
 
-   if (adev->nbio.ras_funcs &&
-   
adev->nbio.ras_funcs->handle_ras_err_event_athub_intr_no_bifring)
-   
adev->nbio.ras_funcs->handle_ras_err_event_athub_intr_no_bifring(adev);
+   if (adev->nbio.ras &&
+   adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring)
+   
adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring(adev);
}
 
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
index 843052205bd5..4a1fb85939d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
@@ -47,15 +47,12 @@ struct nbio_hdp_flush_reg {
u32 ref_and_mask_sdma7;
 };
 
-struct amdgpu_nbio_ras_funcs {
+struct amdgpu_nbio_ras {
+   struct amdgpu_ras_block_object ras_block;
void (*handle_ras_controller_intr_no_bifring)(struct amdgpu_device 
*adev);
void (*handle_ras_err_event_athub_intr_no_bifring)(struct amdgpu_device 
*adev);
int (*init_ras_controller_interrupt)(struct amdgpu_device *adev);
int (*init_ras_err_event_athub_interrupt)(struct amdgpu_device *adev);
-   void (*query_ras_error_count)(struct amdgpu_device *adev,
- void *ras_error_status);
-   int (*ras_late_init)(struct amdgpu_device *adev);
-   void (*ras_fini)(struct amdgpu_device *adev);
 };
 
 struct amdgpu_nbio_funcs {
@@ -104,7 +101,7 @@ struct amdgpu_nbio {
struct amdgpu_irq_src ras_err_event_athub_irq;
struct ras_common_if *ras_if;
const struct amdgpu_nbio_funcs *funcs;
-   const struct amdgpu_nbio_ras_funcs *ras_funcs;
+   struct amdgpu_nbio_ras  *ras;
 };
 
 int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index d705d8b1daf6..273a550741e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -957,10 +957,6 @@ int amdgpu_ras_query_error_status(struct amdgpu_device 
*adev,
block_obj->ops->query_ras_error_status(adev);
break;
case AMDGPU_RAS_BLOCK__PCIE_BIF:
-   if (adev->nbio.ras_funcs &&
-   adev->nbio.ras_funcs->query_ras_error_count)
-   adev->nbio.ras_funcs->query_ras_error_count(adev, 
_data);
-   break;
case AMDGPU_RAS_BLOCK__XGMI_WAFL:
case AMDGPU_RAS_BLOCK__HDP:
if (!block_obj || !block_obj->ops)  {
@@ -2336,24 +2332,26 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
case CHIP_VEGA20:
case CHIP_ARCTURUS:
case CHIP_ALDEBARAN:
-   if (!adev->gmc.xgmi.connected_to_cpu)
-   adev->nbio.ras_funcs = _v7_4_ras_funcs;
+   if (!adev->gmc.xgmi.connected_to_cpu) {

[PATCH V2 06/11] drm/amdgpu: Modify mmhub block to fit for the unified ras block data and ops

2021-12-01 Thread yipechai
1. Modify mmhub block to fit the unified ras block data and ops.
2. Implement .ras_block_match function pointer for mmhub block to identify 
itself.
3. Change amdgpu_mmhub_ras_funcs to amdgpu_mmhub_ras, and remove the _funcs 
suffix from the corresponding variable name.
4. Remove the const flag of mmhub ras variable so that mmhub ras block can be 
inserted into amdgpu device ras block link list.
5. Invoke amdgpu_ras_register_ras_block function to register mmhub ras block 
into amdgpu device ras block link list.
6. Remove the redundant code about mmhub in amdgpu_ras.c after using the unified 
ras block.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c| 12 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h  | 12 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c| 49 +++---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 16 ---
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c| 23 +-
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h|  2 +-
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c| 23 +-
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.h|  2 +-
 drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c| 23 +-
 drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h|  2 +-
 11 files changed, 108 insertions(+), 68 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 0980396ee709..c7d5592f0cf6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3377,9 +3377,9 @@ static void amdgpu_device_xgmi_reset_func(struct 
work_struct *__work)
if (adev->asic_reset_res)
goto fail;
 
-   if (adev->mmhub.ras_funcs &&
-   adev->mmhub.ras_funcs->reset_ras_error_count)
-   adev->mmhub.ras_funcs->reset_ras_error_count(adev);
+   if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ops &&
+   adev->mmhub.ras->ras_block.ops->reset_ras_error_count)
+   
adev->mmhub.ras->ras_block.ops->reset_ras_error_count(adev);
} else {
 
task_barrier_full(>tb);
@@ -4705,9 +4705,9 @@ int amdgpu_do_asic_reset(struct list_head 
*device_list_handle,
 
if (!r && amdgpu_ras_intr_triggered()) {
list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
-   if (tmp_adev->mmhub.ras_funcs &&
-   tmp_adev->mmhub.ras_funcs->reset_ras_error_count)
-   
tmp_adev->mmhub.ras_funcs->reset_ras_error_count(tmp_adev);
+   if (tmp_adev->mmhub.ras && 
tmp_adev->mmhub.ras->ras_block.ops &&
+   
tmp_adev->mmhub.ras->ras_block.ops->reset_ras_error_count)
+   
tmp_adev->mmhub.ras->ras_block.ops->reset_ras_error_count(tmp_adev);
}
 
amdgpu_ras_intr_cleared();
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 0d06e7a2b951..317b5e93a1f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -441,9 +441,9 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
return r;
}
 
-   if (adev->mmhub.ras_funcs &&
-   adev->mmhub.ras_funcs->ras_late_init) {
-   r = adev->mmhub.ras_funcs->ras_late_init(adev);
+   if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ops &&
+   adev->mmhub.ras->ras_block.ops->ras_late_init) {
+   r = adev->mmhub.ras->ras_block.ops->ras_late_init(adev);
if (r)
return r;
}
@@ -497,9 +497,9 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
adev->umc.ras_funcs->ras_fini)
adev->umc.ras_funcs->ras_fini(adev);
 
-   if (adev->mmhub.ras_funcs &&
-   adev->mmhub.ras_funcs->ras_fini)
-   adev->mmhub.ras_funcs->ras_fini(adev);
+   if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ops &&
+   adev->mmhub.ras->ras_block.ops->ras_fini)
+   adev->mmhub.ras->ras_block.ops->ras_fini(adev);
 
if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ops &&
adev->gmc.xgmi.ras->ras_block.ops->ras_fini)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
index b27fcbccce2b..6d10b3f248db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgp

[PATCH V2 04/11] drm/amdgpu: Modify gmc block to fit for the unified ras block data and ops

2021-12-01 Thread yipechai
1. Modify gmc block to fit the unified ras block data and ops.
2. Implement .ras_block_match function pointer for gmc block to identify itself.
3. Change amdgpu_xgmi_ras_funcs to amdgpu_xgmi_ras, and remove the _funcs suffix 
from the corresponding variable name.
4. Remove the const flag of gmc ras variable so that gmc ras block can be 
inserted into amdgpu device ras block link list.
5. Invoke amdgpu_ras_register_ras_block function to register gmc ras block into 
amdgpu device ras block link list.
6. Remove the redundant code about gmc in amdgpu_ras.c after using the unified 
ras block.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  | 18 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h  | 11 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c  | 10 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 31 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h |  4 +--
 5 files changed, 48 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 83f26bca7dac..3ba2f0f1f1b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -448,12 +448,14 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
return r;
}
 
-   if (!adev->gmc.xgmi.connected_to_cpu)
-   adev->gmc.xgmi.ras_funcs = _ras_funcs;
+   if (!adev->gmc.xgmi.connected_to_cpu) {
+   adev->gmc.xgmi.ras = _ras;
+   amdgpu_ras_register_ras_block(adev, 
>gmc.xgmi.ras->ras_block);
+   }
 
-   if (adev->gmc.xgmi.ras_funcs &&
-   adev->gmc.xgmi.ras_funcs->ras_late_init) {
-   r = adev->gmc.xgmi.ras_funcs->ras_late_init(adev);
+   if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ops &&
+   adev->gmc.xgmi.ras->ras_block.ops->ras_late_init) {
+   r = adev->gmc.xgmi.ras->ras_block.ops->ras_late_init(adev);
if (r)
return r;
}
@@ -499,9 +501,9 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
adev->mmhub.ras_funcs->ras_fini)
adev->mmhub.ras_funcs->ras_fini(adev);
 
-   if (adev->gmc.xgmi.ras_funcs &&
-   adev->gmc.xgmi.ras_funcs->ras_fini)
-   adev->gmc.xgmi.ras_funcs->ras_fini(adev);
+   if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ops &&
+   adev->gmc.xgmi.ras->ras_block.ops->ras_fini)
+   adev->gmc.xgmi.ras->ras_block.ops->ras_fini(adev);
 
if (adev->hdp.ras_funcs &&
adev->hdp.ras_funcs->ras_fini)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index e55201134a01..923db5ff5859 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -29,6 +29,7 @@
 #include 
 
 #include "amdgpu_irq.h"
+#include "amdgpu_ras.h"
 
 /* VA hole for 48bit addresses on Vega10 */
 #define AMDGPU_GMC_HOLE_START  0x8000ULL
@@ -135,12 +136,8 @@ struct amdgpu_gmc_funcs {
unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev);
 };
 
-struct amdgpu_xgmi_ras_funcs {
-   int (*ras_late_init)(struct amdgpu_device *adev);
-   void (*ras_fini)(struct amdgpu_device *adev);
-   int (*query_ras_error_count)(struct amdgpu_device *adev,
-void *ras_error_status);
-   void (*reset_ras_error_count)(struct amdgpu_device *adev);
+struct amdgpu_xgmi_ras {
+   struct amdgpu_ras_block_object ras_block;
 };
 
 struct amdgpu_xgmi {
@@ -159,7 +156,7 @@ struct amdgpu_xgmi {
struct ras_common_if *ras_if;
bool connected_to_cpu;
bool pending_reset;
-   const struct amdgpu_xgmi_ras_funcs *ras_funcs;
+   struct amdgpu_xgmi_ras *ras;
 };
 
 struct amdgpu_gmc {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 190a4a4e9d7a..a6a2f928c6ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -970,9 +970,13 @@ int amdgpu_ras_query_error_status(struct amdgpu_device 
*adev,
adev->nbio.ras_funcs->query_ras_error_count(adev, 
_data);
break;
case AMDGPU_RAS_BLOCK__XGMI_WAFL:
-   if (adev->gmc.xgmi.ras_funcs &&
-   adev->gmc.xgmi.ras_funcs->query_ras_error_count)
-   adev->gmc.xgmi.ras_funcs->query_ras_error_count(adev, 
_data);
+   if (!block_obj || !block_obj->ops)  {
+   dev_info(adev->dev, "%s don't config ras function \n",
+   get_ras_block_str(>head));
+  

[PATCH V2 05/11] drm/amdgpu: Modify hdp block to fit for the unified ras block data and ops

2021-12-01 Thread yipechai
1. Modify hdp block to fit the unified ras block data and ops.
2. Implement .ras_block_match function pointer for hdp block to identify itself.
3. Change amdgpu_hdp_ras_funcs to amdgpu_hdp_ras, and remove the _funcs suffix 
from the corresponding variable name.
4. Remove the const flag of hdp ras variable so that hdp ras block can be 
inserted into amdgpu device ras block link list.
5. Invoke amdgpu_ras_register_ras_block function to register hdp ras block into 
amdgpu device ras block link list.
6. Remove the redundant code about hdp in amdgpu_ras.c after using the unified 
ras block.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 12 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h | 11 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 16 
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   |  9 +
 drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c   | 22 +-
 drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h   |  2 +-
 6 files changed, 45 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 3ba2f0f1f1b4..0d06e7a2b951 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -460,9 +460,9 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
return r;
}
 
-   if (adev->hdp.ras_funcs &&
-   adev->hdp.ras_funcs->ras_late_init) {
-   r = adev->hdp.ras_funcs->ras_late_init(adev);
+   if (adev->hdp.ras && adev->hdp.ras->ras_block.ops &&
+   adev->hdp.ras->ras_block.ops->ras_late_init) {
+   r = adev->hdp.ras->ras_block.ops->ras_late_init(adev);
if (r)
return r;
}
@@ -505,9 +505,9 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
adev->gmc.xgmi.ras->ras_block.ops->ras_fini)
adev->gmc.xgmi.ras->ras_block.ops->ras_fini(adev);
 
-   if (adev->hdp.ras_funcs &&
-   adev->hdp.ras_funcs->ras_fini)
-   adev->hdp.ras_funcs->ras_fini(adev);
+   if (adev->hdp.ras && adev->hdp.ras->ras_block.ops &&
+   adev->hdp.ras->ras_block.ops->ras_fini)
+   adev->hdp.ras->ras_block.ops->ras_fini(adev);
 }
 
/*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
index 7ec99d591584..6e53898fb283 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
@@ -22,13 +22,10 @@
  */
 #ifndef __AMDGPU_HDP_H__
 #define __AMDGPU_HDP_H__
+#include "amdgpu_ras.h"
 
-struct amdgpu_hdp_ras_funcs {
-   int (*ras_late_init)(struct amdgpu_device *adev);
-   void (*ras_fini)(struct amdgpu_device *adev);
-   void (*query_ras_error_count)(struct amdgpu_device *adev,
- void *ras_error_status);
-   void (*reset_ras_error_count)(struct amdgpu_device *adev);
+struct amdgpu_hdp_ras{
+   struct amdgpu_ras_block_object ras_block;
 };
 
 struct amdgpu_hdp_funcs {
@@ -43,7 +40,7 @@ struct amdgpu_hdp_funcs {
 struct amdgpu_hdp {
struct ras_common_if*ras_if;
const struct amdgpu_hdp_funcs   *funcs;
-   const struct amdgpu_hdp_ras_funcs   *ras_funcs;
+   struct amdgpu_hdp_ras   *ras;
 };
 
 int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index a6a2f928c6ca..bed414404c6f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -970,6 +970,7 @@ int amdgpu_ras_query_error_status(struct amdgpu_device 
*adev,
adev->nbio.ras_funcs->query_ras_error_count(adev, 
_data);
break;
case AMDGPU_RAS_BLOCK__XGMI_WAFL:
+   case AMDGPU_RAS_BLOCK__HDP:
if (!block_obj || !block_obj->ops)  {
dev_info(adev->dev, "%s don't config ras function \n",
get_ras_block_str(>head));
@@ -978,11 +979,6 @@ int amdgpu_ras_query_error_status(struct amdgpu_device 
*adev,
if (block_obj->ops->query_ras_error_count)
block_obj->ops->query_ras_error_count(adev, _data);
break;
-   case AMDGPU_RAS_BLOCK__HDP:
-   if (adev->hdp.ras_funcs &&
-   adev->hdp.ras_funcs->query_ras_error_count)
-   adev->hdp.ras_funcs->query_ras_error_count(adev, 
_data);
-   break;
case AMDGPU_RAS_BLOCK__MCA:
amdgpu_ras_mca_query_error_status(adev, >head, _data);
break;
@@ -1074,9 +1070,13 @@ int amdgp

[PATCH V2 03/11] drm/amdgpu: Modify gfx block to fit for the unified ras block data and ops

2021-12-01 Thread yipechai
1. Modify gfx block to fit the unified ras block data and ops.
2. Implement .ras_block_match function pointer for gfx block to identify itself.
3. Change amdgpu_gfx_ras_funcs to amdgpu_gfx_ras, and remove the _funcs suffix 
from the corresponding variable name.
4. Remove the const flag of gfx ras variable so that gfx ras block can be 
inserted into amdgpu device ras block link list.
5. Invoke amdgpu_ras_register_ras_block function to register gfx ras block into 
amdgpu device ras block link list.
6. Remove the redundant code about gfx in amdgpu_ras.c after using the unified 
ras block.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c |  6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 15 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 80 ++---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 73 +++---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c   | 39 
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h   |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c | 42 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h |  2 +-
 8 files changed, 178 insertions(+), 81 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 1795d448c700..da8691259ac1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -696,9 +696,9 @@ int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device 
*adev,
 */
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
-   if (adev->gfx.ras_funcs &&
-   adev->gfx.ras_funcs->query_ras_error_count)
-   adev->gfx.ras_funcs->query_ras_error_count(adev, 
err_data);
+   if (adev->gfx.ras && adev->gfx.ras->ras_block.ops &&
+   adev->gfx.ras->ras_block.ops->query_ras_error_count)
+   
adev->gfx.ras->ras_block.ops->query_ras_error_count(adev, err_data);
amdgpu_ras_reset_gpu(adev);
}
return AMDGPU_RAS_SUCCESS;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 6b78b4a0e182..ff4a8428a84b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -31,6 +31,7 @@
 #include "amdgpu_ring.h"
 #include "amdgpu_rlc.h"
 #include "soc15.h"
+#include "amdgpu_ras.h"
 
 /* GFX current status */
 #define AMDGPU_GFX_NORMAL_MODE 0xL
@@ -213,16 +214,8 @@ struct amdgpu_cu_info {
uint32_t bitmap[4][4];
 };
 
-struct amdgpu_gfx_ras_funcs {
-   int (*ras_late_init)(struct amdgpu_device *adev);
-   void (*ras_fini)(struct amdgpu_device *adev);
-   int (*ras_error_inject)(struct amdgpu_device *adev,
-   void *inject_if);
-   int (*query_ras_error_count)(struct amdgpu_device *adev,
-void *ras_error_status);
-   void (*reset_ras_error_count)(struct amdgpu_device *adev);
-   void (*query_ras_error_status)(struct amdgpu_device *adev);
-   void (*reset_ras_error_status)(struct amdgpu_device *adev);
+struct amdgpu_gfx_ras {
+   struct amdgpu_ras_block_object  ras_block;
void (*enable_watchdog_timer)(struct amdgpu_device *adev);
 };
 
@@ -348,7 +341,7 @@ struct amdgpu_gfx {
 
/*ras */
struct ras_common_if*ras_if;
-   const struct amdgpu_gfx_ras_funcs   *ras_funcs;
+   struct amdgpu_gfx_ras   *ras;
 };
 
 #define amdgpu_gfx_get_gpu_clock_counter(adev) 
(adev)->gfx.funcs->get_gpu_clock_counter((adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 1cf1f6331db1..190a4a4e9d7a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -862,6 +862,27 @@ static int amdgpu_ras_enable_all_features(struct 
amdgpu_device *adev,
 }
 /* feature ctl end */
 
+static struct amdgpu_ras_block_object* amdgpu_ras_get_ras_block(struct 
amdgpu_device *adev,
+   enum amdgpu_ras_block block, uint32_t 
sub_block_index)
+{
+   struct amdgpu_ras_block_object *obj, *tmp;
+
+   if (block >= AMDGPU_RAS_BLOCK__LAST) {
+   return NULL;
+   }
+
+   list_for_each_entry_safe(obj, tmp, >ras_list, node) {
+   if( !obj->ops || !obj->ops->ras_block_match) {
+   dev_info(adev->dev, "%s don't config ops or  
ras_block_match\n", obj->name);
+   continue;
+   }
+   if (!obj->ops->ras_block_match(obj, block, sub_block_index)) {
+   return obj;
+   }
+   }
+
+   return NULL;
+}
 
 void amdgpu_ras_mca_query_error_status(struct

[PATCH V2 02/11] drm/amdgpu: Modify the compilation failed problem when other ras blocks' .h include amdgpu_ras.h

2021-12-01 Thread yipechai
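Fix the compilation failure that occurs when other ras blocks' header files 
include amdgpu_ras.h. The static inline helpers amdgpu_ras_is_supported() and 
amdgpu_ras_reset_gpu() are moved out of amdgpu_ras.h into amdgpu_ras.c, leaving 
only their declarations in the header.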

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 22 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 23 ---
 2 files changed, 26 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 8713575c7cf1..1cf1f6331db1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2739,6 +2739,28 @@ static void amdgpu_register_bad_pages_mca_notifier(void)
 }
 }
 #endif
+
+/* check if ras is supported on block, say, sdma, gfx */
+int amdgpu_ras_is_supported(struct amdgpu_device *adev,
+   unsigned int block)
+{
+   struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+   if (block >= AMDGPU_RAS_BLOCK_COUNT)
+   return 0;
+   return ras && (adev->ras_enabled & (1 << block));
+}
+
+int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
+{
+   struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+   if (atomic_cmpxchg(>in_recovery, 0, 1) == 0)
+   schedule_work(>recovery_work);
+   return 0;
+}
+
+
 /* Rigister each ip ras block into amdgpu ras */
 int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
struct amdgpu_ras_block_object* ras_block_obj)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index d6e5e3c862bd..41623a649fa1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -514,16 +514,6 @@ struct amdgpu_ras_block_ops {
 #define amdgpu_ras_get_context(adev)   ((adev)->psp.ras_context.ras)
 #define amdgpu_ras_set_context(adev, ras_con)  ((adev)->psp.ras_context.ras = 
(ras_con))
 
-/* check if ras is supported on block, say, sdma, gfx */
-static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev,
-   unsigned int block)
-{
-   struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
-
-   if (block >= AMDGPU_RAS_BLOCK_COUNT)
-   return 0;
-   return ras && (adev->ras_enabled & (1 << block));
-}
 
 int amdgpu_ras_recovery_init(struct amdgpu_device *adev);
 
@@ -540,15 +530,6 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
 
 int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev);
 
-static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
-{
-   struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
-
-   if (atomic_cmpxchg(>in_recovery, 0, 1) == 0)
-   schedule_work(>recovery_work);
-   return 0;
-}
-
 static inline enum ta_ras_block
 amdgpu_ras_block_to_ta(enum amdgpu_ras_block block) {
switch (block) {
@@ -680,5 +661,9 @@ const char *get_ras_block_str(struct ras_common_if 
*ras_block);
 
 bool amdgpu_ras_is_poison_mode_supported(struct amdgpu_device *adev);
 
+int amdgpu_ras_is_supported(struct amdgpu_device *adev,unsigned int 
block);
+
+int amdgpu_ras_reset_gpu(struct amdgpu_device *adev);
+
 int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, struct 
amdgpu_ras_block_object* ras_block_obj);
 #endif
-- 
2.25.1



[PATCH V2 01/11] drm/amdgpu: Unify ras block interface for each ras block

2021-12-01 Thread yipechai
1. Define a unified ops interface for each ras block.
2. Add a ras_block_match function pointer to the ops interface so that each ras 
block can identify itself.
3. Define unified basic ras block data for each ras block.
4. Create a dedicated amdgpu device ras block link list to manage all of the ras 
blocks.
5. Add a new amdgpu_ras_register_ras_block function interface for each ras block 
to register itself with the ras controlling block.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c| 12 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h| 29 ++
 4 files changed, 45 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index db1505455761..eddf230856e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1151,6 +1151,8 @@ struct amdgpu_device {
boolbarrier_has_auto_waitcnt;
 
struct amdgpu_reset_control *reset_cntl;
+
+   struct list_headras_list;
 };
 
 static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 73ec46140d68..0980396ee709 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3578,6 +3578,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
INIT_LIST_HEAD(>reset_list);
 
+   INIT_LIST_HEAD(>ras_list);
+
INIT_DELAYED_WORK(>delayed_init_work,
  amdgpu_device_delayed_init_work_handler);
INIT_DELAYED_WORK(>gfx.gfx_off_delay_work,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 90f0db3b4f65..8713575c7cf1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2739,3 +2739,15 @@ static void amdgpu_register_bad_pages_mca_notifier(void)
 }
 }
 #endif
+/* Rigister each ip ras block into amdgpu ras */
+int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
+   struct amdgpu_ras_block_object* ras_block_obj)
+{
+   if (!adev || !ras_block_obj)
+   return -EINVAL;
+
+   INIT_LIST_HEAD(_block_obj->node);
+   list_add_tail(_block_obj->node, >ras_list);
+
+   return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index cdd0010a5389..d6e5e3c862bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -469,6 +469,34 @@ struct ras_debug_if {
};
int op;
 };
+
+struct amdgpu_ras_block_object {
+   /* block name */
+   char name[32];
+
+   enum amdgpu_ras_block block;
+
+   uint32_t sub_block_index;
+
+   /* ras block link */
+   struct list_head node;
+
+   const struct amdgpu_ras_block_ops *ops;
+};
+
+struct amdgpu_ras_block_ops {
+   int (*ras_block_match)(struct amdgpu_ras_block_object* block_obj, enum 
amdgpu_ras_block block, uint32_t sub_block_index);
+   int (*ras_late_init)(struct amdgpu_device *adev);
+   void (*ras_fini)(struct amdgpu_device *adev);
+   int (*ras_error_inject)(struct amdgpu_device *adev, void *inject_if);
+   void  (*query_ras_error_count)(struct amdgpu_device *adev,void 
*ras_error_status);
+   void (*query_ras_error_status)(struct amdgpu_device *adev);
+   bool  (*query_ras_poison_mode)(struct amdgpu_device *adev);
+   void (*query_ras_error_address)(struct amdgpu_device *adev, void 
*ras_error_status);
+   void (*reset_ras_error_count)(struct amdgpu_device *adev);
+   void (*reset_ras_error_status)(struct amdgpu_device *adev);
+};
+
 /* work flow
  * vbios
  * 1: ras feature enable (enabled by default)
@@ -652,4 +680,5 @@ const char *get_ras_block_str(struct ras_common_if 
*ras_block);
 
 bool amdgpu_ras_is_poison_mode_supported(struct amdgpu_device *adev);
 
+int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, struct 
amdgpu_ras_block_object* ras_block_obj);
 #endif
-- 
2.25.1



[PATCH 8/9] drm/amdgpu: Modify umc block to fit for the unified ras function pointers.

2021-11-25 Thread yipechai
Modify umc block ras functions to fit the unified ras function pointers.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c |  8 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 12 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c |  8 
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h |  9 ++---
 drivers/gpu/drm/amd/amdgpu/umc_v6_1.c   | 10 ++
 drivers/gpu/drm/amd/amdgpu/umc_v6_7.c   | 12 +++-
 drivers/gpu/drm/amd/amdgpu/umc_v8_7.c   | 11 ++-
 7 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 7780effdf3ac..4499cc5186cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -435,8 +435,8 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
int r;
 
if (adev->umc.ras_funcs &&
-   adev->umc.ras_funcs->ras_late_init) {
-   r = adev->umc.ras_funcs->ras_late_init(adev);
+   adev->umc.ras_funcs->ops.ras_late_init) {
+   r = adev->umc.ras_funcs->ops.ras_late_init(adev);
if (r)
return r;
}
@@ -492,8 +492,8 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
 void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
 {
if (adev->umc.ras_funcs &&
-   adev->umc.ras_funcs->ras_fini)
-   adev->umc.ras_funcs->ras_fini(adev);
+   adev->umc.ras_funcs->ops.ras_fini)
+   adev->umc.ras_funcs->ops.ras_fini(adev);
 
if (adev->mmhub.ras_funcs &&
adev->mmhub.ras_funcs->ops.ras_fini)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 2c79172f6031..65306e0079af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -902,14 +902,14 @@ int amdgpu_ras_query_error_status(struct amdgpu_device 
*adev,
switch (info->head.block) {
case AMDGPU_RAS_BLOCK__UMC:
if (adev->umc.ras_funcs &&
-   adev->umc.ras_funcs->query_ras_error_count)
-   adev->umc.ras_funcs->query_ras_error_count(adev, 
_data);
+   adev->umc.ras_funcs->ops.query_ras_error_count)
+   adev->umc.ras_funcs->ops.query_ras_error_count(adev, 
_data);
/* umc query_ras_error_address is also responsible for clearing
 * error status
 */
if (adev->umc.ras_funcs &&
-   adev->umc.ras_funcs->query_ras_error_address)
-   adev->umc.ras_funcs->query_ras_error_address(adev, 
_data);
+   adev->umc.ras_funcs->ops.query_ras_error_address)
+   adev->umc.ras_funcs->ops.query_ras_error_address(adev, 
_data);
break;
case AMDGPU_RAS_BLOCK__SDMA:
if (adev->sdma.funcs->query_ras_error_count) {
@@ -2341,11 +2341,11 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
if (adev->df.funcs &&
adev->df.funcs->query_ras_poison_mode &&
adev->umc.ras_funcs &&
-   adev->umc.ras_funcs->query_ras_poison_mode) {
+   adev->umc.ras_funcs->ops.query_ras_poison_mode) {
df_poison =
adev->df.funcs->query_ras_poison_mode(adev);
umc_poison =
-   adev->umc.ras_funcs->query_ras_poison_mode(adev);
+   adev->umc.ras_funcs->ops.query_ras_poison_mode(adev);
/* Only poison is set in both DF and UMC, we can support it */
if (df_poison && umc_poison)
con->poison_supported = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index 0c7c56a91b25..9a44c410be06 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -98,11 +98,11 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device 
*adev,
 
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
if (adev->umc.ras_funcs &&
-   adev->umc.ras_funcs->query_ras_error_count)
-   adev->umc.ras_funcs->query_ras_error_count(adev, ras_error_status);
+   adev->umc.ras_funcs->ops.query_ras_error_count)
+   adev->umc.ras_funcs->ops.query_ras_error_count(adev, 
ras_error_status);
 
if (adev->umc.ras_funcs &&
-   adev->umc.ras_funcs->query_ras_error_address &&
+   adev->umc.ras_funcs->ops.query_ras_error_address &&
adev->umc.max_ras

  1   2   >