RE: [PATCH V2] drm/amdgpu: Move common initialization operations of each ras block to one function
[AMD Official Use Only] > -Original Message- > From: Chai, Thomas > Sent: Monday, March 7, 2022 4:43 PM > To: amd-gfx@lists.freedesktop.org > Cc: Chai, Thomas ; Zhang, Hawking > ; Zhou1, Tao ; Clements, > John ; Chai, Thomas > Subject: [PATCH V2] drm/amdgpu: Move common initialization operations of > each ras block to one function > > Define amdgpu_ras_sw_init function to initialize all ras blocks. > > V2: Modify error debugging information. > > Signed-off-by: yipechai > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 + > drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c| 2 - > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c| 143 - > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h| 1 + > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 21 --- > drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 16 --- > drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 28 > drivers/gpu/drm/amd/amdgpu/mca_v3_0.c | 6 - > drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 17 --- > 9 files changed, 148 insertions(+), 92 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index 6113ddc765a7..0c83eb69dad5 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -2402,6 +2402,12 @@ static int amdgpu_device_ip_init(struct > amdgpu_device *adev) > } > } > > + r = amdgpu_ras_sw_init(adev); > + if (r) { > + DRM_ERROR("amdgpu_ras_sw_init failed (%d).\n", r); > + goto init_failed; > + } > + > if (amdgpu_sriov_vf(adev)) > amdgpu_virt_init_data_exchange(adev); > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c > index ab75e189bc0b..544241f357b2 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c > @@ -440,8 +440,6 @@ int amdgpu_gmc_ras_early_init(struct amdgpu_device > *adev) { > if (!adev->gmc.xgmi.connected_to_cpu) { > adev->gmc.xgmi.ras = &xgmi_ras; > - amdgpu_ras_register_ras_block(adev, &adev->gmc.xgmi.ras- > >ras_block); > - adev->gmc.xgmi.ras_if = &adev->gmc.xgmi.ras- > >ras_block.ras_comm; > } > > return 0; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > index d3875618ebf5..89075ab9e82e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > @@ -2299,8 +2299,6 @@ int amdgpu_ras_init(struct amdgpu_device *adev) > case CHIP_ALDEBARAN: > if (!adev->gmc.xgmi.connected_to_cpu) { > adev->nbio.ras = &nbio_v7_4_ras; > - amdgpu_ras_register_ras_block(adev, &adev- > >nbio.ras->ras_block); > - adev->nbio.ras_if = &adev->nbio.ras- > >ras_block.ras_comm; > } > break; > default: > @@ -2533,6 +2531,147 @@ void amdgpu_ras_suspend(struct amdgpu_device > *adev) > amdgpu_ras_disable_all_features(adev, 1); } > > +int amdgpu_ras_sw_init(struct amdgpu_device *adev) { > + int err = 0; > + > + if (!amdgpu_ras_asic_supported(adev)) > + return 0; > + > + if (adev->nbio.ras) { > + err = amdgpu_ras_register_ras_block(adev, &adev->nbio.ras- > >ras_block); > + if (err) { > + dev_err(adev->dev, "Failed to register nbio ras > block!\n"); > + return err; > + } > + adev->nbio.ras_if = &adev->nbio.ras->ras_block.ras_comm; > + } > + > + if (adev->gmc.xgmi.ras) { > + err = amdgpu_ras_register_ras_block(adev, &adev- > >gmc.xgmi.ras->ras_block); > + if (err) { > + dev_err(adev->dev, "Failed to register xgmi ras > block!\n"); > + return err; > + } > + adev->gmc.xgmi.ras_if = &adev->gmc.xgmi.ras- > >ras_block.ras_comm; > + } > + > + if (adev->gfx.ras) { > + err = amdgpu_ras_register_ras_block(adev, &adev->gfx.ras- > >ras_block); > + if (err) { > + dev_err(adev->dev, "Failed to register gfx ras > block!\n"); > + return err; > + } > + > + strcpy(adev->gfx.ras->ras_block.ras_comm.name, "gfx"); > + adev->gfx.ras->ras_block.ras_comm.block = > AMDGPU_RAS_BLOCK__GFX; > + adev->gfx.ras->ras_block.ras_comm.type = > AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; > + adev->gfx.ras_if = &adev->gfx.ras->ras_block.ras_comm; > + > + /* If not define special ras_late_init function, use gfx default > ras_late_init */ > + if (!adev->gfx.ras->ras_block.ras_late_init) > + adev->gfx.ras->ras_block.ras_late_init = > amdgpu_gfx_ras_late_init; > + > + /* If not defined special ras_cb fu
RE: [PATCH V2] drm/amdgpu: Move common initialization operations of each ras block to one function
[AMD Official Use Only] OK -Original Message- From: Chen, Guchun Sent: Monday, March 7, 2022 5:11 PM To: Chai, Thomas ; amd-gfx@lists.freedesktop.org Cc: Zhou1, Tao ; Zhang, Hawking ; Clements, John ; Chai, Thomas ; Chai, Thomas Subject: RE: [PATCH V2] drm/amdgpu: Move common initialization operations of each ras block to one function if (!adev->gmc.xgmi.connected_to_cpu) { adev->gmc.xgmi.ras = &xgmi_ras; - amdgpu_ras_register_ras_block(adev, &adev->gmc.xgmi.ras->ras_block); - adev->gmc.xgmi.ras_if = &adev->gmc.xgmi.ras->ras_block.ras_comm; } Coding style needs to be fixed as well. '{}' should be dropped as there is only one line after upper if. Regards, Guchun -Original Message- From: amd-gfx On Behalf Of yipechai Sent: Monday, March 7, 2022 4:43 PM To: amd-gfx@lists.freedesktop.org Cc: Zhou1, Tao ; Zhang, Hawking ; Clements, John ; Chai, Thomas ; Chai, Thomas Subject: [PATCH V2] drm/amdgpu: Move common initialization operations of each ras block to one function Define amdgpu_ras_sw_init function to initialize all ras blocks. V2: Modify error debugging information. Signed-off-by: yipechai --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 + drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c| 2 - drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c| 143 - drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h| 1 + drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 21 --- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 16 --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 28 drivers/gpu/drm/amd/amdgpu/mca_v3_0.c | 6 - drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 17 --- 9 files changed, 148 insertions(+), 92 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 6113ddc765a7..0c83eb69dad5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2402,6 +2402,12 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) } } + r = amdgpu_ras_sw_init(adev); + if (r) { + DRM_ERROR("amdgpu_ras_sw_init failed (%d).\n", r); + goto init_failed; + } + if (amdgpu_sriov_vf(adev)) amdgpu_virt_init_data_exchange(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index ab75e189bc0b..544241f357b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -440,8 +440,6 @@ int amdgpu_gmc_ras_early_init(struct amdgpu_device *adev) { if (!adev->gmc.xgmi.connected_to_cpu) { adev->gmc.xgmi.ras = &xgmi_ras; - amdgpu_ras_register_ras_block(adev, &adev->gmc.xgmi.ras->ras_block); - adev->gmc.xgmi.ras_if = &adev->gmc.xgmi.ras->ras_block.ras_comm; } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index d3875618ebf5..89075ab9e82e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2299,8 +2299,6 @@ int amdgpu_ras_init(struct amdgpu_device *adev) case CHIP_ALDEBARAN: if (!adev->gmc.xgmi.connected_to_cpu) { adev->nbio.ras = &nbio_v7_4_ras; - amdgpu_ras_register_ras_block(adev, &adev->nbio.ras->ras_block); - adev->nbio.ras_if = &adev->nbio.ras->ras_block.ras_comm; } break; default: @@ -2533,6 +2531,147 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev) amdgpu_ras_disable_all_features(adev, 1); } +int amdgpu_ras_sw_init(struct amdgpu_device *adev) { + int err = 0; + + if (!amdgpu_ras_asic_supported(adev)) + return 0; + + if (adev->nbio.ras) { + err = amdgpu_ras_register_ras_block(adev, &adev->nbio.ras->ras_block); + if (err) { + dev_err(adev->dev, "Failed to register nbio ras block!\n"); + return err; + } + adev->nbio.ras_if = &adev->nbio.ras->ras_block.ras_comm; + } + + if (adev->gmc.xgmi.ras) { + err = amdgpu_ras_register_ras_block(adev, &adev->gmc.xgmi.ras->ras_block); + if (err) { + dev_err(adev->dev, "Failed to register xgmi ras block!\n"); + return err; + } + adev->gmc.xgmi.ras_if = &adev->gmc.xgmi.ras->ras_block.ras_comm; + } + + if (adev->gfx.ras) { + err = amdgpu_ras_register_ras_block(adev, &adev->gfx.ras-
RE: [PATCH V2] drm/amdgpu: Move common initialization operations of each ras block to one function
if (!adev->gmc.xgmi.connected_to_cpu) { adev->gmc.xgmi.ras = &xgmi_ras; - amdgpu_ras_register_ras_block(adev, &adev->gmc.xgmi.ras->ras_block); - adev->gmc.xgmi.ras_if = &adev->gmc.xgmi.ras->ras_block.ras_comm; } Coding style needs to be fixed as well. '{}' should be dropped as there is only one line after upper if. Regards, Guchun -Original Message- From: amd-gfx On Behalf Of yipechai Sent: Monday, March 7, 2022 4:43 PM To: amd-gfx@lists.freedesktop.org Cc: Zhou1, Tao ; Zhang, Hawking ; Clements, John ; Chai, Thomas ; Chai, Thomas Subject: [PATCH V2] drm/amdgpu: Move common initialization operations of each ras block to one function Define amdgpu_ras_sw_init function to initialize all ras blocks. V2: Modify error debugging information. Signed-off-by: yipechai --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 + drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c| 2 - drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c| 143 - drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h| 1 + drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 21 --- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 16 --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 28 drivers/gpu/drm/amd/amdgpu/mca_v3_0.c | 6 - drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 17 --- 9 files changed, 148 insertions(+), 92 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 6113ddc765a7..0c83eb69dad5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2402,6 +2402,12 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) } } + r = amdgpu_ras_sw_init(adev); + if (r) { + DRM_ERROR("amdgpu_ras_sw_init failed (%d).\n", r); + goto init_failed; + } + if (amdgpu_sriov_vf(adev)) amdgpu_virt_init_data_exchange(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index ab75e189bc0b..544241f357b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -440,8 +440,6 @@ int amdgpu_gmc_ras_early_init(struct amdgpu_device *adev) { if (!adev->gmc.xgmi.connected_to_cpu) { adev->gmc.xgmi.ras = &xgmi_ras; - amdgpu_ras_register_ras_block(adev, &adev->gmc.xgmi.ras->ras_block); - adev->gmc.xgmi.ras_if = &adev->gmc.xgmi.ras->ras_block.ras_comm; } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index d3875618ebf5..89075ab9e82e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2299,8 +2299,6 @@ int amdgpu_ras_init(struct amdgpu_device *adev) case CHIP_ALDEBARAN: if (!adev->gmc.xgmi.connected_to_cpu) { adev->nbio.ras = &nbio_v7_4_ras; - amdgpu_ras_register_ras_block(adev, &adev->nbio.ras->ras_block); - adev->nbio.ras_if = &adev->nbio.ras->ras_block.ras_comm; } break; default: @@ -2533,6 +2531,147 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev) amdgpu_ras_disable_all_features(adev, 1); } +int amdgpu_ras_sw_init(struct amdgpu_device *adev) { + int err = 0; + + if (!amdgpu_ras_asic_supported(adev)) + return 0; + + if (adev->nbio.ras) { + err = amdgpu_ras_register_ras_block(adev, &adev->nbio.ras->ras_block); + if (err) { + dev_err(adev->dev, "Failed to register nbio ras block!\n"); + return err; + } + adev->nbio.ras_if = &adev->nbio.ras->ras_block.ras_comm; + } + + if (adev->gmc.xgmi.ras) { + err = amdgpu_ras_register_ras_block(adev, &adev->gmc.xgmi.ras->ras_block); + if (err) { + dev_err(adev->dev, "Failed to register xgmi ras block!\n"); + return err; + } + adev->gmc.xgmi.ras_if = &adev->gmc.xgmi.ras->ras_block.ras_comm; + } + + if (adev->gfx.ras) { + err = amdgpu_ras_register_ras_block(adev, &adev->gfx.ras->ras_block); + if (err) { + dev_err(adev->dev, "Failed to register gfx ras block!\n"); + return err; + } + + strcpy(adev->gfx.ras->ras_block.ras_comm.name, "gfx"); + adev->gfx.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX; + adev->gfx.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->gfx.ras_if = &adev->gfx.ras->ras_block.ras_comm; + + /* If not define special ras_late_init function, u