RE: [PATCH V2] drm/amdgpu: Move common initialization operations of each ras block to one function

2022-03-07 Thread Zhou1, Tao
[AMD Official Use Only]



> -----Original Message-----
> From: Chai, Thomas 
> Sent: Monday, March 7, 2022 4:43 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Chai, Thomas ; Zhang, Hawking
> ; Zhou1, Tao ; Clements,
> John ; Chai, Thomas 
> Subject: [PATCH V2] drm/amdgpu: Move common initialization operations of
> each ras block to one function
> 
> Define amdgpu_ras_sw_init function to initialize all ras blocks.
> 
> V2: Modify error debugging information.
> 
> Signed-off-by: yipechai 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   6 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c|   2 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c| 143 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h|   1 +
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  |  21 ---
>  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c |  16 ---
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  28 
>  drivers/gpu/drm/amd/amdgpu/mca_v3_0.c  |   6 -
>  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c |  17 ---
>  9 files changed, 148 insertions(+), 92 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 6113ddc765a7..0c83eb69dad5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2402,6 +2402,12 @@ static int amdgpu_device_ip_init(struct
> amdgpu_device *adev)
>   }
>   }
> 
> + r = amdgpu_ras_sw_init(adev);
> + if (r) {
> + DRM_ERROR("amdgpu_ras_sw_init failed (%d).\n", r);
> + goto init_failed;
> + }
> +
>   if (amdgpu_sriov_vf(adev))
>   amdgpu_virt_init_data_exchange(adev);
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> index ab75e189bc0b..544241f357b2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> @@ -440,8 +440,6 @@ int amdgpu_gmc_ras_early_init(struct amdgpu_device
> *adev)  {
>   if (!adev->gmc.xgmi.connected_to_cpu) {
>   adev->gmc.xgmi.ras = _ras;
> - amdgpu_ras_register_ras_block(adev, >gmc.xgmi.ras-
> >ras_block);
> - adev->gmc.xgmi.ras_if = >gmc.xgmi.ras-
> >ras_block.ras_comm;
>   }
> 
>   return 0;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index d3875618ebf5..89075ab9e82e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -2299,8 +2299,6 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
>   case CHIP_ALDEBARAN:
>   if (!adev->gmc.xgmi.connected_to_cpu) {
>   adev->nbio.ras = _v7_4_ras;
> - amdgpu_ras_register_ras_block(adev, 
> >nbio.ras->ras_block);
> - adev->nbio.ras_if = >nbio.ras-
> >ras_block.ras_comm;
>   }
>   break;
>   default:
> @@ -2533,6 +2531,147 @@ void amdgpu_ras_suspend(struct amdgpu_device
> *adev)
>   amdgpu_ras_disable_all_features(adev, 1);  }
> 
> +int amdgpu_ras_sw_init(struct amdgpu_device *adev) {
> + int err = 0;
> +
> + if (!amdgpu_ras_asic_supported(adev))
> + return 0;
> +
> + if (adev->nbio.ras) {
> + err = amdgpu_ras_register_ras_block(adev, >nbio.ras-
> >ras_block);
> + if (err) {
> + dev_err(adev->dev, "Failed to register nbio ras
> block!\n");
> + return err;
> + }
> + adev->nbio.ras_if = >nbio.ras->ras_block.ras_comm;
> + }
> +
> + if (adev->gmc.xgmi.ras) {
> + err = amdgpu_ras_register_ras_block(adev, 
> >gmc.xgmi.ras->ras_block);
> + if (err) {
> + dev_err(adev->dev, "Failed to register xgmi ras
> block!\n");
> + return err;
> + }
> + adev->gmc.xgmi.ras_if = >gmc.xgmi.ras-
> >ras_block.ras_comm;
> + }
> +
> + if (adev->gfx.ras) {
> + err = amdgpu_ras_register_ras_block(adev, >gfx.ras-
> >ras_block);
> + if (err) {
> + dev_err(adev->dev, "Failed to register gfx ras 
> block!\n");
> + return err;
> + }
> +
> + strcpy(adev->gfx.ras->ras_block.ras_comm.name, "gfx");
> + adev->gfx.ras->ras_block.ras_comm.block =
> AMDGPU_RAS_BLOCK__GFX;
> + adev->gfx.ras->ras_block.ras_comm.type =
> AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
> + adev->gfx.ras_if = >gfx.ras->ras_block.ras_comm;
> +
> + /* If not define special ras_late_init function, use gfx default
> ras_late_init */
> + if (!adev->gfx.ras->ras_block.ras_late_init)
> + adev->gfx.ras->ras_block.ras_late_init =
> amdgpu_gfx_ras_late_init;
> +
> + /* If not defined special ras_cb function, use default ras_cb */
> + if 

RE: [PATCH V2] drm/amdgpu: Move common initialization operations of each ras block to one function

2022-03-07 Thread Chai, Thomas
[AMD Official Use Only]

OK

-----Original Message-----
From: Chen, Guchun  
Sent: Monday, March 7, 2022 5:11 PM
To: Chai, Thomas ; amd-gfx@lists.freedesktop.org
Cc: Zhou1, Tao ; Zhang, Hawking ; 
Clements, John ; Chai, Thomas ; 
Chai, Thomas 
Subject: RE: [PATCH V2] drm/amdgpu: Move common initialization operations of 
each ras block to one function

if (!adev->gmc.xgmi.connected_to_cpu) {
adev->gmc.xgmi.ras = &xgmi_ras;
-   amdgpu_ras_register_ras_block(adev, 
&adev->gmc.xgmi.ras->ras_block);
-   adev->gmc.xgmi.ras_if = &adev->gmc.xgmi.ras->ras_block.ras_comm;
}

The coding style needs to be fixed here as well: the '{}' should be dropped, 
because only a single statement remains in the body of the 'if' above.

Regards,
Guchun

-----Original Message-----
From: amd-gfx  On Behalf Of yipechai
Sent: Monday, March 7, 2022 4:43 PM
To: amd-gfx@lists.freedesktop.org
Cc: Zhou1, Tao ; Zhang, Hawking ; 
Clements, John ; Chai, Thomas ; 
Chai, Thomas 
Subject: [PATCH V2] drm/amdgpu: Move common initialization operations of each 
ras block to one function

Define amdgpu_ras_sw_init function to initialize all ras blocks.

V2: Modify error debugging information.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   6 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c|   2 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c| 143 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h|   1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  |  21 ---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c |  16 ---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  28 
 drivers/gpu/drm/amd/amdgpu/mca_v3_0.c  |   6 -
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c |  17 ---
 9 files changed, 148 insertions(+), 92 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 6113ddc765a7..0c83eb69dad5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2402,6 +2402,12 @@ static int amdgpu_device_ip_init(struct amdgpu_device 
*adev)
}
}
 
+   r = amdgpu_ras_sw_init(adev);
+   if (r) {
+   DRM_ERROR("amdgpu_ras_sw_init failed (%d).\n", r);
+   goto init_failed;
+   }
+
if (amdgpu_sriov_vf(adev))
amdgpu_virt_init_data_exchange(adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index ab75e189bc0b..544241f357b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -440,8 +440,6 @@ int amdgpu_gmc_ras_early_init(struct amdgpu_device *adev)  {
if (!adev->gmc.xgmi.connected_to_cpu) {
adev->gmc.xgmi.ras = _ras;
-   amdgpu_ras_register_ras_block(adev, 
>gmc.xgmi.ras->ras_block);
-   adev->gmc.xgmi.ras_if = >gmc.xgmi.ras->ras_block.ras_comm;
}
 
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index d3875618ebf5..89075ab9e82e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2299,8 +2299,6 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
case CHIP_ALDEBARAN:
if (!adev->gmc.xgmi.connected_to_cpu) {
adev->nbio.ras = _v7_4_ras;
-   amdgpu_ras_register_ras_block(adev, 
>nbio.ras->ras_block);
-   adev->nbio.ras_if = >nbio.ras->ras_block.ras_comm;
}
break;
default:
@@ -2533,6 +2531,147 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev)
amdgpu_ras_disable_all_features(adev, 1);  }
 
+int amdgpu_ras_sw_init(struct amdgpu_device *adev) {
+   int err = 0;
+
+   if (!amdgpu_ras_asic_supported(adev))
+   return 0;
+
+   if (adev->nbio.ras) {
+   err = amdgpu_ras_register_ras_block(adev, 
>nbio.ras->ras_block);
+   if (err) {
+   dev_err(adev->dev, "Failed to register nbio ras 
block!\n");
+   return err;
+   }
+   adev->nbio.ras_if = >nbio.ras->ras_block.ras_comm;
+   }
+
+   if (adev->gmc.xgmi.ras) {
+   err = amdgpu_ras_register_ras_block(adev, 
>gmc.xgmi.ras->ras_block);
+   if (err) {
+   dev_err(adev->dev, "Failed to register xgmi ras 
block!\n");
+   return err;
+   }
+   adev->gmc.xgmi.ras_if = >gmc.xgmi.ras->ras_block.ras_comm;
+   }
+
+   if (adev->gfx.ras) {
+   err = amdgpu_ras_register_ras_block(adev, 
>gfx.ras->ras_block);
+   if (err) {
+   dev_err(adev->dev, "Failed to register gfx ras 
block!\n");

RE: [PATCH V2] drm/amdgpu: Move common initialization operations of each ras block to one function

2022-03-07 Thread Chen, Guchun
if (!adev->gmc.xgmi.connected_to_cpu) {
adev->gmc.xgmi.ras = &xgmi_ras;
-   amdgpu_ras_register_ras_block(adev, 
&adev->gmc.xgmi.ras->ras_block);
-   adev->gmc.xgmi.ras_if = &adev->gmc.xgmi.ras->ras_block.ras_comm;
}

The coding style needs to be fixed here as well: the '{}' should be dropped, 
because only a single statement remains in the body of the 'if' above.

Regards,
Guchun

-----Original Message-----
From: amd-gfx  On Behalf Of yipechai
Sent: Monday, March 7, 2022 4:43 PM
To: amd-gfx@lists.freedesktop.org
Cc: Zhou1, Tao ; Zhang, Hawking ; 
Clements, John ; Chai, Thomas ; 
Chai, Thomas 
Subject: [PATCH V2] drm/amdgpu: Move common initialization operations of each 
ras block to one function

Define amdgpu_ras_sw_init function to initialize all ras blocks.

V2: Modify error debugging information.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   6 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c|   2 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c| 143 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h|   1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  |  21 ---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c |  16 ---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  28 
 drivers/gpu/drm/amd/amdgpu/mca_v3_0.c  |   6 -
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c |  17 ---
 9 files changed, 148 insertions(+), 92 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 6113ddc765a7..0c83eb69dad5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2402,6 +2402,12 @@ static int amdgpu_device_ip_init(struct amdgpu_device 
*adev)
}
}
 
+   r = amdgpu_ras_sw_init(adev);
+   if (r) {
+   DRM_ERROR("amdgpu_ras_sw_init failed (%d).\n", r);
+   goto init_failed;
+   }
+
if (amdgpu_sriov_vf(adev))
amdgpu_virt_init_data_exchange(adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index ab75e189bc0b..544241f357b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -440,8 +440,6 @@ int amdgpu_gmc_ras_early_init(struct amdgpu_device *adev)  {
if (!adev->gmc.xgmi.connected_to_cpu) {
adev->gmc.xgmi.ras = _ras;
-   amdgpu_ras_register_ras_block(adev, 
>gmc.xgmi.ras->ras_block);
-   adev->gmc.xgmi.ras_if = >gmc.xgmi.ras->ras_block.ras_comm;
}
 
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index d3875618ebf5..89075ab9e82e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2299,8 +2299,6 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
case CHIP_ALDEBARAN:
if (!adev->gmc.xgmi.connected_to_cpu) {
adev->nbio.ras = _v7_4_ras;
-   amdgpu_ras_register_ras_block(adev, 
>nbio.ras->ras_block);
-   adev->nbio.ras_if = >nbio.ras->ras_block.ras_comm;
}
break;
default:
@@ -2533,6 +2531,147 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev)
amdgpu_ras_disable_all_features(adev, 1);  }
 
+int amdgpu_ras_sw_init(struct amdgpu_device *adev) {
+   int err = 0;
+
+   if (!amdgpu_ras_asic_supported(adev))
+   return 0;
+
+   if (adev->nbio.ras) {
+   err = amdgpu_ras_register_ras_block(adev, 
>nbio.ras->ras_block);
+   if (err) {
+   dev_err(adev->dev, "Failed to register nbio ras 
block!\n");
+   return err;
+   }
+   adev->nbio.ras_if = >nbio.ras->ras_block.ras_comm;
+   }
+
+   if (adev->gmc.xgmi.ras) {
+   err = amdgpu_ras_register_ras_block(adev, 
>gmc.xgmi.ras->ras_block);
+   if (err) {
+   dev_err(adev->dev, "Failed to register xgmi ras 
block!\n");
+   return err;
+   }
+   adev->gmc.xgmi.ras_if = >gmc.xgmi.ras->ras_block.ras_comm;
+   }
+
+   if (adev->gfx.ras) {
+   err = amdgpu_ras_register_ras_block(adev, 
>gfx.ras->ras_block);
+   if (err) {
+   dev_err(adev->dev, "Failed to register gfx ras 
block!\n");
+   return err;
+   }
+
+   strcpy(adev->gfx.ras->ras_block.ras_comm.name, "gfx");
+   adev->gfx.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX;
+   adev->gfx.ras->ras_block.ras_comm.type = 
AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+   adev->gfx.ras_if = >gfx.ras->ras_block.ras_comm;
+
+   /* If not define special ras_late_init function, use gfx 
default ras_late_init */
+   if