RE: [PATCH 1/2] drm/amdgpu: Move xgmi ras initialization from .late_init to early_init

2022-01-19 Thread Zhou1, Tao
[AMD Official Use Only]



> -----Original Message-----
> From: Chai, Thomas 
> Sent: Wednesday, January 19, 2022 5:25 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Chai, Thomas ; Zhang, Hawking
> ; Zhou1, Tao ; Clements,
> John ; Chai, Thomas 
> Subject: [PATCH 1/2] drm/amdgpu: Move xgmi ras initialization from .late_init
> to early_init
> 
> Move xgmi ras initialization from .late_init to early_init so that xgmi ras
> is initialized only once.
> 
> Signed-off-by: yipechai 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c    |  5 -----
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c    | 10 ++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h    |  3 +++
>  4 files changed, 15 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 22f50aadf694..ece6397f81de 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2189,6 +2189,8 @@ static int amdgpu_device_ip_early_init(struct
> amdgpu_device *adev)
>   }
>   }
> 
> + amdgpu_ras_early_init(adev);
> +
>   adev->cg_flags &= amdgpu_cg_mask;
>   adev->pg_flags &= amdgpu_pg_mask;
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> index 3483a82f5734..d83eee1984c8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> @@ -452,11 +452,6 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device
> *adev)
>   return r;
>   }
> 
> - if (!adev->gmc.xgmi.connected_to_cpu) {
> - adev->gmc.xgmi.ras = &xgmi_ras;
> - amdgpu_ras_register_ras_block(adev, &adev->gmc.xgmi.ras-
> >ras_block);
> - }
> -
>   if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_late_init)
> {
>   r = adev->gmc.xgmi.ras->ras_block.ras_late_init(adev, NULL);
>   if (r)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 7a1d2bac698e..2b71611be388 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -2238,6 +2238,16 @@ static void amdgpu_ras_counte_dw(struct
> work_struct *work)
>   pm_runtime_put_autosuspend(dev->dev);
>  }
> 
> +int amdgpu_ras_early_init(struct amdgpu_device *adev) {
> + if (!adev->gmc.xgmi.connected_to_cpu) {
> + adev->gmc.xgmi.ras = &xgmi_ras;
> + amdgpu_ras_register_ras_block(adev, &adev->gmc.xgmi.ras-
> >ras_block);
> + }
> +
> + return 0;
> +}
[Tao]: I suggest creating amdgpu_gmc_ras_late_init in amdgpu_gmc.c and calling
it in each gmc_vx.c.
Please make sure gpu reset and S3 tests pass.

> +
>  int amdgpu_ras_init(struct amdgpu_device *adev)  {
>   struct amdgpu_ras *con = amdgpu_ras_get_context(adev); diff --git
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> index a51a281bd91a..7d99e3736ab9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> @@ -600,6 +600,9 @@ amdgpu_ras_error_to_ta(enum
> amdgpu_ras_error_type error) {
>   }
>  }
> 
> +/* called in ip_early_init */
> +int amdgpu_ras_early_init(struct amdgpu_device *adev);
> +
>  /* called in ip_init and ip_fini */
>  int amdgpu_ras_init(struct amdgpu_device *adev);  int amdgpu_ras_fini(struct
> amdgpu_device *adev);
> --
> 2.25.1


[PATCH 1/2] drm/amdgpu: Move xgmi ras initialization from .late_init to early_init

2022-01-19 Thread yipechai
Move xgmi ras initialization from .late_init to early_init so that
xgmi ras is initialized only once.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c    |  5 -----
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c    | 10 ++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h    |  3 +++
 4 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 22f50aadf694..ece6397f81de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2189,6 +2189,8 @@ static int amdgpu_device_ip_early_init(struct 
amdgpu_device *adev)
}
}
 
+   amdgpu_ras_early_init(adev);
+
adev->cg_flags &= amdgpu_cg_mask;
adev->pg_flags &= amdgpu_pg_mask;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 3483a82f5734..d83eee1984c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -452,11 +452,6 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
return r;
}
 
-   if (!adev->gmc.xgmi.connected_to_cpu) {
-   adev->gmc.xgmi.ras = &xgmi_ras;
-   amdgpu_ras_register_ras_block(adev, 
&adev->gmc.xgmi.ras->ras_block);
-   }
-
if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_late_init) {
r = adev->gmc.xgmi.ras->ras_block.ras_late_init(adev, NULL);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 7a1d2bac698e..2b71611be388 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2238,6 +2238,16 @@ static void amdgpu_ras_counte_dw(struct work_struct 
*work)
pm_runtime_put_autosuspend(dev->dev);
 }
 
+int amdgpu_ras_early_init(struct amdgpu_device *adev)
+{
+   if (!adev->gmc.xgmi.connected_to_cpu) {
+   adev->gmc.xgmi.ras = &xgmi_ras;
+   amdgpu_ras_register_ras_block(adev, 
&adev->gmc.xgmi.ras->ras_block);
+   }
+
+   return 0;
+}
+
 int amdgpu_ras_init(struct amdgpu_device *adev)
 {
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index a51a281bd91a..7d99e3736ab9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -600,6 +600,9 @@ amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) {
}
 }
 
+/* called in ip_early_init */
+int amdgpu_ras_early_init(struct amdgpu_device *adev);
+
 /* called in ip_init and ip_fini */
 int amdgpu_ras_init(struct amdgpu_device *adev);
 int amdgpu_ras_fini(struct amdgpu_device *adev);
-- 
2.25.1