[PATCH V2 03/11] drm/amdgpu: Modify gfx block to fit for the unified ras block data and ops

2021-12-01 Thread yipechai
1.Modify gfx block to fit for the unified ras block data and ops
2.Implement .ras_block_match function pointer for gfx block to identify itself.
3.Change amdgpu_gfx_ras_funcs to amdgpu_gfx_ras, and the corresponding variable 
name remove _funcs suffix.
4.Remove the const flag of gfx ras variable so that gfx ras block can be able 
to be insertted into amdgpu device ras block link list.
5.Invoke amdgpu_ras_register_ras_block function to register gfx ras block into 
amdgpu device ras block link list.
6.Remove the redundant code about gfx in amdgpu_ras.c after using the unified 
ras block.

Signed-off-by: yipechai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c |  6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 15 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 80 ++---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 73 +++---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c   | 39 
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h   |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c | 42 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h |  2 +-
 8 files changed, 178 insertions(+), 81 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 1795d448c700..da8691259ac1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -696,9 +696,9 @@ int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device 
*adev,
 */
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
-   if (adev->gfx.ras_funcs &&
-   adev->gfx.ras_funcs->query_ras_error_count)
-   adev->gfx.ras_funcs->query_ras_error_count(adev, 
err_data);
+   if (adev->gfx.ras && adev->gfx.ras->ras_block.ops &&
+   adev->gfx.ras->ras_block.ops->query_ras_error_count)
+   
adev->gfx.ras->ras_block.ops->query_ras_error_count(adev, err_data);
amdgpu_ras_reset_gpu(adev);
}
return AMDGPU_RAS_SUCCESS;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 6b78b4a0e182..ff4a8428a84b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -31,6 +31,7 @@
 #include "amdgpu_ring.h"
 #include "amdgpu_rlc.h"
 #include "soc15.h"
+#include "amdgpu_ras.h"
 
 /* GFX current status */
 #define AMDGPU_GFX_NORMAL_MODE 0xL
@@ -213,16 +214,8 @@ struct amdgpu_cu_info {
uint32_t bitmap[4][4];
 };
 
-struct amdgpu_gfx_ras_funcs {
-   int (*ras_late_init)(struct amdgpu_device *adev);
-   void (*ras_fini)(struct amdgpu_device *adev);
-   int (*ras_error_inject)(struct amdgpu_device *adev,
-   void *inject_if);
-   int (*query_ras_error_count)(struct amdgpu_device *adev,
-void *ras_error_status);
-   void (*reset_ras_error_count)(struct amdgpu_device *adev);
-   void (*query_ras_error_status)(struct amdgpu_device *adev);
-   void (*reset_ras_error_status)(struct amdgpu_device *adev);
+struct amdgpu_gfx_ras {
+   struct amdgpu_ras_block_object  ras_block;
void (*enable_watchdog_timer)(struct amdgpu_device *adev);
 };
 
@@ -348,7 +341,7 @@ struct amdgpu_gfx {
 
/*ras */
struct ras_common_if*ras_if;
-   const struct amdgpu_gfx_ras_funcs   *ras_funcs;
+   struct amdgpu_gfx_ras   *ras;
 };
 
 #define amdgpu_gfx_get_gpu_clock_counter(adev) 
(adev)->gfx.funcs->get_gpu_clock_counter((adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 1cf1f6331db1..190a4a4e9d7a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -862,6 +862,27 @@ static int amdgpu_ras_enable_all_features(struct 
amdgpu_device *adev,
 }
 /* feature ctl end */
 
+static struct amdgpu_ras_block_object* amdgpu_ras_get_ras_block(struct 
amdgpu_device *adev,
+   enum amdgpu_ras_block block, uint32_t 
sub_block_index)
+{
+   struct amdgpu_ras_block_object *obj, *tmp;
+
+   if (block >= AMDGPU_RAS_BLOCK__LAST) {
+   return NULL;
+   }
+
+   list_for_each_entry_safe(obj, tmp, &adev->ras_list, node) {
+   if( !obj->ops || !obj->ops->ras_block_match) {
+   dev_info(adev->dev, "%s don't config ops or  
ras_block_match\n", obj->name);
+   continue;
+   }
+   if (!obj->ops->ras_block_match(obj, block, sub_block_index)) {
+   return obj;
+   }
+   }
+
+   return NULL;
+}
 
 void amdgpu_ras_mca_query_error_status(struct amdgpu_device *adev,
   struct ras_common_if *ras_block,
@@ -892,6 +913,7 @@ void amdgpu_ras_mca_query_error_sta

RE: [PATCH V2 03/11] drm/amdgpu: Modify gfx block to fit for the unified ras block data and ops

2021-12-06 Thread Zhou1, Tao
[AMD Official Use Only]

Please see my comments inline.

> -Original Message-
> From: Chai, Thomas 
> Sent: Wednesday, December 1, 2021 6:53 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Chai, Thomas ; Zhang, Hawking
> ; Zhou1, Tao ; Chai,
> Thomas 
> Subject: [PATCH V2 03/11] drm/amdgpu: Modify gfx block to fit for the unified
> ras block data and ops
> 
> 1.Modify gfx block to fit for the unified ras block data and ops
> 2.Implement .ras_block_match function pointer for gfx block to identify 
> itself.
> 3.Change amdgpu_gfx_ras_funcs to amdgpu_gfx_ras, and the corresponding
> variable name remove _funcs suffix.
> 4.Remove the const flag of gfx ras variable so that gfx ras block can be able 
> to
> be insertted into amdgpu device ras block link list.
> 5.Invoke amdgpu_ras_register_ras_block function to register gfx ras block into
> amdgpu device ras block link list.
> 6.Remove the redundant code about gfx in amdgpu_ras.c after using the unified
> ras block.
> 
> Signed-off-by: yipechai 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c |  6 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 15 ++---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 80 ++---
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 73 +++---
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c   | 39 
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h   |  2 +-
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c | 42 +
> drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h |  2 +-
>  8 files changed, 178 insertions(+), 81 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 1795d448c700..da8691259ac1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -696,9 +696,9 @@ int amdgpu_gfx_process_ras_data_cb(struct
> amdgpu_device *adev,
>*/
>   if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
>   kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
> - if (adev->gfx.ras_funcs &&
> - adev->gfx.ras_funcs->query_ras_error_count)
> - adev->gfx.ras_funcs->query_ras_error_count(adev,
> err_data);
> + if (adev->gfx.ras && adev->gfx.ras->ras_block.ops &&
> + adev->gfx.ras->ras_block.ops->query_ras_error_count)
> + adev->gfx.ras->ras_block.ops-
> >query_ras_error_count(adev, err_data);
>   amdgpu_ras_reset_gpu(adev);
>   }
>   return AMDGPU_RAS_SUCCESS;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 6b78b4a0e182..ff4a8428a84b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -31,6 +31,7 @@
>  #include "amdgpu_ring.h"
>  #include "amdgpu_rlc.h"
>  #include "soc15.h"
> +#include "amdgpu_ras.h"
> 
>  /* GFX current status */
>  #define AMDGPU_GFX_NORMAL_MODE   0xL
> @@ -213,16 +214,8 @@ struct amdgpu_cu_info {
>   uint32_t bitmap[4][4];
>  };
> 
> -struct amdgpu_gfx_ras_funcs {
> - int (*ras_late_init)(struct amdgpu_device *adev);
> - void (*ras_fini)(struct amdgpu_device *adev);
> - int (*ras_error_inject)(struct amdgpu_device *adev,
> - void *inject_if);
> - int (*query_ras_error_count)(struct amdgpu_device *adev,
> -  void *ras_error_status);
> - void (*reset_ras_error_count)(struct amdgpu_device *adev);
> - void (*query_ras_error_status)(struct amdgpu_device *adev);
> - void (*reset_ras_error_status)(struct amdgpu_device *adev);
> +struct amdgpu_gfx_ras {
> + struct amdgpu_ras_block_object  ras_block;
>   void (*enable_watchdog_timer)(struct amdgpu_device *adev);  };

[Tao] Can we add " enable_watchdog_timer" function into amdgpu_ras_block_ops 
structure?
And I think using ras_block directly is more simple than amdgpu_gfx_ras 
gfx_v9_0_ras structure.

> 
> @@ -348,7 +341,7 @@ struct amdgpu_gfx {
> 
>   /*ras */
>   struct ras_common_if*ras_if;
> - const struct amdgpu_gfx_ras_funcs   *ras_funcs;
> + struct amdgpu_gfx_ras   *ras;
>  };
> 
>  #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs-
> >get_gpu_clock_counter((adev))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 1cf1f6331db1..190a4a4e9d7a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/am

RE: [PATCH V2 03/11] drm/amdgpu: Modify gfx block to fit for the unified ras block data and ops

2021-12-06 Thread Zhou1, Tao
[AMD Official Use Only]

Hi Thomas,

Please see my two comments.

Regards,
Tao

> -Original Message-
> From: Chai, Thomas 
> Sent: Tuesday, December 7, 2021 11:37 AM
> To: Zhou1, Tao ; amd-gfx@lists.freedesktop.org
> Cc: Zhang, Hawking 
> Subject: RE: [PATCH V2 03/11] drm/amdgpu: Modify gfx block to fit for the
> unified ras block data and ops
> 
> Hi tao:
>  I add my comments behind your comments. Please review.
> 
> -Original Message-
> From: Zhou1, Tao 
> Sent: Monday, December 6, 2021 2:58 PM
> To: Chai, Thomas ; amd-gfx@lists.freedesktop.org
> Cc: Zhang, Hawking 
> Subject: RE: [PATCH V2 03/11] drm/amdgpu: Modify gfx block to fit for the
> unified ras block data and ops
> 
> [AMD Official Use Only]
> 
> Please see my comments inline.
> 
> > -Original Message-
> > From: Chai, Thomas 
> > Sent: Wednesday, December 1, 2021 6:53 PM
> > To: amd-gfx@lists.freedesktop.org
> > Cc: Chai, Thomas ; Zhang, Hawking
> > ; Zhou1, Tao ; Chai,
> Thomas
> > 
> > Subject: [PATCH V2 03/11] drm/amdgpu: Modify gfx block to fit for the
> > unified ras block data and ops
> >
> > 1.Modify gfx block to fit for the unified ras block data and ops
> > 2.Implement .ras_block_match function pointer for gfx block to identify 
> > itself.
> > 3.Change amdgpu_gfx_ras_funcs to amdgpu_gfx_ras, and the corresponding
> > variable name remove _funcs suffix.
> > 4.Remove the const flag of gfx ras variable so that gfx ras block can
> > be able to be insertted into amdgpu device ras block link list.
> > 5.Invoke amdgpu_ras_register_ras_block function to register gfx ras
> > block into amdgpu device ras block link list.
> > 6.Remove the redundant code about gfx in amdgpu_ras.c after using the
> > unified ras block.
> >
> > Signed-off-by: yipechai 
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c |  6 +-
> > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 15 ++---
> > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 80 ++--
> -
> >  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 73 +++---
> >  drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c   | 39 
> >  drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h   |  2 +-
> >  drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c | 42 +
> > drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h |  2 +-
> >  8 files changed, 178 insertions(+), 81 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> > index 1795d448c700..da8691259ac1 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> > @@ -696,9 +696,9 @@ int amdgpu_gfx_process_ras_data_cb(struct
> > amdgpu_device *adev,
> >  */
> > if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
> > kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
> > -   if (adev->gfx.ras_funcs &&
> > -   adev->gfx.ras_funcs->query_ras_error_count)
> > -   adev->gfx.ras_funcs->query_ras_error_count(adev,
> > err_data);
> > +   if (adev->gfx.ras && adev->gfx.ras->ras_block.ops &&
> > +   adev->gfx.ras->ras_block.ops->query_ras_error_count)
> > +   adev->gfx.ras->ras_block.ops-
> > >query_ras_error_count(adev, err_data);
> > amdgpu_ras_reset_gpu(adev);
> > }
> > return AMDGPU_RAS_SUCCESS;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> > index 6b78b4a0e182..ff4a8428a84b 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> > @@ -31,6 +31,7 @@
> >  #include "amdgpu_ring.h"
> >  #include "amdgpu_rlc.h"
> >  #include "soc15.h"
> > +#include "amdgpu_ras.h"
> >
> >  /* GFX current status */
> >  #define AMDGPU_GFX_NORMAL_MODE 0xL
> > @@ -213,16 +214,8 @@ struct amdgpu_cu_info {
> > uint32_t bitmap[4][4];
> >  };
> >
> > -struct amdgpu_gfx_ras_funcs {
> > -   int (*ras_late_init)(struct amdgpu_device *adev);
> > -   void (*ras_fini)(struct amdgpu_device *adev);
> > -   int (*ras_error_inject)(struct amdgpu_device *adev,
> > -   void *inject_if);
> > -   int (*query_ras_error_count)(struct amdgpu_device *adev,
> > -void *ras_error_status);
> > -   void (*reset_ras_error_count)(str

RE: [PATCH V2 03/11] drm/amdgpu: Modify gfx block to fit for the unified ras block data and ops

2021-12-06 Thread Chai, Thomas
Hi tao:
 I add my comments behind your comments. Please review.

-Original Message-
From: Zhou1, Tao  
Sent: Monday, December 6, 2021 2:58 PM
To: Chai, Thomas ; amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking 
Subject: RE: [PATCH V2 03/11] drm/amdgpu: Modify gfx block to fit for the 
unified ras block data and ops

[AMD Official Use Only]

Please see my comments inline.

> -Original Message-
> From: Chai, Thomas 
> Sent: Wednesday, December 1, 2021 6:53 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Chai, Thomas ; Zhang, Hawking 
> ; Zhou1, Tao ; Chai, Thomas 
> 
> Subject: [PATCH V2 03/11] drm/amdgpu: Modify gfx block to fit for the 
> unified ras block data and ops
> 
> 1.Modify gfx block to fit for the unified ras block data and ops 
> 2.Implement .ras_block_match function pointer for gfx block to identify 
> itself.
> 3.Change amdgpu_gfx_ras_funcs to amdgpu_gfx_ras, and the corresponding 
> variable name remove _funcs suffix.
> 4.Remove the const flag of gfx ras variable so that gfx ras block can 
> be able to be insertted into amdgpu device ras block link list.
> 5.Invoke amdgpu_ras_register_ras_block function to register gfx ras 
> block into amdgpu device ras block link list.
> 6.Remove the redundant code about gfx in amdgpu_ras.c after using the 
> unified ras block.
> 
> Signed-off-by: yipechai 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c |  6 +- 
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 15 ++--- 
> drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 80 ++---
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 73 +++---
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c   | 39 
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h   |  2 +-
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c | 42 + 
> drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h |  2 +-
>  8 files changed, 178 insertions(+), 81 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 1795d448c700..da8691259ac1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -696,9 +696,9 @@ int amdgpu_gfx_process_ras_data_cb(struct
> amdgpu_device *adev,
>*/
>   if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
>   kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
> - if (adev->gfx.ras_funcs &&
> - adev->gfx.ras_funcs->query_ras_error_count)
> - adev->gfx.ras_funcs->query_ras_error_count(adev,
> err_data);
> + if (adev->gfx.ras && adev->gfx.ras->ras_block.ops &&
> + adev->gfx.ras->ras_block.ops->query_ras_error_count)
> + adev->gfx.ras->ras_block.ops-
> >query_ras_error_count(adev, err_data);
>   amdgpu_ras_reset_gpu(adev);
>   }
>   return AMDGPU_RAS_SUCCESS;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 6b78b4a0e182..ff4a8428a84b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -31,6 +31,7 @@
>  #include "amdgpu_ring.h"
>  #include "amdgpu_rlc.h"
>  #include "soc15.h"
> +#include "amdgpu_ras.h"
> 
>  /* GFX current status */
>  #define AMDGPU_GFX_NORMAL_MODE   0xL
> @@ -213,16 +214,8 @@ struct amdgpu_cu_info {
>   uint32_t bitmap[4][4];
>  };
> 
> -struct amdgpu_gfx_ras_funcs {
> - int (*ras_late_init)(struct amdgpu_device *adev);
> - void (*ras_fini)(struct amdgpu_device *adev);
> - int (*ras_error_inject)(struct amdgpu_device *adev,
> - void *inject_if);
> - int (*query_ras_error_count)(struct amdgpu_device *adev,
> -  void *ras_error_status);
> - void (*reset_ras_error_count)(struct amdgpu_device *adev);
> - void (*query_ras_error_status)(struct amdgpu_device *adev);
> - void (*reset_ras_error_status)(struct amdgpu_device *adev);
> +struct amdgpu_gfx_ras {
> + struct amdgpu_ras_block_object  ras_block;
>   void (*enable_watchdog_timer)(struct amdgpu_device *adev);  };

>[Tao] Can we add " enable_watchdog_timer" function into amdgpu_ras_block_ops 
>structure?
>And I think using ras_block directly is more simple than amdgpu_gfx_ras 
>gfx_v9_0_ras structure.

[Thomas] The ' enable_watchdog_timer ' function is not a common function. It is 
only defined by gfx_v9_4_2.c and called in gfx_v9_0.c. 
   I think the function pointers in the amdgpu_ras_block_ops structure 
should be the functions used by mo

RE: [PATCH V2 03/11] drm/amdgpu: Modify gfx block to fit for the unified ras block data and ops

2021-12-06 Thread Chai, Thomas
Hi tao:
 Thanks for your review. I  add another two comments behind your comments, 
please review again.

-Original Message-
From: Zhou1, Tao  
Sent: Tuesday, December 7, 2021 12:07 PM
To: Chai, Thomas ; amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking 
Subject: RE: [PATCH V2 03/11] drm/amdgpu: Modify gfx block to fit for the 
unified ras block data and ops

[AMD Official Use Only]

Hi Thomas,

Please see my two comments.

Regards,
Tao

> -Original Message-
> From: Chai, Thomas 
> Sent: Tuesday, December 7, 2021 11:37 AM
> To: Zhou1, Tao ; amd-gfx@lists.freedesktop.org
> Cc: Zhang, Hawking 
> Subject: RE: [PATCH V2 03/11] drm/amdgpu: Modify gfx block to fit for 
> the unified ras block data and ops
> 
> Hi tao:
>  I add my comments behind your comments. Please review.
> 
> -Original Message-
> From: Zhou1, Tao 
> Sent: Monday, December 6, 2021 2:58 PM
> To: Chai, Thomas ; amd-gfx@lists.freedesktop.org
> Cc: Zhang, Hawking 
> Subject: RE: [PATCH V2 03/11] drm/amdgpu: Modify gfx block to fit for 
> the unified ras block data and ops
> 
> [AMD Official Use Only]
> 
> Please see my comments inline.
> 
> > -Original Message-
> > From: Chai, Thomas 
> > Sent: Wednesday, December 1, 2021 6:53 PM
> > To: amd-gfx@lists.freedesktop.org
> > Cc: Chai, Thomas ; Zhang, Hawking 
> > ; Zhou1, Tao ; Chai,
> Thomas
> > 
> > Subject: [PATCH V2 03/11] drm/amdgpu: Modify gfx block to fit for 
> > the unified ras block data and ops
> >
> > 1.Modify gfx block to fit for the unified ras block data and ops 
> > 2.Implement .ras_block_match function pointer for gfx block to identify 
> > itself.
> > 3.Change amdgpu_gfx_ras_funcs to amdgpu_gfx_ras, and the 
> > corresponding variable name remove _funcs suffix.
> > 4.Remove the const flag of gfx ras variable so that gfx ras block 
> > can be able to be insertted into amdgpu device ras block link list.
> > 5.Invoke amdgpu_ras_register_ras_block function to register gfx ras 
> > block into amdgpu device ras block link list.
> > 6.Remove the redundant code about gfx in amdgpu_ras.c after using 
> > the unified ras block.
> >
> > Signed-off-by: yipechai 
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c |  6 +- 
> > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 15 ++--- 
> > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 80 
> > ++--
> -
> >  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 73 +++---
> >  drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c   | 39 
> >  drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h   |  2 +-
> >  drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c | 42 + 
> > drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h |  2 +-
> >  8 files changed, 178 insertions(+), 81 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> > index 1795d448c700..da8691259ac1 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> > @@ -696,9 +696,9 @@ int amdgpu_gfx_process_ras_data_cb(struct
> > amdgpu_device *adev,
> >  */
> > if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
> > kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
> > -   if (adev->gfx.ras_funcs &&
> > -   adev->gfx.ras_funcs->query_ras_error_count)
> > -   adev->gfx.ras_funcs->query_ras_error_count(adev,
> > err_data);
> > +   if (adev->gfx.ras && adev->gfx.ras->ras_block.ops &&
> > +   adev->gfx.ras->ras_block.ops->query_ras_error_count)
> > +   adev->gfx.ras->ras_block.ops-
> > >query_ras_error_count(adev, err_data);
> > amdgpu_ras_reset_gpu(adev);
> > }
> > return AMDGPU_RAS_SUCCESS;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> > index 6b78b4a0e182..ff4a8428a84b 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> > @@ -31,6 +31,7 @@
> >  #include "amdgpu_ring.h"
> >  #include "amdgpu_rlc.h"
> >  #include "soc15.h"
> > +#include "amdgpu_ras.h"
> >
> >  /* GFX current status */
> >  #define AMDGPU_GFX_NORMAL_MODE 0xL
> > @@ -213,16 +214,8 @@ struct amdgpu_cu_info {
> > uint32_t bitmap[4][4];
> >  };
> >
> > -struct amdgpu_gfx_ras_funcs {
> > -   int (*ras_late_init)(struc