RE: [PATCH] drm/amdgpu: refine gprs init shaders to check coverage

2021-04-20 Thread Zhang, Hawking
[AMD Public Use]

Reviewed-by: Hawking Zhang 

Regards,
Hawking
-----Original Message-----
From: Dennis Li  
Sent: Wednesday, April 21, 2021 13:51
To: amd-gfx@lists.freedesktop.org; Deucher, Alexander 
; Kuehling, Felix ; Zhang, 
Hawking ; Koenig, Christian 
Cc: Li, Dennis 
Subject: [PATCH] drm/amdgpu: refine gprs init shaders to check coverage

Add code to check whether all SIMDs are covered, to make sure that all GPRs
are initialized.

Signed-off-by: Dennis Li 

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 9889bd495ba5..9e629f239288 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4656,8 +4656,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct 
amdgpu_device *adev)
if (!ring->sched.ready)
return 0;
 
-   if (adev->asic_type == CHIP_ARCTURUS ||
-   adev->asic_type == CHIP_ALDEBARAN) {
+   if (adev->asic_type == CHIP_ARCTURUS) {
vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
vgpr_init_shader_size = 
sizeof(vgpr_init_compute_shader_arcturus);
vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
@@ -4924,7 +4923,11 @@ static int gfx_v9_0_ecc_late_init(void *handle)
}
 
/* requires IBs so do in late init after IB pool is initialized */
-   r = gfx_v9_0_do_edc_gpr_workarounds(adev);
+   if (adev->asic_type == CHIP_ALDEBARAN)
+   r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
+   else
+   r = gfx_v9_0_do_edc_gpr_workarounds(adev);
+
if (r)
return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
index 9ca76a3ac38c..798c0e178201 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
@@ -22,6 +22,7 @@
  */
 #include "amdgpu.h"
 #include "soc15.h"
+#include "soc15d.h"
 
 #include "gc/gc_9_4_2_offset.h"
 #include "gc/gc_9_4_2_sh_mask.h"
@@ -79,6 +80,377 @@ static const struct soc15_reg_golden 
golden_settings_gc_9_4_2_alde[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, regTCI_CNTL_3, 0xff, 0x20),
 };
 
+static const u32 vgpr_init_compute_shader_aldebaran[] = {
+   0xb8840904, 0xb8851a04, 0xb8861344, 0x9207c006, 0x92088405, 0x81070807,
+   0x81070407, 0x8e078207, 0xbe88008f, 0xc0410200, 0x0007, 0xd3d94000,
+   0x1880, 0xd3d94001, 0x1880, 0xd3d94002, 0x1880, 0xd3d94003,
+   0x1880, 0xd3d94004, 0x1880, 0xd3d94005, 0x1880, 0xd3d94006,
+   0x1880, 0xd3d94007, 0x1880, 0xd3d94008, 0x1880, 0xd3d94009,
+   0x1880, 0xd3d9400a, 0x1880, 0xd3d9400b, 0x1880, 0xd3d9400c,
+   0x1880, 0xd3d9400d, 0x1880, 0xd3d9400e, 0x1880, 0xd3d9400f,
+   0x1880, 0xd3d94010, 0x1880, 0xd3d94011, 0x1880, 0xd3d94012,
+   0x1880, 0xd3d94013, 0x1880, 0xd3d94014, 0x1880, 0xd3d94015,
+   0x1880, 0xd3d94016, 0x1880, 0xd3d94017, 0x1880, 0xd3d94018,
+   0x1880, 0xd3d94019, 0x1880, 0xd3d9401a, 0x1880, 0xd3d9401b,
+   0x1880, 0xd3d9401c, 0x1880, 0xd3d9401d, 0x1880, 0xd3d9401e,
+   0x1880, 0xd3d9401f, 0x1880, 0xd3d94020, 0x1880, 0xd3d94021,
+   0x1880, 0xd3d94022, 0x1880, 0xd3d94023, 0x1880, 0xd3d94024,
+   0x1880, 0xd3d94025, 0x1880, 0xd3d94026, 0x1880, 0xd3d94027,
+   0x1880, 0xd3d94028, 0x1880, 0xd3d94029, 0x1880, 0xd3d9402a,
+   0x1880, 0xd3d9402b, 0x1880, 0xd3d9402c, 0x1880, 0xd3d9402d,
+   0x1880, 0xd3d9402e, 0x1880, 0xd3d9402f, 0x1880, 0xd3d94030,
+   0x1880, 0xd3d94031, 0x1880, 0xd3d94032, 0x1880, 0xd3d94033,
+   0x1880, 0xd3d94034, 0x1880, 0xd3d94035, 0x1880, 0xd3d94036,
+   0x1880, 0xd3d94037, 0x1880, 0xd3d94038, 0x1880, 0xd3d94039,
+   0x1880, 0xd3d9403a, 0x1880, 0xd3d9403b, 0x1880, 0xd3d9403c,
+   0x1880, 0xd3d9403d, 0x1880, 0xd3d9403e, 0x1880, 0xd3d9403f,
+   0x1880, 0xd3d94040, 0x1880, 0xd3d94041, 0x1880, 0xd3d94042,
+   0x1880, 0xd3d94043, 0x1880, 0xd3d94044, 0x1880, 0xd3d94045,
+   0x1880, 0xd3d94046, 0x1880, 0xd3d94047, 0x1880, 0xd3d94048,
+   0x1880, 0xd3d94049, 0x1880, 0xd3d9404a, 0x1880, 0xd3d9404b,
+   0x1880, 0xd3d9404c, 0x1880, 0xd3d9404d, 0x1880, 0xd3d9404e,
+   0x1880, 0xd3d9404f, 0x1880, 0xd3d94050, 0x1880, 0xd3d94051,
+   0x1880, 0xd3d94052, 0x1880, 0xd3d94053, 0x1880, 0xd3d94054,
+   0x1880, 0xd3d94055, 0x1880, 0xd3d94056, 0x1880, 0xd3d94057,
+   0x1880, 0xd3d94058, 0x1880, 0xd3d94059, 0x1880, 0xd3d9405a,
+   0x1880, 0xd3d9405b, 0x1880, 0xd3d9405c, 0x1880, 0xd3d9405d,
+   0x1880, 0xd3d9405e, 0

[PATCH] drm/amdgpu: refine gprs init shaders to check coverage

2021-04-20 Thread Dennis Li
Add code to check whether all SIMDs are covered, to make sure that all
GPRs are initialized.

Signed-off-by: Dennis Li 

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 9889bd495ba5..9e629f239288 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4656,8 +4656,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct 
amdgpu_device *adev)
if (!ring->sched.ready)
return 0;
 
-   if (adev->asic_type == CHIP_ARCTURUS ||
-   adev->asic_type == CHIP_ALDEBARAN) {
+   if (adev->asic_type == CHIP_ARCTURUS) {
vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
vgpr_init_shader_size = 
sizeof(vgpr_init_compute_shader_arcturus);
vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
@@ -4924,7 +4923,11 @@ static int gfx_v9_0_ecc_late_init(void *handle)
}
 
/* requires IBs so do in late init after IB pool is initialized */
-   r = gfx_v9_0_do_edc_gpr_workarounds(adev);
+   if (adev->asic_type == CHIP_ALDEBARAN)
+   r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
+   else
+   r = gfx_v9_0_do_edc_gpr_workarounds(adev);
+
if (r)
return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
index 9ca76a3ac38c..798c0e178201 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
@@ -22,6 +22,7 @@
  */
 #include "amdgpu.h"
 #include "soc15.h"
+#include "soc15d.h"
 
 #include "gc/gc_9_4_2_offset.h"
 #include "gc/gc_9_4_2_sh_mask.h"
@@ -79,6 +80,377 @@ static const struct soc15_reg_golden 
golden_settings_gc_9_4_2_alde[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, regTCI_CNTL_3, 0xff, 0x20),
 };
 
+static const u32 vgpr_init_compute_shader_aldebaran[] = {
+   0xb8840904, 0xb8851a04, 0xb8861344, 0x9207c006, 0x92088405, 0x81070807,
+   0x81070407, 0x8e078207, 0xbe88008f, 0xc0410200, 0x0007, 0xd3d94000,
+   0x1880, 0xd3d94001, 0x1880, 0xd3d94002, 0x1880, 0xd3d94003,
+   0x1880, 0xd3d94004, 0x1880, 0xd3d94005, 0x1880, 0xd3d94006,
+   0x1880, 0xd3d94007, 0x1880, 0xd3d94008, 0x1880, 0xd3d94009,
+   0x1880, 0xd3d9400a, 0x1880, 0xd3d9400b, 0x1880, 0xd3d9400c,
+   0x1880, 0xd3d9400d, 0x1880, 0xd3d9400e, 0x1880, 0xd3d9400f,
+   0x1880, 0xd3d94010, 0x1880, 0xd3d94011, 0x1880, 0xd3d94012,
+   0x1880, 0xd3d94013, 0x1880, 0xd3d94014, 0x1880, 0xd3d94015,
+   0x1880, 0xd3d94016, 0x1880, 0xd3d94017, 0x1880, 0xd3d94018,
+   0x1880, 0xd3d94019, 0x1880, 0xd3d9401a, 0x1880, 0xd3d9401b,
+   0x1880, 0xd3d9401c, 0x1880, 0xd3d9401d, 0x1880, 0xd3d9401e,
+   0x1880, 0xd3d9401f, 0x1880, 0xd3d94020, 0x1880, 0xd3d94021,
+   0x1880, 0xd3d94022, 0x1880, 0xd3d94023, 0x1880, 0xd3d94024,
+   0x1880, 0xd3d94025, 0x1880, 0xd3d94026, 0x1880, 0xd3d94027,
+   0x1880, 0xd3d94028, 0x1880, 0xd3d94029, 0x1880, 0xd3d9402a,
+   0x1880, 0xd3d9402b, 0x1880, 0xd3d9402c, 0x1880, 0xd3d9402d,
+   0x1880, 0xd3d9402e, 0x1880, 0xd3d9402f, 0x1880, 0xd3d94030,
+   0x1880, 0xd3d94031, 0x1880, 0xd3d94032, 0x1880, 0xd3d94033,
+   0x1880, 0xd3d94034, 0x1880, 0xd3d94035, 0x1880, 0xd3d94036,
+   0x1880, 0xd3d94037, 0x1880, 0xd3d94038, 0x1880, 0xd3d94039,
+   0x1880, 0xd3d9403a, 0x1880, 0xd3d9403b, 0x1880, 0xd3d9403c,
+   0x1880, 0xd3d9403d, 0x1880, 0xd3d9403e, 0x1880, 0xd3d9403f,
+   0x1880, 0xd3d94040, 0x1880, 0xd3d94041, 0x1880, 0xd3d94042,
+   0x1880, 0xd3d94043, 0x1880, 0xd3d94044, 0x1880, 0xd3d94045,
+   0x1880, 0xd3d94046, 0x1880, 0xd3d94047, 0x1880, 0xd3d94048,
+   0x1880, 0xd3d94049, 0x1880, 0xd3d9404a, 0x1880, 0xd3d9404b,
+   0x1880, 0xd3d9404c, 0x1880, 0xd3d9404d, 0x1880, 0xd3d9404e,
+   0x1880, 0xd3d9404f, 0x1880, 0xd3d94050, 0x1880, 0xd3d94051,
+   0x1880, 0xd3d94052, 0x1880, 0xd3d94053, 0x1880, 0xd3d94054,
+   0x1880, 0xd3d94055, 0x1880, 0xd3d94056, 0x1880, 0xd3d94057,
+   0x1880, 0xd3d94058, 0x1880, 0xd3d94059, 0x1880, 0xd3d9405a,
+   0x1880, 0xd3d9405b, 0x1880, 0xd3d9405c, 0x1880, 0xd3d9405d,
+   0x1880, 0xd3d9405e, 0x1880, 0xd3d9405f, 0x1880, 0xd3d94060,
+   0x1880, 0xd3d94061, 0x1880, 0xd3d94062, 0x1880, 0xd3d94063,
+   0x1880, 0xd3d94064, 0x1880, 0xd3d94065, 0x1880, 0xd3d94066,
+   0x1880, 0xd3d94067, 0x1880, 0xd3d94068, 0x1880, 0xd3d94069,
+   0x1880, 0xd3d9406a, 0x1880, 0xd3d9406b, 0x1880, 0xd3d9406c,
+   0x1880, 0xd3d9406d, 0x1880, 0xd3d9406e, 0x18