RE: [PATCH] drm/amdkfd: update GFX11 CWSR trap handler

2022-11-01 Thread Sider, Graham
[AMD Official Use Only - General]

> -Original Message-
> From: Sider, Graham 
> Sent: Wednesday, October 26, 2022 5:05 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kuehling, Felix ; Kasiviswanathan, Harish
> ; Cornwall, Jay
> ; Sider, Graham 
> Subject: [PATCH] drm/amdkfd: update GFX11 CWSR trap handler
> 
> From: Jay Cornwall 
> 
> With corresponding FW change fixes issue where triggering CWSR on a
> workgroup with waves in s_barrier wouldn't lead to a back-off and therefore
> cause a hang.
> 
> Signed-off-by: Jay Cornwall 
> Tested-by: Graham Sider 

Reviewed-by: Graham Sider 

> ---
>  .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h| 764 +-
>  .../amd/amdkfd/cwsr_trap_handler_gfx10.asm|   6 +
>  2 files changed, 389 insertions(+), 381 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
> b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
> index c7118843db05..0c4c5499bb5c 100644
> --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
> +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
> @@ -2495,442 +2495,444 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
>   0xbf9f, 0x,
>  };
>  static const uint32_t cwsr_trap_gfx11_hex[] = {
> - 0xbfa1, 0xbfa0021e,
> + 0xbfa1, 0xbfa00221,
>   0xb0804006, 0xb8f8f802,
>   0x9178ff78, 0x00020006,
> - 0xb8fbf803, 0xbf0d9f6d,
> - 0xbfa20006, 0x8b6eff78,
> - 0x2000, 0xbfa10009,
> - 0x8b6eff6d, 0x00ff,
> - 0xbfa2001e, 0x8b6eff7b,
> - 0x0400, 0xbfa20041,
> - 0xbf830010, 0xb8fbf803,
> - 0xbfa0fffa, 0x8b6eff7b,
> - 0x0900, 0xbfa20015,
> - 0x8b6eff7b, 0x71ff,
> - 0xbfa10008, 0x8b6fff7b,
> - 0x7080, 0xbfa10001,
> - 0xbeee1287, 0xb8eff801,
> - 0x846e8c6e, 0x8b6e6f6e,
> - 0xbfa2000a, 0x8b6eff6d,
> - 0x00ff, 0xbfa20007,
> - 0xb8eef801, 0x8b6eff6e,
> - 0x0800, 0xbfa20003,
> + 0xb8fbf803, 0xbf0d9e6d,
> + 0xbfa10001, 0xbfbd,
> + 0xbf0d9f6d, 0xbfa20006,
> + 0x8b6eff78, 0x2000,
> + 0xbfa10009, 0x8b6eff6d,
> + 0x00ff, 0xbfa2001e,
>   0x8b6eff7b, 0x0400,
> - 0xbfa20026, 0xbefa4d82,
> - 0xbf89fc07, 0x84fa887a,
> - 0xf4005bbd, 0xf810,
> - 0xbf89fc07, 0x846e976e,
> - 0x9177ff77, 0x0080,
> - 0x8c776e77, 0xf4045bbd,
> - 0xf800, 0xbf89fc07,
> - 0xf4045ebd, 0xf808,
> - 0xbf89fc07, 0x8bee6e6e,
> - 0xbfa10001, 0xbe80486e,
> - 0x8b6eff6d, 0x01ff,
> - 0xbfa20005, 0x8c78ff78,
> - 0x2000, 0x80ec886c,
> - 0x82ed806d, 0xbfa5,
> - 0x8b6eff6d, 0x0100,
> - 0xbfa20002, 0x806c846c,
> - 0x826d806d, 0x8b6dff6d,
> - 0x, 0x8bfe7e7e,
> - 0x8bea6a6a, 0xb978f802,
> - 0xbe804a6c, 0x8b6dff6d,
> - 0x, 0xbefa0080,
> - 0xb97a0283, 0xbeee007e,
> - 0xbeef007f, 0xbefe0180,
> - 0xbefe4d84, 0xbf89fc07,
> - 0x8b7aff7f, 0x0400,
> - 0x847a857a, 0x8c6d7a6d,
> - 0xbefa007e, 0x8b7bff7f,
> - 0x, 0xbefe00c1,
> - 0xbeff00c1, 0xdca6c000,
> - 0x007a, 0x7e000280,
> - 0xbefe007a, 0xbeff007b,
> - 0xb8fb02dc, 0x847b997b,
> - 0xb8fa3b05, 0x807a817a,
> - 0xbf0d997b, 0xbfa20002,
> - 0x847a897a, 0xbfa1,
> - 0x847a8a7a, 0xb8fb1e06,
> - 0x847b8a7b, 0x807a7b7a,
> + 0xbfa20041, 0xbf830010,
> + 0xb8fbf803, 0xbfa0fffa,
> + 0x8b6eff7b, 0x0900,
> + 0xbfa20015, 0x8b6eff7b,
> + 0x71ff, 0xbfa10008,
> + 0x8b6fff7b, 0x7080,
> + 0xbfa10001, 0xbeee1287,
> + 0xb8eff801, 0x846e8c6e,
> + 0x8b6e6f6e, 0xbfa2000a,
> + 0x8b6eff6d, 0x00ff,
> + 0xbfa20007, 0xb8eef801,
> + 0x8b6eff6e, 0x0800,
> + 0xbfa20003, 0x8b6eff7b,
> + 0x0400, 0xbfa20026,
> + 0xbefa4d82, 0xbf89fc07,
> + 0x84fa887a, 0xf4005bbd,
> + 0xf810, 0xbf89fc07,
> + 0x846e976e, 0x9177ff77,
> + 0x0080, 0x8c776e77,
> + 0xf4045bbd, 0xf800,
> + 0xbf89fc07, 0xf4045ebd,
> + 0xf808, 0xbf89fc07,
> + 0x8bee6e6e, 0xbfa10001,
> + 0xbe80486e, 0x8b6eff6d,
> + 0x01ff, 0xbfa20005,
> + 0x8c78ff78, 0x2000,
> + 0x80ec886c, 0x82ed806d,
> + 0xbfa5, 0x8b6eff6d,
> + 0x0100, 0xbfa20002,
> + 0x806c846c, 0x826d806d,
> + 0x8b6dff6d, 0x,
> + 0x8bfe7e7e, 0x8bea6a6a,
> + 0xb978f802, 0xbe804a6c,
> + 0x8b6dff6d, 0x,
> + 0xbefa0080, 0xb97a0283,
> + 0xbeee007e, 0xbeef007f,
> + 0xbefe0180, 0xbefe4d84,
> + 0xbf89fc07, 0x8b7aff7f,
> + 0x0400, 0x847a857a,
> + 0x8c6d7a6d, 0xbefa007e,
>   0x8b7bff7f, 0x,
> - 0x807aff7a, 0x0200,
> - 0x807a7e7a, 0x827b807b,
> - 0xd761, 0x00010870,
> - 0xd761, 0x00010a71,
> - 0xd761, 0x00010c72,
> - 0xd761, 0x00010e73,
> - 0xd761, 0x00011074,
> - 0xd761, 0x00011275,
> - 0xd761, 0x00011476,
> - 0xd761, 0x00011677,
> - 0xd761, 0x00011a79,
>

Re: [PATCH] drm/amdkfd: update GFX11 CWSR trap handler

2022-10-31 Thread Felix Kuehling

Am 2022-10-26 um 17:05 schrieb Graham Sider:

From: Jay Cornwall 

Together with the corresponding FW change, this fixes an issue where
triggering CWSR on a workgroup with waves in s_barrier wouldn't lead to
a back-off and would therefore cause a hang.

Signed-off-by: Jay Cornwall 
Tested-by: Graham Sider 


Acked-by: Felix Kuehling 



---
  .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h| 764 +-
  .../amd/amdkfd/cwsr_trap_handler_gfx10.asm|   6 +
  2 files changed, 389 insertions(+), 381 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h 
b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index c7118843db05..0c4c5499bb5c 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -2495,442 +2495,444 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbf9f, 0x,
  };
  static const uint32_t cwsr_trap_gfx11_hex[] = {
-   0xbfa1, 0xbfa0021e,
+   0xbfa1, 0xbfa00221,
0xb0804006, 0xb8f8f802,
0x9178ff78, 0x00020006,
-   0xb8fbf803, 0xbf0d9f6d,
-   0xbfa20006, 0x8b6eff78,
-   0x2000, 0xbfa10009,
-   0x8b6eff6d, 0x00ff,
-   0xbfa2001e, 0x8b6eff7b,
-   0x0400, 0xbfa20041,
-   0xbf830010, 0xb8fbf803,
-   0xbfa0fffa, 0x8b6eff7b,
-   0x0900, 0xbfa20015,
-   0x8b6eff7b, 0x71ff,
-   0xbfa10008, 0x8b6fff7b,
-   0x7080, 0xbfa10001,
-   0xbeee1287, 0xb8eff801,
-   0x846e8c6e, 0x8b6e6f6e,
-   0xbfa2000a, 0x8b6eff6d,
-   0x00ff, 0xbfa20007,
-   0xb8eef801, 0x8b6eff6e,
-   0x0800, 0xbfa20003,
+   0xb8fbf803, 0xbf0d9e6d,
+   0xbfa10001, 0xbfbd,
+   0xbf0d9f6d, 0xbfa20006,
+   0x8b6eff78, 0x2000,
+   0xbfa10009, 0x8b6eff6d,
+   0x00ff, 0xbfa2001e,
0x8b6eff7b, 0x0400,
-   0xbfa20026, 0xbefa4d82,
-   0xbf89fc07, 0x84fa887a,
-   0xf4005bbd, 0xf810,
-   0xbf89fc07, 0x846e976e,
-   0x9177ff77, 0x0080,
-   0x8c776e77, 0xf4045bbd,
-   0xf800, 0xbf89fc07,
-   0xf4045ebd, 0xf808,
-   0xbf89fc07, 0x8bee6e6e,
-   0xbfa10001, 0xbe80486e,
-   0x8b6eff6d, 0x01ff,
-   0xbfa20005, 0x8c78ff78,
-   0x2000, 0x80ec886c,
-   0x82ed806d, 0xbfa5,
-   0x8b6eff6d, 0x0100,
-   0xbfa20002, 0x806c846c,
-   0x826d806d, 0x8b6dff6d,
-   0x, 0x8bfe7e7e,
-   0x8bea6a6a, 0xb978f802,
-   0xbe804a6c, 0x8b6dff6d,
-   0x, 0xbefa0080,
-   0xb97a0283, 0xbeee007e,
-   0xbeef007f, 0xbefe0180,
-   0xbefe4d84, 0xbf89fc07,
-   0x8b7aff7f, 0x0400,
-   0x847a857a, 0x8c6d7a6d,
-   0xbefa007e, 0x8b7bff7f,
-   0x, 0xbefe00c1,
-   0xbeff00c1, 0xdca6c000,
-   0x007a, 0x7e000280,
-   0xbefe007a, 0xbeff007b,
-   0xb8fb02dc, 0x847b997b,
-   0xb8fa3b05, 0x807a817a,
-   0xbf0d997b, 0xbfa20002,
-   0x847a897a, 0xbfa1,
-   0x847a8a7a, 0xb8fb1e06,
-   0x847b8a7b, 0x807a7b7a,
+   0xbfa20041, 0xbf830010,
+   0xb8fbf803, 0xbfa0fffa,
+   0x8b6eff7b, 0x0900,
+   0xbfa20015, 0x8b6eff7b,
+   0x71ff, 0xbfa10008,
+   0x8b6fff7b, 0x7080,
+   0xbfa10001, 0xbeee1287,
+   0xb8eff801, 0x846e8c6e,
+   0x8b6e6f6e, 0xbfa2000a,
+   0x8b6eff6d, 0x00ff,
+   0xbfa20007, 0xb8eef801,
+   0x8b6eff6e, 0x0800,
+   0xbfa20003, 0x8b6eff7b,
+   0x0400, 0xbfa20026,
+   0xbefa4d82, 0xbf89fc07,
+   0x84fa887a, 0xf4005bbd,
+   0xf810, 0xbf89fc07,
+   0x846e976e, 0x9177ff77,
+   0x0080, 0x8c776e77,
+   0xf4045bbd, 0xf800,
+   0xbf89fc07, 0xf4045ebd,
+   0xf808, 0xbf89fc07,
+   0x8bee6e6e, 0xbfa10001,
+   0xbe80486e, 0x8b6eff6d,
+   0x01ff, 0xbfa20005,
+   0x8c78ff78, 0x2000,
+   0x80ec886c, 0x82ed806d,
+   0xbfa5, 0x8b6eff6d,
+   0x0100, 0xbfa20002,
+   0x806c846c, 0x826d806d,
+   0x8b6dff6d, 0x,
+   0x8bfe7e7e, 0x8bea6a6a,
+   0xb978f802, 0xbe804a6c,
+   0x8b6dff6d, 0x,
+   0xbefa0080, 0xb97a0283,
+   0xbeee007e, 0xbeef007f,
+   0xbefe0180, 0xbefe4d84,
+   0xbf89fc07, 0x8b7aff7f,
+   0x0400, 0x847a857a,
+   0x8c6d7a6d, 0xbefa007e,
0x8b7bff7f, 0x,
-   0x807aff7a, 0x0200,
-   0x807a7e7a, 0x827b807b,
-   0xd761, 0x00010870,
-   0xd761, 0x00010a71,
-   0xd761, 0x00010c72,
-   0xd761, 0x00010e73,
-   0xd761, 0x00011074,
-   0xd761, 0x00011275,
-   0xd761, 0x00011476,
-   0xd761, 0x00011677,
-   0xd761, 0x00011a79,
-   0xd761, 0x00011c7e,
-   0xd761, 0x00011e7f,
-   0xbefe00ff, 0x3fff,
-   0xbeff0080, 0xdca6c040,
-   0x007a, 0xd760007a,
-   0x00011d00, 0xd760007b,
-   0x00011f00, 0xbefe007a,
-   0xbeff007b, 0xbef4007e,
-   0x8b75ff7f, 0x,
-   0x8c75ff7

RE: [PATCH] drm/amdkfd: update GFX11 CWSR trap handler

2022-10-27 Thread Kasiviswanathan, Harish
[AMD Official Use Only - General]

Acked-by: Harish Kasiviswanathan 

-----Original Message-----
From: Sider, Graham  
Sent: Wednesday, October 26, 2022 5:05 PM
To: amd-gfx@lists.freedesktop.org
Cc: Kuehling, Felix ; Kasiviswanathan, Harish 
; Cornwall, Jay ; Sider, 
Graham 
Subject: [PATCH] drm/amdkfd: update GFX11 CWSR trap handler

From: Jay Cornwall 

Together with the corresponding FW change, this fixes an issue where
triggering CWSR on a workgroup with waves in s_barrier wouldn't lead to
a back-off and would therefore cause a hang.

Signed-off-by: Jay Cornwall 
Tested-by: Graham Sider 
---
 .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h| 764 +-
 .../amd/amdkfd/cwsr_trap_handler_gfx10.asm|   6 +
 2 files changed, 389 insertions(+), 381 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h 
b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index c7118843db05..0c4c5499bb5c 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -2495,442 +2495,444 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbf9f, 0x,
 };
 static const uint32_t cwsr_trap_gfx11_hex[] = {
-   0xbfa1, 0xbfa0021e,
+   0xbfa1, 0xbfa00221,
0xb0804006, 0xb8f8f802,
0x9178ff78, 0x00020006,
-   0xb8fbf803, 0xbf0d9f6d,
-   0xbfa20006, 0x8b6eff78,
-   0x2000, 0xbfa10009,
-   0x8b6eff6d, 0x00ff,
-   0xbfa2001e, 0x8b6eff7b,
-   0x0400, 0xbfa20041,
-   0xbf830010, 0xb8fbf803,
-   0xbfa0fffa, 0x8b6eff7b,
-   0x0900, 0xbfa20015,
-   0x8b6eff7b, 0x71ff,
-   0xbfa10008, 0x8b6fff7b,
-   0x7080, 0xbfa10001,
-   0xbeee1287, 0xb8eff801,
-   0x846e8c6e, 0x8b6e6f6e,
-   0xbfa2000a, 0x8b6eff6d,
-   0x00ff, 0xbfa20007,
-   0xb8eef801, 0x8b6eff6e,
-   0x0800, 0xbfa20003,
+   0xb8fbf803, 0xbf0d9e6d,
+   0xbfa10001, 0xbfbd,
+   0xbf0d9f6d, 0xbfa20006,
+   0x8b6eff78, 0x2000,
+   0xbfa10009, 0x8b6eff6d,
+   0x00ff, 0xbfa2001e,
0x8b6eff7b, 0x0400,
-   0xbfa20026, 0xbefa4d82,
-   0xbf89fc07, 0x84fa887a,
-   0xf4005bbd, 0xf810,
-   0xbf89fc07, 0x846e976e,
-   0x9177ff77, 0x0080,
-   0x8c776e77, 0xf4045bbd,
-   0xf800, 0xbf89fc07,
-   0xf4045ebd, 0xf808,
-   0xbf89fc07, 0x8bee6e6e,
-   0xbfa10001, 0xbe80486e,
-   0x8b6eff6d, 0x01ff,
-   0xbfa20005, 0x8c78ff78,
-   0x2000, 0x80ec886c,
-   0x82ed806d, 0xbfa5,
-   0x8b6eff6d, 0x0100,
-   0xbfa20002, 0x806c846c,
-   0x826d806d, 0x8b6dff6d,
-   0x, 0x8bfe7e7e,
-   0x8bea6a6a, 0xb978f802,
-   0xbe804a6c, 0x8b6dff6d,
-   0x, 0xbefa0080,
-   0xb97a0283, 0xbeee007e,
-   0xbeef007f, 0xbefe0180,
-   0xbefe4d84, 0xbf89fc07,
-   0x8b7aff7f, 0x0400,
-   0x847a857a, 0x8c6d7a6d,
-   0xbefa007e, 0x8b7bff7f,
-   0x, 0xbefe00c1,
-   0xbeff00c1, 0xdca6c000,
-   0x007a, 0x7e000280,
-   0xbefe007a, 0xbeff007b,
-   0xb8fb02dc, 0x847b997b,
-   0xb8fa3b05, 0x807a817a,
-   0xbf0d997b, 0xbfa20002,
-   0x847a897a, 0xbfa1,
-   0x847a8a7a, 0xb8fb1e06,
-   0x847b8a7b, 0x807a7b7a,
+   0xbfa20041, 0xbf830010,
+   0xb8fbf803, 0xbfa0fffa,
+   0x8b6eff7b, 0x0900,
+   0xbfa20015, 0x8b6eff7b,
+   0x71ff, 0xbfa10008,
+   0x8b6fff7b, 0x7080,
+   0xbfa10001, 0xbeee1287,
+   0xb8eff801, 0x846e8c6e,
+   0x8b6e6f6e, 0xbfa2000a,
+   0x8b6eff6d, 0x00ff,
+   0xbfa20007, 0xb8eef801,
+   0x8b6eff6e, 0x0800,
+   0xbfa20003, 0x8b6eff7b,
+   0x0400, 0xbfa20026,
+   0xbefa4d82, 0xbf89fc07,
+   0x84fa887a, 0xf4005bbd,
+   0xf810, 0xbf89fc07,
+   0x846e976e, 0x9177ff77,
+   0x0080, 0x8c776e77,
+   0xf4045bbd, 0xf800,
+   0xbf89fc07, 0xf4045ebd,
+   0xf808, 0xbf89fc07,
+   0x8bee6e6e, 0xbfa10001,
+   0xbe80486e, 0x8b6eff6d,
+   0x01ff, 0xbfa20005,
+   0x8c78ff78, 0x2000,
+   0x80ec886c, 0x82ed806d,
+   0xbfa5, 0x8b6eff6d,
+   0x0100, 0xbfa20002,
+   0x806c846c, 0x826d806d,
+   0x8b6dff6d, 0x,
+   0x8bfe7e7e, 0x8bea6a6a,
+   0xb978f802, 0xbe804a6c,
+   0x8b6dff6d, 0x,
+   0xbefa0080, 0xb97a0283,
+   0xbeee007e, 0xbeef007f,
+   0xbefe0180, 0xbefe4d84,
+   0xbf89fc07, 0x8b7aff7f,
+   0x0400, 0x847a857a,
+   0x8c6d7a6d, 0xbefa007e,
0x8b7bff7f, 0x,
-   0x807aff7a, 0x0200,
-   0x807a7e7a, 0x827b807b,
-   0xd761, 0x00010870,
-   0xd761, 0x00010a71,
-   0xd761, 0x00010c72,
-   0xd761, 0x00010e73,
-   0xd761, 0x00011074,
-   0xd761, 0x00011275,
-   0xd761, 0x00011476,
-   0xd761, 0x00011677,
-   0xd761, 0x00011a79,
-   0xd761, 0x00011c7e,
-   0xd761