- Leave DEP_MODE unchanged as it is ignored in the trap handler
- Save/restore SCHED_MODE (gfx12.0 saves in ttmp11)

Signed-off-by: Jay Cornwall <[email protected]>
Cc: Lancelot Six <[email protected]>
Cc: Vladimir Indic <[email protected]>
---
 .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h    | 372 +++++++++---------
 .../amd/amdkfd/cwsr_trap_handler_gfx12.asm    |  32 +-
 2 files changed, 214 insertions(+), 190 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index d86bccc49e3f..9bb7fb6a83ed 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -4587,18 +4587,14 @@ static const uint32_t cwsr_trap_gfx9_5_0_hex[] = {
 };
 
 static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
-       0xbfa00001, 0xbfa003b4,
-       0xb0804009, 0xb8eef81a,
-       0xbf880000, 0xb980081a,
-       0x00000000, 0xb8f8f804,
-       0x9177ff77, 0x0c000000,
-       0x846e9a6e, 0x8c776e77,
+       0xbfa00001, 0xbfa003ac,
+       0xb0804009, 0xb8f8f804,
        0x9178ff78, 0x00008c00,
        0xb8fbf811, 0x8b6eff78,
        0x00004000, 0xbfa10008,
        0x8b6eff7b, 0x00000080,
        0xbfa20018, 0x8b6ea07b,
-       0xbfa200d4, 0xbf830010,
+       0xbfa200d1, 0xbf830010,
        0xb8fbf811, 0xbfa0fffb,
        0x8b6eff7b, 0x00000bd0,
        0xbfa20010, 0xb8eef812,
@@ -4609,7 +4605,7 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
        0xf0000000, 0xbfa20005,
        0x8b6fff6f, 0x00000200,
        0xbfa20002, 0x8b6ea07b,
-       0xbfa200be, 0x9177ff77,
+       0xbfa200bb, 0x9177ff77,
        0x007fc000, 0xb8fa04a1,
        0x847a967a, 0x8c777a77,
        0xb8fa0421, 0x847a957a,
@@ -4702,189 +4698,189 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
        0xb97a0421, 0x857a8e77,
        0xb97a3021, 0x8bfe7e7e,
        0x8bea6a6a, 0x85788978,
-       0x936eff77, 0x0002001a,
-       0xb96ef81a, 0xb9783244,
-       0xbe804a6c, 0xb8faf802,
-       0xbf0d987a, 0xbfa10001,
-       0xbfb00000, 0x8b6dff6d,
-       0x01ffffff, 0xbefa0080,
-       0xb97a0151, 0x9177ff77,
-       0x007fc000, 0xb8fa04a1,
-       0x847a967a, 0x8c777a77,
-       0xb8fa0421, 0x847a957a,
-       0x8c777a77, 0xb8fa3021,
-       0x847a8e7a, 0x8c777a77,
-       0xb980f821, 0x00000000,
-       0xbf0d847b, 0xbfa20078,
-       0xf4003eb6, 0xf8000000,
-       0xbfc70000, 0xf4003bb6,
-       0xf8000008, 0x8b76ff7a,
-       0x80000000, 0xbfa20027,
-       0x9376ff7a, 0x00060019,
-       0x81f9a376, 0xbf0b8179,
-       0xbfa20068, 0x81f9ac76,
-       0xbf0b8179, 0xbfa20062,
-       0x81f9b776, 0xbf0b8179,
-       0xbfa2005f, 0x8b76ff7a,
-       0x000001ff, 0xbf06ff76,
-       0x000000fe, 0xbfa2005d,
-       0xbf06ff76, 0x000000ff,
-       0xbfa20057, 0xbf06ff76,
-       0x000000fa, 0xbfa20054,
-       0x81f9ff76, 0x000000e9,
-       0xbf0b8179, 0xbfa20050,
-       0x8b76ff7b, 0xffff0000,
-       0xbf06ff76, 0xbf860000,
-       0xbfa10051, 0x9376ff7b,
-       0x0002000e, 0x8b79ff7b,
-       0x00003f00, 0x85798679,
-       0x8c767976, 0xb9763b01,
-       0xbfa00049, 0x8b76ff7a,
-       0xfc000000, 0xbf06ff76,
-       0xd4000000, 0xbfa20013,
-       0xbf06ff76, 0xc8000000,
-       0xbfa20027, 0x8b76ff7a,
-       0xff000000, 0xbf06ff76,
-       0xcf000000, 0xbfa20039,
-       0x8b79ff7a, 0xffff0000,
-       0xbf06ff79, 0xcc350000,
-       0xbfa20037, 0xbf06ff79,
-       0xcc3a0000, 0xbfa20034,
-       0xbf06ff76, 0xcc000000,
-       0xbfa10031, 0x8b76ff7b,
-       0x000001ff, 0xbf06ff76,
-       0x000000ff, 0xbfa20029,
-       0xbf06ff76, 0x000000fa,
-       0xbfa20026, 0x81f6ff76,
-       0x000000e9, 0xbf0b8176,
-       0xbfa20022, 0x8b76ff7b,
-       0x0003fe00, 0xbf06ff76,
-       0x0001fe00, 0xbfa2001d,
-       0x8b76ff7b, 0x07fc0000,
-       0xbf06ff76, 0x03fc0000,
-       0xbfa20018, 0xbfa00014,
-       0x9376ff7a, 0x00040016,
-       0x81f68176, 0xbf0b8176,
-       0xbfa20012, 0x9376ff7a,
-       0x00050011, 0x81f68176,
-       0xbf0b8176, 0xbfa2000d,
+       0xb9783244, 0xbe804a6c,
+       0xb8faf802, 0xbf0d987a,
+       0xbfa10001, 0xbfb00000,
+       0x8b6dff6d, 0x01ffffff,
+       0xbefa0080, 0xb97a0151,
+       0x9177ff77, 0x007fc000,
+       0xb8fa04a1, 0x847a967a,
+       0x8c777a77, 0xb8fa0421,
+       0x847a957a, 0x8c777a77,
+       0xb8fa3021, 0x847a8e7a,
+       0x8c777a77, 0xb980f821,
+       0x00000000, 0xbf0d847b,
+       0xbfa20078, 0xf4003eb6,
+       0xf8000000, 0xbfc70000,
+       0xf4003bb6, 0xf8000008,
+       0x8b76ff7a, 0x80000000,
+       0xbfa20027, 0x9376ff7a,
+       0x00060019, 0x81f9a376,
+       0xbf0b8179, 0xbfa20068,
+       0x81f9ac76, 0xbf0b8179,
+       0xbfa20062, 0x81f9b776,
+       0xbf0b8179, 0xbfa2005f,
        0x8b76ff7a, 0x000001ff,
+       0xbf06ff76, 0x000000fe,
+       0xbfa2005d, 0xbf06ff76,
+       0x000000ff, 0xbfa20057,
+       0xbf06ff76, 0x000000fa,
+       0xbfa20054, 0x81f9ff76,
+       0x000000e9, 0xbf0b8179,
+       0xbfa20050, 0x8b76ff7b,
+       0xffff0000, 0xbf06ff76,
+       0xbf860000, 0xbfa10051,
+       0x9376ff7b, 0x0002000e,
+       0x8b79ff7b, 0x00003f00,
+       0x85798679, 0x8c767976,
+       0xb9763b01, 0xbfa00049,
+       0x8b76ff7a, 0xfc000000,
+       0xbf06ff76, 0xd4000000,
+       0xbfa20013, 0xbf06ff76,
+       0xc8000000, 0xbfa20027,
+       0x8b76ff7a, 0xff000000,
+       0xbf06ff76, 0xcf000000,
+       0xbfa20039, 0x8b79ff7a,
+       0xffff0000, 0xbf06ff79,
+       0xcc350000, 0xbfa20037,
+       0xbf06ff79, 0xcc3a0000,
+       0xbfa20034, 0xbf06ff76,
+       0xcc000000, 0xbfa10031,
+       0x8b76ff7b, 0x000001ff,
        0xbf06ff76, 0x000000ff,
-       0xbfa20008, 0x8b76ff7b,
+       0xbfa20029, 0xbf06ff76,
+       0x000000fa, 0xbfa20026,
+       0x81f6ff76, 0x000000e9,
+       0xbf0b8176, 0xbfa20022,
+       0x8b76ff7b, 0x0003fe00,
+       0xbf06ff76, 0x0001fe00,
+       0xbfa2001d, 0x8b76ff7b,
+       0x07fc0000, 0xbf06ff76,
+       0x03fc0000, 0xbfa20018,
+       0xbfa00014, 0x9376ff7a,
+       0x00040016, 0x81f68176,
+       0xbf0b8176, 0xbfa20012,
+       0x9376ff7a, 0x00050011,
+       0x81f68176, 0xbf0b8176,
+       0xbfa2000d, 0x8b76ff7a,
        0x000001ff, 0xbf06ff76,
-       0x000000ff, 0xbfa20003,
-       0xbfc70000, 0xbefb006e,
-       0xbfa0ffad, 0xbfc70000,
-       0xbefb006f, 0xbfa0ffaa,
-       0xbfc70000, 0xbeee007e,
-       0xbeef007f, 0xbefe0180,
-       0xbefe4d84, 0xbf8a0000,
-       0x8b7aff7f, 0x04000000,
-       0x847a857a, 0x8c6d7a6d,
-       0xb8eff822, 0xb980f822,
-       0x00000000, 0xb8fa2b01,
-       0x847a997a, 0x8c6d7a6d,
-       0xbefa0080, 0xb97a2b01,
-       0xbefa007e, 0x8b7bff7f,
-       0x01ffffff, 0xbefe00c1,
-       0xbeff00c1, 0xee0a407a,
-       0x000c0000, 0x00000000,
-       0x7e000280, 0xbefe007a,
-       0xbeff007b, 0xb8fb0742,
-       0x847b997b, 0xb8fa3b05,
-       0x807a817a, 0xbf0d997b,
-       0xbfa20002, 0x847a897a,
-       0xbfa00001, 0x847a8a7a,
+       0x000000ff, 0xbfa20008,
+       0x8b76ff7b, 0x000001ff,
+       0xbf06ff76, 0x000000ff,
+       0xbfa20003, 0xbfc70000,
+       0xbefb006e, 0xbfa0ffad,
+       0xbfc70000, 0xbefb006f,
+       0xbfa0ffaa, 0xbfc70000,
+       0xbeee007e, 0xbeef007f,
+       0xbefe0180, 0xbefe4d84,
+       0xbf8a0000, 0x8b7aff7f,
+       0x04000000, 0x847a857a,
+       0x8c6d7a6d, 0xb8eff822,
+       0xb980f822, 0x00000000,
+       0xb8fa2b01, 0x847a997a,
+       0x8c6d7a6d, 0xbefa0080,
+       0xb97a2b01, 0xbefa007e,
        0x8b7bff7f, 0x01ffffff,
-       0x807aff7a, 0x000001c0,
-       0x807a7e7a, 0x827b807b,
-       0xd7610000, 0x00010870,
-       0xd7610000, 0x00010a71,
-       0xd7610000, 0x00010c72,
-       0xd7610000, 0x00010e73,
-       0xd7610000, 0x00011074,
-       0xd7610000, 0x00011275,
-       0xd7610000, 0x00011476,
-       0xd7610000, 0x00011677,
-       0xd7610000, 0x00011a79,
-       0xd7610000, 0x00011c7e,
-       0xd7610000, 0x00011e7f,
-       0xbefe00ff, 0x00003fff,
-       0xbeff0080, 0xee0a407a,
-       0x000c0000, 0x00000000,
-       0xd760007a, 0x00011d00,
-       0xd760007b, 0x00011f00,
+       0xbefe00c1, 0xbeff00c1,
+       0xee0a407a, 0x000c0000,
+       0x00000000, 0x7e000280,
        0xbefe007a, 0xbeff007b,
-       0xbef4007e, 0x8b75ff7f,
-       0x01ffffff, 0xbef1007d,
-       0xb8f30742, 0x84739973,
-       0xbefe00c1, 0x857d9973,
-       0x8b7d817d, 0xbf06817d,
-       0xbfa20002, 0xbeff0080,
-       0xbfa00002, 0xbeff00c1,
-       0xbfa0000a, 0xee0a4074,
-       0x008c0000, 0x00008000,
-       0xee0a4074, 0x010c0000,
+       0xb8fb0742, 0x847b997b,
+       0xb8fa3b05, 0x807a817a,
+       0xbf0d997b, 0xbfa20002,
+       0x847a897a, 0xbfa00001,
+       0x847a8a7a, 0x8b7bff7f,
+       0x01ffffff, 0x807aff7a,
+       0x000001c0, 0x807a7e7a,
+       0x827b807b, 0xd7610000,
+       0x00010870, 0xd7610000,
+       0x00010a71, 0xd7610000,
+       0x00010c72, 0xd7610000,
+       0x00010e73, 0xd7610000,
+       0x00011074, 0xd7610000,
+       0x00011275, 0xd7610000,
+       0x00011476, 0xd7610000,
+       0x00011677, 0xd7610000,
+       0x00011a79, 0xd7610000,
+       0x00011c7e, 0xd7610000,
+       0x00011e7f, 0xbefe00ff,
+       0x00003fff, 0xbeff0080,
+       0xee0a407a, 0x000c0000,
+       0x00000000, 0xd760007a,
+       0x00011d00, 0xd760007b,
+       0x00011f00, 0xbefe007a,
+       0xbeff007b, 0xbef4007e,
+       0x8b75ff7f, 0x01ffffff,
+       0xbef1007d, 0xb8f30742,
+       0x84739973, 0xbefe00c1,
+       0x857d9973, 0x8b7d817d,
+       0xbf06817d, 0xbfa20002,
+       0xbeff0080, 0xbfa00002,
+       0xbeff00c1, 0xbfa0000a,
+       0xee0a4074, 0x008c0000,
+       0x00008000, 0xee0a4074,
+       0x010c0000, 0x00010000,
+       0xee0a4074, 0x018c0000,
+       0x00018000, 0xbfa00009,
+       0xee0a4074, 0x008c0000,
        0x00010000, 0xee0a4074,
-       0x018c0000, 0x00018000,
-       0xbfa00009, 0xee0a4074,
-       0x008c0000, 0x00010000,
-       0xee0a4074, 0x010c0000,
-       0x00020000, 0xee0a4074,
-       0x018c0000, 0x00030000,
-       0xb8f03b05, 0x80708170,
-       0xbf0d9973, 0xbfa20002,
-       0x84708970, 0xbfa00001,
-       0x84708a70, 0x8070ff70,
-       0x00000200, 0x7e000280,
-       0x7e020280, 0x7e040280,
-       0xbefd0080, 0xd7610002,
-       0x0000fa71, 0x807d817d,
-       0xb8faf802, 0xbf0c8b7a,
-       0xbfa20003, 0xbe804fc2,
-       0xbf94fffe, 0xbfa10001,
-       0xbe804ec4, 0xbf94fffc,
-       0xbefa4c88, 0xbfc70000,
-       0xbf0c807a, 0xbfa20006,
-       0x9371ff7a, 0x00070004,
-       0x937aff7a, 0x00070010,
-       0xbf06717a, 0xbfa2fff6,
-       0xb8faf804, 0x8b7aff7a,
-       0x0001000c, 0x9178ff78,
-       0x0001000c, 0x8c787a78,
-       0xd7610002, 0x0000fa6c,
-       0x807d817d, 0x917aff6d,
-       0x80000000, 0xd7610002,
+       0x010c0000, 0x00020000,
+       0xee0a4074, 0x018c0000,
+       0x00030000, 0xb8f03b05,
+       0x80708170, 0xbf0d9973,
+       0xbfa20002, 0x84708970,
+       0xbfa00001, 0x84708a70,
+       0x8070ff70, 0x00000200,
+       0x7e000280, 0x7e020280,
+       0x7e040280, 0xbefd0080,
+       0xd7610002, 0x0000fa71,
+       0x807d817d, 0xb8faf802,
+       0xbf0c8b7a, 0xbfa20003,
+       0xbe804fc2, 0xbf94fffe,
+       0xbfa10001, 0xbe804ec4,
+       0xbf94fffc, 0xbefa4c88,
+       0xbfc70000, 0xbf0c807a,
+       0xbfa20006, 0x9371ff7a,
+       0x00070004, 0x937aff7a,
+       0x00070010, 0xbf06717a,
+       0xbfa2fff6, 0xb8faf804,
+       0x8b7aff7a, 0x0001000c,
+       0x9178ff78, 0x0001000c,
+       0x8c787a78, 0xd7610002,
+       0x0000fa6c, 0x807d817d,
+       0x917aff6d, 0x80000000,
+       0xd7610002, 0x0000fa7a,
+       0x807d817d, 0xd7610002,
+       0x0000fa6e, 0x807d817d,
+       0xbefa0080, 0xd7610002,
        0x0000fa7a, 0x807d817d,
-       0xd7610002, 0x0000fa6e,
-       0x807d817d, 0xbefa0080,
+       0xd7610002, 0x0000fa78,
+       0x807d817d, 0xb8faf811,
        0xd7610002, 0x0000fa7a,
        0x807d817d, 0xd7610002,
-       0x0000fa78, 0x807d817d,
-       0xb8faf811, 0xd7610002,
+       0x0000fa6f, 0x807d817d,
+       0xb8f1f801, 0x937aff6d,
+       0x00060019, 0x847a8c7a,
+       0x8c717a71, 0xd7610002,
+       0x0000fa71, 0x807d817d,
+       0xb8f1f814, 0xd7610002,
+       0x0000fa71, 0x807d817d,
+       0xb8f1f815, 0xd7610002,
+       0x0000fa71, 0x807d817d,
+       0xb8f1f812, 0xd7610002,
+       0x0000fa71, 0x807d817d,
+       0xb8f1f813, 0xd7610002,
+       0x0000fa71, 0x807d817d,
+       0xb8faf802, 0xd7610002,
        0x0000fa7a, 0x807d817d,
-       0xd7610002, 0x0000fa6f,
-       0x807d817d, 0xb8f1f801,
-       0x937aff6d, 0x00060019,
-       0x847a8c7a, 0x8c717a71,
-       0xd7610002, 0x0000fa71,
-       0x807d817d, 0xb8f1f814,
-       0xd7610002, 0x0000fa71,
-       0x807d817d, 0xb8f1f815,
-       0xd7610002, 0x0000fa71,
-       0x807d817d, 0xb8f1f812,
-       0xd7610002, 0x0000fa71,
-       0x807d817d, 0xb8f1f813,
-       0xd7610002, 0x0000fa71,
-       0x807d817d, 0xb8faf802,
+       0xbefa50c1, 0xbfc70000,
        0xd7610002, 0x0000fa7a,
-       0x807d817d, 0xbefa50c1,
+       0x807d817d, 0xbefa4c88,
        0xbfc70000, 0xd7610002,
        0x0000fa7a, 0x807d817d,
-       0xbefa4c88, 0xbfc70000,
-       0xd7610002, 0x0000fa7a,
-       0x807d817d, 0xbefe00ff,
-       0x0000ffff, 0xbeff0080,
+       0xb8faf81a, 0xd7610002,
+       0x0000fa7a, 0x807d817d,
+       0xbefe00c1, 0xbeff0080,
        0x80767074, 0x82778075,
        0xee0a4076, 0x010c0000,
        0x00000000, 0xbefe00c1,
@@ -5061,7 +5057,7 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
        0x018c0000, 0x00030000,
        0x807d847d, 0x8070ff70,
        0x00000400, 0xbf0a7b7d,
-       0xbfa2ffe9, 0xbfa00183,
+       0xbfa2ffe9, 0xbfa00184,
        0xbef4007e, 0x8b75ff7f,
        0x01ffffff, 0xbef1007f,
        0xb8f20742, 0x84729972,
@@ -5229,6 +5225,8 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
        0x856e906e, 0x8b6e6e6e,
        0xbfa10003, 0xbe804ec3,
        0x816ec16e, 0xbfa0fffb,
+       0xf4601bbb, 0xf8000040,
+       0xbfc70000, 0xb96ef81a,
        0xbefd006f, 0xbefe0070,
        0xbeff0071, 0xb979f822,
        0xb97b2011, 0x857b867b,
@@ -5248,19 +5246,17 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
        0x856e8e77, 0xb96e3021,
        0x8b6dff6d, 0x01ffffff,
        0x8bfe7e7e, 0x8bea6a6a,
-       0x936eff77, 0x0002001a,
-       0xb96ef81a, 0xb97af804,
+       0xb97af804, 0xb8eef802,
+       0xbf0c8b6e, 0xbfa20003,
+       0xbe804fc2, 0xbf94fffe,
+       0xbfa10001, 0xbe804ec4,
+       0xbf94fffc, 0x857a897a,
+       0xb97a0244, 0xbe804a6c,
        0xb8eef802, 0xbf0c8b6e,
        0xbfa20003, 0xbe804fc2,
        0xbf94fffe, 0xbfa10001,
        0xbe804ec4, 0xbf94fffc,
-       0x857a897a, 0xb97a0244,
-       0xbe804a6c, 0xb8eef802,
-       0xbf0c8b6e, 0xbfa20003,
-       0xbe804fc2, 0xbf94fffe,
-       0xbfa10001, 0xbe804ec4,
-       0xbf94fffc, 0xbfb10000,
+       0xbfb10000, 0xbf9f0000,
        0xbf9f0000, 0xbf9f0000,
        0xbf9f0000, 0xbf9f0000,
-       0xbf9f0000, 0x00000000,
 };
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
index ace2a9f2ac73..ccc61f60ceb3 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
@@ -36,6 +36,7 @@
 #define NUM_NAMED_BARRIERS (ASIC_FAMILY == CHIP_GC_12_0_3 ? 0x10 : 0)
 #define HAVE_CLUSTER_BARRIER (ASIC_FAMILY == CHIP_GC_12_0_3)
 #define CLUSTER_BARRIER_SERIALIZE_WORKAROUND (ASIC_FAMILY == CHIP_GC_12_0_3)
+#define RELAXED_SCHEDULING_IN_TRAP (ASIC_FAMILY == CHIP_GFX12)
 
 #define SINGLE_STEP_MISSED_WORKAROUND 1        //workaround for lost TRAP_AFTER_INST exception when SAVECTX raised
 #define HAVE_VALU_SGPR_HAZARD (ASIC_FAMILY == CHIP_GFX12)
@@ -110,9 +111,11 @@ var BARRIER_STATE_MEMBER_OFFSET                    = 4
 var BARRIER_STATE_MEMBER_SIZE                  = 7
 var BARRIER_STATE_VALID_OFFSET                 = 0
 
+#if RELAXED_SCHEDULING_IN_TRAP
 var TTMP11_SCHED_MODE_SHIFT                    = 26
 var TTMP11_SCHED_MODE_SIZE                     = 2
 var TTMP11_SCHED_MODE_MASK                     = 0xC000000
+#endif
 
 var NAMED_BARRIERS_SR_OFFSET_FROM_HWREG                = 0x80
 var S_BARRIER_INIT_MEMBERCNT_MASK              = 0x7F0000
@@ -223,18 +226,22 @@ L_JUMP_TO_RESTORE:
        s_branch        L_RESTORE
 
 L_SKIP_RESTORE:
+#if RELAXED_SCHEDULING_IN_TRAP
        // Assume most relaxed scheduling mode is set. Save and revert to normal mode.
        s_getreg_b32    ttmp2, hwreg(HW_REG_WAVE_SCHED_MODE)
        s_wait_alu      0
        s_setreg_imm32_b32      hwreg(HW_REG_WAVE_SCHED_MODE, \
                SQ_WAVE_SCHED_MODE_DEP_MODE_SHIFT, SQ_WAVE_SCHED_MODE_DEP_MODE_SIZE), 0
+#endif
 
        s_getreg_b32    s_save_state_priv, hwreg(HW_REG_WAVE_STATE_PRIV)        //save STATUS since we will change SCC
 
+#if RELAXED_SCHEDULING_IN_TRAP
        // Save SCHED_MODE[1:0] into ttmp11[27:26].
        s_andn2_b32     ttmp11, ttmp11, TTMP11_SCHED_MODE_MASK
        s_lshl_b32      ttmp2, ttmp2, TTMP11_SCHED_MODE_SHIFT
        s_or_b32        ttmp11, ttmp11, ttmp2
+#endif
 
        // Clear SPI_PRIO: do not save with elevated priority.
        // Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd.
@@ -316,7 +323,7 @@ L_FETCH_2ND_TRAP:
        s_cbranch_scc0  L_NO_SIGN_EXTEND_TMA
        s_or_b32        ttmp15, ttmp15, ~ADDRESS_HI32_MASK
 L_NO_SIGN_EXTEND_TMA:
-#if ASIC_FAMILY == CHIP_GFX12
+#if RELAXED_SCHEDULING_IN_TRAP
        // Move SCHED_MODE[1:0] from ttmp11 to unused bits in ttmp1[27:26] (return PC_HI).
        // The second-level trap will restore from ttmp1 for backwards compatibility.
        s_and_b32       ttmp2, ttmp11, TTMP11_SCHED_MODE_MASK
@@ -382,8 +389,10 @@ L_EXIT_TRAP:
        // Only restore fields which the trap handler changes.
        s_lshr_b32      s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_SCC_SHIFT
 
+#if RELAXED_SCHEDULING_IN_TRAP
        // Assume relaxed scheduling mode after this point.
        restore_sched_mode(ttmp2)
+#endif
 
        s_setreg_b32    hwreg(HW_REG_WAVE_STATE_PRIV, SQ_WAVE_STATE_PRIV_SCC_SHIFT, \
                SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT - SQ_WAVE_STATE_PRIV_SCC_SHIFT + 1), s_save_state_priv
@@ -591,8 +600,18 @@ L_SAVE_HWREG:
        write_hwreg_to_v2(s_save_tmp)
 #endif
 
+#if ASIC_FAMILY >= CHIP_GC_12_0_3
+       s_getreg_b32    s_save_tmp, hwreg(HW_REG_WAVE_SCHED_MODE)
+       write_hwreg_to_v2(s_save_tmp)
+#endif
+
+#if ! SAVE_TTMPS_IN_SGPR_BLOCK
        // Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this.
        s_mov_b32       exec_lo, 0xFFFF
+#else
+       // All 128 bytes are available for HWREGs.
+       s_mov_b32       exec_lo, 0xFFFFFFFF
+#endif
        s_mov_b32       exec_hi, 0x0
        s_add_u32       s_save_addr_lo, s_save_base_addr_lo, s_save_mem_offset
        s_addc_u32      s_save_addr_hi, s_save_base_addr_hi, 0x0
@@ -1155,6 +1174,12 @@ L_SKIP_TRAP_CLUSTER_BARRIER_SIGNAL:
 L_SKIP_CLUSTER_BARRIER_RESTORE:
 #endif
 
+#if ASIC_FAMILY >= CHIP_GC_12_0_3
+       s_load_b32      s_restore_tmp, [s_restore_addr_lo, s_restore_addr_hi], null scope:SCOPE_SYS offset:0x40
+       s_wait_kmcnt    0
+       s_setreg_b32    hwreg(HW_REG_WAVE_SCHED_MODE), s_restore_tmp
+#endif
+
        s_mov_b32       m0, s_restore_m0
        s_mov_b32       exec_lo, s_restore_exec_lo
        s_mov_b32       exec_hi, s_restore_exec_hi
@@ -1194,8 +1219,10 @@ L_SKIP_CLUSTER_BARRIER_RESTORE:
        s_and_b64       exec, exec, exec                                        // Restore STATUS.EXECZ, not writable by s_setreg_b32
        s_and_b64       vcc, vcc, vcc                                           // Restore STATUS.VCCZ, not writable by s_setreg_b32
 
+#if RELAXED_SCHEDULING_IN_TRAP
        // Assume relaxed scheduling mode after this point.
        restore_sched_mode(s_restore_tmp)
+#endif
 
        s_setreg_b32    hwreg(HW_REG_WAVE_STATE_PRIV), s_restore_state_priv     // SCC is included, which is changed by previous salu
 
@@ -1347,11 +1374,12 @@ L_NOT_IN_CLUSTER:
 #endif
 end
 
-
+#if RELAXED_SCHEDULING_IN_TRAP
 function restore_sched_mode(s_tmp)
        s_bfe_u32       s_tmp, ttmp11, (TTMP11_SCHED_MODE_SHIFT | (TTMP11_SCHED_MODE_SIZE << 0x10))
        s_setreg_b32    hwreg(HW_REG_WAVE_SCHED_MODE), s_tmp
 end
+#endif
 
 function restore_barrier_signal_count(barrier_id)
        // extract the saved signal count from s_restore_tmp
-- 
2.34.1

Reply via email to