- Leave DEP_MODE unchanged, as it is ignored in the trap handler.
- Save/restore SCHED_MODE (gfx12.0 saves it in ttmp11).
Signed-off-by: Jay Cornwall <[email protected]> Cc: Lancelot Six <[email protected]> Cc: Vladimir Indic <[email protected]> --- .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 372 +++++++++--------- .../amd/amdkfd/cwsr_trap_handler_gfx12.asm | 32 +- 2 files changed, 214 insertions(+), 190 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index d86bccc49e3f..9bb7fb6a83ed 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -4587,18 +4587,14 @@ static const uint32_t cwsr_trap_gfx9_5_0_hex[] = { }; static const uint32_t cwsr_trap_gfx12_1_0_hex[] = { - 0xbfa00001, 0xbfa003b4, - 0xb0804009, 0xb8eef81a, - 0xbf880000, 0xb980081a, - 0x00000000, 0xb8f8f804, - 0x9177ff77, 0x0c000000, - 0x846e9a6e, 0x8c776e77, + 0xbfa00001, 0xbfa003ac, + 0xb0804009, 0xb8f8f804, 0x9178ff78, 0x00008c00, 0xb8fbf811, 0x8b6eff78, 0x00004000, 0xbfa10008, 0x8b6eff7b, 0x00000080, 0xbfa20018, 0x8b6ea07b, - 0xbfa200d4, 0xbf830010, + 0xbfa200d1, 0xbf830010, 0xb8fbf811, 0xbfa0fffb, 0x8b6eff7b, 0x00000bd0, 0xbfa20010, 0xb8eef812, @@ -4609,7 +4605,7 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = { 0xf0000000, 0xbfa20005, 0x8b6fff6f, 0x00000200, 0xbfa20002, 0x8b6ea07b, - 0xbfa200be, 0x9177ff77, + 0xbfa200bb, 0x9177ff77, 0x007fc000, 0xb8fa04a1, 0x847a967a, 0x8c777a77, 0xb8fa0421, 0x847a957a, @@ -4702,189 +4698,189 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = { 0xb97a0421, 0x857a8e77, 0xb97a3021, 0x8bfe7e7e, 0x8bea6a6a, 0x85788978, - 0x936eff77, 0x0002001a, - 0xb96ef81a, 0xb9783244, - 0xbe804a6c, 0xb8faf802, - 0xbf0d987a, 0xbfa10001, - 0xbfb00000, 0x8b6dff6d, - 0x01ffffff, 0xbefa0080, - 0xb97a0151, 0x9177ff77, - 0x007fc000, 0xb8fa04a1, - 0x847a967a, 0x8c777a77, - 0xb8fa0421, 0x847a957a, - 0x8c777a77, 0xb8fa3021, - 0x847a8e7a, 0x8c777a77, - 0xb980f821, 0x00000000, - 0xbf0d847b, 0xbfa20078, - 0xf4003eb6, 0xf8000000, - 0xbfc70000, 0xf4003bb6, - 0xf8000008, 0x8b76ff7a, 
- 0x80000000, 0xbfa20027, - 0x9376ff7a, 0x00060019, - 0x81f9a376, 0xbf0b8179, - 0xbfa20068, 0x81f9ac76, - 0xbf0b8179, 0xbfa20062, - 0x81f9b776, 0xbf0b8179, - 0xbfa2005f, 0x8b76ff7a, - 0x000001ff, 0xbf06ff76, - 0x000000fe, 0xbfa2005d, - 0xbf06ff76, 0x000000ff, - 0xbfa20057, 0xbf06ff76, - 0x000000fa, 0xbfa20054, - 0x81f9ff76, 0x000000e9, - 0xbf0b8179, 0xbfa20050, - 0x8b76ff7b, 0xffff0000, - 0xbf06ff76, 0xbf860000, - 0xbfa10051, 0x9376ff7b, - 0x0002000e, 0x8b79ff7b, - 0x00003f00, 0x85798679, - 0x8c767976, 0xb9763b01, - 0xbfa00049, 0x8b76ff7a, - 0xfc000000, 0xbf06ff76, - 0xd4000000, 0xbfa20013, - 0xbf06ff76, 0xc8000000, - 0xbfa20027, 0x8b76ff7a, - 0xff000000, 0xbf06ff76, - 0xcf000000, 0xbfa20039, - 0x8b79ff7a, 0xffff0000, - 0xbf06ff79, 0xcc350000, - 0xbfa20037, 0xbf06ff79, - 0xcc3a0000, 0xbfa20034, - 0xbf06ff76, 0xcc000000, - 0xbfa10031, 0x8b76ff7b, - 0x000001ff, 0xbf06ff76, - 0x000000ff, 0xbfa20029, - 0xbf06ff76, 0x000000fa, - 0xbfa20026, 0x81f6ff76, - 0x000000e9, 0xbf0b8176, - 0xbfa20022, 0x8b76ff7b, - 0x0003fe00, 0xbf06ff76, - 0x0001fe00, 0xbfa2001d, - 0x8b76ff7b, 0x07fc0000, - 0xbf06ff76, 0x03fc0000, - 0xbfa20018, 0xbfa00014, - 0x9376ff7a, 0x00040016, - 0x81f68176, 0xbf0b8176, - 0xbfa20012, 0x9376ff7a, - 0x00050011, 0x81f68176, - 0xbf0b8176, 0xbfa2000d, + 0xb9783244, 0xbe804a6c, + 0xb8faf802, 0xbf0d987a, + 0xbfa10001, 0xbfb00000, + 0x8b6dff6d, 0x01ffffff, + 0xbefa0080, 0xb97a0151, + 0x9177ff77, 0x007fc000, + 0xb8fa04a1, 0x847a967a, + 0x8c777a77, 0xb8fa0421, + 0x847a957a, 0x8c777a77, + 0xb8fa3021, 0x847a8e7a, + 0x8c777a77, 0xb980f821, + 0x00000000, 0xbf0d847b, + 0xbfa20078, 0xf4003eb6, + 0xf8000000, 0xbfc70000, + 0xf4003bb6, 0xf8000008, + 0x8b76ff7a, 0x80000000, + 0xbfa20027, 0x9376ff7a, + 0x00060019, 0x81f9a376, + 0xbf0b8179, 0xbfa20068, + 0x81f9ac76, 0xbf0b8179, + 0xbfa20062, 0x81f9b776, + 0xbf0b8179, 0xbfa2005f, 0x8b76ff7a, 0x000001ff, + 0xbf06ff76, 0x000000fe, + 0xbfa2005d, 0xbf06ff76, + 0x000000ff, 0xbfa20057, + 0xbf06ff76, 0x000000fa, + 0xbfa20054, 0x81f9ff76, 
+ 0x000000e9, 0xbf0b8179, + 0xbfa20050, 0x8b76ff7b, + 0xffff0000, 0xbf06ff76, + 0xbf860000, 0xbfa10051, + 0x9376ff7b, 0x0002000e, + 0x8b79ff7b, 0x00003f00, + 0x85798679, 0x8c767976, + 0xb9763b01, 0xbfa00049, + 0x8b76ff7a, 0xfc000000, + 0xbf06ff76, 0xd4000000, + 0xbfa20013, 0xbf06ff76, + 0xc8000000, 0xbfa20027, + 0x8b76ff7a, 0xff000000, + 0xbf06ff76, 0xcf000000, + 0xbfa20039, 0x8b79ff7a, + 0xffff0000, 0xbf06ff79, + 0xcc350000, 0xbfa20037, + 0xbf06ff79, 0xcc3a0000, + 0xbfa20034, 0xbf06ff76, + 0xcc000000, 0xbfa10031, + 0x8b76ff7b, 0x000001ff, 0xbf06ff76, 0x000000ff, - 0xbfa20008, 0x8b76ff7b, + 0xbfa20029, 0xbf06ff76, + 0x000000fa, 0xbfa20026, + 0x81f6ff76, 0x000000e9, + 0xbf0b8176, 0xbfa20022, + 0x8b76ff7b, 0x0003fe00, + 0xbf06ff76, 0x0001fe00, + 0xbfa2001d, 0x8b76ff7b, + 0x07fc0000, 0xbf06ff76, + 0x03fc0000, 0xbfa20018, + 0xbfa00014, 0x9376ff7a, + 0x00040016, 0x81f68176, + 0xbf0b8176, 0xbfa20012, + 0x9376ff7a, 0x00050011, + 0x81f68176, 0xbf0b8176, + 0xbfa2000d, 0x8b76ff7a, 0x000001ff, 0xbf06ff76, - 0x000000ff, 0xbfa20003, - 0xbfc70000, 0xbefb006e, - 0xbfa0ffad, 0xbfc70000, - 0xbefb006f, 0xbfa0ffaa, - 0xbfc70000, 0xbeee007e, - 0xbeef007f, 0xbefe0180, - 0xbefe4d84, 0xbf8a0000, - 0x8b7aff7f, 0x04000000, - 0x847a857a, 0x8c6d7a6d, - 0xb8eff822, 0xb980f822, - 0x00000000, 0xb8fa2b01, - 0x847a997a, 0x8c6d7a6d, - 0xbefa0080, 0xb97a2b01, - 0xbefa007e, 0x8b7bff7f, - 0x01ffffff, 0xbefe00c1, - 0xbeff00c1, 0xee0a407a, - 0x000c0000, 0x00000000, - 0x7e000280, 0xbefe007a, - 0xbeff007b, 0xb8fb0742, - 0x847b997b, 0xb8fa3b05, - 0x807a817a, 0xbf0d997b, - 0xbfa20002, 0x847a897a, - 0xbfa00001, 0x847a8a7a, + 0x000000ff, 0xbfa20008, + 0x8b76ff7b, 0x000001ff, + 0xbf06ff76, 0x000000ff, + 0xbfa20003, 0xbfc70000, + 0xbefb006e, 0xbfa0ffad, + 0xbfc70000, 0xbefb006f, + 0xbfa0ffaa, 0xbfc70000, + 0xbeee007e, 0xbeef007f, + 0xbefe0180, 0xbefe4d84, + 0xbf8a0000, 0x8b7aff7f, + 0x04000000, 0x847a857a, + 0x8c6d7a6d, 0xb8eff822, + 0xb980f822, 0x00000000, + 0xb8fa2b01, 0x847a997a, + 0x8c6d7a6d, 0xbefa0080, + 
0xb97a2b01, 0xbefa007e, 0x8b7bff7f, 0x01ffffff, - 0x807aff7a, 0x000001c0, - 0x807a7e7a, 0x827b807b, - 0xd7610000, 0x00010870, - 0xd7610000, 0x00010a71, - 0xd7610000, 0x00010c72, - 0xd7610000, 0x00010e73, - 0xd7610000, 0x00011074, - 0xd7610000, 0x00011275, - 0xd7610000, 0x00011476, - 0xd7610000, 0x00011677, - 0xd7610000, 0x00011a79, - 0xd7610000, 0x00011c7e, - 0xd7610000, 0x00011e7f, - 0xbefe00ff, 0x00003fff, - 0xbeff0080, 0xee0a407a, - 0x000c0000, 0x00000000, - 0xd760007a, 0x00011d00, - 0xd760007b, 0x00011f00, + 0xbefe00c1, 0xbeff00c1, + 0xee0a407a, 0x000c0000, + 0x00000000, 0x7e000280, 0xbefe007a, 0xbeff007b, - 0xbef4007e, 0x8b75ff7f, - 0x01ffffff, 0xbef1007d, - 0xb8f30742, 0x84739973, - 0xbefe00c1, 0x857d9973, - 0x8b7d817d, 0xbf06817d, - 0xbfa20002, 0xbeff0080, - 0xbfa00002, 0xbeff00c1, - 0xbfa0000a, 0xee0a4074, - 0x008c0000, 0x00008000, - 0xee0a4074, 0x010c0000, + 0xb8fb0742, 0x847b997b, + 0xb8fa3b05, 0x807a817a, + 0xbf0d997b, 0xbfa20002, + 0x847a897a, 0xbfa00001, + 0x847a8a7a, 0x8b7bff7f, + 0x01ffffff, 0x807aff7a, + 0x000001c0, 0x807a7e7a, + 0x827b807b, 0xd7610000, + 0x00010870, 0xd7610000, + 0x00010a71, 0xd7610000, + 0x00010c72, 0xd7610000, + 0x00010e73, 0xd7610000, + 0x00011074, 0xd7610000, + 0x00011275, 0xd7610000, + 0x00011476, 0xd7610000, + 0x00011677, 0xd7610000, + 0x00011a79, 0xd7610000, + 0x00011c7e, 0xd7610000, + 0x00011e7f, 0xbefe00ff, + 0x00003fff, 0xbeff0080, + 0xee0a407a, 0x000c0000, + 0x00000000, 0xd760007a, + 0x00011d00, 0xd760007b, + 0x00011f00, 0xbefe007a, + 0xbeff007b, 0xbef4007e, + 0x8b75ff7f, 0x01ffffff, + 0xbef1007d, 0xb8f30742, + 0x84739973, 0xbefe00c1, + 0x857d9973, 0x8b7d817d, + 0xbf06817d, 0xbfa20002, + 0xbeff0080, 0xbfa00002, + 0xbeff00c1, 0xbfa0000a, + 0xee0a4074, 0x008c0000, + 0x00008000, 0xee0a4074, + 0x010c0000, 0x00010000, + 0xee0a4074, 0x018c0000, + 0x00018000, 0xbfa00009, + 0xee0a4074, 0x008c0000, 0x00010000, 0xee0a4074, - 0x018c0000, 0x00018000, - 0xbfa00009, 0xee0a4074, - 0x008c0000, 0x00010000, - 0xee0a4074, 0x010c0000, - 
0x00020000, 0xee0a4074, - 0x018c0000, 0x00030000, - 0xb8f03b05, 0x80708170, - 0xbf0d9973, 0xbfa20002, - 0x84708970, 0xbfa00001, - 0x84708a70, 0x8070ff70, - 0x00000200, 0x7e000280, - 0x7e020280, 0x7e040280, - 0xbefd0080, 0xd7610002, - 0x0000fa71, 0x807d817d, - 0xb8faf802, 0xbf0c8b7a, - 0xbfa20003, 0xbe804fc2, - 0xbf94fffe, 0xbfa10001, - 0xbe804ec4, 0xbf94fffc, - 0xbefa4c88, 0xbfc70000, - 0xbf0c807a, 0xbfa20006, - 0x9371ff7a, 0x00070004, - 0x937aff7a, 0x00070010, - 0xbf06717a, 0xbfa2fff6, - 0xb8faf804, 0x8b7aff7a, - 0x0001000c, 0x9178ff78, - 0x0001000c, 0x8c787a78, - 0xd7610002, 0x0000fa6c, - 0x807d817d, 0x917aff6d, - 0x80000000, 0xd7610002, + 0x010c0000, 0x00020000, + 0xee0a4074, 0x018c0000, + 0x00030000, 0xb8f03b05, + 0x80708170, 0xbf0d9973, + 0xbfa20002, 0x84708970, + 0xbfa00001, 0x84708a70, + 0x8070ff70, 0x00000200, + 0x7e000280, 0x7e020280, + 0x7e040280, 0xbefd0080, + 0xd7610002, 0x0000fa71, + 0x807d817d, 0xb8faf802, + 0xbf0c8b7a, 0xbfa20003, + 0xbe804fc2, 0xbf94fffe, + 0xbfa10001, 0xbe804ec4, + 0xbf94fffc, 0xbefa4c88, + 0xbfc70000, 0xbf0c807a, + 0xbfa20006, 0x9371ff7a, + 0x00070004, 0x937aff7a, + 0x00070010, 0xbf06717a, + 0xbfa2fff6, 0xb8faf804, + 0x8b7aff7a, 0x0001000c, + 0x9178ff78, 0x0001000c, + 0x8c787a78, 0xd7610002, + 0x0000fa6c, 0x807d817d, + 0x917aff6d, 0x80000000, + 0xd7610002, 0x0000fa7a, + 0x807d817d, 0xd7610002, + 0x0000fa6e, 0x807d817d, + 0xbefa0080, 0xd7610002, 0x0000fa7a, 0x807d817d, - 0xd7610002, 0x0000fa6e, - 0x807d817d, 0xbefa0080, + 0xd7610002, 0x0000fa78, + 0x807d817d, 0xb8faf811, 0xd7610002, 0x0000fa7a, 0x807d817d, 0xd7610002, - 0x0000fa78, 0x807d817d, - 0xb8faf811, 0xd7610002, + 0x0000fa6f, 0x807d817d, + 0xb8f1f801, 0x937aff6d, + 0x00060019, 0x847a8c7a, + 0x8c717a71, 0xd7610002, + 0x0000fa71, 0x807d817d, + 0xb8f1f814, 0xd7610002, + 0x0000fa71, 0x807d817d, + 0xb8f1f815, 0xd7610002, + 0x0000fa71, 0x807d817d, + 0xb8f1f812, 0xd7610002, + 0x0000fa71, 0x807d817d, + 0xb8f1f813, 0xd7610002, + 0x0000fa71, 0x807d817d, + 0xb8faf802, 0xd7610002, 
0x0000fa7a, 0x807d817d, - 0xd7610002, 0x0000fa6f, - 0x807d817d, 0xb8f1f801, - 0x937aff6d, 0x00060019, - 0x847a8c7a, 0x8c717a71, - 0xd7610002, 0x0000fa71, - 0x807d817d, 0xb8f1f814, - 0xd7610002, 0x0000fa71, - 0x807d817d, 0xb8f1f815, - 0xd7610002, 0x0000fa71, - 0x807d817d, 0xb8f1f812, - 0xd7610002, 0x0000fa71, - 0x807d817d, 0xb8f1f813, - 0xd7610002, 0x0000fa71, - 0x807d817d, 0xb8faf802, + 0xbefa50c1, 0xbfc70000, 0xd7610002, 0x0000fa7a, - 0x807d817d, 0xbefa50c1, + 0x807d817d, 0xbefa4c88, 0xbfc70000, 0xd7610002, 0x0000fa7a, 0x807d817d, - 0xbefa4c88, 0xbfc70000, - 0xd7610002, 0x0000fa7a, - 0x807d817d, 0xbefe00ff, - 0x0000ffff, 0xbeff0080, + 0xb8faf81a, 0xd7610002, + 0x0000fa7a, 0x807d817d, + 0xbefe00c1, 0xbeff0080, 0x80767074, 0x82778075, 0xee0a4076, 0x010c0000, 0x00000000, 0xbefe00c1, @@ -5061,7 +5057,7 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = { 0x018c0000, 0x00030000, 0x807d847d, 0x8070ff70, 0x00000400, 0xbf0a7b7d, - 0xbfa2ffe9, 0xbfa00183, + 0xbfa2ffe9, 0xbfa00184, 0xbef4007e, 0x8b75ff7f, 0x01ffffff, 0xbef1007f, 0xb8f20742, 0x84729972, @@ -5229,6 +5225,8 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = { 0x856e906e, 0x8b6e6e6e, 0xbfa10003, 0xbe804ec3, 0x816ec16e, 0xbfa0fffb, + 0xf4601bbb, 0xf8000040, + 0xbfc70000, 0xb96ef81a, 0xbefd006f, 0xbefe0070, 0xbeff0071, 0xb979f822, 0xb97b2011, 0x857b867b, @@ -5248,19 +5246,17 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = { 0x856e8e77, 0xb96e3021, 0x8b6dff6d, 0x01ffffff, 0x8bfe7e7e, 0x8bea6a6a, - 0x936eff77, 0x0002001a, - 0xb96ef81a, 0xb97af804, + 0xb97af804, 0xb8eef802, + 0xbf0c8b6e, 0xbfa20003, + 0xbe804fc2, 0xbf94fffe, + 0xbfa10001, 0xbe804ec4, + 0xbf94fffc, 0x857a897a, + 0xb97a0244, 0xbe804a6c, 0xb8eef802, 0xbf0c8b6e, 0xbfa20003, 0xbe804fc2, 0xbf94fffe, 0xbfa10001, 0xbe804ec4, 0xbf94fffc, - 0x857a897a, 0xb97a0244, - 0xbe804a6c, 0xb8eef802, - 0xbf0c8b6e, 0xbfa20003, - 0xbe804fc2, 0xbf94fffe, - 0xbfa10001, 0xbe804ec4, - 0xbf94fffc, 0xbfb10000, + 0xbfb10000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 
0xbf9f0000, 0xbf9f0000, - 0xbf9f0000, 0x00000000, }; diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm index ace2a9f2ac73..ccc61f60ceb3 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm @@ -36,6 +36,7 @@ #define NUM_NAMED_BARRIERS (ASIC_FAMILY == CHIP_GC_12_0_3 ? 0x10 : 0) #define HAVE_CLUSTER_BARRIER (ASIC_FAMILY == CHIP_GC_12_0_3) #define CLUSTER_BARRIER_SERIALIZE_WORKAROUND (ASIC_FAMILY == CHIP_GC_12_0_3) +#define RELAXED_SCHEDULING_IN_TRAP (ASIC_FAMILY == CHIP_GFX12) #define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost TRAP_AFTER_INST exception when SAVECTX raised #define HAVE_VALU_SGPR_HAZARD (ASIC_FAMILY == CHIP_GFX12) @@ -110,9 +111,11 @@ var BARRIER_STATE_MEMBER_OFFSET = 4 var BARRIER_STATE_MEMBER_SIZE = 7 var BARRIER_STATE_VALID_OFFSET = 0 +#if RELAXED_SCHEDULING_IN_TRAP var TTMP11_SCHED_MODE_SHIFT = 26 var TTMP11_SCHED_MODE_SIZE = 2 var TTMP11_SCHED_MODE_MASK = 0xC000000 +#endif var NAMED_BARRIERS_SR_OFFSET_FROM_HWREG = 0x80 var S_BARRIER_INIT_MEMBERCNT_MASK = 0x7F0000 @@ -223,18 +226,22 @@ L_JUMP_TO_RESTORE: s_branch L_RESTORE L_SKIP_RESTORE: +#if RELAXED_SCHEDULING_IN_TRAP // Assume most relaxed scheduling mode is set. Save and revert to normal mode. s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_SCHED_MODE) s_wait_alu 0 s_setreg_imm32_b32 hwreg(HW_REG_WAVE_SCHED_MODE, \ SQ_WAVE_SCHED_MODE_DEP_MODE_SHIFT, SQ_WAVE_SCHED_MODE_DEP_MODE_SIZE), 0 +#endif s_getreg_b32 s_save_state_priv, hwreg(HW_REG_WAVE_STATE_PRIV) //save STATUS since we will change SCC +#if RELAXED_SCHEDULING_IN_TRAP // Save SCHED_MODE[1:0] into ttmp11[27:26]. s_andn2_b32 ttmp11, ttmp11, TTMP11_SCHED_MODE_MASK s_lshl_b32 ttmp2, ttmp2, TTMP11_SCHED_MODE_SHIFT s_or_b32 ttmp11, ttmp11, ttmp2 +#endif // Clear SPI_PRIO: do not save with elevated priority. // Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd. 
@@ -316,7 +323,7 @@ L_FETCH_2ND_TRAP: s_cbranch_scc0 L_NO_SIGN_EXTEND_TMA s_or_b32 ttmp15, ttmp15, ~ADDRESS_HI32_MASK L_NO_SIGN_EXTEND_TMA: -#if ASIC_FAMILY == CHIP_GFX12 +#if RELAXED_SCHEDULING_IN_TRAP // Move SCHED_MODE[1:0] from ttmp11 to unused bits in ttmp1[27:26] (return PC_HI). // The second-level trap will restore from ttmp1 for backwards compatibility. s_and_b32 ttmp2, ttmp11, TTMP11_SCHED_MODE_MASK @@ -382,8 +389,10 @@ L_EXIT_TRAP: // Only restore fields which the trap handler changes. s_lshr_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_SCC_SHIFT +#if RELAXED_SCHEDULING_IN_TRAP // Assume relaxed scheduling mode after this point. restore_sched_mode(ttmp2) +#endif s_setreg_b32 hwreg(HW_REG_WAVE_STATE_PRIV, SQ_WAVE_STATE_PRIV_SCC_SHIFT, \ SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT - SQ_WAVE_STATE_PRIV_SCC_SHIFT + 1), s_save_state_priv @@ -591,8 +600,18 @@ L_SAVE_HWREG: write_hwreg_to_v2(s_save_tmp) #endif +#if ASIC_FAMILY >= CHIP_GC_12_0_3 + s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_SCHED_MODE) + write_hwreg_to_v2(s_save_tmp) +#endif + +#if ! SAVE_TTMPS_IN_SGPR_BLOCK // Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this. s_mov_b32 exec_lo, 0xFFFF +#else + // All 128 bytes are available for HWREGs. 
+ s_mov_b32 exec_lo, 0xFFFFFFFF +#endif s_mov_b32 exec_hi, 0x0 s_add_u32 s_save_addr_lo, s_save_base_addr_lo, s_save_mem_offset s_addc_u32 s_save_addr_hi, s_save_base_addr_hi, 0x0 @@ -1155,6 +1174,12 @@ L_SKIP_TRAP_CLUSTER_BARRIER_SIGNAL: L_SKIP_CLUSTER_BARRIER_RESTORE: #endif +#if ASIC_FAMILY >= CHIP_GC_12_0_3 + s_load_b32 s_restore_tmp, [s_restore_addr_lo, s_restore_addr_hi], null scope:SCOPE_SYS offset:0x40 + s_wait_kmcnt 0 + s_setreg_b32 hwreg(HW_REG_WAVE_SCHED_MODE), s_restore_tmp +#endif + s_mov_b32 m0, s_restore_m0 s_mov_b32 exec_lo, s_restore_exec_lo s_mov_b32 exec_hi, s_restore_exec_hi @@ -1194,8 +1219,10 @@ L_SKIP_CLUSTER_BARRIER_RESTORE: s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 +#if RELAXED_SCHEDULING_IN_TRAP // Assume relaxed scheduling mode after this point. restore_sched_mode(s_restore_tmp) +#endif s_setreg_b32 hwreg(HW_REG_WAVE_STATE_PRIV), s_restore_state_priv // SCC is included, which is changed by previous salu @@ -1347,11 +1374,12 @@ L_NOT_IN_CLUSTER: #endif end - +#if RELAXED_SCHEDULING_IN_TRAP function restore_sched_mode(s_tmp) s_bfe_u32 s_tmp, ttmp11, (TTMP11_SCHED_MODE_SHIFT | (TTMP11_SCHED_MODE_SIZE << 0x10)) s_setreg_b32 hwreg(HW_REG_WAVE_SCHED_MODE), s_tmp end +#endif function restore_barrier_signal_count(barrier_id) // extract the saved signal count from s_restore_tmp -- 2.34.1
