summaryrefslogtreecommitdiff
path: root/drivers/gpu
diff options
context:
space:
mode:
author: Jay Cornwall <jay.cornwall@amd.com> 2026-01-16 12:48:45 -0600
committer: Alex Deucher <alexander.deucher@amd.com> 2026-01-21 14:21:51 -0500
commit: bbcad5a8896b2d3da75d2f0dd9c85f37757d8c37 (patch)
tree: 0ecb2262a3efe5cb51aff322512da269a8991510 /drivers/gpu
parent: 29b703d7addf767b99ba4af98ecddd5c1c91ef38 (diff)
drm/amdkfd: gfx12.1 trap handler support for expert scheduling mode
- Leave DEP_MODE unchanged as it is ignored in the trap handler
- Save/restore SCHED_MODE (gfx12.0 saves in ttmp11)

Signed-off-by: Jay Cornwall <jay.cornwall@amd.com>
Reviewed-by: Lancelot Six <lancelot.six@amd.com>
Cc: Vladimir Indic <vladimir.indic@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 372
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm | 35
2 files changed, 218 insertions, 189 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index d86bccc49e3f..9bb7fb6a83ed 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -4587,18 +4587,14 @@ static const uint32_t cwsr_trap_gfx9_5_0_hex[] = {
};
static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
- 0xbfa00001, 0xbfa003b4,
- 0xb0804009, 0xb8eef81a,
- 0xbf880000, 0xb980081a,
- 0x00000000, 0xb8f8f804,
- 0x9177ff77, 0x0c000000,
- 0x846e9a6e, 0x8c776e77,
+ 0xbfa00001, 0xbfa003ac,
+ 0xb0804009, 0xb8f8f804,
0x9178ff78, 0x00008c00,
0xb8fbf811, 0x8b6eff78,
0x00004000, 0xbfa10008,
0x8b6eff7b, 0x00000080,
0xbfa20018, 0x8b6ea07b,
- 0xbfa200d4, 0xbf830010,
+ 0xbfa200d1, 0xbf830010,
0xb8fbf811, 0xbfa0fffb,
0x8b6eff7b, 0x00000bd0,
0xbfa20010, 0xb8eef812,
@@ -4609,7 +4605,7 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
0xf0000000, 0xbfa20005,
0x8b6fff6f, 0x00000200,
0xbfa20002, 0x8b6ea07b,
- 0xbfa200be, 0x9177ff77,
+ 0xbfa200bb, 0x9177ff77,
0x007fc000, 0xb8fa04a1,
0x847a967a, 0x8c777a77,
0xb8fa0421, 0x847a957a,
@@ -4702,189 +4698,189 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
0xb97a0421, 0x857a8e77,
0xb97a3021, 0x8bfe7e7e,
0x8bea6a6a, 0x85788978,
- 0x936eff77, 0x0002001a,
- 0xb96ef81a, 0xb9783244,
- 0xbe804a6c, 0xb8faf802,
- 0xbf0d987a, 0xbfa10001,
- 0xbfb00000, 0x8b6dff6d,
- 0x01ffffff, 0xbefa0080,
- 0xb97a0151, 0x9177ff77,
- 0x007fc000, 0xb8fa04a1,
- 0x847a967a, 0x8c777a77,
- 0xb8fa0421, 0x847a957a,
- 0x8c777a77, 0xb8fa3021,
- 0x847a8e7a, 0x8c777a77,
- 0xb980f821, 0x00000000,
- 0xbf0d847b, 0xbfa20078,
- 0xf4003eb6, 0xf8000000,
- 0xbfc70000, 0xf4003bb6,
- 0xf8000008, 0x8b76ff7a,
- 0x80000000, 0xbfa20027,
- 0x9376ff7a, 0x00060019,
- 0x81f9a376, 0xbf0b8179,
- 0xbfa20068, 0x81f9ac76,
- 0xbf0b8179, 0xbfa20062,
- 0x81f9b776, 0xbf0b8179,
- 0xbfa2005f, 0x8b76ff7a,
- 0x000001ff, 0xbf06ff76,
- 0x000000fe, 0xbfa2005d,
- 0xbf06ff76, 0x000000ff,
- 0xbfa20057, 0xbf06ff76,
- 0x000000fa, 0xbfa20054,
- 0x81f9ff76, 0x000000e9,
- 0xbf0b8179, 0xbfa20050,
- 0x8b76ff7b, 0xffff0000,
- 0xbf06ff76, 0xbf860000,
- 0xbfa10051, 0x9376ff7b,
- 0x0002000e, 0x8b79ff7b,
- 0x00003f00, 0x85798679,
- 0x8c767976, 0xb9763b01,
- 0xbfa00049, 0x8b76ff7a,
- 0xfc000000, 0xbf06ff76,
- 0xd4000000, 0xbfa20013,
- 0xbf06ff76, 0xc8000000,
- 0xbfa20027, 0x8b76ff7a,
- 0xff000000, 0xbf06ff76,
- 0xcf000000, 0xbfa20039,
- 0x8b79ff7a, 0xffff0000,
- 0xbf06ff79, 0xcc350000,
- 0xbfa20037, 0xbf06ff79,
- 0xcc3a0000, 0xbfa20034,
- 0xbf06ff76, 0xcc000000,
- 0xbfa10031, 0x8b76ff7b,
- 0x000001ff, 0xbf06ff76,
- 0x000000ff, 0xbfa20029,
- 0xbf06ff76, 0x000000fa,
- 0xbfa20026, 0x81f6ff76,
- 0x000000e9, 0xbf0b8176,
- 0xbfa20022, 0x8b76ff7b,
- 0x0003fe00, 0xbf06ff76,
- 0x0001fe00, 0xbfa2001d,
- 0x8b76ff7b, 0x07fc0000,
- 0xbf06ff76, 0x03fc0000,
- 0xbfa20018, 0xbfa00014,
- 0x9376ff7a, 0x00040016,
- 0x81f68176, 0xbf0b8176,
- 0xbfa20012, 0x9376ff7a,
- 0x00050011, 0x81f68176,
- 0xbf0b8176, 0xbfa2000d,
+ 0xb9783244, 0xbe804a6c,
+ 0xb8faf802, 0xbf0d987a,
+ 0xbfa10001, 0xbfb00000,
+ 0x8b6dff6d, 0x01ffffff,
+ 0xbefa0080, 0xb97a0151,
+ 0x9177ff77, 0x007fc000,
+ 0xb8fa04a1, 0x847a967a,
+ 0x8c777a77, 0xb8fa0421,
+ 0x847a957a, 0x8c777a77,
+ 0xb8fa3021, 0x847a8e7a,
+ 0x8c777a77, 0xb980f821,
+ 0x00000000, 0xbf0d847b,
+ 0xbfa20078, 0xf4003eb6,
+ 0xf8000000, 0xbfc70000,
+ 0xf4003bb6, 0xf8000008,
+ 0x8b76ff7a, 0x80000000,
+ 0xbfa20027, 0x9376ff7a,
+ 0x00060019, 0x81f9a376,
+ 0xbf0b8179, 0xbfa20068,
+ 0x81f9ac76, 0xbf0b8179,
+ 0xbfa20062, 0x81f9b776,
+ 0xbf0b8179, 0xbfa2005f,
0x8b76ff7a, 0x000001ff,
+ 0xbf06ff76, 0x000000fe,
+ 0xbfa2005d, 0xbf06ff76,
+ 0x000000ff, 0xbfa20057,
+ 0xbf06ff76, 0x000000fa,
+ 0xbfa20054, 0x81f9ff76,
+ 0x000000e9, 0xbf0b8179,
+ 0xbfa20050, 0x8b76ff7b,
+ 0xffff0000, 0xbf06ff76,
+ 0xbf860000, 0xbfa10051,
+ 0x9376ff7b, 0x0002000e,
+ 0x8b79ff7b, 0x00003f00,
+ 0x85798679, 0x8c767976,
+ 0xb9763b01, 0xbfa00049,
+ 0x8b76ff7a, 0xfc000000,
+ 0xbf06ff76, 0xd4000000,
+ 0xbfa20013, 0xbf06ff76,
+ 0xc8000000, 0xbfa20027,
+ 0x8b76ff7a, 0xff000000,
+ 0xbf06ff76, 0xcf000000,
+ 0xbfa20039, 0x8b79ff7a,
+ 0xffff0000, 0xbf06ff79,
+ 0xcc350000, 0xbfa20037,
+ 0xbf06ff79, 0xcc3a0000,
+ 0xbfa20034, 0xbf06ff76,
+ 0xcc000000, 0xbfa10031,
+ 0x8b76ff7b, 0x000001ff,
0xbf06ff76, 0x000000ff,
- 0xbfa20008, 0x8b76ff7b,
+ 0xbfa20029, 0xbf06ff76,
+ 0x000000fa, 0xbfa20026,
+ 0x81f6ff76, 0x000000e9,
+ 0xbf0b8176, 0xbfa20022,
+ 0x8b76ff7b, 0x0003fe00,
+ 0xbf06ff76, 0x0001fe00,
+ 0xbfa2001d, 0x8b76ff7b,
+ 0x07fc0000, 0xbf06ff76,
+ 0x03fc0000, 0xbfa20018,
+ 0xbfa00014, 0x9376ff7a,
+ 0x00040016, 0x81f68176,
+ 0xbf0b8176, 0xbfa20012,
+ 0x9376ff7a, 0x00050011,
+ 0x81f68176, 0xbf0b8176,
+ 0xbfa2000d, 0x8b76ff7a,
0x000001ff, 0xbf06ff76,
- 0x000000ff, 0xbfa20003,
- 0xbfc70000, 0xbefb006e,
- 0xbfa0ffad, 0xbfc70000,
- 0xbefb006f, 0xbfa0ffaa,
- 0xbfc70000, 0xbeee007e,
- 0xbeef007f, 0xbefe0180,
- 0xbefe4d84, 0xbf8a0000,
- 0x8b7aff7f, 0x04000000,
- 0x847a857a, 0x8c6d7a6d,
- 0xb8eff822, 0xb980f822,
- 0x00000000, 0xb8fa2b01,
- 0x847a997a, 0x8c6d7a6d,
- 0xbefa0080, 0xb97a2b01,
- 0xbefa007e, 0x8b7bff7f,
- 0x01ffffff, 0xbefe00c1,
- 0xbeff00c1, 0xee0a407a,
- 0x000c0000, 0x00000000,
- 0x7e000280, 0xbefe007a,
- 0xbeff007b, 0xb8fb0742,
- 0x847b997b, 0xb8fa3b05,
- 0x807a817a, 0xbf0d997b,
- 0xbfa20002, 0x847a897a,
- 0xbfa00001, 0x847a8a7a,
+ 0x000000ff, 0xbfa20008,
+ 0x8b76ff7b, 0x000001ff,
+ 0xbf06ff76, 0x000000ff,
+ 0xbfa20003, 0xbfc70000,
+ 0xbefb006e, 0xbfa0ffad,
+ 0xbfc70000, 0xbefb006f,
+ 0xbfa0ffaa, 0xbfc70000,
+ 0xbeee007e, 0xbeef007f,
+ 0xbefe0180, 0xbefe4d84,
+ 0xbf8a0000, 0x8b7aff7f,
+ 0x04000000, 0x847a857a,
+ 0x8c6d7a6d, 0xb8eff822,
+ 0xb980f822, 0x00000000,
+ 0xb8fa2b01, 0x847a997a,
+ 0x8c6d7a6d, 0xbefa0080,
+ 0xb97a2b01, 0xbefa007e,
0x8b7bff7f, 0x01ffffff,
- 0x807aff7a, 0x000001c0,
- 0x807a7e7a, 0x827b807b,
- 0xd7610000, 0x00010870,
- 0xd7610000, 0x00010a71,
- 0xd7610000, 0x00010c72,
- 0xd7610000, 0x00010e73,
- 0xd7610000, 0x00011074,
- 0xd7610000, 0x00011275,
- 0xd7610000, 0x00011476,
- 0xd7610000, 0x00011677,
- 0xd7610000, 0x00011a79,
- 0xd7610000, 0x00011c7e,
- 0xd7610000, 0x00011e7f,
- 0xbefe00ff, 0x00003fff,
- 0xbeff0080, 0xee0a407a,
- 0x000c0000, 0x00000000,
- 0xd760007a, 0x00011d00,
- 0xd760007b, 0x00011f00,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xee0a407a, 0x000c0000,
+ 0x00000000, 0x7e000280,
0xbefe007a, 0xbeff007b,
- 0xbef4007e, 0x8b75ff7f,
- 0x01ffffff, 0xbef1007d,
- 0xb8f30742, 0x84739973,
- 0xbefe00c1, 0x857d9973,
- 0x8b7d817d, 0xbf06817d,
- 0xbfa20002, 0xbeff0080,
- 0xbfa00002, 0xbeff00c1,
- 0xbfa0000a, 0xee0a4074,
- 0x008c0000, 0x00008000,
- 0xee0a4074, 0x010c0000,
+ 0xb8fb0742, 0x847b997b,
+ 0xb8fa3b05, 0x807a817a,
+ 0xbf0d997b, 0xbfa20002,
+ 0x847a897a, 0xbfa00001,
+ 0x847a8a7a, 0x8b7bff7f,
+ 0x01ffffff, 0x807aff7a,
+ 0x000001c0, 0x807a7e7a,
+ 0x827b807b, 0xd7610000,
+ 0x00010870, 0xd7610000,
+ 0x00010a71, 0xd7610000,
+ 0x00010c72, 0xd7610000,
+ 0x00010e73, 0xd7610000,
+ 0x00011074, 0xd7610000,
+ 0x00011275, 0xd7610000,
+ 0x00011476, 0xd7610000,
+ 0x00011677, 0xd7610000,
+ 0x00011a79, 0xd7610000,
+ 0x00011c7e, 0xd7610000,
+ 0x00011e7f, 0xbefe00ff,
+ 0x00003fff, 0xbeff0080,
+ 0xee0a407a, 0x000c0000,
+ 0x00000000, 0xd760007a,
+ 0x00011d00, 0xd760007b,
+ 0x00011f00, 0xbefe007a,
+ 0xbeff007b, 0xbef4007e,
+ 0x8b75ff7f, 0x01ffffff,
+ 0xbef1007d, 0xb8f30742,
+ 0x84739973, 0xbefe00c1,
+ 0x857d9973, 0x8b7d817d,
+ 0xbf06817d, 0xbfa20002,
+ 0xbeff0080, 0xbfa00002,
+ 0xbeff00c1, 0xbfa0000a,
+ 0xee0a4074, 0x008c0000,
+ 0x00008000, 0xee0a4074,
+ 0x010c0000, 0x00010000,
+ 0xee0a4074, 0x018c0000,
+ 0x00018000, 0xbfa00009,
+ 0xee0a4074, 0x008c0000,
0x00010000, 0xee0a4074,
- 0x018c0000, 0x00018000,
- 0xbfa00009, 0xee0a4074,
- 0x008c0000, 0x00010000,
- 0xee0a4074, 0x010c0000,
- 0x00020000, 0xee0a4074,
- 0x018c0000, 0x00030000,
- 0xb8f03b05, 0x80708170,
- 0xbf0d9973, 0xbfa20002,
- 0x84708970, 0xbfa00001,
- 0x84708a70, 0x8070ff70,
- 0x00000200, 0x7e000280,
- 0x7e020280, 0x7e040280,
- 0xbefd0080, 0xd7610002,
- 0x0000fa71, 0x807d817d,
- 0xb8faf802, 0xbf0c8b7a,
- 0xbfa20003, 0xbe804fc2,
- 0xbf94fffe, 0xbfa10001,
- 0xbe804ec4, 0xbf94fffc,
- 0xbefa4c88, 0xbfc70000,
- 0xbf0c807a, 0xbfa20006,
- 0x9371ff7a, 0x00070004,
- 0x937aff7a, 0x00070010,
- 0xbf06717a, 0xbfa2fff6,
- 0xb8faf804, 0x8b7aff7a,
- 0x0001000c, 0x9178ff78,
- 0x0001000c, 0x8c787a78,
- 0xd7610002, 0x0000fa6c,
- 0x807d817d, 0x917aff6d,
- 0x80000000, 0xd7610002,
+ 0x010c0000, 0x00020000,
+ 0xee0a4074, 0x018c0000,
+ 0x00030000, 0xb8f03b05,
+ 0x80708170, 0xbf0d9973,
+ 0xbfa20002, 0x84708970,
+ 0xbfa00001, 0x84708a70,
+ 0x8070ff70, 0x00000200,
+ 0x7e000280, 0x7e020280,
+ 0x7e040280, 0xbefd0080,
+ 0xd7610002, 0x0000fa71,
+ 0x807d817d, 0xb8faf802,
+ 0xbf0c8b7a, 0xbfa20003,
+ 0xbe804fc2, 0xbf94fffe,
+ 0xbfa10001, 0xbe804ec4,
+ 0xbf94fffc, 0xbefa4c88,
+ 0xbfc70000, 0xbf0c807a,
+ 0xbfa20006, 0x9371ff7a,
+ 0x00070004, 0x937aff7a,
+ 0x00070010, 0xbf06717a,
+ 0xbfa2fff6, 0xb8faf804,
+ 0x8b7aff7a, 0x0001000c,
+ 0x9178ff78, 0x0001000c,
+ 0x8c787a78, 0xd7610002,
+ 0x0000fa6c, 0x807d817d,
+ 0x917aff6d, 0x80000000,
+ 0xd7610002, 0x0000fa7a,
+ 0x807d817d, 0xd7610002,
+ 0x0000fa6e, 0x807d817d,
+ 0xbefa0080, 0xd7610002,
0x0000fa7a, 0x807d817d,
- 0xd7610002, 0x0000fa6e,
- 0x807d817d, 0xbefa0080,
+ 0xd7610002, 0x0000fa78,
+ 0x807d817d, 0xb8faf811,
0xd7610002, 0x0000fa7a,
0x807d817d, 0xd7610002,
- 0x0000fa78, 0x807d817d,
- 0xb8faf811, 0xd7610002,
+ 0x0000fa6f, 0x807d817d,
+ 0xb8f1f801, 0x937aff6d,
+ 0x00060019, 0x847a8c7a,
+ 0x8c717a71, 0xd7610002,
+ 0x0000fa71, 0x807d817d,
+ 0xb8f1f814, 0xd7610002,
+ 0x0000fa71, 0x807d817d,
+ 0xb8f1f815, 0xd7610002,
+ 0x0000fa71, 0x807d817d,
+ 0xb8f1f812, 0xd7610002,
+ 0x0000fa71, 0x807d817d,
+ 0xb8f1f813, 0xd7610002,
+ 0x0000fa71, 0x807d817d,
+ 0xb8faf802, 0xd7610002,
0x0000fa7a, 0x807d817d,
- 0xd7610002, 0x0000fa6f,
- 0x807d817d, 0xb8f1f801,
- 0x937aff6d, 0x00060019,
- 0x847a8c7a, 0x8c717a71,
- 0xd7610002, 0x0000fa71,
- 0x807d817d, 0xb8f1f814,
- 0xd7610002, 0x0000fa71,
- 0x807d817d, 0xb8f1f815,
- 0xd7610002, 0x0000fa71,
- 0x807d817d, 0xb8f1f812,
- 0xd7610002, 0x0000fa71,
- 0x807d817d, 0xb8f1f813,
- 0xd7610002, 0x0000fa71,
- 0x807d817d, 0xb8faf802,
+ 0xbefa50c1, 0xbfc70000,
0xd7610002, 0x0000fa7a,
- 0x807d817d, 0xbefa50c1,
+ 0x807d817d, 0xbefa4c88,
0xbfc70000, 0xd7610002,
0x0000fa7a, 0x807d817d,
- 0xbefa4c88, 0xbfc70000,
- 0xd7610002, 0x0000fa7a,
- 0x807d817d, 0xbefe00ff,
- 0x0000ffff, 0xbeff0080,
+ 0xb8faf81a, 0xd7610002,
+ 0x0000fa7a, 0x807d817d,
+ 0xbefe00c1, 0xbeff0080,
0x80767074, 0x82778075,
0xee0a4076, 0x010c0000,
0x00000000, 0xbefe00c1,
@@ -5061,7 +5057,7 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
0x018c0000, 0x00030000,
0x807d847d, 0x8070ff70,
0x00000400, 0xbf0a7b7d,
- 0xbfa2ffe9, 0xbfa00183,
+ 0xbfa2ffe9, 0xbfa00184,
0xbef4007e, 0x8b75ff7f,
0x01ffffff, 0xbef1007f,
0xb8f20742, 0x84729972,
@@ -5229,6 +5225,8 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
0x856e906e, 0x8b6e6e6e,
0xbfa10003, 0xbe804ec3,
0x816ec16e, 0xbfa0fffb,
+ 0xf4601bbb, 0xf8000040,
+ 0xbfc70000, 0xb96ef81a,
0xbefd006f, 0xbefe0070,
0xbeff0071, 0xb979f822,
0xb97b2011, 0x857b867b,
@@ -5248,19 +5246,17 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
0x856e8e77, 0xb96e3021,
0x8b6dff6d, 0x01ffffff,
0x8bfe7e7e, 0x8bea6a6a,
- 0x936eff77, 0x0002001a,
- 0xb96ef81a, 0xb97af804,
+ 0xb97af804, 0xb8eef802,
+ 0xbf0c8b6e, 0xbfa20003,
+ 0xbe804fc2, 0xbf94fffe,
+ 0xbfa10001, 0xbe804ec4,
+ 0xbf94fffc, 0x857a897a,
+ 0xb97a0244, 0xbe804a6c,
0xb8eef802, 0xbf0c8b6e,
0xbfa20003, 0xbe804fc2,
0xbf94fffe, 0xbfa10001,
0xbe804ec4, 0xbf94fffc,
- 0x857a897a, 0xb97a0244,
- 0xbe804a6c, 0xb8eef802,
- 0xbf0c8b6e, 0xbfa20003,
- 0xbe804fc2, 0xbf94fffe,
- 0xbfa10001, 0xbe804ec4,
- 0xbf94fffc, 0xbfb10000,
+ 0xbfb10000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
- 0xbf9f0000, 0x00000000,
};
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
index 369b36c93e5f..c25f16e66a27 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
@@ -36,6 +36,7 @@
#define NUM_NAMED_BARRIERS (ASIC_FAMILY == CHIP_GC_12_0_3 ? 0x10 : 0)
#define HAVE_CLUSTER_BARRIER (ASIC_FAMILY == CHIP_GC_12_0_3)
#define CLUSTER_BARRIER_SERIALIZE_WORKAROUND (ASIC_FAMILY == CHIP_GC_12_0_3)
+#define RELAXED_SCHEDULING_IN_TRAP (ASIC_FAMILY == CHIP_GFX12)
#define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost TRAP_AFTER_INST exception when SAVECTX raised
#define HAVE_VALU_SGPR_HAZARD (ASIC_FAMILY == CHIP_GFX12)
@@ -110,6 +111,12 @@ var BARRIER_STATE_MEMBER_OFFSET = 4
var BARRIER_STATE_MEMBER_SIZE = 7
var BARRIER_STATE_VALID_OFFSET = 0
+#if RELAXED_SCHEDULING_IN_TRAP
+var TTMP11_SCHED_MODE_SHIFT = 26
+var TTMP11_SCHED_MODE_SIZE = 2
+var TTMP11_SCHED_MODE_MASK = 0xC000000
+#endif
+
var NAMED_BARRIERS_SR_OFFSET_FROM_HWREG = 0x80
var S_BARRIER_INIT_MEMBERCNT_MASK = 0x7F0000
var S_BARRIER_INIT_MEMBERCNT_SHIFT = 0x10
@@ -222,18 +229,22 @@ L_JUMP_TO_RESTORE:
s_branch L_RESTORE
L_SKIP_RESTORE:
+#if RELAXED_SCHEDULING_IN_TRAP
// Assume most relaxed scheduling mode is set. Save and revert to normal mode.
s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_SCHED_MODE)
s_wait_alu 0
s_setreg_imm32_b32 hwreg(HW_REG_WAVE_SCHED_MODE, \
SQ_WAVE_SCHED_MODE_DEP_MODE_SHIFT, SQ_WAVE_SCHED_MODE_DEP_MODE_SIZE), 0
+#endif
s_getreg_b32 s_save_state_priv, hwreg(HW_REG_WAVE_STATE_PRIV) //save STATUS since we will change SCC
+#if RELAXED_SCHEDULING_IN_TRAP
// Save SCHED_MODE[1:0] into ttmp11[27:26].
s_andn2_b32 ttmp11, ttmp11, TTMP11_SCHED_MODE_MASK
s_lshl_b32 ttmp2, ttmp2, TTMP11_SCHED_MODE_SHIFT
s_or_b32 ttmp11, ttmp11, ttmp2
+#endif
// Clear SPI_PRIO: do not save with elevated priority.
// Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd.
@@ -315,7 +326,7 @@ L_FETCH_2ND_TRAP:
s_cbranch_scc0 L_NO_SIGN_EXTEND_TMA
s_or_b32 ttmp15, ttmp15, ~ADDRESS_HI32_MASK
L_NO_SIGN_EXTEND_TMA:
-#if ASIC_FAMILY == CHIP_GFX12
+#if RELAXED_SCHEDULING_IN_TRAP
// Move SCHED_MODE[1:0] from ttmp11 to unused bits in ttmp1[27:26] (return PC_HI).
// The second-level trap will restore from ttmp1 for backwards compatibility.
s_and_b32 ttmp2, ttmp11, TTMP11_SCHED_MODE_MASK
@@ -381,8 +392,10 @@ L_EXIT_TRAP:
// Only restore fields which the trap handler changes.
s_lshr_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_SCC_SHIFT
+#if RELAXED_SCHEDULING_IN_TRAP
// Assume relaxed scheduling mode after this point.
restore_sched_mode(ttmp2)
+#endif
s_setreg_b32 hwreg(HW_REG_WAVE_STATE_PRIV, SQ_WAVE_STATE_PRIV_SCC_SHIFT, \
SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT - SQ_WAVE_STATE_PRIV_SCC_SHIFT + 1), s_save_state_priv
@@ -590,8 +603,18 @@ L_SAVE_HWREG:
write_hwreg_to_v2(s_save_tmp)
#endif
+#if ASIC_FAMILY >= CHIP_GC_12_0_3
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_SCHED_MODE)
+ write_hwreg_to_v2(s_save_tmp)
+#endif
+
+#if ! SAVE_TTMPS_IN_SGPR_BLOCK
// Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this.
s_mov_b32 exec_lo, 0xFFFF
+#else
+ // All 128 bytes are available for HWREGs.
+ s_mov_b32 exec_lo, 0xFFFFFFFF
+#endif
s_mov_b32 exec_hi, 0x0
s_add_u32 s_save_addr_lo, s_save_base_addr_lo, s_save_mem_offset
s_addc_u32 s_save_addr_hi, s_save_base_addr_hi, 0x0
@@ -1154,6 +1177,12 @@ L_SKIP_TRAP_CLUSTER_BARRIER_SIGNAL:
L_SKIP_CLUSTER_BARRIER_RESTORE:
#endif
+#if ASIC_FAMILY >= CHIP_GC_12_0_3
+ s_load_b32 s_restore_tmp, [s_restore_addr_lo, s_restore_addr_hi], null scope:SCOPE_SYS offset:0x40
+ s_wait_kmcnt 0
+ s_setreg_b32 hwreg(HW_REG_WAVE_SCHED_MODE), s_restore_tmp
+#endif
+
s_mov_b32 m0, s_restore_m0
s_mov_b32 exec_lo, s_restore_exec_lo
s_mov_b32 exec_hi, s_restore_exec_hi
@@ -1193,8 +1222,10 @@ L_SKIP_CLUSTER_BARRIER_RESTORE:
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
+#if RELAXED_SCHEDULING_IN_TRAP
// Assume relaxed scheduling mode after this point.
restore_sched_mode(s_restore_tmp)
+#endif
s_setreg_b32 hwreg(HW_REG_WAVE_STATE_PRIV), s_restore_state_priv // SCC is included, which is changed by previous salu
@@ -1346,10 +1377,12 @@ L_NOT_IN_CLUSTER:
#endif
end
+#if RELAXED_SCHEDULING_IN_TRAP
function restore_sched_mode(s_tmp)
s_bfe_u32 s_tmp, ttmp11, (TTMP11_SCHED_MODE_SHIFT | (TTMP11_SCHED_MODE_SIZE << 0x10))
s_setreg_b32 hwreg(HW_REG_WAVE_SCHED_MODE), s_tmp
end
+#endif
function restore_barrier_signal_count(barrier_id)
// extract the saved signal count from s_restore_tmp