Module: Mesa Branch: main Commit: 620baf9c42ec04ee74cfe11054d12939eaa11539 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=620baf9c42ec04ee74cfe11054d12939eaa11539
Author: Chia-I Wu <[email protected]> Date: Tue Feb 7 20:43:48 2023 -0800 freedreno/registers: document more bits of CP_REG_TEST On gen3+, there are 32 predicate bits instead of 1. I set out to see why CP_REG_TEST (and other commands that read registers) is slower on gen1 but could not find anything. Since the blob seems to use multiple predicate bits, let's keep them documented. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21206> --- src/freedreno/.gitlab-ci/reference/crash.log | 2 +- ...w.indexed.indirect_draw_count.triangle_list.log | 6 +-- src/freedreno/.gitlab-ci/reference/fd-clouds.log | 44 +++++++++++----------- .../registers/adreno/adreno_control_regs.xml | 3 ++ src/freedreno/registers/adreno/adreno_pm4.xml | 31 ++++++++------- 5 files changed, 44 insertions(+), 42 deletions(-) diff --git a/src/freedreno/.gitlab-ci/reference/crash.log b/src/freedreno/.gitlab-ci/reference/crash.log index 1d3598445c7..fee7f145f1d 100644 --- a/src/freedreno/.gitlab-ci/reference/crash.log +++ b/src/freedreno/.gitlab-ci/reference/crash.log @@ -3048,7 +3048,7 @@ indexed-registers: 00000000 0x17c: 00000000 00000000 0x17d: 00000000 00000000 0x17e: 00000000 - 00000000 0x17f: 00000000 + 00000000 PRED_REG: 0 - regs-name: CP_ROQ dwords: 1024 ----------------------------------------------- diff --git a/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log b/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log index d4f1a3f0f98..f0b353ae873 100644 --- a/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log +++ b/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log @@ -435,7 +435,7 @@ cmdstream[0]: 265 dwords ibaddr:000000000115e000 ibsize:000000f1 opcode: CP_COND_REG_EXEC (47) (3 dwords) - { REG0 = 0 | GMEM | MODE = RENDER_MODE } + { REG0 = 0 |
PRED_BIT = 0 | GMEM | MODE = RENDER_MODE } { DWORDS = 23 } 000000000115e000: 0000: 70c70002 34000000 00000017 write RB_BLIT_SCISSOR_TL (88d1) @@ -495,7 +495,7 @@ cmdstream[0]: 265 dwords RB_BLIT_SCISSOR_BR: { X = 255 | Y = 255 } 000000000115e05c: 0000: 4888d102 00000000 00ff00ff opcode: CP_COND_REG_EXEC (47) (3 dwords) - { REG0 = 0 | SYSMEM | MODE = RENDER_MODE } + { REG0 = 0 | PRED_BIT = 0 | SYSMEM | MODE = RENDER_MODE } { DWORDS = 0 } 000000000115e068: 0000: 70c70002 38000000 00000000 write RB_DEPTH_BUFFER_INFO (8872) @@ -555,7 +555,7 @@ cmdstream[0]: 265 dwords RB_BLIT_GMEM_MSAA_CNTL: { SAMPLES = MSAA_ONE } 000000000115e128: 0000: 4088d501 00000000 opcode: CP_COND_REG_EXEC (47) (3 dwords) - { REG0 = 0 | GMEM | SYSMEM | MODE = RENDER_MODE } + { REG0 = 0 | PRED_BIT = 0 | GMEM | SYSMEM | MODE = RENDER_MODE } { DWORDS = 4 } 000000000115e130: 0000: 70c70002 3c000000 00000004 opcode: CP_REG_WRITE (6d) (4 dwords) diff --git a/src/freedreno/.gitlab-ci/reference/fd-clouds.log b/src/freedreno/.gitlab-ci/reference/fd-clouds.log index a8b639a0fae..20e572aad2d 100644 --- a/src/freedreno/.gitlab-ci/reference/fd-clouds.log +++ b/src/freedreno/.gitlab-ci/reference/fd-clouds.log @@ -1471,10 +1471,10 @@ cmdstream[0]: 1023 dwords gpuaddr:0000000001d90010 0000000001d918e0: 0000: 70c28003 00000883 01d90010 00000000 opcode: CP_REG_TEST (39) (2 dwords) - { REG = 0x883 | BIT = 0 | SKIP_WAIT_FOR_ME } + { REG = 0x883 | BIT = 0 | SKIP_WAIT_FOR_ME | PRED_BIT = 0 } 0000000001d918f0: 0000: 70b90001 02000883 opcode: CP_COND_REG_EXEC (47) (3 dwords) - { REG0 = 0 | MODE = PRED_TEST } + { REG0 = 0 | PRED_BIT = 0 | MODE = PRED_TEST } { DWORDS = 7 } 0000000001d918f8: 0000: 70c70002 10000000 00000007 opcode: CP_REG_TO_MEM (3e) (4 dwords) @@ -1553,10 +1553,10 @@ cmdstream[0]: 1023 dwords opcode: CP_SET_MODE (63) (2 dwords) 0000000001d919d0: 0000: 70e30001 00000000 opcode: CP_REG_TEST (39) (2 dwords) - { REG = 0x883 | BIT = 0 | SKIP_WAIT_FOR_ME } + { REG = 0x883 | BIT = 0 | SKIP_WAIT_FOR_ME | 
PRED_BIT = 0 } 0000000001d919d8: 0000: 70b90001 02000883 opcode: CP_COND_REG_EXEC (47) (3 dwords) - { REG0 = 0 | MODE = PRED_TEST } + { REG0 = 0 | PRED_BIT = 0 | MODE = PRED_TEST } { DWORDS = 11 } 0000000001d919e0: 0000: 70c70002 10000000 0000000b opcode: CP_SET_BIN_DATA5 (2f) (8 dwords) @@ -1703,10 +1703,10 @@ cmdstream[0]: 1023 dwords :0,1,17,6 0000000001d91aa4: 0000: 48088901 00000011 opcode: CP_REG_TEST (39) (2 dwords) - { REG = 0xc38 | BIT = 0 | SKIP_WAIT_FOR_ME } + { REG = 0xc38 | BIT = 0 | SKIP_WAIT_FOR_ME | PRED_BIT = 0 } 0000000001d91aac: 0000: 70b90001 02000c38 opcode: CP_COND_REG_EXEC (47) (3 dwords) - { REG0 = 0 | MODE = PRED_TEST } + { REG0 = 0 | PRED_BIT = 0 | MODE = PRED_TEST } { DWORDS = 4 } 0000000001d91ab4: 0000: 70c70002 10000000 00000004 opcode: CP_INDIRECT_BUFFER (3f) (4 dwords) @@ -6745,10 +6745,10 @@ cmdstream[0]: 1023 dwords :0,1,18,3 0000000001d91ad4: 0000: 48088901 00000012 opcode: CP_REG_TEST (39) (2 dwords) - { REG = 0x883 | BIT = 0 | SKIP_WAIT_FOR_ME } + { REG = 0x883 | BIT = 0 | SKIP_WAIT_FOR_ME | PRED_BIT = 0 } 0000000001d91adc: 0000: 70b90001 02000883 opcode: CP_COND_REG_EXEC (47) (3 dwords) - { REG0 = 0 | MODE = PRED_TEST } + { REG0 = 0 | PRED_BIT = 0 | MODE = PRED_TEST } { DWORDS = 2 } 0000000001d91ae4: 0000: 70c70002 10000000 00000002 opcode: CP_SET_MARKER (65) (2 dwords) @@ -6870,10 +6870,10 @@ cmdstream[0]: 1023 dwords opcode: CP_SET_MODE (63) (2 dwords) 0000000001d91b9c: 0000: 70e30001 00000000 opcode: CP_REG_TEST (39) (2 dwords) - { REG = 0x883 | BIT = 0 | SKIP_WAIT_FOR_ME } + { REG = 0x883 | BIT = 0 | SKIP_WAIT_FOR_ME | PRED_BIT = 0 } 0000000001d91ba4: 0000: 70b90001 02000883 opcode: CP_COND_REG_EXEC (47) (3 dwords) - { REG0 = 0 | MODE = PRED_TEST } + { REG0 = 0 | PRED_BIT = 0 | MODE = PRED_TEST } { DWORDS = 11 } 0000000001d91bac: 0000: 70c70002 10000000 0000000b opcode: CP_SET_BIN_DATA5 (2f) (8 dwords) @@ -6944,10 +6944,10 @@ cmdstream[0]: 1023 dwords :0,1,27,24 0000000001d91c70: 0000: 48088901 0000001b opcode: CP_REG_TEST 
(39) (2 dwords) - { REG = 0xc39 | BIT = 0 | SKIP_WAIT_FOR_ME } + { REG = 0xc39 | BIT = 0 | SKIP_WAIT_FOR_ME | PRED_BIT = 0 } 0000000001d91c78: 0000: 70b90001 02000c39 opcode: CP_COND_REG_EXEC (47) (3 dwords) - { REG0 = 0 | MODE = PRED_TEST } + { REG0 = 0 | PRED_BIT = 0 | MODE = PRED_TEST } { DWORDS = 4 } 0000000001d91c80: 0000: 70c70002 10000000 00000004 opcode: CP_INDIRECT_BUFFER (3f) (4 dwords) @@ -6961,10 +6961,10 @@ cmdstream[0]: 1023 dwords :0,1,28,24 0000000001d91ca0: 0000: 48088901 0000001c opcode: CP_REG_TEST (39) (2 dwords) - { REG = 0x883 | BIT = 0 | SKIP_WAIT_FOR_ME } + { REG = 0x883 | BIT = 0 | SKIP_WAIT_FOR_ME | PRED_BIT = 0 } 0000000001d91ca8: 0000: 70b90001 02000883 opcode: CP_COND_REG_EXEC (47) (3 dwords) - { REG0 = 0 | MODE = PRED_TEST } + { REG0 = 0 | PRED_BIT = 0 | MODE = PRED_TEST } { DWORDS = 2 } 0000000001d91cb0: 0000: 70c70002 10000000 00000002 opcode: CP_SET_MARKER (65) (2 dwords) @@ -7039,10 +7039,10 @@ cmdstream[0]: 1023 dwords opcode: CP_SET_MODE (63) (2 dwords) 0000000001d91d68: 0000: 70e30001 00000000 opcode: CP_REG_TEST (39) (2 dwords) - { REG = 0x883 | BIT = 0 | SKIP_WAIT_FOR_ME } + { REG = 0x883 | BIT = 0 | SKIP_WAIT_FOR_ME | PRED_BIT = 0 } 0000000001d91d70: 0000: 70b90001 02000883 opcode: CP_COND_REG_EXEC (47) (3 dwords) - { REG0 = 0 | MODE = PRED_TEST } + { REG0 = 0 | PRED_BIT = 0 | MODE = PRED_TEST } { DWORDS = 11 } 0000000001d91d78: 0000: 70c70002 10000000 0000000b opcode: CP_SET_BIN_DATA5 (2f) (8 dwords) @@ -7113,10 +7113,10 @@ cmdstream[0]: 1023 dwords :0,1,37,34 0000000001d91e3c: 0000: 48088901 00000025 opcode: CP_REG_TEST (39) (2 dwords) - { REG = 0xc3a | BIT = 0 | SKIP_WAIT_FOR_ME } + { REG = 0xc3a | BIT = 0 | SKIP_WAIT_FOR_ME | PRED_BIT = 0 } 0000000001d91e44: 0000: 70b90001 02000c3a opcode: CP_COND_REG_EXEC (47) (3 dwords) - { REG0 = 0 | MODE = PRED_TEST } + { REG0 = 0 | PRED_BIT = 0 | MODE = PRED_TEST } { DWORDS = 4 } 0000000001d91e4c: 0000: 70c70002 10000000 00000004 opcode: CP_INDIRECT_BUFFER (3f) (4 dwords) @@ -7130,10 
+7130,10 @@ cmdstream[0]: 1023 dwords :0,1,38,34 0000000001d91e6c: 0000: 48088901 00000026 opcode: CP_REG_TEST (39) (2 dwords) - { REG = 0x883 | BIT = 0 | SKIP_WAIT_FOR_ME } + { REG = 0x883 | BIT = 0 | SKIP_WAIT_FOR_ME | PRED_BIT = 0 } 0000000001d91e74: 0000: 70b90001 02000883 opcode: CP_COND_REG_EXEC (47) (3 dwords) - { REG0 = 0 | MODE = PRED_TEST } + { REG0 = 0 | PRED_BIT = 0 | MODE = PRED_TEST } { DWORDS = 2 } 0000000001d91e7c: 0000: 70c70002 10000000 00000002 opcode: CP_SET_MARKER (65) (2 dwords) @@ -7208,10 +7208,10 @@ cmdstream[0]: 1023 dwords opcode: CP_SET_MODE (63) (2 dwords) 0000000001d91f34: 0000: 70e30001 00000000 opcode: CP_REG_TEST (39) (2 dwords) - { REG = 0x883 | BIT = 0 | SKIP_WAIT_FOR_ME } + { REG = 0x883 | BIT = 0 | SKIP_WAIT_FOR_ME | PRED_BIT = 0 } 0000000001d91f3c: 0000: 70b90001 02000883 opcode: CP_COND_REG_EXEC (47) (3 dwords) - { REG0 = 0 | MODE = PRED_TEST } + { REG0 = 0 | PRED_BIT = 0 | MODE = PRED_TEST } { DWORDS = 11 } 0000000001d91f44: 0000: 70c70002 10000000 0000000b opcode: CP_SET_BIN_DATA5 (2f) (8 dwords) diff --git a/src/freedreno/registers/adreno/adreno_control_regs.xml b/src/freedreno/registers/adreno/adreno_control_regs.xml index 3fdac2192f4..9ed0469024b 100644 --- a/src/freedreno/registers/adreno/adreno_control_regs.xml +++ b/src/freedreno/registers/adreno/adreno_control_regs.xml @@ -223,6 +223,9 @@ xsi:schemaLocation="http://nouveau.freedesktop.org/ rules-ng.xsd"> <reg32 name="SCRATCH_REG5" offset="0x175"/> <reg32 name="SCRATCH_REG6" offset="0x176"/> <reg32 name="SCRATCH_REG7" offset="0x177"/> + + <!-- new in gen3+ --> + <reg32 name="PRED_REG" offset="0x17f"/> </domain> </database> diff --git a/src/freedreno/registers/adreno/adreno_pm4.xml b/src/freedreno/registers/adreno/adreno_pm4.xml index bcfe2cb1ae1..c19b67160e7 100644 --- a/src/freedreno/registers/adreno/adreno_pm4.xml +++ b/src/freedreno/registers/adreno/adreno_pm4.xml @@ -1723,14 +1723,20 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <bitfield name="BIT" low="20" high="24" 
type="uint"/> <!-- skip implied CP_WAIT_FOR_ME --> <bitfield name="SKIP_WAIT_FOR_ME" pos="25" type="boolean"/> - <!-- - Appears only in: - opcode: CP_REG_TEST (39) (4 dwords) - { REG = 0 | BIT = 0 | WAIT_FOR_ME | UNK31 } - Seem to force CP_REG_TEST to write false - --> - <bitfield name="UNK31" pos="31" type="boolean"/> + <!-- the predicate bit to set (new in gen3+) --> + <bitfield name="PRED_BIT" low="26" high="30" type="uint"/> + <!-- update the predicate reg directly (new in gen3+) --> + <bitfield name="PRED_UPDATE" pos="31" type="boolean"/> </reg32> + + <!-- + In PRED_UPDATE mode, the predicate reg is updated directly using two + more dwords, ignoring other bits: + + PRED_REG = (PRED_REG & ~PRED_MASK) | (PRED_VAL & PRED_MASK); + --> + <reg32 offset="1" name="PRED_MASK" type="hex"/> + <reg32 offset="2" name="PRED_VAL" type="hex"/> </domain> <!-- I *think* this existed at least as far back as a4xx --> @@ -1746,15 +1752,8 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <reg32 offset="0" name="0"> <bitfield name="REG0" low="0" high="17" type="hex"/> - <!-- - Blob uses them for vkCmdClearAttachments in gmem mode. Examples: - opcode: CP_COND_REG_EXEC (47) (3 dwords) - { REG0 = 0 | MODE = PRED_TEST | 0x140000 } - opcode: CP_COND_REG_EXEC (47) (3 dwords) - { REG0 = 0 | MODE = PRED_TEST | 0x100000 } - --> - <bitfield name="UNK18" pos="18" varset="chip" variants="A6XX-" type="boolean"/> - <bitfield name="UNK20" pos="20" varset="chip" variants="A6XX-" type="boolean"/> + <!-- the predicate bit to test (new in gen3+) --> + <bitfield name="PRED_BIT" low="18" high="22" varset="chip" variants="A6XX-" type="uint"/> <!-- Note: these bits have the same meaning, and use the same
