Module: Mesa
Branch: main
Commit: 1b988af0adbfcf84969c64d39a854be917f82c8d
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=1b988af0adbfcf84969c64d39a854be917f82c8d

Author: Timur Kristóf <[email protected]>
Date:   Mon Oct 16 10:25:06 2023 +0200

radv: Move radv_cp_wait_mem to radv_cs.h and add queue family argument.

Signed-off-by: Timur Kristóf <[email protected]>
Reviewed-by: Samuel Pitoiset <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25770>

---

 src/amd/vulkan/radv_cmd_buffer.c  |  6 +++---
 src/amd/vulkan/radv_cs.h          | 16 ++++++++++++++++
 src/amd/vulkan/radv_perfcounter.c |  2 +-
 src/amd/vulkan/radv_private.h     |  1 -
 src/amd/vulkan/radv_query.c       | 11 ++++++-----
 src/amd/vulkan/radv_queue.c       |  4 ++--
 src/amd/vulkan/si_cmd_buffer.c    | 20 ++++----------------
 7 files changed, 32 insertions(+), 28 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index b382d8f8566..2632f54391e 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -656,8 +656,8 @@ radv_wait_gang_leader(struct radv_cmd_buffer *cmd_buffer)
    radeon_check_space(cmd_buffer->device->ws, ace_cs, 7);
 
    /* ACE waits for the semaphore which GFX wrote. */
-   radv_cp_wait_mem(ace_cs, WAIT_REG_MEM_GREATER_OR_EQUAL, 
cmd_buffer->gang.sem.va, cmd_buffer->gang.sem.leader_value,
-                    0xffffffff);
+   radv_cp_wait_mem(ace_cs, RADV_QUEUE_COMPUTE, WAIT_REG_MEM_GREATER_OR_EQUAL, 
cmd_buffer->gang.sem.va,
+                    cmd_buffer->gang.sem.leader_value, 0xffffffff);
 }
 
 static struct radeon_cmdbuf *
@@ -10688,7 +10688,7 @@ radv_CmdWaitEvents2(VkCommandBuffer commandBuffer, 
uint32_t eventCount, const Vk
 
       ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, 
cs, 7);
 
-      radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, va, 1, 0xffffffff);
+      radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_EQUAL, va, 1, 
0xffffffff);
       assert(cmd_buffer->cs->cdw <= cdw_max);
    }
 
diff --git a/src/amd/vulkan/radv_cs.h b/src/amd/vulkan/radv_cs.h
index 0a4eff87694..e8ca11b6194 100644
--- a/src/amd/vulkan/radv_cs.h
+++ b/src/amd/vulkan/radv_cs.h
@@ -208,4 +208,20 @@ radeon_set_privileged_config_reg(struct radeon_cmdbuf *cs, 
unsigned reg, unsigne
    radeon_emit(cs, 0); /* unused */
 }
 
+ALWAYS_INLINE static void
+radv_cp_wait_mem(struct radeon_cmdbuf *cs, const enum radv_queue_family qf, 
const uint32_t op, const uint64_t va,
+                 const uint32_t ref, const uint32_t mask)
+{
+   assert(op == WAIT_REG_MEM_EQUAL || op == WAIT_REG_MEM_NOT_EQUAL || op == 
WAIT_REG_MEM_GREATER_OR_EQUAL);
+   assert(qf == RADV_QUEUE_GENERAL || qf == RADV_QUEUE_COMPUTE);
+
+   radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, false));
+   radeon_emit(cs, op | WAIT_REG_MEM_MEM_SPACE(1));
+   radeon_emit(cs, va);
+   radeon_emit(cs, va >> 32);
+   radeon_emit(cs, ref);  /* reference value */
+   radeon_emit(cs, mask); /* mask */
+   radeon_emit(cs, 4);    /* poll interval */
+}
+
 #endif /* RADV_CS_H */
diff --git a/src/amd/vulkan/radv_perfcounter.c 
b/src/amd/vulkan/radv_perfcounter.c
index 884fba6dc25..5737c480432 100644
--- a/src/amd/vulkan/radv_perfcounter.c
+++ b/src/amd/vulkan/radv_perfcounter.c
@@ -713,7 +713,7 @@ radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, 
struct radv_pc_query_pool
    si_cs_emit_write_event_eop(cs, 
cmd_buffer->device->physical_device->rad_info.gfx_level,
                               radv_cmd_buffer_uses_mec(cmd_buffer), 
V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
                               EOP_DATA_SEL_VALUE_32BIT, perf_ctr_va, 1, 
cmd_buffer->gfx9_fence_va);
-   radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, perf_ctr_va, 1, 0xffffffff);
+   radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_EQUAL, perf_ctr_va, 1, 
0xffffffff);
 
    radv_pc_wait_idle(cmd_buffer);
    radv_pc_stop_and_sample(cmd_buffer, pool, va, true);
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 45ab956cdd7..a02556a1d28 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1996,7 +1996,6 @@ struct radv_vgt_shader_key {
    uint8_t vs_wave32 : 1;
 };
 
-void radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, 
uint32_t ref, uint32_t mask);
 void si_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf 
*cs, enum amd_gfx_level gfx_level,
                             uint32_t *flush_cnt, uint64_t flush_va, bool 
is_mec, enum radv_cmd_flush_bits flush_bits,
                             enum rgp_flush_bits *sqtt_flush_bits, uint64_t 
gfx9_eop_bug_va);
diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
index 0e78b5a030e..c6e371beb1f 100644
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -1486,7 +1486,7 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer 
commandBuffer, VkQueryPool queryPoo
             uint64_t avail_va = va + pool->availability_offset + 4 * query;
 
             /* This waits on the ME. All copies below are done on the ME */
-            radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, avail_va, 1, 0xffffffff);
+            radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_EQUAL, avail_va, 
1, 0xffffffff);
          }
       }
       radv_query_shader(cmd_buffer, 
&cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline, 
pool->bo,
@@ -1509,7 +1509,8 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer 
commandBuffer, VkQueryPool queryPoo
             /* Wait on the high 32 bits of the timestamp in
              * case the low part is 0xffffffff.
              */
-            radv_cp_wait_mem(cs, WAIT_REG_MEM_NOT_EQUAL, local_src_va + 4, 
TIMESTAMP_NOT_READY >> 32, 0xffffffff);
+            radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_NOT_EQUAL, 
local_src_va + 4, TIMESTAMP_NOT_READY >> 32,
+                             0xffffffff);
          }
       }
 
@@ -1527,7 +1528,7 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer 
commandBuffer, VkQueryPool queryPoo
 
             /* Wait on the upper word of all results. */
             for (unsigned j = 0; j < 4; j++, src_va += 8) {
-               radv_cp_wait_mem(cs, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 4, 
0x80000000, 0xffffffff);
+               radv_cp_wait_mem(cs, cmd_buffer->qf, 
WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 4, 0x80000000, 0xffffffff);
             }
          }
       }
@@ -1545,8 +1546,8 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer 
commandBuffer, VkQueryPool queryPoo
             radeon_check_space(cmd_buffer->device->ws, cs, 7 * 2);
 
             /* Wait on the upper word of the PrimitiveStorageNeeded result. */
-            radv_cp_wait_mem(cs, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 4, 
0x80000000, 0xffffffff);
-            radv_cp_wait_mem(cs, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 20, 
0x80000000, 0xffffffff);
+            radv_cp_wait_mem(cs, cmd_buffer->qf, 
WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 4, 0x80000000, 0xffffffff);
+            radv_cp_wait_mem(cs, cmd_buffer->qf, 
WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 20, 0x80000000, 0xffffffff);
          }
       }
 
diff --git a/src/amd/vulkan/radv_queue.c b/src/amd/vulkan/radv_queue.c
index 816a96cc30f..023b9f3ce76 100644
--- a/src/amd/vulkan/radv_queue.c
+++ b/src/amd/vulkan/radv_queue.c
@@ -1285,7 +1285,7 @@ radv_create_gang_wait_preambles_postambles(struct 
radv_queue *queue)
     * in a multi-process environment, because task shader dispatches are not
     * meant to be executed on multiple compute engines at the same time.
     */
-   radv_cp_wait_mem(ace_pre_cs, WAIT_REG_MEM_GREATER_OR_EQUAL, ace_wait_va, 1, 
0xffffffff);
+   radv_cp_wait_mem(ace_pre_cs, RADV_QUEUE_COMPUTE, 
WAIT_REG_MEM_GREATER_OR_EQUAL, ace_wait_va, 1, 0xffffffff);
    radeon_emit(ace_pre_cs, PKT3(PKT3_WRITE_DATA, 3, 0));
    radeon_emit(ace_pre_cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | 
S_370_ENGINE_SEL(V_370_ME));
    radeon_emit(ace_pre_cs, ace_wait_va);
@@ -1303,7 +1303,7 @@ radv_create_gang_wait_preambles_postambles(struct 
radv_queue *queue)
     * as soon as the gang leader is done, which may lead to bugs because the
     * same command buffers could be submitted again while still being executed.
     */
-   radv_cp_wait_mem(leader_post_cs, WAIT_REG_MEM_GREATER_OR_EQUAL, 
leader_wait_va, 1, 0xffffffff);
+   radv_cp_wait_mem(leader_post_cs, queue->state.qf, 
WAIT_REG_MEM_GREATER_OR_EQUAL, leader_wait_va, 1, 0xffffffff);
    radeon_emit(leader_post_cs, PKT3(PKT3_WRITE_DATA, 3, 0));
    radeon_emit(leader_post_cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) 
| S_370_ENGINE_SEL(V_370_ME));
    radeon_emit(leader_post_cs, leader_wait_va);
diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index ea35f10d851..bcad1705491 100644
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -1029,20 +1029,6 @@ si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, 
enum amd_gfx_level gfx_leve
    }
 }
 
-void
-radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint32_t 
ref, uint32_t mask)
-{
-   assert(op == WAIT_REG_MEM_EQUAL || op == WAIT_REG_MEM_NOT_EQUAL || op == 
WAIT_REG_MEM_GREATER_OR_EQUAL);
-
-   radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, false));
-   radeon_emit(cs, op | WAIT_REG_MEM_MEM_SPACE(1));
-   radeon_emit(cs, va);
-   radeon_emit(cs, va >> 32);
-   radeon_emit(cs, ref);  /* reference value */
-   radeon_emit(cs, mask); /* mask */
-   radeon_emit(cs, 4);    /* poll interval */
-}
-
 static void
 si_emit_acquire_mem(struct radeon_cmdbuf *cs, bool is_mec, bool is_gfx9, 
unsigned cp_coher_cntl)
 {
@@ -1245,7 +1231,8 @@ gfx10_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum 
amd_gfx_level gfx_level
                                        S_490_SEQ(gcr_seq),
                                     EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, 
flush_va, *flush_cnt, gfx9_eop_bug_va);
 
-         radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, flush_va, *flush_cnt, 
0xffffffff);
+         const enum radv_queue_family qf = is_mec ? RADV_QUEUE_COMPUTE : 
RADV_QUEUE_GENERAL;
+         radv_cp_wait_mem(cs, qf, WAIT_REG_MEM_EQUAL, flush_va, *flush_cnt, 
0xffffffff);
       }
    }
 
@@ -1407,7 +1394,8 @@ si_cs_emit_cache_flush(struct radeon_winsys *ws, struct 
radeon_cmdbuf *cs, enum
 
       si_cs_emit_write_event_eop(cs, gfx_level, false, cb_db_event, tc_flags, 
EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT,
                                  flush_va, *flush_cnt, gfx9_eop_bug_va);
-      radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, flush_va, *flush_cnt, 
0xffffffff);
+      const enum radv_queue_family qf = is_mec ? RADV_QUEUE_COMPUTE : 
RADV_QUEUE_GENERAL;
+      radv_cp_wait_mem(cs, qf, WAIT_REG_MEM_EQUAL, flush_va, *flush_cnt, 
0xffffffff);
    }
 
    /* VGT state sync */

Reply via email to