Module: Mesa
Branch: main
Commit: 107473162e328aae754cd718340933693c839094
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=107473162e328aae754cd718340933693c839094

Author: Timur Kristóf <[email protected]>
Date:   Wed Oct 18 14:29:55 2023 +0200

radv: Refactor WRITE_DATA helper function.

Create a version of this function that takes a CS and queue family,
and move it to radv_cs.h so it can be called from multiple other files.

Signed-off-by: Timur Kristóf <[email protected]>
Reviewed-by: Samuel Pitoiset <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25770>

---

 src/amd/vulkan/radv_cmd_buffer.c | 26 +++++++++-----------------
 src/amd/vulkan/radv_cs.h         | 27 +++++++++++++++++++++++++++
 src/amd/vulkan/radv_private.h    |  2 +-
 src/amd/vulkan/radv_query.c      | 12 ++++++------
 src/amd/vulkan/si_cmd_buffer.c   |  2 +-
 5 files changed, 44 insertions(+), 25 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 2632f54391e..8e719f6d92d 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -277,18 +277,10 @@ radv_queue_family_to_ring(const struct 
radv_physical_device *physical_device, en
 }
 
 static void
-radv_emit_write_data_packet(struct radv_cmd_buffer *cmd_buffer, unsigned 
engine_sel, uint64_t va, unsigned count,
-                            const uint32_t *data)
+radv_write_data(struct radv_cmd_buffer *cmd_buffer, const unsigned engine_sel, 
const uint64_t va, const unsigned count,
+                const uint32_t *data, const bool predicating)
 {
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
-
-   radeon_check_space(cmd_buffer->device->ws, cs, 4 + count);
-
-   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0));
-   radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | 
S_370_ENGINE_SEL(engine_sel));
-   radeon_emit(cs, va);
-   radeon_emit(cs, va >> 32);
-   radeon_emit_array(cs, data, count);
+   radv_cs_write_data(cmd_buffer->device, cmd_buffer->cs, cmd_buffer->qf, 
engine_sel, va, count, data, predicating);
 }
 
 static void
@@ -296,7 +288,7 @@ radv_emit_clear_data(struct radv_cmd_buffer *cmd_buffer, 
unsigned engine_sel, ui
 {
    uint32_t *zeroes = alloca(size);
    memset(zeroes, 0, size);
-   radv_emit_write_data_packet(cmd_buffer, engine_sel, va, size / 4, zeroes);
+   radv_write_data(cmd_buffer, engine_sel, va, size / 4, zeroes, false);
 }
 
 static void
@@ -554,7 +546,7 @@ radv_cmd_buffer_trace_emit(struct radv_cmd_buffer 
*cmd_buffer)
       va += 4;
 
    ++cmd_buffer->state.trace_id;
-   radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 1, 
&cmd_buffer->state.trace_id);
+   radv_write_data(cmd_buffer, V_370_ME, va, 1, &cmd_buffer->state.trace_id, 
false);
 
    radeon_check_space(cmd_buffer->device->ws, cs, 2);
 
@@ -769,7 +761,7 @@ radv_save_pipeline(struct radv_cmd_buffer *cmd_buffer, 
struct radv_pipeline *pip
    data[0] = pipeline_address;
    data[1] = pipeline_address >> 32;
 
-   radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 2, data);
+   radv_write_data(cmd_buffer, V_370_ME, va, 2, data, false);
 }
 
 static void
@@ -785,7 +777,7 @@ radv_save_vertex_descriptors(struct radv_cmd_buffer 
*cmd_buffer, uint64_t vb_ptr
    data[0] = vb_ptr;
    data[1] = vb_ptr >> 32;
 
-   radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 2, data);
+   radv_write_data(cmd_buffer, V_370_ME, va, 2, data, false);
 }
 
 static void
@@ -802,7 +794,7 @@ radv_save_vs_prolog(struct radv_cmd_buffer *cmd_buffer, 
const struct radv_shader
    data[0] = prolog_address;
    data[1] = prolog_address >> 32;
 
-   radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 2, data);
+   radv_write_data(cmd_buffer, V_370_ME, va, 2, data, false);
 }
 
 void
@@ -832,7 +824,7 @@ radv_save_descriptors(struct radv_cmd_buffer *cmd_buffer, 
VkPipelineBindPoint bi
       data[i * 2 + 1] = (uint64_t)(uintptr_t)set >> 32;
    }
 
-   radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, MAX_SETS * 2, data);
+   radv_write_data(cmd_buffer, V_370_ME, va, MAX_SETS * 2, data, false);
 }
 
 const struct radv_userdata_info *
diff --git a/src/amd/vulkan/radv_cs.h b/src/amd/vulkan/radv_cs.h
index e8ca11b6194..25a25ec9e24 100644
--- a/src/amd/vulkan/radv_cs.h
+++ b/src/amd/vulkan/radv_cs.h
@@ -224,4 +224,31 @@ radv_cp_wait_mem(struct radeon_cmdbuf *cs, const enum 
radv_queue_family qf, cons
    radeon_emit(cs, 4);    /* poll interval */
 }
 
+ALWAYS_INLINE static unsigned
+radv_cs_write_data_head(const struct radv_device *device, struct radeon_cmdbuf 
*cs, const enum radv_queue_family qf,
+                        const unsigned engine_sel, const uint64_t va, const 
unsigned count, const bool predicating)
+{
+   assert(qf == RADV_QUEUE_GENERAL || qf == RADV_QUEUE_COMPUTE);
+
+   /* Return the correct cdw at the end of the packet so the caller can assert 
it. */
+   const unsigned cdw_end = radeon_check_space(device->ws, cs, 4 + count);
+
+   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, false));
+   radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | 
S_370_ENGINE_SEL(engine_sel));
+   radeon_emit(cs, va);
+   radeon_emit(cs, va >> 32);
+
+   return cdw_end;
+}
+
+ALWAYS_INLINE static void
+radv_cs_write_data(const struct radv_device *device, struct radeon_cmdbuf *cs, 
const enum radv_queue_family qf,
+                   const unsigned engine_sel, const uint64_t va, const 
unsigned count, const uint32_t *dwords,
+                   const bool predicating)
+{
+   ASSERTED const unsigned cdw_end = radv_cs_write_data_head(device, cs, qf, 
engine_sel, va, count, predicating);
+   radeon_emit_array(cs, dwords, count);
+   assert(cs->cdw == cdw_end);
+}
+
 #endif /* RADV_CS_H */
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index a02556a1d28..46f19263a52 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -2068,7 +2068,7 @@ unsigned radv_get_default_max_sample_dist(int 
log_samples);
 void radv_device_init_msaa(struct radv_device *device);
 VkResult radv_device_init_vrs_state(struct radv_device *device);
 
-void radv_emit_write_data_imm(struct radeon_cmdbuf *cs, unsigned engine_sel, 
uint64_t va, uint32_t imm);
+void radv_cs_write_data_imm(struct radeon_cmdbuf *cs, unsigned engine_sel, 
uint64_t va, uint32_t imm);
 
 void radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const 
struct radv_image_view *iview,
                                    VkClearDepthStencilValue ds_clear_value, 
VkImageAspectFlags aspects);
diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
index c6e371beb1f..e1d82061034 100644
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -1777,11 +1777,11 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, 
struct radv_query_pool *poo
       if (cmd_buffer->device->physical_device->use_ngg_streamout) {
          /* generated prim counter */
          gfx10_copy_gds_query(cmd_buffer, 
RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va);
-         radv_emit_write_data_imm(cs, V_370_ME, va + 4, 0x80000000);
+         radv_cs_write_data_imm(cs, V_370_ME, va + 4, 0x80000000);
 
          /* written prim counter */
          gfx10_copy_gds_query(cmd_buffer, 
RADV_SHADER_QUERY_PRIM_XFB_OFFSET(index), va + 8);
-         radv_emit_write_data_imm(cs, V_370_ME, va + 12, 0x80000000);
+         radv_cs_write_data_imm(cs, V_370_ME, va + 12, 0x80000000);
 
          /* Record that the command buffer needs GDS. */
          cmd_buffer->gds_needed = true;
@@ -1802,7 +1802,7 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, 
struct radv_query_pool *poo
       if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
          /* On GFX11+, primitives generated query always use GDS. */
          gfx10_copy_gds_query(cmd_buffer, 
RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va);
-         radv_emit_write_data_imm(cs, V_370_ME, va + 4, 0x80000000);
+         radv_cs_write_data_imm(cs, V_370_ME, va + 4, 0x80000000);
 
          /* Record that the command buffer needs GDS. */
          cmd_buffer->gds_needed = true;
@@ -1938,11 +1938,11 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, 
struct radv_query_pool *pool,
       if (cmd_buffer->device->physical_device->use_ngg_streamout) {
          /* generated prim counter */
          gfx10_copy_gds_query(cmd_buffer, 
RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 16);
-         radv_emit_write_data_imm(cs, V_370_ME, va + 20, 0x80000000);
+         radv_cs_write_data_imm(cs, V_370_ME, va + 20, 0x80000000);
 
          /* written prim counter */
          gfx10_copy_gds_query(cmd_buffer, 
RADV_SHADER_QUERY_PRIM_XFB_OFFSET(index), va + 24);
-         radv_emit_write_data_imm(cs, V_370_ME, va + 28, 0x80000000);
+         radv_cs_write_data_imm(cs, V_370_ME, va + 28, 0x80000000);
 
          cmd_buffer->state.active_prims_xfb_gds_queries--;
 
@@ -1960,7 +1960,7 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct 
radv_query_pool *pool,
       if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
          /* On GFX11+, primitives generated query always use GDS. */
          gfx10_copy_gds_query(cmd_buffer, 
RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 16);
-         radv_emit_write_data_imm(cs, V_370_ME, va + 20, 0x80000000);
+         radv_cs_write_data_imm(cs, V_370_ME, va + 20, 0x80000000);
 
          cmd_buffer->state.active_prims_gen_gds_queries--;
 
diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index bcad1705491..e6336967b7e 100644
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -2010,7 +2010,7 @@ radv_device_init_msaa(struct radv_device *device)
 }
 
 void
-radv_emit_write_data_imm(struct radeon_cmdbuf *cs, unsigned engine_sel, 
uint64_t va, uint32_t imm)
+radv_cs_write_data_imm(struct radeon_cmdbuf *cs, unsigned engine_sel, uint64_t 
va, uint32_t imm)
 {
    radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
    radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | 
S_370_ENGINE_SEL(engine_sel));

Reply via email to