Module: Mesa
Branch: main
Commit: 32dd77ea2d6bb434d902ba14cb2919161de7f223
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=32dd77ea2d6bb434d902ba14cb2919161de7f223

Author: Timur Kristóf <[email protected]>
Date:   Thu Oct  5 13:38:24 2023 +0200

radv: Implement vkCmdFillBuffer on transfer queues.

Signed-off-by: Timur Kristóf <[email protected]>
Reviewed-by: Samuel Pitoiset <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26580>

---

 src/amd/vulkan/meta/radv_meta_buffer.c |  4 +++-
 src/amd/vulkan/radv_private.h          |  2 ++
 src/amd/vulkan/radv_sdma.c             | 33 +++++++++++++++++++++++++++++++++
 3 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/src/amd/vulkan/meta/radv_meta_buffer.c b/src/amd/vulkan/meta/radv_meta_buffer.c
index 76f7d4e5111..20e380fbd71 100644
--- a/src/amd/vulkan/meta/radv_meta_buffer.c
+++ b/src/amd/vulkan/meta/radv_meta_buffer.c
@@ -236,7 +236,9 @@ radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *im
    if (bo)
       radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, bo);
 
-   if (use_compute) {
+   if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) {
+      radv_sdma_fill_buffer(cmd_buffer->device, cmd_buffer->cs, va, size, 
value);
+   } else if (use_compute) {
       cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, 
VK_ACCESS_2_SHADER_WRITE_BIT, image);
 
       fill_buffer_shader(cmd_buffer, va, size, value);
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 36959847e84..1818b700a46 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -3143,6 +3143,8 @@ void radv_sdma_copy_buffer_image_unaligned(const struct radv_device *device, str
                                            bool to_image);
 void radv_sdma_copy_buffer(const struct radv_device *device, struct 
radeon_cmdbuf *cs, uint64_t src_va, uint64_t dst_va,
                            uint64_t size);
+void radv_sdma_fill_buffer(const struct radv_device *device, struct 
radeon_cmdbuf *cs, const uint64_t va,
+                           const uint64_t size, const uint32_t value);
 
 void radv_memory_trace_init(struct radv_device *device);
 void radv_rmv_log_bo_allocate(struct radv_device *device, struct 
radeon_winsys_bo *bo, uint32_t size, bool is_internal);
diff --git a/src/amd/vulkan/radv_sdma.c b/src/amd/vulkan/radv_sdma.c
index 133e33f62b5..2bc56d0e0f8 100644
--- a/src/amd/vulkan/radv_sdma.c
+++ b/src/amd/vulkan/radv_sdma.c
@@ -393,6 +393,39 @@ radv_sdma_copy_buffer(const struct radv_device *device, struct radeon_cmdbuf *cs
    }
 }
 
+void
+radv_sdma_fill_buffer(const struct radv_device *device, struct radeon_cmdbuf 
*cs, const uint64_t va,
+                      const uint64_t size, const uint32_t value)
+{
+   const uint32_t fill_size = 2; /* This means that the count is in dwords. */
+   const uint32_t constant_fill_header = 
SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 0) | (fill_size & 0x3) << 30;
+
+   /* This packet is the same since SDMA v2.4, haven't bothered to check older 
versions. */
+   const enum sdma_version ver = 
device->physical_device->rad_info.sdma_ip_version;
+   assert(ver >= SDMA_2_4);
+
+   /* Maximum allowed fill size depends on the GPU.
+    * Emit as many packets as necessary to fill all the bytes we need.
+    */
+   const uint64_t max_fill_bytes = BITFIELD64_MASK(ver >= SDMA_6_0 ? 30 : 22) 
& ~0x3;
+   const unsigned num_packets = DIV_ROUND_UP(size, max_fill_bytes);
+   ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs, num_packets 
* 5);
+
+   for (unsigned i = 0; i < num_packets; ++i) {
+      const uint64_t offset = i * max_fill_bytes;
+      const uint64_t fill_bytes = MIN2(size - offset, max_fill_bytes);
+      const uint64_t fill_va = va + offset;
+
+      radeon_emit(cs, constant_fill_header);
+      radeon_emit(cs, fill_va);
+      radeon_emit(cs, fill_va >> 32);
+      radeon_emit(cs, value);
+      radeon_emit(cs, fill_bytes - 1); /* Must be programmed in bytes, even if 
the fill is done in dwords. */
+   }
+
+   assert(cs->cdw <= cdw_max);
+}
+
 static void
 radv_sdma_emit_copy_linear_sub_window(const struct radv_device *device, struct 
radeon_cmdbuf *cs,
                                       const struct radv_sdma_linear_info 
*const src,

Reply via email to