Module: Mesa
Branch: main
Commit: 340218c51e65171020a8f9a818061a814aa70475
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=340218c51e65171020a8f9a818061a814aa70475

Author: Marek Olšák <[email protected]>
Date:   Fri Jan 20 21:54:57 2023 -0500

radeonsi: upload shaders via a staging buffer so as not to map VRAM directly

Reviewed-by: Pierre-Eric Pelloux-Prayer <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25494>

---

 src/gallium/drivers/radeonsi/si_pipe.c   |  1 +
 src/gallium/drivers/radeonsi/si_pipe.h   |  1 +
 src/gallium/drivers/radeonsi/si_shader.c | 65 ++++++++++++++++++++++++++++----
 3 files changed, 59 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index cb7d2934be1..f5c820f4ad6 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -78,6 +78,7 @@ static const struct debug_named_value radeonsi_debug_options[] = {
    {"reserve_vmid", DBG(RESERVE_VMID), "Force VMID reservation per context."},
    {"shadowregs", DBG(SHADOW_REGS), "Enable CP register shadowing."},
    {"nofastdlist", DBG(NO_FAST_DISPLAY_LIST), "Disable fast display lists"},
+   {"nodmashaders", DBG(NO_DMA_SHADERS), "Disable uploading shaders via CP DMA 
and map them directly."},
 
    /* Multimedia options: */
    { "noefc", DBG(NO_EFC), "Disable hardware based encoder colour format 
conversion."},
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 43438591299..e5b90caa3ec 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -221,6 +221,7 @@ enum
    DBG_RESERVE_VMID,
    DBG_SHADOW_REGS,
    DBG_NO_FAST_DISPLAY_LIST,
+   DBG_NO_DMA_SHADERS,
 
    /* Multimedia options: */
    DBG_NO_EFC,
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index dabf8d94ba4..8cc852c3f38 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -18,6 +18,7 @@
 #include "util/u_memory.h"
 #include "util/mesa-sha1.h"
 #include "util/ralloc.h"
+#include "util/u_upload_mgr.h"
 
 static const char scratch_rsrc_dword0_symbol[] = "SCRATCH_RSRC_DWORD0";
 
@@ -908,12 +909,14 @@ static bool upload_binary_elf(struct si_screen *sscreen, struct si_shader *shade
       return false;
 
    unsigned rx_size = ac_align_shader_binary_for_prefetch(&sscreen->info, 
binary.rx_size);
+   bool dma_upload = !(sscreen->debug_flags & DBG(NO_DMA_SHADERS));
 
    si_resource_reference(&shader->bo, NULL);
    shader->bo = si_aligned_buffer_create(
       &sscreen->b,
-      (sscreen->info.cpdma_prefetch_writes_memory ? 0 : 
SI_RESOURCE_FLAG_READ_ONLY) |
-      SI_RESOURCE_FLAG_DRIVER_INTERNAL | SI_RESOURCE_FLAG_32BIT,
+      SI_RESOURCE_FLAG_DRIVER_INTERNAL | SI_RESOURCE_FLAG_32BIT |
+      (dma_upload || sscreen->info.cpdma_prefetch_writes_memory ? 0 : 
SI_RESOURCE_FLAG_READ_ONLY) |
+      (dma_upload ? PIPE_RESOURCE_FLAG_UNMAPPABLE : 0),
       PIPE_USAGE_IMMUTABLE, align(rx_size, SI_CPDMA_ALIGNMENT), 256);
    if (!shader->bo)
       return false;
@@ -924,11 +927,28 @@ static bool upload_binary_elf(struct si_screen *sscreen, struct si_shader *shade
    u.get_external_symbol = si_get_external_symbol;
    u.cb_data = &scratch_va;
    u.rx_va = shader->bo->gpu_address;
-   u.rx_ptr = sscreen->ws->buffer_map(sscreen->ws,
-      shader->bo->buf, NULL,
-      PIPE_MAP_READ_WRITE | PIPE_MAP_UNSYNCHRONIZED | RADEON_MAP_TEMPORARY);
-   if (!u.rx_ptr)
-      return false;
+
+   struct si_context *upload_ctx = NULL;
+   struct pipe_resource *staging = NULL;
+   unsigned staging_offset = 0;
+
+   if (dma_upload) {
+      /* First upload into a staging buffer. */
+      upload_ctx = si_get_aux_context(&sscreen->aux_context.shader_upload);
+
+      u_upload_alloc(upload_ctx->b.stream_uploader, 0, binary.rx_size, 256,
+                     &staging_offset, &staging, (void**)&u.rx_ptr);
+      if (!u.rx_ptr) {
+         si_put_aux_context_flush(&sscreen->aux_context.shader_upload);
+         return false;
+      }
+   } else {
+      u.rx_ptr = sscreen->ws->buffer_map(sscreen->ws,
+         shader->bo->buf, NULL,
+         PIPE_MAP_READ_WRITE | PIPE_MAP_UNSYNCHRONIZED | RADEON_MAP_TEMPORARY);
+      if (!u.rx_ptr)
+         return false;
+   }
 
    int size = ac_rtld_upload(&u);
 
@@ -939,7 +959,36 @@ static bool upload_binary_elf(struct si_screen *sscreen, struct si_shader *shade
       memcpy(shader->binary.uploaded_code, u.rx_ptr, size);
    }
 
-   sscreen->ws->buffer_unmap(sscreen->ws, shader->bo->buf);
+   if (dma_upload) {
+      /* Then copy from the staging buffer to VRAM.
+       *
+       * We can't use the upload copy in si_buffer_transfer_unmap because that might use
+       * a compute shader, and we can't use shaders in the code that is responsible for making
+       * them available.
+       */
+      si_cp_dma_copy_buffer(upload_ctx, &shader->bo->b.b, staging, 0, 
staging_offset,
+                            binary.rx_size, SI_OP_SYNC_AFTER, 
SI_COHERENCY_SHADER,
+                            sscreen->info.gfx_level >= GFX7 ? L2_LRU : 
L2_BYPASS);
+      upload_ctx->flags |= SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_L2;
+
+#if 0 /* debug: validate whether the copy was successful */
+      uint32_t *dst_binary = malloc(binary.rx_size);
+      uint32_t *src_binary = (uint32_t*)u.rx_ptr;
+      pipe_buffer_read(&upload_ctx->b, &shader->bo->b.b, 0, binary.rx_size, 
dst_binary);
+      puts("dst_binary == src_binary:");
+      for (unsigned i = 0; i < binary.rx_size / 4; i++) {
+         printf("   %08x == %08x\n", dst_binary[i], src_binary[i]);
+      }
+      free(dst_binary);
+      exit(0);
+#endif
+
+      si_put_aux_context_flush(&sscreen->aux_context.shader_upload);
+      pipe_resource_reference(&staging, NULL);
+   } else {
+      sscreen->ws->buffer_unmap(sscreen->ws, shader->bo->buf);
+   }
+
    ac_rtld_close(&binary);
    shader->gpu_address = u.rx_va;
 

Reply via email to