From: Marek Olšák <marek.ol...@amd.com>

---
 src/gallium/drivers/radeonsi/si_cp_dma.c |  2 +-
 src/gallium/drivers/radeonsi/si_dma.c    | 40 ++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index aed8bb8..f06b8dd 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -196,21 +196,21 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
                map += offset;
                for (uint64_t i = 0; i < size; i++) {
                        unsigned byte_within_dword = (offset + i) % 4;
                        *map++ = (value >> (byte_within_dword * 8)) & 0xff;
                }
                return;
        }
 
        /* dma_clear_buffer can use clear_buffer on failure. Make sure that
         * doesn't happen. We don't want an infinite recursion: */
-       if (sctx->b.chip_class >= CIK && sctx->b.dma.cs &&
+       if (sctx->b.dma.cs &&
            /* CP DMA is very slow. Always use SDMA for big clears. This
             * alone improves DeusEx:MD performance by 70%. */
            (size > 128 * 1024 ||
             /* Buffers not used by the GFX IB yet will be cleared by SDMA.
              * This happens to move most buffer clears to SDMA, including
              * DCC and CMASK clears, because pipe->clear clears them before
              * si_emit_framebuffer_state (in a draw call) adds them.
              * For example, DeusEx:MD has 21 buffer clears per frame and all
              * of them are moved to SDMA thanks to this. */
             !ws->cs_is_buffer_referenced(sctx->b.gfx.cs, rdst->buf,
diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c
index b6aab00..9dbee3a 100644
--- a/src/gallium/drivers/radeonsi/si_dma.c
+++ b/src/gallium/drivers/radeonsi/si_dma.c
@@ -71,20 +71,59 @@ static void si_dma_copy_buffer(struct si_context *ctx,
                radeon_emit(cs, dst_offset);
                radeon_emit(cs, src_offset);
                radeon_emit(cs, (dst_offset >> 32UL) & 0xff);
                radeon_emit(cs, (src_offset >> 32UL) & 0xff);
                dst_offset += count;
                src_offset += count;
                size -= count;
        }
 }
 
+static void si_dma_clear_buffer(struct pipe_context *ctx,
+                               struct pipe_resource *dst,
+                               uint64_t offset,
+                               uint64_t size,
+                               unsigned clear_value)
+{ /* Fill "size" bytes of "dst" at "offset" with clear_value via DMA CONSTANT_FILL packets. */
+       struct si_context *sctx = (struct si_context *)ctx;
+       struct radeon_winsys_cs *cs = sctx->b.dma.cs;
+       unsigned i, ncopy, csize;
+       struct r600_resource *rdst = r600_resource(dst);
+
+       if (!cs || offset % 4 != 0 || size % 4 != 0) { /* no DMA CS, or range not 4-byte aligned */
+               ctx->clear_buffer(ctx, dst, offset, size, &clear_value, 4);
+               return;
+       }
+
+       /* Mark the buffer range of destination as valid (initialized),
+        * so that transfer_map knows it should wait for the GPU when mapping
+        * that range. */
+       util_range_add(&rdst->valid_buffer_range, offset, offset + size);
+
+       offset += rdst->gpu_address;
+
+       /* one packet per chunk; the same maximum chunk size as for copying */
+       ncopy = DIV_ROUND_UP(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE);
+       r600_need_dma_space(&sctx->b, ncopy * 4, rdst, NULL); /* the loop emits 4 dwords per packet */
+
+       for (i = 0; i < ncopy; i++) {
+               csize = MIN2(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE);
+               radeon_emit(cs, SI_DMA_PACKET(SI_DMA_PACKET_CONSTANT_FILL, 0,
+                                             csize / 4));
+               radeon_emit(cs, offset);
+               radeon_emit(cs, clear_value);
+               radeon_emit(cs, (offset >> 32) << 16); /* address bits above 32, placed from bit 16 up */
+               offset += csize;
+               size -= csize;
+       }
+}
+
 static void si_dma_copy_tile(struct si_context *ctx,
                             struct pipe_resource *dst,
                             unsigned dst_level,
                             unsigned dst_x,
                             unsigned dst_y,
                             unsigned dst_z,
                             struct pipe_resource *src,
                             unsigned src_level,
                             unsigned src_x,
                             unsigned src_y,
@@ -278,11 +317,12 @@ static void si_dma_copy(struct pipe_context *ctx,
        return;
 
 fallback:
        si_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
                                src, src_level, src_box);
 }
 
 void si_init_dma_functions(struct si_context *sctx)
 {
        sctx->b.dma_copy = si_dma_copy;
+       sctx->b.dma_clear_buffer = si_dma_clear_buffer; /* install the DMA buffer-clear hook */
 }
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to