From: Marek Olšák <marek.ol...@amd.com>

r600_dma_emit_wait_idle is going away in its current form, so move its
IB-size flush heuristic and the num_dma_calls increment into
r600_need_dma_space. The only behavioral difference is that the moved code
now runs before DMA calls instead of after them.
---
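
For reviewers, a minimal sketch of the resulting call pattern (the helper
name and the dw count are hypothetical, not part of this patch): a typical
SDMA path now hits the moved heuristic before emitting its packet, rather
than in r600_dma_emit_wait_idle afterwards.

    /* Hypothetical SDMA helper; illustrates where the moved code runs. */
    static void example_dma_op(struct r600_common_context *ctx,
                               struct r600_resource *dst,
                               struct r600_resource *src)
    {
            /* Reserves IB space, applies the 64 MiB flush heuristic and
             * increments num_dma_calls before any packet is emitted. */
            r600_need_dma_space(ctx, 4 /* hypothetical dw count */, dst, src);

            /* ... radeon_emit() the actual SDMA packet(s) here ... */
    }
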
 src/gallium/drivers/radeon/r600_pipe_common.c | 33 ++++++++++++---------------
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 1fe3af0..6b7bbaf 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -220,21 +220,22 @@ void r600_draw_rectangle(struct blitter_context *blitter,
 
        /* draw */
        util_draw_vertex_buffer(&rctx->b, NULL, buf, blitter->vb_slot, offset,
                                R600_PRIM_RECTANGLE_LIST, 3, 2);
        pipe_resource_reference(&buf, NULL);
 }
 
 void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
                          struct r600_resource *dst, struct r600_resource *src)
 {
-       uint64_t vram = 0, gtt = 0;
+       uint64_t vram = ctx->dma.cs->used_vram;
+       uint64_t gtt = ctx->dma.cs->used_gart;
 
        if (dst) {
                vram += dst->vram_usage;
                gtt += dst->gart_usage;
        }
        if (src) {
                vram += src->vram_usage;
                gtt += src->gart_usage;
        }
 
@@ -243,64 +244,60 @@ void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
            ((dst &&
              ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, dst->buf,
                                               RADEON_USAGE_READWRITE)) ||
             (src &&
              ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, src->buf,
                                               RADEON_USAGE_WRITE))))
                ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
 
        /* Flush if there's not enough space, or if the memory usage per IB
         * is too large.
+        *
+        * IBs using too little memory are limited by the IB submission overhead.
+        * IBs using too much memory are limited by the kernel/TTM overhead.
+        * Too long IBs create CPU-GPU pipeline bubbles and add latency.
+        *
+        * This heuristic makes sure that DMA requests are executed
+        * very soon after the call is made and lowers memory usage.
+        * It improves texture upload performance by keeping the DMA
+        * engine busy while uploads are being submitted.
         */
        if (!ctx->ws->cs_check_space(ctx->dma.cs, num_dw) ||
+           ctx->dma.cs->used_vram + ctx->dma.cs->used_gart > 64 * 1024 * 1024 ||
            !radeon_cs_memory_below_limit(ctx->screen, ctx->dma.cs, vram, gtt)) {
                ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
                assert((num_dw + ctx->dma.cs->current.cdw) <= ctx->dma.cs->current.max_dw);
        }
 
        /* If GPUVM is not supported, the CS checker needs 2 entries
         * in the buffer list per packet, which has to be done manually.
         */
        if (ctx->screen->info.has_virtual_memory) {
                if (dst)
                        radeon_add_to_buffer_list(ctx, &ctx->dma, dst,
                                                  RADEON_USAGE_WRITE,
                                                  RADEON_PRIO_SDMA_BUFFER);
                if (src)
                        radeon_add_to_buffer_list(ctx, &ctx->dma, src,
                                                  RADEON_USAGE_READ,
                                                  RADEON_PRIO_SDMA_BUFFER);
        }
+
+       /* This function is called before all DMA calls, so increment this. */
+       ctx->num_dma_calls++;
 }
 
 /* This is required to prevent read-after-write hazards. */
 void r600_dma_emit_wait_idle(struct r600_common_context *rctx)
 {
        struct radeon_winsys_cs *cs = rctx->dma.cs;
 
-       /* done at the end of DMA calls, so increment this. */
-       rctx->num_dma_calls++;
-
-       /* IBs using too little memory are limited by the IB submission overhead.
-        * IBs using too much memory are limited by the kernel/TTM overhead.
-        * Too long IBs create CPU-GPU pipeline bubbles and add latency.
-        *
-        * This heuristic makes sure that DMA requests are executed
-        * very soon after the call is made and lowers memory usage.
-        * It improves texture upload performance by keeping the DMA
-        * engine busy while uploads are being submitted.
-        */
-       if (cs->used_vram + cs->used_gart > 64 * 1024 * 1024) {
-               rctx->dma.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
-               return;
-       }
-
        r600_need_dma_space(rctx, 1, NULL, NULL);
 
        if (!radeon_emitted(cs, 0)) /* empty queue */
                return;
 
        /* NOP waits for idle on Evergreen and later. */
        if (rctx->chip_class >= CIK)
                radeon_emit(cs, 0x00000000); /* NOP */
        else if (rctx->chip_class >= EVERGREEN)
                radeon_emit(cs, 0xf0000000); /* NOP */
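
For context, r600_dma_emit_wait_idle keeps its read-after-write role; a
hedged sketch of a caller (the function, buffer usage and dw counts are
hypothetical, not from this patch):

    /* Hypothetical write-then-read sequence on the SDMA queue. */
    static void example_write_then_read(struct r600_common_context *rctx,
                                        struct r600_resource *buf)
    {
            /* Heuristic + num_dma_calls now run here, before the write. */
            r600_need_dma_space(rctx, 4, buf, NULL);
            /* ... radeon_emit() the SDMA write packet ... */

            /* Wait for the write to retire before reading the same buffer;
             * a NOP acts as wait-idle on Evergreen and later. */
            r600_dma_emit_wait_idle(rctx);

            r600_need_dma_space(rctx, 4, NULL, buf);
            /* ... radeon_emit() the SDMA read/copy packet ... */
    }
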
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to