From: Marek Olšák <marek.ol...@amd.com>

+23% Bioshock Infinite performance.

v2: - use the new fence_finish interface
    - allow deferred fences with multiple contexts
    - clear the ctx pointer after a deferred flush
---
 src/gallium/drivers/radeon/r600_pipe_common.c | 44 ++++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 119fdf5..1c56e6e 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -40,20 +40,26 @@
 #include <sys/utsname.h>
 
 #ifndef HAVE_LLVM
 #define HAVE_LLVM 0
 #endif
 
 struct r600_multi_fence {
        struct pipe_reference reference;
        struct pipe_fence_handle *gfx;
        struct pipe_fence_handle *sdma;
+
+       /* If the context wasn't flushed at fence creation, this is non-NULL. */
+       struct {
+               struct r600_common_context *ctx;
+               unsigned ib_index;
+       } gfx_unflushed;
 };
 
 /*
  * shader binary helpers.
  */
 void radeon_shader_binary_init(struct radeon_shader_binary *b)
 {
        memset(b, 0, sizeof(*b));
 }
 
@@ -255,42 +261,59 @@ void r600_postflush_resume_features(struct r600_common_context *ctx)
 
 static void r600_flush_from_st(struct pipe_context *ctx,
                               struct pipe_fence_handle **fence,
                               unsigned flags)
 {
        struct pipe_screen *screen = ctx->screen;
        struct r600_common_context *rctx = (struct r600_common_context *)ctx;
        unsigned rflags = 0;
        struct pipe_fence_handle *gfx_fence = NULL;
        struct pipe_fence_handle *sdma_fence = NULL;
+       bool deferred_fence = false;
 
        if (flags & PIPE_FLUSH_END_OF_FRAME)
                rflags |= RADEON_FLUSH_END_OF_FRAME;
        if (flags & PIPE_FLUSH_DEFERRED)
                rflags |= RADEON_FLUSH_ASYNC;
 
        if (rctx->dma.cs) {
                rctx->dma.flush(rctx, rflags, fence ? &sdma_fence : NULL);
        }
-       rctx->gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL);
+
+       /* Instead of flushing, create a deferred fence. Constraints:
+        * - The state tracker must allow a deferred flush.
+        * - The state tracker must request a fence.
+        * Thread safety in fence_finish must be ensured by the state tracker.
+        */
+       if (flags & PIPE_FLUSH_DEFERRED && fence) {
+               gfx_fence = rctx->ws->cs_get_next_fence(rctx->gfx.cs);
+               deferred_fence = true;
+       } else {
+               rctx->gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL);
+       }
 
         /* Both engines can signal out of order, so we need to keep both fences. */
        if (gfx_fence || sdma_fence) {
                struct r600_multi_fence *multi_fence =
                        CALLOC_STRUCT(r600_multi_fence);
                if (!multi_fence)
                        return;
 
                multi_fence->reference.count = 1;
                multi_fence->gfx = gfx_fence;
                multi_fence->sdma = sdma_fence;
 
+               if (deferred_fence) {
+                       multi_fence->gfx_unflushed.ctx = rctx;
+                       multi_fence->gfx_unflushed.ib_index = rctx->num_gfx_cs_flushes;
+               }
+
                screen->fence_reference(screen, fence, NULL);
                *fence = (struct pipe_fence_handle*)multi_fence;
        }
 }
 
 static void r600_flush_dma_ring(void *ctx, unsigned flags,
                                struct pipe_fence_handle **fence)
 {
        struct r600_common_context *rctx = (struct r600_common_context *)ctx;
        struct radeon_winsys_cs *cs = rctx->dma.cs;
@@ -953,36 +976,55 @@ static void r600_fence_reference(struct pipe_screen *screen,
         *rdst = rsrc;
 }
 
 static boolean r600_fence_finish(struct pipe_screen *screen,
                                 struct pipe_context *ctx,
                                 struct pipe_fence_handle *fence,
                                 uint64_t timeout)
 {
        struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws;
        struct r600_multi_fence *rfence = (struct r600_multi_fence *)fence;
+       struct r600_common_context *rctx =
+               ctx ? (struct r600_common_context*)ctx : NULL;
        int64_t abs_timeout = os_time_get_absolute_timeout(timeout);
 
        if (rfence->sdma) {
                if (!rws->fence_wait(rws, rfence->sdma, timeout))
                        return false;
 
                /* Recompute the timeout after waiting. */
                if (timeout && timeout != PIPE_TIMEOUT_INFINITE) {
                        int64_t time = os_time_get_nano();
                        timeout = abs_timeout > time ? abs_timeout - time : 0;
                }
        }
 
        if (!rfence->gfx)
                return true;
 
+       /* Flush the gfx IB if it hasn't been flushed yet. */
+       if (rctx &&
+           rfence->gfx_unflushed.ctx == rctx &&
+           rfence->gfx_unflushed.ib_index == rctx->num_gfx_cs_flushes) {
+               rctx->gfx.flush(rctx, timeout ? 0 : RADEON_FLUSH_ASYNC, NULL);
+               rfence->gfx_unflushed.ctx = NULL;
+
+               if (!timeout)
+                       return false;
+
+               /* Recompute the timeout after all that. */
+               if (timeout && timeout != PIPE_TIMEOUT_INFINITE) {
+                       int64_t time = os_time_get_nano();
+                       timeout = abs_timeout > time ? abs_timeout - time : 0;
+               }
+       }
+
        return rws->fence_wait(rws, rfence->gfx, timeout);
 }
 
 static void r600_query_memory_info(struct pipe_screen *screen,
                                   struct pipe_memory_info *info)
 {
        struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
        struct radeon_winsys *ws = rscreen->ws;
        unsigned vram_usage, gtt_usage;
 
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to