Re: [Mesa-dev] [PATCH 09/25] gallium/u_threaded: implement asynchronous flushes

2017-11-03 Thread Nicolai Hähnle

On 31.10.2017 03:15, Marek Olšák wrote:

On Sun, Oct 22, 2017 at 9:07 PM, Nicolai Hähnle  wrote:

@@ -107,20 +138,46 @@ static boolean si_fence_finish(struct pipe_screen *screen,
uint64_t timeout)
  {
 struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws;
 struct si_multi_fence *rfence = (struct si_multi_fence *)fence;
 struct r600_common_context *rctx;
 int64_t abs_timeout = os_time_get_absolute_timeout(timeout);

 ctx = threaded_context_unwrap_sync(ctx);
 rctx = ctx ? (struct r600_common_context*)ctx : NULL;

+   if (!util_queue_fence_is_signalled(&rfence->ready)) {
+   if (!timeout)
+   return false;
+
+   if (rfence->tc_token) {
+   /* Ensure that si_flush_from_st will be called for
+* this fence, but only if we're in the API thread
+* where the context is current.
+*
+* Note that the batch containing the flush may already
+* be in flight in the driver thread, so the fence
+* may not be ready yet when this call returns.
+*/
+   threaded_context_flush(ctx, rfence->tc_token);
+   }
+
+   if (timeout == PIPE_TIMEOUT_INFINITE) {
+   util_queue_fence_wait(&rfence->ready);
+   } else {
+   if (!util_queue_fence_wait_timeout(&rfence->ready, abs_timeout))
+   return false;
+   }
+
+   assert(!rfence->tc_token);


tc_token might be non-NULL if this code is executed right after
si_flush_from_st signals the fence.


You're quite right, I'm removing this assertion, thanks!
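
For the record, with the assert dropped the hunk roughly ends up as below (just
a sketch of the intended end state for readability, not the final committed
patch):

   if (!util_queue_fence_is_signalled(&rfence->ready)) {
      if (!timeout)
         return false;

      if (rfence->tc_token) {
         /* Ensure that si_flush_from_st will be called for this fence, but
          * only if we're in the API thread where the context is current. */
         threaded_context_flush(ctx, rfence->tc_token);
      }

      if (timeout == PIPE_TIMEOUT_INFINITE) {
         util_queue_fence_wait(&rfence->ready);
      } else {
         if (!util_queue_fence_wait_timeout(&rfence->ready, abs_timeout))
            return false;
      }

      /* No assert on rfence->tc_token here: as you point out, the token may
       * still be non-NULL right after si_flush_from_st signals the fence, so
       * a waiter can reach this point before the token is cleared. */
   }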

--
Learn how the world really is,
but never forget how it ought to be.


Re: [Mesa-dev] [PATCH 09/25] gallium/u_threaded: implement asynchronous flushes

2017-10-30 Thread Marek Olšák
On Sun, Oct 22, 2017 at 9:07 PM, Nicolai Hähnle  wrote:
> @@ -107,20 +138,46 @@ static boolean si_fence_finish(struct pipe_screen *screen,
>uint64_t timeout)
>  {
> struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws;
> struct si_multi_fence *rfence = (struct si_multi_fence *)fence;
> struct r600_common_context *rctx;
> int64_t abs_timeout = os_time_get_absolute_timeout(timeout);
>
> ctx = threaded_context_unwrap_sync(ctx);
> rctx = ctx ? (struct r600_common_context*)ctx : NULL;
>
> +   if (!util_queue_fence_is_signalled(&rfence->ready)) {
> +   if (!timeout)
> +   return false;
> +
> +   if (rfence->tc_token) {
> +   /* Ensure that si_flush_from_st will be called for
> +* this fence, but only if we're in the API thread
> +* where the context is current.
> +*
> +* Note that the batch containing the flush may already
> +* be in flight in the driver thread, so the fence
> +* may not be ready yet when this call returns.
> +*/
> +   threaded_context_flush(ctx, rfence->tc_token);
> +   }
> +
> +   if (timeout == PIPE_TIMEOUT_INFINITE) {
> +   util_queue_fence_wait(&rfence->ready);
> +   } else {
> +   if (!util_queue_fence_wait_timeout(&rfence->ready, abs_timeout))
> +   return false;
> +   }
> +
> +   assert(!rfence->tc_token);

tc_token might be non-NULL if this code is executed right after
si_flush_from_st signals the fence.

Marek


[Mesa-dev] [PATCH 09/25] gallium/u_threaded: implement asynchronous flushes

2017-10-22 Thread Nicolai Hähnle
From: Nicolai Hähnle 

This requires out-of-band creation of fences; an asynchronous flush is
signaled to the pipe_context::flush implementation by a special
TC_FLUSH_ASYNC flag.
---
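Note for reviewers (not part of the commit message): as a quick illustration
of the driver-side contract, below is a minimal sketch of a
pipe_context::flush implementation that honors TC_FLUSH_ASYNC. It is not the
radeonsi code from this series; struct my_fence, my_fence_create() and
my_flush_from_st() are made-up names, and only util_queue_fence_signal(),
TC_FLUSH_ASYNC and the pipe_context::flush signature come from Mesa / this
patch.

   /* Hypothetical driver fence; only the util_queue_fence matters here. */
   struct my_fence {
      struct pipe_reference reference;
      struct util_queue_fence ready; /* signaled once the flush has happened */
   };

   static void
   my_flush_from_st(struct pipe_context *ctx, struct pipe_fence_handle **fence,
                    unsigned flags)
   {
      struct my_fence *mf = NULL;

      if (fence) {
         if (flags & TC_FLUSH_ASYNC) {
            /* u_threaded_context created the fence out of band in the API
             * thread and already stored it in *fence; just look it up. */
            mf = (struct my_fence *)*fence;
         } else {
            /* Classic path: create the fence here and hand it back. */
            mf = my_fence_create(); /* made-up helper */
            *fence = (struct pipe_fence_handle *)mf;
         }
      }

      /* ... build and submit the command stream to the winsys ... */

      if (mf && (flags & TC_FLUSH_ASYNC)) {
         /* Waiters may already be blocked on mf->ready in fence_finish;
          * wake them up now that the flush has actually been submitted. */
         util_queue_fence_signal(&mf->ready);
      }
   }

The point is that with TC_FLUSH_ASYNC the fence already exists before the
driver thread runs the flush, so fence_finish can block on the fence's ready
flag and the driver only has to signal that flag once the flush has really
happened.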
 src/gallium/auxiliary/util/u_threaded_context.c| 96 +-
 src/gallium/auxiliary/util/u_threaded_context.h| 56 +
 .../auxiliary/util/u_threaded_context_calls.h  |  1 +
 src/gallium/drivers/radeonsi/si_fence.c| 90 +---
 src/gallium/drivers/radeonsi/si_pipe.c |  1 +
 src/gallium/drivers/radeonsi/si_pipe.h |  2 +
 6 files changed, 233 insertions(+), 13 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c
index 24fab7f5cb6..485d912ca28 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.c
+++ b/src/gallium/auxiliary/util/u_threaded_context.c
@@ -81,40 +81,47 @@ tc_debug_check(struct threaded_context *tc)
 
 static void
 tc_batch_execute(void *job, int thread_index)
 {
struct tc_batch *batch = job;
struct pipe_context *pipe = batch->pipe;
struct tc_call *last = &batch->call[batch->num_total_call_slots];
 
tc_batch_check(batch);
 
+   assert(!batch->token);
+
for (struct tc_call *iter = batch->call; iter != last;
 iter += iter->num_call_slots) {
   tc_assert(iter->sentinel == TC_SENTINEL);
   execute_func[iter->call_id](pipe, &iter->payload);
}
 
tc_batch_check(batch);
batch->num_total_call_slots = 0;
 }
 
 static void
 tc_batch_flush(struct threaded_context *tc)
 {
struct tc_batch *next = &tc->batch_slots[tc->next];
 
tc_assert(next->num_total_call_slots != 0);
tc_batch_check(next);
tc_debug_check(tc);
p_atomic_add(&tc->num_offloaded_slots, next->num_total_call_slots);
 
+   if (next->token) {
+  next->token->tc = NULL;
+  tc_unflushed_batch_token_reference(&next->token, NULL);
+   }
+
util_queue_add_job(&tc->queue, next, &next->fence, tc_batch_execute,
   NULL);
tc->last = tc->next;
tc->next = (tc->next + 1) % TC_MAX_BATCHES;
 }
 
 /* This is the function that adds variable-sized calls into the current
  * batch. It also flushes the batch if there is not enough space there.
  * All other higher-level "add" functions use it.
  */
@@ -172,40 +179,63 @@ _tc_sync(struct threaded_context *tc, const char *info, const char *func)
tc_debug_check(tc);
 
/* Only wait for queued calls... */
if (!util_queue_fence_is_signalled(&last->fence)) {
   util_queue_fence_wait(&last->fence);
   synced = true;
}
 
tc_debug_check(tc);
 
+   if (next->token) {
+  next->token->tc = NULL;
+  tc_unflushed_batch_token_reference(&next->token, NULL);
+   }
+
/* .. and execute unflushed calls directly. */
if (next->num_total_call_slots) {
   p_atomic_add(&tc->num_direct_slots, next->num_total_call_slots);
   tc_batch_execute(next, 0);
   synced = true;
}
 
if (synced) {
   p_atomic_inc(&tc->num_syncs);
 
   if (tc_strcmp(func, "tc_destroy") != 0)
  tc_printf("sync %s %s\n", func, info);
}
 
tc_debug_check(tc);
 }
 
 #define tc_sync(tc) _tc_sync(tc, "", __func__)
 #define tc_sync_msg(tc, info) _tc_sync(tc, info, __func__)
 
+/**
+ * Call this from fence_finish for same-context fence waits of deferred fences
+ * that haven't been flushed yet.
+ *
+ * The passed pipe_context must be the one passed to pipe_screen::fence_finish,
+ * i.e., the wrapped one.
+ */
+void
+threaded_context_flush(struct pipe_context *_pipe,
+   struct tc_unflushed_batch_token *token)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+
+   /* This is called from the state-tracker / application thread. */
+   if (token->tc && token->tc == tc)
+  tc_sync(token->tc);
+}
+
 static void
 tc_set_resource_reference(struct pipe_resource **dst, struct pipe_resource *src)
 {
*dst = NULL;
pipe_resource_reference(dst, src);
 }
 
 void
 threaded_resource_init(struct pipe_resource *res)
 {
@@ -1775,36 +1805,94 @@ tc_create_video_buffer(struct pipe_context *_pipe,
 {
unreachable("Threaded context should not be enabled for video APIs");
return NULL;
 }
 
 
 /**************************************************************************
  * draw, launch, clear, blit, copy, flush
  */
 
+struct tc_flush_payload {
+   struct pipe_fence_handle *fence;
+   unsigned flags;
+};
+
+static void
+tc_call_flush(struct pipe_context *pipe, union tc_payload *payload)
+{
+   struct tc_flush_payload *p = (struct tc_flush_payload *)payload;
+   struct pipe_screen *screen = pipe->screen;
+
+   pipe->flush(pipe, p->fence ? &p->fence : NULL, p->flags);
+   screen->fence_reference(screen, &p->fence, NULL);
+}
+
 static void
 tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
  unsigned flags)
 {
struct threaded_context *tc = threaded_context(_pipe);
struct pipe_context *pipe = tc->pipe;
+   struct p