From: Nicolai Hähnle
This requires out-of-band creation of fences, and will be signaled to
the pipe_context::flush implementation by a special TC_FLUSH_ASYNC flag.
---
 src/gallium/auxiliary/util/u_threaded_context.c    | 96 +-
 src/gallium/auxiliary/util/u_threaded_context.h    | 56 +
 .../auxiliary/util/u_threaded_context_calls.h      |  1 +
 src/gallium/drivers/radeonsi/si_fence.c            | 90 +---
 src/gallium/drivers/radeonsi/si_pipe.c             |  1 +
 src/gallium/drivers/radeonsi/si_pipe.h             |  2 +
 6 files changed, 233 insertions(+), 13 deletions(-)
diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c
index 24fab7f5cb6..485d912ca28 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.c
+++ b/src/gallium/auxiliary/util/u_threaded_context.c
@@ -81,40 +81,47 @@ tc_debug_check(struct threaded_context *tc)
static void
tc_batch_execute(void *job, int thread_index)
{
struct tc_batch *batch = job;
struct pipe_context *pipe = batch->pipe;
struct tc_call *last = &batch->call[batch->num_total_call_slots];
tc_batch_check(batch);
+ assert(!batch->token);
+
for (struct tc_call *iter = batch->call; iter != last;
iter += iter->num_call_slots) {
tc_assert(iter->sentinel == TC_SENTINEL);
execute_func[iter->call_id](pipe, &iter->payload);
}
tc_batch_check(batch);
batch->num_total_call_slots = 0;
}
static void
tc_batch_flush(struct threaded_context *tc)
{
struct tc_batch *next = &tc->batch_slots[tc->next];
tc_assert(next->num_total_call_slots != 0);
tc_batch_check(next);
tc_debug_check(tc);
p_atomic_add(&tc->num_offloaded_slots, next->num_total_call_slots);
+ if (next->token) {
+ next->token->tc = NULL;
+ tc_unflushed_batch_token_reference(&next->token, NULL);
+ }
+
util_queue_add_job(&tc->queue, next, &next->fence, tc_batch_execute,
NULL);
tc->last = tc->next;
tc->next = (tc->next + 1) % TC_MAX_BATCHES;
}
/* This is the function that adds variable-sized calls into the current
* batch. It also flushes the batch if there is not enough space there.
* All other higher-level "add" functions use it.
*/
@@ -172,40 +179,63 @@ _tc_sync(struct threaded_context *tc, const char *info, const char *func)
tc_debug_check(tc);
/* Only wait for queued calls... */
if (!util_queue_fence_is_signalled(&last->fence)) {
util_queue_fence_wait(&last->fence);
synced = true;
}
tc_debug_check(tc);
+ if (next->token) {
+ next->token->tc = NULL;
+ tc_unflushed_batch_token_reference(&next->token, NULL);
+ }
+
/* .. and execute unflushed calls directly. */
if (next->num_total_call_slots) {
p_atomic_add(&tc->num_direct_slots, next->num_total_call_slots);
tc_batch_execute(next, 0);
synced = true;
}
if (synced) {
p_atomic_inc(&tc->num_syncs);
if (tc_strcmp(func, "tc_destroy") != 0)
tc_printf("sync %s %s\n", func, info);
}
tc_debug_check(tc);
}
#define tc_sync(tc) _tc_sync(tc, "", __func__)
#define tc_sync_msg(tc, info) _tc_sync(tc, info, __func__)
+/**
+ * Call this from fence_finish for same-context fence waits of deferred fences
+ * that haven't been flushed yet.
+ *
+ * The passed pipe_context must be the one passed to pipe_screen::fence_finish,
+ * i.e., the wrapped one.
+ */
+void
+threaded_context_flush(struct pipe_context *_pipe,
+ struct tc_unflushed_batch_token *token)
+{
+ struct threaded_context *tc = threaded_context(_pipe);
+
+ /* This is called from the state-tracker / application thread. */
+ if (token->tc && token->tc == tc)
+ tc_sync(token->tc);
+}
+
static void
tc_set_resource_reference(struct pipe_resource **dst, struct pipe_resource *src)
{
*dst = NULL;
pipe_resource_reference(dst, src);
}
void
threaded_resource_init(struct pipe_resource *res)
{
@@ -1775,36 +1805,94 @@ tc_create_video_buffer(struct pipe_context *_pipe,
{
unreachable("Threaded context should not be enabled for video APIs");
return NULL;
}
/*
 * draw, launch, clear, blit, copy, flush
 */
+struct tc_flush_payload {
+ struct pipe_fence_handle *fence;
+ unsigned flags;
+};
+
+static void
+tc_call_flush(struct pipe_context *pipe, union tc_payload *payload)
+{
+ struct tc_flush_payload *p = (struct tc_flush_payload *)payload;
+ struct pipe_screen *screen = pipe->screen;
+
+ pipe->flush(pipe, p->fence ? &p->fence : NULL, p->flags);
+ screen->fence_reference(screen, &p->fence, NULL);
+}
+
static void
tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
unsigned flags)
{
struct threaded_context *tc = threaded_context(_pipe);
struct pipe_context *pipe = tc->pipe;
+ struct p