Module: Mesa
Branch: main
Commit: dc1a18b0ed54e53b969c4143f3fb694463c956d8
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=dc1a18b0ed54e53b969c4143f3fb694463c956d8

Author: Asahi Lina <[email protected]>
Date:   Wed May 10 15:57:55 2023 +0900

asahi: Lazily initialize batch state on first draw

We track buffers written by batches, but this gets messy when we end up
with an empty batch that is never submitted, since then it might have
taken over writer state from a prior already submitted batch (for its
framebuffers).

Instead of trying to track two tiers of resource writers, let's just
defer initializing batch state until we know we have a draw (or compute
launch, or clear). This means that if a batch is marked as a writer for
a buffer, we know it will not be an empty batch.

This should be a small performance win for empty batches (no need to
emit initial VDM state or run the writer code), but more impontantly it
eliminates the empty batch writer state revert corner case.

Signed-off-by: Asahi Lina <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22971>

---

 src/gallium/drivers/asahi/agx_batch.c | 53 +++++------------------------------
 src/gallium/drivers/asahi/agx_pipe.c  |  5 ++++
 src/gallium/drivers/asahi/agx_state.c | 33 ++++++++++++++++++++++
 src/gallium/drivers/asahi/agx_state.h |  1 +
 4 files changed, 46 insertions(+), 46 deletions(-)

diff --git a/src/gallium/drivers/asahi/agx_batch.c 
b/src/gallium/drivers/asahi/agx_batch.c
index 2242f834827..e0eab52891f 100644
--- a/src/gallium/drivers/asahi/agx_batch.c
+++ b/src/gallium/drivers/asahi/agx_batch.c
@@ -117,26 +117,11 @@ agx_batch_init(struct agx_context *ctx,
    batch->clear_stencil = 0;
    batch->varyings = 0;
    batch->any_draws = false;
+   batch->initialized = false;
 
    /* We need to emit prim state at the start. Max collides with all. */
    batch->reduced_prim = PIPE_PRIM_MAX;
 
-   if (batch->key.zsbuf) {
-      struct agx_resource *rsrc = agx_resource(key->zsbuf->texture);
-      agx_batch_writes(batch, rsrc);
-
-      if (rsrc->separate_stencil)
-         agx_batch_writes(batch, rsrc->separate_stencil);
-   }
-
-   for (unsigned i = 0; i < key->nr_cbufs; ++i) {
-      if (key->cbufs[i])
-         agx_batch_writes(batch, agx_resource(key->cbufs[i]->texture));
-   }
-
-   if (key->width != AGX_COMPUTE_BATCH_WIDTH)
-      agx_batch_init_state(batch);
-
    if (!batch->syncobj) {
       int ret = drmSyncobjCreate(dev->fd, 0, &batch->syncobj);
       assert(!ret && batch->syncobj);
@@ -167,36 +152,8 @@ agx_batch_cleanup(struct agx_context *ctx, struct 
agx_batch *batch, bool reset)
    if (reset) {
       int handle;
       AGX_BATCH_FOREACH_BO_HANDLE(batch, handle) {
-         struct agx_bo *bo = agx_lookup_bo(dev, handle);
-
-         /* Revert the writer to the last submitted batch that writes this BO,
-          * if any.
-          */
-         struct agx_batch *writer = agx_writer_get(ctx, handle);
-
-         if (writer == batch) {
-            int i;
-            assert(bo->writer_syncobj != batch->syncobj);
-            agx_writer_remove(ctx, handle);
-
-            if (bo->writer_syncobj) {
-               struct agx_batch *old_batch = NULL;
-               foreach_submitted(ctx, i) {
-                  old_batch = &ctx->batches.slots[i];
-                  if (old_batch->syncobj == bo->writer_syncobj)
-                     break;
-                  else
-                     old_batch = NULL;
-               }
-               assume(old_batch);
-               batch_debug(batch,
-                           "Resetting writer for BO @ 0x%" PRIx64
-                           " to batch %d (syncobj was %d)",
-                           bo->ptr.gpu, agx_batch_idx(old_batch),
-                           bo->writer_syncobj);
-               agx_writer_add(ctx, agx_batch_idx(old_batch), bo->handle);
-            }
-         }
+         /* We should write no buffers if this is an empty batch */
+         assert(agx_writer_get(ctx, handle) != batch);
 
          agx_bo_unreference(agx_lookup_bo(dev, handle));
       }
@@ -496,6 +453,8 @@ agx_batch_writes(struct agx_batch *batch, struct 
agx_resource *rsrc)
    struct agx_context *ctx = batch->ctx;
    struct agx_batch *writer = agx_writer_get(ctx, rsrc->bo->handle);
 
+   assert(batch->initialized);
+
    agx_flush_readers_except(ctx, rsrc, batch, "Write from other batch", false);
 
    /* Nothing to do if we're already writing */
@@ -780,6 +739,8 @@ agx_batch_reset(struct agx_context *ctx, struct agx_batch 
*batch)
 {
    batch_debug(batch, "RESET");
 
+   assert(!batch->initialized);
+
    /* Reset an empty batch. Like submit, but does nothing. */
    agx_batch_mark_submitted(batch);
 
diff --git a/src/gallium/drivers/asahi/agx_pipe.c 
b/src/gallium/drivers/asahi/agx_pipe.c
index eba84a477a2..f4755da7148 100644
--- a/src/gallium/drivers/asahi/agx_pipe.c
+++ b/src/gallium/drivers/asahi/agx_pipe.c
@@ -1092,6 +1092,9 @@ agx_clear(struct pipe_context *pctx, unsigned buffers,
          util_framebuffer_get_num_samples(&ctx->framebuffer) > 1);
    }
 
+   if (fastclear)
+      agx_batch_init_state(batch);
+
    batch->clear |= fastclear;
    batch->resolve |= buffers;
    assert((batch->draw & slowclear) == slowclear);
@@ -1200,6 +1203,8 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch 
*batch)
       return;
    }
 
+   assert(batch->initialized);
+
    /* Finalize the encoder */
    uint8_t stop[5 + 64] = {0x00, 0x00, 0x00, 0xc0, 0x00};
    memcpy(batch->encoder_current, stop, sizeof(stop));
diff --git a/src/gallium/drivers/asahi/agx_state.c 
b/src/gallium/drivers/asahi/agx_state.c
index 505978fcd41..201a49dc030 100644
--- a/src/gallium/drivers/asahi/agx_state.c
+++ b/src/gallium/drivers/asahi/agx_state.c
@@ -2065,6 +2065,14 @@ agx_build_meta(struct agx_batch *batch, bool store, bool 
partial_render)
 void
 agx_batch_init_state(struct agx_batch *batch)
 {
+   if (batch->initialized)
+      return;
+
+   if (batch->key.width == AGX_COMPUTE_BATCH_WIDTH) {
+      batch->initialized = true;
+      return;
+   }
+
    /* Emit state on the batch that we don't change and so don't dirty track */
    uint8_t *out = batch->encoder_current;
    struct agx_ppp_update ppp =
@@ -2089,6 +2097,9 @@ agx_batch_init_state(struct agx_batch *batch)
    agx_ppp_fini(&out, &ppp);
    batch->encoder_current = out;
 
+   /* Mark it as initialized now, since agx_batch_writes() will check this. */
+   batch->initialized = true;
+
    /* Choose a tilebuffer layout given the framebuffer key */
    enum pipe_format formats[PIPE_MAX_COLOR_BUFS] = {0};
    for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
@@ -2100,6 +2111,19 @@ agx_batch_init_state(struct agx_batch *batch)
    batch->tilebuffer_layout = agx_build_tilebuffer_layout(
       formats, batch->key.nr_cbufs,
       util_framebuffer_get_num_samples(&batch->key));
+
+   if (batch->key.zsbuf) {
+      struct agx_resource *rsrc = agx_resource(batch->key.zsbuf->texture);
+      agx_batch_writes(batch, rsrc);
+
+      if (rsrc->separate_stencil)
+         agx_batch_writes(batch, rsrc->separate_stencil);
+   }
+
+   for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
+      if (batch->key.cbufs[i])
+         agx_batch_writes(batch, agx_resource(batch->key.cbufs[i]->texture));
+   }
 }
 
 static enum agx_object_type
@@ -2574,6 +2598,8 @@ agx_draw_vbo(struct pipe_context *pctx, const struct 
pipe_draw_info *info,
    if (ctx->rast->base.rasterizer_discard && !ctx->streamout.key.active)
       return;
 
+   agx_batch_init_state(batch);
+
    /* Dirty track the reduced prim: lines vs points vs triangles */
    enum pipe_prim_type reduced_prim = u_reduced_prim(info->mode);
    if (reduced_prim != batch->reduced_prim)
@@ -2765,6 +2791,13 @@ agx_launch_grid(struct pipe_context *pipe, const struct 
pipe_grid_info *info)
    struct agx_context *ctx = agx_context(pipe);
    struct agx_batch *batch = agx_get_compute_batch(ctx);
 
+   agx_batch_init_state(batch);
+
+   /* Consider compute launches as "draws" for the purposes of sanity
+    * checking batch state.
+    */
+   batch->any_draws = true;
+
    /* To implement load_num_workgroups, the number of workgroups needs to be
     * available in GPU memory. This is either the indirect buffer, or just a
     * buffer we upload ourselves if not indirect.
diff --git a/src/gallium/drivers/asahi/agx_state.h 
b/src/gallium/drivers/asahi/agx_state.h
index ab43824cd62..fc1fc049def 100644
--- a/src/gallium/drivers/asahi/agx_state.h
+++ b/src/gallium/drivers/asahi/agx_state.h
@@ -212,6 +212,7 @@ struct agx_batch {
    /* PIPE_CLEAR_* bitmask */
    uint32_t clear, draw, load, resolve;
    bool any_draws;
+   bool initialized;
 
    uint64_t uploaded_clear_color[PIPE_MAX_COLOR_BUFS];
    double clear_depth;

Reply via email to