Module: Mesa
Branch: master
Commit: a44868beda5569c674e1cc01f28e97f0d639df7a
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=a44868beda5569c674e1cc01f28e97f0d639df7a

Author: Marek Olšák <[email protected]>
Date:   Fri Oct  9 18:58:14 2020 -0400

radeonsi: implement multi_draw for compute-based primitive culling

Acked-by: Pierre-Eric Pelloux-Prayer <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7056>

---

 .../drivers/radeonsi/si_compute_prim_discard.c     | 57 +++++++++++++++++-----
 src/gallium/drivers/radeonsi/si_pipe.h             |  4 +-
 src/gallium/drivers/radeonsi/si_state_draw.c       | 15 +++---
 3 files changed, 58 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c 
b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
index d58f7b0af62..d540f260c9b 100644
--- a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
+++ b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
@@ -956,7 +956,8 @@ static bool si_check_ring_space(struct si_context *sctx, 
unsigned out_indexbuf_s
 enum si_prim_discard_outcome
 si_prepare_prim_discard_or_split_draw(struct si_context *sctx, const struct 
pipe_draw_info *info,
                                       const struct pipe_draw_start_count 
*draws,
-                                      unsigned num_draws, bool 
primitive_restart)
+                                      unsigned num_draws, bool 
primitive_restart,
+                                      unsigned total_count)
 {
    /* If the compute shader compilation isn't finished, this returns false. */
    if (!si_shader_select_prim_discard_cs(sctx, info, primitive_restart))
@@ -967,7 +968,7 @@ si_prepare_prim_discard_or_split_draw(struct si_context 
*sctx, const struct pipe
 
    struct radeon_cmdbuf *gfx_cs = sctx->gfx_cs;
    unsigned prim = info->mode;
-   unsigned count = draws[0].count;
+   unsigned count = total_count;
    unsigned instance_count = info->instance_count;
    unsigned num_prims_per_instance = u_decomposed_prims_for_vertices(prim, 
count);
    unsigned num_prims = num_prims_per_instance * instance_count;
@@ -981,16 +982,54 @@ si_prepare_prim_discard_or_split_draw(struct si_context 
*sctx, const struct pipe
    if (ring_full && num_prims > split_prims_draw_level &&
        instance_count == 1 && /* TODO: support splitting instanced draws */
        (1 << prim) & ((1 << PIPE_PRIM_TRIANGLES) | (1 << 
PIPE_PRIM_TRIANGLE_STRIP))) {
-      /* Split draws. */
+      unsigned vert_count_per_subdraw = 0;
+
+      if (prim == PIPE_PRIM_TRIANGLES)
+         vert_count_per_subdraw = split_prims_draw_level * 3;
+      else if (prim == PIPE_PRIM_TRIANGLE_STRIP)
+         vert_count_per_subdraw = split_prims_draw_level;
+      else
+         unreachable("shouldn't get here");
+
+      /* Split multi draws first.
+       *
+       * Consecutive draws are grouped into batches whose summed vertex count
+       * does not exceed vert_count_per_subdraw, and each batch is resubmitted
+       * through multi_draw. A draw that is too large on its own is submitted
+       * alone so that the single-draw splitting below handles it.
+       */
+      if (num_draws > 1) {
+         /* Vertex count of the batch being accumulated. Named differently
+          * from the function-level "count" (the total over all draws) to
+          * avoid shadowing it. */
+         unsigned batch_count = 0;
+         unsigned first_draw = 0;
+         unsigned num_draws_split = 0;
+
+         for (unsigned i = 0; i < num_draws; i++) {
+            if (batch_count && batch_count + draws[i].count > vert_count_per_subdraw) {
+               /* Submit previous draws.  */
+               sctx->b.multi_draw(&sctx->b, info, draws + first_draw, num_draws_split);
+               batch_count = 0;
+               first_draw = i;
+               num_draws_split = 0;
+            }
+
+            if (draws[i].count > vert_count_per_subdraw) {
+               /* Submit just 1 draw. It will be split. */
+               sctx->b.multi_draw(&sctx->b, info, draws + i, 1);
+               assert(batch_count == 0);
+               assert(first_draw == i);
+               assert(num_draws_split == 0);
+               first_draw = i + 1;
+               continue;
+            }
+
+            batch_count += draws[i].count;
+            num_draws_split++;
+         }
+
+         /* Submit the draws accumulated since the last flush. The caller
+          * does no further drawing for SI_PRIM_DISCARD_MULTI_DRAW_SPLIT, so
+          * without this the tail of the draw list would be dropped. */
+         if (batch_count) {
+            assert(num_draws_split > 0);
+            sctx->b.multi_draw(&sctx->b, info, draws + first_draw, num_draws_split);
+         }
+         return SI_PRIM_DISCARD_MULTI_DRAW_SPLIT;
+      }
+
+      /* Split single draws if splitting multi draws isn't enough. */
       struct pipe_draw_info split_draw = *info;
       struct pipe_draw_start_count split_draw_range = draws[0];
+      unsigned base_start = split_draw_range.start;
 
       split_draw.primitive_restart = primitive_restart;
 
-      unsigned base_start = split_draw_range.start;
-
       if (prim == PIPE_PRIM_TRIANGLES) {
-         unsigned vert_count_per_subdraw = split_prims_draw_level * 3;
          assert(vert_count_per_subdraw < count);
 
          for (unsigned start = 0; start < count; start += 
vert_count_per_subdraw) {
@@ -1004,8 +1043,6 @@ si_prepare_prim_discard_or_split_draw(struct si_context 
*sctx, const struct pipe
           * for odd primitives. */
          STATIC_ASSERT(split_prims_draw_level % 2 == 0);
 
-         unsigned vert_count_per_subdraw = split_prims_draw_level;
-
          for (unsigned start = 0; start < count - 2; start += 
vert_count_per_subdraw) {
             split_draw_range.start = base_start + start;
             split_draw_range.count = MIN2(count - start, 
vert_count_per_subdraw + 2);
@@ -1017,8 +1054,6 @@ si_prepare_prim_discard_or_split_draw(struct si_context 
*sctx, const struct pipe
                sctx->preserve_prim_restart_gds_at_flush = true;
          }
          sctx->preserve_prim_restart_gds_at_flush = false;
-      } else {
-         assert(0);
       }
 
       return SI_PRIM_DISCARD_DRAW_SPLIT;
@@ -1031,7 +1066,7 @@ si_prepare_prim_discard_or_split_draw(struct si_context 
*sctx, const struct pipe
       return SI_PRIM_DISCARD_DISABLED;
    }
 
-   unsigned num_subdraws = DIV_ROUND_UP(num_prims, SPLIT_PRIMS_PACKET_LEVEL);
+   unsigned num_subdraws = DIV_ROUND_UP(num_prims, SPLIT_PRIMS_PACKET_LEVEL) * 
num_draws;
    unsigned need_compute_dw = 11 /* shader */ + 34 /* first draw */ +
                               24 * (num_subdraws - 1) + /* subdraws */
                               30;                       /* leave some space at 
the end */
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index ce906f50e2c..a5c7daeacdb 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1451,13 +1451,15 @@ enum si_prim_discard_outcome
    SI_PRIM_DISCARD_ENABLED,
    SI_PRIM_DISCARD_DISABLED,
    SI_PRIM_DISCARD_DRAW_SPLIT,
+   SI_PRIM_DISCARD_MULTI_DRAW_SPLIT,
 };
 
 void si_build_prim_discard_compute_shader(struct si_shader_context *ctx);
 enum si_prim_discard_outcome
 si_prepare_prim_discard_or_split_draw(struct si_context *sctx, const struct 
pipe_draw_info *info,
                                       const struct pipe_draw_start_count 
*draws,
-                                      unsigned num_draws, bool 
primitive_restart);
+                                      unsigned num_draws, bool 
primitive_restart,
+                                      unsigned total_count);
 void si_compute_signal_gfx(struct si_context *sctx);
 void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
                                           const struct pipe_draw_info *info,
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c 
b/src/gallium/drivers/radeonsi/si_state_draw.c
index 6ce07d7c409..18f5c2632d3 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -941,12 +941,13 @@ static void si_emit_draw_packets(struct si_context *sctx, 
const struct pipe_draw
 
       if (index_size) {
          if (dispatch_prim_discard_cs) {
-            index_va += draws[0].start * original_index_size;
-            index_max_size = MIN2(index_max_size, draws[0].count);
+            for (unsigned i = 0; i < num_draws; i++) {
+               uint64_t va = index_va + draws[i].start * original_index_size; /* per-draw start */
 
+               si_dispatch_prim_discard_cs_and_draw(sctx, info, draws[i].count,
+                                                    original_index_size, base_vertex,
+                                                    va, MIN2(index_max_size, draws[i].count));
+            }
             return;
          }
 
@@ -1998,7 +1999,7 @@ static void si_multi_draw_vbo(struct pipe_context *ctx,
        (si_all_vs_resources_read_only(sctx, index_size ? indexbuf : NULL) ||
         pd_msg("write reference"))) {
       switch (si_prepare_prim_discard_or_split_draw(sctx, info, draws, 
num_draws,
-                                                    primitive_restart)) {
+                                                    primitive_restart, 
total_direct_count)) {
       case SI_PRIM_DISCARD_ENABLED:
          original_index_size = index_size;
          prim_discard_cs_instancing = instance_count > 1;
@@ -2017,6 +2018,8 @@ static void si_multi_draw_vbo(struct pipe_context *ctx,
       case SI_PRIM_DISCARD_DRAW_SPLIT:
          sctx->compute_num_verts_rejected -= total_direct_count;
          goto return_cleanup;
+      case SI_PRIM_DISCARD_MULTI_DRAW_SPLIT:
+         goto return_cleanup;
       }
    }
 

_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to