From: Nicolai Hähnle <nicolai.haeh...@amd.com>

The data is read when the render_cond_atom is emitted, so we must
delay emitting the atom until after the flush.

Fixes: 0fe0320dc074 ("radeonsi: use optimal packet order when doing a pipeline sync")
---
 src/gallium/drivers/radeon/r600_pipe_common.h |  3 ++-
 src/gallium/drivers/radeon/r600_query.c       |  9 ++++++---
 src/gallium/drivers/radeonsi/si_state_draw.c  | 15 ++++++++++-----
 3 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index dca56734cd7..f78e38b65af 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -54,21 +54,22 @@ struct u_log_context;
 #define R600_RESOURCE_FLAG_TRANSFER            (PIPE_RESOURCE_FLAG_DRV_PRIV << 
0)
 #define R600_RESOURCE_FLAG_FLUSHED_DEPTH       (PIPE_RESOURCE_FLAG_DRV_PRIV << 
1)
 #define R600_RESOURCE_FLAG_FORCE_TILING                
(PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
 #define R600_RESOURCE_FLAG_DISABLE_DCC         (PIPE_RESOURCE_FLAG_DRV_PRIV << 
3)
 #define R600_RESOURCE_FLAG_UNMAPPABLE          (PIPE_RESOURCE_FLAG_DRV_PRIV << 
4)
 
 #define R600_CONTEXT_STREAMOUT_FLUSH           (1u << 0)
 /* Pipeline & streamout query controls. */
 #define R600_CONTEXT_START_PIPELINE_STATS      (1u << 1)
 #define R600_CONTEXT_STOP_PIPELINE_STATS       (1u << 2)
-#define R600_CONTEXT_PRIVATE_FLAG              (1u << 3)
+#define R600_CONTEXT_FLUSH_FOR_RENDER_COND     (1u << 3)
+#define R600_CONTEXT_PRIVATE_FLAG              (1u << 4)
 
 /* special primitive types */
 #define R600_PRIM_RECTANGLE_LIST       PIPE_PRIM_MAX
 
 #define R600_NOT_QUERY         0xffffffff
 
 /* Debug flags. */
 /* logging and features */
 #define DBG_TEX                        (1 << 0)
 #define DBG_NIR                        (1 << 1)
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index f937612bc1f..03ff1018a71 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -1828,25 +1828,28 @@ static void r600_render_condition(struct pipe_context *ctx,
                         * from launching the compute grid.
                         */
                        rctx->render_cond = NULL;
 
                        ctx->get_query_result_resource(
                                ctx, query, true, PIPE_QUERY_TYPE_U64, 0,
                                &rquery->workaround_buf->b.b, 
rquery->workaround_offset);
 
                        /* Settings this in the render cond atom is too late,
                         * so set it here. */
-                       rctx->flags |= rctx->screen->barrier_flags.L2_to_cp;
-
-                       atom->num_dw = 5;
+                       rctx->flags |= rctx->screen->barrier_flags.L2_to_cp |
+                                      R600_CONTEXT_FLUSH_FOR_RENDER_COND;
 
                        rctx->render_cond_force_off = old_force_off;
+               }
+
+               if (needs_workaround) {
+                       atom->num_dw = 5;
                } else {
                        for (qbuf = &rquery->buffer; qbuf; qbuf = 
qbuf->previous)
                                atom->num_dw += (qbuf->results_end / 
rquery->result_size) * 5;
 
                        if (rquery->b.type == 
PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
                                atom->num_dw *= R600_MAX_STREAMS;
                }
        }
 
        rctx->render_cond = query;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 1d8be49a480..81751d2186e 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1385,34 +1385,39 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
                                      SI_CONTEXT_PS_PARTIAL_FLUSH |
                                      SI_CONTEXT_CS_PARTIAL_FLUSH))) {
                /* If we have to wait for idle, set all states first, so that 
all
                 * SET packets are processed in parallel with previous draw 
calls.
                 * Then upload descriptors, set shader pointers, and draw, and
                 * prefetch at the end. This ensures that the time the CUs
                 * are idle is very short. (there are only SET_SH packets 
between
                 * the wait and the draw)
                 */
                struct r600_atom *shader_pointers = &sctx->shader_pointers.atom;
+               unsigned masked_atoms = 1u << shader_pointers->id;
 
-               /* Emit all states except shader pointers. */
-               si_emit_all_states(sctx, info, 1 << shader_pointers->id);
+               if (unlikely(sctx->b.flags & 
R600_CONTEXT_FLUSH_FOR_RENDER_COND))
+                       masked_atoms |= 1u << sctx->b.render_cond_atom.id;
+
+               /* Emit all states except shader pointers and render condition. 
*/
+               si_emit_all_states(sctx, info, masked_atoms);
                si_emit_cache_flush(sctx);
 
                /* <-- CUs are idle here. */
                if (!si_upload_graphics_shader_descriptors(sctx))
                        return;
 
                /* Set shader pointers after descriptors are uploaded. */
-               if (si_is_atom_dirty(sctx, shader_pointers)) {
+               if (si_is_atom_dirty(sctx, shader_pointers))
                        shader_pointers->emit(&sctx->b, NULL);
-                       sctx->dirty_atoms = 0;
-               }
+               if (si_is_atom_dirty(sctx, &sctx->b.render_cond_atom))
+                       sctx->b.render_cond_atom.emit(&sctx->b, NULL);
+               sctx->dirty_atoms = 0;
 
                si_emit_draw_packets(sctx, info, indexbuf, index_size, 
index_offset);
                /* <-- CUs are busy here. */
 
                /* Start prefetches after the draw has been started. Both will 
run
                 * in parallel, but starting the draw first is more important.
                 */
                if (sctx->b.chip_class >= CIK && sctx->prefetch_L2_mask)
                        cik_emit_prefetch_L2(sctx);
        } else {
-- 
2.11.0

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to