I don't like having to fush, so this introduces the other workaround.
Since my experience is that context register writes are pretty cheap,
this should not have too much overhead.

I haven't seen any significant perf changes in benchmarks or games
though.
---
 src/amd/vulkan/radv_cmd_buffer.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index c735d201802..0ca33cc67bc 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1102,10 +1102,6 @@ radv_emit_scissor(struct radv_cmd_buffer *cmd_buffer)
 {
        uint32_t count = cmd_buffer->state.dynamic.scissor.count;
 
-       if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
-               cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH;
-               si_emit_cache_flush(cmd_buffer);
-       }
        si_write_scissors(cmd_buffer->cs, 0, count,
                          cmd_buffer->state.dynamic.scissor.scissors,
                          cmd_buffer->state.dynamic.viewport.viewports,
@@ -1866,7 +1862,8 @@ radv_upload_graphics_shader_descriptors(struct 
radv_cmd_buffer *cmd_buffer, bool
 static void
 radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, bool indexed_draw,
                         bool instanced_draw, bool indirect_draw,
-                        uint32_t draw_vertex_count)
+                        uint32_t draw_vertex_count,
+                        bool *gfx9_context_roll)
 {
        struct radeon_info *info = 
&cmd_buffer->device->physical_device->rad_info;
        struct radv_cmd_state *state = &cmd_buffer->state;
@@ -1921,6 +1918,7 @@ radv_emit_draw_registers(struct radv_cmd_buffer 
*cmd_buffer, bool indexed_draw,
                                               
R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
                                               primitive_reset_index);
                        state->last_primitive_reset_index = 
primitive_reset_index;
+                       *gfx9_context_roll = true;
                }
        }
 }
@@ -3279,6 +3277,9 @@ static void
 radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer,
                              const struct radv_draw_info *info)
 {
+       bool context_roll = cmd_buffer->state.dirty & 
~RADV_CMD_DIRTY_INDEX_BUFFER;
+       bool scissor_emitted = cmd_buffer->state.dirty & 
(RADV_CMD_DIRTY_DYNAMIC_SCISSOR | RADV_CMD_DIRTY_DYNAMIC_VIEWPORT);
+
        if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE)
                radv_emit_graphics_pipeline(cmd_buffer);
 
@@ -3303,7 +3304,16 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer 
*cmd_buffer,
 
        radv_emit_draw_registers(cmd_buffer, info->indexed,
                                 info->instance_count > 1, info->indirect,
-                                info->indirect ? 0 : info->count);
+                                info->indirect ? 0 : info->count,
+                                &context_roll);
+
+       /* VEGA10 and RAVEN need a workaround for scissor registers. Either we 
need to
+        * do a PS_APRTIAL_FLUSH before writing them, or we need to always 
write it if
+        * a context roll happens. This does the lattter. */
+       if (context_roll && !scissor_emitted &&
+           (cmd_buffer->device->physical_device->rad_info.family == 
CHIP_VEGA10 ||
+            cmd_buffer->device->physical_device->rad_info.family == 
CHIP_RAVEN))
+               radv_emit_scissor(cmd_buffer);
 }
 
 static void
-- 
2.15.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to