From: Marek Olšák <marek.ol...@amd.com> The guardband registers have a different frequency of updates than the scissor registers and don't change when scissors change, so they are emitted from their own state atom.
I think this even fixes something in si_update_vs_viewport_state. --- src/gallium/drivers/radeonsi/si_gfx_cs.c | 1 + src/gallium/drivers/radeonsi/si_state.c | 9 +++-- src/gallium/drivers/radeonsi/si_state.h | 2 ++ src/gallium/drivers/radeonsi/si_state_draw.c | 6 ++-- .../drivers/radeonsi/si_state_viewport.c | 36 ++++++++++++------- 5 files changed, 35 insertions(+), 19 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index 84536b7c6f6..c7ec83789b1 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -277,20 +277,21 @@ void si_begin_new_gfx_cs(struct si_context *ctx) si_mark_atom_dirty(ctx, &ctx->atoms.s.stencil_ref); si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_map); si_mark_atom_dirty(ctx, &ctx->atoms.s.streamout_enable); si_mark_atom_dirty(ctx, &ctx->atoms.s.render_cond); si_all_descriptors_begin_new_cs(ctx); si_all_resident_buffers_begin_new_cs(ctx); ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1; ctx->viewports.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1; ctx->viewports.depth_range_dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1; + si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband); si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors); si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports); si_mark_atom_dirty(ctx, &ctx->atoms.s.scratch_state); if (ctx->scratch_buffer) { si_context_add_resource_size(ctx, &ctx->scratch_buffer->b.b); } if (ctx->streamout.suspended) { ctx->streamout.append_bitmask = ctx->streamout.enabled_mask; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 3a7e928df53..3d19af28507 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -995,27 +995,30 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state) si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_sample_locs); } sctx->current_vs_state &= C_VS_STATE_CLAMP_VERTEX_COLOR; 
sctx->current_vs_state |= S_VS_STATE_CLAMP_VERTEX_COLOR(rs->clamp_vertex_color); si_pm4_bind_state(sctx, rasterizer, rs); si_update_poly_offset_state(sctx); if (!old_rs || - (old_rs->scissor_enable != rs->scissor_enable || - old_rs->line_width != rs->line_width || - old_rs->max_point_size != rs->max_point_size)) { + old_rs->scissor_enable != rs->scissor_enable) { sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1; si_mark_atom_dirty(sctx, &sctx->atoms.s.scissors); } + if (!old_rs || + old_rs->line_width != rs->line_width || + old_rs->max_point_size != rs->max_point_size) + si_mark_atom_dirty(sctx, &sctx->atoms.s.guardband); + if (!old_rs || old_rs->clip_halfz != rs->clip_halfz) { sctx->viewports.depth_range_dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1; si_mark_atom_dirty(sctx, &sctx->atoms.s.viewports); } if (!old_rs || old_rs->clip_plane_enable != rs->clip_plane_enable || old_rs->pa_cl_clip_cntl != rs->pa_cl_clip_cntl) si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs); diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 9da58ac9710..3cfcf75a22f 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -200,20 +200,21 @@ union si_state_atoms { struct si_atom msaa_sample_locs; struct si_atom db_render_state; struct si_atom dpbb_state; struct si_atom msaa_config; struct si_atom sample_mask; struct si_atom cb_render_state; struct si_atom blend_color; struct si_atom clip_regs; struct si_atom clip_state; struct si_atom shader_pointers; + struct si_atom guardband; struct si_atom scissors; struct si_atom viewports; struct si_atom stencil_ref; struct si_atom spi_map; struct si_atom scratch_state; } s; struct si_atom array[0]; }; #define SI_ATOM_BIT(name) (1 << (offsetof(union si_state_atoms, s.name) / \ @@ -227,20 +228,21 @@ static inline unsigned si_atoms_that_roll_context(void) SI_ATOM_BIT(framebuffer) | SI_ATOM_BIT(msaa_sample_locs) | SI_ATOM_BIT(db_render_state) | 
SI_ATOM_BIT(dpbb_state) | SI_ATOM_BIT(msaa_config) | SI_ATOM_BIT(sample_mask) | SI_ATOM_BIT(cb_render_state) | SI_ATOM_BIT(blend_color) | SI_ATOM_BIT(clip_regs) | SI_ATOM_BIT(clip_state) | + SI_ATOM_BIT(guardband) | SI_ATOM_BIT(scissors) | SI_ATOM_BIT(viewports) | SI_ATOM_BIT(stencil_ref) | SI_ATOM_BIT(spi_map) | SI_ATOM_BIT(scratch_state)); } struct si_shader_data { uint32_t sh_base[SI_NUM_SHADERS]; }; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 5588c9a2c53..e33e235620a 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -1270,24 +1270,22 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) if (sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_POINT_MODE]) rast_prim = PIPE_PRIM_POINTS; else rast_prim = sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE]; } else rast_prim = info->mode; if (rast_prim != sctx->current_rast_prim) { bool old_is_poly = sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES; bool new_is_poly = rast_prim >= PIPE_PRIM_TRIANGLES; - if (old_is_poly != new_is_poly) { - sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1; - si_mark_atom_dirty(sctx, &sctx->atoms.s.scissors); - } + if (old_is_poly != new_is_poly) + si_mark_atom_dirty(sctx, &sctx->atoms.s.guardband); sctx->current_rast_prim = rast_prim; sctx->do_update_shaders = true; } if (sctx->tes_shader.cso && sctx->screen->has_ls_vgpr_init_bug) { /* Determine whether the LS VGPR fix should be applied. 
* * It is only required when num input CPs > num output CPs, diff --git a/src/gallium/drivers/radeonsi/si_state_viewport.c b/src/gallium/drivers/radeonsi/si_state_viewport.c index bffb1f91827..97b1b89b48b 100644 --- a/src/gallium/drivers/radeonsi/si_state_viewport.c +++ b/src/gallium/drivers/radeonsi/si_state_viewport.c @@ -129,28 +129,42 @@ static void si_emit_one_scissor(struct si_context *ctx, radeon_emit(cs, S_028250_TL_X(final.minx) | S_028250_TL_Y(final.miny) | S_028250_WINDOW_OFFSET_DISABLE(1)); radeon_emit(cs, S_028254_BR_X(final.maxx) | S_028254_BR_Y(final.maxy)); } /* the range is [-MAX, MAX] */ #define SI_MAX_VIEWPORT_RANGE 32768 -static void si_emit_guardband(struct si_context *ctx, - struct si_signed_scissor *vp_as_scissor) +static void si_emit_guardband(struct si_context *ctx) { + const struct si_signed_scissor *vp_as_scissor; + struct si_signed_scissor max_vp_scissor; struct radeon_winsys_cs *cs = ctx->gfx_cs; struct pipe_viewport_state vp; float left, top, right, bottom, max_range, guardband_x, guardband_y; float discard_x, discard_y; + if (ctx->vs_writes_viewport_index) { + /* Shaders can draw to any viewport. Make a union of all + * viewports. */ + max_vp_scissor = ctx->viewports.as_scissor[0]; + for (unsigned i = 1; i < SI_MAX_VIEWPORTS; i++) { + si_scissor_make_union(&max_vp_scissor, + &ctx->viewports.as_scissor[i]); + } + vp_as_scissor = &max_vp_scissor; + } else { + vp_as_scissor = &ctx->viewports.as_scissor[0]; + } + /* Reconstruct the viewport transformation from the scissor. */ vp.translate[0] = (vp_as_scissor->minx + vp_as_scissor->maxx) / 2.0; vp.translate[1] = (vp_as_scissor->miny + vp_as_scissor->maxy) / 2.0; vp.scale[0] = vp_as_scissor->maxx - vp.translate[0]; vp.scale[1] = vp_as_scissor->maxy - vp.translate[1]; /* Treat a 0x0 viewport as 1x1 to prevent division by zero. 
*/ if (vp_as_scissor->minx == vp_as_scissor->maxx) vp.scale[0] = 0.5; if (vp_as_scissor->miny == vp_as_scissor->maxy) @@ -209,59 +223,49 @@ static void si_emit_guardband(struct si_context *ctx, radeon_emit(cs, fui(guardband_x)); /* R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ */ radeon_emit(cs, fui(discard_x)); /* R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */ } static void si_emit_scissors(struct si_context *ctx) { struct radeon_winsys_cs *cs = ctx->gfx_cs; struct pipe_scissor_state *states = ctx->scissors.states; unsigned mask = ctx->scissors.dirty_mask; bool scissor_enabled = false; - struct si_signed_scissor max_vp_scissor; - int i; if (ctx->queued.named.rasterizer) scissor_enabled = ctx->queued.named.rasterizer->scissor_enable; /* The simple case: Only 1 viewport is active. */ if (!ctx->vs_writes_viewport_index) { struct si_signed_scissor *vp = &ctx->viewports.as_scissor[0]; if (!(mask & 1)) return; radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2); si_emit_one_scissor(ctx, cs, vp, scissor_enabled ? &states[0] : NULL); - si_emit_guardband(ctx, vp); ctx->scissors.dirty_mask &= ~1; /* clear one bit */ return; } - /* Shaders can draw to any viewport. Make a union of all viewports. */ - max_vp_scissor = ctx->viewports.as_scissor[0]; - for (i = 1; i < SI_MAX_VIEWPORTS; i++) - si_scissor_make_union(&max_vp_scissor, - &ctx->viewports.as_scissor[i]); - while (mask) { int start, count, i; u_bit_scan_consecutive_range(&mask, &start, &count); radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + start * 4 * 2, count * 2); for (i = start; i < start+count; i++) { si_emit_one_scissor(ctx, cs, &ctx->viewports.as_scissor[i], scissor_enabled ? 
&states[i] : NULL); } } - si_emit_guardband(ctx, &max_vp_scissor); ctx->scissors.dirty_mask = 0; } static void si_set_viewport_states(struct pipe_context *pctx, unsigned start_slot, unsigned num_viewports, const struct pipe_viewport_state *state) { struct si_context *ctx = (struct si_context *)pctx; unsigned mask; @@ -273,20 +277,21 @@ static void si_set_viewport_states(struct pipe_context *pctx, ctx->viewports.states[index] = state[i]; si_get_scissor_from_viewport(ctx, &state[i], &ctx->viewports.as_scissor[index]); } mask = ((1 << num_viewports) - 1) << start_slot; ctx->viewports.dirty_mask |= mask; ctx->viewports.depth_range_dirty_mask |= mask; ctx->scissors.dirty_mask |= mask; si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports); + si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband); si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors); } static void si_emit_one_viewport(struct si_context *ctx, struct pipe_viewport_state *state) { struct radeon_winsys_cs *cs = ctx->gfx_cs; radeon_emit(cs, fui(state->scale[0])); radeon_emit(cs, fui(state->translate[0])); @@ -412,30 +417,37 @@ void si_update_vs_viewport_state(struct si_context *ctx) if (ctx->vs_disables_clipping_viewport != vs_window_space) { ctx->vs_disables_clipping_viewport = vs_window_space; ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1; ctx->viewports.depth_range_dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1; si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors); si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports); } /* Viewport index handling. */ + if (ctx->vs_writes_viewport_index == info->writes_viewport_index) + return; + + /* This changes how the guardband is computed. 
*/ ctx->vs_writes_viewport_index = info->writes_viewport_index; + si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband); + if (!ctx->vs_writes_viewport_index) return; if (ctx->scissors.dirty_mask) si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors); if (ctx->viewports.dirty_mask || ctx->viewports.depth_range_dirty_mask) si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports); } void si_init_viewport_functions(struct si_context *ctx) { + ctx->atoms.s.guardband.emit = si_emit_guardband; ctx->atoms.s.scissors.emit = si_emit_scissors; ctx->atoms.s.viewports.emit = si_emit_viewport_states; ctx->b.set_scissor_states = si_set_scissor_states; ctx->b.set_viewport_states = si_set_viewport_states; } -- 2.17.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev