On Mon, Jun 19, 2017 at 2:27 PM, Nicolai Hähnle <nhaeh...@gmail.com> wrote: > On 16.06.2017 14:58, Marek Olšák wrote: >> >> From: Marek Olšák <marek.ol...@amd.com> >> >> The main flush before texturing is done after the FMASK decompress pass. >> >> CB after MSAA rendering is not flushed in set_framebuffer_state and also >> not in memory_barrier if the current color buffer is MSAA. We fully rely >> on the FMASK decompress pass for the flushing. >> >> Some CB decompress and resolve passes need an explicit flush before and >> after. >> --- >> src/gallium/drivers/radeonsi/si_blit.c | 29 ++++++++++++++++++++++ >> src/gallium/drivers/radeonsi/si_state.c | 43 >> ++++++++++++++++++++++----------- >> 2 files changed, 58 insertions(+), 14 deletions(-) >> >> diff --git a/src/gallium/drivers/radeonsi/si_blit.c >> b/src/gallium/drivers/radeonsi/si_blit.c >> index 0993ebd..1159594 100644 >> --- a/src/gallium/drivers/radeonsi/si_blit.c >> +++ b/src/gallium/drivers/radeonsi/si_blit.c >> @@ -398,20 +398,28 @@ si_decompress_depth(struct si_context *sctx, >> * state becomes 0 for the whole mipmap tree and all >> planes. >> * (there is nothing else to flush) >> */ >> if (tex->tc_compatible_htile) { >> if (r600_can_sample_zs(tex, false)) >> tex->dirty_level_mask = 0; >> if (r600_can_sample_zs(tex, true)) >> tex->stencil_dirty_level_mask = 0; >> } >> } >> + /* set_framebuffer_state takes care of coherency for >> single-sample. >> + * The DB->CB copy uses CB for the final writes. >> + */ >> + if (copy_planes && tex->resource.b.b.nr_samples > 1) { >> + sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | >> + SI_CONTEXT_INV_GLOBAL_L2 | >> + SI_CONTEXT_FLUSH_AND_INV_CB; >> + } >> } >> static void >> si_decompress_sampler_depth_textures(struct si_context *sctx, >> struct si_textures_info *textures) >> { >> unsigned i; >> unsigned mask = textures->needs_depth_decompress_mask; >> while (mask) { >> @@ -480,36 +488,49 @@ static void si_blit_decompress_color(struct >> pipe_context *ctx, >> for (layer = first_layer; layer <= checked_last_layer; >> layer++) { >> struct pipe_surface *cbsurf, surf_tmpl; >> surf_tmpl.format = rtex->resource.b.b.format; >> surf_tmpl.u.tex.level = level; >> surf_tmpl.u.tex.first_layer = layer; >> surf_tmpl.u.tex.last_layer = layer; >> cbsurf = ctx->create_surface(ctx, >> &rtex->resource.b.b, &surf_tmpl); >> + /* Required before and after FMASK and >> DCC_DECOMPRESS. */ >> + if (custom_blend == >> sctx->custom_blend_fmask_decompress || >> + custom_blend == >> sctx->custom_blend_dcc_decompress) >> + sctx->b.flags |= >> SI_CONTEXT_FLUSH_AND_INV_CB; >> + >> si_blitter_begin(ctx, SI_DECOMPRESS); >> util_blitter_custom_color(sctx->blitter, cbsurf, >> custom_blend); >> si_blitter_end(ctx); >> + if (custom_blend == >> sctx->custom_blend_fmask_decompress || >> + custom_blend == >> sctx->custom_blend_dcc_decompress) >> + sctx->b.flags |= >> SI_CONTEXT_FLUSH_AND_INV_CB; >> + >> pipe_surface_reference(&cbsurf, NULL); >> } >> /* The texture will always be dirty if some layers aren't >> flushed. >> * I don't think this case occurs often though. */ >> if (first_layer == 0 && last_layer >= max_layer) { >> rtex->dirty_level_mask &= ~(1 << level); >> } >> } >> sctx->decompression_enabled = false; >> sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness; >> + >> + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB | >> + SI_CONTEXT_INV_GLOBAL_L2 | >> + SI_CONTEXT_INV_VMEM_L1; >> } >> static void >> si_decompress_color_texture(struct si_context *sctx, struct r600_texture >> *tex, >> unsigned first_level, unsigned last_level) >> { >> /* CMASK or DCC can be discarded and we can still end up here. */ >> if (!tex->cmask.size && !tex->fmask.size && !tex->dcc_offset) >> return; >> @@ -1148,27 +1169,35 @@ void si_resource_copy_region(struct pipe_context >> *ctx, >> pipe_surface_reference(&dst_view, NULL); >> pipe_sampler_view_reference(&src_view, NULL); >> } >> static void si_do_CB_resolve(struct si_context *sctx, >> const struct pipe_blit_info *info, >> struct pipe_resource *dst, >> unsigned dst_level, unsigned dst_z, >> enum pipe_format format) >> { >> + /* Required before and after CB_RESOLVE. */ >> + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB; >> + >> si_blitter_begin(&sctx->b.b, SI_COLOR_RESOLVE | >> (info->render_condition_enable ? 0 : >> SI_DISABLE_RENDER_COND)); >> util_blitter_custom_resolve_color(sctx->blitter, dst, dst_level, >> dst_z, >> info->src.resource, >> info->src.box.z, >> ~0, sctx->custom_blend_resolve, >> format); >> si_blitter_end(&sctx->b.b); >> + >> + /* Flush caches for possible texturing. */ >> + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB | >> + SI_CONTEXT_INV_GLOBAL_L2 | >> + SI_CONTEXT_INV_VMEM_L1; >> } >> static bool do_hardware_msaa_resolve(struct pipe_context *ctx, >> const struct pipe_blit_info *info) >> { >> struct si_context *sctx = (struct si_context*)ctx; >> struct r600_texture *src = (struct >> r600_texture*)info->src.resource; >> struct r600_texture *dst = (struct >> r600_texture*)info->dst.resource; >> MAYBE_UNUSED struct r600_texture *rtmp; >> unsigned dst_width = u_minify(info->dst.resource->width0, >> info->dst.level); >> diff --git a/src/gallium/drivers/radeonsi/si_state.c >> b/src/gallium/drivers/radeonsi/si_state.c >> index 44e5f1c..ab27af2 100644 >> --- a/src/gallium/drivers/radeonsi/si_state.c >> +++ b/src/gallium/drivers/radeonsi/si_state.c >> @@ -2523,34 +2523,42 @@ static void si_set_framebuffer_state(struct >> pipe_context *ctx, >> /* Only flush TC when changing the framebuffer state, because >> * the only client not using TC that can change textures is >> * the framebuffer. >> * >> * Wait for compute shaders because of possible transitions: >> * - FB write -> shader read >> * - shader write -> FB read >> * >> * DB caches are flushed on demand (using si_decompress_textures). >> + * >> + * When MSAA is enabled, CB and TC caches are flushed on demand >> + * (after FMASK decompression). >> */ >> - sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | >> - SI_CONTEXT_INV_GLOBAL_L2 | >> - SI_CONTEXT_FLUSH_AND_INV_CB | >> - SI_CONTEXT_CS_PARTIAL_FLUSH; >> + if (sctx->framebuffer.nr_samples <= 1) { >> + sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | >> + SI_CONTEXT_INV_GLOBAL_L2 | >> + SI_CONTEXT_FLUSH_AND_INV_CB; >> + } > > > This looks correct to me, but only because we happen not to support MSAA > shader images today. Luckily, people seem to agree that writing MSAA images > from shaders is not very useful, but if we ever do need to support those, > we'll have to support shader write -> FB access transitions. > > I suppose that's okay -- we can add a flag if an MSAA texture is bound as a > shader image and write access, and check that when binding the framebuffer. > But I think this should be mentioned in the comment: Shader write -> FB read > transitions cannot happen for MSAA textures, because MSAA shader images are > not supported.
Indeed. Added the comment. Marek _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev