On 16 April 2013 16:21, Eric Anholt <e...@anholt.net> wrote: > The upside is less CPU overhead in fiddling with GL error handling, the > ability to use the constant color write message in most cases, and no GLSL > clear shaders appearing in MESA_GLSL=dump output. The downside is more > batch flushing and a total recompute of GL state at the end of blorp. > However, if we're ever going to use the fast color clear feature of CMS > surfaces, we'll need this anyway since it requires very special state > setup. > > This increases the fail rate of some the GLES3conform ARB_sync tests, > because of the initial flush at the start of blorp. The tests already > intermittently failed (because it's just a bad testing procedure), and we > can return it to its previous fail rate by fixing the initial flush. > > Improves GLB2.7 performance 0.37% +/- 0.11% (n=71/70, outlier removed). > --- > src/mesa/drivers/dri/i965/Makefile.sources | 1 + > src/mesa/drivers/dri/i965/brw_blorp.cpp | 4 + > src/mesa/drivers/dri/i965/brw_blorp.h | 4 + > src/mesa/drivers/dri/i965/brw_blorp_clear.cpp | 303 > +++++++++++++++++++++++++ > src/mesa/drivers/dri/i965/brw_clear.c | 11 + > src/mesa/drivers/dri/i965/brw_context.h | 1 + > src/mesa/drivers/dri/i965/gen6_blorp.cpp | 21 +- > src/mesa/drivers/dri/i965/gen7_blorp.cpp | 12 +- > 8 files changed, 343 insertions(+), 14 deletions(-) > create mode 100644 src/mesa/drivers/dri/i965/brw_blorp_clear.cpp > > diff --git a/src/mesa/drivers/dri/i965/Makefile.sources > b/src/mesa/drivers/dri/i965/Makefile.sources > index be8d630..049ae91 100644 > --- a/src/mesa/drivers/dri/i965/Makefile.sources > +++ b/src/mesa/drivers/dri/i965/Makefile.sources > @@ -31,6 +31,7 @@ i965_FILES = \ > intel_tex_validate.c \ > brw_blorp.cpp \ > brw_blorp_blit.cpp \ > + brw_blorp_clear.cpp \ > brw_cc.c \ > brw_cfg.cpp \ > brw_clear.c \ > diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp > b/src/mesa/drivers/dri/i965/brw_blorp.cpp > index 8a044c1..a2d02bf 100644 > --- a/src/mesa/drivers/dri/i965/brw_blorp.cpp > +++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp > @@ -148,6 +148,10 @@ brw_blorp_params::brw_blorp_params() > num_samples(0), > use_wm_prog(false) > { > + color_write_disable[0] = false; > + color_write_disable[1] = false; > + color_write_disable[2] = false; > + color_write_disable[3] = false; > } > > extern "C" { > diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h > b/src/mesa/drivers/dri/i965/brw_blorp.h > index 1b95818..8915080 100644 > --- a/src/mesa/drivers/dri/i965/brw_blorp.h > +++ b/src/mesa/drivers/dri/i965/brw_blorp.h > @@ -45,6 +45,9 @@ brw_blorp_blit_miptrees(struct intel_context *intel, > int dst_x1, int dst_y1, > bool mirror_x, bool mirror_y); > > +bool > +brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer > *fb); > + > #ifdef __cplusplus > } /* end extern "C" */ > > @@ -211,6 +214,7 @@ public: > unsigned num_samples; > bool use_wm_prog; > brw_blorp_wm_push_constants wm_push_consts; > + bool color_write_disable[4]; > }; > > > diff --git a/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp > b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp > new file mode 100644 > index 0000000..000f1c6 > --- /dev/null > +++ b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp > @@ -0,0 +1,303 @@ > +/* > + * Copyright © 2013 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the > "Software"), > + * to deal in the Software without restriction, including without > limitation > + * the rights to use, copy, modify, merge, publish, distribute, > sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the > next > + * paragraph) shall be included in all copies or substantial portions of > the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT > SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR > OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > DEALINGS > + * IN THE SOFTWARE. > + */ > + > +#include "main/teximage.h" > +#include "main/fbobject.h" > +#include "main/renderbuffer.h" > + > +#include "glsl/ralloc.h" > + > +#include "intel_fbo.h" > + > +#include "brw_blorp.h" > +#include "brw_context.h" > +#include "brw_eu.h" > +#include "brw_state.h" > + > +struct brw_blorp_clear_prog_key > +{ > + uint32_t const_clear; > +}; >
I found the name and type of "const_clear" really misleading (at first I assumed it was the value we are clearing the buffer to. Actually it's a boolean that determines whether we use the "replicate data" message). Can we change this to "bool use_replicate_data_msg" or something? > + > +class brw_blorp_clear_params : public brw_blorp_params > +{ > +public: > + brw_blorp_clear_params(struct brw_context *brw, > + struct gl_framebuffer *fb, > + struct gl_renderbuffer *rb, > + GLubyte *color_mask); > + > + virtual uint32_t get_wm_prog(struct brw_context *brw, > + brw_blorp_prog_data **prog_data) const; > + > +private: > + brw_blorp_clear_prog_key wm_prog_key; > +}; > + > +class brw_blorp_clear_program > +{ > +public: > + brw_blorp_clear_program(struct brw_context *brw, > + const brw_blorp_clear_prog_key *key); > + ~brw_blorp_clear_program(); > + > + const GLuint *compile(struct brw_context *brw, GLuint *program_size); > + > + brw_blorp_prog_data prog_data; > + > +private: > + void alloc_regs(); > + > + void *mem_ctx; > + struct brw_context *brw; > + const brw_blorp_clear_prog_key *key; > + struct brw_compile func; > + > + /* Thread dispatch header */ > + struct brw_reg R0; > + > + /* Pixel X/Y coordinates (always in R1). */ > + struct brw_reg R1; > + > + /* Register with push constants (a single vec4) */ > + struct brw_reg clear_rgba; > + > + /* MRF used for sampling and render target writes */ > + GLuint base_mrf; > Actually this mrf is just used for render target writes. > +}; > + > +brw_blorp_clear_program::brw_blorp_clear_program( > + struct brw_context *brw, > + const brw_blorp_clear_prog_key *key) > + : mem_ctx(ralloc_context(NULL)), > + brw(brw), > + key(key) > +{ > + brw_init_compile(brw, &func, mem_ctx); > +} > + > +brw_blorp_clear_program::~brw_blorp_clear_program() > +{ > + ralloc_free(mem_ctx); > +} > + > +brw_blorp_clear_params::brw_blorp_clear_params(struct brw_context *brw, > + struct gl_framebuffer *fb, > + struct gl_renderbuffer *rb, > + GLubyte *color_mask) > +{ > + struct intel_context *intel = &brw->intel; > + struct gl_context *ctx = &intel->ctx; > + struct intel_renderbuffer *irb = intel_renderbuffer(rb); > + > + dst.set(brw, irb->mt, irb->mt_level, irb->mt_layer); > + > + /* Override the surface format according to the context's sRGB rules. > */ > + gl_format format = irb->mt->format; > + if (!ctx->Color.sRGBEnabled) > + format = _mesa_get_srgb_format_linear(format); > + dst.brw_surfaceformat = brw->render_target_format[format]; > The code for doing this in gen7_update_renderbuffer_surface() looks very different: gl_format rb_format = intel_rb_format(irb); switch (rb_format) { case MESA_FORMAT_SARGB8: /* _NEW_BUFFERS * * Without GL_EXT_framebuffer_sRGB we shouldn't bind sRGB surfaces to the * blend/update as sRGB. */ if (ctx->Color.sRGBEnabled) format = brw_format_for_mesa_format(rb_format); else format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; break; default: assert(brw_render_target_supported(intel, rb)); format = brw->render_target_format[rb_format]; if (unlikely(!brw->format_supported_as_render_target[rb_format])) { _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n", __FUNCTION__, _mesa_get_format_name(rb_format)); } break; } (This logic is almost exactly duplicated in brw_update_renderbuffer_surface()). I'm having trouble reassuring myself that it's equivalent. Can we make a common function to do this job and share it between gen7_update_renderbuffer_surface(), brw_update_renderbuffer_surface(), and brw_blorp_clear_params()? > + > + x0 = fb->_Xmin; > + x1 = fb->_Xmax; > + if (rb->Name != 0) { > + y0 = fb->_Ymin; > + y1 = fb->_Ymax; > + } else { > + y0 = rb->Height - fb->_Ymax; > + y1 = rb->Height - fb->_Ymin; > + } > + > + float *push_consts = (float *)&wm_push_consts; > + > + push_consts[0] = ctx->Color.ClearColor.f[0]; > + push_consts[1] = ctx->Color.ClearColor.f[1]; > + push_consts[2] = ctx->Color.ClearColor.f[2]; > + push_consts[3] = ctx->Color.ClearColor.f[3]; > + > + use_wm_prog = true; > + > + memset(&wm_prog_key, 0, sizeof(wm_prog_key)); > + > + wm_prog_key.const_clear = true; > + > + /* From the SNB PRM (Vol4_Part1): > + * > + * "Replicated data (Message Type = 111) is only supported when > + * accessing tiled memory. Using this Message Type to access > linear > + * (untiled) memory is UNDEFINED." > + */ > + if (irb->mt->region->tiling == I915_TILING_NONE) > + wm_prog_key.const_clear = false; > + > + /* Constant color writes ignore everyting in blend and color calculator > + * state. This is not documented. > + */ > + for (int i = 0; i < 4; i++) { > + if (!color_mask[i]) { > + color_write_disable[i] = true; > + wm_prog_key.const_clear = false; > + } > + } > +} > + > +uint32_t > +brw_blorp_clear_params::get_wm_prog(struct brw_context *brw, > + brw_blorp_prog_data **prog_data) const > +{ > + uint32_t prog_offset; > + if (!brw_search_cache(&brw->cache, BRW_BLORP_BLIT_PROG, > s/BRW_BLORP_BLIT_PROG/BRW_BLORP_CLEAR_PROG/ here (and in the call to brw_upload_cache() below) > + &this->wm_prog_key, sizeof(this->wm_prog_key), > + &prog_offset, prog_data)) { > + brw_blorp_clear_program prog(brw, &this->wm_prog_key); > + GLuint program_size; > + const GLuint *program = prog.compile(brw, &program_size); > + brw_upload_cache(&brw->cache, BRW_BLORP_BLIT_PROG, > + &this->wm_prog_key, sizeof(this->wm_prog_key), > + program, program_size, > + &prog.prog_data, sizeof(prog.prog_data), > + &prog_offset, prog_data); > + } > + return prog_offset; > +} > + > +void > +brw_blorp_clear_program::alloc_regs() > +{ > + int reg = 0; > + this->R0 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW); > + this->R1 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW); > + > + prog_data.first_curbe_grf = reg; > + clear_rgba = retype(brw_vec4_grf(reg++, 0), BRW_REGISTER_TYPE_F); > + reg += BRW_BLORP_NUM_PUSH_CONST_REGS; > + > + /* Make sure we didn't run out of registers */ > + assert(reg <= GEN7_MRF_HACK_START); > + > + int mrf = 2; > + this->base_mrf = mrf; > +} > + > +const GLuint * > +brw_blorp_clear_program::compile(struct brw_context *brw, > + GLuint *program_size) > +{ > + /* Set up prog_data */ > + memset(&prog_data, 0, sizeof(prog_data)); > + prog_data.persample_msaa_dispatch = false; > + > + alloc_regs(); > + > + brw_set_compression_control(&func, BRW_COMPRESSION_NONE); > + > + struct brw_reg mrf_rt_write = > + retype(vec16(brw_message_reg(base_mrf)), BRW_REGISTER_TYPE_F); > + > + uint32_t mlen, msg_type; > + if (key->const_clear) { > + /* The message payload is a single register with the low 4 > floats/ints > + * filled with the constant clear color. > + */ > + brw_set_mask_control(&func, true); > This would read easier as "brw_set_mask_control(&func, BRW_MASK_DISABLE);" > + brw_MOV(&func, vec4(brw_message_reg(base_mrf)), clear_rgba); > + brw_set_mask_control(&func, false); > And "brw_set_mask_control(&func, BRW_MASK_ENABLE);" here. > + > + msg_type = > BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED; > + mlen = 1; > + } else { > + for (int i = 0; i < 4; i++) { > + /* The message payload is pairs of registers for 16 pixels each > of r, > + * g, b, and a. > + */ > + brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED); > + brw_MOV(&func, > + brw_message_reg(base_mrf + i * 2), > + brw_vec1_grf(clear_rgba.nr, i)); > + brw_set_compression_control(&func, BRW_COMPRESSION_NONE); > + } > + > + msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; > + mlen = 8; > + } > + > + /* Now write to the render target and terminate the thread */ > + brw_fb_WRITE(&func, > + 16 /* dispatch_width */, > + base_mrf /* msg_reg_nr */, > + mrf_rt_write /* src0 */, > + msg_type, > + BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX, > + mlen, > + 0 /* response_length */, > + true /* eot */, > + false /* header present */); > Unexpectedly indented. > + > + if (unlikely(INTEL_DEBUG & DEBUG_BLORP)) { > + printf("Native code for BLORP clear:\n"); > + brw_dump_compile(&func, stdout, 0, func.next_insn_offset); > + printf("\n"); > + } > + return brw_get_program(&func, program_size); > +} > + > +extern "C" { > +bool > +brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer > *fb) > +{ > + struct gl_context *ctx = &intel->ctx; > + struct brw_context *brw = brw_context(ctx); > + > + /* The constant color clear code doesn't work for multisampled > surfaces, so > + * we need to support falling back to other clear mechanisms. > + * Unfortunately, our clear code is based on a bitmask that doesn't > + * distinguish individual color attachments, so we walk the > attachments to > + * see if any require fallback, and fall back for all if any of them > need > + * to. > + */ > + for (unsigned buf = 0; buf < ctx->DrawBuffer->_NumColorDrawBuffers; > buf++) { > + struct gl_renderbuffer *rb = > ctx->DrawBuffer->_ColorDrawBuffers[buf]; > + struct intel_renderbuffer *irb = intel_renderbuffer(rb); > + > + if (irb && irb->mt->msaa_layout != INTEL_MSAA_LAYOUT_NONE) > + return false; > + } > + > + for (unsigned buf = 0; buf < ctx->DrawBuffer->_NumColorDrawBuffers; > buf++) { > + struct gl_renderbuffer *rb = > ctx->DrawBuffer->_ColorDrawBuffers[buf]; > + > + /* If this is an ES2 context or GL_ARB_ES2_compatibility is > supported, > + * the framebuffer can be complete with some attachments missing. > In > + * this case the _ColorDrawBuffers pointer will be NULL. > + */ > + if (rb == NULL) > + continue; > + > + brw_blorp_clear_params params(brw, fb, rb, > ctx->Color.ColorMask[buf]); > + brw_blorp_exec(intel, ¶ms); > + } > + > + return true; > +} > + > +} /* extern "C" */ > diff --git a/src/mesa/drivers/dri/i965/brw_clear.c > b/src/mesa/drivers/dri/i965/brw_clear.c > index c0ac69d..ad6a554 100644 > --- a/src/mesa/drivers/dri/i965/brw_clear.c > +++ b/src/mesa/drivers/dri/i965/brw_clear.c > @@ -41,6 +41,7 @@ > #include "intel_regions.h" > > #include "brw_context.h" > +#include "brw_blorp.h" > > #define FILE_DEBUG_FLAG DEBUG_BLIT > > @@ -243,6 +244,16 @@ brw_clear(struct gl_context *ctx, GLbitfield mask) > } > } > > + /* We only have support for blorp as of gen6 so far. */ > + if (intel->gen >= 6) { > + if (mask & BUFFER_BITS_COLOR) { > + if (brw_blorp_clear_color(intel, fb)) { > + debug_mask("blorp color", mask & BUFFER_BITS_COLOR); > + mask &= ~BUFFER_BITS_COLOR; > + } > + } > + } > + > GLbitfield tri_mask = mask & (BUFFER_BITS_COLOR | > BUFFER_BIT_STENCIL | > BUFFER_BIT_DEPTH); > diff --git a/src/mesa/drivers/dri/i965/brw_context.h > b/src/mesa/drivers/dri/i965/brw_context.h > index 114c369..7ec448c 100644 > --- a/src/mesa/drivers/dri/i965/brw_context.h > +++ b/src/mesa/drivers/dri/i965/brw_context.h > @@ -605,6 +605,7 @@ enum brw_cache_id { > BRW_CC_UNIT, > BRW_WM_PROG, > BRW_BLORP_BLIT_PROG, > + BRW_BLORP_CLEAR_PROG, > BRW_SAMPLER, > BRW_WM_UNIT, > BRW_SF_PROG, > diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp > b/src/mesa/drivers/dri/i965/gen6_blorp.cpp > index 872c408..3e7467e 100644 > --- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp > +++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp > @@ -278,17 +278,18 @@ gen6_blorp_emit_blend_state(struct brw_context *brw, > blend->blend1.post_blend_clamp_enable = 1; > blend->blend1.clamp_range = BRW_RENDERTARGET_CLAMPRANGE_FORMAT; > > - blend->blend1.write_disable_r = false; > - blend->blend1.write_disable_g = false; > - blend->blend1.write_disable_b = false; > - blend->blend1.write_disable_a = false; > + blend->blend1.write_disable_r = params->color_write_disable[0]; > + blend->blend1.write_disable_g = params->color_write_disable[1]; > + blend->blend1.write_disable_b = params->color_write_disable[2]; > + blend->blend1.write_disable_a = params->color_write_disable[3]; > > /* When blitting from an XRGB source to a ARGB destination, we need to > * interpret the missing channel as 1.0. Blending can do that for us: > * we simply use the RGB values from the fragment shader ("source > RGB"), > * but smash the alpha channel to 1. > */ > - if (_mesa_get_format_bits(params->dst.mt->format, GL_ALPHA_BITS) > 0 && > + if (params->src.mt && > + _mesa_get_format_bits(params->dst.mt->format, GL_ALPHA_BITS) > 0 && > _mesa_get_format_bits(params->src.mt->format, GL_ALPHA_BITS) == 0) > { > blend->blend0.blend_enable = 1; > blend->blend0.ia_blend_enable = 1; > @@ -1058,16 +1059,18 @@ gen6_blorp_exec(struct intel_context *intel, > depthstencil_offset, > cc_state_offset); > if (params->use_wm_prog) { > uint32_t wm_surf_offset_renderbuffer; > - uint32_t wm_surf_offset_texture; > + uint32_t wm_surf_offset_texture = 0; > uint32_t sampler_offset; > wm_push_const_offset = gen6_blorp_emit_wm_constants(brw, params); > wm_surf_offset_renderbuffer = > gen6_blorp_emit_surface_state(brw, params, ¶ms->dst, > I915_GEM_DOMAIN_RENDER, > I915_GEM_DOMAIN_RENDER); > - wm_surf_offset_texture = > - gen6_blorp_emit_surface_state(brw, params, ¶ms->src, > - I915_GEM_DOMAIN_SAMPLER, 0); > + if (params->src.mt) { > + wm_surf_offset_texture = > + gen6_blorp_emit_surface_state(brw, params, ¶ms->src, > + I915_GEM_DOMAIN_SAMPLER, 0); > + } > wm_bind_bo_offset = > gen6_blorp_emit_binding_table(brw, params, > wm_surf_offset_renderbuffer, > diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp > b/src/mesa/drivers/dri/i965/gen7_blorp.cpp > index 99e7e58..1c23866 100644 > --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp > +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp > @@ -765,17 +765,19 @@ gen7_blorp_exec(struct intel_context *intel, > depthstencil_offset); > if (params->use_wm_prog) { > uint32_t wm_surf_offset_renderbuffer; > - uint32_t wm_surf_offset_texture; > + uint32_t wm_surf_offset_texture = 0; > wm_push_const_offset = gen6_blorp_emit_wm_constants(brw, params); > wm_surf_offset_renderbuffer = > gen7_blorp_emit_surface_state(brw, params, ¶ms->dst, > I915_GEM_DOMAIN_RENDER, > I915_GEM_DOMAIN_RENDER, > true /* is_render_target */); > - wm_surf_offset_texture = > - gen7_blorp_emit_surface_state(brw, params, ¶ms->src, > - I915_GEM_DOMAIN_SAMPLER, 0, > - false /* is_render_target */); > + if (params->src.mt) { > + wm_surf_offset_texture = > + gen7_blorp_emit_surface_state(brw, params, ¶ms->src, > + I915_GEM_DOMAIN_SAMPLER, 0, > + false /* is_render_target */); > + } > wm_bind_bo_offset = > gen6_blorp_emit_binding_table(brw, params, > wm_surf_offset_renderbuffer, > -- > 1.7.10.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev