On Fri, May 13, 2016 at 10:49 AM, Jason Ekstrand <ja...@jlekstrand.net> wrote:
> > > On Wed, May 11, 2016 at 7:42 PM, Jason Ekstrand <ja...@jlekstrand.net> > wrote: > >> The current MSAA resolve code has a special-case for if the MCS value is >> 0. >> In this case we can only sample once because we know that all values are >> in >> slice 0. This commit adds a second optimization that detects the magic MCS >> value that indicates the clear color and grabs the color from a push >> constant and avoids sampling altogether. On a microbenchmark written by >> Neil Roberts that tests resolving surfaces with just clear color, this >> improves performance by 60% for 8x, 40% for 4x, and 28% for 2x MSAA on my >> SKL gte3 laptop. The benchmark can be found on the ML archive: >> >> https://lists.freedesktop.org/archives/mesa-dev/2016-February/108077.html >> > More data: It seems to help T-Rex on Haswell by maybe 0.5% and hurts some of the cpu-bound synthetics just a bit. Meh? --Jason > --- >> src/mesa/drivers/dri/i965/brw_blorp.h | 4 +- >> src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 101 >> +++++++++++++++++++++++++-- >> 2 files changed, 100 insertions(+), 5 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h >> b/src/mesa/drivers/dri/i965/brw_blorp.h >> index 15114d0..9d71ca4 100644 >> --- a/src/mesa/drivers/dri/i965/brw_blorp.h >> +++ b/src/mesa/drivers/dri/i965/brw_blorp.h >> @@ -197,7 +197,9 @@ struct brw_blorp_wm_push_constants >> uint32_t src_z; >> >> /* Pad out to an integral number of registers */ >> - uint32_t pad[5]; >> + uint32_t pad; >> + >> + union gl_color_union clear_color; >> }; >> >> #define BRW_BLORP_NUM_PUSH_CONSTANT_DWORDS \ >> diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp >> b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp >> index 514a316..45b696d 100644 >> --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp >> @@ -346,6 +346,7 @@ struct brw_blorp_blit_vars { >> nir_variable *offset; >> } u_x_transform, u_y_transform; >> nir_variable *u_src_z; >> + 
nir_variable *u_clear_color; >> >> /* gl_FragCoord */ >> nir_variable *frag_coord; >> @@ -374,6 +375,7 @@ brw_blorp_blit_vars_init(nir_builder *b, struct >> brw_blorp_blit_vars *v, >> LOAD_UNIFORM(y_transform.multiplier, glsl_float_type()) >> LOAD_UNIFORM(y_transform.offset, glsl_float_type()) >> LOAD_UNIFORM(src_z, glsl_uint_type()) >> + LOAD_UNIFORM(clear_color, glsl_vec4_type()) >> >> #undef DECL_UNIFORM >> >> @@ -858,7 +860,8 @@ static nir_ssa_def * >> blorp_nir_manual_blend_average(nir_builder *b, nir_ssa_def *pos, >> unsigned tex_samples, >> enum intel_msaa_layout tex_layout, >> - enum brw_reg_type dst_type) >> + enum brw_reg_type dst_type, >> + struct brw_blorp_blit_vars *v) >> { >> /* If non-null, this is the outer-most if statement */ >> nir_if *outer_if = NULL; >> @@ -867,9 +870,53 @@ blorp_nir_manual_blend_average(nir_builder *b, >> nir_ssa_def *pos, >> nir_local_variable_create(b->impl, glsl_vec4_type(), "color"); >> >> nir_ssa_def *mcs = NULL; >> - if (tex_layout == INTEL_MSAA_LAYOUT_CMS) >> + if (tex_layout == INTEL_MSAA_LAYOUT_CMS) { >> mcs = blorp_nir_txf_ms_mcs(b, pos); >> >> + /* The MCS buffer stores a packed value that provides a mapping >> from >> + * samples to array slices. The magic value of all ones means >> that all >> + * samples have the clear color. In this case, we can >> short-circuit the >> + * sampling process and just use the clear color that we pushed >> into the >> + * shader. >> + */ >> + nir_ssa_def *is_clear_color; >> + switch (tex_samples) { >> + case 2: >> + /* Empirical evidence suggests that the value returned from the >> + * sampler is not always 0x3 for clear color so we need to mask >> it. 
>> + */ >> + is_clear_color = >> + nir_ieq(b, nir_iand(b, nir_channel(b, mcs, 0), >> nir_imm_int(b, 0x3)), >> + nir_imm_int(b, 0x3)); >> + break; >> + case 4: >> + is_clear_color = >> + nir_ieq(b, nir_channel(b, mcs, 0), nir_imm_int(b, 0xff)); >> + break; >> + case 8: >> + is_clear_color = >> + nir_ieq(b, nir_channel(b, mcs, 0), nir_imm_int(b, ~0)); >> + break; >> + case 16: >> + is_clear_color = >> + nir_ior(b, nir_ieq(b, nir_channel(b, mcs, 0), nir_imm_int(b, >> ~0)), >> > > This needs to be nir_iand. Fixed locally... > > >> + nir_ieq(b, nir_channel(b, mcs, 1), nir_imm_int(b, >> ~0))); >> + break; >> + default: >> + unreachable("Invalid sample count"); >> + } >> + >> + nir_if *if_stmt = nir_if_create(b->shader); >> + if_stmt->condition = nir_src_for_ssa(is_clear_color); >> + nir_cf_node_insert(b->cursor, &if_stmt->cf_node); >> + >> + b->cursor = nir_after_cf_list(&if_stmt->then_list); >> + nir_store_var(b, color, nir_load_var(b, v->u_clear_color), 0xf); >> + >> + b->cursor = nir_after_cf_list(&if_stmt->else_list); >> + outer_if = if_stmt; >> + } >> + >> /* We add together samples using a binary tree structure, e.g. for 4x >> MSAA: >> * >> * result = ((sample[0] + sample[1]) + (sample[2] + sample[3])) / 4 >> @@ -937,7 +984,8 @@ blorp_nir_manual_blend_average(nir_builder *b, >> nir_ssa_def *pos, >> nir_store_var(b, color, texture_data[0], 0xf); >> >> b->cursor = nir_after_cf_list(&if_stmt->else_list); >> - outer_if = if_stmt; >> + if (!outer_if) >> + outer_if = if_stmt; >> } >> >> for (int j = 0; j < count_trailing_one_bits(i); j++) { >> @@ -1341,7 +1389,7 @@ brw_blorp_build_nir_shader(struct brw_context *brw, >> /* Gen7+ hardware doesn't automaticaly blend. 
*/ >> color = blorp_nir_manual_blend_average(&b, src_pos, >> key->src_samples, >> key->src_layout, >> - key->texture_data_type); >> + key->texture_data_type, >> &v); >> } >> } else if (key->blend && key->blit_scaled) { >> color = blorp_nir_manual_blend_bilinear(&b, src_pos, >> key->src_samples, key, &v); >> @@ -1493,6 +1541,48 @@ compute_msaa_layout_for_pipeline(struct >> brw_context *brw, unsigned num_samples, >> } >> >> >> +static union gl_color_union >> +brw_blorp_get_clear_color_for_mt(struct brw_context *brw, >> + struct intel_mipmap_tree *mt, >> + unsigned swizzle) >> +{ >> + union gl_color_union color; >> + if (brw->gen >= 9) { >> + color = mt->gen9_fast_clear_color; >> + } else if (_mesa_is_format_integer(mt->format)) { >> + color.i[0] = (mt->fast_clear_color_value & (1 << 31)) != 0; >> + color.i[1] = (mt->fast_clear_color_value & (1 << 30)) != 0; >> + color.i[2] = (mt->fast_clear_color_value & (1 << 29)) != 0; >> + color.i[3] = (mt->fast_clear_color_value & (1 << 28)) != 0; >> + } else { >> + color.f[0] = (mt->fast_clear_color_value & (1 << 31)) != 0; >> + color.f[1] = (mt->fast_clear_color_value & (1 << 30)) != 0; >> + color.f[2] = (mt->fast_clear_color_value & (1 << 29)) != 0; >> + color.f[3] = (mt->fast_clear_color_value & (1 << 28)) != 0; >> + } >> + >> + if (swizzle != SWIZZLE_NOOP) { >> + union gl_color_union orig_color = color; >> + for (unsigned i = 0; i < 4; i++) { >> + unsigned s = GET_SWZ(swizzle, i); >> + if (s <= SWIZZLE_W) { >> + color.i[i] = orig_color.i[s]; >> + } else if (s == SWIZZLE_ZERO) { >> + color.i[i] = 0; >> + } else { >> + assert(s == SWIZZLE_ONE); >> + if (_mesa_is_format_integer(mt->format)) >> + color.i[i] = 1; >> + else >> + color.f[i] = 1; >> + } >> + } >> + } >> + >> + return color; >> +} >> + >> + >> /** >> * Note: if the src (or dst) is a 2D multisample array texture on Gen7+ >> using >> * INTEL_MSAA_LAYOUT_UMS or INTEL_MSAA_LAYOUT_CMS, src_layer (dst_layer) >> is >> @@ -1665,6 +1755,9 @@ brw_blorp_blit_miptrees(struct 
brw_context *brw, >> params.src.num_samples <= 1 && params.dst.num_samples <= 1) >> wm_prog_key.bilinear_filter = true; >> >> + params.wm_push_consts.clear_color = >> + brw_blorp_get_clear_color_for_mt(brw, src_mt, src_swizzle); >> + >> GLenum base_format = _mesa_get_format_base_format(src_mt->format); >> if (base_format != GL_DEPTH_COMPONENT && /* TODO: what about >> depth/stencil? */ >> base_format != GL_STENCIL_INDEX && >> -- >> 2.5.0.400.gff86faf >> >> >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev