On Saturday, April 04, 2015 04:28:49 PM Jordan Justen wrote: > On 2015-04-04 01:23:28, Kenneth Graunke wrote: > > This allows SIMD16 mode to work for a lot more programs. Texturing is > > also more efficient in SIMD16 mode than SIMD8. Several messages don't > > actually exist in SIMD8 mode, so we did SIMD16 messages and threw away > > half of the data. Now we compute real data in both halves. > > > > Also, the SIMD16 "sample" message doesn't require all three coordinate > > components to exist (like the SIMD8 one), so we can shorten the message > > lengths, cutting register usage a bit. > > > > I chose to implement the visitor functionality in a separate function, > > since mixing true SIMD16 with SIMD8 code that uses SIMD16 fallbacks > > seemed like a mess. The new code bails on a few cases where we'd > > have to do two SIMD8 messages - we just fall back to SIMD8 for now. > > > > Improves performance in "Shadowrun: Dragonfall - Director's Cut" by > > about 20% on GM45 (measured with LIBGL_SHOW_FPS=1 while standing around > > in the first mission). 
> > > > Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> > > --- > > src/mesa/drivers/dri/i965/brw_fs.h | 4 ++ > > src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 28 ++++++++--- > > src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 68 > > +++++++++++++++++++++++++- > > 3 files changed, 90 insertions(+), 10 deletions(-) > > > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h > > b/src/mesa/drivers/dri/i965/brw_fs.h > > index 278a8ee..cfdbf55 100644 > > --- a/src/mesa/drivers/dri/i965/brw_fs.h > > +++ b/src/mesa/drivers/dri/i965/brw_fs.h > > @@ -271,6 +271,10 @@ public: > > fs_reg shadow_comp, > > fs_reg lod, fs_reg lod2, int grad_components, > > uint32_t sampler); > > + fs_inst *emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst, > > + fs_reg coordinate, int > > vector_elements, > > + fs_reg shadow_c, fs_reg lod, > > + uint32_t sampler); > > fs_inst *emit_texture_gen5(ir_texture_opcode op, fs_reg dst, > > fs_reg coordinate, int coord_components, > > fs_reg shadow_comp, > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > > b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > > index 40e51aa..2743297 100644 > > --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > > +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > > @@ -622,16 +622,26 @@ fs_generator::generate_tex(fs_inst *inst, struct > > brw_reg dst, struct brw_reg src > > /* Note that G45 and older determines shadow compare and dispatch > > width > > * from message length for most messages. 
> > */ > > - assert(dispatch_width == 8); > > - msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; > > - if (inst->shadow_compare) { > > - assert(inst->mlen == 6); > > - } else { > > - assert(inst->mlen <= 4); > > - } > > + if (dispatch_width == 8) { > > + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; > > + if (inst->shadow_compare) { > > + assert(inst->mlen == 6); > > + } else { > > + assert(inst->mlen <= 4); > > + } > > + } else { > > + if (inst->shadow_compare) { > > + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; > > + assert(inst->mlen == 9); > > + } else { > > + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE; > > + assert(inst->mlen <= 7 && inst->mlen % 2 == 1); > > + } > > + } > > break; > > case FS_OPCODE_TXB: > > if (inst->shadow_compare) { > > + assert(dispatch_width == 8); > > assert(inst->mlen == 6); > > msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE; > > } else { > > @@ -642,6 +652,7 @@ fs_generator::generate_tex(fs_inst *inst, struct > > brw_reg dst, struct brw_reg src > > break; > > case SHADER_OPCODE_TXL: > > if (inst->shadow_compare) { > > + assert(dispatch_width == 8); > > assert(inst->mlen == 6); > > msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE; > > } else { > > @@ -652,11 +663,12 @@ fs_generator::generate_tex(fs_inst *inst, struct > > brw_reg dst, struct brw_reg src > > break; > > case SHADER_OPCODE_TXD: > > /* There is no sample_d_c message; comparisons are done manually */ > > + assert(dispatch_width == 8); > > assert(inst->mlen == 7 || inst->mlen == 10); > > msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS; > > break; > > case SHADER_OPCODE_TXF: > > - assert(inst->mlen == 9); > > + assert(inst->mlen <= 9 && inst->mlen % 2 == 1); > > msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD; > > simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; > > break; > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > > b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > > index 8c0ec33..25c424a 100644 > > --- 
a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > > +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > > @@ -1435,8 +1435,6 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, > > fs_reg dst, > > bool simd16 = false; > > fs_reg orig_dst; > > > > - no16("SIMD16 texturing on Gen4 not supported yet."); > > - > > /* g0 header. */ > > mlen = 1; > > > > @@ -1588,6 +1586,69 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, > > fs_reg dst, > > return inst; > > } > > > > +fs_inst * > > +fs_visitor::emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst, > > + fs_reg coordinate, int > > vector_elements, > > + fs_reg shadow_c, fs_reg lod, > > + uint32_t sampler) > > +{ > > + fs_reg message(MRF, 2, BRW_REGISTER_TYPE_F, dispatch_width); > > + bool has_lod = op == ir_txl || op == ir_txb; > > + > > + if (has_lod && shadow_c.file != BAD_FILE) > > + no16("TXB and TXL with shadow comparison unsupported in SIMD16."); > > + > > + if (op == ir_txd) > > + no16("textureGrad unsupported in SIMD16."); > > + > > + /* Copy the coordinates. */ > > + for (int i = 0; i < vector_elements; i++) { > > + emit(MOV(retype(offset(message, i), coordinate.type), coordinate)); > > + coordinate = offset(coordinate, 1); > > + } > > + > > + fs_reg msg_end = offset(message, vector_elements); > > + > > + /* Messages other than sample and ld require all three components */ > > + if (has_lod || shadow_c.file != BAD_FILE) { > > + for (int i = vector_elements; i < 3; i++) { > > + emit(MOV(offset(message, i), fs_reg(0.0f))); > > + } > > + } > > + > > + if (has_lod) { > > + fs_reg msg_lod = retype(offset(message, 3), op == ir_txf ? > > + BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_F); > > From above: has_lod = op == ir_txl || op == ir_txb, so the > op == ir_txf check here should always be false, right? > > Should has_lod also check for ir_txf?
Good catch, thanks! I added ir_txf to the has_lod case. Technically, if lod == 0, we can probably skip setting has_lod to avoid having to fill out the entire <u, v, r, lod> message. But there's an erratum saying unnecessary values 'must be zero' - which probably just means that you can't program them to non-zero values, so leaving them off should be OK. But I'm not sure I care to find out. texelFetch is a GLSL 1.30 feature, which isn't supported on Gen4 - I suppose that's why I didn't see the bug. Still worth fixing; probably not worth optimizing just yet :) > Otherwise, > Series Reviewed-by: Jordan Justen <jordan.l.jus...@intel.com> Thank you!
signature.asc
Description: This is a digitally signed message part.
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev