On 02/26/2014 02:25 AM, Kenneth Graunke wrote: > Module: Mesa > Branch: master > Commit: 56879a7ac41b8c7513a97cc02921f76a2ec8407c > URL: > http://cgit.freedesktop.org/mesa/mesa/commit/?id=56879a7ac41b8c7513a97cc02921f76a2ec8407c > > Author: Kenneth Graunke <kenn...@whitecape.org> > Date: Sun Feb 23 16:29:46 2014 -0800 > > i965/vec4: Handle ir_triop_lrp on Gen4-5 as well. > > When the vec4 backend encountered an ir_triop_lrp, it always emitted an > actual LRP instruction, which only exists on Gen6+. Gen4-5 used > lower_instructions() to decompose ir_triop_lrp at the IR level. > > Since commit 8d37e9915a3b21 ("glsl: Optimize open-coded lrp into lrp."), > we've had an bug where lower_instructions translates ir_triop_lrp into > arithmetic, but opt_algebraic reassembles it back into a lrp. > > To avoid this ordering concern, just handle ir_triop_lrp in the backend. > The FS backend already does this, so we may as well do likewise. > > v2: Add a comment reminding us that we could emit better assembly if we > implemented the infrastructure necessary to support using MAC. > (Assembly code provided by Eric Anholt). > > Cc: "10.1" <mesa-sta...@lists.freedesktop.org> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=75253 > Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> > Reviewed-by: Matt Turner <matts...@gmail.com> > Acked-by: Eric Anholt <e...@anholt.net> > > --- > > src/mesa/drivers/dri/i965/brw_vec4.h | 3 ++ > src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 42 > ++++++++++++++++++++---- > 2 files changed, 38 insertions(+), 7 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h > b/src/mesa/drivers/dri/i965/brw_vec4.h > index 6bd8b80..fb5c0a6 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4.h > +++ b/src/mesa/drivers/dri/i965/brw_vec4.h > @@ -506,6 +506,9 @@ public: > > void emit_minmax(uint32_t condmod, dst_reg dst, src_reg src0, src_reg > src1); > > + void emit_lrp(const dst_reg &dst, > + const src_reg &x, const src_reg &y, const src_reg &a); > + > void emit_block_move(dst_reg *dst, src_reg *src, > const struct glsl_type *type, uint32_t predicate); > > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > index 95e0064..aedab93 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > @@ -1132,6 +1132,40 @@ vec4_visitor::emit_minmax(uint32_t conditionalmod, > dst_reg dst, > } > } > > +void > +vec4_visitor::emit_lrp(const dst_reg &dst, > + const src_reg &x, const src_reg &y, const src_reg &a) > +{ > + if (brw->gen >= 6) { > + /* Note that the instruction's argument order is reversed from GLSL > + * and the IR. > + */ > + emit(LRP(dst, > + fix_3src_operand(a), fix_3src_operand(y), > fix_3src_operand(x))); > + } else { > + /* Earlier generations don't support three source operations, so we > + * need to emit x*(1-a) + y*a. > + * > + * A better way to do this would be: > + * ADD one_minus_a, negate(a), 1.0f > + * MUL null, y, a > + * MAC dst, x, one_minus_a > + * but we would need to support MAC and implicit accumulator. > + */ > + dst_reg y_times_a = dst_reg(this, glsl_type::vec4_type); > + dst_reg one_minus_a = dst_reg(this, glsl_type::vec4_type); > + dst_reg x_times_one_minus_a = dst_reg(this, glsl_type::vec4_type); > + y_times_a.writemask = dst.writemask; > + one_minus_a.writemask = dst.writemask; > + x_times_one_minus_a.writemask = dst.writemask; > + > + emit(MUL(y_times_a, y, a)); > + emit(ADD(one_minus_a, negate(a), src_reg(1.0f)));
After picking to 10.1, this line gives me: brw_vec4_visitor.cpp: In member function 'void brw::vec4_visitor::emit_lrp(const brw::dst_reg&, const brw::src_reg&, const brw::src_reg&, const brw::src_reg&)': brw_vec4_visitor.cpp:1171:37: error: could not convert 'a' from 'const brw::src_reg' to 'brw_reg' I don't get this error on master. Is there another patch that also needs picking? > + emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a))); > + emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a))); > + } > +} > + > static bool > is_16bit_constant(ir_rvalue *rvalue) > { > @@ -1628,13 +1662,7 @@ vec4_visitor::visit(ir_expression *ir) > break; > > case ir_triop_lrp: > - op[0] = fix_3src_operand(op[0]); > - op[1] = fix_3src_operand(op[1]); > - op[2] = fix_3src_operand(op[2]); > - /* Note that the instruction's argument order is reversed from GLSL > - * and the IR. > - */ > - emit(LRP(result_dst, op[2], op[1], op[0])); > + emit_lrp(result_dst, op[0], op[1], op[2]); > break; > > case ir_triop_csel: > > _______________________________________________ > mesa-commit mailing list > mesa-com...@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-commit > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev