On Mon, 2016-05-02 at 11:38 +0300, Pohjolainen, Topi wrote: > On Fri, Apr 29, 2016 at 01:29:29PM +0200, Samuel Iglesias Gons?lvez wrote: > > From: Iago Toral Quiroga <ito...@igalia.com> > > > > When we are actually unpacking from a double that we have previously > > packed from its 32-bit components we can bypass the pack operation > > and source from its arguments directly. > > --- > > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 28 ++++++++++++++++++++++++---- > > 1 file changed, 24 insertions(+), 4 deletions(-) > > > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > > index 4332575..0ce25a0 100644 > > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > > @@ -1118,12 +1118,32 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, > > nir_alu_instr *instr) > > break; > > > > case nir_op_unpack_double_2x32_split_x: > > - bld.MOV(result, stride(retype(op[0], BRW_REGISTER_TYPE_UD), 2)); > > - break; > > + case nir_op_unpack_double_2x32_split_y: { > > + /* Optimize the common case where we are unpacking from a double we > > have > > + * previously packed. In this case we can just bypass the pack > > operation > > + * and source directly from its arguments. > > + */ > > + unsigned index = (instr->op == nir_op_unpack_double_2x32_split_x) ? > > 0 : 1; > > + if (instr->src[0].src.is_ssa) { > > + nir_instr *parent_instr = instr->src[0].src.ssa->parent_instr; > > + if (parent_instr->type == nir_instr_type_alu) { > > + nir_alu_instr *alu_parent = nir_instr_as_alu(parent_instr); > > + if (alu_parent->op == nir_op_pack_double_2x32_split) { > > + op[0] = retype(get_nir_src(alu_parent->src[index].src), > > + BRW_REGISTER_TYPE_UD); > > + op[0] = offset(op[0], bld, > > alu_parent->src[index].swizzle[channel]); > > Overflowing line, I tried further down hot it looks with less indentation. > I don't know if it was better or worse. > > > + bld.MOV(result, op[0]); > > + break; > > + } > > + } > > + } > > if (!instr->src[0].src.is_ssa) { > break; > } > > const nir_instr *parent_instr = instr->src[0].src.ssa->parent_instr; > if (parent_instr->type != nir_instr_type_alu) { > break; > } > > const nir_alu_instr *alu_parent = nir_instr_as_alu(parent_instr); > if (alu_parent->op != nir_op_pack_double_2x32_split) { > break; > }
For the record, this is not correct. We only want to break early if we can do the optimization, otherwise we need to fall back to the normal implementation of the opcode at the bottom. > op[0] = retype(get_nir_src(alu_parent->src[index].src), > BRW_REGISTER_TYPE_UD); > op[0] = offset(op[0], bld, alu_parent->src[index].swizzle[channel]); > bld.MOV(result, op[0]); > break; > > + } > > > > > > - case nir_op_unpack_double_2x32_split_y: > > - bld.MOV(result, stride(horiz_offset(retype(op[0], > > BRW_REGISTER_TYPE_UD), 1), 2)); > > + if (instr->op == nir_op_unpack_double_2x32_split_x) > > + bld.MOV(result, stride(retype(op[0], BRW_REGISTER_TYPE_UD), 2)); > > + else > > + bld.MOV(result, stride(horiz_offset(retype(op[0], > > BRW_REGISTER_TYPE_UD), 1), 2)); this is the chunk we need to execute when we can't do the optimization. > Same here, maybe: > > bld.MOV( > result, > stride(horiz_offset(retype(op[0], BRW_REGISTER_TYPE_UD), 1), > 2)); > > > break; > > + } > > > > case nir_op_fpow: > > inst = bld.emit(SHADER_OPCODE_POW, result, op[0], op[1]); > > -- > > 2.5.0 > > > > _______________________________________________ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev