Re: [Mesa-dev] [PATCH v4 2/3] i965/fs: Use NIR's scalarizing abilities and stop handling vectors
Matt wanted shader-db numbers and here they are: total instructions in shared programs: 5998287 -> 5974070 (-0.40%) instructions in affected programs: 732041 -> 707824 (-3.31%) helped:3135 HURT: 193 GAINED:18 LOST: 0 On Thu, Jan 29, 2015 at 3:41 AM, Kenneth Graunke wrote: > On Wednesday, January 28, 2015 04:27:23 PM Jason Ekstrand wrote: > > On Wed, Jan 28, 2015 at 4:02 PM, Matt Turner wrote: > > > > > On Tue, Jan 27, 2015 at 5:31 PM, Jason Ekstrand > > > wrote: > > > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > > > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > > > > index de0d780..217fe09 100644 > > > > @@ -689,9 +689,9 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) > > > > > > > >struct brw_reg acc = retype(brw_acc_reg(dispatch_width), > > > result.type); > > > > > > > > - emit_percomp(MUL(acc, op[0], op[1]), instr->dest.write_mask); > > > > - emit_percomp(MACH(reg_null_d, op[0], op[1]), > > > instr->dest.write_mask); > > > > - emit_percomp(MOV(result, fs_reg(acc)), > instr->dest.write_mask); > > > > > > Bah. How the hell did the old code pass piglit? > > > > > > > I think channel expressions saved it. > > > > > > > > > > > + emit(MUL(acc, op[0], op[1])); > > > > + emit(MACH(reg_null_d, op[0], op[1])); > > > > + emit(MOV(result, fs_reg(acc))); > > > >break; > > > > } > > > > > > > > @@ -773,72 +767,38 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) > > > > case nir_op_ball_fequal3: > > > > case nir_op_ball_iequal3: > > > > case nir_op_ball_fequal4: > > > > - case nir_op_ball_iequal4: { > > > > - unsigned num_components = > nir_op_infos[instr->op].input_sizes[0]; > > > > - fs_reg temp = vgrf(num_components); > > > > - emit_percomp(CMP(temp, op[0], op[1], BRW_CONDITIONAL_Z), > > > > - (1 << num_components) - 1); > > > > - emit_reduction(BRW_OPCODE_AND, result, temp, num_components); > > > > - break; > > > > - } > > > > - > > > > + case nir_op_ball_iequal4: > > > > > > We can save it for later, but it might be interesting to let the fs > > > backend get the vector comparisons directly, if that's possible. > > > > > > See https://bugs.freedesktop.org/show_bug.cgi?id=77456 > > > > > > > We can. We just have to add a flag to turn that off in the > > nir_lower_alu_to_scalar pass. > > > > > > > > > > > @@ -867,83 +827,67 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) > > > >unreachable("not reached: should be handled by > ldexp_to_arith()"); > > > > > > > > case nir_op_fsqrt: > > > > - emit_math_percomp(SHADER_OPCODE_SQRT, result, op[0], > > > > -instr->dest.write_mask, > instr->dest.saturate); > > > > + emit_math(SHADER_OPCODE_SQRT, result, op[0]) > > > > + ->saturate = instr->dest.saturate; > > > > > > I don't really like the ->saturate = ... as a single statement. I know > > > I'm guilty of using it in a unit test recently. > > > > > > > Yeah, I'm not sure what to do there. I really don't like having to > assign > > it to a fs_inst variable and then do the saturate. I guess we could. Or > > we could add something to emit_math. Thoughts? > > In the brw_eu_emit.c code I worked around this by creating a brw_last_inst > macro that always refers to the most recently emitted instruction. > > Still not 100% crazy about it, but it's been pretty handy. *shrug*. > It's an idea, anyway. > > I'm not too opposed to just creating fs_inst * temporaries. > > --Ken ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v4 2/3] i965/fs: Use NIR's scalarizing abilities and stop handling vectors
On Wednesday, January 28, 2015 04:27:23 PM Jason Ekstrand wrote: > On Wed, Jan 28, 2015 at 4:02 PM, Matt Turner wrote: > > > On Tue, Jan 27, 2015 at 5:31 PM, Jason Ekstrand > > wrote: > > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > > > index de0d780..217fe09 100644 > > > @@ -689,9 +689,9 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) > > > > > >struct brw_reg acc = retype(brw_acc_reg(dispatch_width), > > result.type); > > > > > > - emit_percomp(MUL(acc, op[0], op[1]), instr->dest.write_mask); > > > - emit_percomp(MACH(reg_null_d, op[0], op[1]), > > instr->dest.write_mask); > > > - emit_percomp(MOV(result, fs_reg(acc)), instr->dest.write_mask); > > > > Bah. How the hell did the old code pass piglit? > > > > I think channel expressions saved it. > > > > > > > + emit(MUL(acc, op[0], op[1])); > > > + emit(MACH(reg_null_d, op[0], op[1])); > > > + emit(MOV(result, fs_reg(acc))); > > >break; > > > } > > > > > > @@ -773,72 +767,38 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) > > > case nir_op_ball_fequal3: > > > case nir_op_ball_iequal3: > > > case nir_op_ball_fequal4: > > > - case nir_op_ball_iequal4: { > > > - unsigned num_components = nir_op_infos[instr->op].input_sizes[0]; > > > - fs_reg temp = vgrf(num_components); > > > - emit_percomp(CMP(temp, op[0], op[1], BRW_CONDITIONAL_Z), > > > - (1 << num_components) - 1); > > > - emit_reduction(BRW_OPCODE_AND, result, temp, num_components); > > > - break; > > > - } > > > - > > > + case nir_op_ball_iequal4: > > > > We can save it for later, but it might be interesting to let the fs > > backend get the vector comparisons directly, if that's possible. > > > > See https://bugs.freedesktop.org/show_bug.cgi?id=77456 > > > > We can. We just have to add a flag to turn that off in the > nir_lower_alu_to_scalar pass. > > > > > > > @@ -867,83 +827,67 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) > > >unreachable("not reached: should be handled by ldexp_to_arith()"); > > > > > > case nir_op_fsqrt: > > > - emit_math_percomp(SHADER_OPCODE_SQRT, result, op[0], > > > -instr->dest.write_mask, instr->dest.saturate); > > > + emit_math(SHADER_OPCODE_SQRT, result, op[0]) > > > + ->saturate = instr->dest.saturate; > > > > I don't really like the ->saturate = ... as a single statement. I know > > I'm guilty of using it in a unit test recently. > > > > Yeah, I'm not sure what to do there. I really don't like having to assign > it to a fs_inst variable and then do the saturate. I guess we could. Or > we could add something to emit_math. Thoughts? In the brw_eu_emit.c code I worked around this by creating a brw_last_inst macro that always refers to the most recently emitted instruction. Still not 100% crazy about it, but it's been pretty handy. *shrug*. It's an idea, anyway. I'm not too opposed to just creating fs_inst * temporaries. --Ken signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v4 2/3] i965/fs: Use NIR's scalarizing abilities and stop handling vectors
Matt Turner writes: > On Tue, Jan 27, 2015 at 5:31 PM, Jason Ekstrand wrote: >> + emit(MUL(acc, op[0], op[1])); >> + emit(MACH(reg_null_d, op[0], op[1])); >> + emit(MOV(result, fs_reg(acc))); >>break; >> } >> >> @@ -773,72 +767,38 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) >> case nir_op_ball_fequal3: >> case nir_op_ball_iequal3: >> case nir_op_ball_fequal4: >> - case nir_op_ball_iequal4: { >> - unsigned num_components = nir_op_infos[instr->op].input_sizes[0]; >> - fs_reg temp = vgrf(num_components); >> - emit_percomp(CMP(temp, op[0], op[1], BRW_CONDITIONAL_Z), >> - (1 << num_components) - 1); >> - emit_reduction(BRW_OPCODE_AND, result, temp, num_components); >> - break; >> - } >> - >> + case nir_op_ball_iequal4: > > We can save it for later, but it might be interesting to let the fs > backend get the vector comparisons directly, if that's possible. > > See https://bugs.freedesktop.org/show_bug.cgi?id=77456 I think you want to peephole it instead of relying on the language, since you also want avoid the ANDs for non-vector conditions like "a < 0 && b < 0". (I've been thinking about this too) signature.asc Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v4 2/3] i965/fs: Use NIR's scalarizing abilities and stop handling vectors
On Wed, Jan 28, 2015 at 4:02 PM, Matt Turner wrote: > On Tue, Jan 27, 2015 at 5:31 PM, Jason Ekstrand > wrote: > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > > index de0d780..217fe09 100644 > > @@ -689,9 +689,9 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) > > > >struct brw_reg acc = retype(brw_acc_reg(dispatch_width), > result.type); > > > > - emit_percomp(MUL(acc, op[0], op[1]), instr->dest.write_mask); > > - emit_percomp(MACH(reg_null_d, op[0], op[1]), > instr->dest.write_mask); > > - emit_percomp(MOV(result, fs_reg(acc)), instr->dest.write_mask); > > Bah. How the hell did the old code pass piglit? > I think channel expressions saved it. > > > + emit(MUL(acc, op[0], op[1])); > > + emit(MACH(reg_null_d, op[0], op[1])); > > + emit(MOV(result, fs_reg(acc))); > >break; > > } > > > > @@ -773,72 +767,38 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) > > case nir_op_ball_fequal3: > > case nir_op_ball_iequal3: > > case nir_op_ball_fequal4: > > - case nir_op_ball_iequal4: { > > - unsigned num_components = nir_op_infos[instr->op].input_sizes[0]; > > - fs_reg temp = vgrf(num_components); > > - emit_percomp(CMP(temp, op[0], op[1], BRW_CONDITIONAL_Z), > > - (1 << num_components) - 1); > > - emit_reduction(BRW_OPCODE_AND, result, temp, num_components); > > - break; > > - } > > - > > + case nir_op_ball_iequal4: > > We can save it for later, but it might be interesting to let the fs > backend get the vector comparisons directly, if that's possible. > > See https://bugs.freedesktop.org/show_bug.cgi?id=77456 > We can. We just have to add a flag to turn that off in the nir_lower_alu_to_scalar pass. > > > @@ -867,83 +827,67 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) > >unreachable("not reached: should be handled by ldexp_to_arith()"); > > > > case nir_op_fsqrt: > > - emit_math_percomp(SHADER_OPCODE_SQRT, result, op[0], > > -instr->dest.write_mask, instr->dest.saturate); > > + emit_math(SHADER_OPCODE_SQRT, result, op[0]) > > + ->saturate = instr->dest.saturate; > > I don't really like the ->saturate = ... as a single statement. I know > I'm guilty of using it in a unit test recently. > Yeah, I'm not sure what to do there. I really don't like having to assign it to a fs_inst variable and then do the saturate. I guess we could. Or we could add something to emit_math. Thoughts? > I guess if we remove saturate as a destination modifier from NIR we'll > get rid of most instances of this pattern, leaving just ->predicate. > Are we planning to remove src/dst modifiers from NIR? > Not until we get SSA in the backend. It's so much easier to apply them in SSA. If we'd like to drop one of them we can always add some flags to the lowering pass that adds them back in. --Jason ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v4 2/3] i965/fs: Use NIR's scalarizing abilities and stop handling vectors
On Tue, Jan 27, 2015 at 5:31 PM, Jason Ekstrand wrote: > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > index de0d780..217fe09 100644 > @@ -689,9 +689,9 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) > >struct brw_reg acc = retype(brw_acc_reg(dispatch_width), result.type); > > - emit_percomp(MUL(acc, op[0], op[1]), instr->dest.write_mask); > - emit_percomp(MACH(reg_null_d, op[0], op[1]), instr->dest.write_mask); > - emit_percomp(MOV(result, fs_reg(acc)), instr->dest.write_mask); Bah. How the hell did the old code pass piglit? > + emit(MUL(acc, op[0], op[1])); > + emit(MACH(reg_null_d, op[0], op[1])); > + emit(MOV(result, fs_reg(acc))); >break; > } > > @@ -773,72 +767,38 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) > case nir_op_ball_fequal3: > case nir_op_ball_iequal3: > case nir_op_ball_fequal4: > - case nir_op_ball_iequal4: { > - unsigned num_components = nir_op_infos[instr->op].input_sizes[0]; > - fs_reg temp = vgrf(num_components); > - emit_percomp(CMP(temp, op[0], op[1], BRW_CONDITIONAL_Z), > - (1 << num_components) - 1); > - emit_reduction(BRW_OPCODE_AND, result, temp, num_components); > - break; > - } > - > + case nir_op_ball_iequal4: We can save it for later, but it might be interesting to let the fs backend get the vector comparisons directly, if that's possible. See https://bugs.freedesktop.org/show_bug.cgi?id=77456 > @@ -867,83 +827,67 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) >unreachable("not reached: should be handled by ldexp_to_arith()"); > > case nir_op_fsqrt: > - emit_math_percomp(SHADER_OPCODE_SQRT, result, op[0], > -instr->dest.write_mask, instr->dest.saturate); > + emit_math(SHADER_OPCODE_SQRT, result, op[0]) > + ->saturate = instr->dest.saturate; I don't really like the ->saturate = ... as a single statement. I know I'm guilty of using it in a unit test recently. I guess if we remove saturate as a destination modifier from NIR we'll get rid of most instances of this pattern, leaving just ->predicate. Are we planning to remove src/dst modifiers from NIR? ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v4 2/3] i965/fs: Use NIR's scalarizing abilities and stop handling vectors
Now that we can scalarize with NIR, there's no need for all this code anymore. Let's get rid of it and just do scalar operations. v2: run copy prop before lowering phi nodes --- src/mesa/drivers/dri/i965/brw_fs.h | 15 - src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 489 ++- 2 files changed, 158 insertions(+), 346 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 84e0b9e..25197cd 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -592,21 +592,6 @@ public: fs_reg get_nir_alu_src(nir_alu_instr *instr, unsigned src); fs_reg get_nir_dest(nir_dest dest); void emit_percomp(fs_inst *inst, unsigned wr_mask); - void emit_percomp(enum opcode op, fs_reg dest, fs_reg src0, - unsigned wr_mask, bool saturate = false, - enum brw_predicate predicate = BRW_PREDICATE_NONE, - enum brw_conditional_mod mod = BRW_CONDITIONAL_NONE); - void emit_percomp(enum opcode op, fs_reg dest, fs_reg src0, fs_reg src1, - unsigned wr_mask, bool saturate = false, - enum brw_predicate predicate = BRW_PREDICATE_NONE, - enum brw_conditional_mod mod = BRW_CONDITIONAL_NONE); - void emit_math_percomp(enum opcode op, fs_reg dest, fs_reg src0, - unsigned wr_mask, bool saturate = false); - void emit_math_percomp(enum opcode op, fs_reg dest, fs_reg src0, - fs_reg src1, unsigned wr_mask, - bool saturate = false); - void emit_reduction(enum opcode op, fs_reg dest, fs_reg src, - unsigned num_components); int setup_color_payload(fs_reg *dst, fs_reg color, unsigned components); void emit_alpha_test(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index de0d780..217fe09 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -36,6 +36,10 @@ nir_optimize(nir_shader *nir) nir_validate_shader(nir); progress |= nir_copy_prop(nir); nir_validate_shader(nir); + nir_lower_phis_to_scalar(nir); + nir_validate_shader(nir); + progress |= nir_copy_prop(nir); + nir_validate_shader(nir); progress |= nir_opt_dce(nir); nir_validate_shader(nir); progress |= nir_opt_cse(nir); @@ -85,6 +89,9 @@ fs_visitor::emit_nir_code() nir_split_var_copies(nir); nir_validate_shader(nir); + nir_lower_alu_to_scalar(nir); + nir_validate_shader(nir); + nir_optimize(nir); /* Lower a bunch of stuff */ @@ -540,20 +547,30 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) op[i] = get_nir_alu_src(instr, i); + if (nir_op_infos[instr->op].output_size == 0) { + /* We've already scalarized, so we know that we only have one + * channel. The only question is which channel. + */ + assert(_mesa_bitcount(instr->dest.write_mask) == 1); + unsigned off = ffs(instr->dest.write_mask) - 1; + result = offset(result, off); + + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) + op[i] = offset(op[i], off); + } + switch (instr->op) { case nir_op_fmov: case nir_op_i2f: - case nir_op_u2f: { - fs_inst *inst = MOV(result, op[0]); - inst->saturate = instr->dest.saturate; - emit_percomp(inst, instr->dest.write_mask); - } + case nir_op_u2f: + emit(MOV(result, op[0])) + ->saturate = instr->dest.saturate; break; case nir_op_imov: case nir_op_f2i: case nir_op_f2u: - emit_percomp(MOV(result, op[0]), instr->dest.write_mask); + emit(MOV(result, op[0])); break; case nir_op_fsign: { @@ -562,55 +579,46 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) * Predicated OR ORs 1.0 (0x3f80) with the sign bit if val is not * zero. */ - emit_percomp(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ), - instr->dest.write_mask); + emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ)); fs_reg result_int = retype(result, BRW_REGISTER_TYPE_UD); op[0].type = BRW_REGISTER_TYPE_UD; result.type = BRW_REGISTER_TYPE_UD; - emit_percomp(AND(result_int, op[0], fs_reg(0x8000u)), - instr->dest.write_mask); + emit(AND(result_int, op[0], fs_reg(0x8000u))); - fs_inst *inst = OR(result_int, result_int, fs_reg(0x3f80u)); - inst->predicate = BRW_PREDICATE_NORMAL; - emit_percomp(inst, instr->dest.write_mask); + emit(OR(result_int, result_int, fs_reg(0x3f80u))) + ->predicate = BRW_PREDICATE_NORMAL; if (instr->dest.saturate) { - fs_inst *inst = MOV(result, result); - inst->saturate = true