Now that we can scalarize with NIR, there's no need for all this code
anymore. Let's get rid of it and just do scalar operations.
v2: run copy prop before lowering phi nodes
v3: Get rid of the "emit(...)->saturate = foo" pattern
total instructions in shared programs: 5998321 -> 5974070 (-0.40%)
instructions in affected programs: 732075 -> 707824 (-3.31%)
helped:3137
HURT: 191
GAINED:18
LOST: 0
---
src/mesa/drivers/dri/i965/brw_fs.h | 15 -
src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 476 ++-
2 files changed, 153 insertions(+), 338 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h
b/src/mesa/drivers/dri/i965/brw_fs.h
index 84e0b9e..25197cd 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -592,21 +592,6 @@ public:
fs_reg get_nir_alu_src(nir_alu_instr *instr, unsigned src);
fs_reg get_nir_dest(nir_dest dest);
void emit_percomp(fs_inst *inst, unsigned wr_mask);
- void emit_percomp(enum opcode op, fs_reg dest, fs_reg src0,
- unsigned wr_mask, bool saturate = false,
- enum brw_predicate predicate = BRW_PREDICATE_NONE,
- enum brw_conditional_mod mod = BRW_CONDITIONAL_NONE);
- void emit_percomp(enum opcode op, fs_reg dest, fs_reg src0, fs_reg src1,
- unsigned wr_mask, bool saturate = false,
- enum brw_predicate predicate = BRW_PREDICATE_NONE,
- enum brw_conditional_mod mod = BRW_CONDITIONAL_NONE);
- void emit_math_percomp(enum opcode op, fs_reg dest, fs_reg src0,
- unsigned wr_mask, bool saturate = false);
- void emit_math_percomp(enum opcode op, fs_reg dest, fs_reg src0,
- fs_reg src1, unsigned wr_mask,
- bool saturate = false);
- void emit_reduction(enum opcode op, fs_reg dest, fs_reg src,
- unsigned num_components);
int setup_color_payload(fs_reg *dst, fs_reg color, unsigned components);
void emit_alpha_test();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index de0d780..37ccd24 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -36,6 +36,10 @@ nir_optimize(nir_shader *nir)
nir_validate_shader(nir);
progress |= nir_copy_prop(nir);
nir_validate_shader(nir);
+ nir_lower_phis_to_scalar(nir);
+ nir_validate_shader(nir);
+ progress |= nir_copy_prop(nir);
+ nir_validate_shader(nir);
progress |= nir_opt_dce(nir);
nir_validate_shader(nir);
progress |= nir_opt_cse(nir);
@@ -85,6 +89,9 @@ fs_visitor::emit_nir_code()
nir_split_var_copies(nir);
nir_validate_shader(nir);
+ nir_lower_alu_to_scalar(nir);
+ nir_validate_shader(nir);
+
nir_optimize(nir);
/* Lower a bunch of stuff */
@@ -532,6 +539,7 @@ void
fs_visitor::nir_emit_alu(nir_alu_instr *instr)
{
struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *) this->key;
+ fs_inst *inst;
fs_reg op[3];
fs_reg result = get_nir_dest(instr->dest.dest);
@@ -540,20 +548,30 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
op[i] = get_nir_alu_src(instr, i);
+ if (nir_op_infos[instr->op].output_size == 0) {
+ /* We've already scalarized, so we know that we only have one
+ * channel. The only question is which channel.
+ */
+ assert(_mesa_bitcount(instr->dest.write_mask) == 1);
+ unsigned off = ffs(instr->dest.write_mask) - 1;
+ result = offset(result, off);
+
+ for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
+ op[i] = offset(op[i], off);
+ }
+
switch (instr->op) {
case nir_op_fmov:
case nir_op_i2f:
- case nir_op_u2f: {
- fs_inst *inst = MOV(result, op[0]);
+ case nir_op_u2f:
+ inst = emit(MOV(result, op[0]));
inst->saturate = instr->dest.saturate;
- emit_percomp(inst, instr->dest.write_mask);
- }
break;
case nir_op_imov:
case nir_op_f2i:
case nir_op_f2u:
- emit_percomp(MOV(result, op[0]), instr->dest.write_mask);
+ emit(MOV(result, op[0]));
break;
case nir_op_fsign: {
@@ -562,55 +580,46 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
* Predicated OR ORs 1.0 (0x3f80) with the sign bit if val is not
* zero.
*/
- emit_percomp(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ),
- instr->dest.write_mask);
+ emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
fs_reg result_int = retype(result, BRW_REGISTER_TYPE_UD);
op[0].type = BRW_REGISTER_TYPE_UD;
result.type = BRW_REGISTER_TYPE_UD