Re: [Mesa-dev] [PATCH v5 2/3] i965/fs: Use NIR's scalarizing abilities and stop handling vectors

2015-02-02 Thread Kenneth Graunke
On Thursday, January 29, 2015 01:40:19 PM Jason Ekstrand wrote:
> Now that we can scalarize with NIR, there's no need for all this code
> anymore.  Let's get rid of it and just do scalar operations.
> 
> v2: run copy prop before lowering phi nodes
> 
> v3: Get rid of the "emit(...)->saturate = foo" pattern
> 
> total instructions in shared programs: 5998321 -> 5974070 (-0.40%)
> instructions in affected programs: 732075 -> 707824 (-3.31%)
> helped:3137
> HURT:  191
> GAINED:18
> LOST:  0
> ---
>  src/mesa/drivers/dri/i965/brw_fs.h   |  15 -
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 476 
> ++-
>  2 files changed, 153 insertions(+), 338 deletions(-)
> 

It looks like you have two separate case...case runs that result in:

   unreachable("Lowered by nir_lower_alu_reductions");

It might make sense to combine them into one big group.  (Not worth
sending a v4, just do that before pushing, if you like the suggestion.)

As is, this is:
Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v5 2/3] i965/fs: Use NIR's scalarizing abilities and stop handling vectors

2015-01-29 Thread Jason Ekstrand
Now that we can scalarize with NIR, there's no need for all this code
anymore.  Let's get rid of it and just do scalar operations.

v2: run copy prop before lowering phi nodes

v3: Get rid of the "emit(...)->saturate = foo" pattern

total instructions in shared programs: 5998321 -> 5974070 (-0.40%)
instructions in affected programs: 732075 -> 707824 (-3.31%)
helped:3137
HURT:  191
GAINED:18
LOST:  0
---
 src/mesa/drivers/dri/i965/brw_fs.h   |  15 -
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 476 ++-
 2 files changed, 153 insertions(+), 338 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index 84e0b9e..25197cd 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -592,21 +592,6 @@ public:
fs_reg get_nir_alu_src(nir_alu_instr *instr, unsigned src);
fs_reg get_nir_dest(nir_dest dest);
void emit_percomp(fs_inst *inst, unsigned wr_mask);
-   void emit_percomp(enum opcode op, fs_reg dest, fs_reg src0,
- unsigned wr_mask, bool saturate = false,
- enum brw_predicate predicate = BRW_PREDICATE_NONE,
- enum brw_conditional_mod mod = BRW_CONDITIONAL_NONE);
-   void emit_percomp(enum opcode op, fs_reg dest, fs_reg src0, fs_reg src1,
- unsigned wr_mask, bool saturate = false,
- enum brw_predicate predicate = BRW_PREDICATE_NONE,
- enum brw_conditional_mod mod = BRW_CONDITIONAL_NONE);
-   void emit_math_percomp(enum opcode op, fs_reg dest, fs_reg src0,
-  unsigned wr_mask, bool saturate = false);
-   void emit_math_percomp(enum opcode op, fs_reg dest, fs_reg src0,
-  fs_reg src1, unsigned wr_mask,
-  bool saturate = false);
-   void emit_reduction(enum opcode op, fs_reg dest, fs_reg src,
-   unsigned num_components);
 
int setup_color_payload(fs_reg *dst, fs_reg color, unsigned components);
void emit_alpha_test();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index de0d780..37ccd24 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -36,6 +36,10 @@ nir_optimize(nir_shader *nir)
   nir_validate_shader(nir);
   progress |= nir_copy_prop(nir);
   nir_validate_shader(nir);
+  nir_lower_phis_to_scalar(nir);
+  nir_validate_shader(nir);
+  progress |= nir_copy_prop(nir);
+  nir_validate_shader(nir);
   progress |= nir_opt_dce(nir);
   nir_validate_shader(nir);
   progress |= nir_opt_cse(nir);
@@ -85,6 +89,9 @@ fs_visitor::emit_nir_code()
nir_split_var_copies(nir);
nir_validate_shader(nir);
 
+   nir_lower_alu_to_scalar(nir);
+   nir_validate_shader(nir);
+
nir_optimize(nir);
 
/* Lower a bunch of stuff */
@@ -532,6 +539,7 @@ void
 fs_visitor::nir_emit_alu(nir_alu_instr *instr)
 {
struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *) this->key;
+   fs_inst *inst;
 
fs_reg op[3];
fs_reg result = get_nir_dest(instr->dest.dest);
@@ -540,20 +548,30 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
   op[i] = get_nir_alu_src(instr, i);
 
+   if (nir_op_infos[instr->op].output_size == 0) {
+  /* We've already scalarized, so we know that we only have one
+   * channel.  The only question is which channel.
+   */
+  assert(_mesa_bitcount(instr->dest.write_mask) == 1);
+  unsigned off = ffs(instr->dest.write_mask) - 1;
+  result = offset(result, off);
+
+  for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
+ op[i] = offset(op[i], off);
+   }
+
switch (instr->op) {
case nir_op_fmov:
case nir_op_i2f:
-   case nir_op_u2f: {
-  fs_inst *inst = MOV(result, op[0]);
+   case nir_op_u2f:
+  inst = emit(MOV(result, op[0]));
   inst->saturate = instr->dest.saturate;
-  emit_percomp(inst, instr->dest.write_mask);
-   }
   break;
 
case nir_op_imov:
case nir_op_f2i:
case nir_op_f2u:
-  emit_percomp(MOV(result, op[0]), instr->dest.write_mask);
+  emit(MOV(result, op[0]));
   break;
 
case nir_op_fsign: {
@@ -562,55 +580,46 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
  * Predicated OR ORs 1.0 (0x3f80) with the sign bit if val is not
  * zero.
  */
-  emit_percomp(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ),
-   instr->dest.write_mask);
+  emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
 
   fs_reg result_int = retype(result, BRW_REGISTER_TYPE_UD);
   op[0].type = BRW_REGISTER_TYPE_UD;
   result.type = BRW_REGISTER_TYPE_UD