Re: [Mesa-dev] [PATCH v4 2/3] i965/fs: Use NIR's scalarizing abilities and stop handling vectors

2015-01-29 Thread Jason Ekstrand
Matt wanted shader-db numbers and here they are:

total instructions in shared programs: 5998287 -> 5974070 (-0.40%)
instructions in affected programs: 732041 -> 707824 (-3.31%)
helped:3135
HURT:  193
GAINED:18
LOST:  0


On Thu, Jan 29, 2015 at 3:41 AM, Kenneth Graunke 
wrote:

> On Wednesday, January 28, 2015 04:27:23 PM Jason Ekstrand wrote:
> > On Wed, Jan 28, 2015 at 4:02 PM, Matt Turner  wrote:
> >
> > > On Tue, Jan 27, 2015 at 5:31 PM, Jason Ekstrand 
> > > wrote:
> > > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > > > index de0d780..217fe09 100644
> > > > @@ -689,9 +689,9 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
> > > >
> > > >struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
> > > result.type);
> > > >
> > > > -  emit_percomp(MUL(acc, op[0], op[1]), instr->dest.write_mask);
> > > > -  emit_percomp(MACH(reg_null_d, op[0], op[1]),
> > > instr->dest.write_mask);
> > > > -  emit_percomp(MOV(result, fs_reg(acc)),
> instr->dest.write_mask);
> > >
> > > Bah. How the hell did the old code pass piglit?
> > >
> >
> > I think channel expressions saved it.
> >
> >
> > >
> > > > +  emit(MUL(acc, op[0], op[1]));
> > > > +  emit(MACH(reg_null_d, op[0], op[1]));
> > > > +  emit(MOV(result, fs_reg(acc)));
> > > >break;
> > > > }
> > > >
> > > > @@ -773,72 +767,38 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
> > > > case nir_op_ball_fequal3:
> > > > case nir_op_ball_iequal3:
> > > > case nir_op_ball_fequal4:
> > > > -   case nir_op_ball_iequal4: {
> > > > -  unsigned num_components =
> nir_op_infos[instr->op].input_sizes[0];
> > > > -  fs_reg temp = vgrf(num_components);
> > > > -  emit_percomp(CMP(temp, op[0], op[1], BRW_CONDITIONAL_Z),
> > > > -   (1 << num_components) - 1);
> > > > -  emit_reduction(BRW_OPCODE_AND, result, temp, num_components);
> > > > -  break;
> > > > -   }
> > > > -
> > > > +   case nir_op_ball_iequal4:
> > >
> > > We can save it for later, but it might be interesting to let the fs
> > > backend get the vector comparisons directly, if that's possible.
> > >
> > > See https://bugs.freedesktop.org/show_bug.cgi?id=77456
> > >
> >
> > We can.  We just have to add a flag to turn that off in the
> > nir_lower_alu_to_scalar pass.
> >
> >
> > >
> > > > @@ -867,83 +827,67 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
> > > >unreachable("not reached: should be handled by
> ldexp_to_arith()");
> > > >
> > > > case nir_op_fsqrt:
> > > > -  emit_math_percomp(SHADER_OPCODE_SQRT, result, op[0],
> > > > -instr->dest.write_mask,
> instr->dest.saturate);
> > > > +  emit_math(SHADER_OPCODE_SQRT, result, op[0])
> > > > +   ->saturate = instr->dest.saturate;
> > >
> > > I don't really like the ->saturate = ... as a single statement. I know
> > > I'm guilty of using it in a unit test recently.
> > >
> >
> > Yeah, I'm not sure what to do there.  I really don't like having to
> assign
> > it to a fs_inst variable and then do the saturate.  I guess we could.  Or
> > we could add something to emit_math.  Thoughts?
>
> In the brw_eu_emit.c code I worked around this by creating a brw_last_inst
> macro that always refers to the most recently emitted instruction.
>
> Still not 100% crazy about it, but it's been pretty handy.  *shrug*.
> It's an idea, anyway.
>
> I'm not too opposed to just creating fs_inst * temporaries.
>
> --Ken
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v4 2/3] i965/fs: Use NIR's scalarizing abilities and stop handling vectors

2015-01-29 Thread Kenneth Graunke
On Wednesday, January 28, 2015 04:27:23 PM Jason Ekstrand wrote:
> On Wed, Jan 28, 2015 at 4:02 PM, Matt Turner  wrote:
> 
> > On Tue, Jan 27, 2015 at 5:31 PM, Jason Ekstrand 
> > wrote:
> > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > > index de0d780..217fe09 100644
> > > @@ -689,9 +689,9 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
> > >
> > >struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
> > result.type);
> > >
> > > -  emit_percomp(MUL(acc, op[0], op[1]), instr->dest.write_mask);
> > > -  emit_percomp(MACH(reg_null_d, op[0], op[1]),
> > instr->dest.write_mask);
> > > -  emit_percomp(MOV(result, fs_reg(acc)), instr->dest.write_mask);
> >
> > Bah. How the hell did the old code pass piglit?
> >
> 
> I think channel expressions saved it.
> 
> 
> >
> > > +  emit(MUL(acc, op[0], op[1]));
> > > +  emit(MACH(reg_null_d, op[0], op[1]));
> > > +  emit(MOV(result, fs_reg(acc)));
> > >break;
> > > }
> > >
> > > @@ -773,72 +767,38 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
> > > case nir_op_ball_fequal3:
> > > case nir_op_ball_iequal3:
> > > case nir_op_ball_fequal4:
> > > -   case nir_op_ball_iequal4: {
> > > -  unsigned num_components = nir_op_infos[instr->op].input_sizes[0];
> > > -  fs_reg temp = vgrf(num_components);
> > > -  emit_percomp(CMP(temp, op[0], op[1], BRW_CONDITIONAL_Z),
> > > -   (1 << num_components) - 1);
> > > -  emit_reduction(BRW_OPCODE_AND, result, temp, num_components);
> > > -  break;
> > > -   }
> > > -
> > > +   case nir_op_ball_iequal4:
> >
> > We can save it for later, but it might be interesting to let the fs
> > backend get the vector comparisons directly, if that's possible.
> >
> > See https://bugs.freedesktop.org/show_bug.cgi?id=77456
> >
> 
> We can.  We just have to add a flag to turn that off in the
> nir_lower_alu_to_scalar pass.
> 
> 
> >
> > > @@ -867,83 +827,67 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
> > >unreachable("not reached: should be handled by ldexp_to_arith()");
> > >
> > > case nir_op_fsqrt:
> > > -  emit_math_percomp(SHADER_OPCODE_SQRT, result, op[0],
> > > -instr->dest.write_mask, instr->dest.saturate);
> > > +  emit_math(SHADER_OPCODE_SQRT, result, op[0])
> > > +   ->saturate = instr->dest.saturate;
> >
> > I don't really like the ->saturate = ... as a single statement. I know
> > I'm guilty of using it in a unit test recently.
> >
> 
> Yeah, I'm not sure what to do there.  I really don't like having to assign
> it to a fs_inst variable and then do the saturate.  I guess we could.  Or
> we could add something to emit_math.  Thoughts?

In the brw_eu_emit.c code I worked around this by creating a brw_last_inst
macro that always refers to the most recently emitted instruction.

Still not 100% crazy about it, but it's been pretty handy.  *shrug*.
It's an idea, anyway.

I'm not too opposed to just creating fs_inst * temporaries.

--Ken

signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v4 2/3] i965/fs: Use NIR's scalarizing abilities and stop handling vectors

2015-01-28 Thread Eric Anholt
Matt Turner  writes:

> On Tue, Jan 27, 2015 at 5:31 PM, Jason Ekstrand  wrote:
>> +  emit(MUL(acc, op[0], op[1]));
>> +  emit(MACH(reg_null_d, op[0], op[1]));
>> +  emit(MOV(result, fs_reg(acc)));
>>break;
>> }
>>
>> @@ -773,72 +767,38 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
>> case nir_op_ball_fequal3:
>> case nir_op_ball_iequal3:
>> case nir_op_ball_fequal4:
>> -   case nir_op_ball_iequal4: {
>> -  unsigned num_components = nir_op_infos[instr->op].input_sizes[0];
>> -  fs_reg temp = vgrf(num_components);
>> -  emit_percomp(CMP(temp, op[0], op[1], BRW_CONDITIONAL_Z),
>> -   (1 << num_components) - 1);
>> -  emit_reduction(BRW_OPCODE_AND, result, temp, num_components);
>> -  break;
>> -   }
>> -
>> +   case nir_op_ball_iequal4:
>
> We can save it for later, but it might be interesting to let the fs
> backend get the vector comparisons directly, if that's possible.
>
> See https://bugs.freedesktop.org/show_bug.cgi?id=77456

I think you want to peephole it instead of relying on the language,
since you also want avoid the ANDs for non-vector conditions like "a < 0
&& b < 0".

(I've been thinking about this too)


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v4 2/3] i965/fs: Use NIR's scalarizing abilities and stop handling vectors

2015-01-28 Thread Jason Ekstrand
On Wed, Jan 28, 2015 at 4:02 PM, Matt Turner  wrote:

> On Tue, Jan 27, 2015 at 5:31 PM, Jason Ekstrand 
> wrote:
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > index de0d780..217fe09 100644
> > @@ -689,9 +689,9 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
> >
> >struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
> result.type);
> >
> > -  emit_percomp(MUL(acc, op[0], op[1]), instr->dest.write_mask);
> > -  emit_percomp(MACH(reg_null_d, op[0], op[1]),
> instr->dest.write_mask);
> > -  emit_percomp(MOV(result, fs_reg(acc)), instr->dest.write_mask);
>
> Bah. How the hell did the old code pass piglit?
>

I think channel expressions saved it.


>
> > +  emit(MUL(acc, op[0], op[1]));
> > +  emit(MACH(reg_null_d, op[0], op[1]));
> > +  emit(MOV(result, fs_reg(acc)));
> >break;
> > }
> >
> > @@ -773,72 +767,38 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
> > case nir_op_ball_fequal3:
> > case nir_op_ball_iequal3:
> > case nir_op_ball_fequal4:
> > -   case nir_op_ball_iequal4: {
> > -  unsigned num_components = nir_op_infos[instr->op].input_sizes[0];
> > -  fs_reg temp = vgrf(num_components);
> > -  emit_percomp(CMP(temp, op[0], op[1], BRW_CONDITIONAL_Z),
> > -   (1 << num_components) - 1);
> > -  emit_reduction(BRW_OPCODE_AND, result, temp, num_components);
> > -  break;
> > -   }
> > -
> > +   case nir_op_ball_iequal4:
>
> We can save it for later, but it might be interesting to let the fs
> backend get the vector comparisons directly, if that's possible.
>
> See https://bugs.freedesktop.org/show_bug.cgi?id=77456
>

We can.  We just have to add a flag to turn that off in the
nir_lower_alu_to_scalar pass.


>
> > @@ -867,83 +827,67 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
> >unreachable("not reached: should be handled by ldexp_to_arith()");
> >
> > case nir_op_fsqrt:
> > -  emit_math_percomp(SHADER_OPCODE_SQRT, result, op[0],
> > -instr->dest.write_mask, instr->dest.saturate);
> > +  emit_math(SHADER_OPCODE_SQRT, result, op[0])
> > +   ->saturate = instr->dest.saturate;
>
> I don't really like the ->saturate = ... as a single statement. I know
> I'm guilty of using it in a unit test recently.
>

Yeah, I'm not sure what to do there.  I really don't like having to assign
it to a fs_inst variable and then do the saturate.  I guess we could.  Or
we could add something to emit_math.  Thoughts?


> I guess if we remove saturate as a destination modifier from NIR we'll
> get rid of most instances of this pattern, leaving just ->predicate.
> Are we planning to remove src/dst modifiers from NIR?
>

Not until we get SSA in the backend.  It's so much easier to apply them in
SSA.  If we'd like to drop one of them we can always add some flags to the
lowering pass that adds them back in.
--Jason
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v4 2/3] i965/fs: Use NIR's scalarizing abilities and stop handling vectors

2015-01-28 Thread Matt Turner
On Tue, Jan 27, 2015 at 5:31 PM, Jason Ekstrand  wrote:
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index de0d780..217fe09 100644
> @@ -689,9 +689,9 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
>
>struct brw_reg acc = retype(brw_acc_reg(dispatch_width), result.type);
>
> -  emit_percomp(MUL(acc, op[0], op[1]), instr->dest.write_mask);
> -  emit_percomp(MACH(reg_null_d, op[0], op[1]), instr->dest.write_mask);
> -  emit_percomp(MOV(result, fs_reg(acc)), instr->dest.write_mask);

Bah. How the hell did the old code pass piglit?

> +  emit(MUL(acc, op[0], op[1]));
> +  emit(MACH(reg_null_d, op[0], op[1]));
> +  emit(MOV(result, fs_reg(acc)));
>break;
> }
>
> @@ -773,72 +767,38 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
> case nir_op_ball_fequal3:
> case nir_op_ball_iequal3:
> case nir_op_ball_fequal4:
> -   case nir_op_ball_iequal4: {
> -  unsigned num_components = nir_op_infos[instr->op].input_sizes[0];
> -  fs_reg temp = vgrf(num_components);
> -  emit_percomp(CMP(temp, op[0], op[1], BRW_CONDITIONAL_Z),
> -   (1 << num_components) - 1);
> -  emit_reduction(BRW_OPCODE_AND, result, temp, num_components);
> -  break;
> -   }
> -
> +   case nir_op_ball_iequal4:

We can save it for later, but it might be interesting to let the fs
backend get the vector comparisons directly, if that's possible.

See https://bugs.freedesktop.org/show_bug.cgi?id=77456

> @@ -867,83 +827,67 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
>unreachable("not reached: should be handled by ldexp_to_arith()");
>
> case nir_op_fsqrt:
> -  emit_math_percomp(SHADER_OPCODE_SQRT, result, op[0],
> -instr->dest.write_mask, instr->dest.saturate);
> +  emit_math(SHADER_OPCODE_SQRT, result, op[0])
> +   ->saturate = instr->dest.saturate;

I don't really like the ->saturate = ... as a single statement. I know
I'm guilty of using it in a unit test recently.

I guess if we remove saturate as a destination modifier from NIR we'll
get rid of most instances of this pattern, leaving just ->predicate.
Are we planning to remove src/dst modifiers from NIR?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4 2/3] i965/fs: Use NIR's scalarizing abilities and stop handling vectors

2015-01-27 Thread Jason Ekstrand
Now that we can scalarize with NIR, there's no need for all this code
anymore.  Let's get rid of it and just do scalar operations.

v2: run copy prop before lowering phi nodes
---
 src/mesa/drivers/dri/i965/brw_fs.h   |  15 -
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 489 ++-
 2 files changed, 158 insertions(+), 346 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index 84e0b9e..25197cd 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -592,21 +592,6 @@ public:
fs_reg get_nir_alu_src(nir_alu_instr *instr, unsigned src);
fs_reg get_nir_dest(nir_dest dest);
void emit_percomp(fs_inst *inst, unsigned wr_mask);
-   void emit_percomp(enum opcode op, fs_reg dest, fs_reg src0,
- unsigned wr_mask, bool saturate = false,
- enum brw_predicate predicate = BRW_PREDICATE_NONE,
- enum brw_conditional_mod mod = BRW_CONDITIONAL_NONE);
-   void emit_percomp(enum opcode op, fs_reg dest, fs_reg src0, fs_reg src1,
- unsigned wr_mask, bool saturate = false,
- enum brw_predicate predicate = BRW_PREDICATE_NONE,
- enum brw_conditional_mod mod = BRW_CONDITIONAL_NONE);
-   void emit_math_percomp(enum opcode op, fs_reg dest, fs_reg src0,
-  unsigned wr_mask, bool saturate = false);
-   void emit_math_percomp(enum opcode op, fs_reg dest, fs_reg src0,
-  fs_reg src1, unsigned wr_mask,
-  bool saturate = false);
-   void emit_reduction(enum opcode op, fs_reg dest, fs_reg src,
-   unsigned num_components);
 
int setup_color_payload(fs_reg *dst, fs_reg color, unsigned components);
void emit_alpha_test();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index de0d780..217fe09 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -36,6 +36,10 @@ nir_optimize(nir_shader *nir)
   nir_validate_shader(nir);
   progress |= nir_copy_prop(nir);
   nir_validate_shader(nir);
+  nir_lower_phis_to_scalar(nir);
+  nir_validate_shader(nir);
+  progress |= nir_copy_prop(nir);
+  nir_validate_shader(nir);
   progress |= nir_opt_dce(nir);
   nir_validate_shader(nir);
   progress |= nir_opt_cse(nir);
@@ -85,6 +89,9 @@ fs_visitor::emit_nir_code()
nir_split_var_copies(nir);
nir_validate_shader(nir);
 
+   nir_lower_alu_to_scalar(nir);
+   nir_validate_shader(nir);
+
nir_optimize(nir);
 
/* Lower a bunch of stuff */
@@ -540,20 +547,30 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
   op[i] = get_nir_alu_src(instr, i);
 
+   if (nir_op_infos[instr->op].output_size == 0) {
+  /* We've already scalarized, so we know that we only have one
+   * channel.  The only question is which channel.
+   */
+  assert(_mesa_bitcount(instr->dest.write_mask) == 1);
+  unsigned off = ffs(instr->dest.write_mask) - 1;
+  result = offset(result, off);
+
+  for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
+ op[i] = offset(op[i], off);
+   }
+
switch (instr->op) {
case nir_op_fmov:
case nir_op_i2f:
-   case nir_op_u2f: {
-  fs_inst *inst = MOV(result, op[0]);
-  inst->saturate = instr->dest.saturate;
-  emit_percomp(inst, instr->dest.write_mask);
-   }
+   case nir_op_u2f:
+  emit(MOV(result, op[0]))
+  ->saturate = instr->dest.saturate;
   break;
 
case nir_op_imov:
case nir_op_f2i:
case nir_op_f2u:
-  emit_percomp(MOV(result, op[0]), instr->dest.write_mask);
+  emit(MOV(result, op[0]));
   break;
 
case nir_op_fsign: {
@@ -562,55 +579,46 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
  * Predicated OR ORs 1.0 (0x3f80) with the sign bit if val is not
  * zero.
  */
-  emit_percomp(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ),
-   instr->dest.write_mask);
+  emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
 
   fs_reg result_int = retype(result, BRW_REGISTER_TYPE_UD);
   op[0].type = BRW_REGISTER_TYPE_UD;
   result.type = BRW_REGISTER_TYPE_UD;
-  emit_percomp(AND(result_int, op[0], fs_reg(0x8000u)),
-   instr->dest.write_mask);
+  emit(AND(result_int, op[0], fs_reg(0x8000u)));
 
-  fs_inst *inst = OR(result_int, result_int, fs_reg(0x3f80u));
-  inst->predicate = BRW_PREDICATE_NORMAL;
-  emit_percomp(inst, instr->dest.write_mask);
+  emit(OR(result_int, result_int, fs_reg(0x3f80u)))
+  ->predicate = BRW_PREDICATE_NORMAL;
   if (instr->dest.saturate) {
- fs_inst *inst = MOV(result, result);
- inst->saturate = true