Re: [Mesa-dev] [PATCH v5 3/3] i965/fs_nir: Get rid of get_alu_src

2015-02-02 Thread Jason Ekstrand
On Mon, Feb 2, 2015 at 10:00 PM, Kenneth Graunke 
wrote:

> On Thursday, January 29, 2015 01:40:20 PM Jason Ekstrand wrote:
> > Originally, get_alu_src was supposed to handle resolving swizzles and
> > things like that.  However, now that basically every instruction we have
> > only takes scalar sources, we don't really need it anymore.  The only
> case
> > where it's still marginally useful is for the mov and vecN operations
> that
> > are left over from SSA form.  We can handle those cases as a special case
> > easily enough.  As a side-effect, we don't need the vec_to_movs pass
> > anymore.
> > ---
> >  src/mesa/drivers/dri/i965/brw_fs.h   |   1 -
> >  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 153
> +++
> >  2 files changed, 95 insertions(+), 59 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h
> b/src/mesa/drivers/dri/i965/brw_fs.h
> > index 25197cd..b95e2c0 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs.h
> > +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> > @@ -589,7 +589,6 @@ public:
> > void nir_emit_texture(nir_tex_instr *instr);
> > void nir_emit_jump(nir_jump_instr *instr);
> > fs_reg get_nir_src(nir_src src);
> > -   fs_reg get_nir_alu_src(nir_alu_instr *instr, unsigned src);
> > fs_reg get_nir_dest(nir_dest dest);
> > void emit_percomp(fs_inst *inst, unsigned wr_mask);
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > index 37ccd24..fbb1622 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > @@ -140,8 +140,6 @@ fs_visitor::emit_nir_code()
> >
> > nir_convert_from_ssa(nir);
> > nir_validate_shader(nir);
> > -   nir_lower_vec_to_movs(nir);
> > -   nir_validate_shader(nir);
> >
> > /* emit the arrays used for inputs and outputs - load/store
> intrinsics will
> >  * be converted to reads/writes of these arrays
> > @@ -419,6 +417,7 @@ fs_visitor::nir_emit_impl(nir_function_impl *impl)
> >  void
> >  fs_visitor::nir_emit_cf_list(exec_list *list)
> >  {
> > +   exec_list_validate(list);
> > foreach_list_typed(nir_cf_node, node, node, list) {
> >switch (node->type) {
> >case nir_cf_node_if:
> > @@ -541,34 +540,117 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
> > struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *)
> this->key;
> > fs_inst *inst;
> >
> > -   fs_reg op[3];
> > fs_reg result = get_nir_dest(instr->dest.dest);
> > result.type =
> brw_type_for_nir_type(nir_op_infos[instr->op].output_type);
> >
> > -   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
> > -  op[i] = get_nir_alu_src(instr, i);
> > +   fs_reg op[4];
> > +   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
> > +  op[i] = get_nir_src(instr->src[i].src);
> > +  op[i].type =
> brw_type_for_nir_type(nir_op_infos[instr->op].input_types[i]);
> > +  op[i].abs = instr->src[i].abs;
> > +  op[i].negate = instr->src[i].negate;
> > +   }
> > +
> > +   /* We get a bunch of mov's out of the from_ssa pass and they may
> still
> > +* be vectorized.  We'll handle them as a special-case.  We'll also
> > +* handle vecN here because it's basically the same thing.
> > +*/
> > +   bool need_extra_copy = false;
> > +   switch (instr->op) {
> > +   case nir_op_vec4:
> > +  if (!instr->src[3].src.is_ssa &&
> > +  instr->dest.dest.reg.reg == instr->src[3].src.reg.reg)
> > + need_extra_copy = true;
> > +  /* fall through */
> > +   case nir_op_vec3:
> > +  if (!instr->src[2].src.is_ssa &&
> > +  instr->dest.dest.reg.reg == instr->src[2].src.reg.reg)
> > + need_extra_copy = true;
> > +  /* fall through */
> > +   case nir_op_vec2:
> > +  if (!instr->src[1].src.is_ssa &&
> > +  instr->dest.dest.reg.reg == instr->src[1].src.reg.reg)
> > + need_extra_copy = true;
> > +  /* fall through */
> > +   case nir_op_imov:
> > +   case nir_op_fmov: {
> > +  if (!instr->src[0].src.is_ssa &&
> > +  instr->dest.dest.reg.reg == instr->src[0].src.reg.reg)
> > + need_extra_copy = true;
>
> How about:
>
> switch (instr->op) {
> case nir_op_vec4:
> case nir_op_vec3:
> case nir_op_vec2:
> case nir_op_imov:
> case nir_op_fmov: {
>fs_reg temp = result;
>bool need_extra_copy = false;
>for (int i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
>   if (!instr->src[i].src.is_ssa &&
>   instr->dest.dest.reg.reg == instr->src[i].src.reg.reg) {
>  need_extra_copy = true;
>  temp = retype(vgrf(4), result.type);
>  break;
>   }
>}
>...
> }
>
> That duplicates less code.
>

Yeah, the original is horrible.  That's much better. I'll do that.


>
> > +
> > +  fs_reg temp;
> > +  if (need_extra_copy) {
> > + temp = retype(vgrf(4), result.type);
> > +  } else {
> > + temp = result;
> > +  }

Re: [Mesa-dev] [PATCH v5 3/3] i965/fs_nir: Get rid of get_alu_src

2015-02-02 Thread Kenneth Graunke
On Thursday, January 29, 2015 01:40:20 PM Jason Ekstrand wrote:
> Originally, get_alu_src was supposed to handle resolving swizzles and
> things like that.  However, now that basically every instruction we have
> only takes scalar sources, we don't really need it anymore.  The only case
> where it's still marginally useful is for the mov and vecN operations that
> are left over from SSA form.  We can handle those cases as a special case
> easily enough.  As a side-effect, we don't need the vec_to_movs pass
> anymore.
> ---
>  src/mesa/drivers/dri/i965/brw_fs.h   |   1 -
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 153 
> +++
>  2 files changed, 95 insertions(+), 59 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
> b/src/mesa/drivers/dri/i965/brw_fs.h
> index 25197cd..b95e2c0 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -589,7 +589,6 @@ public:
> void nir_emit_texture(nir_tex_instr *instr);
> void nir_emit_jump(nir_jump_instr *instr);
> fs_reg get_nir_src(nir_src src);
> -   fs_reg get_nir_alu_src(nir_alu_instr *instr, unsigned src);
> fs_reg get_nir_dest(nir_dest dest);
> void emit_percomp(fs_inst *inst, unsigned wr_mask);
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 37ccd24..fbb1622 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -140,8 +140,6 @@ fs_visitor::emit_nir_code()
>  
> nir_convert_from_ssa(nir);
> nir_validate_shader(nir);
> -   nir_lower_vec_to_movs(nir);
> -   nir_validate_shader(nir);
>  
> /* emit the arrays used for inputs and outputs - load/store intrinsics 
> will
>  * be converted to reads/writes of these arrays
> @@ -419,6 +417,7 @@ fs_visitor::nir_emit_impl(nir_function_impl *impl)
>  void
>  fs_visitor::nir_emit_cf_list(exec_list *list)
>  {
> +   exec_list_validate(list);
> foreach_list_typed(nir_cf_node, node, node, list) {
>switch (node->type) {
>case nir_cf_node_if:
> @@ -541,34 +540,117 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
> struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *) this->key;
> fs_inst *inst;
>  
> -   fs_reg op[3];
> fs_reg result = get_nir_dest(instr->dest.dest);
> result.type = brw_type_for_nir_type(nir_op_infos[instr->op].output_type);
>  
> -   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
> -  op[i] = get_nir_alu_src(instr, i);
> +   fs_reg op[4];
> +   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
> +  op[i] = get_nir_src(instr->src[i].src);
> +  op[i].type = 
> brw_type_for_nir_type(nir_op_infos[instr->op].input_types[i]);
> +  op[i].abs = instr->src[i].abs;
> +  op[i].negate = instr->src[i].negate;
> +   }
> +
> +   /* We get a bunch of mov's out of the from_ssa pass and they may still
> +* be vectorized.  We'll handle them as a special-case.  We'll also
> +* handle vecN here because it's basically the same thing.
> +*/
> +   bool need_extra_copy = false;
> +   switch (instr->op) {
> +   case nir_op_vec4:
> +  if (!instr->src[3].src.is_ssa &&
> +  instr->dest.dest.reg.reg == instr->src[3].src.reg.reg)
> + need_extra_copy = true;
> +  /* fall through */
> +   case nir_op_vec3:
> +  if (!instr->src[2].src.is_ssa &&
> +  instr->dest.dest.reg.reg == instr->src[2].src.reg.reg)
> + need_extra_copy = true;
> +  /* fall through */
> +   case nir_op_vec2:
> +  if (!instr->src[1].src.is_ssa &&
> +  instr->dest.dest.reg.reg == instr->src[1].src.reg.reg)
> + need_extra_copy = true;
> +  /* fall through */
> +   case nir_op_imov:
> +   case nir_op_fmov: {
> +  if (!instr->src[0].src.is_ssa &&
> +  instr->dest.dest.reg.reg == instr->src[0].src.reg.reg)
> + need_extra_copy = true;

How about:

switch (instr->op) {
case nir_op_vec4:
case nir_op_vec3:
case nir_op_vec2:
case nir_op_imov:
case nir_op_fmov: {
   fs_reg temp = result;
   bool need_extra_copy = false;
   for (int i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
  if (!instr->src[i].src.is_ssa &&
  instr->dest.dest.reg.reg == instr->src[i].src.reg.reg) {
 need_extra_copy = true;
 temp = retype(vgrf(4), result.type);
 break;
  }
   }
   ...
}

That duplicates less code.

> +
> +  fs_reg temp;
> +  if (need_extra_copy) {
> + temp = retype(vgrf(4), result.type);
> +  } else {
> + temp = result;
> +  }
> +
> +  if (instr->op == nir_op_imov || instr->op == nir_op_fmov) {
> + for (unsigned i = 0; i < 4; i++) {
> +if (!(instr->dest.write_mask & (1 << i)))
> +   continue;
> +
> +inst = emit(MOV(offset(temp, i),
> +offset(op[0], instr->src[0].swizzle[i])));
> +inst->saturate = ins

[Mesa-dev] [PATCH v5 3/3] i965/fs_nir: Get rid of get_alu_src

2015-01-29 Thread Jason Ekstrand
Originally, get_alu_src was supposed to handle resolving swizzles and
things like that.  However, now that basically every instruction we have
only takes scalar sources, we don't really need it anymore.  The only case
where it's still marginally useful is for the mov and vecN operations that
are left over from SSA form.  We can handle those cases as a special case
easily enough.  As a side-effect, we don't need the vec_to_movs pass
anymore.
---
 src/mesa/drivers/dri/i965/brw_fs.h   |   1 -
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 153 +++
 2 files changed, 95 insertions(+), 59 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index 25197cd..b95e2c0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -589,7 +589,6 @@ public:
void nir_emit_texture(nir_tex_instr *instr);
void nir_emit_jump(nir_jump_instr *instr);
fs_reg get_nir_src(nir_src src);
-   fs_reg get_nir_alu_src(nir_alu_instr *instr, unsigned src);
fs_reg get_nir_dest(nir_dest dest);
void emit_percomp(fs_inst *inst, unsigned wr_mask);
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 37ccd24..fbb1622 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -140,8 +140,6 @@ fs_visitor::emit_nir_code()
 
nir_convert_from_ssa(nir);
nir_validate_shader(nir);
-   nir_lower_vec_to_movs(nir);
-   nir_validate_shader(nir);
 
/* emit the arrays used for inputs and outputs - load/store intrinsics will
 * be converted to reads/writes of these arrays
@@ -419,6 +417,7 @@ fs_visitor::nir_emit_impl(nir_function_impl *impl)
 void
 fs_visitor::nir_emit_cf_list(exec_list *list)
 {
+   exec_list_validate(list);
foreach_list_typed(nir_cf_node, node, node, list) {
   switch (node->type) {
   case nir_cf_node_if:
@@ -541,34 +540,117 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *) this->key;
fs_inst *inst;
 
-   fs_reg op[3];
fs_reg result = get_nir_dest(instr->dest.dest);
result.type = brw_type_for_nir_type(nir_op_infos[instr->op].output_type);
 
-   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
-  op[i] = get_nir_alu_src(instr, i);
+   fs_reg op[4];
+   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
+  op[i] = get_nir_src(instr->src[i].src);
+  op[i].type = 
brw_type_for_nir_type(nir_op_infos[instr->op].input_types[i]);
+  op[i].abs = instr->src[i].abs;
+  op[i].negate = instr->src[i].negate;
+   }
+
+   /* We get a bunch of mov's out of the from_ssa pass and they may still
+* be vectorized.  We'll handle them as a special-case.  We'll also
+* handle vecN here because it's basically the same thing.
+*/
+   bool need_extra_copy = false;
+   switch (instr->op) {
+   case nir_op_vec4:
+  if (!instr->src[3].src.is_ssa &&
+  instr->dest.dest.reg.reg == instr->src[3].src.reg.reg)
+ need_extra_copy = true;
+  /* fall through */
+   case nir_op_vec3:
+  if (!instr->src[2].src.is_ssa &&
+  instr->dest.dest.reg.reg == instr->src[2].src.reg.reg)
+ need_extra_copy = true;
+  /* fall through */
+   case nir_op_vec2:
+  if (!instr->src[1].src.is_ssa &&
+  instr->dest.dest.reg.reg == instr->src[1].src.reg.reg)
+ need_extra_copy = true;
+  /* fall through */
+   case nir_op_imov:
+   case nir_op_fmov: {
+  if (!instr->src[0].src.is_ssa &&
+  instr->dest.dest.reg.reg == instr->src[0].src.reg.reg)
+ need_extra_copy = true;
+
+  fs_reg temp;
+  if (need_extra_copy) {
+ temp = retype(vgrf(4), result.type);
+  } else {
+ temp = result;
+  }
+
+  if (instr->op == nir_op_imov || instr->op == nir_op_fmov) {
+ for (unsigned i = 0; i < 4; i++) {
+if (!(instr->dest.write_mask & (1 << i)))
+   continue;
+
+inst = emit(MOV(offset(temp, i),
+offset(op[0], instr->src[0].swizzle[i])));
+inst->saturate = instr->dest.saturate;
+ }
+  } else {
+ for (unsigned i = 0; i < 4; i++) {
+if (!(instr->dest.write_mask & (1 << i)))
+   continue;
+
+inst = emit(MOV(offset(temp, i),
+offset(op[i], instr->src[i].swizzle[0])));
+inst->saturate = instr->dest.saturate;
+ }
+  }
+
+  /* In this case the source and destination registers were the same,
+   * so we need to insert an extra set of moves in order to deal with
+   * any swizzling.
+   */
+  if (need_extra_copy) {
+ for (unsigned i = 0; i < 4; i++) {
+if (!(instr->dest.write_mask & (1 << i)))
+   continue;
+
+emit(MOV(offset(result, i), offset(temp, i)));
+ }
+  }