Re: [Mesa-dev] [PATCH v4 27/40] intel/compiler: generalize the combine constants pass

2019-02-18 Thread Iago Toral
On Sat, 2019-02-16 at 09:29 -0600, Jason Ekstrand wrote:
> On Tue, Feb 12, 2019 at 5:57 AM Iago Toral Quiroga  > wrote:
> > At the very least we need it to handle HF too, since we are doing
> > 
> > constant propagation for MAD and LRP, which relies on this pass
> > 
> > to promote the immediates to GRF in the end, but ideally
> > 
> > we want it to support even more types so we can take advantage
> > 
> > of it to improve register pressure in some scenarios.
> > 
> > ---
> > 
> >  .../compiler/brw_fs_combine_constants.cpp | 202
> > --
> > 
> >  1 file changed, 180 insertions(+), 22 deletions(-)
> > 
> > 
> > 
> > diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp
> > b/src/intel/compiler/brw_fs_combine_constants.cpp
> > 
> > index 7343f77bb45..5d79f1a0826 100644
> > 
> > --- a/src/intel/compiler/brw_fs_combine_constants.cpp
> > 
> > +++ b/src/intel/compiler/brw_fs_combine_constants.cpp
> > 
> > @@ -36,6 +36,7 @@
> > 
> > 
> > 
> >  #include "brw_fs.h"
> > 
> >  #include "brw_cfg.h"
> > 
> > +#include "util/half_float.h"
> > 
> > 
> > 
> >  using namespace brw;
> > 
> > 
> > 
> > @@ -114,8 +115,17 @@ struct imm {
> > 
> >  */
> > 
> > exec_list *uses;
> > 
> > 
> > 
> > -   /** The immediate value.  We currently only handle floats. */
> > 
> > -   float val;
> > 
> > +   /** The immediate value */
> > 
> > +   union {
> > 
> > +  char bytes[8];
> > 
> > +  float f;
> > 
> > +  int32_t d;
> > 
> > +  int16_t w;
> > 
> > +   };
> > 
> > +   uint8_t size;
> > 
> > +
> > 
> > +   /** When promoting half-float we need to account for certain
> > restrictions */
> > 
> > +   bool is_half_float;
> > 
> > 
> > 
> > /**
> > 
> >  * The GRF register and subregister number where we've decided
> > to store the
> > 
> > @@ -145,10 +155,11 @@ struct table {
> > 
> >  };
> > 
> > 
> > 
> >  static struct imm *
> > 
> > -find_imm(struct table *table, float val)
> > 
> > +find_imm(struct table *table, void *data, uint8_t size)
> > 
> >  {
> > 
> > for (int i = 0; i < table->len; i++) {
> > 
> > -  if (table->imm[i].val == val) {
> > 
> > +  if (table->imm[i].size == size &&
> > 
> > +  !memcmp(table->imm[i].bytes, data, size)) {
> > 
> >   return >imm[i];
> > 
> >}
> > 
> > }
> > 
> > @@ -190,6 +201,96 @@ compare(const void *_a, const void *_b)
> > 
> > return a->first_use_ip - b->first_use_ip;
> > 
> >  }
> > 
> > 
> > 
> > +static bool
> > 
> > +get_constant_value(const struct gen_device_info *devinfo,
> > 
> > +   const fs_inst *inst, uint32_t src_idx,
> > 
> > +   void *out, brw_reg_type *out_type)
> > 
> > +{
> > 
> > +   const bool can_do_source_mods = inst-
> > >can_do_source_mods(devinfo);
> > 
> > +   const fs_reg *src = >src[src_idx];
> > 
> > +
> > 
> > +   *out_type = src->type;
> > 
> > +
> > 
> > +   switch (*out_type) {
> > 
> > +   case BRW_REGISTER_TYPE_F: {
> > 
> > +  float val = !can_do_source_mods ? src->f : fabsf(src->f);
> > 
> > +  memcpy(out, , 4);
> > 
> > +  break;
> > 
> > +   }
> > 
> > +   case BRW_REGISTER_TYPE_HF: {
> > 
> > +  uint16_t val = src->d & 0xu;
> > 
> > +  if (can_do_source_mods)
> > 
> > + val =
> > _mesa_float_to_half(fabsf(_mesa_half_to_float(val)));
> > 
> > +  memcpy(out, , 2);
> > 
> > +  break;
> > 
> > +   }
> > 
> > +   case BRW_REGISTER_TYPE_D: {
> > 
> > +  int32_t val = !can_do_source_mods ? src->d : abs(src->d);
> > 
> > +  memcpy(out, , 4);
> > 
> > +  break;
> > 
> > +   }
> > 
> > +   case BRW_REGISTER_TYPE_UD:
> > 
> > +  memcpy(out, >ud, 4);
> > 
> > +  break;
> > 
> > +   case BRW_REGISTER_TYPE_W: {
> > 
> > +  int16_t val = src->d & 0xu;
> > 
> > +  if (can_do_source_mods)
> > 
> > + val = abs(val);
> > 
> > +  memcpy(out, , 2);
> > 
> > +  break;
> > 
> > +   }
> > 
> > +   case BRW_REGISTER_TYPE_UW:
> > 
> > +  memcpy(out, >ud, 2);
> > 
> > +  break;
> 
> You could also throw in DF and Q types.  This is probably sufficient
> for now though.

Sure, I was waiting to do that until I started enabling constant
propagation of 64-bit types but there is no reason why we can't leave
the pass prepared for that I guess. It seems that for platforms that
don't support 64-bit types we should not be seeing 64-bit constants
here, so I guess there is no risk.
>  
> > +   default:
> > 
> > +  return false;
> > 
> > +   };
> > 
> > +
> > 
> > +   return true;
> > 
> > +}
> > 
> > +
> > 
> > +static struct brw_reg
> > 
> > +build_imm_reg_for_copy(struct imm *imm)
> > 
> > +{
> > 
> > +   switch (imm->size) {
> > 
> > +   case 4:
> > 
> > +  return brw_imm_d(imm->d);
> > 
> > +   case 2:
> > 
> > +  return brw_imm_w(imm->w);
> > 
> > +   default:
> > 
> > +  unreachable("not implemented");
> > 
> > +   }
> > 
> > +}
> > 
> > +
> > 
> > +static inline uint32_t
> > 
> > +get_alignment_for_imm(const struct imm *imm)
> > 
> > +{
> > 
> > 

Re: [Mesa-dev] [PATCH v4 27/40] intel/compiler: generalize the combine constants pass

2019-02-16 Thread Jason Ekstrand
On Tue, Feb 12, 2019 at 5:57 AM Iago Toral Quiroga 
wrote:

> At the very least we need it to handle HF too, since we are doing
> constant propagation for MAD and LRP, which relies on this pass
> to promote the immediates to GRF in the end, but ideally
> we want it to support even more types so we can take advantage
> of it to improve register pressure in some scenarios.
> ---
>  .../compiler/brw_fs_combine_constants.cpp | 202 --
>  1 file changed, 180 insertions(+), 22 deletions(-)
>
> diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp
> b/src/intel/compiler/brw_fs_combine_constants.cpp
> index 7343f77bb45..5d79f1a0826 100644
> --- a/src/intel/compiler/brw_fs_combine_constants.cpp
> +++ b/src/intel/compiler/brw_fs_combine_constants.cpp
> @@ -36,6 +36,7 @@
>
>  #include "brw_fs.h"
>  #include "brw_cfg.h"
> +#include "util/half_float.h"
>
>  using namespace brw;
>
> @@ -114,8 +115,17 @@ struct imm {
>  */
> exec_list *uses;
>
> -   /** The immediate value.  We currently only handle floats. */
> -   float val;
> +   /** The immediate value */
> +   union {
> +  char bytes[8];
> +  float f;
> +  int32_t d;
> +  int16_t w;
> +   };
> +   uint8_t size;
> +
> +   /** When promoting half-float we need to account for certain
> restrictions */
> +   bool is_half_float;
>
> /**
>  * The GRF register and subregister number where we've decided to
> store the
> @@ -145,10 +155,11 @@ struct table {
>  };
>
>  static struct imm *
> -find_imm(struct table *table, float val)
> +find_imm(struct table *table, void *data, uint8_t size)
>  {
> for (int i = 0; i < table->len; i++) {
> -  if (table->imm[i].val == val) {
> +  if (table->imm[i].size == size &&
> +  !memcmp(table->imm[i].bytes, data, size)) {
>   return >imm[i];
>}
> }
> @@ -190,6 +201,96 @@ compare(const void *_a, const void *_b)
> return a->first_use_ip - b->first_use_ip;
>  }
>
> +static bool
> +get_constant_value(const struct gen_device_info *devinfo,
> +   const fs_inst *inst, uint32_t src_idx,
> +   void *out, brw_reg_type *out_type)
> +{
> +   const bool can_do_source_mods = inst->can_do_source_mods(devinfo);
> +   const fs_reg *src = >src[src_idx];
> +
> +   *out_type = src->type;
> +
> +   switch (*out_type) {
> +   case BRW_REGISTER_TYPE_F: {
> +  float val = !can_do_source_mods ? src->f : fabsf(src->f);
> +  memcpy(out, , 4);
> +  break;
> +   }
> +   case BRW_REGISTER_TYPE_HF: {
> +  uint16_t val = src->d & 0xu;
> +  if (can_do_source_mods)
> + val = _mesa_float_to_half(fabsf(_mesa_half_to_float(val)));
> +  memcpy(out, , 2);
> +  break;
> +   }
> +   case BRW_REGISTER_TYPE_D: {
> +  int32_t val = !can_do_source_mods ? src->d : abs(src->d);
> +  memcpy(out, , 4);
> +  break;
> +   }
> +   case BRW_REGISTER_TYPE_UD:
> +  memcpy(out, >ud, 4);
> +  break;
> +   case BRW_REGISTER_TYPE_W: {
> +  int16_t val = src->d & 0xu;
> +  if (can_do_source_mods)
> + val = abs(val);
> +  memcpy(out, , 2);
> +  break;
> +   }
> +   case BRW_REGISTER_TYPE_UW:
> +  memcpy(out, >ud, 2);
> +  break;
>

You could also throw in DF and Q types.  This is probably sufficient for
now though.


> +   default:
> +  return false;
> +   };
> +
> +   return true;
> +}
> +
> +static struct brw_reg
> +build_imm_reg_for_copy(struct imm *imm)
> +{
> +   switch (imm->size) {
> +   case 4:
> +  return brw_imm_d(imm->d);
> +   case 2:
> +  return brw_imm_w(imm->w);
> +   default:
> +  unreachable("not implemented");
> +   }
> +}
> +
> +static inline uint32_t
> +get_alignment_for_imm(const struct imm *imm)
> +{
> +   if (imm->is_half_float)
> +  return 4; /* At least MAD seems to require this */
> +   else
> +  return imm->size;
> +}
> +
> +static bool
> +needs_negate(const struct fs_reg *reg, const struct imm *imm)
> +{
> +   switch (reg->type) {
> +   case BRW_REGISTER_TYPE_F:
> +  return signbit(reg->f) != signbit(imm->f);
> +   case BRW_REGISTER_TYPE_D:
> +  return (reg->d < 0) != (imm->d < 0);
> +   case BRW_REGISTER_TYPE_HF:
> +  return (reg->d & 0x8000u) != (imm->w & 0x8000u);
> +   case BRW_REGISTER_TYPE_W:
> +  return ((reg->d & 0xu) < 0) != (imm->w < 0);
> +   case BRW_REGISTER_TYPE_UD:
> +   case BRW_REGISTER_TYPE_UW:
> +  return false;
> +   default:
> +  unreachable("not implemented");
> +   };
> +}
> +
>  bool
>  fs_visitor::opt_combine_constants()
>  {
> @@ -214,13 +315,17 @@ fs_visitor::opt_combine_constants()
>   continue;
>
>for (int i = 0; i < inst->sources; i++) {
> - if (inst->src[i].file != IMM ||
> - inst->src[i].type != BRW_REGISTER_TYPE_F)
> + if (inst->src[i].file != IMM)
>  continue;
>
> - float val = !inst->can_do_source_mods(devinfo) ? inst->src[i].f :
> - fabs(inst->src[i].f);
> - struct imm 

[Mesa-dev] [PATCH v4 27/40] intel/compiler: generalize the combine constants pass

2019-02-12 Thread Iago Toral Quiroga
At the very least we need it to handle HF too, since we are doing
constant propagation for MAD and LRP, which relies on this pass
to promote the immediates to GRF in the end, but ideally
we want it to support even more types so we can take advantage
of it to improve register pressure in some scenarios.
---
 .../compiler/brw_fs_combine_constants.cpp | 202 --
 1 file changed, 180 insertions(+), 22 deletions(-)

diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp 
b/src/intel/compiler/brw_fs_combine_constants.cpp
index 7343f77bb45..5d79f1a0826 100644
--- a/src/intel/compiler/brw_fs_combine_constants.cpp
+++ b/src/intel/compiler/brw_fs_combine_constants.cpp
@@ -36,6 +36,7 @@
 
 #include "brw_fs.h"
 #include "brw_cfg.h"
+#include "util/half_float.h"
 
 using namespace brw;
 
@@ -114,8 +115,17 @@ struct imm {
 */
exec_list *uses;
 
-   /** The immediate value.  We currently only handle floats. */
-   float val;
+   /** The immediate value */
+   union {
+  char bytes[8];
+  float f;
+  int32_t d;
+  int16_t w;
+   };
+   uint8_t size;
+
+   /** When promoting half-float we need to account for certain restrictions */
+   bool is_half_float;
 
/**
 * The GRF register and subregister number where we've decided to store the
@@ -145,10 +155,11 @@ struct table {
 };
 
 static struct imm *
-find_imm(struct table *table, float val)
+find_imm(struct table *table, void *data, uint8_t size)
 {
for (int i = 0; i < table->len; i++) {
-  if (table->imm[i].val == val) {
+  if (table->imm[i].size == size &&
+  !memcmp(table->imm[i].bytes, data, size)) {
  return >imm[i];
   }
}
@@ -190,6 +201,96 @@ compare(const void *_a, const void *_b)
return a->first_use_ip - b->first_use_ip;
 }
 
+static bool
+get_constant_value(const struct gen_device_info *devinfo,
+   const fs_inst *inst, uint32_t src_idx,
+   void *out, brw_reg_type *out_type)
+{
+   const bool can_do_source_mods = inst->can_do_source_mods(devinfo);
+   const fs_reg *src = >src[src_idx];
+
+   *out_type = src->type;
+
+   switch (*out_type) {
+   case BRW_REGISTER_TYPE_F: {
+  float val = !can_do_source_mods ? src->f : fabsf(src->f);
+  memcpy(out, , 4);
+  break;
+   }
+   case BRW_REGISTER_TYPE_HF: {
+  uint16_t val = src->d & 0xu;
+  if (can_do_source_mods)
+ val = _mesa_float_to_half(fabsf(_mesa_half_to_float(val)));
+  memcpy(out, , 2);
+  break;
+   }
+   case BRW_REGISTER_TYPE_D: {
+  int32_t val = !can_do_source_mods ? src->d : abs(src->d);
+  memcpy(out, , 4);
+  break;
+   }
+   case BRW_REGISTER_TYPE_UD:
+  memcpy(out, >ud, 4);
+  break;
+   case BRW_REGISTER_TYPE_W: {
+  int16_t val = src->d & 0xu;
+  if (can_do_source_mods)
+ val = abs(val);
+  memcpy(out, , 2);
+  break;
+   }
+   case BRW_REGISTER_TYPE_UW:
+  memcpy(out, >ud, 2);
+  break;
+   default:
+  return false;
+   };
+
+   return true;
+}
+
+static struct brw_reg
+build_imm_reg_for_copy(struct imm *imm)
+{
+   switch (imm->size) {
+   case 4:
+  return brw_imm_d(imm->d);
+   case 2:
+  return brw_imm_w(imm->w);
+   default:
+  unreachable("not implemented");
+   }
+}
+
+static inline uint32_t
+get_alignment_for_imm(const struct imm *imm)
+{
+   if (imm->is_half_float)
+  return 4; /* At least MAD seems to require this */
+   else
+  return imm->size;
+}
+
+static bool
+needs_negate(const struct fs_reg *reg, const struct imm *imm)
+{
+   switch (reg->type) {
+   case BRW_REGISTER_TYPE_F:
+  return signbit(reg->f) != signbit(imm->f);
+   case BRW_REGISTER_TYPE_D:
+  return (reg->d < 0) != (imm->d < 0);
+   case BRW_REGISTER_TYPE_HF:
+  return (reg->d & 0x8000u) != (imm->w & 0x8000u);
+   case BRW_REGISTER_TYPE_W:
+  return ((reg->d & 0xu) < 0) != (imm->w < 0);
+   case BRW_REGISTER_TYPE_UD:
+   case BRW_REGISTER_TYPE_UW:
+  return false;
+   default:
+  unreachable("not implemented");
+   };
+}
+
 bool
 fs_visitor::opt_combine_constants()
 {
@@ -214,13 +315,17 @@ fs_visitor::opt_combine_constants()
  continue;
 
   for (int i = 0; i < inst->sources; i++) {
- if (inst->src[i].file != IMM ||
- inst->src[i].type != BRW_REGISTER_TYPE_F)
+ if (inst->src[i].file != IMM)
 continue;
 
- float val = !inst->can_do_source_mods(devinfo) ? inst->src[i].f :
- fabs(inst->src[i].f);
- struct imm *imm = find_imm(, val);
+ char data[8];
+ brw_reg_type type;
+ if (!get_constant_value(devinfo, inst, i, data, ))
+continue;
+
+ uint8_t size = type_sz(type);
+
+ struct imm *imm = find_imm(, data, size);
 
  if (imm) {
 bblock_t *intersection = cfg_t::intersect(block, imm->block);
@@ -237,7 +342,9 @@ fs_visitor::opt_combine_constants()
 imm->inst = inst;
 imm->uses