Re: [Mesa-dev] [PATCH v2 51/53] intel/compiler: support half-float in the combine constants pass

2019-01-02 Thread Iago Toral
On Wed, 2019-01-02 at 13:02 +0200, Pohjolainen, Topi wrote:
> On Wed, Dec 19, 2018 at 12:51:19PM +0100, Iago Toral Quiroga wrote:
> > ---
> >  .../compiler/brw_fs_combine_constants.cpp | 60
> > +++
> >  1 file changed, 49 insertions(+), 11 deletions(-)
> > 
> > diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp
> > b/src/intel/compiler/brw_fs_combine_constants.cpp
> > index e0c95d379b8..24307e365ab 100644
> > --- a/src/intel/compiler/brw_fs_combine_constants.cpp
> > +++ b/src/intel/compiler/brw_fs_combine_constants.cpp
> > @@ -36,6 +36,7 @@
> >  
> >  #include "brw_fs.h"
> >  #include "brw_cfg.h"
> > +#include "util/half_float.h"
> >  
> >  using namespace brw;
> >  
> > @@ -114,8 +115,9 @@ struct imm {
> >  */
> > exec_list *uses;
> >  
> > -   /** The immediate value.  We currently only handle floats. */
> > +   /** The immediate value.  We currently only handle float and
> > half-float. */
> > float val;
> > +   brw_reg_type type;
> >  
> > /**
> >  * The GRF register and subregister number where we've decided
> > to store the
> > @@ -145,10 +147,10 @@ struct table {
> >  };
> >  
> >  static struct imm *
> > -find_imm(struct table *table, float val)
> > +find_imm(struct table *table, float val, brw_reg_type type)
> >  {
> > for (int i = 0; i < table->len; i++) {
> > -  if (table->imm[i].val == val) {
> > +  if (table->imm[i].val == val && table->imm[i].type == type)
> > {
> >   return &table->imm[i];
> >}
> > }
> > @@ -190,6 +192,20 @@ compare(const void *_a, const void *_b)
> > return a->first_use_ip - b->first_use_ip;
> >  }
> >  
> > +static bool
> > +needs_negate(float reg_val, float imm_val, brw_reg_type type)
> > +{
> > +   /* reg_val represents the immediate value in the register in
> > its original
> > +* bit-size, while imm_val is always a valid 32-bit float
> > value.
> > +*/
> > +   if (type == BRW_REGISTER_TYPE_HF) {
> > > +  uint32_t reg_val_ud = *((uint32_t *) &reg_val);
> 
> Casting "float" to uint32_t and reading then only 16-bits from it
> looks a
> little ugly. Could we use "uint32_t reg_val" and then below in the
> caller
> use "reg->u" instead of "reg->f"?

No, because the signbit macro used below works on floating point values,
so if we did that, we would have to do a pointer cast to float before
we call that macro with reg_val.

One thing we can do is to pass the fs_reg instead of reg->f and then
read reg->f or reg->u as we need inside this function. We would be
abusing the interface a bit, but we would get rid of the casting that
way... I am not sure if I like it better or worse.

Alternatively, we could just memcpy reg_val into a uint32_t to avoid
the pointer cast. Maybe that looks less ugly.

What do you think?

> > > +  reg_val = _mesa_half_to_float(reg_val_ud & 0xffff);
> > +   }
> > +
> > +   return signbit(imm_val) != signbit(reg_val);
> > +}
> > +
> >  bool
> >  fs_visitor::opt_combine_constants()
> >  {
> > @@ -215,12 +231,20 @@ fs_visitor::opt_combine_constants()
> >  
> >for (int i = 0; i < inst->sources; i++) {
> >   if (inst->src[i].file != IMM ||
> > - inst->src[i].type != BRW_REGISTER_TYPE_F)
> > + (inst->src[i].type != BRW_REGISTER_TYPE_F &&
> > +  inst->src[i].type != BRW_REGISTER_TYPE_HF))
> >  continue;
> >  
> > - float val = !inst->can_do_source_mods(devinfo) ? inst-
> > >src[i].f :
> > - fabs(inst->src[i].f);
> > - struct imm *imm = find_imm(&table, val);
> > + float val;
> > + if (inst->src[i].type == BRW_REGISTER_TYPE_F) {
> > +val = !inst->can_do_source_mods(devinfo) ? inst-
> > >src[i].f :
> > +fabs(inst->src[i].f);
> > + } else {
> > +val = !inst->can_do_source_mods(devinfo) ?
> > > +   _mesa_half_to_float(inst->src[i].d & 0xffff) :
> > > +   fabs(_mesa_half_to_float(inst->src[i].d & 0xffff));
> > + }
> > + struct imm *imm = find_imm(&table, val, inst-
> > >src[i].type);
> >  
> >   if (imm) {
> >  bblock_t *intersection = cfg_t::intersect(block, imm-
> > >block);
> > @@ -238,6 +262,7 @@ fs_visitor::opt_combine_constants()
> >  imm->uses = new(const_ctx) exec_list();
> >  imm->uses->push_tail(link(const_ctx, &inst->src[i]));
> >  imm->val = val;
> > +imm->type = inst->src[i].type;
> >  imm->uses_by_coissue = could_coissue(devinfo, inst);
> >  imm->must_promote = must_promote_imm(devinfo, inst);
> >  imm->first_use_ip = ip;
> > @@ -278,12 +303,23 @@ fs_visitor::opt_combine_constants()
> >imm->block->last_non_control_flow_inst()-
> > >next);
> >const fs_builder ibld = bld.at(imm->block,
> > n).exec_all().group(1, 0);
> >  
> > -  ibld.MOV(reg, brw_imm_f(imm->val));
> > +  reg = retype(reg, imm->type);
> > +  if (imm

Re: [Mesa-dev] [PATCH v2 49/53] intel/compiler: fix cmod propagation for non 32-bit types

2019-01-02 Thread Iago Toral
On Thu, 2019-01-03 at 08:25 +0100, Iago Toral wrote:
> On Wed, 2019-01-02 at 12:42 +0200, Pohjolainen, Topi wrote:
> > On Wed, Dec 19, 2018 at 12:51:17PM +0100, Iago Toral Quiroga wrote:
> > > ---
> > >  src/intel/compiler/brw_fs_cmod_propagation.cpp | 8 +++-
> > >  1 file changed, 3 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/src/intel/compiler/brw_fs_cmod_propagation.cpp
> > > b/src/intel/compiler/brw_fs_cmod_propagation.cpp
> > > index 7bb5c9afbc9..dfef9d720a2 100644
> > > --- a/src/intel/compiler/brw_fs_cmod_propagation.cpp
> > > +++ b/src/intel/compiler/brw_fs_cmod_propagation.cpp
> > > @@ -244,8 +244,7 @@ opt_cmod_propagation_local(const
> > > gen_device_info *devinfo,
> > >  /* CMP's result is the same regardless of dest type.
> > > */
> > >  if (inst->conditional_mod == BRW_CONDITIONAL_NZ &&
> > >  scan_inst->opcode == BRW_OPCODE_CMP &&
> > > -(inst->dst.type == BRW_REGISTER_TYPE_D ||
> > > - inst->dst.type == BRW_REGISTER_TYPE_UD)) {
> > > +brw_reg_type_is_integer(inst->dst.type)) {
> > > inst->remove(block);
> > > progress = true;
> > > break;
> > > @@ -258,9 +257,8 @@ opt_cmod_propagation_local(const
> > > gen_device_info *devinfo,
> > > break;
> > >  
> > >  /* Comparisons operate differently for ints and
> > > floats
> > > */
> > > -if (scan_inst->dst.type != inst->dst.type &&
> > 
> > This wouldn't let, for example, (DF, F) pair thru while the new
> > version does.
> > Should we keep this line?
> 
> This is about not turning a floating point comparison into an integer
> comparison and vice versa, but F and DF are both floating point and
> follow the same rules, so I think this should be okay in that respect.
> 
> With that being said though, I wonder if we should prevent propagation
> when the types don't have the same bit-size, since even if the
> underlying comparison semantics are the same, we could have different
> behaviors for out-of-range values... I wonder if that can actually
> ever
> happen though, since that would mean that scan_inst writes, for
> example
> a DF, and then the CMP reads it as F... but anyway, I guess being
> extra
> careful here doesn't hurt.

To be more precise, I am thinking of adding this below, as a separate
check:

/* Comparison result may be altered if the bit-size changes
 * since that affects range, denorms, etc
 */
if (type_sz(scan_inst->dst.type) != type_sz(inst->dst.type))
   break;

> > > -(scan_inst->dst.type == BRW_REGISTER_TYPE_F ||
> > > - inst->dst.type == BRW_REGISTER_TYPE_F))
> > > +if (brw_reg_type_is_floating_point(scan_inst-
> > > > dst.type) !=
> > > 
> > > +brw_reg_type_is_floating_point(inst->dst.type))
> > > break;
> > >  
> > >  /* If the instruction generating inst's source also
> > > wrote the
> > > -- 
> > > 2.17.1
> > > 
> > > ___
> > > mesa-dev mailing list
> > > mesa-dev@lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> > 
> > 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 49/53] intel/compiler: fix cmod propagation for non 32-bit types

2019-01-02 Thread Iago Toral
On Wed, 2019-01-02 at 12:42 +0200, Pohjolainen, Topi wrote:
> On Wed, Dec 19, 2018 at 12:51:17PM +0100, Iago Toral Quiroga wrote:
> > ---
> >  src/intel/compiler/brw_fs_cmod_propagation.cpp | 8 +++-
> >  1 file changed, 3 insertions(+), 5 deletions(-)
> > 
> > diff --git a/src/intel/compiler/brw_fs_cmod_propagation.cpp
> > b/src/intel/compiler/brw_fs_cmod_propagation.cpp
> > index 7bb5c9afbc9..dfef9d720a2 100644
> > --- a/src/intel/compiler/brw_fs_cmod_propagation.cpp
> > +++ b/src/intel/compiler/brw_fs_cmod_propagation.cpp
> > @@ -244,8 +244,7 @@ opt_cmod_propagation_local(const
> > gen_device_info *devinfo,
> >  /* CMP's result is the same regardless of dest type.
> > */
> >  if (inst->conditional_mod == BRW_CONDITIONAL_NZ &&
> >  scan_inst->opcode == BRW_OPCODE_CMP &&
> > -(inst->dst.type == BRW_REGISTER_TYPE_D ||
> > - inst->dst.type == BRW_REGISTER_TYPE_UD)) {
> > +brw_reg_type_is_integer(inst->dst.type)) {
> > inst->remove(block);
> > progress = true;
> > break;
> > @@ -258,9 +257,8 @@ opt_cmod_propagation_local(const
> > gen_device_info *devinfo,
> > break;
> >  
> >  /* Comparisons operate differently for ints and floats
> > */
> > -if (scan_inst->dst.type != inst->dst.type &&
> 
> This wouldn't let, for example, (DF, F) pair thru while the new
> version does.
> Should we keep this line?

This is about not turning a floating point comparison into an integer
comparison and vice versa, but F and DF are both floating point and
follow the same rules, so I think this should be okay in that respect.

With that being said though, I wonder if we should prevent propagation
when the types don't have the same bit-size, since even if the
underlying comparison semantics are the same, we could have different
behaviors for out-of-range values... I wonder if that can actually ever
happen though, since that would mean that scan_inst writes, for example
a DF, and then the CMP reads it as F... but anyway, I guess being extra
careful here doesn't hurt.

> > -(scan_inst->dst.type == BRW_REGISTER_TYPE_F ||
> > - inst->dst.type == BRW_REGISTER_TYPE_F))
> > +if (brw_reg_type_is_floating_point(scan_inst-
> > >dst.type) !=
> > +brw_reg_type_is_floating_point(inst->dst.type))
> > break;
> >  
> >  /* If the instruction generating inst's source also
> > wrote the
> > -- 
> > 2.17.1
> > 
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 38/53] intel/compiler: handle 64-bit to 8-bit conversions

2019-01-02 Thread Iago Toral
On Wed, 2019-01-02 at 11:59 +0200, Pohjolainen, Topi wrote:
> On Wed, Dec 19, 2018 at 12:51:06PM +0100, Iago Toral Quiroga wrote:
> > These are not directly supported in hardware and
> > brw_nir_lower_conversions
> > should have taken care of that before we get here.
> 
> It looks that there are two things actually happening here:
> 
> 1) For the int64/uint64 to 8-bit case the support is already there and this
> just moves
>    the case to a stronger one with an assert.
> 
> 2) Actually adding support for DF to 8-bit that didn't exist before.
> 
> If this is the case (i.e., I'm not missing something), should we
> adjust the
> commit to say that DF to 8-bit support is added and then add a note
> in the
> commit that I64/U64 to 8-bit gets an additional assertion?

Yes, I'll edit the commit log, thanks.

Iago

> > ---
> >  src/intel/compiler/brw_fs_nir.cpp | 6 --
> >  1 file changed, 4 insertions(+), 2 deletions(-)
> > 
> > diff --git a/src/intel/compiler/brw_fs_nir.cpp
> > b/src/intel/compiler/brw_fs_nir.cpp
> > index 6089c883c9a..802f5cb0944 100644
> > --- a/src/intel/compiler/brw_fs_nir.cpp
> > +++ b/src/intel/compiler/brw_fs_nir.cpp
> > @@ -818,6 +818,10 @@ fs_visitor::nir_emit_alu(const fs_builder
> > &bld, nir_alu_instr *instr)
> > case nir_op_f2f16:
> > case nir_op_i2f16:
> > case nir_op_u2f16:
> > +   case nir_op_i2i8:
> > +   case nir_op_u2u8:
> > +   case nir_op_f2i8:
> > +   case nir_op_f2u8:
> >assert(type_sz(op[0].type) < 8); /*
> > brw_nir_lower_conversions */
> >inst = bld.MOV(result, op[0]);
> >inst->saturate = instr->dest.saturate;
> > @@ -860,8 +864,6 @@ fs_visitor::nir_emit_alu(const fs_builder &bld,
> > nir_alu_instr *instr)
> > case nir_op_u2u32:
> > case nir_op_i2i16:
> > case nir_op_u2u16:
> > -   case nir_op_i2i8:
> > -   case nir_op_u2u8:
> >inst = bld.MOV(result, op[0]);
> >inst->saturate = instr->dest.saturate;
> >break;
> > -- 
> > 2.17.1
> > 
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 35/53] intel/compiler: workaround for SIMD8 half-float MAD in gen < 9

2019-01-02 Thread Iago Toral
On Wed, 2019-01-02 at 11:46 +0200, Pohjolainen, Topi wrote:
> On Wed, Dec 19, 2018 at 12:51:03PM +0100, Iago Toral Quiroga wrote:
> > Broadwell hardware has a bug that manifests in SIMD8 executions of
> > 16-bit MAD instructions when any of the sources is a Y or W
> > component.
> > We pack these components in the same SIMD register as components X
> > and
> > Z respectively, but starting at offset 16B (so they live in the
> > second
> > half of the register). The problem does not exist in SKL or later.
> > 
> > We work around this issue by moving any such sources to a temporary
> > starting at offset 0B. We want to do this after the main
> > optimization loop
> > to prevent copy-propagation and friends to undo the fix.
> > ---
> >  src/intel/compiler/brw_fs.cpp | 48
> > +++
> >  src/intel/compiler/brw_fs.h   |  1 +
> >  2 files changed, 49 insertions(+)
> > 
> > diff --git a/src/intel/compiler/brw_fs.cpp
> > b/src/intel/compiler/brw_fs.cpp
> > index 933b0b6ffc4..1343c2f4993 100644
> > --- a/src/intel/compiler/brw_fs.cpp
> > +++ b/src/intel/compiler/brw_fs.cpp
> > @@ -6449,6 +6449,48 @@ fs_visitor::optimize()
> > validate();
> >  }
> >  
> > +/**
> > + * Broadwell hardware has a bug that manifests in SIMD8 executions
> > of 16-bit
> > + * MAD instructions when any of the sources is a Y or W component.
> > We pack
> > + * these components in the same SIMD register as components X and
> > Z
> > + * respectively, but starting at offset 16B (so they live in the
> > second half
> > + * of the register).
> > + *
> > + * We work around this issue by moving any such sources to a
> > temporary
> > + * starting at offset 0B. We want to do this after the main
> > optimization loop
> > + * to prevent copy-propagation and friends to undo the fix.
> > + */
> > +void
> > +fs_visitor::fixup_hf_mad()
> > +{
> > +   if (devinfo->gen > 8)
> 
> We don't want to run this for gen < 8 either as it would iterate the
> instructions in vain. So just:
> 
>   if (devinfo->gen == 8)

Right, good point.

It should be "if (devinfo->gen != 8)" though. I'll fix this, thanks!

> Otherwise:
> 
> Reviewed-by: Topi Pohjolainen 
> 
> > +  return;
> > +
> > +   bool progress = false;
> > +
> > +   foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
> > +  if (inst->opcode != BRW_OPCODE_MAD ||
> > +  inst->dst.type != BRW_REGISTER_TYPE_HF ||
> > +  inst->exec_size > 8)
> > + continue;
> > +
> > +  for (int i = 0; i < 3; i++) {
> > + if (inst->src[i].offset > 0) {
> > +assert(inst->src[i].type == BRW_REGISTER_TYPE_HF);
> > +const fs_builder ibld =
> > +   bld.at(block, inst).exec_all().group(inst-
> > >exec_size, 0);
> > +fs_reg tmp = ibld.vgrf(inst->src[i].type);
> > +ibld.MOV(tmp, inst->src[i]);
> > +inst->src[i] = tmp;
> > +progress = true;
> > + }
> > +  }
> > +   }
> > +
> > +   if (progress)
> > +  invalidate_live_intervals();
> > +}
> > +
> >  /**
> >   * Three source instruction must have a GRF/MRF destination
> > register.
> >   * ARF NULL is not allowed.  Fix that up by allocating a temporary
> > GRF.
> > @@ -6607,6 +6649,7 @@ fs_visitor::run_vs()
> > assign_curb_setup();
> > assign_vs_urb_setup();
> >  
> > +   fixup_hf_mad();
> > fixup_3src_null_dest();
> > allocate_registers(8, true);
> >  
> > @@ -6691,6 +6734,7 @@ fs_visitor::run_tcs_single_patch()
> > assign_curb_setup();
> > assign_tcs_single_patch_urb_setup();
> >  
> > +   fixup_hf_mad();
> > fixup_3src_null_dest();
> > allocate_registers(8, true);
> >  
> > @@ -6725,6 +6769,7 @@ fs_visitor::run_tes()
> > assign_curb_setup();
> > assign_tes_urb_setup();
> >  
> > +   fixup_hf_mad();
> > fixup_3src_null_dest();
> > allocate_registers(8, true);
> >  
> > @@ -6774,6 +6819,7 @@ fs_visitor::run_gs()
> > assign_curb_setup();
> > assign_gs_urb_setup();
> >  
> > +   fixup_hf_mad();
> > fixup_3src_null_dest();
> > allocate_registers(8, true);
> >  
> > @@ -6874,6 +6920,7 @@ fs_visitor::run_fs(bool allow_spilling, bool
> > do_rep_send)
> >  
> >assign_urb_setup();
> >  
> > +  fixup_hf_mad();
> >fixup_3src_null_dest();
> >allocate_registers(8, allow_spilling);
> >  
> > @@ -6918,6 +6965,7 @@ fs_visitor::run_cs(unsigned
> > min_dispatch_width)
> >  
> > assign_curb_setup();
> >  
> > +   fixup_hf_mad();
> > fixup_3src_null_dest();
> > allocate_registers(min_dispatch_width, true);
> >  
> > diff --git a/src/intel/compiler/brw_fs.h
> > b/src/intel/compiler/brw_fs.h
> > index 163c0008820..f79f8554fb9 100644
> > --- a/src/intel/compiler/brw_fs.h
> > +++ b/src/intel/compiler/brw_fs.h
> > @@ -103,6 +103,7 @@ public:
> > void setup_vs_payload();
> > void setup_gs_payload();
> > void setup_cs_payload();
> > +   void fixup_hf_mad();
> > void fixup_3src_null_dest();
> > void assign_curb_set

Re: [Mesa-dev] [PATCH v2 28/53] intel/compiler: add new half-float register type for 3-src instructions

2019-01-02 Thread Iago Toral
On Wed, 2019-01-02 at 11:35 +0200, Pohjolainen, Topi wrote:
> On Wed, Dec 19, 2018 at 12:50:56PM +0100, Iago Toral Quiroga wrote:
> > This is available since gen8.
> > ---
> >  src/intel/compiler/brw_reg_type.c | 35
> > +++
> >  1 file changed, 31 insertions(+), 4 deletions(-)
> > 
> > diff --git a/src/intel/compiler/brw_reg_type.c
> > b/src/intel/compiler/brw_reg_type.c
> > index 60240ba1513..72295a2bd75 100644
> > --- a/src/intel/compiler/brw_reg_type.c
> > +++ b/src/intel/compiler/brw_reg_type.c
> > @@ -138,6 +138,7 @@ enum hw_3src_reg_type {
> > GEN7_3SRC_TYPE_D  = 1,
> > GEN7_3SRC_TYPE_UD = 2,
> > GEN7_3SRC_TYPE_DF = 3,
> > +   GEN8_3SRC_TYPE_HF = 4,
> >  
> > /** When ExecutionDatatype is 1: @{ */
> > GEN10_ALIGN1_3SRC_REG_TYPE_HF = 0b000,
> > @@ -166,6 +167,14 @@ static const struct hw_3src_type {
> > [BRW_REGISTER_TYPE_D]  = { GEN7_3SRC_TYPE_D  },
> > [BRW_REGISTER_TYPE_UD] = { GEN7_3SRC_TYPE_UD },
> > [BRW_REGISTER_TYPE_DF] = { GEN7_3SRC_TYPE_DF },
> > +}, gen8_hw_3src_type[] = {
> > +   [0 ... BRW_REGISTER_TYPE_LAST] = { INVALID },
> > +
> > +   [BRW_REGISTER_TYPE_F]  = { GEN7_3SRC_TYPE_F  },
> > +   [BRW_REGISTER_TYPE_D]  = { GEN7_3SRC_TYPE_D  },
> > +   [BRW_REGISTER_TYPE_UD] = { GEN7_3SRC_TYPE_UD },
> > +   [BRW_REGISTER_TYPE_DF] = { GEN7_3SRC_TYPE_DF },
> > +   [BRW_REGISTER_TYPE_HF] = { GEN8_3SRC_TYPE_HF },
> >  }, gen10_hw_3src_align1_type[] = {
> >  #define E(x) BRW_ALIGN1_3SRC_EXEC_TYPE_##x
> > [0 ... BRW_REGISTER_TYPE_LAST] = { INVALID },
> > @@ -249,6 +258,20 @@ brw_hw_type_to_reg_type(const struct
> > gen_device_info *devinfo,
> > unreachable("not reached");
> >  }
> >  
> > +static inline const struct hw_3src_type *
> > +get_hw_3src_type_map(const struct gen_device_info *devinfo,
> > uint32_t *size)
> > +{
> > +   if (devinfo->gen < 8) {
> > +  if (size)
> > + *size = ARRAY_SIZE(gen7_hw_3src_type);
> > +  return gen7_hw_3src_type;
> > +   } else {
> > +  if (size)
> > + *size = ARRAY_SIZE(gen8_hw_3src_type);
> > +  return gen8_hw_3src_type;
> > +   }
> > +}
> > +
> >  /**
> >   * Convert a brw_reg_type enumeration value into the hardware
> > representation
> >   * for a 3-src align16 instruction
> > @@ -257,9 +280,11 @@ unsigned
> >  brw_reg_type_to_a16_hw_3src_type(const struct gen_device_info
> > *devinfo,
> >   enum brw_reg_type type)
> >  {
> > -   assert(type < ARRAY_SIZE(gen7_hw_3src_type));
> > -   assert(gen7_hw_3src_type[type].reg_type != (enum
> > hw_3src_reg_type)INVALID);
> > -   return gen7_hw_3src_type[type].reg_type;
> > +   uint32_t map_size;
> > +   const struct hw_3src_type *hw_3src_type_map =
> > +  get_hw_3src_type_map(devinfo, &map_size);
> > +   assert(hw_3src_type_map[type].reg_type != (enum
> > hw_3src_reg_type)INVALID);
> > +   return hw_3src_type_map[type].reg_type;
> 
> I wonder if we should use a style equivalent to
> brw_reg_type_to_hw_type() and
> brw_hw_type_to_reg_type() and inline the table (or map) selection:

I don't have a strong opinion, but since we need this in at least two
different places I think it is best to have that code in a single
function that we can reuse rather than replicating it wherever we need
it. I'd be more in favor of changing the other functions to follow a
similar pattern for the same reason.

Iago

>   const struct hw_type *table;
> 
>   if (devinfo->gen >= 8) {
>  assert(type < ARRAY_SIZE(gen8_hw_3src_type));
>  table = gen7_hw_3src_type;
>   } else {
>  assert(type < ARRAY_SIZE(gen7_hw_3src_type));
>  table = gen7_hw_3src_type;
>   }
> 
>   assert(table[type].reg_type != (enum hw_reg_type)INVALID);
> 
>   return table[type].reg_type;
> 
> >  }
> >  
> >  /**
> > @@ -283,8 +308,10 @@ enum brw_reg_type
> >  brw_a16_hw_3src_type_to_reg_type(const struct gen_device_info
> > *devinfo,
> >   unsigned hw_type)
> >  {
> > +   const struct hw_3src_type *hw_3src_type_map =
> > +  get_hw_3src_type_map(devinfo, NULL);
> > for (enum brw_reg_type i = 0; i <= BRW_REGISTER_TYPE_LAST; i++)
> > {
> > -  if (gen7_hw_3src_type[i].reg_type == hw_type) {
> > +  if (hw_3src_type_map[i].reg_type == hw_type) {
> >   return i;
> >}
> > }
> > -- 
> > 2.17.1
> > 
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 108967] DRM : eglCreatePbufferSurface failed with error EGL_BAD_MATCH

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=108967

Tapani Pälli  changed:

   What|Removed |Added

 QA Contact|mesa-dev@lists.freedesktop. |etnaviv@lists.freedesktop.o
   |org |rg
  Component|EGL |Drivers/Gallium/etnaviv
   Assignee|mesa-dev@lists.freedesktop. |etnaviv@lists.freedesktop.o
   |org |rg

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl/linker: complete documentation for assign_attribute_or_color_locations

2019-01-02 Thread Tapani Pälli

Thanks Andres;
Reviewed-by: Tapani Pälli 

On 1/2/19 3:21 PM, Andres Gomez wrote:

Commit 27f1298b9d9 ("glsl/linker: validate attribute aliasing before 
optimizations")
forgot to complete the documentation.

Cc: Tapani Pälli 
Signed-off-by: Andres Gomez 
---
  src/compiler/glsl/linker.cpp | 22 +-
  1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index 17fe0a58448..08e9fb721f8 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -2693,18 +2693,22 @@ find_available_slots(unsigned used_mask, unsigned 
needed_count)
  #define SAFE_MASK_FROM_INDEX(i) (((i) >= 32) ? ~0 : ((1 << (i)) - 1))
  
  /**

- * Assign locations for either VS inputs or FS outputs
+ * Assign locations for either VS inputs or FS outputs.
   *
- * \param mem_ctx   Temporary ralloc context used for linking
- * \param prog  Shader program whose variables need locations assigned
- * \param constants Driver specific constant values for the program.
- * \param target_index  Selector for the program target to receive location
- *  assignmnets.  Must be either \c MESA_SHADER_VERTEX or
- *  \c MESA_SHADER_FRAGMENT.
+ * \param mem_ctxTemporary ralloc context used for linking.
+ * \param prog   Shader program whose variables need locations
+ *   assigned.
+ * \param constants  Driver specific constant values for the program.
+ * \param target_index   Selector for the program target to receive location
+ *   assignmnets.  Must be either \c MESA_SHADER_VERTEX or
+ *   \c MESA_SHADER_FRAGMENT.
+ * \param do_assignment  Whether we are actually marking the assignment or we
+ *   are just doing a dry-run checking.
   *
   * \return
- * If locations are successfully assigned, true is returned.  Otherwise an
- * error is emitted to the shader link log and false is returned.
+ * If locations are (or can be, in case of dry-running) successfully assigned,
+ * true is returned.  Otherwise an error is emitted to the shader link log and
+ * false is returned.
   */
  static bool
  assign_attribute_or_color_locations(void *mem_ctx,


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 23/25] radeonsi: factor si_query_buffer logic out of si_query_hw

2019-01-02 Thread Timothy Arceri
This commit seems to cause bad stuttering in the Batman Arkham City 
benchmark.


On 7/12/18 1:00 am, Nicolai Hähnle wrote:

From: Nicolai Hähnle 

This is a move towards using composition instead of inheritance for
different query types.

This change weakens out-of-memory error reporting somewhat, though this
should be acceptable since we didn't consistently report such errors in
the first place.
---
  src/gallium/drivers/radeonsi/si_perfcounter.c |   8 +-
  src/gallium/drivers/radeonsi/si_query.c   | 177 +-
  src/gallium/drivers/radeonsi/si_query.h   |  17 +-
  src/gallium/drivers/radeonsi/si_texture.c |   7 +-
  4 files changed, 99 insertions(+), 110 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c 
b/src/gallium/drivers/radeonsi/si_perfcounter.c
index 0b3d8f89273..f0d10c054c4 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -761,23 +761,22 @@ static void si_pc_query_destroy(struct si_screen *sscreen,
struct si_query_group *group = query->groups;
query->groups = group->next;
FREE(group);
}
  
  	FREE(query->counters);
  
  	si_query_hw_destroy(sscreen, rquery);

  }
  
-static bool si_pc_query_prepare_buffer(struct si_screen *screen,

-  struct si_query_hw *hwquery,
-  struct r600_resource *buffer)
+static bool si_pc_query_prepare_buffer(struct si_context *ctx,
+  struct si_query_buffer *qbuf)
  {
/* no-op */
return true;
  }
  
  static void si_pc_query_emit_start(struct si_context *sctx,

   struct si_query_hw *hwquery,
   struct r600_resource *buffer, uint64_t va)
  {
struct si_query_pc *query = (struct si_query_pc *)hwquery;
@@ -1055,23 +1054,20 @@ struct pipe_query *si_create_batch_query(struct 
pipe_context *ctx,
counter->base = group->result_base + j;
counter->stride = group->num_counters;
  
  		counter->qwords = 1;

if ((block->b->b->flags & SI_PC_BLOCK_SE) && group->se < 0)
counter->qwords = screen->info.max_se;
if (group->instance < 0)
counter->qwords *= block->num_instances;
}
  
-	if (!si_query_hw_init(screen, &query->b))

-   goto error;
-
return (struct pipe_query *)query;
  
  error:

si_pc_query_destroy(screen, &query->b.b);
return NULL;
  }
  
  static bool si_init_block_names(struct si_screen *screen,

struct si_pc_block *block)
  {
diff --git a/src/gallium/drivers/radeonsi/si_query.c 
b/src/gallium/drivers/radeonsi/si_query.c
index 479a1bbf2c4..5b0fba0ed92 100644
--- a/src/gallium/drivers/radeonsi/si_query.c
+++ b/src/gallium/drivers/radeonsi/si_query.c
@@ -514,86 +514,129 @@ static struct pipe_query *si_query_sw_create(unsigned 
query_type)
query = CALLOC_STRUCT(si_query_sw);
if (!query)
return NULL;
  
  	query->b.type = query_type;

query->b.ops = &sw_query_ops;
  
  	return (struct pipe_query *)query;

  }
  
-void si_query_hw_destroy(struct si_screen *sscreen,

-struct si_query *rquery)
+void si_query_buffer_destroy(struct si_screen *sscreen, struct si_query_buffer 
*buffer)
  {
-   struct si_query_hw *query = (struct si_query_hw *)rquery;
-   struct si_query_buffer *prev = query->buffer.previous;
+   struct si_query_buffer *prev = buffer->previous;
  
  	/* Release all query buffers. */

while (prev) {
struct si_query_buffer *qbuf = prev;
prev = prev->previous;
r600_resource_reference(&qbuf->buf, NULL);
FREE(qbuf);
}
  
-	r600_resource_reference(&query->buffer.buf, NULL);

-   r600_resource_reference(&query->workaround_buf, NULL);
-   FREE(rquery);
+   r600_resource_reference(&buffer->buf, NULL);
+}
+
+void si_query_buffer_reset(struct si_context *sctx, struct si_query_buffer 
*buffer)
+{
+   /* Discard all query buffers except for the oldest. */
+   while (buffer->previous) {
+   struct si_query_buffer *qbuf = buffer->previous;
+   buffer->previous = qbuf->previous;
+
+   r600_resource_reference(&buffer->buf, NULL);
+   buffer->buf = qbuf->buf; /* move ownership */
+   FREE(qbuf);
+   }
+   buffer->results_end = 0;
+
+   /* Discard even the oldest buffer if it can't be mapped without a 
stall. */
+   if (buffer->buf &&
+   (si_rings_is_buffer_referenced(sctx, buffer->buf->buf, 
RADEON_USAGE_READWRITE) ||
+!sctx->ws->buffer_wait(buffer->buf->buf, 0, 
RADEON_USAGE_READWRITE))) {
+   r600_resource_reference(&buffer->buf, NULL);
+   }
  }
  
-static st

[Mesa-dev] [Bug 109204] [regression, bisected] retroarch's crt-royale shader crash radv

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109204

--- Comment #11 from Bas Nieuwenhuizen  ---
I think the issue is that a matrix being passed to a function is not handled
correctly in vtn_ssa_value_add_to_call_params.

I'll continue debugging later.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] st/glsl: refactor st_link_nir()

2019-01-02 Thread Timothy Arceri
The functional change here is moving the nir_lower_io_to_scalar_early()
calls inside st_nir_link_shaders() and moving the st_nir_opts() call
after the call to nir_lower_io_arrays_to_elements().

This fixes a bug with the following piglit test due to the current code
not cleaning up dead code after we lower arrays. This was causing an
assert in the new duplicate varyings link time opt introduced in
70be9afccb23.

tests/spec/glsl-1.10/execution/vsfs-unused-array-member.shader_test

Moving the nir_lower_io_to_scalar_early() calls also allows us to tidy
up the code a little and merge some loops.
---
 src/mesa/state_tracker/st_glsl_to_nir.cpp | 52 +++
 1 file changed, 16 insertions(+), 36 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp 
b/src/mesa/state_tracker/st_glsl_to_nir.cpp
index af83a341e9..cbce4661e9 100644
--- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
@@ -586,8 +586,16 @@ st_nir_get_mesa_program(struct gl_context *ctx,
 static void
 st_nir_link_shaders(nir_shader **producer, nir_shader **consumer, bool scalar)
 {
+   if (scalar) {
+  NIR_PASS_V(*producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
+  NIR_PASS_V(*consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
+   }
+
nir_lower_io_arrays_to_elements(*producer, *consumer);
 
+   st_nir_opts(*producer, scalar);
+   st_nir_opts(*consumer, scalar);
+
if (nir_link_opt_varyings(*producer, *consumer))
   st_nir_opts(*consumer, scalar);
 
@@ -663,51 +671,23 @@ st_link_nir(struct gl_context *ctx,
struct pipe_screen *screen = st->pipe->screen;
bool is_scalar[MESA_SHADER_STAGES];
 
-   /* Determine scalar property of each shader stage */
+   unsigned last_stage = 0;
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
   struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
-  enum pipe_shader_type type;
-
   if (shader == NULL)
  continue;
 
-  type = pipe_shader_type_from_mesa(shader->Stage);
-  is_scalar[i] = screen->get_shader_param(screen, type, 
PIPE_SHADER_CAP_SCALAR_ISA);
-   }
-
-   /* Determine first and last stage. */
-   unsigned first = MESA_SHADER_STAGES;
-   unsigned last = 0;
-   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
-  if (!shader_program->_LinkedShaders[i])
- continue;
-  if (first == MESA_SHADER_STAGES)
- first = i;
-  last = i;
-   }
-
-   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
-  struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
-  if (shader == NULL)
- continue;
+  /* Determine scalar property of each shader stage */
+  enum pipe_shader_type type = pipe_shader_type_from_mesa(shader->Stage);
+  is_scalar[i] = screen->get_shader_param(screen, type,
+  PIPE_SHADER_CAP_SCALAR_ISA);
 
   st_nir_get_mesa_program(ctx, shader_program, shader);
-
-  nir_variable_mode mask = (nir_variable_mode) 0;
-  if (i != first)
- mask = (nir_variable_mode)(mask | nir_var_shader_in);
-
-  if (i != last)
- mask = (nir_variable_mode)(mask | nir_var_shader_out);
-
-  nir_shader *nir = shader->Program->nir;
+  last_stage = i;
 
   if (is_scalar[i]) {
- NIR_PASS_V(nir, nir_lower_io_to_scalar_early, mask);
- NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
+ NIR_PASS_V(shader->Program->nir, nir_lower_load_const_to_scalar);
   }
-
-  st_nir_opts(nir, is_scalar[i]);
}
 
/* Linking the stages in the opposite order (from fragment to vertex)
@@ -715,7 +695,7 @@ st_link_nir(struct gl_context *ctx,
 * are eliminated if they are (transitively) not used in a later
 * stage.
 */
-   int next = last;
+   int next = last_stage;
for (int i = next - 1; i >= 0; i--) {
   struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
   if (shader == NULL)
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/16] nir: combine fmul and fadd across ffma operations

2019-01-02 Thread Rob Clark
On Wed, Jan 2, 2019 at 5:19 PM Ian Romanick  wrote:
>
> On 12/19/18 8:39 AM, Jonathan Marek wrote:
> > This works by moving the fadd up across the ffma operations, so that it
> > can eventually can be combined with a fmul. I'm not sure it works in all
> > cases, but it works in all the common cases.
> >
> > This will only affect freedreno since it is the only driver using the
> > fuse_ffma option.
>
> tl;dr: Optimal generation of FFMAs is much more difficult than you would
> think it should be.  You should collect some actual data before landing
> this.
>
> Any change to ffma generation is likely to have massive, unforeseen
> changes to lots of shaders.  Seemingly simple, obvious changes result in
> changes to live ranges, register pressure, scheduling, constant folding,
> and on, and on.
>
> I took this patch, substituted !options->lower_ffma for
> options->fuse_ffma in the pattern you added, and ran it through
> shader-db for Skylake and Haswell.  As I expected, the results were just
> all over the place (see below).  Notice that register spills are helped
> on one platform but hurt on the other.
>
> There are some simple rules in nir_opt_algebraic for generating and
> reassociating ffmas.  Given the complex interactions with live ranges,
> register pressure, and scheduling, I feel like ffma generation should
> happen much, much later in the process... it should almost certainly be
> deep in the backend where register pressure and scheduling information
> are available.

To be fair, I've tried a few different approaches, but I've yet to
come up with a good way to balance register pressure and instruction
scheduling in ir3 backend[1].. and for a2xx, given that it is limited
to more or less gles2 level shader features (which narrows the pool of
possible shaders to care about), a simple mostly-good-enough option to
improve things in nir, as long as it doesn't hurt other drivers, has
some appeal.

I guess before/after shader-db runs are in order.  But I guess for
a2xx, esp. on a20x hw, which seems to have a pretty weak shader core, this
could still be useful.

BR,
-R

[1] tbh, ir3 instruction scheduler needs to be semi-aggressive to fill
delay slots..  but I think it doesn't always make good decisions
without having some clue about register pressure

> The Intel compiler has its own pass for ffma generation, and I've found
> that makes really, really bad choices due to lack of this information.
> For example, consider a sequence like
>
> (shaderInputA * uniformB) + (texture(...) * shaderInputC)
>
> There are two ways to generate an ffma from that.  One will schedule
> well, and the other will be horrible.  You /probably/ want
>
> ffma(texture(...), shaderInputC, (shaderInputA * uniformB))
>
> so that the first multiply can happen during the latency of the texture
> lookup.  But maybe not.  Maybe shaderInputA and uniformB are still live
> after the multiply and storing the result of the multiply pushes
> register pressure too high.
>
> Right now our ffma pass is greedy.  If it sees a*b+c, it will always
> generate ffma(a, b, c), regardless of whether or not c is also a
> multiply.  In one of my experiments, I flipped the logic so a*b+c*d
> would always generate ffma(c, d, a*b).  The number of helped and hurt
> shaders was very close to even.  Some shaders were helped by a huge
> amount, and other shaders were hurt by an equally huge amount.  I also
> tried not generating an ffma at all for the a*b+c*d case.  My
> recollection is that a few shaders were helped by a large amount, and
> many thousands of shaders were hurt by small amounts.
>
> If I add it all up, I probably spent several weeks last year poking at
> changes like this in our ffma pass.  It began to feel like the old woman
> who swallowed a fly.  Every change helped some things, but it made other
> things fall off a cliff.  The next fix helped a few of the things
> damaged by the previous change, but it made other things fall of a
> different cliff.  I eventually abandoned the project.  If I ever pick it
> back up, it will be as a pass that occurs closer to scheduling and
> register allocation.
>
> Skylake
> total instructions in shared programs: 15031138 -> 15035206 (0.03%)
> instructions in affected programs: 1230624 -> 1234692 (0.33%)
> helped: 1428
> HURT: 1067
> helped stats (abs) min: 1 max: 671 x̄: 7.08 x̃: 3
> helped stats (rel) min: 0.04% max: 24.72% x̄: 2.30% x̃: 1.78%
> HURT stats (abs)   min: 1 max: 1601 x̄: 13.29 x̃: 4
> HURT stats (rel)   min: 0.05% max: 352.64% x̄: 4.42% x̃: 2.35%
> 95% mean confidence interval for instructions value: 0.03 3.23
> 95% mean confidence interval for instructions %-change: 0.24% 0.91%
> Instructions are HURT.
>
> total cycles in shared programs: 369712682 -> 370166527 (0.12%)
> cycles in affected programs: 128542483 -> 128996328 (0.35%)
> helped: 1679
> HURT: 2639
> helped stats (abs) min: 1 max: 27317 x̄: 162.81 x̃: 18
> helped stats (rel) min: <.01% max: 60.25% x̄: 2.34% x̃: 1.38%
> HURT stats

Re: [Mesa-dev] [PATCH 04/16] nir: add nir_lower_bool_to_float

2019-01-02 Thread Dylan Baker
Quoting Ian Romanick (2019-01-02 12:57:26)
> On 12/19/18 9:25 AM, Dylan Baker wrote:
> > Quoting Jonathan Marek (2018-12-19 08:39:53)
> >> Mainly a copy of nir_lower_bool_to_int32, but with float opcodes.
> >>
> >> Signed-off-by: Jonathan Marek 
> >> ---
> >>  src/compiler/Makefile.sources  |   1 +
> >>  src/compiler/nir/meson.build   |   3 +-
> >>  src/compiler/nir/nir.h |   1 +
> >>  src/compiler/nir/nir_lower_bool_to_float.c | 165 +
> >>  4 files changed, 169 insertions(+), 1 deletion(-)
> >>  create mode 100644 src/compiler/nir/nir_lower_bool_to_float.c
> >>
> >> diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
> >> index ef47bdb33b..39eaedc658 100644
> >> --- a/src/compiler/Makefile.sources
> >> +++ b/src/compiler/Makefile.sources
> >> @@ -231,6 +231,7 @@ NIR_FILES = \
> >> nir/nir_lower_atomics_to_ssbo.c \
> >> nir/nir_lower_bitmap.c \
> >> nir/nir_lower_bit_size.c \
> >> +   nir/nir_lower_bool_to_float.c \
> >> nir/nir_lower_bool_to_int32.c \
> >> nir/nir_lower_clamp_color_outputs.c \
> >> nir/nir_lower_clip.c \
> >> diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
> >> index e252f64539..f1016104af 100644
> >> --- a/src/compiler/nir/meson.build
> >> +++ b/src/compiler/nir/meson.build
> >> @@ -114,6 +114,7 @@ files_libnir = files(
> >>'nir_lower_alpha_test.c',
> >>'nir_lower_atomics_to_ssbo.c',
> >>'nir_lower_bitmap.c',
> >> +  'nir_lower_bool_to_float.c',
> >>'nir_lower_bool_to_int32.c',
> >>'nir_lower_clamp_color_outputs.c',
> >>'nir_lower_clip.c',
> >> @@ -248,7 +249,7 @@ if with_tests
> >>include_directories : [inc_common],
> >>dependencies : [dep_thread, idep_gtest, idep_nir],
> >>link_with : libmesa_util,
> >> -), 
> >> +),
> > 
> > This looks like stray whitespace?
> 
> It's deleting a stray (incorrect?) whitespace.  I'm usually not fond of
> slipping unrelated changes into a commit... but who's going to send a
> 1-line patch that deletes a single space character? :)

Ah, it wasn't clear whether it was removing whitespace or adding it :)


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 39/53] intel/compiler: add a helper to do conversions between integer and half-float

2019-01-02 Thread Francisco Jerez
Iago Toral Quiroga  writes:

> There are hardware restrictions to consider that seem to affect atom platforms
> only.

Same comment here as for PATCH 13 of this series.  This and PATCH 40
shouldn't be necessary anymore with [1] in place.  Please drop them.

[1] https://lists.freedesktop.org/archives/mesa-dev/2018-December/212775.html

> ---
>  src/intel/compiler/brw_fs_nir.cpp | 32 +++
>  1 file changed, 32 insertions(+)
>
> diff --git a/src/intel/compiler/brw_fs_nir.cpp 
> b/src/intel/compiler/brw_fs_nir.cpp
> index 802f5cb0944..a9fd98bab68 100644
> --- a/src/intel/compiler/brw_fs_nir.cpp
> +++ b/src/intel/compiler/brw_fs_nir.cpp
> @@ -696,6 +696,38 @@ fixup_64bit_conversion(const fs_builder &bld,
> return false;
>  }
>  
> +static bool
> +fixup_int_half_float_conversion(const fs_builder &bld,
> +fs_reg dst, fs_reg src,
> +bool saturate,
> +const struct gen_device_info *devinfo)
> +{
> +   /* CHV PRM, 3D Media GPGPU Engine, Register Region Restrictions,
> +* Special Restrictions:
> +*
> +*"Conversion between Integer and HF (Half Float) must be DWord
> +* aligned and strided by a DWord on the destination."
> +*
> +* The same restriction is listed for other hardware platforms, however,
> +* empirical testing suggests that only atom platforms are affected.
> +*/
> +   if (!devinfo->is_cherryview && !gen_device_info_is_9lp(devinfo))
> +  return false;
> +
> +   if (!((dst.type == BRW_REGISTER_TYPE_HF && 
> !brw_reg_type_is_floating_point(src.type)) ||
> + (src.type == BRW_REGISTER_TYPE_HF && 
> !brw_reg_type_is_floating_point(dst.type
> +  return false;
> +
> +   fs_reg tmp = horiz_stride(retype(bld.vgrf(BRW_REGISTER_TYPE_F, 1),
> +dst.type),
> + 2);
> +   bld.MOV(tmp, src);
> +   fs_inst *inst = bld.MOV(dst, tmp);
> +   inst->saturate = saturate;
> +
> +   return true;
> +}
> +
>  void
>  fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
>  {
> -- 
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 13/53] intel/compiler: add a helper to handle conversions to 64-bit in atom

2019-01-02 Thread Francisco Jerez
This patch is redundant with the regioning lowering pass I sent a few
days ago [1].  The problem with this approach is that on the one hand
it's easy for the back-end compiler to cause code which was legalized at
NIR translation time to become illegal again accidentally, on the other
hand there's the maintainability issue of having workarounds for the
exact same restriction scattered all over the place.

Please drop it, there shouldn't be any need to do manual workarounds at
NIR translation time for the CHV/BXT regioning restrictions to be
honored anymore.

[1] https://lists.freedesktop.org/archives/mesa-dev/2018-December/212775.html

Iago Toral Quiroga  writes:

> ---
>  src/intel/compiler/brw_fs_nir.cpp | 55 ++-
>  1 file changed, 33 insertions(+), 22 deletions(-)
>
> diff --git a/src/intel/compiler/brw_fs_nir.cpp 
> b/src/intel/compiler/brw_fs_nir.cpp
> index 92ec85a27cc..15715651aa6 100644
> --- a/src/intel/compiler/brw_fs_nir.cpp
> +++ b/src/intel/compiler/brw_fs_nir.cpp
> @@ -664,6 +664,38 @@ brw_rnd_mode_from_nir_op (const nir_op op) {
> }
>  }
>  
> +static bool
> +fixup_64bit_conversion(const fs_builder &bld,
> +   fs_reg dst, fs_reg src,
> +   bool saturate,
> +   const struct gen_device_info *devinfo)
> +{
> +   /* CHV PRM, vol07, 3D Media GPGPU Engine, Register Region Restrictions:
> +*
> +*"When source or destination is 64b (...), regioning in Align1
> +* must follow these rules:
> +*
> +* 1. Source and destination horizontal stride must be aligned to
> +*the same qword.
> +* (...)"
> +*
> +* This means that conversions from bit-sizes smaller than 64-bit to
> +* 64-bit need to have the source data elements aligned to 64-bit.
> +* This restriction does not apply to BDW and later.
> +*/
> +   if (type_sz(dst.type) == 8 && type_sz(src.type) < 8 &&
> +   (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
> +  fs_reg tmp = bld.vgrf(dst.type, 1);
> +  tmp = subscript(tmp, src.type, 0);
> +  bld.MOV(tmp, src);
> +  fs_inst *inst = bld.MOV(dst, tmp);
> +  inst->saturate = saturate;
> +  return true;
> +   }
> +
> +   return false;
> +}
> +
>  void
>  fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
>  {
> @@ -805,29 +837,8 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, 
> nir_alu_instr *instr)
> case nir_op_i2i64:
> case nir_op_u2f64:
> case nir_op_u2u64:
> -  /* CHV PRM, vol07, 3D Media GPGPU Engine, Register Region Restrictions:
> -   *
> -   *"When source or destination is 64b (...), regioning in Align1
> -   * must follow these rules:
> -   *
> -   * 1. Source and destination horizontal stride must be aligned to
> -   *the same qword.
> -   * (...)"
> -   *
> -   * This means that conversions from bit-sizes smaller than 64-bit to
> -   * 64-bit need to have the source data elements aligned to 64-bit.
> -   * This restriction does not apply to BDW and later.
> -   */
> -  if (nir_dest_bit_size(instr->dest.dest) == 64 &&
> -  nir_src_bit_size(instr->src[0].src) < 64 &&
> -  (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
> - fs_reg tmp = bld.vgrf(result.type, 1);
> - tmp = subscript(tmp, op[0].type, 0);
> - inst = bld.MOV(tmp, op[0]);
> - inst = bld.MOV(result, tmp);
> - inst->saturate = instr->dest.saturate;
> +  if (fixup_64bit_conversion(bld, result, op[0], instr->dest.saturate, 
> devinfo))
>   break;
> -  }
>/* fallthrough */
> case nir_op_f2f32:
> case nir_op_f2i32:
> -- 
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109204] [regression, bisected] retroarch's crt-royale shader crash radv

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109204

--- Comment #10 from Bas Nieuwenhuizen  ---
Thanks, was able to reproduce with src/compiler/spirv2nir on the fragment
shader.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/16] nir: combine fmul and fadd across ffma operations

2019-01-02 Thread Ian Romanick
On 12/19/18 8:39 AM, Jonathan Marek wrote:
> This works by moving the fadd up across the ffma operations, so that it
> can eventually can be combined with a fmul. I'm not sure it works in all
> cases, but it works in all the common cases.
> 
> This will only affect freedreno since it is the only driver using the
> fuse_ffma option.

tl;dr: Optimal generation of FFMAs is much more difficult than you would
think it should be.  You should collect some actual data before landing
this.

Any change to ffma generation is likely to have massive, unforeseen
changes to lots of shaders.  Seemingly simple, obvious changes result in
changes to live ranges, register pressure, scheduling, constant folding,
and on, and on.

I took this patch, substituted !options->lower_ffma for
options->fuse_ffma in the pattern you added, and ran it through
shader-db for Skylake and Haswell.  As I expected, the results were just
all over the place (see below).  Notice that register spills are helped
on one platform but hurt on the other.

There are some simple rules in nir_opt_algebraic for generating and
reassociating ffmas.  Given the complex interactions with live ranges,
register pressure, and scheduling, I feel like ffma generation should
happen much, much later in the process... it should almost certainly be
deep in the backend where register pressure and scheduling information
are available.

The Intel compiler has its own pass for ffma generation, and I've found
that it makes really, really bad choices due to lack of this information.
For example, consider a sequence like

(shaderInputA * uniformB) + (texture(...) * shaderInputC)

There are two ways to generate an ffma from that.  One will schedule
well, and the other will be horrible.  You /probably/ want

ffma(texture(...), shaderInputC, (shaderInputA * uniformB))

so that the first multiply can happen during the latency of the texture
lookup.  But maybe not.  Maybe shaderInputA and uniformB are still live
after the multiply and storing the result of the multiply pushes
register pressure too high.

Right now our ffma pass is greedy.  If it sees a*b+c, it will always
generate ffma(a, b, c), regardless of whether or not c is also a
multiply.  In one of my experiments, I flipped the logic so a*b+c*d
would always generate ffma(c, d, a*b).  The number of helped and hurt
shaders was very close to even.  Some shaders were helped by a huge
amount, and other shaders were hurt by an equally huge amount.  I also
tried not generating an ffma at all for the a*b+c*d case.  My
recollection is that a few shaders were helped by a large amount, and
many thousands of shaders were hurt by small amounts.

If I add it all up, I probably spent several weeks last year poking at
changes like this in our ffma pass.  It began to feel like the old woman
who swallowed a fly.  Every change helped some things, but it made other
things fall off a cliff.  The next fix helped a few of the things
damaged by the previous change, but it made other things fall off a
different cliff.  I eventually abandoned the project.  If I ever pick it
back up, it will be as a pass that occurs closer to scheduling and
register allocation.

Skylake
total instructions in shared programs: 15031138 -> 15035206 (0.03%)
instructions in affected programs: 1230624 -> 1234692 (0.33%)
helped: 1428
HURT: 1067
helped stats (abs) min: 1 max: 671 x̄: 7.08 x̃: 3
helped stats (rel) min: 0.04% max: 24.72% x̄: 2.30% x̃: 1.78%
HURT stats (abs)   min: 1 max: 1601 x̄: 13.29 x̃: 4
HURT stats (rel)   min: 0.05% max: 352.64% x̄: 4.42% x̃: 2.35%
95% mean confidence interval for instructions value: 0.03 3.23
95% mean confidence interval for instructions %-change: 0.24% 0.91%
Instructions are HURT.

total cycles in shared programs: 369712682 -> 370166527 (0.12%)
cycles in affected programs: 128542483 -> 128996328 (0.35%)
helped: 1679
HURT: 2639
helped stats (abs) min: 1 max: 27317 x̄: 162.81 x̃: 18
helped stats (rel) min: <.01% max: 60.25% x̄: 2.34% x̃: 1.38%
HURT stats (abs)   min: 1 max: 57100 x̄: 275.56 x̃: 58
HURT stats (rel)   min: <.01% max: 147.37% x̄: 8.62% x̃: 5.01%
95% mean confidence interval for cycles value: 61.86 148.35
95% mean confidence interval for cycles %-change: 4.06% 4.66%
Cycles are HURT.

total spills in shared programs: 10158 -> 9688 (-4.63%)
spills in affected programs: 1829 -> 1359 (-25.70%)
helped: 140
HURT: 3

total fills in shared programs: 22117 -> 21371 (-3.37%)
fills in affected programs: 2575 -> 1829 (-28.97%)
helped: 140
HURT: 3

LOST:   7
GAINED: 0


Haswell
total instructions in shared programs: 13625863 -> 13635875 (0.07%)
instructions in affected programs: 1554579 -> 1564591 (0.64%)
helped: 844
HURT: 1651
helped stats (abs) min: 1 max: 96 x̄: 4.16 x̃: 3
helped stats (rel) min: 0.04% max: 10.26% x̄: 1.91% x̃: 1.90%
HURT stats (abs)   min: 1 max: 1602 x̄: 8.19 x̃: 5
HURT stats (rel)   min: 0.10% max: 346.00% x̄: 2.97% x̃: 1.45%
95% mean confidence interval for instructions value: 2.70 5.33
95% mean confidenc

[Mesa-dev] [Bug 109204] [regression, bisected] retroarch's crt-royale shader crash radv

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109204

--- Comment #9 from Brandon Wright  ---
I added the full generated GLSL for the vertex and fragment shaders and the SPV
produced. The glsl is in the form it's passed to the glslang compiler, and it
compiles with the command-line compiler. I believe it's the vertex shader
that's causing the crash, but since it occurs on pipeline creation I don't
really know.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109204] [regression, bisected] retroarch's crt-royale shader crash radv

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109204

--- Comment #8 from Brandon Wright  ---
Created attachment 142945
  --> https://bugs.freedesktop.org/attachment.cgi?id=142945&action=edit
Vertex shader SPV

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109204] [regression, bisected] retroarch's crt-royale shader crash radv

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109204

--- Comment #7 from Brandon Wright  ---
Created attachment 142944
  --> https://bugs.freedesktop.org/attachment.cgi?id=142944&action=edit
Vertex shader GLSL

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109204] [regression, bisected] retroarch's crt-royale shader crash radv

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109204

--- Comment #5 from Brandon Wright  ---
Created attachment 142942
  --> https://bugs.freedesktop.org/attachment.cgi?id=142942&action=edit
Fragment shader GLSL

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109204] [regression, bisected] retroarch's crt-royale shader crash radv

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109204

--- Comment #6 from Brandon Wright  ---
Created attachment 142943
  --> https://bugs.freedesktop.org/attachment.cgi?id=142943&action=edit
Fragment shader SPV

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109021] Kaveri no fix perfomance

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109021

--- Comment #6 from Dmitry  ---
GL games were not compared, only Vulkan, because there is a suspicion that the
patch does not touch RADV.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 04/16] nir: add nir_lower_bool_to_float

2019-01-02 Thread Ian Romanick
On 12/19/18 9:25 AM, Dylan Baker wrote:
> Quoting Jonathan Marek (2018-12-19 08:39:53)
>> Mainly a copy of nir_lower_bool_to_int32, but with float opcodes.
>>
>> Signed-off-by: Jonathan Marek 
>> ---
>>  src/compiler/Makefile.sources  |   1 +
>>  src/compiler/nir/meson.build   |   3 +-
>>  src/compiler/nir/nir.h |   1 +
>>  src/compiler/nir/nir_lower_bool_to_float.c | 165 +
>>  4 files changed, 169 insertions(+), 1 deletion(-)
>>  create mode 100644 src/compiler/nir/nir_lower_bool_to_float.c
>>
>> diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
>> index ef47bdb33b..39eaedc658 100644
>> --- a/src/compiler/Makefile.sources
>> +++ b/src/compiler/Makefile.sources
>> @@ -231,6 +231,7 @@ NIR_FILES = \
>> nir/nir_lower_atomics_to_ssbo.c \
>> nir/nir_lower_bitmap.c \
>> nir/nir_lower_bit_size.c \
>> +   nir/nir_lower_bool_to_float.c \
>> nir/nir_lower_bool_to_int32.c \
>> nir/nir_lower_clamp_color_outputs.c \
>> nir/nir_lower_clip.c \
>> diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
>> index e252f64539..f1016104af 100644
>> --- a/src/compiler/nir/meson.build
>> +++ b/src/compiler/nir/meson.build
>> @@ -114,6 +114,7 @@ files_libnir = files(
>>'nir_lower_alpha_test.c',
>>'nir_lower_atomics_to_ssbo.c',
>>'nir_lower_bitmap.c',
>> +  'nir_lower_bool_to_float.c',
>>'nir_lower_bool_to_int32.c',
>>'nir_lower_clamp_color_outputs.c',
>>'nir_lower_clip.c',
>> @@ -248,7 +249,7 @@ if with_tests
>>include_directories : [inc_common],
>>dependencies : [dep_thread, idep_gtest, idep_nir],
>>link_with : libmesa_util,
>> -), 
>> +),
> 
> This looks like stray whitespace?

It's deleting a stray (incorrect?) whitespace.  I'm usually not fond of
slipping unrelated changes into a commit... but who's going to send a
1-line patch that deletes a single space character? :)

> 
> other than that, for the build system bits:
> Reviewed-by: Dylan Baker 
> 
>>  suite : ['compiler', 'nir'],
>>)
>>  
>> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
>> index 54f9c64a3a..f6d0bdf7ec 100644
>> --- a/src/compiler/nir/nir.h
>> +++ b/src/compiler/nir/nir.h
>> @@ -2905,6 +2905,7 @@ void nir_lower_alpha_test(nir_shader *shader, enum 
>> compare_func func,
>>bool alpha_to_one);
>>  bool nir_lower_alu(nir_shader *shader);
>>  bool nir_lower_alu_to_scalar(nir_shader *shader);
>> +bool nir_lower_bool_to_float(nir_shader *shader);
>>  bool nir_lower_bool_to_int32(nir_shader *shader);
>>  bool nir_lower_load_const_to_scalar(nir_shader *shader);
>>  bool nir_lower_read_invocation_to_scalar(nir_shader *shader);
>> diff --git a/src/compiler/nir/nir_lower_bool_to_float.c 
>> b/src/compiler/nir/nir_lower_bool_to_float.c
>> new file mode 100644
>> index 00..2756a1815f
>> --- /dev/null
>> +++ b/src/compiler/nir/nir_lower_bool_to_float.c
>> @@ -0,0 +1,165 @@
>> +/*
>> + * Copyright  2018 Intel Corporation
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the 
>> "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the next
>> + * paragraph) shall be included in all copies or substantial portions of the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 
>> OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 
>> OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
>> DEALINGS
>> + * IN THE SOFTWARE.
>> + */
>> +
>> +#include "nir.h"
>> +
>> +static bool
>> +assert_ssa_def_is_not_1bit(nir_ssa_def *def, UNUSED void *unused)
>> +{
>> +   assert(def->bit_size > 1);
>> +   return true;
>> +}
>> +
>> +static bool
>> +rewrite_1bit_ssa_def_to_32bit(nir_ssa_def *def, void *_progress)
>> +{
>> +   bool *progress = _progress;
>> +   if (def->bit_size == 1) {
>> +  def->bit_size = 32;
>> +  *progress = true;
>> +   }
>> +   return true;
>> +}
>> +
>> +static bool
>> +lower_alu_instr(nir_alu_instr *alu)
>> +{
>> +   const nir_op_info *op_info = &nir_op_infos[alu->op];
>> +
>> +   switch (alu->op) {
>> +   case nir_op_vec2:
>> +   case nir_op_vec3:
>> +   case nir_op_vec4:
>> +  /* These we expect to have boo

[Mesa-dev] [Bug 109204] [regression, bisected] retroarch's crt-royale shader crash radv

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109204

--- Comment #4 from osch...@web.de ---
Created attachment 142941
  --> https://bugs.freedesktop.org/attachment.cgi?id=142941&action=edit
crt-royale-geometry-aa-last-pass.spirv

It seems to be indeed the shader mentioned in #2 which causes the crash.
I believe I managed to get a dump of the spirv via RADV_DEBUG=spirv.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/7] mesa: implement ARB/KHR_parallel_shader_compile

2019-01-02 Thread Marek Olšák
Ping. I'll push this by the end of the week.

Marek

On Wed, Nov 28, 2018 at 9:59 PM Marek Olšák  wrote:

> From: Marek Olšák 
>
> Tested by piglit.
> ---
>  docs/features.txt   |  2 +-
>  docs/relnotes/19.0.0.html   |  2 ++
>  src/mapi/glapi/gen/gl_API.xml   | 15 ++-
>  src/mesa/main/dd.h  |  7 +++
>  src/mesa/main/extensions_table.h|  2 ++
>  src/mesa/main/get_hash_params.py|  3 +++
>  src/mesa/main/hint.c| 12 
>  src/mesa/main/hint.h|  4 
>  src/mesa/main/mtypes.h  |  1 +
>  src/mesa/main/shaderapi.c   | 10 ++
>  src/mesa/main/tests/dispatch_sanity.cpp |  4 
>  11 files changed, 60 insertions(+), 2 deletions(-)
>
> diff --git a/docs/features.txt b/docs/features.txt
> index 8999e42519c..7b827de6a92 100644
> --- a/docs/features.txt
> +++ b/docs/features.txt
> @@ -295,21 +295,21 @@ GLES3.2, GLSL ES 3.2 -- all DONE: i965/gen9+,
> radeonsi, virgl
>GL_OES_texture_storage_multisample_2d_array   DONE (all drivers
> that support GL_ARB_texture_multisample)
>
>  Khronos, ARB, and OES extensions that are not part of any OpenGL or
> OpenGL ES version:
>
>GL_ARB_bindless_texture   DONE (nvc0,
> radeonsi)
>GL_ARB_cl_event   not started
>GL_ARB_compute_variable_group_sizeDONE (nvc0,
> radeonsi)
>GL_ARB_ES3_2_compatibilityDONE (i965/gen8+,
> radeonsi, virgl)
>GL_ARB_fragment_shader_interlock  DONE (i965)
>GL_ARB_gpu_shader_int64   DONE (i965/gen8+,
> nvc0, radeonsi, softpipe, llvmpipe)
> -  GL_ARB_parallel_shader_compilenot started, but
> Chia-I Wu did some related work in 2014
> +  GL_ARB_parallel_shader_compileDONE (all drivers)
>GL_ARB_post_depth_coverageDONE (i965, nvc0)
>GL_ARB_robustness_isolation   not started
>GL_ARB_sample_locations   DONE (nvc0)
>GL_ARB_seamless_cubemap_per_texture   DONE (freedreno,
> i965, nvc0, radeonsi, r600, softpipe, swr, virgl)
>GL_ARB_shader_ballot  DONE (i965/gen8+,
> nvc0, radeonsi)
>GL_ARB_shader_clock   DONE (i965/gen7+,
> nv50, nvc0, r600, radeonsi, virgl)
>GL_ARB_shader_stencil_export  DONE (i965/gen9+,
> r600, radeonsi, softpipe, llvmpipe, swr, virgl)
>GL_ARB_shader_viewport_layer_arrayDONE (i965/gen6+,
> nvc0, radeonsi)
>GL_ARB_sparse_buffer  DONE
> (radeonsi/CIK+)
>GL_ARB_sparse_texture not started
> diff --git a/docs/relnotes/19.0.0.html b/docs/relnotes/19.0.0.html
> index bc1776e8f4e..540482bca5f 100644
> --- a/docs/relnotes/19.0.0.html
> +++ b/docs/relnotes/19.0.0.html
> @@ -33,24 +33,26 @@ Compatibility contexts may report a lower version
> depending on each driver.
>  SHA256 checksums
>  
>  TBD.
>  
>
>
>  New features
>
>  
>  GL_AMD_texture_texture4 on all GL 4.0 drivers.
> +GL_ARB_parallel_shader_compile on all drivers.
>  GL_EXT_shader_implicit_conversions on all drivers (ES extension).
>  GL_EXT_texture_compression_bptc on all GL 4.0 drivers (ES
> extension).
>  GL_EXT_texture_compression_rgtc on all GL 3.0 drivers (ES
> extension).
>  GL_EXT_texture_view on drivers supporting texture views (ES
> extension).
> +GL_KHR_parallel_shader_compile on all drivers.
>  GL_OES_texture_view on drivers supporting texture views (ES
> extension).
>  
>
>  Bug fixes
>
>  
>  TBD
>  
>
>  Changes
> diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
> index f4d0808f13b..4ce691b361b 100644
> --- a/src/mapi/glapi/gen/gl_API.xml
> +++ b/src/mapi/glapi/gen/gl_API.xml
> @@ -8402,21 +8402,34 @@
>  
>  
>  
>  
>  
>
>  
>
>  http://www.w3.org/2001/XInclude"/>
>
> -
> +
> +
> +
> +
> +
> +
> +
> +
> + alias="MaxShaderCompilerThreadsKHR">
> +
> +
> +
> +
> +
>
>  http://www.w3.org/2001/XInclude"/>
>
>  
>
>  
>  
>  
>  
>  
> diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
> index f14c3e04e91..92b6ecac33c 100644
> --- a/src/mesa/main/dd.h
> +++ b/src/mesa/main/dd.h
> @@ -1292,20 +1292,27 @@ struct dd_function_table {
> /**
>  * Called to initialize gl_program::driver_cache_blob (and size) with a
>  * ralloc allocated buffer.
>  *
>  * This buffer will be saved and restored as part of the gl_program
>  * serialization and deserialization.
>  */
> void (*ShaderCacheSerializeDriverBlob)(struct gl_context *ctx,
>struct gl_program *prog);
> /*@}*/
> +
> +   /**
> +* 

[Mesa-dev] [Bug 109202] nv50_ir.cpp:749:19: error: cannot use typeid with -fno-rtti

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109202

--- Comment #3 from Dylan Baker  ---
I don't think that the compiler gives you any way to know; we'd have to pass
a #define ourselves.

autotools just adds it to LLVM targets (radeonsi, llvmpipe, etc). That feels
really dangerous to me though, as building with/without rtti changes the ABI
and my understanding is that there's no requirement for code with rtti to
correctly link with code not using rtti.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109177] Blender 2.8 triggers GPU lockup when entering Edit Mode

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109177

Alex Deucher  changed:

   What|Removed |Added

 QA Contact|mesa-dev@lists.freedesktop. |dri-devel@lists.freedesktop
   |org |.org
   Assignee|mesa-dev@lists.freedesktop. |dri-devel@lists.freedesktop
   |org |.org
  Component|Mesa core   |Drivers/Gallium/radeonsi

--- Comment #3 from Alex Deucher  ---
Please attach your xorg log and dmesg output.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109202] nv50_ir.cpp:749:19: error: cannot use typeid with -fno-rtti

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109202

--- Comment #2 from Ilia Mirkin  ---
(In reply to Dylan Baker from comment #1)
> You've either added -no-rtti or are using llvm built without rtti. I can
> make this a hard error.

Out of curiosity, how does the autotools build deal with this?

Also, I think that this only comes up in one assert in nouveau, so when asserts
are disabled (e.g. release builds), it wouldn't bring up a compile error.

It would also be fine to add a #ifdef rtti + #warning or something (does one
know in the source if rtti is enabled somehow?).

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109204] [regression, bisected] retroarch's crt-royale shader crash radv

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109204

Caio Marcelo de Oliveira Filho  changed:

   What|Removed |Added

 CC||caio.olive...@intel.com

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109153] [KBL-G][VK] Vulkan CTS spirv_assembly cases failed Segmentation fault (core dumped)

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109153

--- Comment #5 from Bas Nieuwenhuizen  ---
Can be reproduced with 18.3.1 but has been fixed in master. Still confirming
what fixed it, but if my suspicion is right there are some extensive changes
and that fix may *not* be backported to 18.3 .

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109159] [KBL-G][vulkan] dEQP-VK.api.version_check.entry_points test failed.

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109159

--- Comment #1 from Bas Nieuwenhuizen  ---
So while I don't have a KBL-G GPU, this test would seem pretty hardware
independent. However I'm not able to reproduce this with mesa 18.3.1 and CTS
version 54e546a1a0a6fa412ea4c3e3081bbc9518928a6e .

However, looking at the logs this is a different CTS version? Even that version
does not reproduce the issue for me though?

Do you use a recent vulkan-loader?

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109204] [regression, bisected] retroarch's crt-royale shader crash radv

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109204

--- Comment #3 from Bas Nieuwenhuizen  ---
It would be appreciated if someone could get me the SPIR-V for a failing
shader, instead of me having to figure out how to get the retroarch build
system to spit one out myself.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109204] [regression, bisected] retroarch's crt-royale shader crash radv

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109204

--- Comment #2 from Brandon Wright  ---
I was just about to post this as well.

Seems to be caused by the last pass:
https://github.com/libretro/slang-shaders/blob/master/crt/shaders/crt-royale/src/crt-royale-geometry-aa-last-pass.h

I can't pinpoint an exact cause. It might just be crashing because the number
of constants is overflowing something.

Swapping out the last stage for the no-geom version works fine.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 108900] Non-recoverable GPU hangs with GfxBench v5 Aztec Ruins Vulkan test

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=108900

Bas Nieuwenhuizen  changed:

   What|Removed |Added

 CC||cst...@google.com

--- Comment #5 from Bas Nieuwenhuizen  ---
*** Bug 109058 has been marked as a duplicate of this bug. ***

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109058] Machine freezes during early stages of gfxbench startup

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109058

Bas Nieuwenhuizen  changed:

   What|Removed |Added

 Resolution|--- |DUPLICATE
 Status|NEW |RESOLVED

--- Comment #2 from Bas Nieuwenhuizen  ---


*** This bug has been marked as a duplicate of bug 108900 ***

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109021] Kaveri no fix perfomance

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109021

Bas Nieuwenhuizen  changed:

   What|Removed |Added

 Status|NEW |NEEDINFO

--- Comment #5 from Bas Nieuwenhuizen  ---
Note that the patch is a theoretical 2x benefit in a very specific case.

Do you have a GL game where there is a significant difference? Otherwise I'd
assume this is just a specific bottleneck that is not hit a lot in practice and
hence would not typically result in significant performance changes.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109151] [KBL-G][vulkan] dEQP-VK.texture.explicit_lod.2d.sizes.31x55_nearest_linear_mipmap_nearest_repeat failed verification.

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109151

--- Comment #8 from Bas Nieuwenhuizen  ---
This should fix it:

https://gitlab.freedesktop.org/mesa/mesa/merge_requests/60

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] NIR constant problem for GPU which doesn't have native integer support

2019-01-02 Thread Ilia Mirkin
On Wed, Jan 2, 2019 at 11:17 AM Jason Ekstrand  wrote:
>
> On Wed, Jan 2, 2019 at 9:43 AM Ilia Mirkin  wrote:
>>
>> Have a look at the first 4 patches in the series from Jonathan Marek
>> to address some of these issues:
>>
>> https://patchwork.freedesktop.org/series/54295/
>>
>> Not sure exactly what state that work is in, but I've added Jonathan
>> to CC, perhaps he can provide an update.
>>
>> Cheers,
>>
>>   -ilia
>>
>> On Wed, Jan 2, 2019 at 6:28 AM Qiang Yu  wrote:
>> >
>> > Hi guys,
>> >
>> > I found the problem with this test fragment shader when lima development:
>> > uniform int color;
>> > void main() {
>> > if (color > 1)
>> > gl_FragColor = vec4(1.0, 0.0, 0.0, 1);
>> > else
>> > gl_FragColor = vec4(0.0, 1.0, 0.0, 1);
>> > }
>> >
>> > nir_print_shader output:
>> > impl main {
>> > block block_0:
>> > /* preds: */
>> > vec1 32 ssa_0 = load_const (0x00000001 /* 0.000000 */)
>> > vec4 32 ssa_1 = load_const (0x3f800000 /* 1.000000 */,
>> > 0x00000000 /* 0.000000 */, 0x00000000 /* 0.000000 */, 0x3f800000 /*
>> > 1.000000 */)
>> > vec4 32 ssa_2 = load_const (0x00000000 /* 0.000000 */,
>> > 0x3f800000 /* 1.000000 */, 0x00000000 /* 0.000000 */, 0x3f800000 /*
>> > 1.000000 */)
>> > vec1 32 ssa_3 = load_const (0x00000000 /* 0.000000 */)
>> > vec1 32 ssa_4 = intrinsic load_uniform (ssa_3) (0, 1, 0) /*
>> > base=0 */ /* range=1 */ /* component=0 */   /* color */
>> > vec1 32 ssa_5 = slt ssa_0, ssa_4
>> > vec1 32 ssa_6 = fnot ssa_5
>> > vec4 32 ssa_7 = bcsel ssa_6.xxxx, ssa_2, ssa_1
>> > intrinsic store_output (ssa_7, ssa_3) (0, 15, 0) /* base=0 */
>> > /* wrmask=xyzw */ /* component=0 */   /* gl_FragColor */
>> > /* succs: block_1 */
>> > block block_1:
>> > }
>> >
>> > ssa0 is not converted to float when glsl to nir. I see glsl_to_nir.cpp
>> > will create flt/ilt/ult
>> > based on source type for gpu support native integer, but for gpu not
>> > support native
>> > integer, just create slt for all source type. And in
>> > nir_lower_constant_initializers,
>> > there's also no type conversion for integer constant.
>
>
> This is a generally sticky issue.  In NIR, we have no concept of types on SSA 
> values which has proven perfectly reasonable and actually very powerful in a 
> world where integers are supported natively.  Unfortunately, it causes 
> significant problems for float-only architectures.  There are two general 
> possible solutions:
>
>  1. convert all integers to floats in glsl_to_nir and prog_to_nir and adjust 
> various lowering/optimization passes to handle 
> nir_compiler_options::supports_native_integers == false.
>
>  2. Just allow integers all the way through until you get very close to the 
> end and then lower integers to floats at the last possible moment.
>
> Both of these come with significant issues.  With the first approach, there 
> are potentially a lot of passes that will need to be adjusted and it's not 
> 100% clear what to do with UBO offsets and indirect addressing; fortunately, 
> you should be able to disable most of those optimizations to get going so it 
> shouldn't be too bad.  The second would be less invasive to NIR because it 
> doesn't require modifying as many passes.  However, doing such a lowering 
> would be very tricky to get right primarily because of constants.  With 
> everything else, you can just sort of assume that inputs are floats (you 
> lowered, right?) and lower to produce a float output.  With constants, 
> however, you don't know whether or not it's an integer that needs lowering.  
> We could, in theory, add an extra bit to load_const to solve this problem but 
> there are also significant problems with doing that so it's not clear it's a 
> good idea.
>
> I think the patches from Marek (as indicated by ilia) attempt the first 
> approach.  If we can do it practically, my suspicion is that the first will 
> work better than the second.  However, it will take some experimentation in 
> order to actually determine that.

Just a handful of thoughts, which I suspect Jason is already well
aware of, but I'll point them out anyways:

 - On nv30, uniform indirect addressing consumes a float value
directly. Other architectures have a dedicated "address register" used
for such things, which needs to be loaded with a float (but is
internally stored in an opaque manner). However direct addressing
takes an integer, and indirect addressing may be able to take a "bias"
integer offset.
 - All uniform values should be coming in as float, even if they say
"int" or "bool" on them -- this is what mesa core already does.
 - Bools (consumed by branches, etc) are awkward everywhere. I think
the recent rework to make them 1-bit generics enables the awkwardness
to be passed on to the backend where it belongs, which means everyone
can just handle it however they want.

The main point I'm trying to make is that there are only a handful of
prac

Re: [Mesa-dev] NIR constant problem for GPU which doesn't have native integer support

2019-01-02 Thread Jason Ekstrand
On Wed, Jan 2, 2019 at 9:43 AM Ilia Mirkin  wrote:

> Have a look at the first 4 patches in the series from Jonathan Marek
> to address some of these issues:
>
> https://patchwork.freedesktop.org/series/54295/
>
> Not sure exactly what state that work is in, but I've added Jonathan
> to CC, perhaps he can provide an update.
>
> Cheers,
>
>   -ilia
>
> On Wed, Jan 2, 2019 at 6:28 AM Qiang Yu  wrote:
> >
> > Hi guys,
> >
> > I found the problem with this test fragment shader when lima development:
> > uniform int color;
> > void main() {
> > if (color > 1)
> > gl_FragColor = vec4(1.0, 0.0, 0.0, 1);
> > else
> > gl_FragColor = vec4(0.0, 1.0, 0.0, 1);
> > }
> >
> > nir_print_shader output:
> > impl main {
> > block block_0:
> > /* preds: */
> > vec1 32 ssa_0 = load_const (0x00000001 /* 0.000000 */)
> > vec4 32 ssa_1 = load_const (0x3f800000 /* 1.000000 */,
> > 0x00000000 /* 0.000000 */, 0x00000000 /* 0.000000 */, 0x3f800000 /*
> > 1.000000 */)
> > vec4 32 ssa_2 = load_const (0x00000000 /* 0.000000 */,
> > 0x3f800000 /* 1.000000 */, 0x00000000 /* 0.000000 */, 0x3f800000 /*
> > 1.000000 */)
> > vec1 32 ssa_3 = load_const (0x00000000 /* 0.000000 */)
> > vec1 32 ssa_4 = intrinsic load_uniform (ssa_3) (0, 1, 0) /*
> > base=0 */ /* range=1 */ /* component=0 */   /* color */
> > vec1 32 ssa_5 = slt ssa_0, ssa_4
> > vec1 32 ssa_6 = fnot ssa_5
> > vec4 32 ssa_7 = bcsel ssa_6.xxxx, ssa_2, ssa_1
> > intrinsic store_output (ssa_7, ssa_3) (0, 15, 0) /* base=0 */
> > /* wrmask=xyzw */ /* component=0 */   /* gl_FragColor */
> > /* succs: block_1 */
> > block block_1:
> > }
> >
> > ssa0 is not converted to float when glsl to nir. I see glsl_to_nir.cpp
> > will create flt/ilt/ult
> > based on source type for gpu support native integer, but for gpu not
> > support native
> > integer, just create slt for all source type. And in
> > nir_lower_constant_initializers,
> > there's also no type conversion for integer constant.
>

This is a generally sticky issue.  In NIR, we have no concept of types on
SSA values which has proven perfectly reasonable and actually very powerful
in a world where integers are supported natively.  Unfortunately, it causes
significant problems for float-only architectures.  There are two general
possible solutions:

 1. convert all integers to floats in glsl_to_nir and prog_to_nir and
adjust various lowering/optimization passes to handle
nir_compiler_options::supports_native_integers == false.

 2. Just allow integers all the way through until you get very close to the
end and then lower integers to floats at the last possible moment.

Both of these come with significant issues.  With the first approach, there
are potentially a lot of passes that will need to be adjusted and it's not
100% clear what to do with UBO offsets and indirect addressing;
fortunately, you should be able to disable most of those optimizations to
get going so it shouldn't be too bad.  The second would be less invasive to
NIR because it doesn't require modifying as many passes.  However, doing
such a lowering would be very tricky to get right primarily because of
constants.  With everything else, you can just sort of assume that inputs
are floats (you lowered, right?) and lower to produce a float output.  With
constants, however, you don't know whether or not it's an integer that
needs lowering.  We could, in theory, add an extra bit to load_const to
solve this problem but there are also significant problems with doing that
so it's not clear it's a good idea.

I think the patches from Marek (as indicated by ilia) attempt the first
approach.  If we can do it practically, my suspicion is that the first will
work better than the second.  However, it will take some experimentation in
order to actually determine that.

--Jason
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] NIR constant problem for GPU which doesn't have native integer support

2019-01-02 Thread Ilia Mirkin
Have a look at the first 4 patches in the series from Jonathan Marek
to address some of these issues:

https://patchwork.freedesktop.org/series/54295/

Not sure exactly what state that work is in, but I've added Jonathan
to CC, perhaps he can provide an update.

Cheers,

  -ilia

On Wed, Jan 2, 2019 at 6:28 AM Qiang Yu  wrote:
>
> Hi guys,
>
> I found a problem with this test fragment shader during lima development:
> uniform int color;
> void main() {
> if (color > 1)
> gl_FragColor = vec4(1.0, 0.0, 0.0, 1);
> else
> gl_FragColor = vec4(0.0, 1.0, 0.0, 1);
> }
>
> nir_print_shader output:
> impl main {
> block block_0:
> /* preds: */
> vec1 32 ssa_0 = load_const (0x00000001 /* 0.000000 */)
> vec4 32 ssa_1 = load_const (0x3f800000 /* 1.000000 */,
> 0x00000000 /* 0.000000 */, 0x00000000 /* 0.000000 */, 0x3f800000 /*
> 1.000000 */)
> vec4 32 ssa_2 = load_const (0x00000000 /* 0.000000 */,
> 0x3f800000 /* 1.000000 */, 0x00000000 /* 0.000000 */, 0x3f800000 /*
> 1.000000 */)
> vec1 32 ssa_3 = load_const (0x00000000 /* 0.000000 */)
> vec1 32 ssa_4 = intrinsic load_uniform (ssa_3) (0, 1, 0) /*
> base=0 */ /* range=1 */ /* component=0 */   /* color */
> vec1 32 ssa_5 = slt ssa_0, ssa_4
> vec1 32 ssa_6 = fnot ssa_5
> vec4 32 ssa_7 = bcsel ssa_6.xxxx, ssa_2, ssa_1
> intrinsic store_output (ssa_7, ssa_3) (0, 15, 0) /* base=0 */
> /* wrmask=xyzw */ /* component=0 */   /* gl_FragColor */
> /* succs: block_1 */
> block block_1:
> }
>
> ssa_0 is not converted to float when going from GLSL to NIR. I see that
> glsl_to_nir.cpp creates flt/ilt/ult based on the source type for GPUs
> that support native integers, but for GPUs that do not support native
> integers it just creates slt for all source types. And in
> nir_lower_constant_initializers,
> there's also no type conversion for integer constants.
>
> Do you know how to fix this problem?
>
> Thanks,
> Qiang
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109151] [KBL-G][vulkan] dEQP-VK.texture.explicit_lod.2d.sizes.31x55_nearest_linear_mipmap_nearest_repeat failed verification.

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109151

--- Comment #7 from Bas Nieuwenhuizen  ---
Hmm, I can actually reproduce (thought we would not be able to since we have a
pretty much 100% pass-rate on the 1.1.3 mustpass list on a bunch of HW).

However I also noticed that it is not on the mustpass list?

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] MR: mesa/core: Enable NV_depth_clamp for GLES >= 2.0

2019-01-02 Thread Gert Wollny
https://gitlab.freedesktop.org/mesa/mesa/merge_requests/58

The extension NV_depth_clamp is written against OpenGL 1.2.1, and since
GLES 2.0 is based on GL 2.0 there is no reason not to enable this
extension also for GLES >= 2.0.

Best, 
Gert

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 108967] DRM : eglCreatePbufferSurface failed with error EGL_BAD_MATCH

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=108967

--- Comment #9 from Vishwanath Chandapur  ---
From the look of it, the Mesa driver is causing the issue with WebGL.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 108967] DRM : eglCreatePbufferSurface failed with error EGL_BAD_MATCH

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=108967

--- Comment #8 from Vishwanath Chandapur  ---
[ 1458.370916] [drm:etnaviv_ioctl_gem_submit] *ERROR* BO at index 96 already on
submit list
[ 1465.955216] [drm:etnaviv_ioctl_gem_submit] *ERROR* BO at index 122 already
on submit list
[ 1465.988983] [drm:etnaviv_ioctl_gem_submit] *ERROR* BO at index 122 already
on submit list
[ 1466.124379] [drm:etnaviv_ioctl_gem_submit] *ERROR* BO at index 122 already
on submit list

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] etnaviv: fix resource usage tracking across different pipe_context's

2019-01-02 Thread Marek Vasut
On 1/2/19 9:58 AM, Christian Gmeiner wrote:
> A pipe_resource can be shared by all the pipe_context's hanging off the
> same pipe_screen.
> 
> Changes from v1 -> v2:
>  - to remove the resource from the used_resources set when it is destroyed
> 
> Signed-off-by: Christian Gmeiner 

Is this somehow different from the version of your patch I posted about
a month ago ?

> ---
>  src/gallium/drivers/etnaviv/etnaviv_context.c | 21 
>  src/gallium/drivers/etnaviv/etnaviv_context.h |  3 --
>  .../drivers/etnaviv/etnaviv_resource.c| 48 ++-
>  .../drivers/etnaviv/etnaviv_resource.h|  7 ++-
>  src/gallium/drivers/etnaviv/etnaviv_screen.c  |  8 
>  src/gallium/drivers/etnaviv/etnaviv_screen.h  |  4 ++
>  6 files changed, 61 insertions(+), 30 deletions(-)
> 
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_context.c 
> b/src/gallium/drivers/etnaviv/etnaviv_context.c
> index 44b50925a4f..8d4956516f1 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_context.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_context.c
> @@ -36,6 +36,7 @@
>  #include "etnaviv_query.h"
>  #include "etnaviv_query_hw.h"
>  #include "etnaviv_rasterizer.h"
> +#include "etnaviv_resource.h"
>  #include "etnaviv_screen.h"
>  #include "etnaviv_shader.h"
>  #include "etnaviv_state.h"
> @@ -329,7 +330,8 @@ static void
>  etna_cmd_stream_reset_notify(struct etna_cmd_stream *stream, void *priv)
>  {
> struct etna_context *ctx = priv;
> -   struct etna_resource *rsc, *rsc_tmp;
> +   struct etna_screen *screen = ctx->screen;
> +   struct set_entry *entry;
>  
> etna_set_state(stream, VIVS_GL_API_MODE, VIVS_GL_API_MODE_OPENGL);
> etna_set_state(stream, VIVS_GL_VERTEX_ELEMENT_CONFIG, 0x00000001);
> @@ -384,16 +386,13 @@ etna_cmd_stream_reset_notify(struct etna_cmd_stream 
> *stream, void *priv)
> ctx->dirty = ~0L;
> ctx->dirty_sampler_views = ~0L;
>  
> -   /* go through all the used resources and clear their status flag */
> -   LIST_FOR_EACH_ENTRY_SAFE(rsc, rsc_tmp, &ctx->used_resources, list)
> -   {
> -  debug_assert(rsc->status != 0);
> -  rsc->status = 0;
> -  rsc->pending_ctx = NULL;
> -  list_delinit(&rsc->list);
> -   }
> +   /* go through all the used context resources and clear their status flag 
> */
> +   set_foreach(screen->used_resources, entry) {
> +  struct etna_resource *rsc = (struct etna_resource *)entry->key;
>  
> -   assert(LIST_IS_EMPTY(&ctx->used_resources));
> +  _mesa_set_remove_key(rsc->pending_ctx, ctx);
> +  _mesa_set_remove(screen->used_resources, entry);
> +   }
>  }
>  
>  static void
> @@ -437,8 +436,6 @@ etna_context_create(struct pipe_screen *pscreen, void 
> *priv, unsigned flags)
> /* need some sane default in case state tracker doesn't set some state: */
> ctx->sample_mask = 0xffff;
>  
> -   list_inithead(&ctx->used_resources);
> -
> /*  Set sensible defaults for state */
> etna_cmd_stream_reset_notify(ctx->stream, ctx);
>  
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_context.h 
> b/src/gallium/drivers/etnaviv/etnaviv_context.h
> index 6ad9f3431e1..50a2cdf3d07 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_context.h
> +++ b/src/gallium/drivers/etnaviv/etnaviv_context.h
> @@ -136,9 +136,6 @@ struct etna_context {
> uint32_t prim_hwsupport;
> struct primconvert_context *primconvert;
>  
> -   /* list of resources used by currently-unsubmitted renders */
> -   struct list_head used_resources;
> -
> struct slab_child_pool transfer_pool;
> struct blitter_context *blitter;
>  
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_resource.c 
> b/src/gallium/drivers/etnaviv/etnaviv_resource.c
> index c0091288030..00b5f43bf3f 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_resource.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_resource.c
> @@ -33,6 +33,7 @@
>  #include "etnaviv_screen.h"
>  #include "etnaviv_translate.h"
>  
> +#include "util/hash_table.h"
>  #include "util/u_inlines.h"
>  #include "util/u_memory.h"
>  
> @@ -282,7 +283,6 @@ etna_resource_alloc(struct pipe_screen *pscreen, unsigned 
> layout,
> rsc->halign = halign;
>  
> pipe_reference_init(&rsc->base.reference, 1);
> -   list_inithead(&rsc->list);
>  
> size = setup_miptree(rsc, paddingX, paddingY, msaa_xscale, msaa_yscale);
>  
> @@ -303,6 +303,11 @@ etna_resource_alloc(struct pipe_screen *pscreen, 
> unsigned layout,
>memset(map, 0, size);
> }
>  
> +   rsc->pending_ctx = _mesa_set_create(NULL, _mesa_hash_pointer,
> +   _mesa_key_pointer_equal);
> +   if (!rsc->pending_ctx)
> +  goto free_rsc;
> +
> return &rsc->base;
>  
>  free_rsc:
> @@ -462,8 +467,12 @@ etna_resource_changed(struct pipe_screen *pscreen, 
> struct pipe_resource *prsc)
>  static void
>  etna_resource_destroy(struct pipe_screen *pscreen, struct pipe_resource 
> *prsc)
>  {
> +   struct etna_screen *screen = etna_screen(pscreen);
> struct etna_resource *rsc = etna_resource(prsc);
>  
> +   _mesa_set_d

[Mesa-dev] [Bug 109177] Blender 2.8 triggers GPU lockup when entering Edit Mode

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109177

--- Comment #2 from MirceaKitsune  ---
In the meantime I've performed a test which was recommended to me by another
user. The crash seems to be unaffected by booting with either "amdgpu.dc=0" or
"amdgpu.dc=1" and will occur identically in both cases.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] egl/drivers/haiku: Fix reference to disp vs dpy

2019-01-02 Thread Eric Engestrom
On Thursday, 2018-12-27 20:41:47 +, Alexander von Gluck IV wrote:
> ---
>  src/egl/drivers/haiku/egl_haiku.cpp | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/src/egl/drivers/haiku/egl_haiku.cpp 
> b/src/egl/drivers/haiku/egl_haiku.cpp
> index a9c5cf8d29..d4b046c79b 100644
> --- a/src/egl/drivers/haiku/egl_haiku.cpp
> +++ b/src/egl/drivers/haiku/egl_haiku.cpp
> @@ -29,6 +29,7 @@
>  
>  #include "eglconfig.h"
>  #include "eglcontext.h"
> +#include "egldevice.h"
>  #include "egldisplay.h"
>  #include "egldriver.h"
>  #include "eglcurrent.h"
> @@ -215,7 +216,7 @@ init_haiku(_EGLDriver *drv, _EGLDisplay *dpy)
>   _eglError(EGL_NOT_INITIALIZED, "DRI2: failed to find 
> EGLDevice");
>   return EGL_FALSE;
>   }
> - disp->Device = dev;
> + dpy->Device = dev;
>  
>   TRACE("Add configs\n");
>   if (!haiku_add_configs_for_visuals(dpy))
> -- 
> 2.14.5

Thanks!

Pushed with these tags added:
Reviewed-by: Eric Engestrom 
Fixes: 00992700c9a812a54563 "egl: set the EGLDevice when creating a display"
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH mesa] travis: avoid using unset llvm-config

2019-01-02 Thread Eric Engestrom
Fixes the following errors:
  usage: which [-as] program ...
  /Users/travis/.travis/job_stages: line 110: --version: command not found

... caused by the use of an undefined $LLVM_CONFIG

Signed-off-by: Eric Engestrom 
---
 .travis.yml | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index b70df99d67efde5e35c8..7279f3ad9cf1dab9e3bf 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -823,12 +823,16 @@ script:
 
   - |
 if test "x$BUILD" = xmeson; then
-  # We need to control the version of llvm-config we're using, so we'll
-  # generate a native file to do so. This requires meson >=0.49
-  #
-  echo -e "[binaries]\nllvm-config = '`which $LLVM_CONFIG`'" > native.file
+  if test -n "$LLVM_CONFIG"; then
+# We need to control the version of llvm-config we're using, so we'll
+# generate a native file to do so. This requires meson >=0.49
+#
+echo -e "[binaries]\nllvm-config = '`which $LLVM_CONFIG`'" > 
native.file
 
-  $LLVM_CONFIG --version
+$LLVM_CONFIG --version
+  else
+: > native.file
+  fi
 
   export CFLAGS="$CFLAGS -isystem`pwd`"
   meson _build \
-- 
Cheers,
  Eric

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glsl/linker: complete documentation for assign_attribute_or_color_locations

2019-01-02 Thread Andres Gomez
Commit 27f1298b9d9 ("glsl/linker: validate attribute aliasing before 
optimizations")
forgot to complete the documentation.

Cc: Tapani Pälli 
Signed-off-by: Andres Gomez 
---
 src/compiler/glsl/linker.cpp | 22 +-
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index 17fe0a58448..08e9fb721f8 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -2693,18 +2693,22 @@ find_available_slots(unsigned used_mask, unsigned 
needed_count)
 #define SAFE_MASK_FROM_INDEX(i) (((i) >= 32) ? ~0 : ((1 << (i)) - 1))
 
 /**
- * Assign locations for either VS inputs or FS outputs
+ * Assign locations for either VS inputs or FS outputs.
  *
- * \param mem_ctx   Temporary ralloc context used for linking
- * \param prog  Shader program whose variables need locations assigned
- * \param constants Driver specific constant values for the program.
- * \param target_index  Selector for the program target to receive location
- *  assignmnets.  Must be either \c MESA_SHADER_VERTEX or
- *  \c MESA_SHADER_FRAGMENT.
+ * \param mem_ctxTemporary ralloc context used for linking.
+ * \param prog   Shader program whose variables need locations
+ *   assigned.
+ * \param constants  Driver specific constant values for the program.
+ * \param target_index   Selector for the program target to receive location
+ *   assignmnets.  Must be either \c MESA_SHADER_VERTEX or
+ *   \c MESA_SHADER_FRAGMENT.
+ * \param do_assignment  Whether we are actually marking the assignment or we
+ *   are just doing a dry-run checking.
  *
  * \return
- * If locations are successfully assigned, true is returned.  Otherwise an
- * error is emitted to the shader link log and false is returned.
+ * If locations are (or can be, in case of dry-running) successfully assigned,
+ * true is returned.  Otherwise an error is emitted to the shader link log and
+ * false is returned.
  */
 static bool
 assign_attribute_or_color_locations(void *mem_ctx,
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109190] virgl: buffer flushing error with some dEQP tests [bisected]

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109190

--- Comment #1 from Gert Wollny  ---
The bug is addressed with 
https://gitlab.freedesktop.org/mesa/mesa/merge_requests/56

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 18/18] travis: meson: port gallium build combinations over

2019-01-02 Thread Eric Engestrom
On Monday, 2018-12-17 11:37:05 -0800, Dylan Baker wrote:
> Quoting Emil Velikov (2018-12-17 11:03:32)
> > On Fri, 14 Dec 2018 at 17:53, Dylan Baker  wrote:
> > >
> > > Quoting Emil Velikov (2018-12-13 08:06:07)
> > > > From: Emil Velikov 
> > > >
> > > > This commit adds a number of build combos:
> > > >
> > > >  - Gallium Drivers {SWR, RadeonSI, Others)
> > > > Each one has different LLVM requirements. Building SWR alone is twice
> > > > as slow as all other drivers combined.
> > > >
> > > >  - Gallium ST Clover LLVM {5,6,7}
> > > > Because C++ API changes all the time. Analogous to above building
> > > > Clover takes as much time as building all other ST combined.
> > > >
> > > >  - Gallium ST Others
> > > > Nouveau is used, instead of i915g since meson has explicit target
> > > > tracking. Meaning that a configure error is throws if we use i915g
> > > > with say va, vdpau or others.
> > > >
> > > > Note: LLVM prior to 5.0 is intentionally dropped. If needed we can add
> > > > that later.
> > > >
> > > > Signed-off-by: Emil Velikov 
> > > > ---
> > > >  .travis.yml | 187 
> > > >  1 file changed, 187 insertions(+)
> > > >
> > > > diff --git a/.travis.yml b/.travis.yml
> > > > index 125d6ce3c68..b70df99d67e 100644
> > > > --- a/.travis.yml
> > > > +++ b/.travis.yml
> > > > @@ -98,6 +98,193 @@ matrix:
> > > >  - libxfixes-dev
> > > >  - python3-pip
> > > >  - python3-setuptools
> > > > +- env:
> > > > +# NOTE: Building SWR is 2x (yes two) times slower than all the 
> > > > other
> > > > +# gallium drivers combined.
> > > > +# Start this early so that it doesn't hunder the run time.
> > > > +- LABEL="meson Gallium Drivers SWR"
> > > > +- BUILD=meson
> > > > +- UNWIND="true"
> > > > +- DRI_LOADERS="-Dglx=disabled -Degl=false -Dgbm=false"
> > > > +- GALLIUM_ST="-Ddri3=false -Dgallium-vdpau=false 
> > > > -Dgallium-xvmc=false -Dgallium-omx=disabled -Dgallium-va=false 
> > > > -Dgallium-xa=false -Dgallium-nine=false -Dgallium-opencl=disabled"
> > > > +- GALLIUM_DRIVERS="swr"
> > > > +- LLVM_VERSION=6.0
> > > > +- LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
> > > > +  addons:
> > > > +apt:
> > > > +  packages:
> > > > +- llvm-6.0-dev
> > > > +# Common
> > > > +- xz-utils
> > > > +- libexpat1-dev
> > > > +- libx11-xcb-dev
> > > > +- libelf-dev
> > > > +- libunwind8-dev
> > > > +- python3.5
> > > > +- python3-pip
> > > > +- python3-setuptools
> > > > +- env:
> > > > +- LABEL="meson Gallium Drivers RadeonSI"
> > > > +- BUILD=meson
> > > > +- UNWIND="true"
> > > > +- DRI_LOADERS="-Dglx=disabled -Degl=false -Dgbm=false"
> > > > +- GALLIUM_ST="-Ddri3=false -Dgallium-vdpau=false 
> > > > -Dgallium-xvmc=false -Dgallium-omx=disabled -Dgallium-va=false 
> > > > -Dgallium-xa=false -Dgallium-nine=false -Dgallium-opencl=disabled"
> > > > +- GALLIUM_DRIVERS="radeonsi"
> > > > +- LLVM_VERSION=7
> > > > +- LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
> > > > +  addons:
> > > > +apt:
> > > > +  sources:
> > > > +- sourceline: 'deb http://apt.llvm.org/xenial/ 
> > > > llvm-toolchain-xenial-7 main'
> > > > +  key_url: https://apt.llvm.org/llvm-snapshot.gpg.key
> > > > +  packages:
> > > > +# From sources above
> > > > +- llvm-7-dev
> > > > +# Common
> > > > +- xz-utils
> > > > +- libexpat1-dev
> > > > +- libx11-xcb-dev
> > > > +- libelf-dev
> > > > +- libunwind8-dev
> > > > +- python3.5
> > > > +- python3-pip
> > > > +- python3-setuptools
> > > > +- env:
> > > > +- LABEL="meson Gallium Drivers Other"
> > > > +- BUILD=meson
> > > > +- UNWIND="true"
> > > > +- DRI_LOADERS="-Dglx=disabled -Degl=false -Dgbm=false"
> > > > +- GALLIUM_ST="-Ddri3=false -Dgallium-vdpau=false 
> > > > -Dgallium-xvmc=false -Dgallium-omx=disabled -Dgallium-va=false 
> > > > -Dgallium-xa=false -Dgallium-nine=false -Dgallium-opencl=disabled"
> > > > +- 
> > > > GALLIUM_DRIVERS="i915,nouveau,pl111,r300,r600,freedreno,svga,swrast,v3d,vc4,virgl,etnaviv,imx"
> > > > +- LLVM_VERSION=5.0
> > > > +- LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
> > > > +  addons:
> > > > +apt:
> > > > +  packages:
> > > > +# LLVM packaging is broken and misses these dependencies
> > > > +- libedit-dev
> > > > +- llvm-5.0-dev
> > > > +# Common
> > > > +- xz-utils
> > > > +- libexpat1-dev
> > > > +- libx11-xcb-dev
> > > > +- libelf-dev
> > > > +

[Mesa-dev] NIR constant problem for GPU which doesn't have native integer support

2019-01-02 Thread Qiang Yu
Hi guys,

I found a problem with this test fragment shader during lima development:
uniform int color;
void main() {
if (color > 1)
gl_FragColor = vec4(1.0, 0.0, 0.0, 1);
else
gl_FragColor = vec4(0.0, 1.0, 0.0, 1);
}

nir_print_shader output:
impl main {
block block_0:
/* preds: */
vec1 32 ssa_0 = load_const (0x00000001 /* 0.000000 */)
vec4 32 ssa_1 = load_const (0x3f800000 /* 1.000000 */,
0x00000000 /* 0.000000 */, 0x00000000 /* 0.000000 */, 0x3f800000 /*
1.000000 */)
vec4 32 ssa_2 = load_const (0x00000000 /* 0.000000 */,
0x3f800000 /* 1.000000 */, 0x00000000 /* 0.000000 */, 0x3f800000 /*
1.000000 */)
vec1 32 ssa_3 = load_const (0x00000000 /* 0.000000 */)
vec1 32 ssa_4 = intrinsic load_uniform (ssa_3) (0, 1, 0) /*
base=0 */ /* range=1 */ /* component=0 */   /* color */
vec1 32 ssa_5 = slt ssa_0, ssa_4
vec1 32 ssa_6 = fnot ssa_5
vec4 32 ssa_7 = bcsel ssa_6.xxxx, ssa_2, ssa_1
intrinsic store_output (ssa_7, ssa_3) (0, 15, 0) /* base=0 */
/* wrmask=xyzw */ /* component=0 */   /* gl_FragColor */
/* succs: block_1 */
block block_1:
}

ssa_0 is not converted to float during the GLSL-to-NIR translation. I see
that glsl_to_nir.cpp creates flt/ilt/ult based on the source type for GPUs
that support native integers, but for GPUs that do not support native
integers it just creates slt for all source types. And in
nir_lower_constant_initializers there is also no type conversion for
integer constants.

Do you know how to fix this problem?

Thanks,
Qiang
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109201] Deep Rock Galactic: GPU Hang (Steam Play) (DXVK)

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109201

--- Comment #5 from Alexander  ---
Note: this does not happen with the AMD Vulkan driver.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 51/53] intel/compiler: support half-float in the combine constants pass

2019-01-02 Thread Pohjolainen, Topi
On Wed, Dec 19, 2018 at 12:51:19PM +0100, Iago Toral Quiroga wrote:
> ---
>  .../compiler/brw_fs_combine_constants.cpp | 60 +++
>  1 file changed, 49 insertions(+), 11 deletions(-)
> 
> diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp 
> b/src/intel/compiler/brw_fs_combine_constants.cpp
> index e0c95d379b8..24307e365ab 100644
> --- a/src/intel/compiler/brw_fs_combine_constants.cpp
> +++ b/src/intel/compiler/brw_fs_combine_constants.cpp
> @@ -36,6 +36,7 @@
>  
>  #include "brw_fs.h"
>  #include "brw_cfg.h"
> +#include "util/half_float.h"
>  
>  using namespace brw;
>  
> @@ -114,8 +115,9 @@ struct imm {
>  */
> exec_list *uses;
>  
> -   /** The immediate value.  We currently only handle floats. */
> +   /** The immediate value.  We currently only handle float and half-float. 
> */
> float val;
> +   brw_reg_type type;
>  
> /**
>  * The GRF register and subregister number where we've decided to store 
> the
> @@ -145,10 +147,10 @@ struct table {
>  };
>  
>  static struct imm *
> -find_imm(struct table *table, float val)
> +find_imm(struct table *table, float val, brw_reg_type type)
>  {
> for (int i = 0; i < table->len; i++) {
> -  if (table->imm[i].val == val) {
> +  if (table->imm[i].val == val && table->imm[i].type == type) {
>   return &table->imm[i];
>}
> }
> @@ -190,6 +192,20 @@ compare(const void *_a, const void *_b)
> return a->first_use_ip - b->first_use_ip;
>  }
>  
> +static bool
> +needs_negate(float reg_val, float imm_val, brw_reg_type type)
> +{
> +   /* reg_val represents the immediate value in the register in its original
> +* bit-size, while imm_val is always a valid 32-bit float value.
> +*/
> +   if (type == BRW_REGISTER_TYPE_HF) {
> +  uint32_t reg_val_ud = *((uint32_t *) &reg_val);

Casting "float" to uint32_t and then reading only 16 bits from it looks a
little ugly. Could we use "uint32_t reg_val" and then below in the caller
use "reg->u" instead of "reg->f"?

> +  reg_val = _mesa_half_to_float(reg_val_ud & 0xffff);
> +   }
> +
> +   return signbit(imm_val) != signbit(reg_val);
> +}
> +
>  bool
>  fs_visitor::opt_combine_constants()
>  {
> @@ -215,12 +231,20 @@ fs_visitor::opt_combine_constants()
>  
>for (int i = 0; i < inst->sources; i++) {
>   if (inst->src[i].file != IMM ||
> - inst->src[i].type != BRW_REGISTER_TYPE_F)
> + (inst->src[i].type != BRW_REGISTER_TYPE_F &&
> +  inst->src[i].type != BRW_REGISTER_TYPE_HF))
>  continue;
>  
> - float val = !inst->can_do_source_mods(devinfo) ? inst->src[i].f :
> - fabs(inst->src[i].f);
> - struct imm *imm = find_imm(&table, val);
> + float val;
> + if (inst->src[i].type == BRW_REGISTER_TYPE_F) {
> +val = !inst->can_do_source_mods(devinfo) ? inst->src[i].f :
> +fabs(inst->src[i].f);
> + } else {
> +val = !inst->can_do_source_mods(devinfo) ?
> +   _mesa_half_to_float(inst->src[i].d & 0xffff) :
> +   fabs(_mesa_half_to_float(inst->src[i].d & 0xffff));
> + }
> + struct imm *imm = find_imm(&table, val, inst->src[i].type);
>  
>   if (imm) {
>  bblock_t *intersection = cfg_t::intersect(block, imm->block);
> @@ -238,6 +262,7 @@ fs_visitor::opt_combine_constants()
>  imm->uses = new(const_ctx) exec_list();
>  imm->uses->push_tail(link(const_ctx, &inst->src[i]));
>  imm->val = val;
> +imm->type = inst->src[i].type;
>  imm->uses_by_coissue = could_coissue(devinfo, inst);
>  imm->must_promote = must_promote_imm(devinfo, inst);
>  imm->first_use_ip = ip;
> @@ -278,12 +303,23 @@ fs_visitor::opt_combine_constants()
>imm->block->last_non_control_flow_inst()->next);
>const fs_builder ibld = bld.at(imm->block, n).exec_all().group(1, 0);
>  
> -  ibld.MOV(reg, brw_imm_f(imm->val));
> +  reg = retype(reg, imm->type);
> +  if (imm->type == BRW_REGISTER_TYPE_F) {
> + ibld.MOV(reg, brw_imm_f(imm->val));
> +  } else {
> + const uint16_t val_hf = _mesa_float_to_half(imm->val);
> + ibld.MOV(reg, retype(brw_imm_uw(val_hf), BRW_REGISTER_TYPE_HF));
> +  }
>imm->nr = reg.nr;
>imm->subreg_offset = reg.offset;
>  
> +  /* Keep offsets 32-bit aligned since we are mixing 32-bit and 16-bit
> +   * constants into the same register
> +   *
> +   * TODO: try to pack pairs of HF constants into each 32-bit slot
> +   */
>reg.offset += sizeof(float);
> -  if (reg.offset == 8 * sizeof(float)) {
> +  if (reg.offset == REG_SIZE) {
>   reg.nr = alloc.allocate(1);
>   reg.offset = 0;
>}
> @@ -296,11 +332,13 @@ fs_visitor::opt_combine_constants()
>   fs_reg *reg = link->reg;
>   reg->file = VGRF;
>   r

[Mesa-dev] [Bug 109201] Deep Rock Galactic: GPU Hang (Steam Play) (DXVK)

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109201

Alexander  changed:

   What|Removed |Added

Summary|Deep Rock Galactic: GPU |Deep Rock Galactic: GPU
   |Hang|Hang (Steam Play) (DXVK)

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109201] Deep Rock Galactic: GPU Hang

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109201

Alexander  changed:

   What|Removed |Added

Version|git |18.3

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 49/53] intel/compiler: fix cmod propagation for non 32-bit types

2019-01-02 Thread Pohjolainen, Topi
On Wed, Dec 19, 2018 at 12:51:17PM +0100, Iago Toral Quiroga wrote:
> ---
>  src/intel/compiler/brw_fs_cmod_propagation.cpp | 8 +++-
>  1 file changed, 3 insertions(+), 5 deletions(-)
> 
> diff --git a/src/intel/compiler/brw_fs_cmod_propagation.cpp 
> b/src/intel/compiler/brw_fs_cmod_propagation.cpp
> index 7bb5c9afbc9..dfef9d720a2 100644
> --- a/src/intel/compiler/brw_fs_cmod_propagation.cpp
> +++ b/src/intel/compiler/brw_fs_cmod_propagation.cpp
> @@ -244,8 +244,7 @@ opt_cmod_propagation_local(const gen_device_info *devinfo,
>  /* CMP's result is the same regardless of dest type. */
>  if (inst->conditional_mod == BRW_CONDITIONAL_NZ &&
>  scan_inst->opcode == BRW_OPCODE_CMP &&
> -(inst->dst.type == BRW_REGISTER_TYPE_D ||
> - inst->dst.type == BRW_REGISTER_TYPE_UD)) {
> +brw_reg_type_is_integer(inst->dst.type)) {
> inst->remove(block);
> progress = true;
> break;
> @@ -258,9 +257,8 @@ opt_cmod_propagation_local(const gen_device_info *devinfo,
> break;
>  
>  /* Comparisons operate differently for ints and floats */
> -if (scan_inst->dst.type != inst->dst.type &&

The old check wouldn't let, for example, a (DF, F) pair through, while the
new version does. Should we keep this line?

> -(scan_inst->dst.type == BRW_REGISTER_TYPE_F ||
> - inst->dst.type == BRW_REGISTER_TYPE_F))
> +if (brw_reg_type_is_floating_point(scan_inst->dst.type) !=
> +brw_reg_type_is_floating_point(inst->dst.type))
> break;
>  
>  /* If the instruction generating inst's source also wrote the
> -- 
> 2.17.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 41/53] intel/compiler: assert that lower conversions produces valid strides

2019-01-02 Thread Pohjolainen, Topi

Subject reads a little odd, maybe just: "assert strides in conversion
lowering". The assert itself seems useful:

Reviewed-by: Topi Pohjolainen 

On Wed, Dec 19, 2018 at 12:51:09PM +0100, Iago Toral Quiroga wrote:
> The hardware only has two bits to specify the horizontal stride, so the
> maximum horizontal stride we can use is 4. The pass calculates strides
> based on the sizes of the types involved, and for conversions between
> 64-bit and 8-bit types that can lead to strides of 8.
> 
> The compiler should make sure that such conversions are handled in two
> steps to avoid that situation. If we fail to do this properly, the
> generated assembly will be invalid and validation will fail, but
> asserting here makes debugging easier.
> ---
>  src/intel/compiler/brw_fs_lower_conversions.cpp | 7 +++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/src/intel/compiler/brw_fs_lower_conversions.cpp 
> b/src/intel/compiler/brw_fs_lower_conversions.cpp
> index 145fb55f995..00781e824e8 100644
> --- a/src/intel/compiler/brw_fs_lower_conversions.cpp
> +++ b/src/intel/compiler/brw_fs_lower_conversions.cpp
> @@ -90,6 +90,13 @@ fs_visitor::lower_conversions()
>  fs_reg temp = ibld.vgrf(get_exec_type(inst));
>  fs_reg strided_temp = subscript(temp, dst.type, 0);
>  
> +/* Make sure we don't exceed hardware limits here. If we have 
> code
> + * that hits this assertion it means that we need to split the
> + * instruction in two, using intermediary types (see for
> + * example nir_op_i2i8).
> + */
> +assert(strided_temp.stride <= 4);
> +
>  assert(inst->size_written == 
> inst->dst.component_size(inst->exec_size));
>  inst->dst = strided_temp;
>  inst->saturate = false;
> -- 
> 2.17.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 108877] OpenGL CTS gl43 test cases were interrupted due to segment fault

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=108877

--- Comment #3 from Chen Xi  ---
Hi, Is there any update for this issue?

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 39/53] intel/compiler: add a helper to do conversions between integer and half-float

2019-01-02 Thread Pohjolainen, Topi
On Wed, Dec 19, 2018 at 12:51:07PM +0100, Iago Toral Quiroga wrote:
> There are hardware restrictions to consider that seem to affect atom platforms
> only.
> ---
>  src/intel/compiler/brw_fs_nir.cpp | 32 +++
>  1 file changed, 32 insertions(+)

Reviewed-by: Topi Pohjolainen 

> 
> diff --git a/src/intel/compiler/brw_fs_nir.cpp 
> b/src/intel/compiler/brw_fs_nir.cpp
> index 802f5cb0944..a9fd98bab68 100644
> --- a/src/intel/compiler/brw_fs_nir.cpp
> +++ b/src/intel/compiler/brw_fs_nir.cpp
> @@ -696,6 +696,38 @@ fixup_64bit_conversion(const fs_builder &bld,
> return false;
>  }
>  
> +static bool
> +fixup_int_half_float_conversion(const fs_builder &bld,
> +fs_reg dst, fs_reg src,
> +bool saturate,
> +const struct gen_device_info *devinfo)
> +{
> +   /* CHV PRM, 3D Media GPGPU Engine, Register Region Restrictions,
> +* Special Restrictions:
> +*
> +*"Conversion between Integer and HF (Half Float) must be DWord
> +* aligned and strided by a DWord on the destination."
> +*
> +* The same restriction is listed for other hardware platforms, however,
> +* empirical testing suggests that only atom platforms are affected.
> +*/
> +   if (!devinfo->is_cherryview && !gen_device_info_is_9lp(devinfo))
> +  return false;
> +
> +   if (!((dst.type == BRW_REGISTER_TYPE_HF && 
> !brw_reg_type_is_floating_point(src.type)) ||
> + (src.type == BRW_REGISTER_TYPE_HF && 
> !brw_reg_type_is_floating_point(dst.type))))
> +  return false;
> +
> +   fs_reg tmp = horiz_stride(retype(bld.vgrf(BRW_REGISTER_TYPE_F, 1),
> +dst.type),
> + 2);
> +   bld.MOV(tmp, src);
> +   fs_inst *inst = bld.MOV(dst, tmp);
> +   inst->saturate = saturate;
> +
> +   return true;
> +}
> +
>  void
>  fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
>  {
> -- 
> 2.17.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 40/53] intel/compiler: handle conversions between int and half-float on atom

2019-01-02 Thread Pohjolainen, Topi
On Wed, Dec 19, 2018 at 12:51:08PM +0100, Iago Toral Quiroga wrote:
> ---
>  src/intel/compiler/brw_fs_nir.cpp | 15 +++
>  1 file changed, 11 insertions(+), 4 deletions(-)
> 
> diff --git a/src/intel/compiler/brw_fs_nir.cpp 
> b/src/intel/compiler/brw_fs_nir.cpp
> index a9fd98bab68..57bc8a01a91 100644
> --- a/src/intel/compiler/brw_fs_nir.cpp
> +++ b/src/intel/compiler/brw_fs_nir.cpp
> @@ -848,13 +848,22 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, 
> nir_alu_instr *instr)
> */
>  
> case nir_op_f2f16:
> -   case nir_op_i2f16:
> -   case nir_op_u2f16:
> case nir_op_i2i8:
> case nir_op_u2u8:
> +  assert(type_sz(op[0].type) < 8); /* brw_nir_lower_conversions */
> +  inst = bld.MOV(result, op[0]);
> +  inst->saturate = instr->dest.saturate;
> +  break;
> +
> +   case nir_op_i2f16:
> +   case nir_op_u2f16:
> case nir_op_f2i8:
> case nir_op_f2u8:
>assert(type_sz(op[0].type) < 8); /* brw_nir_lower_conversions */
> +   case nir_op_f2i16:
> +   case nir_op_f2u16:
> +  if (fixup_int_half_float_conversion(bld, result, op[0], 
> instr->dest.saturate, devinfo))

It looks like we should wrap this line. The logic itself looks right:

Reviewed-by: Topi Pohjolainen 

> + break;
>inst = bld.MOV(result, op[0]);
>inst->saturate = instr->dest.saturate;
>break;
> @@ -890,8 +899,6 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, 
> nir_alu_instr *instr)
> case nir_op_f2f32:
> case nir_op_f2i32:
> case nir_op_f2u32:
> -   case nir_op_f2i16:
> -   case nir_op_f2u16:
> case nir_op_i2i32:
> case nir_op_u2u32:
> case nir_op_i2i16:
> -- 
> 2.17.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] nir: make nir_opt_remove_phis_impl() static

2019-01-02 Thread apinheiro
The three patches:

Reviewed-by: Alejandro Piñeiro 

On 2/1/19 6:00, Timothy Arceri wrote:
> ---
>  src/compiler/nir/nir.h | 1 -
>  src/compiler/nir/nir_opt_remove_phis.c | 2 +-
>  2 files changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 4b8de4bb01..94d6578620 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -3221,7 +3221,6 @@ bool nir_opt_move_load_ubo(nir_shader *shader);
>  bool nir_opt_peephole_select(nir_shader *shader, unsigned limit,
>   bool indirect_load_ok, bool expensive_alu_ok);
>  
> -bool nir_opt_remove_phis_impl(nir_function_impl *impl);
>  bool nir_opt_remove_phis(nir_shader *shader);
>  
>  bool nir_opt_shrink_load(nir_shader *shader);
> diff --git a/src/compiler/nir/nir_opt_remove_phis.c 
> b/src/compiler/nir/nir_opt_remove_phis.c
> index e2d3994c49..d7ca2fe717 100644
> --- a/src/compiler/nir/nir_opt_remove_phis.c
> +++ b/src/compiler/nir/nir_opt_remove_phis.c
> @@ -139,7 +139,7 @@ remove_phis_block(nir_block *block, nir_builder *b)
> return progress;
>  }
>  
> -bool
> +static bool
>  nir_opt_remove_phis_impl(nir_function_impl *impl)
>  {
> bool progress = false;


pEpkey.asc
Description: application/pgp-keys
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 38/53] intel/compiler: handle 64-bit to 8-bit conversions

2019-01-02 Thread Pohjolainen, Topi
On Wed, Dec 19, 2018 at 12:51:06PM +0100, Iago Toral Quiroga wrote:
> These are not directly supported in hardware and brw_nir_lower_conversions
> should have taken care of that before we get here.

It looks like there are two things actually happening here:

1) For the int64/uint64 to 8-bit case the support is already there and this
   just moves the case to a stronger one with an assert.

2) Actually adding support for DF to 8-bit that didn't exist before.

If this is the case (i.e., I'm not missing something), should we adjust the
commit to say that DF to 8-bit support is added and then add a note in the
commit that I64/U64 to 8-bit gets an additional assertion?

> ---
>  src/intel/compiler/brw_fs_nir.cpp | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/src/intel/compiler/brw_fs_nir.cpp 
> b/src/intel/compiler/brw_fs_nir.cpp
> index 6089c883c9a..802f5cb0944 100644
> --- a/src/intel/compiler/brw_fs_nir.cpp
> +++ b/src/intel/compiler/brw_fs_nir.cpp
> @@ -818,6 +818,10 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, 
> nir_alu_instr *instr)
> case nir_op_f2f16:
> case nir_op_i2f16:
> case nir_op_u2f16:
> +   case nir_op_i2i8:
> +   case nir_op_u2u8:
> +   case nir_op_f2i8:
> +   case nir_op_f2u8:
>assert(type_sz(op[0].type) < 8); /* brw_nir_lower_conversions */
>inst = bld.MOV(result, op[0]);
>inst->saturate = instr->dest.saturate;
> @@ -860,8 +864,6 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, 
> nir_alu_instr *instr)
> case nir_op_u2u32:
> case nir_op_i2i16:
> case nir_op_u2u16:
> -   case nir_op_i2i8:
> -   case nir_op_u2u8:
>inst = bld.MOV(result, op[0]);
>inst->saturate = instr->dest.saturate;
>break;
> -- 
> 2.17.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 35/53] intel/compiler: workaround for SIMD8 half-float MAD in gen < 9

2019-01-02 Thread Pohjolainen, Topi
On Wed, Dec 19, 2018 at 12:51:03PM +0100, Iago Toral Quiroga wrote:
> Broadwell hardware has a bug that manifests in SIMD8 executions of
> 16-bit MAD instructions when any of the sources is a Y or W component.
> We pack these components in the same SIMD register as components X and
> Z respectively, but starting at offset 16B (so they live in the second
> half of the register). The problem does not exist in SKL or later.
> 
> We work around this issue by moving any such sources to a temporary
> starting at offset 0B. We want to do this after the main optimization loop
> to prevent copy-propagation and friends to undo the fix.
> ---
>  src/intel/compiler/brw_fs.cpp | 48 +++
>  src/intel/compiler/brw_fs.h   |  1 +
>  2 files changed, 49 insertions(+)
> 
> diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
> index 933b0b6ffc4..1343c2f4993 100644
> --- a/src/intel/compiler/brw_fs.cpp
> +++ b/src/intel/compiler/brw_fs.cpp
> @@ -6449,6 +6449,48 @@ fs_visitor::optimize()
> validate();
>  }
>  
> +/**
> + * Broadwell hardware has a bug that manifests in SIMD8 executions of 16-bit
> + * MAD instructions when any of the sources is a Y or W component. We pack
> + * these components in the same SIMD register as components X and Z
> + * respectively, but starting at offset 16B (so they live in the second half
> + * of the register).
> + *
> + * We work around this issue by moving any such sources to a temporary
> + * starting at offset 0B. We want to do this after the main optimization loop
> + * to prevent copy-propagation and friends to undo the fix.
> + */
> +void
> +fs_visitor::fixup_hf_mad()
> +{
> +   if (devinfo->gen > 8)

We don't want to run this for gen < 8 either as it would iterate the
instructions in vain. So just:

  if (devinfo->gen == 8)

Otherwise:

Reviewed-by: Topi Pohjolainen 

> +  return;
> +
> +   bool progress = false;
> +
> +   foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
> +  if (inst->opcode != BRW_OPCODE_MAD ||
> +  inst->dst.type != BRW_REGISTER_TYPE_HF ||
> +  inst->exec_size > 8)
> + continue;
> +
> +  for (int i = 0; i < 3; i++) {
> + if (inst->src[i].offset > 0) {
> +assert(inst->src[i].type == BRW_REGISTER_TYPE_HF);
> +const fs_builder ibld =
> +   bld.at(block, inst).exec_all().group(inst->exec_size, 0);
> +fs_reg tmp = ibld.vgrf(inst->src[i].type);
> +ibld.MOV(tmp, inst->src[i]);
> +inst->src[i] = tmp;
> +progress = true;
> + }
> +  }
> +   }
> +
> +   if (progress)
> +  invalidate_live_intervals();
> +}
> +
>  /**
>   * Three source instruction must have a GRF/MRF destination register.
>   * ARF NULL is not allowed.  Fix that up by allocating a temporary GRF.
> @@ -6607,6 +6649,7 @@ fs_visitor::run_vs()
> assign_curb_setup();
> assign_vs_urb_setup();
>  
> +   fixup_hf_mad();
> fixup_3src_null_dest();
> allocate_registers(8, true);
>  
> @@ -6691,6 +6734,7 @@ fs_visitor::run_tcs_single_patch()
> assign_curb_setup();
> assign_tcs_single_patch_urb_setup();
>  
> +   fixup_hf_mad();
> fixup_3src_null_dest();
> allocate_registers(8, true);
>  
> @@ -6725,6 +6769,7 @@ fs_visitor::run_tes()
> assign_curb_setup();
> assign_tes_urb_setup();
>  
> +   fixup_hf_mad();
> fixup_3src_null_dest();
> allocate_registers(8, true);
>  
> @@ -6774,6 +6819,7 @@ fs_visitor::run_gs()
> assign_curb_setup();
> assign_gs_urb_setup();
>  
> +   fixup_hf_mad();
> fixup_3src_null_dest();
> allocate_registers(8, true);
>  
> @@ -6874,6 +6920,7 @@ fs_visitor::run_fs(bool allow_spilling, bool 
> do_rep_send)
>  
>assign_urb_setup();
>  
> +  fixup_hf_mad();
>fixup_3src_null_dest();
>allocate_registers(8, allow_spilling);
>  
> @@ -6918,6 +6965,7 @@ fs_visitor::run_cs(unsigned min_dispatch_width)
>  
> assign_curb_setup();
>  
> +   fixup_hf_mad();
> fixup_3src_null_dest();
> allocate_registers(min_dispatch_width, true);
>  
> diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
> index 163c0008820..f79f8554fb9 100644
> --- a/src/intel/compiler/brw_fs.h
> +++ b/src/intel/compiler/brw_fs.h
> @@ -103,6 +103,7 @@ public:
> void setup_vs_payload();
> void setup_gs_payload();
> void setup_cs_payload();
> +   void fixup_hf_mad();
> void fixup_3src_null_dest();
> void assign_curb_setup();
> void calculate_urb_setup();
> -- 
> 2.17.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 28/53] intel/compiler: add new half-float register type for 3-src instructions

2019-01-02 Thread Pohjolainen, Topi
On Wed, Dec 19, 2018 at 12:50:56PM +0100, Iago Toral Quiroga wrote:
> This is available since gen8.
> ---
>  src/intel/compiler/brw_reg_type.c | 35 +++
>  1 file changed, 31 insertions(+), 4 deletions(-)
> 
> diff --git a/src/intel/compiler/brw_reg_type.c 
> b/src/intel/compiler/brw_reg_type.c
> index 60240ba1513..72295a2bd75 100644
> --- a/src/intel/compiler/brw_reg_type.c
> +++ b/src/intel/compiler/brw_reg_type.c
> @@ -138,6 +138,7 @@ enum hw_3src_reg_type {
> GEN7_3SRC_TYPE_D  = 1,
> GEN7_3SRC_TYPE_UD = 2,
> GEN7_3SRC_TYPE_DF = 3,
> +   GEN8_3SRC_TYPE_HF = 4,
>  
> /** When ExecutionDatatype is 1: @{ */
> GEN10_ALIGN1_3SRC_REG_TYPE_HF = 0b000,
> @@ -166,6 +167,14 @@ static const struct hw_3src_type {
> [BRW_REGISTER_TYPE_D]  = { GEN7_3SRC_TYPE_D  },
> [BRW_REGISTER_TYPE_UD] = { GEN7_3SRC_TYPE_UD },
> [BRW_REGISTER_TYPE_DF] = { GEN7_3SRC_TYPE_DF },
> +}, gen8_hw_3src_type[] = {
> +   [0 ... BRW_REGISTER_TYPE_LAST] = { INVALID },
> +
> +   [BRW_REGISTER_TYPE_F]  = { GEN7_3SRC_TYPE_F  },
> +   [BRW_REGISTER_TYPE_D]  = { GEN7_3SRC_TYPE_D  },
> +   [BRW_REGISTER_TYPE_UD] = { GEN7_3SRC_TYPE_UD },
> +   [BRW_REGISTER_TYPE_DF] = { GEN7_3SRC_TYPE_DF },
> +   [BRW_REGISTER_TYPE_HF] = { GEN8_3SRC_TYPE_HF },
>  }, gen10_hw_3src_align1_type[] = {
>  #define E(x) BRW_ALIGN1_3SRC_EXEC_TYPE_##x
> [0 ... BRW_REGISTER_TYPE_LAST] = { INVALID },
> @@ -249,6 +258,20 @@ brw_hw_type_to_reg_type(const struct gen_device_info 
> *devinfo,
> unreachable("not reached");
>  }
>  
> +static inline const struct hw_3src_type *
> +get_hw_3src_type_map(const struct gen_device_info *devinfo, uint32_t *size)
> +{
> +   if (devinfo->gen < 8) {
> +  if (size)
> + *size = ARRAY_SIZE(gen7_hw_3src_type);
> +  return gen7_hw_3src_type;
> +   } else {
> +  if (size)
> + *size = ARRAY_SIZE(gen8_hw_3src_type);
> +  return gen8_hw_3src_type;
> +   }
> +}
> +
>  /**
>   * Convert a brw_reg_type enumeration value into the hardware representation
>   * for a 3-src align16 instruction
> @@ -257,9 +280,11 @@ unsigned
>  brw_reg_type_to_a16_hw_3src_type(const struct gen_device_info *devinfo,
>   enum brw_reg_type type)
>  {
> -   assert(type < ARRAY_SIZE(gen7_hw_3src_type));
> -   assert(gen7_hw_3src_type[type].reg_type != (enum 
> hw_3src_reg_type)INVALID);
> -   return gen7_hw_3src_type[type].reg_type;
> +   uint32_t map_size;
> +   const struct hw_3src_type *hw_3src_type_map =
> +  get_hw_3src_type_map(devinfo, &map_size);
> +   assert(hw_3src_type_map[type].reg_type != (enum hw_3src_reg_type)INVALID);
> +   return hw_3src_type_map[type].reg_type;

I wonder if we should use a style equivalent to brw_reg_type_to_hw_type() and
brw_hw_type_to_reg_type() and inline the table (or map) selection:

  const struct hw_3src_type *table;

  if (devinfo->gen >= 8) {
 assert(type < ARRAY_SIZE(gen8_hw_3src_type));
     table = gen8_hw_3src_type;
  } else {
 assert(type < ARRAY_SIZE(gen7_hw_3src_type));
 table = gen7_hw_3src_type;
  }

  assert(table[type].reg_type != (enum hw_3src_reg_type)INVALID);

  return table[type].reg_type;

>  }
>  
>  /**
> @@ -283,8 +308,10 @@ enum brw_reg_type
>  brw_a16_hw_3src_type_to_reg_type(const struct gen_device_info *devinfo,
>   unsigned hw_type)
>  {
> +   const struct hw_3src_type *hw_3src_type_map =
> +  get_hw_3src_type_map(devinfo, NULL);
> for (enum brw_reg_type i = 0; i <= BRW_REGISTER_TYPE_LAST; i++) {
> -  if (gen7_hw_3src_type[i].reg_type == hw_type) {
> +  if (hw_3src_type_map[i].reg_type == hw_type) {
>   return i;
>}
> }
> -- 
> 2.17.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] etnaviv: fix resource usage tracking across different pipe_context's

2019-01-02 Thread Christian Gmeiner
A pipe_resource can be shared by all the pipe_context's hanging off the
same pipe_screen.

Changes from v1 -> v2:
 - remove the resource from the used_resources set when it is destroyed

Signed-off-by: Christian Gmeiner 
---
 src/gallium/drivers/etnaviv/etnaviv_context.c | 21 
 src/gallium/drivers/etnaviv/etnaviv_context.h |  3 --
 .../drivers/etnaviv/etnaviv_resource.c| 48 ++-
 .../drivers/etnaviv/etnaviv_resource.h|  7 ++-
 src/gallium/drivers/etnaviv/etnaviv_screen.c  |  8 
 src/gallium/drivers/etnaviv/etnaviv_screen.h  |  4 ++
 6 files changed, 61 insertions(+), 30 deletions(-)

diff --git a/src/gallium/drivers/etnaviv/etnaviv_context.c 
b/src/gallium/drivers/etnaviv/etnaviv_context.c
index 44b50925a4f..8d4956516f1 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_context.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_context.c
@@ -36,6 +36,7 @@
 #include "etnaviv_query.h"
 #include "etnaviv_query_hw.h"
 #include "etnaviv_rasterizer.h"
+#include "etnaviv_resource.h"
 #include "etnaviv_screen.h"
 #include "etnaviv_shader.h"
 #include "etnaviv_state.h"
@@ -329,7 +330,8 @@ static void
 etna_cmd_stream_reset_notify(struct etna_cmd_stream *stream, void *priv)
 {
struct etna_context *ctx = priv;
-   struct etna_resource *rsc, *rsc_tmp;
+   struct etna_screen *screen = ctx->screen;
+   struct set_entry *entry;
 
etna_set_state(stream, VIVS_GL_API_MODE, VIVS_GL_API_MODE_OPENGL);
etna_set_state(stream, VIVS_GL_VERTEX_ELEMENT_CONFIG, 0x0001);
@@ -384,16 +386,13 @@ etna_cmd_stream_reset_notify(struct etna_cmd_stream 
*stream, void *priv)
ctx->dirty = ~0L;
ctx->dirty_sampler_views = ~0L;
 
-   /* go through all the used resources and clear their status flag */
-   LIST_FOR_EACH_ENTRY_SAFE(rsc, rsc_tmp, &ctx->used_resources, list)
-   {
-  debug_assert(rsc->status != 0);
-  rsc->status = 0;
-  rsc->pending_ctx = NULL;
-  list_delinit(&rsc->list);
-   }
+   /* go through all the used context resources and clear their status flag */
+   set_foreach(screen->used_resources, entry) {
+  struct etna_resource *rsc = (struct etna_resource *)entry->key;
 
-   assert(LIST_IS_EMPTY(&ctx->used_resources));
+  _mesa_set_remove_key(rsc->pending_ctx, ctx);
+  _mesa_set_remove(screen->used_resources, entry);
+   }
 }
 
 static void
@@ -437,8 +436,6 @@ etna_context_create(struct pipe_screen *pscreen, void 
*priv, unsigned flags)
/* need some sane default in case state tracker doesn't set some state: */
ctx->sample_mask = 0xffff;
 
-   list_inithead(&ctx->used_resources);
-
/*  Set sensible defaults for state */
etna_cmd_stream_reset_notify(ctx->stream, ctx);
 
diff --git a/src/gallium/drivers/etnaviv/etnaviv_context.h 
b/src/gallium/drivers/etnaviv/etnaviv_context.h
index 6ad9f3431e1..50a2cdf3d07 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_context.h
+++ b/src/gallium/drivers/etnaviv/etnaviv_context.h
@@ -136,9 +136,6 @@ struct etna_context {
uint32_t prim_hwsupport;
struct primconvert_context *primconvert;
 
-   /* list of resources used by currently-unsubmitted renders */
-   struct list_head used_resources;
-
struct slab_child_pool transfer_pool;
struct blitter_context *blitter;
 
diff --git a/src/gallium/drivers/etnaviv/etnaviv_resource.c 
b/src/gallium/drivers/etnaviv/etnaviv_resource.c
index c0091288030..00b5f43bf3f 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_resource.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_resource.c
@@ -33,6 +33,7 @@
 #include "etnaviv_screen.h"
 #include "etnaviv_translate.h"
 
+#include "util/hash_table.h"
 #include "util/u_inlines.h"
 #include "util/u_memory.h"
 
@@ -282,7 +283,6 @@ etna_resource_alloc(struct pipe_screen *pscreen, unsigned 
layout,
rsc->halign = halign;
 
pipe_reference_init(&rsc->base.reference, 1);
-   list_inithead(&rsc->list);
 
size = setup_miptree(rsc, paddingX, paddingY, msaa_xscale, msaa_yscale);
 
@@ -303,6 +303,11 @@ etna_resource_alloc(struct pipe_screen *pscreen, unsigned 
layout,
   memset(map, 0, size);
}
 
+   rsc->pending_ctx = _mesa_set_create(NULL, _mesa_hash_pointer,
+   _mesa_key_pointer_equal);
+   if (!rsc->pending_ctx)
+  goto free_rsc;
+
return &rsc->base;
 
 free_rsc:
@@ -462,8 +467,12 @@ etna_resource_changed(struct pipe_screen *pscreen, struct 
pipe_resource *prsc)
 static void
 etna_resource_destroy(struct pipe_screen *pscreen, struct pipe_resource *prsc)
 {
+   struct etna_screen *screen = etna_screen(pscreen);
struct etna_resource *rsc = etna_resource(prsc);
 
+   _mesa_set_destroy(rsc->pending_ctx, NULL);
+   _mesa_set_remove_key(screen->used_resources, rsc);
+
if (rsc->bo)
   etna_bo_del(rsc->bo);
 
@@ -471,9 +480,7 @@ etna_resource_destroy(struct pipe_screen *pscreen, struct 
pipe_resource *prsc)
   etna_bo_del(rsc->ts_bo);
 
if (rsc->scanout)
-  renderonly_scanout_destroy(rsc->scanout, etna_screen(pscreen)->ro);
-
-   list_delinit(&rsc->list);
+ 

[Mesa-dev] [Bug 109151] [KBL-G][vulkan] dEQP-VK.texture.explicit_lod.2d.sizes.31x55_nearest_linear_mipmap_nearest_repeat failed verification.

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109151

--- Comment #6 from Hai  ---
Any idea of this issue? Thanks.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109138] [KBL-G][GL-CTS]KHR-GL31.texture_size_promotion.functional test failed

2019-01-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109138

--- Comment #2 from Hai  ---
Is there any update? Thanks.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev