Re: [Mesa-dev] [PATCH 2/2] i965/nir: Use nir_system_value_from_intrinsic to reduce duplication.

2015-09-07 Thread Iago Toral
Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

On Mon, 2015-09-07 at 00:30 -0700, Kenneth Graunke wrote:
> This code is all pretty much identical.  We just needed the translation
> from one enum value to the other.
> 
> Signed-off-by: Kenneth Graunke <kenn...@whitecape.org>
> ---
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp   | 47 
> +++---
>  src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 30 +--
>  2 files changed, 17 insertions(+), 60 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 419ab6d..dbf10d6 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -1408,35 +1408,16 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, 
> nir_intrinsic_instr *instr
> case nir_intrinsic_load_vertex_id:
>unreachable("should be lowered by lower_vertex_id()");
>  
> -   case nir_intrinsic_load_vertex_id_zero_base: {
> -  fs_reg vertex_id = nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
> -  assert(vertex_id.file != BAD_FILE);
> -  dest.type = vertex_id.type;
> -  bld.MOV(dest, vertex_id);
> -  break;
> -   }
> -
> -   case nir_intrinsic_load_base_vertex: {
> -  fs_reg base_vertex = nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
> -  assert(base_vertex.file != BAD_FILE);
> -  dest.type = base_vertex.type;
> -  bld.MOV(dest, base_vertex);
> -  break;
> -   }
> -
> -   case nir_intrinsic_load_instance_id: {
> -  fs_reg instance_id = nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
> -  assert(instance_id.file != BAD_FILE);
> -  dest.type = instance_id.type;
> -  bld.MOV(dest, instance_id);
> -  break;
> -   }
> -
> -   case nir_intrinsic_load_sample_mask_in: {
> -  fs_reg sample_mask_in = nir_system_values[SYSTEM_VALUE_SAMPLE_MASK_IN];
> -  assert(sample_mask_in.file != BAD_FILE);
> -  dest.type = sample_mask_in.type;
> -  bld.MOV(dest, sample_mask_in);
> +   case nir_intrinsic_load_vertex_id_zero_base:
> +   case nir_intrinsic_load_base_vertex:
> +   case nir_intrinsic_load_instance_id:
> +   case nir_intrinsic_load_sample_mask_in:
> +   case nir_intrinsic_load_sample_id: {
> +  gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
> +  fs_reg val = nir_system_values[sv];
> +  assert(val.file != BAD_FILE);
> +  dest.type = val.type;
> +  bld.MOV(dest, val);
>break;
> }
>  
> @@ -1449,14 +1430,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, 
> nir_intrinsic_instr *instr
>break;
> }
>  
> -   case nir_intrinsic_load_sample_id: {
> -  fs_reg sample_id = nir_system_values[SYSTEM_VALUE_SAMPLE_ID];
> -  assert(sample_id.file != BAD_FILE);
> -  dest.type = sample_id.type;
> -  bld.MOV(dest, sample_id);
> -  break;
> -   }
> -
> case nir_intrinsic_load_uniform_indirect:
>has_indirect = true;
>/* fallthrough */
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> index f3dc112..751ec73 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> @@ -533,30 +533,14 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr 
> *instr)
> case nir_intrinsic_load_vertex_id:
>unreachable("should be lowered by lower_vertex_id()");
>  
> -   case nir_intrinsic_load_vertex_id_zero_base: {
> -  src_reg vertex_id =
> - src_reg(nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE]);
> -  assert(vertex_id.file != BAD_FILE);
> -  dest = get_nir_dest(instr->dest, vertex_id.type);
> -  emit(MOV(dest, vertex_id));
> -  break;
> -   }
> -
> -   case nir_intrinsic_load_base_vertex: {
> -  src_reg base_vertex =
> - src_reg(nir_system_values[SYSTEM_VALUE_BASE_VERTEX]);
> -  assert(base_vertex.file != BAD_FILE);
> -  dest = get_nir_dest(instr->dest, base_vertex.type);
> -  emit(MOV(dest, base_vertex));
> -  break;
> -   }
> -
> +   case nir_intrinsic_load_vertex_id_zero_base:
> +   case nir_intrinsic_load_base_vertex:
> case nir_intrinsic_load_instance_id: {
> -  src_reg instance_id =
> - src_reg(nir_system_values[SYSTEM_VALUE_INSTANCE_ID]);
> -  assert(instance_id.file != BAD_FILE);
> -  dest = get_nir_dest(instr->dest, instance_id.type);
> -  emit(MOV(dest, instance_id));
> +  gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
> +  src_reg val = src_reg(nir_system_values[sv]);
> +  assert(val.file != BAD_FILE);
> +  dest = get_nir_dest(instr->dest, val.type);
> +  emit(MOV(dest, val));
>break;
> }
>  


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Disallow fast blit paths for CopyTexImage with PixelTransfer ops

2015-09-07 Thread Iago Toral
Looks correct, based on the previous discussion about the same fix for
ReadPixels and TexImage. CopyTexImage has the same requirements.

Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

On Sun, 2015-09-06 at 17:37 +0100, Chris Wilson wrote:
> glCopyTexImage behaves similarly to glReadPixels with respect to the
> pixel transfer operations. Therefore, if any are set, we cannot use the
> simple blit fast paths.
> 
> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
> Cc: Jason Ekstrand <jason.ekstr...@intel.com>
> Cc: Kenneth Graunke <kenn...@whitecape.org>
> ---
>  src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 4 
>  src/mesa/drivers/dri/i965/intel_tex_copy.c   | 4 
>  2 files changed, 8 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
> b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
> index 205c905..ba11d3d 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
> @@ -215,6 +215,10 @@ brw_blorp_copytexsubimage(struct brw_context *brw,
> struct intel_renderbuffer *src_irb = intel_renderbuffer(src_rb);
> struct intel_texture_image *intel_image = intel_texture_image(dst_image);
>  
> +   /* No pixel transfer operations (zoom, bias, mapping), just a blit */
> +   if (brw->ctx._ImageTransferState)
> +  return false;
> +
> /* Sync up the state of window system buffers.  We need to do this before
>  * we go looking at the src renderbuffer's miptree.
>  */
> diff --git a/src/mesa/drivers/dri/i965/intel_tex_copy.c 
> b/src/mesa/drivers/dri/i965/intel_tex_copy.c
> index 4d8c82e..ecdd052 100644
> --- a/src/mesa/drivers/dri/i965/intel_tex_copy.c
> +++ b/src/mesa/drivers/dri/i965/intel_tex_copy.c
> @@ -55,6 +55,10 @@ intel_copy_texsubimage(struct brw_context *brw,
> const GLenum internalFormat = intelImage->base.Base.InternalFormat;
> bool ret;
>  
> +   /* No pixel transfer operations (zoom, bias, mapping), just a blit */
> +   if (brw->ctx._ImageTransferState)
> +  return false;
> +
> intel_prepare_render(brw);
>  
> /* glCopyTexSubImage() can be called on a multisampled renderbuffer (if


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] nir: Add a nir_system_value_from_intrinsic() function.

2015-09-07 Thread Iago Toral
Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

On Mon, 2015-09-07 at 00:30 -0700, Kenneth Graunke wrote:
> This converts NIR intrinsics that load system values into Mesa's
> SYSTEM_VALUE_* enumerations.
> 
> Signed-off-by: Kenneth Graunke <kenn...@whitecape.org>
> ---
>  src/glsl/nir/nir.c | 34 ++
>  src/glsl/nir/nir.h |  2 ++
>  2 files changed, 36 insertions(+)
> 
> diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
> index 3f4bec4..ab06ea2 100644
> --- a/src/glsl/nir/nir.c
> +++ b/src/glsl/nir/nir.c
> @@ -1404,3 +1404,37 @@ nir_index_ssa_defs(nir_function_impl *impl)
> nir_foreach_block(impl, index_ssa_block, &index);
> impl->ssa_alloc = index;
>  }
> +
> +gl_system_value
> +nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
> +{
> +   switch (intrin) {
> +   case nir_intrinsic_load_vertex_id:
> +  return SYSTEM_VALUE_VERTEX_ID;
> +   case nir_intrinsic_load_instance_id:
> +  return SYSTEM_VALUE_INSTANCE_ID;
> +   case nir_intrinsic_load_vertex_id_zero_base:
> +  return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
> +   case nir_intrinsic_load_base_vertex:
> +  return SYSTEM_VALUE_BASE_VERTEX;
> +   case nir_intrinsic_load_invocation_id:
> +  return SYSTEM_VALUE_INVOCATION_ID;
> +   case nir_intrinsic_load_front_face:
> +  return SYSTEM_VALUE_FRONT_FACE;
> +   case nir_intrinsic_load_sample_id:
> +  return SYSTEM_VALUE_SAMPLE_ID;
> +   case nir_intrinsic_load_sample_pos:
> +  return SYSTEM_VALUE_SAMPLE_POS;
> +   case nir_intrinsic_load_sample_mask_in:
> +  return SYSTEM_VALUE_SAMPLE_MASK_IN;
> +   /* FINISHME: Add tessellation intrinsics.
> +  return SYSTEM_VALUE_TESS_COORD;
> +  return SYSTEM_VALUE_VERTICES_IN;
> +  return SYSTEM_VALUE_PRIMITIVE_ID;
> +  return SYSTEM_VALUE_TESS_LEVEL_OUTER;
> +  return SYSTEM_VALUE_TESS_LEVEL_INNER;
> +*/
> +   default:
> +  unreachable("intrinsic doesn't produce a system value");
> +   }
> +}
> diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
> index 53e883e..b13e92d 100644
> --- a/src/glsl/nir/nir.h
> +++ b/src/glsl/nir/nir.h
> @@ -1843,6 +1843,8 @@ bool nir_opt_undef(nir_shader *shader);
>  
>  void nir_sweep(nir_shader *shader);
>  
> +gl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin);
> +
>  #ifdef __cplusplus
>  } /* extern "C" */
>  #endif


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/4] Resolve GCC missing field initializer warnings

2015-09-08 Thread Iago Toral
Thanks! I think I would have squashed these together in one patch; it is
the same one-line fix in 4 consecutive lines after all. In any case,
this series is:

Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

On Tue, 2015-09-08 at 20:21 +1000, Rhys Kidd wrote:
> Resolve a series of missing field initializer warnings within 
> get_hash_params.py
> 
> This changeset addresses some likely code rot around the *extra field, where 
> the
> initialization is via C code generated indirectly from a Python script.
> 
> As a new contributor, I am continuing to focus on manageable, easily reviewed,
> discrete improvements. This patchset resolves a number of warnings reported
> by GCC when configured to be pedantic.
> 
> $ gcc --version
> gcc (Ubuntu 4.9.2-10ubuntu13) 4.9.2
> 
> No piglit regressions on Ironlake.
> 
> Rhys Kidd (4):
>   mesa: Resolve GCC missing field initializer warning.
>   mesa: Resolve GCC missing field initializer warning.
>   mesa: Resolve GCC missing field initializer warning.
>   mesa: Resolve GCC missing field initializer warning.
> 
>  src/mesa/main/get_hash_params.py | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 4/4] i965/vec4: Don't unspill the same register in consecutive instructions

2015-09-03 Thread Iago Toral
On Wed, 2015-09-02 at 17:53 +0300, Francisco Jerez wrote:
> Iago Toral <ito...@igalia.com> writes:
> 
> > On Wed, 2015-09-02 at 14:29 +0300, Francisco Jerez wrote:
> >> Iago Toral <ito...@igalia.com> writes:
> >> 
> >> > Hi Curro,
> >> >
> >> > I have been a couple of weeks on holidays and have just come back to
> >> > this:
> >> >
> >> > On Thu, 2015-08-06 at 18:27 +0300, Francisco Jerez wrote:
> >> >> Iago Toral Quiroga <ito...@igalia.com> writes:
> >> >> 
> >> >> > If we have spilled/unspilled a register in the current instruction, 
> >> >> > avoid
> >> >> > emitting unspills for the same register in the same instruction or 
> >> >> > consecutive
> >> >> > instructions following the current one as long as they keep reading 
> >> >> > the spilled
> >> >> > register. This should allow us to avoid emitting costy unspills that 
> >> >> > come with
> >> >> > little benefit to register allocation.
> >> >> >
> >> >> > Also, update evaluate_spill_costs so that we account for the saved 
> >> >> > unspills.
> >> >> > ---
> >> >> >  .../drivers/dri/i965/brw_vec4_reg_allocate.cpp | 129 
> >> >> > +++--
> >> >> >  1 file changed, 121 insertions(+), 8 deletions(-)
> >> >> >
> >> >> > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp 
> >> >> > b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
> >> >> > index 617c988..fed5f4d 100644
> >> >> > --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
> >> >> > +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
> >> >> > @@ -264,6 +264,95 @@ vec4_visitor::reg_allocate()
> >> >> > return true;
> >> >> >  }
> >> >> >  
> >> >> > +/**
> >> >> > + * When we decide to spill a register, instead of blindly spilling 
> >> >> > every use,
> >> >> > + * save unspills when the spill register is used (read) in 
> >> >> > consecutive
> >> >> > + * instructions. This can potentially save a bunch of unspills that 
> >> >> > would
> >> >> > + * have very little impact in register allocation anyway.
> >> >> > + *
> >> >> > + * Notice that we need to account for this behavior when spilling a 
> >> >> > register
> >> >> > + * and when evaluating spilling costs. This function is designed so 
> >> >> > it can
> >> >> > + * be called from both places and avoid repeating the logic.
> >> >> > + *
> >> >> > + *  - When we call this function from spill_reg, we pass in 
> >> >> > scratch_reg the
> >> >> > + *actual unspill/spill register that we want to reuse in the 
> >> >> > current
> >> >> > + *instruction.
> >> >> > + *
> >> >> > + *  - When we call this from evaluate_spill_costs, we pass the 
> >> >> > register for
> >> >> > + *which we are evaluating spilling costs.
> >> >> > + *
> >> >> > + * In either case, we check if the previous instructions read 
> >> >> > scratch_reg until
> >> >> > + * we find an instruction that writes to it (in which case we can 
> >> >> > reuse
> >> >> > + * scratch_reg as long as the writemask is compatible with the 
> >> >> > channels we need
> >> >> > + * to read in the current instruction) or we hit an instruction that 
> >> >> > does not
> >> >> > + * read scratch_reg at all. The latter can only happen when we call 
> >> >> > this from
> >> >> > + * evaluate_spill_costs,
> >> >> 
> >> >> Strictly speaking it can also happen when called from spill_reg() for
> >> >> the first time in a given sequence of consecutive instructions (in which
> >> >> case you correctly return false).
> >> >
> >> > not really, spill_reg() knows if it is the first time that it is
> >> > spilling a register and won't call this function in that case.
> >> >
> >> You

Re: [Mesa-dev] [PATCH v2 6/6] i965: Add a debug option for spilling everything in vec4 code

2015-09-03 Thread Iago Toral
On Wed, 2015-09-02 at 14:32 +0300, Francisco Jerez wrote:
> Iago Toral <ito...@igalia.com> writes:
> 
> > On Thu, 2015-07-30 at 16:13 +0300, Francisco Jerez wrote:
> >> Iago Toral <ito...@igalia.com> writes:
> >> 
> >> > On Thu, 2015-07-30 at 15:58 +0300, Francisco Jerez wrote:
> >> >> Iago Toral Quiroga <ito...@igalia.com> writes:
> >> >> 
> >> >> > ---
> >> >> >  src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 2 +-
> >> >> >  src/mesa/drivers/dri/i965/brw_vec4.cpp| 2 +-
> >> >> >  src/mesa/drivers/dri/i965/intel_debug.c   | 3 ++-
> >> >> >  src/mesa/drivers/dri/i965/intel_debug.h   | 5 +++--
> >> >> >  4 files changed, 7 insertions(+), 5 deletions(-)
> >> >> >
> >> >> > diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp 
> >> >> > b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> >> >> > index f25f2ec..714248a 100644
> >> >> > --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> >> >> > +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> >> >> > @@ -634,7 +634,7 @@ fs_visitor::assign_regs(bool allow_spilling)
> >> >> > }
> >> >> >  
> >> >> > /* Debug of register spilling: Go spill everything. */
> >> >> > -   if (unlikely(INTEL_DEBUG & DEBUG_SPILL)) {
> >> >> > +   if (unlikely(INTEL_DEBUG & DEBUG_SPILL_FS)) {
> >> >> >int reg = choose_spill_reg(g);
> >> >> >  
> >> >> >if (reg != -1) {
> >> >> > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
> >> >> > b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> >> >> > index 53270fb..6cf5ede 100644
> >> >> > --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> >> >> > +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> >> >> > @@ -1814,7 +1814,7 @@ vec4_visitor::run(gl_clip_plane *clip_planes)
> >> >> >  
> >> >> > setup_payload();
> >> >> >  
> >> >> > -   if (false) {
> >> >> > +   if (unlikely(INTEL_DEBUG & DEBUG_SPILL_VEC4)) {
> >> >> >/* Debug of register spilling: Go spill everything. */
> >> >> >const int grf_count = alloc.count;
> >> >> >float spill_costs[alloc.count];
> >> >> > diff --git a/src/mesa/drivers/dri/i965/intel_debug.c 
> >> >> > b/src/mesa/drivers/dri/i965/intel_debug.c
> >> >> > index a077731..8d34349 100644
> >> >> > --- a/src/mesa/drivers/dri/i965/intel_debug.c
> >> >> > +++ b/src/mesa/drivers/dri/i965/intel_debug.c
> >> >> > @@ -69,7 +69,8 @@ static const struct dri_debug_control 
> >> >> > debug_control[] = {
> >> >> > { "ann", DEBUG_ANNOTATION },
> >> >> > { "no8", DEBUG_NO8 },
> >> >> > { "vec4vs",  DEBUG_VEC4VS },
> >> >> > -   { "spill",   DEBUG_SPILL },
> >> >> > +   { "spill_frag",  DEBUG_SPILL_FS },
> >> >> 
> >> >> How about we call this "spill_fs" instead?  The flag doesn't only affect
> >> >> fragment shaders, AFAICT it will cause all programs compiled with the FS
> >> >> back-end [F for fast ;)] to spill everything.  With that fixed:
> >> >
> >> > that was my first choice, but if we do that it seems that
> >> > driParseDebugString will also mark INTEL_DEBUG=fs as enabled.
> >> >
> >> > It seems as if this function checks if any of the string options is
> >> > present in the provided string to enable them, so we can't really use an
> >> > option name where any substring of it is included as a separate
> >> > option :-(
> >> >
> >> 
> >> Oh man...  That sounds seriously broken...
> >
> > So with that explanation as to why we can't change that, does your Rb
> > stand?
> >
> Seems like a hack and might be confusing because it will cause shaders
> of stages other than fragment to be spilled.  But if you insist on using
> a band-aid solution you can have my:
> 
> Acked-by: Francisco Jerez <curroje...@riseup.net>

It seems that driPar

Re: [Mesa-dev] [PATCH 13/17 v2] glsl: Silence unused parameter warnings

2015-09-02 Thread Iago Toral
On Tue, 2015-09-01 at 18:56 -0700, Ian Romanick wrote:
(...)
> diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp
> index dd7804f..bd25f45 100644
> --- a/src/glsl/builtin_variables.cpp
> +++ b/src/glsl/builtin_variables.cpp
> @@ -383,8 +383,7 @@ private:
> ir_variable *add_uniform(const glsl_type *type, const char *name);
> ir_variable *add_const(const char *name, int value);
> ir_variable *add_const_ivec3(const char *name, int x, int y, int z);
> -   void add_varying(int slot, const glsl_type *type, const char *name,
> -const char *name_as_gs_input);
> +   void add_varying(int slot, const glsl_type *type, const char *name);
>  
> exec_list * const instructions;
> struct _mesa_glsl_parse_state * const state;
> @@ -1064,8 +1063,7 @@ builtin_variable_generator::generate_cs_special_vars()
>   */
>  void
>  builtin_variable_generator::add_varying(int slot, const glsl_type *type,
> -const char *name,
> -const char *name_as_gs_input)
> +const char *name)

You also want to remove the reference to 'name_as_gs_input' in the
comment right above this function.

Other than this,
Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

>  {
> switch (state->stage) {
> case MESA_SHADER_TESS_CTRL:
> @@ -1094,7 +1092,7 @@ void
>  builtin_variable_generator::generate_varyings()
>  {
>  #define ADD_VARYING(loc, type, name) \
> -   add_varying(loc, type, name, name "In")
> +   add_varying(loc, type, name)
>  
> /* gl_Position and gl_PointSize are not visible from fragment shaders. */
> if (state->stage != MESA_SHADER_FRAGMENT) {
> diff --git a/src/glsl/ir.h b/src/glsl/ir.h
> index ede8caa..7cdea01 100644
> --- a/src/glsl/ir.h
> +++ b/src/glsl/ir.h
> @@ -2523,8 +2523,7 @@ _mesa_glsl_find_builtin_function(_mesa_glsl_parse_state 
> *state,
>   const char *name, exec_list 
> *actual_parameters);
>  
>  extern ir_function *
> -_mesa_glsl_find_builtin_function_by_name(_mesa_glsl_parse_state *state,
> - const char *name);
> +_mesa_glsl_find_builtin_function_by_name(const char *name);
>  
>  extern gl_shader *
>  _mesa_glsl_get_builtin_function_shader(void);
> diff --git a/src/glsl/ir_print_visitor.cpp b/src/glsl/ir_print_visitor.cpp
> index 8dbd938..b683269 100644
> --- a/src/glsl/ir_print_visitor.cpp
> +++ b/src/glsl/ir_print_visitor.cpp
> @@ -586,7 +586,7 @@ ir_print_visitor::visit(ir_end_primitive *ir)
>  }
>  
>  void
> -ir_print_visitor::visit(ir_barrier *ir)
> +ir_print_visitor::visit(ir_barrier *)
>  {
> fprintf(f, "(barrier)\n");
>  }
> diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
> index bc5e55b..7fc70de 100644
> --- a/src/glsl/linker.cpp
> +++ b/src/glsl/linker.cpp
> @@ -3209,8 +3209,7 @@ add_interface_variables(struct gl_shader_program 
> *shProg,
>   * resource data.
>   */
>  void
> -build_program_resource_list(struct gl_context *ctx,
> -struct gl_shader_program *shProg)
> +build_program_resource_list(struct gl_shader_program *shProg)
>  {
> /* Rebuild resource list. */
> if (shProg->ProgramResourceList) {
> diff --git a/src/glsl/program.h b/src/glsl/program.h
> index c06541a..64f5463 100644
> --- a/src/glsl/program.h
> +++ b/src/glsl/program.h
> @@ -40,8 +40,7 @@ extern void
>  link_shaders(struct gl_context *ctx, struct gl_shader_program *prog);
>  
>  extern void
> -build_program_resource_list(struct gl_context *ctx,
> -struct gl_shader_program *shProg);
> +build_program_resource_list(struct gl_shader_program *shProg);
>  
>  extern void
>  linker_error(struct gl_shader_program *prog, const char *fmt, ...)
> diff --git a/src/glsl/standalone_scaffolding.cpp 
> b/src/glsl/standalone_scaffolding.cpp
> index 6033364..e52869f 100644
> --- a/src/glsl/standalone_scaffolding.cpp
> +++ b/src/glsl/standalone_scaffolding.cpp
> @@ -62,7 +62,7 @@ _mesa_reference_shader(struct gl_context *ctx, struct 
> gl_shader **ptr,
>  }
>  
>  void
> -_mesa_shader_debug(struct gl_context *, GLenum, GLuint *id,
> +_mesa_shader_debug(struct gl_context *, GLenum, GLuint *,
> const char *, int)
>  {
>  }
> diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
> index 8f58f3e..0defed8 100644
> --- a/src/mesa/program/ir_to_mesa.cpp
> +++ b/src/mesa/program/ir_to_mesa.cpp
> @@ -2979,7 +2979,7 @@ _mesa_glsl_link_shader(struct gl_context *ctx, struct 
> gl_shader_program *prog)
>if (!ctx->Driver.LinkShader(ctx, prog)) {
>prog->LinkStatus = GL_FALSE;
>} else {
> - build_program_resource_list(ctx, prog);
> + build_program_resource_list(prog);
>}
> }
>  


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 4/4] i965/vec4: Don't unspill the same register in consecutive instructions

2015-09-03 Thread Iago Toral
On Thu, 2015-09-03 at 13:15 +0300, Francisco Jerez wrote:
> Iago Toral <ito...@igalia.com> writes:
> 
> > On Wed, 2015-09-02 at 17:53 +0300, Francisco Jerez wrote:
> >> Iago Toral <ito...@igalia.com> writes:
> >> 
> >> > On Wed, 2015-09-02 at 14:29 +0300, Francisco Jerez wrote:
> >> >> Iago Toral <ito...@igalia.com> writes:
> >> >> 
> >> >> > Hi Curro,
> >> >> >
> >> >> > I have been a couple of weeks on holidays and have just come back to
> >> >> > this:
> >> >> >
> >> >> > On Thu, 2015-08-06 at 18:27 +0300, Francisco Jerez wrote:
> >> >> >> Iago Toral Quiroga <ito...@igalia.com> writes:
> >> >> >> 
> >> >> >> > If we have spilled/unspilled a register in the current 
> >> >> >> > instruction, avoid
> >> >> >> > emitting unspills for the same register in the same instruction or 
> >> >> >> > consecutive
> >> >> >> > instructions following the current one as long as they keep 
> >> >> >> > reading the spilled
> >> >> >> > register. This should allow us to avoid emitting costy unspills 
> >> >> >> > that come with
> >> >> >> > little benefit to register allocation.
> >> >> >> >
> >> >> >> > Also, update evaluate_spill_costs so that we account for the saved 
> >> >> >> > unspills.
> >> >> >> > ---
> >> >> >> >  .../drivers/dri/i965/brw_vec4_reg_allocate.cpp | 129 
> >> >> >> > +++--
> >> >> >> >  1 file changed, 121 insertions(+), 8 deletions(-)
> >> >> >> >
> >> >> >> > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp 
> >> >> >> > b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
> >> >> >> > index 617c988..fed5f4d 100644
> >> >> >> > --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
> >> >> >> > +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
> >> >> >> > @@ -264,6 +264,95 @@ vec4_visitor::reg_allocate()
> >> >> >> > return true;
> >> >> >> >  }
> >> >> >> >  
> >> >> >> > +/**
> >> >> >> > + * When we decide to spill a register, instead of blindly 
> >> >> >> > spilling every use,
> >> >> >> > + * save unspills when the spill register is used (read) in 
> >> >> >> > consecutive
> >> >> >> > + * instructions. This can potentially save a bunch of unspills 
> >> >> >> > that would
> >> >> >> > + * have very little impact in register allocation anyway.
> >> >> >> > + *
> >> >> >> > + * Notice that we need to account for this behavior when spilling 
> >> >> >> > a register
> >> >> >> > + * and when evaluating spilling costs. This function is designed 
> >> >> >> > so it can
> >> >> >> > + * be called from both places and avoid repeating the logic.
> >> >> >> > + *
> >> >> >> > + *  - When we call this function from spill_reg, we pass in 
> >> >> >> > scratch_reg the
> >> >> >> > + *actual unspill/spill register that we want to reuse in the 
> >> >> >> > current
> >> >> >> > + *instruction.
> >> >> >> > + *
> >> >> >> > + *  - When we call this from evaluate_spill_costs, we pass the 
> >> >> >> > register for
> >> >> >> > + *which we are evaluating spilling costs.
> >> >> >> > + *
> >> >> >> > + * In either case, we check if the previous instructions read 
> >> >> >> > scratch_reg until
> >> >> >> > + * we find an instruction that writes to it (in which case we can 
> >> >> >> > reuse
> >> >> >> > + * scratch_reg as long as the writemask is compatible with the 
> >> >> >> > channels we need
> >> >> >> > + * to read in the curre

Re: [Mesa-dev] [PATCH 1/2] dri/common: embed drirc into driver binaries

2015-09-02 Thread Iago Toral
Both patches do what they advertise and seem to work, so they are

Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

That said, I imagine you probably want to get at least a few ACKs
from other devs, like Illia did, to make sure that your changes have
enough support.

Iago

On Wed, 2015-09-02 at 02:26 +0200, Marek Olšák wrote:
> From: Marek Olšák <marek.ol...@amd.com>
> 
> People are having issues with apps because drirc wasn't installed
> into /etc. I've lost patience.
> ---
>  src/mesa/drivers/dri/common/Makefile.am  |   4 +-
>  src/mesa/drivers/dri/common/Makefile.sources |   3 +-
>  src/mesa/drivers/dri/common/drirc|  84 
>  src/mesa/drivers/dri/common/drirc_built_in.h | 111 
> +++
>  src/mesa/drivers/dri/common/xmlconfig.c  |  30 +++-
>  5 files changed, 140 insertions(+), 92 deletions(-)
>  delete mode 100644 src/mesa/drivers/dri/common/drirc
>  create mode 100644 src/mesa/drivers/dri/common/drirc_built_in.h
> 
> diff --git a/src/mesa/drivers/dri/common/Makefile.am 
> b/src/mesa/drivers/dri/common/Makefile.am
> index b307f10..7106abe 100644
> --- a/src/mesa/drivers/dri/common/Makefile.am
> +++ b/src/mesa/drivers/dri/common/Makefile.am
> @@ -23,7 +23,7 @@ SUBDIRS = xmlpool
>  
>  include Makefile.sources
>  
> -EXTRA_DIST = drirc xmlpool.h SConscript
> +EXTRA_DIST = xmlpool.h SConscript
>  
>  AM_CFLAGS = \
>   -I$(top_srcdir)/include \
> @@ -52,5 +52,3 @@ libdri_test_stubs_la_SOURCES = $(test_stubs_FILES)
>  libdri_test_stubs_la_CFLAGS = $(AM_CFLAGS) -DNO_MAIN
>  
>  libmegadriver_stub_la_SOURCES = $(megadriver_stub_FILES)
> -
> -sysconf_DATA = drirc
> diff --git a/src/mesa/drivers/dri/common/Makefile.sources 
> b/src/mesa/drivers/dri/common/Makefile.sources
> index d5d8da8..71ba01d 100644
> --- a/src/mesa/drivers/dri/common/Makefile.sources
> +++ b/src/mesa/drivers/dri/common/Makefile.sources
> @@ -6,7 +6,8 @@ DRI_COMMON_FILES := \
>  
>  XMLCONFIG_FILES := \
>   xmlconfig.c \
> - xmlconfig.h
> + xmlconfig.h \
> + drirc_built_in.h
>  
>  # Paths are relative to MESA_TOP.
>  mesa_dri_common_INCLUDES := \
> diff --git a/src/mesa/drivers/dri/common/drirc 
> b/src/mesa/drivers/dri/common/drirc
> deleted file mode 100644
> index bb840ea..000
> --- a/src/mesa/drivers/dri/common/drirc
> +++ /dev/null
> @@ -1,84 +0,0 @@
> -
> -
> -
> -
> -
> -
> -
> -
> - 
> -
> -
> -
> -
> - 
> -
> -
> - value="true" />
> - 
> -
> -
> - value="true" />
> - 
> -
> -
> - value="true" />
> - 
> -
> -
> - value="true" />
> - 
> -
> - executable="OilRush_x86">
> -
> - value="true" />
> - 
> -
> - executable="OilRush_x64">
> -
> - value="true" />
> - 
> -
> -
> -
> -
> -
> -
> -
> -
> -
> -
> -
> -
> -
> -
> - value="true" />
> -
> -
> - executable="do-not-directly-run-secondlife-bin">
> - value="true" />
> -
> -
> -
> diff --git a/src/mesa/drivers/dri/common/drirc_built_in.h 
> b/src/mesa/drivers/dri/common/drirc_built_in.h
> new file mode 100644
> index 000..592b1d1
> --- /dev/null
> +++ b/src/mesa/drivers/dri/common/drirc_built_in.h
> @@ -0,0 +1,111 @@
> +/*
> + * Copyright 2015 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * on the rights to use, copy, modify, merge, publish, distribute, sub
> + * license, and/or sell copies of the Software, and to permit persons to whom
> + * the Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PA

Re: [Mesa-dev] [PATCH v3 4/4] i965/vec4: Don't unspill the same register in consecutive instructions

2015-09-02 Thread Iago Toral
Hi Curro,

I have been on holiday for a couple of weeks and have just come back to
this:

On Thu, 2015-08-06 at 18:27 +0300, Francisco Jerez wrote:
> Iago Toral Quiroga <ito...@igalia.com> writes:
> 
> > If we have spilled/unspilled a register in the current instruction, avoid
> > emitting unspills for the same register in the same instruction or 
> > consecutive
> > instructions following the current one as long as they keep reading the 
> > spilled
> > register. This should allow us to avoid emitting costly unspills that come 
> > with
> > little benefit to register allocation.
> >
> > Also, update evaluate_spill_costs so that we account for the saved unspills.
> > ---
> >  .../drivers/dri/i965/brw_vec4_reg_allocate.cpp | 129 
> > +++--
> >  1 file changed, 121 insertions(+), 8 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp 
> > b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
> > index 617c988..fed5f4d 100644
> > --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
> > @@ -264,6 +264,95 @@ vec4_visitor::reg_allocate()
> > return true;
> >  }
> >  
> > +/**
> > + * When we decide to spill a register, instead of blindly spilling every 
> > use,
> > + * save unspills when the spill register is used (read) in consecutive
> > + * instructions. This can potentially save a bunch of unspills that would
> > + * have very little impact in register allocation anyway.
> > + *
> > + * Notice that we need to account for this behavior when spilling a 
> > register
> > + * and when evaluating spilling costs. This function is designed so it can
> > + * be called from both places and avoid repeating the logic.
> > + *
> > + *  - When we call this function from spill_reg, we pass in scratch_reg the
> > + *actual unspill/spill register that we want to reuse in the current
> > + *instruction.
> > + *
> > + *  - When we call this from evaluate_spill_costs, we pass the register for
> > + *which we are evaluating spilling costs.
> > + *
> > + * In either case, we check if the previous instructions read scratch_reg 
> > until
> > + * we find an instruction that writes to it (in which case we can reuse
> > + * scratch_reg as long as the writemask is compatible with the channels we 
> > need
> > + * to read in the current instruction) or we hit an instruction that does 
> > not
> > + * read scratch_reg at all. The latter can only happen when we call this 
> > from
> > + * evaluate_spill_costs,
> 
> Strictly speaking it can also happen when called from spill_reg() for
> the first time in a given sequence of consecutive instructions (in which
> case you correctly return false).

Not really: spill_reg() knows whether it is the first time it is
spilling a register and won't call this function in that case.

> >  and means that this is the point at which we first
> > + * need the unspill this register for our current instruction. Since all 
> > our
> > + * unspills read a full vec4, we know that in this case we will have all
> > + * the channels available in scratch_reg and we can reuse it.
> > + *
> > + * In any other case, we can't reuse scratch_reg in the current 
> > instruction,
> > + * meaning that we will need to unspill it.
> > + */
> > +static bool
> > +can_use_scratch_for_source(const vec4_instruction *inst, unsigned i,
> > +   unsigned scratch_reg)
> > +{
> > +   assert(inst->src[i].file == GRF);
> > +
> > +   /* If the current instruction is already using scratch_reg in src[n] 
> > with
> > +* n < i, then we know we can reuse it for src[i] too.
> > +*/
> > +   for (unsigned n = 0; n < i; n++) {
> > +  if (inst->src[n].file == GRF && inst->src[n].reg == scratch_reg)
> > + return true;
> > +   }
> 
> I don't think this is correct in cases where the previous source reused
> the temporary of a previously spilled register with incompatible
> writemask.  You probably want to handle the current instruction
> consistently with the previous ones, i.e. as part of the loop below.
> 
> I suggest you define a variable (e.g. n as you've called it) initially
> equal to i that would determine the number of sources to check for the
> next instruction.  At the end of the loop body it would be re-set to 3,
> what would also cause the destination registers to be checked in
> subsequent iterations.

I have been thinking a 

Re: [Mesa-dev] [PATCH v2 6/6] i965: Add a debug option for spilling everything in vec4 code

2015-09-02 Thread Iago Toral
On Thu, 2015-07-30 at 16:13 +0300, Francisco Jerez wrote:
> Iago Toral <ito...@igalia.com> writes:
> 
> > On Thu, 2015-07-30 at 15:58 +0300, Francisco Jerez wrote:
> >> Iago Toral Quiroga <ito...@igalia.com> writes:
> >> 
> >> > ---
> >> >  src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 2 +-
> >> >  src/mesa/drivers/dri/i965/brw_vec4.cpp| 2 +-
> >> >  src/mesa/drivers/dri/i965/intel_debug.c   | 3 ++-
> >> >  src/mesa/drivers/dri/i965/intel_debug.h   | 5 +++--
> >> >  4 files changed, 7 insertions(+), 5 deletions(-)
> >> >
> >> > diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp 
> >> > b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> >> > index f25f2ec..714248a 100644
> >> > --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> >> > +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> >> > @@ -634,7 +634,7 @@ fs_visitor::assign_regs(bool allow_spilling)
> >> > }
> >> >  
> >> > /* Debug of register spilling: Go spill everything. */
> >> > -   if (unlikely(INTEL_DEBUG & DEBUG_SPILL)) {
> >> > +   if (unlikely(INTEL_DEBUG & DEBUG_SPILL_FS)) {
> >> >int reg = choose_spill_reg(g);
> >> >  
> >> >if (reg != -1) {
> >> > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
> >> > b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> >> > index 53270fb..6cf5ede 100644
> >> > --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> >> > +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> >> > @@ -1814,7 +1814,7 @@ vec4_visitor::run(gl_clip_plane *clip_planes)
> >> >  
> >> > setup_payload();
> >> >  
> >> > -   if (false) {
> >> > +   if (unlikely(INTEL_DEBUG & DEBUG_SPILL_VEC4)) {
> >> >/* Debug of register spilling: Go spill everything. */
> >> >const int grf_count = alloc.count;
> >> >float spill_costs[alloc.count];
> >> > diff --git a/src/mesa/drivers/dri/i965/intel_debug.c 
> >> > b/src/mesa/drivers/dri/i965/intel_debug.c
> >> > index a077731..8d34349 100644
> >> > --- a/src/mesa/drivers/dri/i965/intel_debug.c
> >> > +++ b/src/mesa/drivers/dri/i965/intel_debug.c
> >> > @@ -69,7 +69,8 @@ static const struct dri_debug_control debug_control[] 
> >> > = {
> >> > { "ann", DEBUG_ANNOTATION },
> >> > { "no8", DEBUG_NO8 },
> >> > { "vec4vs",  DEBUG_VEC4VS },
> >> > -   { "spill",   DEBUG_SPILL },
> >> > +   { "spill_frag",  DEBUG_SPILL_FS },
> >> 
> >> How about we call this "spill_fs" instead?  The flag doesn't only affect
> >> fragment shaders, AFAICT it will cause all programs compiled with the FS
> >> back-end [F for fast ;)] to spill everything.  With that fixed:
> >
> > that was my first choice, but if we do that it seems that
> > driParseDebugString will also mark INTEL_DEBUG=fs as enabled.
> >
> > It seems as if this function checks if any of the string options is
> > present in the provided string to enable them, so we can't really use an
> > option name where any substring of it is included as a separate
> > option :-(
> >
> 
> Oh man...  That sounds seriously broken...

So with that explanation as to why we can't change that, does your Rb
stand?

> >> Reviewed-by: Francisco Jerez <curroje...@riseup.net>
> >> 
> >> > +   { "spill_vec4",  DEBUG_SPILL_VEC4 },
> >> > { "cs",  DEBUG_CS },
> >> > { NULL,0 }
> >> >  };
> >> > diff --git a/src/mesa/drivers/dri/i965/intel_debug.h 
> >> > b/src/mesa/drivers/dri/i965/intel_debug.h
> >> > index 4689492..b7d0c82 100644
> >> > --- a/src/mesa/drivers/dri/i965/intel_debug.h
> >> > +++ b/src/mesa/drivers/dri/i965/intel_debug.h
> >> > @@ -64,8 +64,9 @@ extern uint64_t INTEL_DEBUG;
> >> >  #define DEBUG_ANNOTATION  (1ull << 28)
> >> >  #define DEBUG_NO8 (1ull << 29)
> >> >  #define DEBUG_VEC4VS  (1ull << 30)
> >> > -#define DEBUG_SPILL   (1ull << 31)
> >> > -#define DEBUG_CS  (1ull << 32)
> >> > +#define DEBUG_SPILL_FS(1ull << 31)
> >> > +#define DEBUG_SPILL_VEC4  (1ull << 32)
> >> > +#define DEBUG_CS  (1ull << 33)
> >> >  
> >> >  #ifdef HAVE_ANDROID_PLATFORM
> >> >  #define LOG_TAG "INTEL-MESA"
> >> > -- 
> >> > 1.9.1


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v4 (part2) 04/56] i965: set ARB_shader_storage_buffer_object related constant values

2015-09-03 Thread Iago Toral
On Thu, 2015-09-03 at 13:52 +0300, Tapani Pälli wrote:
> 
> On 09/03/2015 01:40 PM, Samuel Iglesias Gonsálvez wrote:
> >
> >
> > On 03/09/15 12:30, Tapani Pälli wrote:
> >> Hi;
> >>
> >> On 07/23/2015 09:42 AM, Samuel Iglesias Gonsalvez wrote:
> >>> v2:
> >>> - Add tessellation shader constants assignment
> >>>
> >>> Signed-off-by: Samuel Iglesias Gonsalvez 
> >>> ---
> >>>src/mesa/drivers/dri/i965/brw_context.c | 12 
> >>>1 file changed, 12 insertions(+)
> >>>
> >>> diff --git a/src/mesa/drivers/dri/i965/brw_context.c
> >>> b/src/mesa/drivers/dri/i965/brw_context.c
> >>> index b08a53b..a5c7b91 100644
> >>> --- a/src/mesa/drivers/dri/i965/brw_context.c
> >>> +++ b/src/mesa/drivers/dri/i965/brw_context.c
> >>> @@ -551,6 +551,18 @@ brw_initialize_context_constants(struct
> >>> brw_context *brw)
> >>>   ctx->Const.TextureBufferOffsetAlignment = 16;
> >>>   ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;
> >>>
> >>> +   /* FIXME: Tessellation stages are not yet supported in i965, so
> >>> +* MaxCombinedShaderStorageBlocks doesn't take them into account.
> >>> +*/
> >>> +   ctx->Const.Program[MESA_SHADER_VERTEX].MaxShaderStorageBlocks = 12;
> >>> +   ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxShaderStorageBlocks = 12;
> >>> +   ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxShaderStorageBlocks = 0;
> >>> +   ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxShaderStorageBlocks = 0;
> >>> +   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks = 12;
> >>> +   ctx->Const.Program[MESA_SHADER_COMPUTE].MaxShaderStorageBlocks = 12;
> >>> +   ctx->Const.MaxCombinedShaderStorageBlocks = 12 * 3;
> >>> +   ctx->Const.MaxShaderStorageBufferBindings = 48;
> >>
> >> I think there is something funny with MaxShaderStorageBufferBindings
> >> value calculation. Commit 28ef0f83 adds 12 to it and then this commit
> >> overwrites it as 48. Without compute shaders I guess this value should
> >> be 48 - 12?
> >>
> >> I guess earlier '+12' should be removed and this part should be modified
> >> to calculate based on what is supported. For me '48 -12' fixes a few
> >> crashes I'm getting with
> >> "igalia/wip/siglesias/ARB_shader_storage_buffer_object-v4.3" branch I've
> >> used for some testing.
> >>
> >
> > I see. We are going to review the MaxShaderStorageBufferBindings
> > calculation, thanks for the report and the ideas.
> >
> > Can you share with us the tests that crashed because of this? You can
> > send them privately to me, if needed.
> 
> These are some Piglit tests that use buffer objects. Crash happens at 
> _mesa_free_buffer_objects(), it's not quite clear yet to me why (maybe 
> memory gets trashed) but I bisected it to this commit changing 
> MaxShaderStorageBufferBindings. For example 
> 'arb_framebuffer_no_attachments-atomic' segfaults when destroying the 
> context and calling _mesa_free_buffer_objects().
> 
> Here's example backtrace:
> (gdb) bt
> #0  0x73dae830 in pthread_mutex_lock () from /lib64/libpthread.so.0
> #1  0x71da8681 in mtx_lock (mtx=0x) at 
> ../../include/c11/threads_posix.h:192
> #2  _mesa_reference_buffer_object_ (ctx=0x77fd0040, 
> ptr=0x77ff13d0, bufObj=0x0) at main/bufferobj.c:450
> #3  0x71da9173 in _mesa_reference_buffer_object (bufObj=0x0, 
> ptr=, ctx=0x77fd0040) at main/bufferobj.h:123
> #4  _mesa_free_buffer_objects (ctx=ctx@entry=0x77fd0040) at 
> main/bufferobj.c:907
> #5  0x71db553b in _mesa_free_context_data 
> (ctx=ctx@entry=0x77fd0040) at main/context.c:1342
> #6  0x72077a3e in intelDestroyContext (driContextPriv=0x6392f0) 
> at brw_context.c:996
> 
> it crashes because 'oldObj' in _mesa_reference_buffer_object_ points to 
> 0x.

Thanks for reporting this. The crash is caused by a mismatch between the
number of bindings used by the driver and the maximum declared by Mesa;
this was actually a silly mistake we introduced after updating some of
our code during reviews. To fix this you need to set
MAX_SHADER_STORAGE_BUFFERS in src/mesa/main/config.h to 15 (instead of
the current value of 7; the issue comes from 7*6 = 42 < 48).

Samuel is going to look into the +12 issue you also mentioned.

Iago

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] texcompress_s3tc: fix stride checks

2015-09-01 Thread Iago Toral
On Tue, 2015-09-01 at 16:41 +1000, Dave Airlie wrote:
> From: Dave Airlie <airl...@redhat.com>
> 
> The fastpath currently checks the stride != width, but

Maybe replace "stride" with "RowLength" in the line above to make things
clearer.

> if you have a RowLength of 7, and Alignment of 4, then
> that shuoldn't match.

Typo in shouldn't

> align the rowlength to the pack alignment before comparing.

Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

BTW, it seems that at least _mesa_texstore_rgb_fxt1 in
texcompress_fxt1.c has the same issue, right?

> This fixes compressed cases in CTS packed_pixels_pixelstore
> test when SKIP_PIXELS is enabled, which causes row length
> to get set.
> 
> Signed-off-by: Dave Airlie <airl...@redhat.com>
> ---
>  src/mesa/main/texcompress_s3tc.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/src/mesa/main/texcompress_s3tc.c 
> b/src/mesa/main/texcompress_s3tc.c
> index 7ce3cb8..6cfe06a 100644
> --- a/src/mesa/main/texcompress_s3tc.c
> +++ b/src/mesa/main/texcompress_s3tc.c
> @@ -130,7 +130,7 @@ _mesa_texstore_rgb_dxt1(TEXSTORE_PARAMS)
> if (srcFormat != GL_RGB ||
> srcType != GL_UNSIGNED_BYTE ||
> ctx->_ImageTransferState ||
> -   srcPacking->RowLength != srcWidth ||
> +   ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
> srcPacking->SwapBytes) {
>/* convert image to RGB/GLubyte */
>GLubyte *tempImageSlices[1];
> @@ -187,7 +187,7 @@ _mesa_texstore_rgba_dxt1(TEXSTORE_PARAMS)
> if (srcFormat != GL_RGBA ||
> srcType != GL_UNSIGNED_BYTE ||
> ctx->_ImageTransferState ||
> -   srcPacking->RowLength != srcWidth ||
> +   ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
> srcPacking->SwapBytes) {
>/* convert image to RGBA/GLubyte */
>GLubyte *tempImageSlices[1];
> @@ -244,7 +244,7 @@ _mesa_texstore_rgba_dxt3(TEXSTORE_PARAMS)
> if (srcFormat != GL_RGBA ||
> srcType != GL_UNSIGNED_BYTE ||
> ctx->_ImageTransferState ||
> -   srcPacking->RowLength != srcWidth ||
> +   ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
> srcPacking->SwapBytes) {
>/* convert image to RGBA/GLubyte */
>GLubyte *tempImageSlices[1];
> @@ -300,7 +300,7 @@ _mesa_texstore_rgba_dxt5(TEXSTORE_PARAMS)
> if (srcFormat != GL_RGBA ||
> srcType != GL_UNSIGNED_BYTE ||
> ctx->_ImageTransferState ||
> -   srcPacking->RowLength != srcWidth ||
> +   ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
> srcPacking->SwapBytes) {
>/* convert image to RGBA/GLubyte */
>GLubyte *tempImageSlices[1];


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] st/readpixels: fix accel path for skipimages.

2015-09-01 Thread Iago Toral
Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

On Tue, 2015-09-01 at 16:41 +1000, Dave Airlie wrote:
> From: Dave Airlie <airl...@redhat.com>
> 
> We don't need to use the 3d image address here as that will
> include SKIP_IMAGES, and we are only blitting a single
> 2D anyways, so just use the 2D path.
> 
> This fixes some memory overruns under CTS
>  packed_pixels.packed_pixels_pixelstore when PACK_SKIP_IMAGES
> is used.
> 
> Signed-off-by: Dave Airlie <airl...@redhat.com>
> ---
>  src/mesa/state_tracker/st_cb_readpixels.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/src/mesa/state_tracker/st_cb_readpixels.c 
> b/src/mesa/state_tracker/st_cb_readpixels.c
> index 6ff6cf6..bb36e69 100644
> --- a/src/mesa/state_tracker/st_cb_readpixels.c
> +++ b/src/mesa/state_tracker/st_cb_readpixels.c
> @@ -238,9 +238,9 @@ st_readpixels(struct gl_context *ctx, GLint x, GLint y,
>GLuint row;
>  
>for (row = 0; row < (unsigned) height; row++) {
> - GLvoid *dest = _mesa_image_address3d(pack, pixels,
> + GLvoid *dest = _mesa_image_address2d(pack, pixels,
>width, height, format,
> -  type, 0, row, 0);
> +  type, row, 0);
>   memcpy(dest, map, bytesPerRow);
>   map += tex_xfer->stride;
>}


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] mesa/readpixels: check strides are equal before skipping conversion

2015-09-01 Thread Iago Toral
Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

On Tue, 2015-09-01 at 16:41 +1000, Dave Airlie wrote:
> From: Dave Airlie <airl...@redhat.com>
> 
> The CTS packed_pixels test checks that readpixels doesn't write
> into the space between rows, however we fail that here unless
> we check the format and stride match.
> This fixes all the core mesa problems with CTS packed_pixels
> tests.
> 
> Signed-off-by: Dave Airlie <airl...@redhat.com>
> ---
>  src/mesa/main/readpix.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c
> index 0ef07b5..c57fbac 100644
> --- a/src/mesa/main/readpix.c
> +++ b/src/mesa/main/readpix.c
> @@ -523,7 +523,8 @@ read_rgba_pixels( struct gl_context *ctx,
> * convert to, then we can convert directly into the dst buffer and 
> avoid
> * the final conversion/copy from the rgba buffer to the dst buffer.
> */
> -  if (dst_format == rgba_format) {
> +  if (dst_format == rgba_format &&
> +  dst_stride == rgba_stride) {
>   need_convert = false;
>   rgba = dst;
>} else {


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 6/6] i965: Add a debug option for spilling everything in vec4 code

2015-09-03 Thread Iago Toral
On Thu, 2015-09-03 at 15:37 +0300, Francisco Jerez wrote:
> Iago Toral <ito...@igalia.com> writes:
> 
> > On Wed, 2015-09-02 at 14:32 +0300, Francisco Jerez wrote:
> >> Iago Toral <ito...@igalia.com> writes:
> >> 
> >> > On Thu, 2015-07-30 at 16:13 +0300, Francisco Jerez wrote:
> >> >> Iago Toral <ito...@igalia.com> writes:
> >> >> 
> >> >> > On Thu, 2015-07-30 at 15:58 +0300, Francisco Jerez wrote:
> >> >> >> Iago Toral Quiroga <ito...@igalia.com> writes:
> >> >> >> 
> >> >> >> > ---
> >> >> >> >  src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 2 +-
> >> >> >> >  src/mesa/drivers/dri/i965/brw_vec4.cpp| 2 +-
> >> >> >> >  src/mesa/drivers/dri/i965/intel_debug.c   | 3 ++-
> >> >> >> >  src/mesa/drivers/dri/i965/intel_debug.h   | 5 +++--
> >> >> >> >  4 files changed, 7 insertions(+), 5 deletions(-)
> >> >> >> >
> >> >> >> > diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp 
> >> >> >> > b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> >> >> >> > index f25f2ec..714248a 100644
> >> >> >> > --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> >> >> >> > +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> >> >> >> > @@ -634,7 +634,7 @@ fs_visitor::assign_regs(bool allow_spilling)
> >> >> >> > }
> >> >> >> >  
> >> >> >> > /* Debug of register spilling: Go spill everything. */
> >> >> >> > -   if (unlikely(INTEL_DEBUG & DEBUG_SPILL)) {
> >> >> >> > +   if (unlikely(INTEL_DEBUG & DEBUG_SPILL_FS)) {
> >> >> >> >int reg = choose_spill_reg(g);
> >> >> >> >  
> >> >> >> >if (reg != -1) {
> >> >> >> > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
> >> >> >> > b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> >> >> >> > index 53270fb..6cf5ede 100644
> >> >> >> > --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> >> >> >> > +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> >> >> >> > @@ -1814,7 +1814,7 @@ vec4_visitor::run(gl_clip_plane *clip_planes)
> >> >> >> >  
> >> >> >> > setup_payload();
> >> >> >> >  
> >> >> >> > -   if (false) {
> >> >> >> > +   if (unlikely(INTEL_DEBUG & DEBUG_SPILL_VEC4)) {
> >> >> >> >/* Debug of register spilling: Go spill everything. */
> >> >> >> >const int grf_count = alloc.count;
> >> >> >> >float spill_costs[alloc.count];
> >> >> >> > diff --git a/src/mesa/drivers/dri/i965/intel_debug.c 
> >> >> >> > b/src/mesa/drivers/dri/i965/intel_debug.c
> >> >> >> > index a077731..8d34349 100644
> >> >> >> > --- a/src/mesa/drivers/dri/i965/intel_debug.c
> >> >> >> > +++ b/src/mesa/drivers/dri/i965/intel_debug.c
> >> >> >> > @@ -69,7 +69,8 @@ static const struct dri_debug_control 
> >> >> >> > debug_control[] = {
> >> >> >> > { "ann", DEBUG_ANNOTATION },
> >> >> >> > { "no8", DEBUG_NO8 },
> >> >> >> > { "vec4vs",  DEBUG_VEC4VS },
> >> >> >> > -   { "spill",   DEBUG_SPILL },
> >> >> >> > +   { "spill_frag",  DEBUG_SPILL_FS },
> >> >> >> 
> >> >> >> How about we call this "spill_fs" instead?  The flag doesn't only 
> >> >> >> affect
> >> >> >> fragment shaders, AFAICT it will cause all programs compiled with 
> >> >> >> the FS
> >> >> >> back-end [F for fast ;)] to spill everything.  With that fixed:
> >> >> >
> >> >> > that was my first choice, but if we do that it seems that
> >> >> > driParseDebugString will also mark INTEL_DEBUG=fs as enabled.
> >> >> >
> >> >> > It seems as if this function checks if any of th

Re: [Mesa-dev] [PATCH v3 4/4] i965/vec4: Don't unspill the same register in consecutive instructions

2015-09-02 Thread Iago Toral
On Wed, 2015-09-02 at 14:29 +0300, Francisco Jerez wrote:
> Iago Toral <ito...@igalia.com> writes:
> 
> > Hi Curro,
> >
> > I have been a couple of weeks on holidays and have just come back to
> > this:
> >
> > On Thu, 2015-08-06 at 18:27 +0300, Francisco Jerez wrote:
> >> Iago Toral Quiroga <ito...@igalia.com> writes:
> >> 
> >> > If we have spilled/unspilled a register in the current instruction, avoid
> >> > emitting unspills for the same register in the same instruction or 
> >> > consecutive
> >> > instructions following the current one as long as they keep reading the 
> >> > spilled
> >> > register. This should allow us to avoid emitting costy unspills that 
> >> > come with
> >> > little benefit to register allocation.
> >> >
> >> > Also, update evaluate_spill_costs so that we account for the saved 
> >> > unspills.
> >> > ---
> >> >  .../drivers/dri/i965/brw_vec4_reg_allocate.cpp | 129 
> >> > +++--
> >> >  1 file changed, 121 insertions(+), 8 deletions(-)
> >> >
> >> > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp 
> >> > b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
> >> > index 617c988..fed5f4d 100644
> >> > --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
> >> > +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
> >> > @@ -264,6 +264,95 @@ vec4_visitor::reg_allocate()
> >> > return true;
> >> >  }
> >> >  
> >> > +/**
> >> > + * When we decide to spill a register, instead of blindly spilling 
> >> > every use,
> >> > + * save unspills when the spill register is used (read) in consecutive
> >> > + * instructions. This can potentially save a bunch of unspills that 
> >> > would
> >> > + * have very little impact in register allocation anyway.
> >> > + *
> >> > + * Notice that we need to account for this behavior when spilling a 
> >> > register
> >> > + * and when evaluating spilling costs. This function is designed so it 
> >> > can
> >> > + * be called from both places and avoid repeating the logic.
> >> > + *
> >> > + *  - When we call this function from spill_reg, we pass in scratch_reg 
> >> > the
> >> > + *actual unspill/spill register that we want to reuse in the current
> >> > + *instruction.
> >> > + *
> >> > + *  - When we call this from evaluate_spill_costs, we pass the register 
> >> > for
> >> > + *which we are evaluating spilling costs.
> >> > + *
> >> > + * In either case, we check if the previous instructions read 
> >> > scratch_reg until
> >> > + * we find an instruction that writes to it (in which case we can reuse
> >> > + * scratch_reg as long as the writemask is compatible with the channels 
> >> > we need
> >> > + * to read in the current instruction) or we hit an instruction that 
> >> > does not
> >> > + * read scratch_reg at all. The latter can only happen when we call 
> >> > this from
> >> > + * evaluate_spill_costs,
> >> 
> >> Strictly speaking it can also happen when called from spill_reg() for
> >> the first time in a given sequence of consecutive instructions (in which
> >> case you correctly return false).
> >
> > not really, spill_reg() knows if it is the first time that it is
> > spilling a register and won't call this function in that case.
> >
> You may have several disjoint sequences of consecutive instructions
> using spill_reg_nr repeatedly.  The check you have in spill_reg() will
> only help you for the first one sequence, so it's in fact redundant
> because can_use_scratch_for_source() seems to handle the case in which
> the register access is the first in a sequence just fine anyway.

Ah, right. I'll update the comment accordingly.

> >> >  and means that this is the point at which we first
> >> > + * need the unspill this register for our current instruction. Since 
> >> > all our
> >> > + * unspills read a full vec4, we know that in this case we will have all
> >> > + * the channels available in scratch_reg and we can reuse it.
> >> > + *
> >> > + * In any other case, we can't reuse scratch_reg in the current 
> >> > instruction,
> >>

Re: [Mesa-dev] [PATCH v2 6/6] i965: Add a debug option for spilling everything in vec4 code

2015-09-04 Thread Iago Toral
On Thu, 2015-09-03 at 15:37 +0300, Francisco Jerez wrote:
> Iago Toral <ito...@igalia.com> writes:
> 
> > On Wed, 2015-09-02 at 14:32 +0300, Francisco Jerez wrote:
> >> Iago Toral <ito...@igalia.com> writes:
> >> 
> >> > On Thu, 2015-07-30 at 16:13 +0300, Francisco Jerez wrote:
> >> >> Iago Toral <ito...@igalia.com> writes:
> >> >> 
> >> >> > On Thu, 2015-07-30 at 15:58 +0300, Francisco Jerez wrote:
> >> >> >> Iago Toral Quiroga <ito...@igalia.com> writes:
> >> >> >> 
> >> >> >> > ---
> >> >> >> >  src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 2 +-
> >> >> >> >  src/mesa/drivers/dri/i965/brw_vec4.cpp| 2 +-
> >> >> >> >  src/mesa/drivers/dri/i965/intel_debug.c   | 3 ++-
> >> >> >> >  src/mesa/drivers/dri/i965/intel_debug.h   | 5 +++--
> >> >> >> >  4 files changed, 7 insertions(+), 5 deletions(-)
> >> >> >> >
> >> >> >> > diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp 
> >> >> >> > b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> >> >> >> > index f25f2ec..714248a 100644
> >> >> >> > --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> >> >> >> > +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> >> >> >> > @@ -634,7 +634,7 @@ fs_visitor::assign_regs(bool allow_spilling)
> >> >> >> > }
> >> >> >> >  
> >> >> >> > /* Debug of register spilling: Go spill everything. */
> >> >> >> > -   if (unlikely(INTEL_DEBUG & DEBUG_SPILL)) {
> >> >> >> > +   if (unlikely(INTEL_DEBUG & DEBUG_SPILL_FS)) {
> >> >> >> >int reg = choose_spill_reg(g);
> >> >> >> >  
> >> >> >> >if (reg != -1) {
> >> >> >> > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
> >> >> >> > b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> >> >> >> > index 53270fb..6cf5ede 100644
> >> >> >> > --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> >> >> >> > +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> >> >> >> > @@ -1814,7 +1814,7 @@ vec4_visitor::run(gl_clip_plane *clip_planes)
> >> >> >> >  
> >> >> >> > setup_payload();
> >> >> >> >  
> >> >> >> > -   if (false) {
> >> >> >> > +   if (unlikely(INTEL_DEBUG & DEBUG_SPILL_VEC4)) {
> >> >> >> >/* Debug of register spilling: Go spill everything. */
> >> >> >> >const int grf_count = alloc.count;
> >> >> >> >float spill_costs[alloc.count];
> >> >> >> > diff --git a/src/mesa/drivers/dri/i965/intel_debug.c 
> >> >> >> > b/src/mesa/drivers/dri/i965/intel_debug.c
> >> >> >> > index a077731..8d34349 100644
> >> >> >> > --- a/src/mesa/drivers/dri/i965/intel_debug.c
> >> >> >> > +++ b/src/mesa/drivers/dri/i965/intel_debug.c
> >> >> >> > @@ -69,7 +69,8 @@ static const struct dri_debug_control 
> >> >> >> > debug_control[] = {
> >> >> >> > { "ann", DEBUG_ANNOTATION },
> >> >> >> > { "no8", DEBUG_NO8 },
> >> >> >> > { "vec4vs",  DEBUG_VEC4VS },
> >> >> >> > -   { "spill",   DEBUG_SPILL },
> >> >> >> > +   { "spill_frag",  DEBUG_SPILL_FS },
> >> >> >> 
> >> >> >> How about we call this "spill_fs" instead?  The flag doesn't only 
> >> >> >> affect
> >> >> >> fragment shaders, AFAICT it will cause all programs compiled with 
> >> >> >> the FS
> >> >> >> back-end [F for fast ;)] to spill everything.  With that fixed:
> >> >> >
> >> >> > that was my first choice, but if we do that it seems that
> >> >> > driParseDebugString will also mark INTEL_DEBUG=fs as enabled.
> >> >> >
> >> >> > It seems as if this function check

Re: [Mesa-dev] [PATCH 2/7] glsl/cs: Add gl_LocalInvocationID variable

2015-09-10 Thread Iago Toral
Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

On Mon, 2015-08-03 at 23:00 -0700, Jordan Justen wrote:
> Signed-off-by: Jordan Justen <jordan.l.jus...@intel.com>
> ---
>  src/glsl/builtin_variables.cpp | 2 ++
>  src/glsl/shader_enums.h| 9 +
>  2 files changed, 11 insertions(+)
> 
> diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp
> index 0ff3a3f..b2936a5 100644
> --- a/src/glsl/builtin_variables.cpp
> +++ b/src/glsl/builtin_variables.cpp
> @@ -1045,6 +1045,8 @@ builtin_variable_generator::generate_fs_special_vars()
>  void
>  builtin_variable_generator::generate_cs_special_vars()
>  {
> +   add_system_value(SYSTEM_VALUE_LOCAL_INVOCATION_ID, glsl_type::uvec3_type,
> +"gl_LocalInvocationID");
> /* TODO: finish this. */
>  }
>  
> diff --git a/src/glsl/shader_enums.h b/src/glsl/shader_enums.h
> index 3c39416..64e1e46 100644
> --- a/src/glsl/shader_enums.h
> +++ b/src/glsl/shader_enums.h
> @@ -51,6 +51,8 @@ typedef enum
>  #define SYSTEM_BIT_SAMPLE_ID ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_ID)
>  #define SYSTEM_BIT_SAMPLE_POS ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_POS)
>  #define SYSTEM_BIT_SAMPLE_MASK_IN ((uint64_t)1 << 
> SYSTEM_VALUE_SAMPLE_MASK_IN)
> +#define SYSTEM_BIT_LOCAL_INVOCATION_ID ((uint64_t)1 << 
> SYSTEM_VALUE_LOCAL_INVOCATION_ID)
> +
>  /**
>   * If the gl_register_file is PROGRAM_SYSTEM_VALUE, the register index will 
> be
>   * one of these values.  If a NIR variable's mode is nir_var_system_value, it
> @@ -176,6 +178,13 @@ typedef enum
> SYSTEM_VALUE_TESS_LEVEL_INNER, /**< TES input */
> /*@}*/
>  
> +   /**
> +* \name Compute shader system values
> +*/
> +   /*@{*/
> +   SYSTEM_VALUE_LOCAL_INVOCATION_ID,
> +   /*@}*/
> +
> SYSTEM_VALUE_MAX /**< Number of values */
>  } gl_system_value;
>  


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/7] nir: Add gl_LocalInvocationID variable

2015-09-10 Thread Iago Toral
Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

On Mon, 2015-08-03 at 23:00 -0700, Jordan Justen wrote:
> Signed-off-by: Jordan Justen <jordan.l.jus...@intel.com>
> ---
>  src/glsl/nir/nir_intrinsics.h  | 1 +
>  src/glsl/nir/nir_lower_system_values.c | 3 +++
>  2 files changed, 4 insertions(+)
> 
> diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
> index bc6e6b8..29b5e64 100644
> --- a/src/glsl/nir/nir_intrinsics.h
> +++ b/src/glsl/nir/nir_intrinsics.h
> @@ -137,6 +137,7 @@ SYSTEM_VALUE(sample_id, 1)
>  SYSTEM_VALUE(sample_pos, 2)
>  SYSTEM_VALUE(sample_mask_in, 1)
>  SYSTEM_VALUE(invocation_id, 1)
> +SYSTEM_VALUE(local_invocation_id, 3)
>  
>  /*
>   * The first and only index is the base address to load from.  Indirect
> diff --git a/src/glsl/nir/nir_lower_system_values.c 
> b/src/glsl/nir/nir_lower_system_values.c
> index a6eec65..ee375ea 100644
> --- a/src/glsl/nir/nir_lower_system_values.c
> +++ b/src/glsl/nir/nir_lower_system_values.c
> @@ -70,6 +70,9 @@ convert_instr(nir_intrinsic_instr *instr)
> case SYSTEM_VALUE_INVOCATION_ID:
>op = nir_intrinsic_load_invocation_id;
>break;
> +   case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
> +  op = nir_intrinsic_load_local_invocation_id;
> +  break;
> default:
>unreachable("not reached");
> }


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/7] glsl/cs: Add gl_LocalInvocationID variable

2015-09-10 Thread Iago Toral
On Thu, 2015-09-10 at 16:38 +0200, Iago Toral wrote:
> Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>
> 
> On Mon, 2015-08-03 at 23:00 -0700, Jordan Justen wrote:
> > Signed-off-by: Jordan Justen <jordan.l.jus...@intel.com>
> > ---
> >  src/glsl/builtin_variables.cpp | 2 ++
> >  src/glsl/shader_enums.h| 9 +
> >  2 files changed, 11 insertions(+)
> > 
> > diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp
> > index 0ff3a3f..b2936a5 100644
> > --- a/src/glsl/builtin_variables.cpp
> > +++ b/src/glsl/builtin_variables.cpp
> > @@ -1045,6 +1045,8 @@ builtin_variable_generator::generate_fs_special_vars()
> >  void
> >  builtin_variable_generator::generate_cs_special_vars()
> >  {
> > +   add_system_value(SYSTEM_VALUE_LOCAL_INVOCATION_ID, 
> > glsl_type::uvec3_type,
> > +"gl_LocalInvocationID");

Probably not relevant, but since this caught my eye I figured I would
mention it: other parts of this file do not use glsl_type::* directly.
Instead, they use the *_t class members defined in
builtin_variable_generator, which are initialized in the class constructor.
To be honest, I don't see the point of this, but in case there is a
reason for it you might want to do the same for uvec3.

Iago

> > /* TODO: finish this. */
> >  }
> >  
> > diff --git a/src/glsl/shader_enums.h b/src/glsl/shader_enums.h
> > index 3c39416..64e1e46 100644
> > --- a/src/glsl/shader_enums.h
> > +++ b/src/glsl/shader_enums.h
> > @@ -51,6 +51,8 @@ typedef enum
> >  #define SYSTEM_BIT_SAMPLE_ID ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_ID)
> >  #define SYSTEM_BIT_SAMPLE_POS ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_POS)
> >  #define SYSTEM_BIT_SAMPLE_MASK_IN ((uint64_t)1 << 
> > SYSTEM_VALUE_SAMPLE_MASK_IN)
> > +#define SYSTEM_BIT_LOCAL_INVOCATION_ID ((uint64_t)1 << 
> > SYSTEM_VALUE_LOCAL_INVOCATION_ID)
> > +
> >  /**
> >   * If the gl_register_file is PROGRAM_SYSTEM_VALUE, the register index 
> > will be
> >   * one of these values.  If a NIR variable's mode is nir_var_system_value, 
> > it
> > @@ -176,6 +178,13 @@ typedef enum
> > SYSTEM_VALUE_TESS_LEVEL_INNER, /**< TES input */
> > /*@}*/
> >  
> > +   /**
> > +* \name Compute shader system values
> > +*/
> > +   /*@{*/
> > +   SYSTEM_VALUE_LOCAL_INVOCATION_ID,
> > +   /*@}*/
> > +
> > SYSTEM_VALUE_MAX /**< Number of values */
> >  } gl_system_value;
> >  
> 


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] glsl: shader-enum to name debug fxns

2015-09-16 Thread Iago Toral
Thanks for all the explanations, this is:

Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

On Tue, 2015-09-15 at 19:33 -0400, Rob Clark wrote:
> From: Rob Clark <robcl...@freedesktop.org>
> 
> Signed-off-by: Rob Clark <robcl...@freedesktop.org>
> ---
>  src/Makefile.am   |   1 +
>  src/glsl/shader_enums.c   | 204 
> ++
>  src/glsl/shader_enums.h   |  53 
>  src/mesa/Makefile.sources |   4 +-
>  4 files changed, 261 insertions(+), 1 deletion(-)
>  create mode 100644 src/glsl/shader_enums.c
> 
> diff --git a/src/Makefile.am b/src/Makefile.am
> index 0d49bcd..9e15cca 100644
> --- a/src/Makefile.am
> +++ b/src/Makefile.am
> @@ -62,6 +62,7 @@ AM_CPPFLAGS = \
>  noinst_LTLIBRARIES = libglsl_util.la
>  
>  libglsl_util_la_SOURCES = \
> + glsl/shader_enums.c \
>   mesa/main/imports.c \
>   mesa/program/prog_hash_table.c \
>   mesa/program/symbol_table.c \
> diff --git a/src/glsl/shader_enums.c b/src/glsl/shader_enums.c
> new file mode 100644
> index 000..530fd9e
> --- /dev/null
> +++ b/src/glsl/shader_enums.c
> @@ -0,0 +1,204 @@
> +/*
> + * Mesa 3-D graphics library
> + *
> + * Copyright © 2015 Red Hat
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
> DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *Rob Clark <robcl...@freedesktop.org>
> + */
> +
> +#include "glsl/shader_enums.h"
> +#include "util/macros.h"
> +
> +#define ENUM(x) [x] = #x
> +#define NAME(val) ((((val) < ARRAY_SIZE(names)) && names[(val)]) ? 
> names[(val)] : "UNKNOWN")
> +
> +const char * gl_shader_stage_name(gl_shader_stage stage)
> +{
> +   static const char *names[] = {
> +  ENUM(MESA_SHADER_VERTEX),
> +  ENUM(MESA_SHADER_TESS_CTRL),
> +  ENUM(MESA_SHADER_TESS_EVAL),
> +  ENUM(MESA_SHADER_GEOMETRY),
> +  ENUM(MESA_SHADER_FRAGMENT),
> +  ENUM(MESA_SHADER_COMPUTE),
> +   };
> +   return NAME(stage);
> +}
> +
> +const char * gl_vert_attrib_name(gl_vert_attrib attrib)
> +{
> +   static const char *names[] = {
> +  ENUM(VERT_ATTRIB_POS),
> +  ENUM(VERT_ATTRIB_WEIGHT),
> +  ENUM(VERT_ATTRIB_NORMAL),
> +  ENUM(VERT_ATTRIB_COLOR0),
> +  ENUM(VERT_ATTRIB_COLOR1),
> +  ENUM(VERT_ATTRIB_FOG),
> +  ENUM(VERT_ATTRIB_COLOR_INDEX),
> +  ENUM(VERT_ATTRIB_EDGEFLAG),
> +  ENUM(VERT_ATTRIB_TEX0),
> +  ENUM(VERT_ATTRIB_TEX1),
> +  ENUM(VERT_ATTRIB_TEX2),
> +  ENUM(VERT_ATTRIB_TEX3),
> +  ENUM(VERT_ATTRIB_TEX4),
> +  ENUM(VERT_ATTRIB_TEX5),
> +  ENUM(VERT_ATTRIB_TEX6),
> +  ENUM(VERT_ATTRIB_TEX7),
> +  ENUM(VERT_ATTRIB_POINT_SIZE),
> +  ENUM(VERT_ATTRIB_GENERIC0),
> +  ENUM(VERT_ATTRIB_GENERIC1),
> +  ENUM(VERT_ATTRIB_GENERIC2),
> +  ENUM(VERT_ATTRIB_GENERIC3),
> +  ENUM(VERT_ATTRIB_GENERIC4),
> +  ENUM(VERT_ATTRIB_GENERIC5),
> +  ENUM(VERT_ATTRIB_GENERIC6),
> +  ENUM(VERT_ATTRIB_GENERIC7),
> +  ENUM(VERT_ATTRIB_GENERIC8),
> +  ENUM(VERT_ATTRIB_GENERIC9),
> +  ENUM(VERT_ATTRIB_GENERIC10),
> +  ENUM(VERT_ATTRIB_GENERIC11),
> +  ENUM(VERT_ATTRIB_GENERIC12),
> +  ENUM(VERT_ATTRIB_GENERIC13),
> +  ENUM(VERT_ATTRIB_GENERIC14),
> +  ENUM(VERT_ATTRIB_GENERIC15),
> +   };
> +   return NAME(attrib);
> +}
> +
> +const char * gl_varying_slot_name(gl_varying_slot slot)
> +{
> +   static const char *names[] = {
> +  ENUM(VARYING_SLOT_POS),
> +  ENUM(VARYING_SLOT_COL0),
> +  ENUM(VARYING_SLOT_CO

Re: [Mesa-dev] [PATCH 3/3] nir/print: print symbolic names from shader-enum

2015-09-16 Thread Iago Toral
Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

On Tue, 2015-09-15 at 19:33 -0400, Rob Clark wrote:
> From: Rob Clark <robcl...@freedesktop.org>
> 
> v2: split out moving of FILE *fp into state structure into its own
> (more complete patch) to reduce the noise in this one
> 
> Signed-off-by: Rob Clark <robcl...@freedesktop.org>
> ---
>  src/glsl/nir/nir_print.c | 45 ++---
>  1 file changed, 42 insertions(+), 3 deletions(-)
> 
> diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c
> index 405dbf3..8f568eb 100644
> --- a/src/glsl/nir/nir_print.c
> +++ b/src/glsl/nir/nir_print.c
> @@ -26,6 +26,7 @@
>   */
>  
>  #include "nir.h"
> +#include "shader_enums.h"
>  #include 
>  #include 
>  
> @@ -38,6 +39,7 @@ print_tabs(unsigned num_tabs, FILE *fp)
>  
>  typedef struct {
> FILE *fp;
> +   nir_shader *shader;
> /** map from nir_variable -> printable name */
> struct hash_table *ht;
>  
> @@ -218,10 +220,10 @@ print_var_decl(nir_variable *var, print_state *state)
> const char *const inv = (var->data.invariant) ? "invariant " : "";
> const char *const mode[] = { "shader_in ", "shader_out ", "", "",
>  "uniform ", "shader_storage", "system " };
> -   const char *const interp[] = { "", "smooth", "flat", "noperspective" };
>  
> fprintf(fp, "%s%s%s%s%s ",
> -  cent, samp, inv, mode[var->data.mode], 
> interp[var->data.interpolation]);
> +  cent, samp, inv, mode[var->data.mode],
> +   glsl_interp_qualifier_name(var->data.interpolation));
>  
> glsl_print_type(var->type, fp);
>  
> @@ -244,7 +246,41 @@ print_var_decl(nir_variable *var, print_state *state)
> var->data.mode == nir_var_shader_out ||
> var->data.mode == nir_var_uniform ||
> var->data.mode == nir_var_shader_storage) {
> -  fprintf(fp, " (%u, %u)", var->data.location, 
> var->data.driver_location);
> +  const char *loc = NULL;
> +  char buf[4];
> +
> +  switch (state->shader->stage) {
> +  case MESA_SHADER_VERTEX:
> + if (var->data.mode == nir_var_shader_in)
> +loc = gl_vert_attrib_name(var->data.location);
> + else if (var->data.mode == nir_var_shader_out)
> +loc = gl_varying_slot_name(var->data.location);
> + break;
> +  case MESA_SHADER_GEOMETRY:
> + if ((var->data.mode == nir_var_shader_in) ||
> + (var->data.mode == nir_var_shader_out))
> +loc = gl_varying_slot_name(var->data.location);
> + break;
> +  case MESA_SHADER_FRAGMENT:
> + if (var->data.mode == nir_var_shader_in)
> +loc = gl_varying_slot_name(var->data.location);
> + else if (var->data.mode == nir_var_shader_out)
> +loc = gl_frag_result_name(var->data.location);
> + break;
> +  case MESA_SHADER_TESS_CTRL:
> +  case MESA_SHADER_TESS_EVAL:
> +  case MESA_SHADER_COMPUTE:
> +  default:
> + /* TODO */
> + break;
> +  }
> +
> +  if (!loc) {
> + snprintf(buf, sizeof(buf), "%u", var->data.location);
> + loc = buf;
> +  }
> +
> +  fprintf(fp, " (%s, %u)", loc, var->data.driver_location);
> }
>  
> fprintf(fp, "\n");
> @@ -855,6 +891,7 @@ static void
>  init_print_state(print_state *state, nir_shader *shader, FILE *fp)
>  {
> state->fp = fp;
> +   state->shader = shader;
> state->ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
> _mesa_key_pointer_equal);
> state->syms = _mesa_set_create(NULL, _mesa_key_hash_string,
> @@ -875,6 +912,8 @@ nir_print_shader(nir_shader *shader, FILE *fp)
> print_state state;
> init_print_state(&state, shader, fp);
>  
> +   fprintf(fp, "shader: %s\n", gl_shader_stage_name(shader->stage));
> +
> foreach_list_typed(nir_variable, var, node, &shader->uniforms) {
>print_var_decl(var, &state);
> }


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir: add lowering for ffract

2015-09-16 Thread Iago Toral
Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

On Tue, 2015-09-15 at 17:40 -0400, Rob Clark wrote:
> From: Rob Clark <robcl...@freedesktop.org>
> 
> Signed-off-by: Rob Clark <robcl...@freedesktop.org>
> ---
>  src/glsl/nir/nir.h| 3 +++
>  src/glsl/nir/nir_opt_algebraic.py | 1 +
>  2 files changed, 4 insertions(+)
> 
> diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
> index f0acd75..284fccd 100644
> --- a/src/glsl/nir/nir.h
> +++ b/src/glsl/nir/nir.h
> @@ -1440,6 +1440,9 @@ typedef struct nir_shader_compiler_options {
>  */
> bool fdot_replicates;
>  
> +   /** lowers ffract to fsub+ffloor: */
> +   bool lower_ffract;
> +
> /**
>  * Does the driver support real 32-bit integers?  (Otherwise, integers
>  * are simulated by floats.)
> diff --git a/src/glsl/nir/nir_opt_algebraic.py 
> b/src/glsl/nir/nir_opt_algebraic.py
> index acc3b04..43558a5 100644
> --- a/src/glsl/nir/nir_opt_algebraic.py
> +++ b/src/glsl/nir/nir_opt_algebraic.py
> @@ -76,6 +76,7 @@ optimizations = [
> (('flrp', a, a, b), a),
> (('flrp', 0.0, a, b), ('fmul', a, b)),
> (('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 
> 'options->lower_flrp'),
> +   (('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'),
> (('fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), 
> ('flrp', a, b, c), '!options->lower_flrp'),
> (('fadd', a, ('fmul', c, ('fadd', b, ('fneg', a, ('flrp', a, b, c), 
> '!options->lower_flrp'),
> (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] nir/print: bit of state refactoring

2015-09-16 Thread Iago Toral
Looks good,

Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

On Wed, 2015-09-16 at 08:25 -0400, Rob Clark wrote:
> From: Rob Clark <robcl...@freedesktop.org>
> 
> Rename print_var_state to print_state, and stuff FILE ptr into the state
> object.  This avoids passing around an extra parameter everywhere.
> 
> v2: even more extensive conversion.. use state *everywhere* instead of
> FILE ptr, and convert nir_print_instr() to use state as well
> 
> Signed-off-by: Rob Clark <robcl...@freedesktop.org>
> ---
>  src/glsl/nir/nir_print.c | 261 
> +++
>  1 file changed, 152 insertions(+), 109 deletions(-)
> 
> diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c
> index 69cadba..bdecc3c 100644
> --- a/src/glsl/nir/nir_print.c
> +++ b/src/glsl/nir/nir_print.c
> @@ -37,6 +37,7 @@ print_tabs(unsigned num_tabs, FILE *fp)
>  }
>  
>  typedef struct {
> +   FILE *fp;
> /** map from nir_variable -> printable name */
> struct hash_table *ht;
>  
> @@ -45,11 +46,12 @@ typedef struct {
>  
> /* an index used to make new non-conflicting names */
> unsigned index;
> -} print_var_state;
> +} print_state;
>  
>  static void
> -print_register(nir_register *reg, FILE *fp)
> +print_register(nir_register *reg, print_state *state)
>  {
> +   FILE *fp = state->fp;
> if (reg->name != NULL)
>fprintf(fp, "/* %s */ ", reg->name);
> if (reg->is_global)
> @@ -61,90 +63,97 @@ print_register(nir_register *reg, FILE *fp)
>  static const char *sizes[] = { "error", "vec1", "vec2", "vec3", "vec4" };
>  
>  static void
> -print_register_decl(nir_register *reg, FILE *fp)
> +print_register_decl(nir_register *reg, print_state *state)
>  {
> +   FILE *fp = state->fp;
> fprintf(fp, "decl_reg %s ", sizes[reg->num_components]);
> if (reg->is_packed)
>fprintf(fp, "(packed) ");
> -   print_register(reg, fp);
> +   print_register(reg, state);
> if (reg->num_array_elems != 0)
>fprintf(fp, "[%u]", reg->num_array_elems);
> fprintf(fp, "\n");
>  }
>  
>  static void
> -print_ssa_def(nir_ssa_def *def, FILE *fp)
> +print_ssa_def(nir_ssa_def *def, print_state *state)
>  {
> +   FILE *fp = state->fp;
> if (def->name != NULL)
>fprintf(fp, "/* %s */ ", def->name);
> fprintf(fp, "%s ssa_%u", sizes[def->num_components], def->index);
>  }
>  
>  static void
> -print_ssa_use(nir_ssa_def *def, FILE *fp)
> +print_ssa_use(nir_ssa_def *def, print_state *state)
>  {
> +   FILE *fp = state->fp;
> if (def->name != NULL)
>fprintf(fp, "/* %s */ ", def->name);
> fprintf(fp, "ssa_%u", def->index);
>  }
>  
> -static void print_src(nir_src *src, FILE *fp);
> +static void print_src(nir_src *src, print_state *state);
>  
>  static void
> -print_reg_src(nir_reg_src *src, FILE *fp)
> +print_reg_src(nir_reg_src *src, print_state *state)
>  {
> -   print_register(src->reg, fp);
> +   FILE *fp = state->fp;
> +   print_register(src->reg, state);
> if (src->reg->num_array_elems != 0) {
>fprintf(fp, "[%u", src->base_offset);
>if (src->indirect != NULL) {
>   fprintf(fp, " + ");
> - print_src(src->indirect, fp);
> + print_src(src->indirect, state);
>}
>fprintf(fp, "]");
> }
>  }
>  
>  static void
> -print_reg_dest(nir_reg_dest *dest, FILE *fp)
> +print_reg_dest(nir_reg_dest *dest, print_state *state)
>  {
> -   print_register(dest->reg, fp);
> +   FILE *fp = state->fp;
> +   print_register(dest->reg, state);
> if (dest->reg->num_array_elems != 0) {
>fprintf(fp, "[%u", dest->base_offset);
>if (dest->indirect != NULL) {
>   fprintf(fp, " + ");
> - print_src(dest->indirect, fp);
> + print_src(dest->indirect, state);
>}
>fprintf(fp, "]");
> }
>  }
>  
>  static void
> -print_src(nir_src *src, FILE *fp)
> +print_src(nir_src *src, print_state *state)
>  {
> if (src->is_ssa)
> -  print_ssa_use(src->ssa, fp);
> +  print_ssa_use(src->ssa, state);
> else
> -  print_reg_src(>reg, fp);
> +  print_reg_src(>reg, state);
>  }
>  
>  static void
> -print_dest(nir_dest *dest, FILE *fp)
> +print_dest(nir_dest *dest, print_state *state)
>  {
> if (dest-&g

Re: [Mesa-dev] [PATCH v5 01/70] mesa: set MAX_SHADER_STORAGE_BUFFERS to 15.

2015-09-11 Thread Iago Toral
On Thu, 2015-09-10 at 15:17 -0400, Ilia Mirkin wrote:
> On Thu, Sep 10, 2015 at 2:52 PM, Ian Romanick <i...@freedesktop.org> wrote:
> > On 09/10/2015 10:45 AM, Ilia Mirkin wrote:
> >> On Thu, Sep 10, 2015 at 9:35 AM, Iago Toral Quiroga <ito...@igalia.com> 
> >> wrote:
> >>> From: Samuel Iglesias Gonsalvez <sigles...@igalia.com>
> >>>
> >>> This patch sets the same value used for uniform buffers.
> >>>
> >>> Signed-off-by: Samuel Iglesias Gonsalvez <sigles...@igalia.com>
> >>> ---
> >>>  src/mesa/main/config.h | 2 +-
> >>>  1 file changed, 1 insertion(+), 1 deletion(-)
> >>>
> >>> diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h
> >>> index b35031d..69acd7d 100644
> >>> --- a/src/mesa/main/config.h
> >>> +++ b/src/mesa/main/config.h
> >>> @@ -171,7 +171,7 @@
> >>>  #define MAX_PROGRAM_LOCAL_PARAMS   4096
> >>>  #define MAX_UNIFORMS   4096
> >>>  #define MAX_UNIFORM_BUFFERS15 /* + 1 default uniform buffer 
> >>> */
> >>> -#define MAX_SHADER_STORAGE_BUFFERS 7  /* + 1 default shader storage 
> >>> buffer */
> >>> +#define MAX_SHADER_STORAGE_BUFFERS 15  /* + 1 default shader storage 
> >>> buffer */
> >>
> >> Is there such a thing as a default shader storage buffer? I would have
> >> assumed not, but haven't read the spec.
> >
> > Technically no, but I think (and I'm sure Ken will correct me) i965 uses
> > one for register spilling.  Or is "scratch space" a different sort of thing?
> 
> Sure, but that's a driver-specific thing. The MAX_UNIFORM_BUFFERS
> thing is in reference to the number of uniform buffers in the GL.
> MAX_SHADER_STORAGE_BUFFERS seems like it should be the same thing. Why
> not make it 16? Or perhaps leave it at 15 and remove the comment?

Right, this is used to define the maximum number of binding points
available per shader stage. There is no such thing as a default shader
storage buffer that is different from the others; I think that comment
is just a copy-and-paste mistake from the uniform buffer definition. We
can remove the comment and leave this at 15, or round it up to 16 as you
suggest. I'd go with the latter.

Iago

> With uniform buffers, there's a "default" uniform buffer, and then
> there are actual user ones.



> >
> >>>  /* 6 is for vertex, hull, domain, geometry, fragment, and compute 
> >>> shader. */
> >>>  #define MAX_COMBINED_UNIFORM_BUFFERS   (MAX_UNIFORM_BUFFERS * 6)
> >>>  #define MAX_COMBINED_SHADER_STORAGE_BUFFERS   
> >>> (MAX_SHADER_STORAGE_BUFFERS * 6)
> >>> --
> >>> 1.9.1
> >>>
> >>> ___
> >>> mesa-dev mailing list
> >>> mesa-dev@lists.freedesktop.org
> >>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >> ___
> >> mesa-dev mailing list
> >> mesa-dev@lists.freedesktop.org
> >> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >
> 


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/7] i965/cs: Reserve local invocation id in payload regs

2015-09-11 Thread Iago Toral
On Mon, 2015-08-03 at 23:00 -0700, Jordan Justen wrote:
> Signed-off-by: Jordan Justen <jordan.l.jus...@intel.com>
> ---
>  src/mesa/drivers/dri/i965/brw_cs.cpp | 29 +
>  src/mesa/drivers/dri/i965/brw_cs.h   |  5 +
>  src/mesa/drivers/dri/i965/brw_fs.cpp | 11 +++
>  src/mesa/drivers/dri/i965/brw_fs.h   |  1 +
>  4 files changed, 46 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp 
> b/src/mesa/drivers/dri/i965/brw_cs.cpp
> index b566b92..541151a 100644
> --- a/src/mesa/drivers/dri/i965/brw_cs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_cs.cpp
> @@ -444,6 +444,35 @@ const struct brw_tracked_state brw_cs_state = {
>  
> 
>  /**
> + * We are building the local ID push constant data using the simplest 
> possible
> + * method. We simply push the local IDs directly as they should appear in the
> + * registers for the uvec3 gl_LocalInvocationID variable.
> + *
> + * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6
> + * registers worth of push constant space.
> + *
> + * FINISHME: There are a few easy optimizations to consider.
> + *
> + * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is
> + *no need for using push constant space for that dimension.
> + *
> + * 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can
> + *easily use 16-bit words rather than 32-bit dwords in the push constant
> + *data.
> + *
> + * 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for
> + *conveying the data, and thereby reduce push constant usage.
> + *
> + */
> +unsigned
> +brw_cs_prog_local_id_payload_size(const struct gl_program *prog,
> +  unsigned dispatch_width)
> +{
> +   return 3 * dispatch_width * sizeof(uint32_t);
> +}
> +
> +
> +/**
>   * Creates a region containing the push constants for the CS on gen7+.
>   *
>   * Push constants are constant values (such as GLSL uniforms) that are
> diff --git a/src/mesa/drivers/dri/i965/brw_cs.h 
> b/src/mesa/drivers/dri/i965/brw_cs.h
> index 8404aa3..5738918 100644
> --- a/src/mesa/drivers/dri/i965/brw_cs.h
> +++ b/src/mesa/drivers/dri/i965/brw_cs.h
> @@ -42,6 +42,11 @@ void
>  brw_upload_cs_prog(struct brw_context *brw);
>  
>  #ifdef __cplusplus
> +
> +unsigned
> +brw_cs_prog_local_id_payload_size(const struct gl_program *prog,
> +  unsigned dispatch_width);
> +
>  }
>  #endif
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 15fe364..b72eb76 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -42,6 +42,7 @@
>  #include "brw_eu.h"
>  #include "brw_wm.h"
>  #include "brw_fs.h"
> +#include "brw_cs.h"
>  #include "brw_cfg.h"
>  #include "brw_dead_control_flow.h"
>  #include "main/uniforms.h"
> @@ -4624,6 +4625,16 @@ fs_visitor::setup_cs_payload()
> assert(devinfo->gen >= 7);
>  
> payload.num_regs = 1;
> +
> +   if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
> +  const unsigned local_id_size =
> + brw_cs_prog_local_id_payload_size(prog, dispatch_width);
> +  const unsigned local_id_regs = ALIGN(local_id_size, 32) / 32;

Isn't this guaranteed to be 32-byte aligned? In any case, I suppose this
is okay to prepare the ground for some of the potential optimizations
you mentioned above.

> +  if (local_id_regs > 0) {
> + payload.local_invocation_id_reg = payload.num_regs;
> + payload.num_regs += local_id_regs;
> +  }
> +   }

As it is now, local_id_regs can't be zero. I suppose it could become
zero in the future if we end up implementing the first of the
optimizations you suggest above, for the case where all the components
are 1. Is that why you decided to go with a condition here instead of
an assert? If so, it may be worth adding a comment explaining when this
could be zero.

Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

>  }
>  
>  void
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
> b/src/mesa/drivers/dri/i965/brw_fs.h
> index 4749c47..b2266b2 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -368,6 +368,7 @@ public:
>uint8_t sample_pos_reg;
>uint8_t sample_mask_in_reg;
>uint8_t barycentric_coord_reg[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
> +  uint8_t local_invocation_id_reg;
>  
>/** The number of thread payload registers the hardware will supply. */
>uint8_t num_regs;


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir: Fix output variable names

2015-09-11 Thread Iago Toral
On Fri, 2015-09-11 at 09:24 +0200, Eduardo Lima Mitev wrote:
> Commit 1dbe4af9c9e318525fc082b542b93fb7f1e5efba
> "nir: Add a pass to lower outputs to temporary variables" messed up output
> variable names. The issue can be reproduced by dumping the NIR shaders
> with INTEL_DEBUG="vs,fs".
> ---
>  src/glsl/nir/nir_lower_outputs_to_temporaries.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/src/glsl/nir/nir_lower_outputs_to_temporaries.c 
> b/src/glsl/nir/nir_lower_outputs_to_temporaries.c
> index b730cad..e9c4c0d 100644
> --- a/src/glsl/nir/nir_lower_outputs_to_temporaries.c
> +++ b/src/glsl/nir/nir_lower_outputs_to_temporaries.c
> @@ -87,12 +87,13 @@ nir_lower_outputs_to_temporaries(nir_shader *shader)
> foreach_list_typed(nir_variable, var, node, _outputs) {
>nir_variable *output = ralloc(shader, nir_variable);
>memcpy(output, var, sizeof *output);
> +  output->name = ralloc_strdup(output, var->name);
>  
>/* The orignal is now the temporary */
>nir_variable *temp = var;
>  
>/* Give the output a new name with @out-temp appended */
> -  temp->name = ralloc_asprintf(var, "%s@out-temp", output->name);
> +  temp->name = ralloc_asprintf(output, "%s@out-temp", output->name);
>temp->data.mode = nir_var_global;
>temp->constant_initializer = NULL;

I think I saw some instances of this happening today... however, isn't
the problem bigger than just the variable name? I see that nir_variable
has other fields that are also dynamically allocated so they are bound
to point to trashed memory as soon as some opt pass kills the old
variable (which is what is happening with the name).
 
Iago

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/4] svga: remove useless MAX2() call

2015-09-11 Thread Iago Toral
Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

On Thu, 2015-09-10 at 09:04 -0600, Brian Paul wrote:
> The sum of two unsigned ints is always >= 0.  Found with Coverity.
> ---
>  src/gallium/drivers/svga/svga_state_tss.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/svga/svga_state_tss.c 
> b/src/gallium/drivers/svga/svga_state_tss.c
> index a13980d..5991da1 100644
> --- a/src/gallium/drivers/svga/svga_state_tss.c
> +++ b/src/gallium/drivers/svga/svga_state_tss.c
> @@ -90,7 +90,7 @@ emit_tex_binding_unit(struct svga_context *svga,
>}
>else {
>   last_level = MIN2(sv->u.tex.last_level, sv->texture->last_level);
> - min_lod = MAX2(0, (s->view_min_lod + sv->u.tex.first_level));
> + min_lod = s->view_min_lod + sv->u.tex.first_level;
>   min_lod = MIN2(min_lod, last_level);
>   max_lod = MIN2(s->view_max_lod + sv->u.tex.first_level, last_level);
>}


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 04/11] nir/print: print symbolic names from shader-enum

2015-09-15 Thread Iago Toral
On Sun, 2015-09-13 at 11:51 -0400, Rob Clark wrote:
> From: Rob Clark 
> 
> Signed-off-by: Rob Clark 
> ---
>  src/glsl/nir/nir_print.c | 73 
> ++--
>  1 file changed, 59 insertions(+), 14 deletions(-)
> 
> diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c
> index 69cadba..6c9bd4b 100644
> --- a/src/glsl/nir/nir_print.c
> +++ b/src/glsl/nir/nir_print.c
> @@ -26,6 +26,7 @@
>   */
>  
>  #include "nir.h"
> +#include "glsl/shader_enums.h"

I think you don't need the "glsl/" prefix for this include.

>  #include 
>  #include 
>  
> @@ -37,6 +38,9 @@ print_tabs(unsigned num_tabs, FILE *fp)
>  }
>  
>  typedef struct {
> +   FILE *fp;
> +
> +   nir_shader *shader;
> /** map from nir_variable -> printable name */
> struct hash_table *ht;
>  
> @@ -206,8 +210,10 @@ print_alu_instr(nir_alu_instr *instr, FILE *fp)
>  }
>  
>  static void
> -print_var_decl(nir_variable *var, print_var_state *state, FILE *fp)
> +print_var_decl(nir_variable *var, print_var_state *state)
>  {
> +   FILE *fp = state->fp;
> +
> fprintf(fp, "decl_var ");
>  
> const char *const cent = (var->data.centroid) ? "centroid " : "";
> @@ -215,10 +221,10 @@ print_var_decl(nir_variable *var, print_var_state 
> *state, FILE *fp)
> const char *const inv = (var->data.invariant) ? "invariant " : "";
> const char *const mode[] = { "shader_in ", "shader_out ", "", "",
>  "uniform ", "shader_storage", "system " };
> -   const char *const interp[] = { "", "smooth", "flat", "noperspective" };
>  
> fprintf(fp, "%s%s%s%s%s ",
> -  cent, samp, inv, mode[var->data.mode], 
> interp[var->data.interpolation]);
> +  cent, samp, inv, mode[var->data.mode],
> +   glsl_interp_qualifier_name(var->data.interpolation));
>  
> glsl_print_type(var->type, fp);
>  
> @@ -241,7 +247,41 @@ print_var_decl(nir_variable *var, print_var_state 
> *state, FILE *fp)
> var->data.mode == nir_var_shader_out ||
> var->data.mode == nir_var_uniform ||
> var->data.mode == nir_var_shader_storage) {
> -  fprintf(fp, " (%u, %u)", var->data.location, 
> var->data.driver_location);
> +  const char *loc = NULL;
> +  char buf[4];
> +
> +  switch (state->shader->stage) {
> +  case MESA_SHADER_VERTEX:
> + if (var->data.mode == nir_var_shader_in)
> +loc = gl_vert_attrib_name(var->data.location);
> + else if (var->data.mode == nir_var_shader_out)
> +loc = gl_varying_slot_name(var->data.location);
> + break;
> +  case MESA_SHADER_GEOMETRY:
> + if ((var->data.mode == nir_var_shader_in) ||
> + (var->data.mode == nir_var_shader_out))
> +loc = gl_vert_attrib_name(var->data.location);

Mmm... shouldn't this be the same as in the case of a vertex shader?
That is, use gl_varying_slot_name for GS outputs.

> + break;
> +  case MESA_SHADER_FRAGMENT:
> + if (var->data.mode == nir_var_shader_in)
> +loc = gl_varying_slot_name(var->data.location);
> + else if (var->data.mode == nir_var_shader_out)
> +loc = gl_frag_result_name(var->data.location);
> + break;
> +  case MESA_SHADER_TESS_CTRL:
> +  case MESA_SHADER_TESS_EVAL:
> +  case MESA_SHADER_COMPUTE:
> +  default:
> + /* TODO */
> + break;
> +  }
> +
> +  if (!loc) {
> + snprintf(buf, sizeof(buf), "%u", var->data.location);
> + loc = buf;
> +  }
> +
> +  fprintf(fp, " (%s, %u)", loc, var->data.driver_location);
> }
>  
> fprintf(fp, "\n");
> @@ -772,7 +812,7 @@ print_function_impl(nir_function_impl *impl, 
> print_var_state *state, FILE *fp)
>  
> foreach_list_typed(nir_variable, var, node, >locals) {
>fprintf(fp, "\t");
> -  print_var_decl(var, state, fp);
> +  print_var_decl(var, state);
> }
>  
> foreach_list_typed(nir_register, reg, node, >registers) {
> @@ -832,16 +872,19 @@ print_function_overload(nir_function_overload *overload,
>  }
>  
>  static void
> -print_function(nir_function *func, print_var_state *state, FILE *fp)
> +print_function(nir_function *func, print_var_state *state)
>  {
> +   FILE *fp = state->fp;
> foreach_list_typed(nir_function_overload, overload, node, 
> >overload_list) {
>print_function_overload(overload, state, fp);
> }
>  }
>  
>  static void
> -init_print_state(print_var_state *state)
> +init_print_state(print_var_state *state, nir_shader *shader, FILE *fp)
>  {
> +   state->fp = fp;
> +   state->shader = shader;
> state->ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
> _mesa_key_pointer_equal);
> state->syms = _mesa_set_create(NULL, _mesa_key_hash_string,
> @@ -860,26 +903,28 @@ void
>  nir_print_shader(nir_shader *shader, FILE *fp)
>  {
> print_var_state state;
> -   init_print_state();

Re: [Mesa-dev] [PATCH 03/11] glsl: shader-enum to name debug fxns

2015-09-15 Thread Iago Toral
On Sun, 2015-09-13 at 11:51 -0400, Rob Clark wrote:
> From: Rob Clark 
> 
> Signed-off-by: Rob Clark 
> ---
>  src/Makefile.am   |   1 +
>  src/glsl/shader_enums.c   | 202 
> ++
>  src/glsl/shader_enums.h   |  53 
>  src/mesa/Makefile.sources |   4 +-
>  4 files changed, 259 insertions(+), 1 deletion(-)
>  create mode 100644 src/glsl/shader_enums.c
> 
> diff --git a/src/Makefile.am b/src/Makefile.am
> index 0d49bcd..9e15cca 100644
> --- a/src/Makefile.am
> +++ b/src/Makefile.am
> @@ -62,6 +62,7 @@ AM_CPPFLAGS = \
>  noinst_LTLIBRARIES = libglsl_util.la
>  
>  libglsl_util_la_SOURCES = \
> + glsl/shader_enums.c \
>   mesa/main/imports.c \
>   mesa/program/prog_hash_table.c \
>   mesa/program/symbol_table.c \
> diff --git a/src/glsl/shader_enums.c b/src/glsl/shader_enums.c
> new file mode 100644
> index 000..1c45a40
> --- /dev/null
> +++ b/src/glsl/shader_enums.c
> @@ -0,0 +1,202 @@
> +/*
> + * Mesa 3-D graphics library
> + *
> + * Copyright © 2015 Red Hat
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
> DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *Rob Clark 
> + */
> +
> +#include "glsl/shader_enums.h"
> +#include "util/macros.h"
> +
> +#define ENUM(x) [x] = #x
> +#define NAME(val) ((((val) < ARRAY_SIZE(names)) && names[(val)]) ? 
> names[(val)] : "UNKNOWN")
> +
> +const char * gl_shader_stage_name(gl_shader_stage stage)
> +{
> +   static const char *names[] = {
> +  ENUM(MESA_SHADER_VERTEX),
> +  ENUM(MESA_SHADER_TESS_CTRL),
> +  ENUM(MESA_SHADER_TESS_EVAL),
> +  ENUM(MESA_SHADER_GEOMETRY),
> +  ENUM(MESA_SHADER_FRAGMENT),
> +  ENUM(MESA_SHADER_COMPUTE),
> +   };
> +   return NAME(stage);
> +}
> +
> +const char * gl_vert_attrib_name(gl_vert_attrib attrib)
> +{
> +   static const char *names[] = {
> +  ENUM(VERT_ATTRIB_POS),
> +  ENUM(VERT_ATTRIB_WEIGHT),
> +  ENUM(VERT_ATTRIB_NORMAL),
> +  ENUM(VERT_ATTRIB_COLOR0),
> +  ENUM(VERT_ATTRIB_COLOR1),
> +  ENUM(VERT_ATTRIB_FOG),
> +  ENUM(VERT_ATTRIB_COLOR_INDEX),
> +  ENUM(VERT_ATTRIB_EDGEFLAG),
> +  ENUM(VERT_ATTRIB_TEX0),
> +  ENUM(VERT_ATTRIB_TEX1),
> +  ENUM(VERT_ATTRIB_TEX2),
> +  ENUM(VERT_ATTRIB_TEX3),
> +  ENUM(VERT_ATTRIB_TEX4),
> +  ENUM(VERT_ATTRIB_TEX5),
> +  ENUM(VERT_ATTRIB_TEX6),
> +  ENUM(VERT_ATTRIB_TEX7),
> +  ENUM(VERT_ATTRIB_POINT_SIZE),
> +  ENUM(VERT_ATTRIB_GENERIC0),
> +  ENUM(VERT_ATTRIB_GENERIC1),
> +  ENUM(VERT_ATTRIB_GENERIC2),
> +  ENUM(VERT_ATTRIB_GENERIC3),
> +  ENUM(VERT_ATTRIB_GENERIC4),
> +  ENUM(VERT_ATTRIB_GENERIC5),
> +  ENUM(VERT_ATTRIB_GENERIC6),
> +  ENUM(VERT_ATTRIB_GENERIC7),
> +  ENUM(VERT_ATTRIB_GENERIC8),
> +  ENUM(VERT_ATTRIB_GENERIC9),
> +  ENUM(VERT_ATTRIB_GENERIC10),
> +  ENUM(VERT_ATTRIB_GENERIC11),
> +  ENUM(VERT_ATTRIB_GENERIC12),
> +  ENUM(VERT_ATTRIB_GENERIC13),
> +  ENUM(VERT_ATTRIB_GENERIC14),
> +  ENUM(VERT_ATTRIB_GENERIC15),
> +   };
> +   return NAME(attrib);
> +}
> +
> +const char * gl_varying_slot_name(gl_varying_slot slot)
> +{
> +   static const char *names[] = {
> +  ENUM(VARYING_SLOT_POS),
> +  ENUM(VARYING_SLOT_COL0),
> +  ENUM(VARYING_SLOT_COL1),
> +  ENUM(VARYING_SLOT_FOGC),
> +  ENUM(VARYING_SLOT_TEX0),
> +  ENUM(VARYING_SLOT_TEX1),
> +  ENUM(VARYING_SLOT_TEX2),
> +  ENUM(VARYING_SLOT_TEX3),
> +  ENUM(VARYING_SLOT_TEX4),
> +  ENUM(VARYING_SLOT_TEX5),
> +  ENUM(VARYING_SLOT_TEX6),
> +  ENUM(VARYING_SLOT_TEX7),
> +  ENUM(VARYING_SLOT_PSIZ),
> +  ENUM(VARYING_SLOT_BFC0),
> +  ENUM(VARYING_SLOT_BFC1),
> +  ENUM(VARYING_SLOT_EDGE),
> +  ENUM(VARYING_SLOT_CLIP_VERTEX),
> +  ENUM(VARYING_SLOT_CLIP_DIST0),
> +  

Re: [Mesa-dev] [RFC 0/3] i965: Enable up to 24 MRF registers in gen6

2015-09-17 Thread Iago Toral
On Wed, 2015-09-16 at 12:32 -0700, Kenneth Graunke wrote:
> On Wednesday, September 16, 2015 11:17:53 AM Iago Toral Quiroga wrote:
> > It seems that we have some bugs where we fail to compile shaders in gen6
> > because we do not have enough MRF registers available (see bugs 86469 and
> > 90631 for example). That triggered some discussion about the fact that SNB
> > might actually have 24 MRF registers available, but since the docs were not
> > very clear about this, it was suggested that it would be nice to try and
> > experiment if that was the case.
> > 
> > This series of patches implements such a test: basically, they turn our fixed
> > BRW_MAX_MRF into a macro that accepts the hardware generation and then 
> > changes
> > the spilling code in brw_fs_reg_allocate.cpp to use MRF registers 21-23 for
> > gen6 (something similar can be done for the vec4 code, I just did not do it
> > yet).
> > 
> > The good news is that this seems to work fine, at least I can do a full 
> > piglit
> > run without issues in SNB.
> 
> Sweet!
> 
> > In fact, this seems to help a lot of tests when I
> > force spilling of everything in the FS backend (INTEL_DEBUG=spill_fs):
> > 
> > Using MRF registers 13-15 for spilling:
> > crash: 5, fail 267, pass: 15853, skip: 11679, warn: 3
> > 
> > Using MRF registers 21-23 for spilling:
> > crash: 5, fail 140, pass: 15980, skip: 11679, warn: 3
> > 
> > As you can see, we drop the fail ratio to almost 50%...
> 
> That seems odd - I wouldn't think using m13-15 vs. m21-23 would actually
> make a difference.  Perhaps it's papering over a bug where we're failing
> to notice that MRFs are in use?  If so, we should probably fix that (in
> addition to making this change).

It could be, I will have a look at one of the affected tests and try to
understand what is going on when we hit that case.

> > The bad news is that, currently, we assert that MRF registers are within the
> > supported range in brw_reg.h. This works fine now because the limit does not
> > depend on the hardware generation, but these patches change that of course.
> > The natural way to fix this would be to pass a generation argument to
> > all brw_reg functions that can create a brw_reg, but I imagine that we don't
> > want to do that only for this, right?
> 
> Yeah...it does seem a bit funny to add a generation parameter to brw_reg
> functions just for an assert that the register number is in range.
> 
> What about adding the asserts in brw_set_src0 and brw_set_dest?  This
> would catch BLORP and the Gen4 clip/sf/gs code that emits assembly
> directly - it would catch everything.  But, unfortunately, at the last
> minute...when it might be harder to debug.  So, I do like adding the
> assertions to the generators as well.

Yeah, adding them to brw_eu_emit.c looks like the best choice, I'll do
that and also add asserts to the generator.

> > In that case, if we want to keep the
> > asserts (I think we do) we need a way around that limitation. The first
> > patch in this series tries to move the asserts to the generator, but that 
> > won't
> > manage things like blorp and other modules that can emit code directly, so 
> > we
> > would lose the assert checks for those. Of course we could add individual
> > asserts for these as needed, but it is not ideal. Alternatively, we could 
> > add
> > a function wrapper to brw_message_reg that has the assert and use that
> > version of the function from these places. In that case, this wrapper might 
> > not
> > need to take in the generation number as parameter and could just check
> > with 16 as the limit, since we really only use MRF registers
> > beyond 16 for spilling, and we only handle spilling in code paths that end
> > up going through the generator.
> > 
> > Or maybe we think this is just not worth it if it only helps gen6...
> 
> I'd like to do it.

Great, I'll work on the patches and send them for review. Thanks for the
feedback!

Iago

> > 
> > what do you think? 
> > 
> > Iago Toral Quiroga (3):
> >   i965: Move MRF register asserts to the generator
> >   i965: Turn BRW_MAX_MRF into a macro that accepts a hardware generation
> >   i965/fs: Use MRF registers 21-23 for spilling on gen6
> > 
> >  src/mesa/drivers/dri/i965/brw_eu_emit.c|  2 +-
> >  src/mesa/drivers/dri/i965/brw_fs.cpp   |  4 ++--
> >  src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 14 +++
> >  src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp  | 27 
> > --
> >  src/mesa/drivers/dri/i965/brw_ir_vec4.h|  2 +-
> >  src/mesa/drivers/dri/i965/brw_reg.h|  5 +---
> >  .../drivers/dri/i965/brw_schedule_instructions.cpp |  4 ++--
> >  src/mesa/drivers/dri/i965/brw_vec4_generator.cpp   |  9 +---
> >  8 files changed, 37 insertions(+), 30 deletions(-)
> > 
> > 


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: fix textureGrad for cubemaps

2015-09-17 Thread Iago Toral
Hi Tapani, Kevin,

awesome work! I was curious about how to fix this, at least when I was
looking at the specs for this stuff it was not obvious that the Math
involved for this was so different, I only recall seeing the reference
that texture coordinates had to be normalized to a [-1, 1] space after
selecting the face in the cube, but I did not see formulas to implement
all this like they had for the normal case. It looks like the Math
involved is quite different.

I added some minor comments below:

On Thu, 2015-09-17 at 08:12 +0300, Tapani Pälli wrote:
> Fixes regression caused by commit
> 2b1cdb0eddb73f62e4848d4b64840067f1f70865 in:
>ES3-CTS.gtf.GL3Tests.shadow.shadow_execution_frag
> 
> No regressions observed in deqp, CTS or Piglit.
> 
> Signed-off-by: Tapani Pälli 
> Signed-off-by: Kevin Rogovin 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91114
> Cc: "11.0 10.7" 
> ---
>  .../dri/i965/brw_lower_texture_gradients.cpp   | 172 
> -
>  1 file changed, 169 insertions(+), 3 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp 
> b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
> index 7a5f983..f8a31b7 100644
> --- a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
> @@ -48,6 +48,7 @@ public:
>  
>  private:
> void emit(ir_variable *, ir_rvalue *);
> +   ir_variable *temp(void *ctx, const glsl_type *type, const char *name);
>  };
>  
>  /**
> @@ -60,6 +61,17 @@ lower_texture_grad_visitor::emit(ir_variable *var, 
> ir_rvalue *value)
> base_ir->insert_before(assign(var, value));
>  }
>  
> +/**
> + * Emit a temporary variable declaration
> + */
> +ir_variable *
> +lower_texture_grad_visitor::temp(void *ctx, const glsl_type *type, const 
> char *name)
> +{
> +   ir_variable *var = new(ctx) ir_variable(type, name, ir_var_temporary);
> +   base_ir->insert_before(var);
> +   return var;
> +}
> +
>  static const glsl_type *
>  txs_type(const glsl_type *type)
>  {
> @@ -162,9 +174,163 @@ lower_texture_grad_visitor::visit_leave(ir_texture *ir)
>  */
> ir->op = ir_txl;
> if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) {
> -  ir->lod_info.lod = expr(ir_binop_add,
> -  expr(ir_unop_log2, rho),
> -  new(mem_ctx) ir_constant(-1.0f));

It seems that in this case we don't need rho at all, so we should
probably move the rho computation to the else branch entirely.

> +  /* Cubemap texture lookups first generate a texture coordinate 
> normalized
> + to [-1, 1] on the appropiate face. The appropiate face is determined
> + by which component has largest magnitude and its sign. The texture
> + coordinate is the quotient of the remaining texture coordinates 
> against
> + that absolute value of the component of largest magnitude. This 
> division
> + requires that the computing of the derivative of the texel 
> coordinate
> + must use the quotient rule. The high level GLSL code is as follows:

Great comment! Where did you get this from? Is this detailed somewhere in
the spec? In that case maybe we want to add a reference to that as well.

> + Step 1: selection
> +
> + vec3 abs_p, Q, dQdx, dQdy;
> + abs_p = abs(ir->coordinate);
> + if (abs_p.x >= max(abs_p.y, abs_p.z)) {
> +Q = ir->coordinate.yzx;
> +dQdx = ir->lod_info.grad.dPdx.yzx;
> +dQdy = ir->lod_info.grad.dPdy.yzx;
> + }
> + if (abs_p.y >= max(abs_p.x, abs_p.z)) {
> +Q = ir->coordinate.xzy;
> +dQdx = ir->lod_info.grad.dPdx.xzy;
> +dQdy = ir->lod_info.grad.dPdy.xzy;
> + }
> + if (abs_p.z >= max(abs_p.x, abs_p.y)) {
> +Q = ir->coordinate;
> +dQdx = ir->lod_info.grad.dPdx;
> +dQdy = ir->lod_info.grad.dPdy;
> + }

This is a nitpick: you use 'Q, dQdx and dQdy' above, and 'q, dqdx, dqdy'
below... you probably want to be consistent with the capitalization and
use Q everywhere, since that is what you use in the actual
implementation.

> + Step 2: use quotient rule to compute derivative. The normalized to 
> [-1, 1]
> + texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are only 
> concerned
> + with the magnitudes of the derivatives whose values are not 
> affected by the
> + sign. We drop the sign from the computation.
> +
> + vec2 dx, dy;
> + float recip;
> +
> + recip = 1.0 / Q.z;
> + dx = recip * ( dqdx.xy - q.xy * (dqdx.z * recip) );
> + dy = recip * ( dqdy.xy - q.xy * (dqdy.z * recip) );
> +
> + Step 3: compute LOD. At this point we have the derivatives of the
> + texture coordinates normalized to [-1,1]. We 

Re: [Mesa-dev] [PATCH 03/11] glsl: shader-enum to name debug fxns

2015-09-15 Thread Iago Toral
On Tue, 2015-09-15 at 08:07 -0400, Rob Clark wrote:
> On Tue, Sep 15, 2015 at 2:55 AM, Iago Toral <ito...@igalia.com> wrote:
> > On Sun, 2015-09-13 at 11:51 -0400, Rob Clark wrote:
> >> From: Rob Clark <robcl...@freedesktop.org>
> >>
> >> Signed-off-by: Rob Clark <robcl...@freedesktop.org>
> >> ---
> >>  src/Makefile.am   |   1 +
> >>  src/glsl/shader_enums.c   | 202 
> >> ++
> >>  src/glsl/shader_enums.h   |  53 
> >>  src/mesa/Makefile.sources |   4 +-
> >>  4 files changed, 259 insertions(+), 1 deletion(-)
> >>  create mode 100644 src/glsl/shader_enums.c
> >>
> >> diff --git a/src/Makefile.am b/src/Makefile.am
> >> index 0d49bcd..9e15cca 100644
> >> --- a/src/Makefile.am
> >> +++ b/src/Makefile.am
> >> @@ -62,6 +62,7 @@ AM_CPPFLAGS = \
> >>  noinst_LTLIBRARIES = libglsl_util.la
> >>
> >>  libglsl_util_la_SOURCES = \
> >> + glsl/shader_enums.c \
> >>   mesa/main/imports.c \
> >>   mesa/program/prog_hash_table.c \
> >>   mesa/program/symbol_table.c \
> >> diff --git a/src/glsl/shader_enums.c b/src/glsl/shader_enums.c
> >> new file mode 100644
> >> index 000..1c45a40
> >> --- /dev/null
> >> +++ b/src/glsl/shader_enums.c
> >> @@ -0,0 +1,202 @@
> >> +/*
> >> + * Mesa 3-D graphics library
> >> + *
> >> + * Copyright © 2015 Red Hat
> >> + *
> >> + * Permission is hereby granted, free of charge, to any person obtaining a
> >> + * copy of this software and associated documentation files (the 
> >> "Software"),
> >> + * to deal in the Software without restriction, including without 
> >> limitation
> >> + * the rights to use, copy, modify, merge, publish, distribute, 
> >> sublicense,
> >> + * and/or sell copies of the Software, and to permit persons to whom the
> >> + * Software is furnished to do so, subject to the following conditions:
> >> + *
> >> + * The above copyright notice and this permission notice (including the 
> >> next
> >> + * paragraph) shall be included in all copies or substantial portions of 
> >> the
> >> + * Software.
> >> + *
> >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
> >> EXPRESS OR
> >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
> >> MERCHANTABILITY,
> >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT 
> >> SHALL
> >> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 
> >> OTHER
> >> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> >> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
> >> DEALINGS
> >> + * IN THE SOFTWARE.
> >> + *
> >> + * Authors:
> >> + *Rob Clark <robcl...@freedesktop.org>
> >> + */
> >> +
> >> +#include "glsl/shader_enums.h"
> >> +#include "util/macros.h"
> >> +
> >> +#define ENUM(x) [x] = #x
> >> +#define NAME(val) ((((val) < ARRAY_SIZE(names)) && names[(val)]) ? 
> >> names[(val)] : "UNKNOWN")
> >> +
> >> +const char * gl_shader_stage_name(gl_shader_stage stage)
> >> +{
> >> +   static const char *names[] = {
> >> +  ENUM(MESA_SHADER_VERTEX),
> >> +  ENUM(MESA_SHADER_TESS_CTRL),
> >> +  ENUM(MESA_SHADER_TESS_EVAL),
> >> +  ENUM(MESA_SHADER_GEOMETRY),
> >> +  ENUM(MESA_SHADER_FRAGMENT),
> >> +  ENUM(MESA_SHADER_COMPUTE),
> >> +   };
> >> +   return NAME(stage);
> >> +}
> >> +
> >> +const char * gl_vert_attrib_name(gl_vert_attrib attrib)
> >> +{
> >> +   static const char *names[] = {
> >> +  ENUM(VERT_ATTRIB_POS),
> >> +  ENUM(VERT_ATTRIB_WEIGHT),
> >> +  ENUM(VERT_ATTRIB_NORMAL),
> >> +  ENUM(VERT_ATTRIB_COLOR0),
> >> +  ENUM(VERT_ATTRIB_COLOR1),
> >> +  ENUM(VERT_ATTRIB_FOG),
> >> +  ENUM(VERT_ATTRIB_COLOR_INDEX),
> >> +  ENUM(VERT_ATTRIB_EDGEFLAG),
> >> +  ENUM(VERT_ATTRIB_TEX0),
> >> +  ENUM(VERT_ATTRIB_TEX1),
> >> +  ENUM(VERT_ATTRIB_TEX2),
> >> +  ENUM(VERT_ATTRIB_TEX3),
> >> +  ENUM(VERT_ATTRIB_TEX4),
> >> +  ENUM(VERT_ATTRIB_TE

Re: [Mesa-dev] [PATCH 04/11] nir/print: print symbolic names from shader-enum

2015-09-15 Thread Iago Toral
On Tue, 2015-09-15 at 08:27 -0400, Rob Clark wrote:
> On Tue, Sep 15, 2015 at 3:18 AM, Iago Toral <ito...@igalia.com> wrote:
> > On Sun, 2015-09-13 at 11:51 -0400, Rob Clark wrote:
> >> From: Rob Clark <robcl...@freedesktop.org>
> >>
> >> Signed-off-by: Rob Clark <robcl...@freedesktop.org>
> >> ---
> >>  src/glsl/nir/nir_print.c | 73 
> >> ++--
> >>  1 file changed, 59 insertions(+), 14 deletions(-)
> >>
> >> diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c
> >> index 69cadba..6c9bd4b 100644
> >> --- a/src/glsl/nir/nir_print.c
> >> +++ b/src/glsl/nir/nir_print.c
> >> @@ -26,6 +26,7 @@
> >>   */
> >>
> >>  #include "nir.h"
> >> +#include "glsl/shader_enums.h"
> >
> > I think you don't need the "glsl/" prefix for this include.
> >
> >>  #include <stdio.h>
> >>  #include <stdlib.h>
> >>
> >> @@ -37,6 +38,9 @@ print_tabs(unsigned num_tabs, FILE *fp)
> >>  }
> >>
> >>  typedef struct {
> >> +   FILE *fp;
> >> +
> >> +   nir_shader *shader;
> >> /** map from nir_variable -> printable name */
> >> struct hash_table *ht;
> >>
> >> @@ -206,8 +210,10 @@ print_alu_instr(nir_alu_instr *instr, FILE *fp)
> >>  }
> >>
> >>  static void
> >> -print_var_decl(nir_variable *var, print_var_state *state, FILE *fp)
> >> +print_var_decl(nir_variable *var, print_var_state *state)
> >>  {
> >> +   FILE *fp = state->fp;
> >> +
> >> fprintf(fp, "decl_var ");
> >>
> >> const char *const cent = (var->data.centroid) ? "centroid " : "";
> >> @@ -215,10 +221,10 @@ print_var_decl(nir_variable *var, print_var_state 
> >> *state, FILE *fp)
> >> const char *const inv = (var->data.invariant) ? "invariant " : "";
> >> const char *const mode[] = { "shader_in ", "shader_out ", "", "",
> >>  "uniform ", "shader_storage", "system " };
> >> -   const char *const interp[] = { "", "smooth", "flat", "noperspective" };
> >>
> >> fprintf(fp, "%s%s%s%s%s ",
> >> -  cent, samp, inv, mode[var->data.mode], 
> >> interp[var->data.interpolation]);
> >> +  cent, samp, inv, mode[var->data.mode],
> >> +   glsl_interp_qualifier_name(var->data.interpolation));
> >>
> >> glsl_print_type(var->type, fp);
> >>
> >> @@ -241,7 +247,41 @@ print_var_decl(nir_variable *var, print_var_state 
> >> *state, FILE *fp)
> >> var->data.mode == nir_var_shader_out ||
> >> var->data.mode == nir_var_uniform ||
> >> var->data.mode == nir_var_shader_storage) {
> >> -  fprintf(fp, " (%u, %u)", var->data.location, 
> >> var->data.driver_location);
> >> +  const char *loc = NULL;
> >> +  char buf[4];
> >> +
> >> +  switch (state->shader->stage) {
> >> +  case MESA_SHADER_VERTEX:
> >> + if (var->data.mode == nir_var_shader_in)
> >> +loc = gl_vert_attrib_name(var->data.location);
> >> + else if (var->data.mode == nir_var_shader_out)
> >> +loc = gl_varying_slot_name(var->data.location);
> >> + break;
> >> +  case MESA_SHADER_GEOMETRY:
> >> + if ((var->data.mode == nir_var_shader_in) ||
> >> + (var->data.mode == nir_var_shader_out))
> >> +loc = gl_vert_attrib_name(var->data.location);
> >
> > Mmm... shouldn't this be the same as in the case of a vertex shader?
> > that is, use gl_varying_slot_name for GS outputs.
> 
> yeah, and actually gl_varying_slot for input too, according to comment
> in 'struct nir_variable_data'.. I just copy/pasted the wrong thing..
> I've fixed it up locally (and the header path)

Great, you can add my Rb on the fixed version:

Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

Iago

> thanks
> 
> BR,
> -R
> 
> >
> >> + break;
> >> +  case MESA_SHADER_FRAGMENT:
> >> + if (var->data.mode == nir_var_shader_in)
> >> +loc = gl_varying_slot_name(var->data.location);
> >> +

Re: [Mesa-dev] SSBO's in UniformBlocks list?

2015-09-29 Thread Iago Toral
Hi ilia,

On Tue, 2015-09-29 at 03:53 -0400, Ilia Mirkin wrote:
> Hi Samuel, and any other onlookers,
> 
> I was wondering why the decision was made to stick SSBO's onto the
> same list as constbufs. Seems like they're entirely separate entities,
> no? Perhaps I'm missing something?

The reason for this was that there is a lot of code in the compiler to
handle uniform blocks and all the rules for them and we needed the same
treatment for SSBOs, so that seemed like a reasonable way forward to
reuse a lot of the code in the compiler front end. I think the only
place where we needed to make explicit distinctions is when we check for
resource limits, since these are different for UBOs and SSBOs of course.
Although UBOs and SSBOs are separate entities they have a lot of
similarities too, so that did not look like a terrible idea, considering
the benefits.

If I remember correctly, Jordan suggested that we might want to change
the names of the structures/files involved so they do not refer to UBOs
explicitly and use something more generic instead, but that would be a
very large change affecting the compiler (and all the drivers) and we
thought it would be best to do that at some other moment, after the
series landed, to avoid being stuck in rebase hell for months.

Iago

> Thanks,
> 
>   -ilia
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V6 13/27] glsl: fix whitespace

2015-09-29 Thread Iago Toral
Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

On Tue, 2015-09-29 at 12:42 +1000, Timothy Arceri wrote:
> ---
>  src/glsl/ast_to_hir.cpp | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
> index a9696dc..2fd4443 100644
> --- a/src/glsl/ast_to_hir.cpp
> +++ b/src/glsl/ast_to_hir.cpp
> @@ -6400,7 +6400,7 @@ ast_interface_block::hir(exec_list *instructions,
>  }
>   }
>  
> -  /* From section 4.3.9 (Interface Blocks) of the GLSL ES 3.10 spec:
> + /* From section 4.3.9 (Interface Blocks) of the GLSL ES 3.10 spec:
>*
>* * Arrays of arrays of blocks are not allowed
>*/


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir: Use a system value for gl_PrimitiveIDIn.

2015-09-29 Thread Iago Toral
On Mon, 2015-09-28 at 23:05 -0700, Kenneth Graunke wrote:
> At least on Intel hardware, gl_PrimitiveIDIn comes in as a special part
> of the payload rather than a normal input.  This is typically what we
> use system values for.  Dave and Ilia also agree that a system value
> would be nicer.
> 
> At some point, we should change it at the GLSL IR level as well.  But
> that requires changing most of the drivers.  For now, let's at least
> make NIR do the right thing, which is easy.
> 
> Signed-off-by: Kenneth Graunke <kenn...@whitecape.org>
> ---
>  src/glsl/nir/glsl_to_nir.cpp  | 5 +
>  src/glsl/nir/nir.c| 5 -
>  src/glsl/nir/nir_intrinsics.h | 1 +
>  src/glsl/shader_enums.h   | 2 +-
>  src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp | 9 +
>  5 files changed, 20 insertions(+), 2 deletions(-)
> 
> I bypassed most of the system value boilerplate in the backend.  Notably,
> this means we just access g1 directly rather than moving it to a VGRF at
> the start of the program and using that later.  This means more HW_REG
> usage, but it also means less seemingly pointless copies.
> 
> I'm hoping to simplify the handling of other system values too, but I'm
> waiting until we delete the GLSL IR code paths first.
> 
> diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
> index f03a107..c0b2634 100644
> --- a/src/glsl/nir/glsl_to_nir.cpp
> +++ b/src/glsl/nir/glsl_to_nir.cpp
> @@ -271,6 +271,11 @@ nir_visitor::visit(ir_variable *ir)
>   /* For whatever reason, GLSL IR makes gl_FrontFacing an input */
>   var->data.location = SYSTEM_VALUE_FRONT_FACE;
>   var->data.mode = nir_var_system_value;
> +  } else if (shader->stage == MESA_SHADER_GEOMETRY &&
> + ir->data.location == VARYING_SLOT_PRIMITIVE_ID) {
> + /* For whatever reason, GLSL IR makes gl_PrimitiveIDIn an input */
> + var->data.location = SYSTEM_VALUE_PRIMITIVE_ID;
> + var->data.mode = nir_var_system_value;
>} else {
>   var->data.mode = nir_var_shader_in;
>}
> diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
> index 1206bb4..7f30b8a 100644
> --- a/src/glsl/nir/nir.c
> +++ b/src/glsl/nir/nir.c
> @@ -1487,10 +1487,11 @@ nir_intrinsic_from_system_value(gl_system_value val)
>return nir_intrinsic_load_local_invocation_id;
> case SYSTEM_VALUE_WORK_GROUP_ID:
>return nir_intrinsic_load_work_group_id;
> +   case SYSTEM_VALUE_PRIMITIVE_ID:
> +  return nir_intrinsic_load_primitive_id;
> /* FINISHME: Add tessellation intrinsics.
> case SYSTEM_VALUE_TESS_COORD:
> case SYSTEM_VALUE_VERTICES_IN:
> -   case SYSTEM_VALUE_PRIMITIVE_ID:
> case SYSTEM_VALUE_TESS_LEVEL_OUTER:
> case SYSTEM_VALUE_TESS_LEVEL_INNER:
>  */
> @@ -1525,6 +1526,8 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
>return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
> case nir_intrinsic_load_work_group_id:
>return SYSTEM_VALUE_WORK_GROUP_ID;
> +   case nir_intrinsic_load_primitive_id:
> +  return SYSTEM_VALUE_PRIMITIVE_ID;
> /* FINISHME: Add tessellation intrinsics.
>return SYSTEM_VALUE_TESS_COORD;
>return SYSTEM_VALUE_VERTICES_IN;
> diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
> index 06f1b02..0d93b12 100644
> --- a/src/glsl/nir/nir_intrinsics.h
> +++ b/src/glsl/nir/nir_intrinsics.h
> @@ -194,6 +194,7 @@ SYSTEM_VALUE(instance_id, 1, 0)
>  SYSTEM_VALUE(sample_id, 1, 0)
>  SYSTEM_VALUE(sample_pos, 2, 0)
>  SYSTEM_VALUE(sample_mask_in, 1, 0)
> +SYSTEM_VALUE(primitive_id, 1, 0)
>  SYSTEM_VALUE(invocation_id, 1, 0)
>  SYSTEM_VALUE(local_invocation_id, 3, 0)
>  SYSTEM_VALUE(work_group_id, 3, 0)
> diff --git a/src/glsl/shader_enums.h b/src/glsl/shader_enums.h
> index 3978007..f6b71a3 100644
> --- a/src/glsl/shader_enums.h
> +++ b/src/glsl/shader_enums.h
> @@ -399,7 +399,7 @@ typedef enum
> /*@{*/
> SYSTEM_VALUE_TESS_COORD,
> SYSTEM_VALUE_VERTICES_IN,/**< Tessellation vertices in input patch */
> -   SYSTEM_VALUE_PRIMITIVE_ID,   /**< (currently not used by GS) */
> +   SYSTEM_VALUE_PRIMITIVE_ID,
> SYSTEM_VALUE_TESS_LEVEL_OUTER, /**< TES input */
> SYSTEM_VALUE_TESS_LEVEL_INNER, /**< TES input */
> /*@}*/
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
> index 4f4e1e1..26766a0 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
> @@ -72,6 +72,9 @@ 
> vec4_gs_visitor::nir_setup_system_value_in

Re: [Mesa-dev] SSBO's in UniformBlocks list?

2015-09-30 Thread Iago Toral
On Tue, 2015-09-29 at 11:19 -0400, Ilia Mirkin wrote:
> On Tue, Sep 29, 2015 at 4:33 AM, Iago Toral <ito...@igalia.com> wrote:
> > Hi ilia,
> >
> > On Tue, 2015-09-29 at 03:53 -0400, Ilia Mirkin wrote:
> >> Hi Samuel, and any other onlookers,
> >>
> >> I was wondering why the decision was made to stick SSBO's onto the
> >> same list as constbufs. Seems like they're entirely separate entities,
> >> no? Perhaps I'm missing something?
> >
> > The reason for this was that there is a lot of code in the compiler to
> > handle uniform blocks and all the rules for them and we needed the same
> > treatment for SSBOs, so that seemed like a reasonable way forward to
> > reuse a lot of the code in the compiler front end. I think the only
> > place where we needed to make explicit distinctions is when we check for
> > resource limits, since these are different for UBOs and SSBOs of course.
> > Although UBOs and SSBOs are separate entities they have a lot of
> > similarities too, so that did not look like a terrible idea, considering
> > the benefits.
> 
> My concern is around indexing... now the per-stage indices are in the
> combined UBO/SSBO space -- how do I tease out the individual ones?
> Easy enough when you can loop over NumUniformBlocks and just count the
> right type, but what about in the shader, where I get the buffer index
> in a ir_rvalue?

We assumed that backends could work with a shared index space between
UBOs and SSBOs. In i965 at least that is not a problem and makes things
easy: we simply use the block index we get from the IR directly, the
driver does not use a separate buffer space for them and handles SSBOs
just the same as UBOs, only that the buffer has different flags.

However, if we think this is not ideal we can look into having separate
index spaces.

Iago

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] SSBO's in UniformBlocks list?

2015-09-30 Thread Iago Toral
On Wed, 2015-09-30 at 02:34 -0400, Ilia Mirkin wrote:
> On Wed, Sep 30, 2015 at 2:26 AM, Iago Toral <ito...@igalia.com> wrote:
> > On Tue, 2015-09-29 at 11:19 -0400, Ilia Mirkin wrote:
> >> On Tue, Sep 29, 2015 at 4:33 AM, Iago Toral <ito...@igalia.com> wrote:
> >> > Hi ilia,
> >> >
> >> > On Tue, 2015-09-29 at 03:53 -0400, Ilia Mirkin wrote:
> >> >> Hi Samuel, and any other onlookers,
> >> >>
> >> >> I was wondering why the decision was made to stick SSBO's onto the
> >> >> same list as constbufs. Seems like they're entirely separate entities,
> >> >> no? Perhaps I'm missing something?
> >> >
> >> > The reason for this was that there is a lot of code in the compiler to
> >> > handle uniform blocks and all the rules for them and we needed the same
> >> > treatment for SSBOs, so that seemed like a reasonable way forward to
> >> > reuse a lot of the code in the compiler front end. I think the only
> >> > place where we needed to make explicit distinctions is when we check for
> >> > resource limits, since these are different for UBOs and SSBOs of course.
> >> > Although UBOs and SSBOs are separate entities they have a lot of
> >> > similarities too, so that did not look like a terrible idea, considering
> >> > the benefits.
> >>
> >> My concern is around indexing... now the per-stage indices are in the
> >> combined UBO/SSBO space -- how do I tease out the individual ones?
> >> Easy enough when you can loop over NumUniformBlocks and just count the
> >> right type, but what about in the shader, where I get the buffer index
> >> in a ir_rvalue?

By the way, in i965 this is not a problem either, we have access to the
gl_shader struct from the compiler backend, so if we need to translate
from the shared index space to a separate space we have NumUniformBlocks
available to do that. From your words I get that you can't access this
information from the compiler backend, right? In that case, wouldn't it
be possible to translate the index during the GLSL IR -> TGSI
conversion?

Iago

> > We assumed that backends could work with a shared index space between
> > UBOs and SSBOs. In i965 at least that is not a problem and makes things
> > easy: we simply use the block index we get from the IR directly, the
> > driver does not use a separate buffer space for them and handles SSBOs
> > just the same as UBOs, only that the buffer has different flags.
> >
> > However, if we think this is not ideal we can look into having separate
> > index spaces.
> 
> Hrm that's definitely not the way I was implementing it in TGSI. I
> was going to have a separate thing for buffers, to be used to
> implement both ssbo and counters. Also on NVIDIA hardware, constbufs
> get special binding points while shader buffers are just addresses
> loaded from (ironically) a constbuf. I believe it's a similar
> situation on r600-class hardware, not 100% sure about SI.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] SSBO's in UniformBlocks list?

2015-09-30 Thread Iago Toral
On Tue, 2015-09-29 at 18:41 +0300, Francisco Jerez wrote:
> Ilia Mirkin <imir...@alum.mit.edu> writes:
> 
> > On Tue, Sep 29, 2015 at 4:33 AM, Iago Toral <ito...@igalia.com> wrote:
> >> Hi ilia,
> >>
> >> On Tue, 2015-09-29 at 03:53 -0400, Ilia Mirkin wrote:
> >>> Hi Samuel, and any other onlookers,
> >>>
> >>> I was wondering why the decision was made to stick SSBO's onto the
> >>> same list as constbufs. Seems like they're entirely separate entities,
> >>> no? Perhaps I'm missing something?
> >>
> >> The reason for this was that there is a lot of code in the compiler to
> >> handle uniform blocks and all the rules for them and we needed the same
> >> treatment for SSBOs, so that seemed like a reasonable way forward to
> >> reuse a lot of the code in the compiler front end. I think the only
> >> place where we needed to make explicit distinctions is when we check for
> >> resource limits, since these are different for UBOs and SSBOs of course.
> >> Although UBOs and SSBOs are separate entities they have a lot of
> >> similarities too, so that did not look like a terrible idea, considering
> >> the benefits.
> >
> > My concern is around indexing... now the per-stage indices are in the
> > combined UBO/SSBO space -- how do I tease out the individual ones?
> > Easy enough when you can loop over NumUniformBlocks and just count the
> > right type, but what about in the shader, where I get the buffer index
> > in a ir_rvalue?
> >
> Yeah, this seems rather dubious to me too.  Even if you had re-used the
> current gl_uniform_block type for SSBOs for the sake of code-sharing I
> think it would have made more sense to split them into a different index
> space, because SSBOs are a different index space at the API level and
> drivers will want them to be a different index space too.

In the case of i965 at least I think having them combined makes things
easier because from the driver perspective the only difference between
SSBOs and UBOs is that the underlying buffer storage has different
flags. This was the simplest implementation and in theory, translating
from the shared space to a separate space should be very easy too, so
drivers that need a separate space should be able to get that as well...
That's in theory, the problem is that Ilia is saying that in his case
the compiler backend may not have the information required to make this
translation, and if this is true then we'll have to rethink this...

> I believe that this leads to a bug in the i965 implementation -- We expose
> 12 SSBOs per stage and 12 UBOs per stage, but we only have 12 binding
> table entries reserved for the block of the binding table currently
> shared among UBOs and SSBOs, so you might overflow the number of
> available surface entries if the combined number of UBOs and SSBOs is
> greater than 12 for some stage.

Yeah, we forgot to update that. I'll send a patch to fix this. Thanks
Curro!

Iago

> > Thanks,
> >
> >   -ilia
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] SSBO's in UniformBlocks list?

2015-09-30 Thread Iago Toral
On Wed, 2015-09-30 at 11:54 +0300, Francisco Jerez wrote:
> Iago Toral <ito...@igalia.com> writes:
> 
> > On Tue, 2015-09-29 at 18:41 +0300, Francisco Jerez wrote:
> >> Ilia Mirkin <imir...@alum.mit.edu> writes:
> >> 
> >> > On Tue, Sep 29, 2015 at 4:33 AM, Iago Toral <ito...@igalia.com> wrote:
> >> >> Hi ilia,
> >> >>
> >> >> On Tue, 2015-09-29 at 03:53 -0400, Ilia Mirkin wrote:
> >> >>> Hi Samuel, and any other onlookers,
> >> >>>
> >> >>> I was wondering why the decision was made to stick SSBO's onto the
> >> >>> same list as constbufs. Seems like they're entirely separate entities,
> >> >>> no? Perhaps I'm missing something?
> >> >>
> >> >> The reason for this was that there is a lot of code in the compiler to
> >> >> handle uniform blocks and all the rules for them and we needed the same
> >> >> treatment for SSBOs, so that seemed like a reasonable way forward to
> >> >> reuse a lot of the code in the compiler front end. I think the only
> >> >> place where we needed to make explicit distinctions is when we check for
> >> >> resource limits, since these are different for UBOs and SSBOs of course.
> >> >> Although UBOs and SSBOs are separate entities they have a lot of
> >> >> similarities too, so that did not look like a terrible idea, considering
> >> >> the benefits.
> >> >
> >> > My concern is around indexing... now the per-stage indices are in the
> >> > combined UBO/SSBO space -- how do I tease out the individual ones?
> >> > Easy enough when you can loop over NumUniformBlocks and just count the
> >> > right type, but what about in the shader, where I get the buffer index
> >> > in a ir_rvalue?
> >> >
> >> Yeah, this seems rather dubious to me too.  Even if you had re-used the
> >> current gl_uniform_block type for SSBOs for the sake of code-sharing I
> >> think it would have made more sense to split them into a different index
> >> space, because SSBOs are a different index space at the API level and
> >> drivers will want them to be a different index space too.
> >
> > In the case of i965 at least I think having them combined makes things
> > easier because from the driver perspective the only difference between
> > SSBOs and UBOs is that the underlying buffer storage has different
> > flags. This was the simplest implementation and in theory, translating
> > from the shared space to a separate space should be very easy too, so
> > drivers that need a separate space should be able to get that as well...
> > That's in theory, the problem is that Ilia is saying that in his case
> > the compiler backend may not have the information required to make this
> > translation, and if this is true then we'll have to rethink this...
> >
> I think this is largely irrelevant for the i965 driver, code can be
> shared either way if shader UBOs and SSBOs are represented using the
> same data type: Assuming that function f() is shared among SSBOs and
> UBOs, it could also have been shared with separate arrays by changing it
> into f(gl_uniform_block[]) and passing the right U/SSBO array as
> argument.
> 
> Translating from a separate space to a shared index space is trivial and
> has O(1) complexity (shared_ssbo_index = separate_ssbo_index +
> max_ubo_index), although it's unlikely to be necessary on any HW
> architecture I know of.  OTOH translating from the shared space to a
> separate space (as the GL API and Gallium drivers require) involves
> iterating over all previous U/SSBOs of the shader and has O(n)
> complexity (e.g. as you do here [1]).

However, this will come at the expense of having to modify the compiler
front-end since that assumes that we only have one array at the moment
and all the code that deals with uniform blocks works with that
assumption. Not that we can't do this of course, but I was hoping that
we could avoid it since that part of the compiler is complex enough as
it is... Anyway, at this point I guess the best thing I can do is to
implement the separate index space and see what it looks like,
hopefully it is not that bad and if gallium drivers can't translate from
the shared index space we don't have an alternative anyway.

Iago

> [1] https://patchwork.freedesktop.org/patch/60654/
> 
> >> I believe that this leads to a bug in the i965 implementation -- We expose
> >> 12 SSBOs per stage and 12 UBOs per stage, but we only have 12 binding
> >> table entries reserved for the block o

Re: [Mesa-dev] [PATCH] mesa: add GL_UNSIGNED_INT_24_8 to _mesa_pack_depth_span

2015-10-01 Thread Iago Toral
On Thu, 2015-10-01 at 08:28 +0300, Tapani Pälli wrote:
> Patch adds missing type (used with NV_read_depth) so that it gets
> handled correctly. Also add type to _mesa_problem output to aid
> debugging.
> 
> Signed-off-by: Tapani Pälli 
> ---
>  src/mesa/main/pack.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c
> index 7147fd6..54a0c42 100644
> --- a/src/mesa/main/pack.c
> +++ b/src/mesa/main/pack.c
> @@ -1074,6 +1074,7 @@ _mesa_pack_depth_span( struct gl_context *ctx, GLuint 
> n, GLvoid *dest,
>   }
>}
>break;
> +   case GL_UNSIGNED_INT_24_8:

Is it okay to store 32-bit integers in this case? That's what the code
below does. The spec says that the 8 stencil bits are undefined, but
don't we need to convert the depth value to a 24-bit integer scale?
(i.e. make 1.0 translate to 2^24-1 not 2^32-1).

Iago

> case GL_UNSIGNED_INT:
>{
>   GLuint *dst = (GLuint *) dest;
> @@ -1124,7 +1125,8 @@ _mesa_pack_depth_span( struct gl_context *ctx, GLuint 
> n, GLvoid *dest,
>}
>break;
> default:
> -  _mesa_problem(ctx, "bad type in _mesa_pack_depth_span");
> +  _mesa_problem(ctx, "bad type in _mesa_pack_depth_span (%s)",
> +_mesa_enum_to_string(dstType));
> }
>  
> free(depthCopy);


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: emit row_major matrix's SSBO stores only for components in writemask

2015-10-01 Thread Iago Toral
On Thu, 2015-10-01 at 09:41 +0200, Samuel Iglesias Gonsalvez wrote:
> When writing to a column of a row-major matrix, each component of the
> vector is stored to non-consecutive memory addresses, so we generate
> one instruction per component.
> 
> This patch skips the disabled components in the writemask, saving some
> store instructions and avoiding storing wrong data to each disabled
> component.

Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

> Signed-off-by: Samuel Iglesias Gonsalvez <sigles...@igalia.com>
> ---
>  src/glsl/lower_ubo_reference.cpp | 6 ++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/src/glsl/lower_ubo_reference.cpp 
> b/src/glsl/lower_ubo_reference.cpp
> index e581306..247620e 100644
> --- a/src/glsl/lower_ubo_reference.cpp
> +++ b/src/glsl/lower_ubo_reference.cpp
> @@ -754,6 +754,12 @@ lower_ubo_reference_visitor::emit_access(bool is_write,
>  add(base_offset,
>  new(mem_ctx) ir_constant(deref_offset + i * matrix_stride));
>   if (is_write) {
> +/* If the component is not in the writemask, then don't
> + * store any value.
> + */
> +if (!((1 << i) & write_mask))
> +   continue;
> +
>  base_ir->insert_after(ssbo_store(swizzle(deref, i, 1), 
> chan_offset, 1));
>   } else {
>  if (!this->is_shader_storage) {


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/11] i965: Pull stage_prog_data.nr_params out of the NIR shader

2015-10-01 Thread Iago Toral
On Thu, 2015-10-01 at 07:58 -0700, Jason Ekstrand wrote:
> On Thu, Oct 1, 2015 at 7:52 AM, Iago Toral <ito...@igalia.com> wrote:
> > On Wed, 2015-09-30 at 18:41 -0700, Jason Ekstrand wrote:
> >> Previously, we had a bunch of code in each stage to figure out how many
> >> slots we needed in stage_prog_data.param.  This code was mostly identical
> >> across the stages and had been copied and pasted around.  Unfortunately,
> >> this meant that any time you did something special, you had to add code for
> >> it to each of these places.  In particular, none of the stages took
> >> subroutines into account; they were working entirely by accident.  By
> >> taking this data from the NIR shader, we know the exact number of entries
> >> we need and everything goes a bit smoother.
> >> ---
> >>  src/mesa/drivers/dri/i965/brw_cs.c |  4 ++--
> >>  src/mesa/drivers/dri/i965/brw_gs.c |  5 ++---
> >>  src/mesa/drivers/dri/i965/brw_vs.c | 16 
> >>  src/mesa/drivers/dri/i965/brw_wm.c | 10 +++---
> >>  4 files changed, 11 insertions(+), 24 deletions(-)
> >>
> >> diff --git a/src/mesa/drivers/dri/i965/brw_cs.c 
> >> b/src/mesa/drivers/dri/i965/brw_cs.c
> >> index 02eeeda..24120fb 100644
> >> --- a/src/mesa/drivers/dri/i965/brw_cs.c
> >> +++ b/src/mesa/drivers/dri/i965/brw_cs.c
> >> @@ -30,6 +30,7 @@
> >>  #include "intel_mipmap_tree.h"
> >>  #include "brw_state.h"
> >>  #include "intel_batchbuffer.h"
> >> +#include "glsl/nir/nir.h"
> >>
> >>  static bool
> >>  brw_codegen_cs_prog(struct brw_context *brw,
> >> @@ -55,8 +56,7 @@ brw_codegen_cs_prog(struct brw_context *brw,
> >>  * prog_data associated with the compiled program, and which will be 
> >> freed
> >>  * by the state cache.
> >>  */
> >> -   int param_count = cs->base.num_uniform_components +
> >> - cs->base.NumImages * BRW_IMAGE_PARAM_SIZE;
> >> +   int param_count = cp->program.Base.nir->num_uniforms;
> >>
> >> /* The backend also sometimes adds params for texture size. */
> >> param_count += 2 * 
> >> ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
> >> diff --git a/src/mesa/drivers/dri/i965/brw_gs.c 
> >> b/src/mesa/drivers/dri/i965/brw_gs.c
> >> index 61e7b2a..0cf7ec8 100644
> >> --- a/src/mesa/drivers/dri/i965/brw_gs.c
> >> +++ b/src/mesa/drivers/dri/i965/brw_gs.c
> >> @@ -32,6 +32,7 @@
> >>  #include "brw_vec4_gs_visitor.h"
> >>  #include "brw_state.h"
> >>  #include "brw_ff_gs.h"
> >> +#include "glsl/nir/nir.h"
> >>
> >>
> >>  bool
> >> @@ -60,9 +61,7 @@ brw_codegen_gs_prog(struct brw_context *brw,
> >>  * every uniform is a float which gets padded to the size of a vec4.
> >>  */
> >> struct gl_shader *gs = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
> >> -   int param_count = gs->num_uniform_components * 4;
> >> -
> >> -   param_count += gs->NumImages * BRW_IMAGE_PARAM_SIZE;
> >> +   int param_count = gp->program.Base.nir->num_uniforms * 4;
> >
> > I think the vec4 nir backend does not handle image uniforms at the
> > moment, does it? At least I see that the FS backend has code
> > specifically for that in fs_visitor::nir_setup_uniform. Not sure if we
> > support images in geometry stages though, but the code you remove seems
> > to account for that...
> 
> We don't.  When Curro initially implemented it, he did have vec4
> support.  However, that was dropped in favor of doing things
> differently in the fs compiler.  It may yet come back though.  This
> code was probably merged a bit speculatively but doesn't really hurt
> anything.

Yeah, that's what I recall too, I was surprised to find image stuff here
and thought that maybe I missed something.

> >> c.prog_data.base.base.param =
> >>rzalloc_array(NULL, const gl_constant_value *, param_count);
> >> diff --git a/src/mesa/drivers/dri/i965/brw_vs.c 
> >> b/src/mesa/drivers/dri/i965/brw_vs.c
> >> index e1a0d9c..391411c 100644
> >> --- a/src/mesa/drivers/dri/i965/brw_vs.c
> >> +++ b/src/mesa/drivers/dri/i965/brw_vs.c
> >> @@ -109,18 +109,10 @@ brw_codegen_vs_prog(struct brw_context *brw,
> >>  * prog_data associated with the compiled program, and which will be 
> >> freed
> >>  * by the sta

Re: [Mesa-dev] [PATCH 02/11] i965/vs: Move lazy NIR creation to codegen_vs_prog

2015-10-01 Thread Iago Toral
Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

On Wed, 2015-09-30 at 18:41 -0700, Jason Ekstrand wrote:
> The next commit will add code to codegen_vs_prog that requires the NIR
> shader to be there in all cases.  It doesn't hurt anything to just move it
> from brw_vs_emit to its only caller.
> ---
>  src/mesa/drivers/dri/i965/brw_vec4.cpp | 12 
>  src/mesa/drivers/dri/i965/brw_vs.c | 13 +
>  2 files changed, 13 insertions(+), 12 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index 056ce39..407698f 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -1953,18 +1953,6 @@ brw_vs_emit(struct brw_context *brw,
> if (unlikely(INTEL_DEBUG & DEBUG_VS))
>brw_dump_ir("vertex", prog, >base, >Base);
>  
> -   if (!vp->Base.nir) {
> -  /* Normally we generate NIR in LinkShader() or
> -   * ProgramStringNotify(), but Mesa's fixed-function vertex program
> -   * handling doesn't notify the driver at all.  Just do it here, at
> -   * the last minute, even though it's lame.
> -   */
> -  assert(vp->Base.Id == 0 && prog == NULL);
> -  vp->Base.nir =
> - brw_create_nir(brw, NULL, &vp->Base, MESA_SHADER_VERTEX,
> -brw->intelScreen->compiler->scalar_vs);
> -   }
> -
> if (brw->intelScreen->compiler->scalar_vs) {
>prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_vs.c 
> b/src/mesa/drivers/dri/i965/brw_vs.c
> index 0a348a5..e1a0d9c 100644
> --- a/src/mesa/drivers/dri/i965/brw_vs.c
> +++ b/src/mesa/drivers/dri/i965/brw_vs.c
> @@ -37,6 +37,7 @@
>  #include "brw_state.h"
>  #include "program/prog_print.h"
>  #include "program/prog_parameter.h"
> +#include "brw_nir.h"
>  
>  #include "util/ralloc.h"
>  
> @@ -81,6 +82,18 @@ brw_codegen_vs_prog(struct brw_context *brw,
> bool start_busy = false;
> double start_time = 0;
>  
> +   if (!vp->program.Base.nir) {
> +  /* Normally we generate NIR in LinkShader() or
> +   * ProgramStringNotify(), but Mesa's fixed-function vertex program
> +   * handling doesn't notify the driver at all.  Just do it here, at
> +   * the last minute, even though it's lame.
> +   */
> +  assert(vp->program.Base.Id == 0 && prog == NULL);
> +  vp->program.Base.nir =
> + brw_create_nir(brw, NULL, &vp->program.Base, MESA_SHADER_VERTEX,
> +brw->intelScreen->compiler->scalar_vs);
> +   }
> +
> if (prog)
>vs = (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
>  


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/11] glsl/types: Make subroutine types have a single matrix column

2015-10-01 Thread Iago Toral
Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

On Wed, 2015-09-30 at 18:41 -0700, Jason Ekstrand wrote:
> That way, if we do the usual thing of multiplying vector_elements by
> matrix_columns we get the actual number of components in the type as per
> component_slots().
> 
> While we're at it, we also switch to using the actual C++ field
> initializers for vector_elements and matrix_columns.
> ---
>  src/glsl/glsl_types.cpp | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp
> index 8586b2e..25927f6 100644
> --- a/src/glsl/glsl_types.cpp
> +++ b/src/glsl/glsl_types.cpp
> @@ -172,7 +172,7 @@ glsl_type::glsl_type(const char *subroutine_name) :
> base_type(GLSL_TYPE_SUBROUTINE),
> sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
> sampler_type(0), interface_packing(0),
> -   vector_elements(0), matrix_columns(0),
> +   vector_elements(1), matrix_columns(1),
> length(0)
>  {
> mtx_lock(&glsl_type::mutex);
> @@ -180,7 +180,6 @@ glsl_type::glsl_type(const char *subroutine_name) :
> init_ralloc_type_ctx();
> assert(subroutine_name != NULL);
> this->name = ralloc_strdup(this->mem_ctx, subroutine_name);
> -   this->vector_elements = 1;
> mtx_unlock(&glsl_type::mutex);
>  }
>  


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/11] i965: Pull stage_prog_data.nr_params out of the NIR shader

2015-10-01 Thread Iago Toral
On Wed, 2015-09-30 at 18:41 -0700, Jason Ekstrand wrote:
> Previously, we had a bunch of code in each stage to figure out how many
> slots we needed in stage_prog_data.param.  This code was mostly identical
> across the stages and had been copied and pasted around.  Unfortunately,
> this meant that any time you did something special, you had to add code for
> it to each of these places.  In particular, none of the stages took
> subroutines into account; they were working entirely by accident.  By
> taking this data from the NIR shader, we know the exact number of entries
> we need and everything goes a bit smoother.
> ---
>  src/mesa/drivers/dri/i965/brw_cs.c |  4 ++--
>  src/mesa/drivers/dri/i965/brw_gs.c |  5 ++---
>  src/mesa/drivers/dri/i965/brw_vs.c | 16 
>  src/mesa/drivers/dri/i965/brw_wm.c | 10 +++---
>  4 files changed, 11 insertions(+), 24 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_cs.c 
> b/src/mesa/drivers/dri/i965/brw_cs.c
> index 02eeeda..24120fb 100644
> --- a/src/mesa/drivers/dri/i965/brw_cs.c
> +++ b/src/mesa/drivers/dri/i965/brw_cs.c
> @@ -30,6 +30,7 @@
>  #include "intel_mipmap_tree.h"
>  #include "brw_state.h"
>  #include "intel_batchbuffer.h"
> +#include "glsl/nir/nir.h"
>  
>  static bool
>  brw_codegen_cs_prog(struct brw_context *brw,
> @@ -55,8 +56,7 @@ brw_codegen_cs_prog(struct brw_context *brw,
>  * prog_data associated with the compiled program, and which will be freed
>  * by the state cache.
>  */
> -   int param_count = cs->base.num_uniform_components +
> - cs->base.NumImages * BRW_IMAGE_PARAM_SIZE;
> +   int param_count = cp->program.Base.nir->num_uniforms;
>  
> /* The backend also sometimes adds params for texture size. */
> param_count += 2 * 
> ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
> diff --git a/src/mesa/drivers/dri/i965/brw_gs.c 
> b/src/mesa/drivers/dri/i965/brw_gs.c
> index 61e7b2a..0cf7ec8 100644
> --- a/src/mesa/drivers/dri/i965/brw_gs.c
> +++ b/src/mesa/drivers/dri/i965/brw_gs.c
> @@ -32,6 +32,7 @@
>  #include "brw_vec4_gs_visitor.h"
>  #include "brw_state.h"
>  #include "brw_ff_gs.h"
> +#include "glsl/nir/nir.h"
>  
> 
>  bool
> @@ -60,9 +61,7 @@ brw_codegen_gs_prog(struct brw_context *brw,
>  * every uniform is a float which gets padded to the size of a vec4.
>  */
> struct gl_shader *gs = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
> -   int param_count = gs->num_uniform_components * 4;
> -
> -   param_count += gs->NumImages * BRW_IMAGE_PARAM_SIZE;
> +   int param_count = gp->program.Base.nir->num_uniforms * 4;

I think the vec4 nir backend does not handle image uniforms at the
moment, does it? At least I see that the FS backend has code
specifically for that in fs_visitor::nir_setup_uniform. Not sure if we
support images in geometry stages though, but the code you remove seems
to account for that...

> c.prog_data.base.base.param =
>rzalloc_array(NULL, const gl_constant_value *, param_count);
> diff --git a/src/mesa/drivers/dri/i965/brw_vs.c 
> b/src/mesa/drivers/dri/i965/brw_vs.c
> index e1a0d9c..391411c 100644
> --- a/src/mesa/drivers/dri/i965/brw_vs.c
> +++ b/src/mesa/drivers/dri/i965/brw_vs.c
> @@ -109,18 +109,10 @@ brw_codegen_vs_prog(struct brw_context *brw,
>  * prog_data associated with the compiled program, and which will be freed
>  * by the state cache.
>  */
> -   int param_count;
> -   if (vs) {
> -  /* We add padding around uniform values below vec4 size, with the worst
> -   * case being a float value that gets blown up to a vec4, so be
> -   * conservative here.
> -   */
> -  param_count = vs->base.num_uniform_components * 4 +
> -vs->base.NumImages * BRW_IMAGE_PARAM_SIZE;
> -  stage_prog_data->nr_image_params = vs->base.NumImages;
> -   } else {
> -  param_count = vp->program.Base.Parameters->NumParameters * 4;
> -   }
> +   int param_count = vp->program.Base.nir->num_uniforms;
> +   if (!brw->intelScreen->compiler->scalar_vs)
> +  param_count *= 4;

Same thing here.

Also, I guess this also means that for the scalar_vs cases we were
computing a larger param_count than we really should, right?

> /* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip
>  * planes as uniforms.
>  */
> diff --git a/src/mesa/drivers/dri/i965/brw_wm.c 
> b/src/mesa/drivers/dri/i965/brw_wm.c
> index cc97d6a..08f2416 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm.c
> @@ -35,6 +35,7 @@
>  #include "program/prog_parameter.h"
>  #include "program/program.h"
>  #include "intel_mipmap_tree.h"
> +#include "glsl/nir/nir.h"
>  
>  #include "util/ralloc.h"
>  
> @@ -173,14 +174,9 @@ brw_codegen_wm_prog(struct brw_context *brw,
>  * prog_data associated with the compiled program, and which will be freed
>  * by the state cache.
>  */
> -   int param_count;
> -   if (fs) {
> -  param_count = 

Re: [Mesa-dev] SSBO's in UniformBlocks list?

2015-10-01 Thread Iago Toral
On Wed, 2015-09-30 at 11:27 -0400, Ilia Mirkin wrote:
> On Wed, Sep 30, 2015 at 3:18 AM, Iago Toral <ito...@igalia.com> wrote:
> > On Wed, 2015-09-30 at 02:34 -0400, Ilia Mirkin wrote:
> >> On Wed, Sep 30, 2015 at 2:26 AM, Iago Toral <ito...@igalia.com> wrote:
> >> > On Tue, 2015-09-29 at 11:19 -0400, Ilia Mirkin wrote:
> >> >> On Tue, Sep 29, 2015 at 4:33 AM, Iago Toral <ito...@igalia.com> wrote:
> >> >> > Hi ilia,
> >> >> >
> >> >> > On Tue, 2015-09-29 at 03:53 -0400, Ilia Mirkin wrote:
> >> >> >> Hi Samuel, and any other onlookers,
> >> >> >>
> >> >> >> I was wondering why the decision was made to stick SSBO's onto the
> >> >> >> same list as constbufs. Seems like they're entirely separate 
> >> >> >> entities,
> >> >> >> no? Perhaps I'm missing something?
> >> >> >
> >> >> > The reason for this was that there is a lot of code in the compiler to
> >> >> > handle uniform blocks and all the rules for them and we needed the 
> >> >> > same
> >> >> > treatment for SSBOs, so that seemed like a reasonable way forward to
> >> >> > reuse a lot of the code in the compiler front end. I think the only
> >> >> > place where we needed to make explicit distinctions is when we check 
> >> >> > for
> >> >> > resource limits, since these are different for UBOs and SSBOs of 
> >> >> > course.
> >> >> > Although UBOs and SSBOs are separate entities they have a lot of
> >> >> > similarities too, so that did not look like a terrible idea, 
> >> >> > considering
> >> >> > the benefits.
> >> >>
> >> >> My concern is around indexing... now the per-stage indices are in the
> >> >> combined UBO/SSBO space -- how do I tease out the individual ones?
> >> >> Easy enough when you can loop over NumUniformBlocks and just count the
> >> >> right type, but what about in the shader, where I get the buffer index
> >> >> in a ir_rvalue?
> >
> > By the way, in i965 this is not a problem either, we have access to the
> > gl_shader struct from the compiler backend, so if we need to translate
> > from the shared index space to a separate space we have NumUniformBlocks
> > available to do that. From your words I get that you can't access this
> > information from the compiler backend, right? In that case, wouldn't it
> > be possible to translate the index during the GLSL IR -> TGSI
> > conversion?
> 
> Actually a shader should be available at GLSL -> TGSI translation time
> as well. Somehow I didn't connect that in my mind with the relevant
> info being available. So I guess this moves from "impossible" to
> "annoying", since we have to do a list scan and fix up all the UBO
> logic as well.

I wanted to try and rewrite the compiler bits to have separate index
spaces for UBOs and SSBOs so we can see what that actually involves and
decide what is better. My idea was to send a RFC patch as soon as I get
"something" working, it is probably going to take a few days though.

>  Probably easiest to just compute a remapping table and
> stick it somewhere that will last at link time. I'm wondering if this
> will affect indirect array accesses (ARB_gs5 ubo indexing -- does that
> exist on ssbo btw?), but I think that should be fine.

I haven't seen anything in the specs that prevents this, we support that
in i965.

However, I think this can be a problem in your case, because you can't
remap the block index if you don't know how many blocks in UniformBlocks
before the one you are processing are of a different type (i.e. UBOs if
this is an SSBO or the other way around). And you cannot know how many
blocks you have to count because the index into the array instance
blocks is not constant... We can probably fix this by grouping UBOs and
SSBOs together in the array right before we flow into the backends.

I was actually wondering if we could also split SSBOs/UBOs into separate
arrays only at that point, I might give this a try before I try to
rewrite most of the uniform linking code to deal with two arrays since
the beginning.

Iago

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] main: Fix block index when mixing UBO and SSBO blocks

2015-10-01 Thread Iago Toral
On Thu, 2015-10-01 at 09:13 +0300, Tapani Pälli wrote:
> 
> On 09/29/2015 05:38 PM, Iago Toral Quiroga wrote:
> > Since we store both in UniformBlocks, we can't just compute the index by
> > subtracting the array address start, we need to count the number of
> > buffers of the appropriate type.
> > ---
> >
> > Or we can just fall back to calc_resource_index... that would also work.
> > This should be a bit faster though since it only traverses the list of
> > uniform blocks and the code is simple enough, but it probably won't make
> > a significant difference anyway.
> 
> This is correct but I'd vote for using calc_resource_index to reduce 
> special cases. Ideally at some point gl_program_resource will become
> something more than a pointer, and then it helps to have generic code for
> these things.

Sure, I'll send a v2. Thanks for looking into it Tapani.

Iago

> 
> >   src/mesa/main/shader_query.cpp | 18 +-
> >   1 file changed, 17 insertions(+), 1 deletion(-)
> >
> > diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp
> > index 0cada50..33c95b4 100644
> > --- a/src/mesa/main/shader_query.cpp
> > +++ b/src/mesa/main/shader_query.cpp
> > @@ -602,6 +602,22 @@ calc_resource_index(struct gl_shader_program *shProg,
> >  return GL_INVALID_INDEX;
> >   }
> >
> > +static GLuint
> > +calc_ubo_ssbo_index(struct gl_shader_program *shProg,
> > +struct gl_program_resource *res)
> > +{
> > +   unsigned i;
> > +   GLuint index = 0;
> > +   bool is_shader_storage = res->Type == GL_SHADER_STORAGE_BLOCK;
> > +   for (i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
> > +  if (&shProg->UniformBlocks[i] == RESOURCE_UBO(res))
> > + return index;
> > +  if (shProg->UniformBlocks[i].IsShaderStorage == is_shader_storage)
> > + index++;
> > +   }
> > +   return GL_INVALID_INDEX;
> > +}
> > +
> >   /**
> >* Calculate index for the given resource.
> >*/
> > @@ -615,7 +631,7 @@ _mesa_program_resource_index(struct gl_shader_program 
> > *shProg,
> >  switch (res->Type) {
> >  case GL_UNIFORM_BLOCK:
> >  case GL_SHADER_STORAGE_BLOCK:
> > -  return RESOURCE_UBO(res)- shProg->UniformBlocks;
> > +  return calc_ubo_ssbo_index(shProg, res);
> >  case GL_ATOMIC_COUNTER_BUFFER:
> > return RESOURCE_ATC(res) - shProg->AtomicBuffers;
> >  case GL_TRANSFORM_FEEDBACK_VARYING:
> >
> 


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] SSBO's in UniformBlocks list?

2015-10-01 Thread Iago Toral
On Thu, 2015-10-01 at 02:18 -0400, Ilia Mirkin wrote:
> On Thu, Oct 1, 2015 at 2:12 AM, Iago Toral <ito...@igalia.com> wrote:
> > However, I think this can be a problem in your case, because you can't
> > remap the block index if you don't know how many blocks in UniformBlocks
> > before the one you are processing are of a different type (i.e. UBOs if
> > this is an SSBO or the other way around). And you cannot know how many
> > blocks you have to count because the index into the array instance
> > blocks is not constant... We can probably fix this by grouping UBOs and
> > SSBOS together in the array right before we flow into the backends.
> 
> Slightly annoying but non-fatal. I think that just the remapping table
> is enough -- the indexing is always done relative to a base index, and
> as long as these arrays are contiguous (which they kinda have to be),
> it shouldn't matter what the offset is. i.e. if the list contains u0
> u1 s0 s1 u2 u3, and i want to index on u2/3, as long as i know that u2
> is the base, I can use its index.

But they are not contiguous, that's why I say that we would need to
group them. In shader code you can in theory have something like:

layout(std140, binding=2) buffer SSBO1 {
   vec4 v0;
   vec4 v1;
} ssbo1[3];

layout(std140, binding=3) uniform UBO {
   vec4 v0;
   vec4 v1;
} ubo[2];

layout(std140, binding=5) buffer SSBO2 {
   vec4 v0;
   vec4 v1;
} ssbo2[2];

and we would add them in that order to the list, which I think would be
a problem for you in the case you mention.

> At least I think that's right, haven't *fully* thought it through tbh.
> But it seems like it could work.
> 
>   -ilia
> 


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] SSBO's in UniformBlocks list?

2015-10-01 Thread Iago Toral
On Thu, 2015-10-01 at 02:30 -0400, Ilia Mirkin wrote:
> On Thu, Oct 1, 2015 at 2:24 AM, Iago Toral <ito...@igalia.com> wrote:
> > On Thu, 2015-10-01 at 02:18 -0400, Ilia Mirkin wrote:
> >> On Thu, Oct 1, 2015 at 2:12 AM, Iago Toral <ito...@igalia.com> wrote:
> >> > However, I think this can be a problem in your case, because you can't
> >> > remap the block index if you don't know how many blocks in UniformBlocks
> >> > before the one you are processing are of a different type (i.e. UBOs if
> >> > this is an SSBO or the other way around). And you cannot know how many
> >> > blocks you have to count because the index into the array instance
> >> > blocks is not constant... We can probably fix this by grouping UBOs and
> >> > SSBOS together in the array right before we flow into the backends.
> >>
> >> Slightly annoying but non-fatal. I think that just the remapping table
> >> is enough -- the indexing is always done relative to a base index, and
> >> as long as these arrays are contiguous (which they kinda have to be),
> >> it shouldn't matter what the offset is. i.e. if the list contains u0
> >> u1 s0 s1 u2 u3, and i want to index on u2/3, as long as i know that u2
> >> is the base, I can use its index.
> >
> > But they are not contiguous, that's why I say that we would need to
> > group them. In shader code you can in theory have something like:
> >
> > layout(std140, binding=2) buffer SSBO1 {
> >vec4 v0;
> >vec4 v1;
> > } ssbo1[3];
> >
> > layout(std140, binding=3) uniform UBO {
> >vec4 v0;
> >vec4 v1;
> > } ubo[2];
> >
> > layout(std140, binding=5) buffer SSBO2 {
> >vec4 v0;
> >vec4 v1;
> > } ssbo2[2];
> >
> > and we would add them in that order to the list, which I think would be
> > a problem for you in the case you mention.
> 
> Wouldn't ubo[0] and ubo[1] get sequential entries in that list? So
> when accessing ubo[n], if I look up the remapping of ubo[0] in that
> table, I should be sure that ubo[1] is adjacent to it, no?

Ah, you meant to have a remap table for the start of each interface
block... yeah, I guess that would work, blocks in each array will be
contiguous in UniformBlocks.

Iago

> 
>   -ilia
> 


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 2/2] mesa: Add {Num}UniformBlocks and {Num}ShaderStorageBlocks to gl_shader_program

2015-10-02 Thread Iago Toral
On Thu, 2015-10-01 at 14:01 -0400, Ilia Mirkin wrote:
> On Thu, Oct 1, 2015 at 7:09 AM, Iago Toral Quiroga <ito...@igalia.com> wrote:
> > These arrays provide backends with separate index spaces for UBOS and SSBOs.
> > ---
> >  src/glsl/linker.cpp | 35 
> > +++
> >  src/glsl/standalone_scaffolding.cpp |  9 +
> >  src/mesa/main/mtypes.h  |  6 ++
> >  3 files changed, 50 insertions(+)
> >
> > diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
> > index e6eba94..3da773d 100644
> > --- a/src/glsl/linker.cpp
> > +++ b/src/glsl/linker.cpp
> > @@ -4107,6 +4107,41 @@ link_shaders(struct gl_context *ctx, struct 
> > gl_shader_program *prog)
> >}
> > }
> >
> > +   /* Split prog->BufferInterfaceBlocks into prog->UniformBlocks and
> > +* prog->ShaderStorageBlocks, so that drivers that need separate index
> > +* spaces for each set can have that.
> > +*/
> > +   unsigned num_ubo_blocks;
> > +   unsigned num_ssbo_blocks;
> > +   num_ubo_blocks = 0;
> > +   num_ssbo_blocks = 0;
> > +   for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) {
> > +  if (prog->BufferInterfaceBlocks[i].IsShaderStorage)
> > + num_ssbo_blocks++;
> > +  else
> > + num_ubo_blocks++;
> > +   }
> > +
> > +   prog->UniformBlocks =
> > +  ralloc_array(mem_ctx, gl_uniform_block *, num_ubo_blocks);
> > +   prog->NumUniformBlocks = 0;
> > +
> > +   prog->ShaderStorageBlocks =
> > +  ralloc_array(mem_ctx, gl_uniform_block *, num_ssbo_blocks);
> > +   prog->NumShaderStorageBlocks = 0;
> > +
> > +   for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) {
> > +  if (prog->BufferInterfaceBlocks[i].IsShaderStorage)
> > + prog->ShaderStorageBlocks[prog->NumShaderStorageBlocks++] =
> > > +&prog->BufferInterfaceBlocks[i];
> > +  else
> > + prog->UniformBlocks[prog->NumUniformBlocks++] =
> > > +&prog->BufferInterfaceBlocks[i];
> > +   }
> 
> Shouldn't this go through and also adjust the indices of the linked
> programs? Or... something along those lines? With this, I still need a
> remapping table to go from the index passed to a load_ssbo/load_ubo
> instruction to a ssbo/ubo index.

Maybe I am missing something, but in the case of i965 for example, we
use a lowering pass (lower_ubo_reference) to compute the block indices
(see src/glsl/lower_ubo_reference.cpp). If you look at
lower_ubo_reference_visitor::setup_for_load_or_store, there is the code
we use to compute the block index. This happens in the backend already,
so I was thinking that you would do something similar, but instead of
using BufferInterfaceBlocks you would use these two arrays instead.
Until this point there are no references to any blocks in the shaders.

> Perhaps a few well-placed helper functions can alleviate this? Also
> this should erase the need for some of the O(n) iterations that have
> sprung up as a result of this combined list.
> 
> IMHO ideally the BufferInterfaceBlocks list would get freed at the end
> of this function. But I understand that this will require work, and
> the onus is probably on me (or anyone wanting to add ssbo support to
> other drivers) to do it, or work around it.

Yeah, it could go away I guess, but as you say that would require some
extra work (and also rewrite the i965 driver first to use separate index
spaces). In any case, notice that UniformBlocks and ShaderStorageBlocks
only contain pointers into BufferInterfaceBlocks, so we would only be
saving a few bytes.

Iago

>   -ilia
> 
> > +
> > +   assert(prog->NumUniformBlocks + prog->NumShaderStorageBlocks ==
> > +  prog->NumBufferInterfaceBlocks);
> > +
> > /* FINISHME: Assign fragment shader output locations. */
> >
> >  done:
> > diff --git a/src/glsl/standalone_scaffolding.cpp 
> > b/src/glsl/standalone_scaffolding.cpp
> > index 0c53589..658245f 100644
> > --- a/src/glsl/standalone_scaffolding.cpp
> > +++ b/src/glsl/standalone_scaffolding.cpp
> > @@ -102,6 +102,15 @@ _mesa_clear_shader_program_data(struct 
> > gl_shader_program *shProg)
> > ralloc_free(shProg->BufferInterfaceBlocks);
> > shProg->BufferInterfaceBlocks = NULL;
> > shProg->NumBufferInterfaceBlocks = 0;
> > +
> > +   ralloc_free(shProg->UniformBlocks);
> > +   shProg->UniformBlocks = NULL;
> > +   shProg->NumUniformBlocks = 0;
> > +
> > +   ralloc_free(shProg->Sha

Re: [Mesa-dev] [PATCH] mesa: avoid leaking closure when iterating over a string_to_uint_map

2015-10-02 Thread Iago Toral
Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

On Thu, 2015-10-01 at 20:19 -0400, Ilia Mirkin wrote:
> Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
> ---
>  src/mesa/program/hash_table.h | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/src/mesa/program/hash_table.h b/src/mesa/program/hash_table.h
> index e85a836..d0a2abf 100644
> --- a/src/mesa/program/hash_table.h
> +++ b/src/mesa/program/hash_table.h
> @@ -249,6 +249,7 @@ public:
>wrapper->closure = closure;
>  
>hash_table_call_foreach(this->ht, subtract_one_wrapper, wrapper);
> +  free(wrapper);
> }
>  
> /**


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: avoid leaking hiddenUniforms map when there are no uniforms

2015-10-02 Thread Iago Toral
On Thu, 2015-10-01 at 20:22 -0400, Ilia Mirkin wrote:
> Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
> ---
>  src/glsl/link_uniforms.cpp | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp
> index 47d49c8..740b0a4 100644
> --- a/src/glsl/link_uniforms.cpp
> +++ b/src/glsl/link_uniforms.cpp
> @@ -1131,15 +1131,15 @@ link_assign_uniform_locations(struct 
> gl_shader_program *prog,
> const unsigned num_data_slots = uniform_size.num_values;
> const unsigned hidden_uniforms = uniform_size.num_hidden_uniforms;
>  
> +   /* assign hidden uniforms a slot id */
> +   hiddenUniforms->iterate(assign_hidden_uniform_slot_id, &uniform_size);
> +   delete hiddenUniforms;
> +
> /* On the outside chance that there were no uniforms, bail out.
>  */
> if (num_uniforms == 0)
>return;
>  
> -   /* assign hidden uniforms a slot id */
> -   hiddenUniforms->iterate(assign_hidden_uniform_slot_id, &uniform_size);
> -   delete hiddenUniforms;
> -

I suppose there is not much gain in simply adding the delete statement
right before the return...

Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>


> struct gl_uniform_storage *uniforms =
>rzalloc_array(prog, struct gl_uniform_storage, num_uniforms);
> union gl_constant_value *data =


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: don't forget to free image_param on prog_data free

2015-10-02 Thread Iago Toral
Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

On Thu, 2015-10-01 at 20:27 -0400, Ilia Mirkin wrote:
> Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
> ---
>  src/mesa/drivers/dri/i965/brw_program.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
> b/src/mesa/drivers/dri/i965/brw_program.c
> index fee96a8..0e4b823 100644
> --- a/src/mesa/drivers/dri/i965/brw_program.c
> +++ b/src/mesa/drivers/dri/i965/brw_program.c
> @@ -551,6 +551,7 @@ brw_stage_prog_data_free(const void *p)
>  
> ralloc_free(prog_data->param);
> ralloc_free(prog_data->pull_param);
> +   ralloc_free(prog_data->image_param);
>  }
>  
>  void


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] nir: split SSBO min/max atomic instrinsics into signed/unsigned versions

2015-10-05 Thread Iago Toral
Hi Markus,

I noticed that you did not reply to mesa-dev in your original e-mail so
I am CCing the list now so we keep the discussion here.

On Mon, 2015-10-05 at 08:07 +0200, Iago Toral wrote:
> Hi Markus,
> 
> On Sun, 2015-10-04 at 18:15 +0200, Markus Wick wrote:
> > Hi Iago,
> > 
> > I've tried your SSBO patch with splitted signed / unsigned handling with 
> > dolphin-emu and it did work fine, so feel free to add
> > Tested-by: Markus Wick <mar...@selfnet.de>
> 
> Thanks!
> 
> > But I have another issue with SSBO on master. I get "First argument to 
> > atomic function must be a buffer variable" because of this code: "buffer 
> > {ivec4 a;} ; ... atomicMax(a[0], 0); ...".
> 
> 
> 
> > Ilia has told me on #dri-devel about having the same issue with 
> > tessellation:
> > imirkin_> the second issue sounds fun though... we had issues like that 
> > with tess for a while
> > imirkin_> probably the same helpers can be reused to peer through the 
> > swizzles
> > 
> > Do you know a way how to fix it?
> 
> I'll look into it and reply here with my findings (or a patch). Thanks
> for reporting it!
> 
> Iago
> 
> > Thanks
> > 
> > Markus - degasus
> > 
> 


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/11] i965/nir: Simplify uniform setup

2015-10-02 Thread Iago Toral
Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

On Wed, 2015-09-30 at 18:41 -0700, Jason Ekstrand wrote:
> ---
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp   | 19 ---
>  src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 21 -
>  2 files changed, 16 insertions(+), 24 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 829c663..eb0fe7b 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -185,20 +185,17 @@ fs_visitor::nir_setup_uniforms(nir_shader *shader)
> if (shader_prog) {
>brw_nir_setup_glsl_uniforms(shader, shader_prog, prog,
>stage_prog_data, true);
> -
> -  foreach_list_typed(nir_variable, var, node, &shader->uniforms) {
> - /* UBO's and atomics don't take up space in the uniform file */
> - if (var->interface_type != NULL || var->type->contains_atomic())
> -continue;
> -
> - if(type_size_scalar(var->type) > 0)
> -param_size[var->data.driver_location] = 
> type_size_scalar(var->type);
> -  }
> } else {
>brw_nir_setup_arb_uniforms(shader, prog, stage_prog_data);
> +   }
> +
> +   foreach_list_typed(nir_variable, var, node, &shader->uniforms) {
> +  /* UBO's and atomics don't take up space in the uniform file */
> +  if (var->interface_type != NULL || var->type->contains_atomic())
> + continue;
>  
> -  if(prog->Parameters->NumParameters > 0)
> - param_size[0] = prog->Parameters->NumParameters * 4;
> +  if (type_size_scalar(var->type) > 0)
> + param_size[var->data.driver_location] = type_size_scalar(var->type);
> }
>  }
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> index 36bb35f..8274d48 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> @@ -139,22 +139,17 @@ vec4_visitor::nir_setup_uniforms(nir_shader *shader)
> if (shader_prog) {
>brw_nir_setup_glsl_uniforms(shader, shader_prog, prog,
>stage_prog_data, false);
> -
> -  foreach_list_typed(nir_variable, var, node, &shader->uniforms) {
> - /* UBO's, atomics and samplers don't take up space in the
> -uniform file */
> - if (var->interface_type != NULL || var->type->contains_atomic() ||
> - type_size_vec4(var->type) == 0) {
> -continue;
> - }
> -
> - uniform_size[var->data.driver_location] = type_size_vec4(var->type);
> -  }
> } else {
>brw_nir_setup_arb_uniforms(shader, prog, stage_prog_data);
> +   }
> +
> +   foreach_list_typed(nir_variable, var, node, &shader->uniforms) {
> +  /* UBO's and atomics don't take up space in the uniform file */
> +  if (var->interface_type != NULL || var->type->contains_atomic())
> + continue;
>  
> -  if(prog->Parameters->NumParameters > 0)
> - uniform_size[0] = prog->Parameters->NumParameters;
> +  if (type_size_vec4(var->type) > 0)
> + uniform_size[var->data.driver_location] = type_size_vec4(var->type);
> }
>  }
>  


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 09/11] i965/nir: Pull common ARB program uniform handling into a common function

2015-10-02 Thread Iago Toral
> DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "brw_shader.h"
> +#include "brw_nir.h"
> +
> +void
> +brw_nir_setup_arb_uniforms(nir_shader *shader, struct gl_program *prog,
> +   struct brw_stage_prog_data *stage_prog_data)
> +{
> +   struct gl_program_parameter_list *plist = prog->Parameters;
> +
> +#ifndef NDEBUG
> +   if (!shader->uniforms.is_empty()) {
> +  /* For ARB programs, only a single "parameters" variable is generated 
> to
> +   * support uniform data.
> +   */
> +  assert(shader->uniforms.length() == 1);
> +  nir_variable *var = (nir_variable *) shader->uniforms.get_head();
> +  assert(strcmp(var->name, "parameters") == 0);
> +  assert(var->type->array_size() == (int)plist->NumParameters);
> +   }
> +#endif
> +
> +   for (unsigned p = 0; p < plist->NumParameters; p++) {
> +  /* Parameters should be either vec4 uniforms or single component
> +   * constants; matrices and other larger types should have been broken
> +   * down earlier.
> +   */
> +  assert(plist->Parameters[p].Size <= 4);
> +
> +  unsigned i;
> +  for (i = 0; i < plist->Parameters[p].Size; i++) {
> + stage_prog_data->param[4 * p + i] = &plist->ParameterValues[p][i];
> +  }
> +  for (; i < 4; i++) {
> + static const gl_constant_value zero = { 0.0 };
> + stage_prog_data->param[4 * p + i] = &zero;
> +  }
> +   }
> +}
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> index ee94e58..99fd71f 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> @@ -153,33 +153,10 @@ vec4_visitor::nir_setup_uniforms(nir_shader *shader)
>  nir_setup_uniform(var);
>}
> } else {
> -  /* For ARB_vertex_program, only a single "parameters" variable is
> -   * generated to support uniform data.
> -   */
> -  nir_variable *var = (nir_variable *) shader->uniforms.get_head();
> -  assert(shader->uniforms.length() == 1 &&
> - strcmp(var->name, "parameters") == 0);
> -
> -  assert(var->data.driver_location == 0);
> -  uniform_size[0] = type_size_vec4(var->type);
> -
> -  struct gl_program_parameter_list *plist = prog->Parameters;
> -  for (unsigned p = 0; p < plist->NumParameters; p++) {
> - /* Parameters should be either vec4 uniforms or single component
> -  * constants; matrices and other larger types should have been 
> broken
> -  * down earlier.
> -  */
> - assert(plist->Parameters[p].Size <= 4);
> +  brw_nir_setup_arb_uniforms(shader, prog, stage_prog_data);
>  
> - unsigned i;
> - for (i = 0; i < plist->Parameters[p].Size; i++) {
> -stage_prog_data->param[p * 4 + i] = 
> &plist->ParameterValues[p][i];
> - }
> - for (; i < 4; i++) {
> -static const gl_constant_value zero = { 0.0 };
> -stage_prog_data->param[p * 4 + i] = &zero;
> - }
> -  }
> +  if(prog->Parameters->NumParameters > 0)
> + uniform_size[0] = prog->Parameters->NumParameters;

Feel free to ignore this, but I wonder: why don't we just remove the
condition? (here and in the FS backend). It does not seem like it helps
anything, right?

With the comment about brw_nir_setup_glsl_uniforms addressed, this is:
Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

> }
>  }
>  


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/11] i965/shader: Pull setup_image_uniform_values out of backend_shader

2015-10-02 Thread Iago Toral
On Fri, 2015-10-02 at 10:29 +0200, Iago Toral wrote:
> On Wed, 2015-09-30 at 18:41 -0700, Jason Ekstrand wrote:
> > I tried to do this once before but Curro pointed out that having it in
> > backend_shader meant it could use the setup_vec4_uniform_values helper
> > which did different things in vec4 and fs.  Now the setup_uniform_values
> > function differs only by an assert in the two backends so there's no real
> > good reason to be using it anymore.
> > ---
> >  src/mesa/drivers/dri/i965/brw_fs_nir.cpp |  3 +-
> >  src/mesa/drivers/dri/i965/brw_shader.cpp | 52 
> > +---
> >  src/mesa/drivers/dri/i965/brw_shader.h   |  7 +++--
> >  3 files changed, 42 insertions(+), 20 deletions(-)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
> > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > index 7a965cd..d33e452 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > @@ -236,7 +236,8 @@ fs_visitor::nir_setup_uniform(nir_variable *var)
> >}
> >  
> >if (storage->type->is_image()) {
> > - setup_image_uniform_values(index, storage);
> > + brw_setup_image_uniform_values(stage, stage_prog_data,
> > +index, storage);
> >} else {
> >   unsigned slots = storage->type->component_slots();
> >   if (storage->array_elements)
> > diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
> > b/src/mesa/drivers/dri/i965/brw_shader.cpp
> > index 72388ce..1d184a7 100644
> > --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
> > @@ -1419,32 +1419,50 @@ 
> > backend_shader::assign_common_binding_table_offsets(uint32_t 
> > next_binding_table_
> > /* prog_data->base.binding_table.size will be set by 
> > brw_mark_surface_used. */
> >  }
> >  
> > +static void
> > +setup_vec4_uniform_value(const gl_constant_value **params,
> > + const gl_constant_value *values,
> > + unsigned n)
> > +{
> > +   static const gl_constant_value zero = { 0 };
> > +
> > +   for (unsigned i = 0; i < n; ++i)
> > > +  params[i] = &values[i];
> > +
> > +   for (unsigned i = n; i < 4; ++i)
> > > +  params[i] = &zero;
> > +}
> 
> I actually liked the version that received an offset into params better,
> since that allows us to assert that we are not messing our writes to
> params. 

Oh, but that was a requirement exclusive to the vec4 backend, so I guess
this is okay...

> Not a big deal though, so either way:
> 
> Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>
> 
> >  void
> > -backend_shader::setup_image_uniform_values(unsigned param_offset,
> > -   const gl_uniform_storage 
> > *storage)
> > +brw_setup_image_uniform_values(gl_shader_stage stage,
> > +   struct brw_stage_prog_data *stage_prog_data,
> > +   unsigned param_start_index,
> > +   const gl_uniform_storage *storage)
> >  {
> > -   const unsigned stage = _mesa_program_enum_to_shader_stage(prog->Target);
> > +   const gl_constant_value **param =
> > > +  &stage_prog_data->param[param_start_index];
> >  
> > for (unsigned i = 0; i < MAX2(storage->array_elements, 1); i++) {
> >const unsigned image_idx = storage->image[stage].index + i;
> > -  const brw_image_param *param = 
> > > &stage_prog_data->image_param[image_idx];
> > +  const brw_image_param *image_param =
> > > + &stage_prog_data->image_param[image_idx];
> >  
> >/* Upload the brw_image_param structure.  The order is expected to 
> > match
> > * the BRW_IMAGE_PARAM_*_OFFSET defines.
> > */
> > -  setup_vec4_uniform_value(param_offset + 
> > BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
> > > - (const gl_constant_value *)&param->surface_idx, 1);
> > -  setup_vec4_uniform_value(param_offset + 
> > BRW_IMAGE_PARAM_OFFSET_OFFSET,
> > - (const gl_constant_value *)param->offset, 2);
> > -  setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_SIZE_OFFSET,
> > - (const gl_constant_value *)param->size, 3);
> > -  setup_vec4_uniform_value(param_offset + 
> > BRW_IMAGE_PARAM_STRIDE_OFFSET,
> > - (const gl_constant_value *)param->stride, 4);
> > -  setu

Re: [Mesa-dev] [PATCH 08/11] i965/vec4: Use the uniform count from nir_assign_var_locations

2015-10-02 Thread Iago Toral
On Wed, 2015-09-30 at 18:41 -0700, Jason Ekstrand wrote:
> Previously, we were counting up uniforms as we set them up.  However, this
> count should be exactly identical to shader->num_uniforms provided by
> nir_assign_var_locations.  (If it's not, we're in trouble anyway because
> that means that locations don't match up.)  This matches what the fs
> backend is already doing.
> ---
>  src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 32 
> ++
>  1 file changed, 11 insertions(+), 21 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> index b0abfc1..ee94e58 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> @@ -134,7 +134,7 @@ vec4_visitor::nir_setup_inputs(nir_shader *shader)
>  void
>  vec4_visitor::nir_setup_uniforms(nir_shader *shader)
>  {
> -   uniforms = 0;
> +   uniforms = shader->num_uniforms;
>  
> if (shader_prog) {
>foreach_list_typed(nir_variable, var, node, &shader->uniforms) {
> @@ -145,8 +145,7 @@ vec4_visitor::nir_setup_uniforms(nir_shader *shader)
>  continue;
>   }
>  
> - assert(uniforms < uniform_array_size);
> - uniform_size[uniforms] = type_size_vec4(var->type);
> + uniform_size[var->data.driver_location] = type_size_vec4(var->type);
>  
>   if (strncmp(var->name, "gl_", 3) == 0)
>  nir_setup_builtin_uniform(var);
> @@ -161,8 +160,8 @@ vec4_visitor::nir_setup_uniforms(nir_shader *shader)
>assert(shader->uniforms.length() == 1 &&
>   strcmp(var->name, "parameters") == 0);
>  
> -  assert(uniforms < uniform_array_size);
> -  uniform_size[uniforms] = type_size_vec4(var->type);
> +  assert(var->data.driver_location == 0);
> +  uniform_size[0] = type_size_vec4(var->type);
>  
>struct gl_program_parameter_list *plist = prog->Parameters;
>for (unsigned p = 0; p < plist->NumParameters; p++) {
> @@ -174,14 +173,12 @@ vec4_visitor::nir_setup_uniforms(nir_shader *shader)
>  
>   unsigned i;
>   for (i = 0; i < plist->Parameters[p].Size; i++) {
> -stage_prog_data->param[uniforms * 4 + i] = 
> &plist->ParameterValues[p][i];
> +stage_prog_data->param[p * 4 + i] = 
> &plist->ParameterValues[p][i];
>   }
>   for (; i < 4; i++) {
>  static const gl_constant_value zero = { 0.0 };
> -stage_prog_data->param[uniforms * 4 + i] = &zero;
> +stage_prog_data->param[p * 4 + i] = &zero;
>   }
> -
> - uniforms++;
>}
> }
>  }
> @@ -197,6 +194,7 @@ vec4_visitor::nir_setup_uniform(nir_variable *var)
>  * order we'd walk the type, so walk the list of storage and find anything
>  * with our name, or the prefix of a component that starts with our name.
>  */
> +unsigned index = var->data.driver_location * 4;

Maybe call this uniform_index for consistency with
nir_setup_builtin_uniform below.

Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

>  for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) {
> struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
>  
> @@ -215,19 +213,14 @@ vec4_visitor::nir_setup_uniform(nir_variable *var)
>  storage->type->matrix_columns);
>  
> for (unsigned s = 0; s < vector_count; s++) {
> -  assert(uniforms < uniform_array_size);
> -
>int i;
>for (i = 0; i < storage->type->vector_elements; i++) {
> - stage_prog_data->param[uniforms * 4 + i] = components;
> - components++;
> + stage_prog_data->param[index++] = components++;
>}
>for (; i < 4; i++) {
>   static const gl_constant_value zero = { 0.0 };
> - stage_prog_data->param[uniforms * 4 + i] = &zero;
> + stage_prog_data->param[index++] = &zero;
>}
> -
> -  uniforms++;
> }
>  }
>  }
> @@ -238,6 +231,7 @@ vec4_visitor::nir_setup_builtin_uniform(nir_variable *var)
> const nir_state_slot *const slots = var->state_slots;
> assert(var->state_slots != NULL);
>  
> +   unsigned uniform_index = var->data.driver_location * 4;
> for (unsigned int i = 0; i < var->num_state_slots; i++) {
>/* This state reference has already been setup by ir_to_mesa,
> * but we'll get the same index back here.  We can reference
> @@ -249,13 +243,9 @@ vec4_visitor::nir_set

Re: [Mesa-dev] [PATCH 07/11] i965/shader: Get rid of the setup_vec4_uniform_value helper

2015-10-02 Thread Iago Toral
Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

On Wed, 2015-09-30 at 18:41 -0700, Jason Ekstrand wrote:
> It's not used by anything anymore
> ---
>  src/mesa/drivers/dri/i965/brw_fs.cpp   | 14 --
>  src/mesa/drivers/dri/i965/brw_fs.h |  4 
>  src/mesa/drivers/dri/i965/brw_shader.h |  4 
>  src/mesa/drivers/dri/i965/brw_vec4.h   |  3 ---
>  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 16 
>  5 files changed, 41 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 64215ae..b062219 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -948,20 +948,6 @@ fs_visitor::import_uniforms(fs_visitor *v)
> this->param_size = v->param_size;
>  }
>  
> -void
> -fs_visitor::setup_vec4_uniform_value(unsigned param_offset,
> - const gl_constant_value *values,
> - unsigned n)
> -{
> -   static const gl_constant_value zero = { 0 };
> -
> -   for (unsigned i = 0; i < n; ++i)
> -  stage_prog_data->param[param_offset + i] = &values[i];
> -
> -   for (unsigned i = n; i < 4; ++i)
> -  stage_prog_data->param[param_offset + i] = &zero;
> -}
> -
>  fs_reg *
>  fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer,
>   bool origin_upper_left)
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
> b/src/mesa/drivers/dri/i965/brw_fs.h
> index a8b6726..6231652 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -294,10 +294,6 @@ public:
>  
> struct brw_reg interp_reg(int location, int channel);
>  
> -   virtual void setup_vec4_uniform_value(unsigned param_offset,
> - const gl_constant_value *values,
> - unsigned n);
> -
> int implied_mrf_writes(fs_inst *inst);
>  
> virtual void dump_instructions();
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.h 
> b/src/mesa/drivers/dri/i965/brw_shader.h
> index ee6afce..eeb3306 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.h
> +++ b/src/mesa/drivers/dri/i965/brw_shader.h
> @@ -269,10 +269,6 @@ public:
> void assign_common_binding_table_offsets(uint32_t 
> next_binding_table_offset);
>  
> virtual void invalidate_live_intervals() = 0;
> -
> -   virtual void setup_vec4_uniform_value(unsigned param_offset,
> - const gl_constant_value *values,
> - unsigned n) = 0;
>  };
>  
>  uint32_t brw_texture_offset(int *offsets, unsigned num_components);
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
> b/src/mesa/drivers/dri/i965/brw_vec4.h
> index 76b13a6..b3a62d2 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.h
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
> @@ -132,9 +132,6 @@ public:
> bool run();
> void fail(const char *msg, ...);
>  
> -   virtual void setup_vec4_uniform_value(unsigned param_offset,
> - const gl_constant_value *values,
> - unsigned n);
> int setup_uniforms(int payload_reg);
>  
> bool reg_allocate_trivial();
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> index af01d8e..bc9d9a0 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> @@ -688,22 +688,6 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct 
> glsl_type *type)
> this->type = brw_type_for_base_type(type);
>  }
>  
> -void
> -vec4_visitor::setup_vec4_uniform_value(unsigned param_offset,
> -   const gl_constant_value *values,
> -   unsigned n)
> -{
> -   static const gl_constant_value zero = { 0 };
> -
> -   assert(param_offset % 4 == 0);
> -
> -   for (unsigned i = 0; i < n; ++i)
> -  stage_prog_data->param[param_offset + i] = &values[i];
> -
> -   for (unsigned i = n; i < 4; ++i)
> -  stage_prog_data->param[param_offset + i] = &zero;
> -}
> -
>  vec4_instruction *
>  vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg 
> dst,
>src_reg src0, src_reg src1)


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/11] i965/nir: Pull GLSL uniform handling into a common function

2015-10-02 Thread Iago Toral
omponents = storage->storage;
> + unsigned vector_count = (MAX2(storage->array_elements, 1) *
> +  storage->type->matrix_columns);
> + unsigned vector_size = storage->type->vector_elements;
> +
> + for (unsigned s = 0; s < vector_count; s++) {
> +unsigned i;
> +for (i = 0; i < vector_size; i++) {
> +   stage_prog_data->param[index++] = components++;
> +}
> +
> +/* Pad out with zeros if needed (only needed for vec4) */
> +for (; i < comps_per_unit; i++) {
> +   static const gl_constant_value zero = { 0.0 };
> +   stage_prog_data->param[index++] = 
> +}
> + }
> +  }
> +   }
> +}
> +
> +void
> +brw_nir_setup_glsl_uniforms(nir_shader *shader,
> +struct gl_shader_program *shader_prog,
> +const struct gl_program *prog,
> +struct brw_stage_prog_data *stage_prog_data,
> +bool is_scalar)

As I mentioned in a previous patch, you need to pull the header
declaration of this function into this patch.

With that and the static functions, this is:

Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

> +{
> +   unsigned comps_per_unit = is_scalar ? 1 : 4;
> +
> +   foreach_list_typed(nir_variable, var, node, >uniforms) {
> +  /* UBO's, atomics and samplers don't take up space in the
> + uniform file */
> +  if (var->interface_type != NULL || var->type->contains_atomic())
> + continue;
> +
> +  if (strncmp(var->name, "gl_", 3) == 0) {
> + brw_nir_setup_glsl_builtin_uniform(var, prog, stage_prog_data,
> +comps_per_unit);
> +  } else {
> + brw_nir_setup_glsl_uniform(shader->stage, var, shader_prog,
> +stage_prog_data, comps_per_unit);
> +  }
> +   }
> +}
>  
>  void
>  brw_nir_setup_arb_uniforms(nir_shader *shader, struct gl_program *prog,
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
> b/src/mesa/drivers/dri/i965/brw_vec4.h
> index b3a62d2..098fff01 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.h
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
> @@ -334,8 +334,6 @@ public:
> virtual void emit_nir_code();
> virtual void nir_setup_inputs(nir_shader *shader);
> virtual void nir_setup_uniforms(nir_shader *shader);
> -   virtual void nir_setup_uniform(nir_variable *var);
> -   virtual void nir_setup_builtin_uniform(nir_variable *var);
> virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
> virtual void nir_setup_system_values(nir_shader *shader);
> virtual void nir_emit_impl(nir_function_impl *impl);
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> index 99fd71f..36bb35f 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> @@ -137,6 +137,9 @@ vec4_visitor::nir_setup_uniforms(nir_shader *shader)
> uniforms = shader->num_uniforms;
>  
> if (shader_prog) {
> +  brw_nir_setup_glsl_uniforms(shader, shader_prog, prog,
> +  stage_prog_data, false);
> +
>foreach_list_typed(nir_variable, var, node, >uniforms) {
>   /* UBO's, atomics and samplers don't take up space in the
>  uniform file */
> @@ -146,11 +149,6 @@ vec4_visitor::nir_setup_uniforms(nir_shader *shader)
>   }
>  
>   uniform_size[var->data.driver_location] = type_size_vec4(var->type);
> -
> - if (strncmp(var->name, "gl_", 3) == 0)
> -nir_setup_builtin_uniform(var);
> - else
> -nir_setup_uniform(var);
>}
> } else {
>brw_nir_setup_arb_uniforms(shader, prog, stage_prog_data);
> @@ -161,72 +159,6 @@ vec4_visitor::nir_setup_uniforms(nir_shader *shader)
>  }
>  
>  void
> -vec4_visitor::nir_setup_uniform(nir_variable *var)
> -{
> -   int namelen = strlen(var->name);
> -
> -   /* The data for our (non-builtin) uniforms is stored in a series of
> -* gl_uniform_driver_storage structs for each subcomponent that
> -* glGetUniformLocation() could name.  We know it's been set up in the 
> same
> -* order we'd walk the type, so walk the list of storage and find anything
> -* with our name, or the prefix of a component that starts with our name.
> -*/
> -unsigned index = var->data.driver_location * 4;
> -for (unsigned u = 0; u < shader_prog->Num

Re: [Mesa-dev] [PATCH 06/11] i965/shader: Pull setup_image_uniform_values out of backend_shader

2015-10-02 Thread Iago Toral
On Wed, 2015-09-30 at 18:41 -0700, Jason Ekstrand wrote:
> I tried to do this once before but Curro pointed out that having it in
> backend_shader meant it could use the setup_vec4_uniform_values helper
> which did different things in vec4 and fs.  Now the setup_uniform_values
> function differs only by an assert in the two backends so there's no real
> good reason to be using it anymore.
> ---
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp |  3 +-
>  src/mesa/drivers/dri/i965/brw_shader.cpp | 52 
> +---
>  src/mesa/drivers/dri/i965/brw_shader.h   |  7 +++--
>  3 files changed, 42 insertions(+), 20 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 7a965cd..d33e452 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -236,7 +236,8 @@ fs_visitor::nir_setup_uniform(nir_variable *var)
>}
>  
>if (storage->type->is_image()) {
> - setup_image_uniform_values(index, storage);
> + brw_setup_image_uniform_values(stage, stage_prog_data,
> +index, storage);
>} else {
>   unsigned slots = storage->type->component_slots();
>   if (storage->array_elements)
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
> b/src/mesa/drivers/dri/i965/brw_shader.cpp
> index 72388ce..1d184a7 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
> @@ -1419,32 +1419,50 @@ 
> backend_shader::assign_common_binding_table_offsets(uint32_t 
> next_binding_table_
> /* prog_data->base.binding_table.size will be set by 
> brw_mark_surface_used. */
>  }
>  
> +static void
> +setup_vec4_uniform_value(const gl_constant_value **params,
> + const gl_constant_value *values,
> + unsigned n)
> +{
> +   static const gl_constant_value zero = { 0 };
> +
> +   for (unsigned i = 0; i < n; ++i)
> +  params[i] = [i];
> +
> +   for (unsigned i = n; i < 4; ++i)
> +  params[i] = 
> +}

I actually liked the version that received an offset into params better,
since that allows us to assert that we are not messing up our writes to
params. Not a big deal though, so either way:

Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

>  void
> -backend_shader::setup_image_uniform_values(unsigned param_offset,
> -   const gl_uniform_storage *storage)
> +brw_setup_image_uniform_values(gl_shader_stage stage,
> +   struct brw_stage_prog_data *stage_prog_data,
> +   unsigned param_start_index,
> +   const gl_uniform_storage *storage)
>  {
> -   const unsigned stage = _mesa_program_enum_to_shader_stage(prog->Target);
> +   const gl_constant_value **param =
> +  _prog_data->param[param_start_index];
>  
> for (unsigned i = 0; i < MAX2(storage->array_elements, 1); i++) {
>const unsigned image_idx = storage->image[stage].index + i;
> -  const brw_image_param *param = 
> _prog_data->image_param[image_idx];
> +  const brw_image_param *image_param =
> + _prog_data->image_param[image_idx];
>  
>/* Upload the brw_image_param structure.  The order is expected to 
> match
> * the BRW_IMAGE_PARAM_*_OFFSET defines.
> */
> -  setup_vec4_uniform_value(param_offset + 
> BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
> - (const gl_constant_value *)>surface_idx, 1);
> -  setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_OFFSET_OFFSET,
> - (const gl_constant_value *)param->offset, 2);
> -  setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_SIZE_OFFSET,
> - (const gl_constant_value *)param->size, 3);
> -  setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_STRIDE_OFFSET,
> - (const gl_constant_value *)param->stride, 4);
> -  setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_TILING_OFFSET,
> - (const gl_constant_value *)param->tiling, 3);
> -  setup_vec4_uniform_value(param_offset + 
> BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
> - (const gl_constant_value *)param->swizzling, 2);
> -  param_offset += BRW_IMAGE_PARAM_SIZE;
> +  setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
> + (const gl_constant_value *)_param->surface_idx, 1);
> +  setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
> + (const gl_constant_value *)image_param->offset, 2);
> +  setup_vec4_uniform_value

Re: [Mesa-dev] [PATCH 1/2] glsl/opt_array_splitting: Fix crash when doing array indexing into other arrays

2015-10-02 Thread Iago Toral
I think this never got a review, anyone willing to take it?

Iago

On Mon, 2015-09-14 at 13:49 +0200, Iago Toral Quiroga wrote:
> When we find indirect indexing into an array, the current implementation
> of the array splitting optimization pass does not look further into the
> expression tree. However, if the variable expression involves variable
> indexing into other arrays, we can miss that these other arrays also have
> variable indexing. If that happens, the pass will crash later on after
> hitting an assertion put there to ensure that split arrays are in fact
> always indexed via constants:
> 
> shader_runner: opt_array_splitting.cpp:296:
> void ir_array_splitting_visitor::split_deref(ir_dereference**): Assertion 
> `constant' failed.
> 
> This patch fixes the problem by letting the pass step into the variable
> index expression to identify these cases properly.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89607
> ---
>  src/glsl/opt_array_splitting.cpp | 14 --
>  1 file changed, 12 insertions(+), 2 deletions(-)
> 
> diff --git a/src/glsl/opt_array_splitting.cpp 
> b/src/glsl/opt_array_splitting.cpp
> index 9e73f3c..1fdd013 100644
> --- a/src/glsl/opt_array_splitting.cpp
> +++ b/src/glsl/opt_array_splitting.cpp
> @@ -185,8 +185,18 @@ 
> ir_array_reference_visitor::visit_enter(ir_dereference_array *ir)
> /* If the access to the array has a variable index, we wouldn't
>  * know which split variable this dereference should go to.
>  */
> -   if (entry && !ir->array_index->as_constant())
> -  entry->split = false;
> +   if (!ir->array_index->as_constant()) {
> +  if (entry)
> + entry->split = false;
> +  /* This variable indexing could come from a different array dereference
> +   * that also has variable indexing, that is, something like 
> a[b[a[b[0.
> +   * If we return visit_continue_with_parent here for the first 
> appearence
> +   * of a, then we can miss that b also has indirect indexing (if this is
> +   * the only place in the program where such indirect indexing into b
> +   * happens), so keep going.
> +   */
> +  return visit_continue;
> +   }
>  
> return visit_continue_with_parent;
>  }


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 04/11] i965/vec4: Use the actual channels used in pack_uniform_registers

2015-10-02 Thread Iago Toral
On Wed, 2015-09-30 at 18:41 -0700, Jason Ekstrand wrote:
> Previously, pack_uniform_registers worked based on the size of the uniform
> as given to us when we initially set up the uniforms.  However, we have to
> walk through the uniforms and figure out liveness anyway, so we might as
> well record the number of channels used as we go.  This may also allow us
> to pack things tighter in a few cases.
> ---
>  src/mesa/drivers/dri/i965/brw_vec4.cpp | 52 
> +-
>  1 file changed, 38 insertions(+), 14 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index 407698f..f0fa07e 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -518,11 +518,11 @@ vec4_visitor::split_uniform_registers()
>  void
>  vec4_visitor::pack_uniform_registers()
>  {
> -   bool uniform_used[this->uniforms];
> +   uint8_t chans_used[this->uniforms];
> int new_loc[this->uniforms];
> int new_chan[this->uniforms];
>  
> -   memset(uniform_used, 0, sizeof(uniform_used));
> +   memset(chans_used, 0, sizeof(chans_used));
> memset(new_loc, 0, sizeof(new_loc));
> memset(new_chan, 0, sizeof(new_chan));
>  
> @@ -532,10 +532,36 @@ vec4_visitor::pack_uniform_registers()
>  */
> foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
>for (int i = 0 ; i < 3; i++) {
> -  if (inst->src[i].file != UNIFORM)
> - continue;
> + if (inst->src[i].file != UNIFORM)
> +continue;

The switch statement below should go before the loop.

> + unsigned readmask;
> + switch (inst->opcode) {
> + case VEC4_OPCODE_PACK_BYTES:
> + case BRW_OPCODE_DP4:
> + case BRW_OPCODE_DPH:
> +readmask = 0xf;
> +break;
> + case BRW_OPCODE_DP3:
> +readmask = 0x7;
> +break;
> + case BRW_OPCODE_DP2:
> +readmask = 0x3;
> +break;

I don't know if there are other opcodes that could also be
special-handled like these, but if there are we are only missing the
opportunity to do tighter packing for them (which we are not doing now
anyway)...

Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

> + default:
> +readmask = inst->dst.writemask;
> +break;
> + }
> +
> + int reg = inst->src[i].reg;
> +
> + for (int c = 0; c < 4; c++) {
> +if (!(readmask & (1 << c)))
> +   continue;
>  
> -  uniform_used[inst->src[i].reg] = true;
> +chans_used[reg] = MAX2(chans_used[reg],
> +   BRW_GET_SWZ(inst->src[i].swizzle, c) + 1);
> + }
>}
> }
>  
> @@ -546,17 +572,15 @@ vec4_visitor::pack_uniform_registers()
>  */
> for (int src = 0; src < uniforms; src++) {
>assert(src < uniform_array_size);
> -  int size = this->uniform_vector_size[src];
> +  int size = chans_used[src];
>  
> -  if (!uniform_used[src]) {
> -  this->uniform_vector_size[src] = 0;
> -  continue;
> -  }
> +  if (size == 0)
> + continue;
>  
>int dst;
>/* Find the lowest place we can slot this uniform in. */
>for (dst = 0; dst < src; dst++) {
> -  if (this->uniform_vector_size[dst] + size <= 4)
> +  if (chans_used[dst] + size <= 4)
>   break;
>}
>  
> @@ -565,7 +589,7 @@ vec4_visitor::pack_uniform_registers()
>new_chan[src] = 0;
>} else {
>new_loc[src] = dst;
> -  new_chan[src] = this->uniform_vector_size[dst];
> +  new_chan[src] = chans_used[dst];
>  
>/* Move the references to the data */
>for (int j = 0; j < size; j++) {
> @@ -573,8 +597,8 @@ vec4_visitor::pack_uniform_registers()
>  stage_prog_data->param[src * 4 + j];
>}
>  
> -  this->uniform_vector_size[dst] += size;
> -  this->uniform_vector_size[src] = 0;
> +  chans_used[dst] += size;
> +  chans_used[src] = 0;
>}
>  
>new_uniform_count = MAX2(new_uniform_count, dst + 1);


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/11] i965/vec4: Get rid of the uniform_vector_size array

2015-10-02 Thread Iago Toral
Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

On Wed, 2015-09-30 at 18:41 -0700, Jason Ekstrand wrote:
> The uniform_vector_size array was only ever used by pack_uniform_registers
> which no longer needs it.
> ---
>  src/mesa/drivers/dri/i965/brw_vec4.cpp|  1 -
>  src/mesa/drivers/dri/i965/brw_vec4.h  |  3 +--
>  src/mesa/drivers/dri/i965/brw_vec4_nir.cpp| 15 ---
>  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp|  3 ---
>  src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp |  1 -
>  5 files changed, 5 insertions(+), 18 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index f0fa07e..4e9f3f7 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -1667,7 +1667,6 @@ vec4_visitor::setup_uniforms(int reg)
>  */
> if (devinfo->gen < 6 && this->uniforms == 0) {
>assert(this->uniforms < this->uniform_array_size);
> -  this->uniform_vector_size[this->uniforms] = 1;
>  
>stage_prog_data->param =
>   reralloc(NULL, stage_prog_data->param, const gl_constant_value *, 
> 4);
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
> b/src/mesa/drivers/dri/i965/brw_vec4.h
> index 341897e..76b13a6 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.h
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
> @@ -124,8 +124,7 @@ public:
> dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
> const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
> int *uniform_size;
> -   int *uniform_vector_size;
> -   int uniform_array_size; /*< Size of uniform_[vector_]size arrays */
> +   int uniform_array_size; /*< Size of the uniform_size array */
> int uniforms;
>  
> src_reg shader_start_time;
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> index 2555038..b0abfc1 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> @@ -166,16 +166,14 @@ vec4_visitor::nir_setup_uniforms(nir_shader *shader)
>  
>struct gl_program_parameter_list *plist = prog->Parameters;
>for (unsigned p = 0; p < plist->NumParameters; p++) {
> - uniform_vector_size[uniforms] = plist->Parameters[p].Size;
> -
>   /* Parameters should be either vec4 uniforms or single component
>* constants; matrices and other larger types should have been 
> broken
>* down earlier.
>*/
> - assert(uniform_vector_size[uniforms] <= 4);
> + assert(plist->Parameters[p].Size <= 4);
>  
> - int i;
> - for (i = 0; i < uniform_vector_size[uniforms]; i++) {
> + unsigned i;
> + for (i = 0; i < plist->Parameters[p].Size; i++) {
>  stage_prog_data->param[uniforms * 4 + i] = 
> >ParameterValues[p][i];
>   }
>   for (; i < 4; i++) {
> @@ -218,10 +216,9 @@ vec4_visitor::nir_setup_uniform(nir_variable *var)
>  
> for (unsigned s = 0; s < vector_count; s++) {
>assert(uniforms < uniform_array_size);
> -  uniform_vector_size[uniforms] = storage->type->vector_elements;
>  
>int i;
> -  for (i = 0; i < uniform_vector_size[uniforms]; i++) {
> +  for (i = 0; i < storage->type->vector_elements; i++) {
>   stage_prog_data->param[uniforms * 4 + i] = components;
>   components++;
>}
> @@ -258,10 +255,6 @@ vec4_visitor::nir_setup_builtin_uniform(nir_variable 
> *var)
>   stage_prog_data->param[uniforms * 4 + j] =
>  [GET_SWZ(slots[i].swizzle, j)];
>  
> -  uniform_vector_size[uniforms] =
> - (var->type->is_scalar() || var->type->is_vector() ||
> -  var->type->is_matrix() ? var->type->vector_elements : 4);
> -
>uniforms++;
> }
>  }
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> index f303080..af01d8e 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> @@ -702,8 +702,6 @@ vec4_visitor::setup_vec4_uniform_value(unsigned 
> param_offset,
>  
> for (unsigned i = n; i < 4; ++i)
>stage_prog_data->param[param_offset + i] = 
> -
> -   uniform_vector_size[param_offset / 4] = n;
>  }
>  
>  vec4_instruction *
> @@ -1886,7 +1884,6 @@ vec4_visitor::vec4_visitor(const struct brw_compiler 
> *compiler,
>

Re: [Mesa-dev] ssbo support missing imin/umin dinstinction

2015-09-28 Thread Iago Toral
On Fri, 2015-09-25 at 21:15 -0400, Ilia Mirkin wrote:
> Hi Samuel,
> 
> It seems like there's only a single atomic_min intrinsic for ssbo
> (same for max), but the ssbo spec actually calls for both signed and
> unsigned semantics:
> 
> uint atomicMin(inout uint mem, uint data);
> int atomicMin(inout int mem, int data);
> 
> Should there be separate intrinsics for these (and their Max friends)
> or is there some other way to tell whether the min/max should be done
> signed/unsigned?
> 
> Cheers,

Good point. This is a problem in NIR that we overlooked when we ported
the code from GLSL IR. In GLSL IR you have the type information
available but that is lost once you go into NIR, the current code in the
i965 backend checks the register type, but that is going to be the
default we use for NIR values, not the real type from GLSL IR... I think
we will need two intrinsics in this case. I'll send a patch.

Thanks for bringing this up!

Iago


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v5 45/70] glsl: atomic counters can be declared as buffer-qualified variables

2015-09-28 Thread Iago Toral
On Mon, 2015-09-28 at 13:13 +0300, Francisco Jerez wrote:
> Iago Toral Quiroga <ito...@igalia.com> writes:
> 
> > From: Kristian Høgsberg <k...@bitplanet.net>
> >
> > ---
> >  src/glsl/ast_to_hir.cpp | 6 +++---
> >  1 file changed, 3 insertions(+), 3 deletions(-)
> >
> > diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
> > index 566cc87..a364aae 100644
> > --- a/src/glsl/ast_to_hir.cpp
> > +++ b/src/glsl/ast_to_hir.cpp
> > @@ -2789,7 +2789,7 @@ apply_type_qualifier_to_variable(const struct 
> > ast_type_qualifier *qual,
> > }
> >  
> > if (var->type->contains_atomic()) {
> > -  if (var->data.mode == ir_var_uniform) {
> > +  if (var->data.mode == ir_var_uniform || var->data.mode == 
> > ir_var_shader_storage) {
> >   if (var->data.explicit_binding) {
> >  unsigned *offset =
> > >atomic_counter_offsets[var->data.binding];
> > @@ -2807,8 +2807,8 @@ apply_type_qualifier_to_variable(const struct 
> > ast_type_qualifier *qual,
> >   }
> >} else if (var->data.mode != ir_var_function_in) {
> >   _mesa_glsl_error(loc, state, "atomic counters may only be 
> > declared as "
> > -  "function parameters or uniform-qualified "
> > -  "global variables");
> > +  "function parameters, uniform-qualified or "
> > +  "buffer-qualified global variables");
> >}
> > }
> >  
> 
> Spec quote?  Declaring an atomic counter to have buffer storage is
> illegal AFAIK.

I think you are right, the specs don't seem to include any explicit
restrictions affecting the allowed types for buffer variables, however,
the description of opaque types seems to be in conflict with that:

"The opaque types declare variables that are effectively opaque 
handles to other objects. These objects are
accessed through built-in functions, not through direct reading or
writing of the declared variable.
(...)
Opaque variables cannot be treated as l-values;(...)"

I am thinking that we probably want to revert this patch and instead
check that buffer variables do not contain references to any opaque
types and produce a compile-time error if that is the case. I'll send
patches for this. Maybe the specs should be amended to mention this
explicitly too. Ian, what do you think?

Iago


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v5 45/70] glsl: atomic counters can be declared as buffer-qualified variables

2015-09-28 Thread Iago Toral
On Mon, 2015-09-28 at 13:45 +0200, Iago Toral wrote:
> On Mon, 2015-09-28 at 13:13 +0300, Francisco Jerez wrote:
> > Iago Toral Quiroga <ito...@igalia.com> writes:
> > 
> > > From: Kristian Høgsberg <k...@bitplanet.net>
> > >
> > > ---
> > >  src/glsl/ast_to_hir.cpp | 6 +++---
> > >  1 file changed, 3 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
> > > index 566cc87..a364aae 100644
> > > --- a/src/glsl/ast_to_hir.cpp
> > > +++ b/src/glsl/ast_to_hir.cpp
> > > @@ -2789,7 +2789,7 @@ apply_type_qualifier_to_variable(const struct 
> > > ast_type_qualifier *qual,
> > > }
> > >  
> > > if (var->type->contains_atomic()) {
> > > -  if (var->data.mode == ir_var_uniform) {
> > > +  if (var->data.mode == ir_var_uniform || var->data.mode == 
> > > ir_var_shader_storage) {
> > >   if (var->data.explicit_binding) {
> > >  unsigned *offset =
> > > >atomic_counter_offsets[var->data.binding];
> > > @@ -2807,8 +2807,8 @@ apply_type_qualifier_to_variable(const struct 
> > > ast_type_qualifier *qual,
> > >   }
> > >} else if (var->data.mode != ir_var_function_in) {
> > >   _mesa_glsl_error(loc, state, "atomic counters may only be 
> > > declared as "
> > > -  "function parameters or uniform-qualified "
> > > -  "global variables");
> > > +  "function parameters, uniform-qualified or "
> > > +  "buffer-qualified global variables");
> > >}
> > > }
> > >  
> > 
> > Spec quote?  Declaring an atomic counter to have buffer storage is
> > illegal AFAIK.
> 
> I think you are right, the specs don't seem to include any explicit
> restrictions affecting the allowed types for buffer variables, however,
> the description of opaque types seems to be in conflict with that:
> 
> "The opaque types declare variables that are effectively opaque 
> handles to other objects. These objects are
> accessed through built-in functions, not through direct reading or
> writing of the declared variable.
> (...)
> Opaque variables cannot be treated as l-values;(...)"
> 
> I am thinking that we probably want to revert this patch and instead
> check that buffer variables do not contain references to any opaque
> types and produce a compile-time error if that is the case. I'll send
> patches for this. Maybe the specs should be amended to mention this
> explicitly too. Ian, what do you think?

Well, actually we already check that opaque types are disallowed in
interface blocks, so we only need to revert this patch.

Iago

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] glsl: Fix forward NULL dereference coverity warning

2015-09-28 Thread Iago Toral
CCing Ian...

On Mon, 2015-09-28 at 13:02 +0200, Iago Toral wrote:
> Ian, you wrote the original code, so you might want to have a look at
> this one just in case I missed something even if piglit does not
> complain. In any case, I guess that either the code or the comment
> should be fixed.
> 
> Iago
> 
> On Mon, 2015-09-28 at 12:59 +0200, Iago Toral Quiroga wrote:
> > The comment says that it should be impossible for decl_type to be NULL
> > here, so don't try to handle the case where it is, simply add an assert.
> > 
> > >>> CID 1324977:  Null pointer dereferences  (FORWARD_NULL)
> > >>> Comparing "decl_type" to null implies that "decl_type" might be 
> > >>> null.
> > 
> > No piglit regressions observed.
> > ---
> >  src/glsl/ast_to_hir.cpp | 13 ++---
> >  1 file changed, 6 insertions(+), 7 deletions(-)
> > 
> > diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
> > index b8d66dd..1259d50 100644
> > --- a/src/glsl/ast_to_hir.cpp
> > +++ b/src/glsl/ast_to_hir.cpp
> > @@ -5732,17 +5732,16 @@ ast_process_structure_or_interface_block(exec_list 
> > *instructions,
> >* is_interface case, will have resulted in compilation having
> >* already halted due to a syntax error.
> >*/
> > - const struct glsl_type *field_type =
> > -decl_type != NULL ? decl_type : glsl_type::error_type;
> > + assert(decl_type);
> >  
> > - if (is_interface && field_type->contains_opaque()) {
> > + if (is_interface && decl_type->contains_opaque()) {
> >  YYLTYPE loc = decl_list->get_location();
> >  _mesa_glsl_error(, state,
> >   "uniform/buffer in non-default interface 
> > block contains "
> >   "opaque variable");
> >   }
> >  
> > - if (field_type->contains_atomic()) {
> > + if (decl_type->contains_atomic()) {
> >  /* From section 4.1.7.3 of the GLSL 4.40 spec:
> >   *
> >   *"Members of structures cannot be declared as atomic 
> > counter
> > @@ -5753,7 +5752,7 @@ ast_process_structure_or_interface_block(exec_list 
> > *instructions,
> >   "shader storage block or uniform block");
> >   }
> >  
> > - if (field_type->contains_image()) {
> > + if (decl_type->contains_image()) {
> >  /* FINISHME: Same problem as with atomic counters.
> >   * FINISHME: Request clarification from Khronos and add
> >   * FINISHME: spec quotation here.
> > @@ -5784,8 +5783,8 @@ ast_process_structure_or_interface_block(exec_list 
> > *instructions,
> >   "to struct or interface block members");
> >   }
> >  
> > - field_type = process_array_type(, decl_type,
> > - decl->array_specifier, state);
> > + const struct glsl_type *field_type =
> > +process_array_type(, decl_type, decl->array_specifier, 
> > state);
> >   fields[i].type = field_type;
> >   fields[i].name = decl->identifier;
> >   fields[i].location = -1;
> 


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] glsl: Fix forward NULL dereference coverity warning

2015-09-28 Thread Iago Toral
Ian, you wrote the original code, so you might want to have a look at
this one just in case I missed something even if piglit does not
complain. In any case, I guess that either the code or the comment
should be fixed.

Iago

On Mon, 2015-09-28 at 12:59 +0200, Iago Toral Quiroga wrote:
> The comment says that it should be impossible for decl_type to be NULL
> here, so don't try to handle the case where it is, simply add an assert.
> 
> >>> CID 1324977:  Null pointer dereferences  (FORWARD_NULL)
> >>> Comparing "decl_type" to null implies that "decl_type" might be null.
> 
> No piglit regressions observed.
> ---
>  src/glsl/ast_to_hir.cpp | 13 ++---
>  1 file changed, 6 insertions(+), 7 deletions(-)
> 
> diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
> index b8d66dd..1259d50 100644
> --- a/src/glsl/ast_to_hir.cpp
> +++ b/src/glsl/ast_to_hir.cpp
> @@ -5732,17 +5732,16 @@ ast_process_structure_or_interface_block(exec_list 
> *instructions,
>* is_interface case, will have resulted in compilation having
>* already halted due to a syntax error.
>*/
> - const struct glsl_type *field_type =
> -decl_type != NULL ? decl_type : glsl_type::error_type;
> + assert(decl_type);
>  
> - if (is_interface && field_type->contains_opaque()) {
> + if (is_interface && decl_type->contains_opaque()) {
>  YYLTYPE loc = decl_list->get_location();
>  _mesa_glsl_error(, state,
>   "uniform/buffer in non-default interface block 
> contains "
>   "opaque variable");
>   }
>  
> - if (field_type->contains_atomic()) {
> + if (decl_type->contains_atomic()) {
>  /* From section 4.1.7.3 of the GLSL 4.40 spec:
>   *
>   *"Members of structures cannot be declared as atomic counter
> @@ -5753,7 +5752,7 @@ ast_process_structure_or_interface_block(exec_list 
> *instructions,
>   "shader storage block or uniform block");
>   }
>  
> - if (field_type->contains_image()) {
> + if (decl_type->contains_image()) {
>  /* FINISHME: Same problem as with atomic counters.
>   * FINISHME: Request clarification from Khronos and add
>   * FINISHME: spec quotation here.
> @@ -5784,8 +5783,8 @@ ast_process_structure_or_interface_block(exec_list 
> *instructions,
>   "to struct or interface block members");
>   }
>  
> - field_type = process_array_type(, decl_type,
> - decl->array_specifier, state);
> + const struct glsl_type *field_type =
> +process_array_type(, decl_type, decl->array_specifier, 
> state);
>   fields[i].type = field_type;
>   fields[i].name = decl->identifier;
>   fields[i].location = -1;


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v5 45/70] glsl: atomic counters can be declared as buffer-qualified variables

2015-09-28 Thread Iago Toral
On Mon, 2015-09-28 at 15:23 +0300, Francisco Jerez wrote:
> Iago Toral <ito...@igalia.com> writes:
> 
> > On Mon, 2015-09-28 at 13:13 +0300, Francisco Jerez wrote:
> >> Iago Toral Quiroga <ito...@igalia.com> writes:
> >> 
> >> > From: Kristian Høgsberg <k...@bitplanet.net>
> >> >
> >> > ---
> >> >  src/glsl/ast_to_hir.cpp | 6 +++---
> >> >  1 file changed, 3 insertions(+), 3 deletions(-)
> >> >
> >> > diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
> >> > index 566cc87..a364aae 100644
> >> > --- a/src/glsl/ast_to_hir.cpp
> >> > +++ b/src/glsl/ast_to_hir.cpp
> >> > @@ -2789,7 +2789,7 @@ apply_type_qualifier_to_variable(const struct 
> >> > ast_type_qualifier *qual,
> >> > }
> >> >  
> >> > if (var->type->contains_atomic()) {
> >> > -  if (var->data.mode == ir_var_uniform) {
> >> > +  if (var->data.mode == ir_var_uniform || var->data.mode == 
> >> > ir_var_shader_storage) {
> >> >   if (var->data.explicit_binding) {
> >> >  unsigned *offset =
> >> > >atomic_counter_offsets[var->data.binding];
> >> > @@ -2807,8 +2807,8 @@ apply_type_qualifier_to_variable(const struct 
> >> > ast_type_qualifier *qual,
> >> >   }
> >> >} else if (var->data.mode != ir_var_function_in) {
> >> >   _mesa_glsl_error(loc, state, "atomic counters may only be 
> >> > declared as "
> >> > -  "function parameters or uniform-qualified "
> >> > -  "global variables");
> >> > +  "function parameters, uniform-qualified or "
> >> > +  "buffer-qualified global variables");
> >> >}
> >> > }
> >> >  
> >> 
> >> Spec quote?  Declaring an atomic counter to have buffer storage is
> >> illegal AFAIK.
> >
> > I think you are right, the specs don't seem to include any explicit
> > restrictions affecting the allowed types for buffer variables, however,
> > the description of opaque types seems to be in conflict with that:
> >
> > "The opaque types declare variables that are effectively opaque 
> > handles to other objects. These objects are
> > accessed through built-in functions, not through direct reading or
> > writing of the declared variable.
> > (...)
> > Opaque variables cannot be treated as l-values;(...)"
> >
> > I am thinking that we probably want to revert this patch and instead
> > check that buffer variables do not contain references to any opaque
> > types and produce a compile-time error if that is the case. I'll send
> > patches for this. Maybe the specs should be amended to mention this
> > explicitly too. Ian, what do you think?
> >
> They mention it explicitly AFAICT (section 4.1.7 of the GL 4.5 spec):
> 
>  "[Opaque Types] can only be declared as function parameters or in
>   uniform-qualified variables."

Right, I meant that this is not explicit in the description of buffer
variables.

Iago

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/5] i965: Maximum allowed size of SEND messages is 15 (4 bits)

2015-09-20 Thread Iago Toral
On Fri, 2015-09-18 at 09:09 -0700, Kenneth Graunke wrote:
> On Friday, September 18, 2015 10:08:52 AM Iago Toral Quiroga wrote:
> > Until now we only used MRFs 1..15 for regular SEND messages, so the
> > message length could not possibly exceed the maximum size. Now that
> > we allow to use MRF registers 1..23 in gen6, we need to be careful
> > not to build messages that can go beyond the limit. That could occur,
> > specifically, when building URB write messages, which we may need to
> > split in chunks due to their size. Previously we would simply go and
> > create a new message when we reached MRF 13 (since 13..15 were
> > reserved for spilling), now we also want to check the size of the
> > message explicitly.
> > 
> > Besides adding that condition to split URB write messages properly,
> > this patch also adds asserts in the generator. Notice that
> > brw_inst_set_mlen already asserts for this, but asserting in the
> > generators is easy and can make debugging easier in some cases.
> > ---
> >  src/mesa/drivers/dri/i965/brw_fs_generator.cpp   | 1 +
> >  src/mesa/drivers/dri/i965/brw_inst.h | 3 +++
> >  src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 1 +
> >  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp   | 5 +++--
> >  4 files changed, 8 insertions(+), 2 deletions(-)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
> > b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> > index 6cbd42f..c65084d 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> > @@ -1561,6 +1561,7 @@ fs_generator::generate_code(const cfg_t *cfg, int 
> > dispatch_width)
> >brw_set_default_exec_size(p, cvt(inst->exec_size) - 1);
> >  
> >assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF(devinfo->gen));
> > +  assert(inst->mlen <= BRW_MAX_MSG_LENGTH);
> >  
> >switch (inst->exec_size) {
> >case 1:
> > diff --git a/src/mesa/drivers/dri/i965/brw_inst.h 
> > b/src/mesa/drivers/dri/i965/brw_inst.h
> > index 46eff1d..c5132ba 100644
> > --- a/src/mesa/drivers/dri/i965/brw_inst.h
> > +++ b/src/mesa/drivers/dri/i965/brw_inst.h
> > @@ -39,6 +39,9 @@
> >  extern "C" {
> >  #endif
> >  
> > +/** Maximum SEND message length */
> > +#define BRW_MAX_MSG_LENGTH 15
> > +
> >  /* brw_context.h has a forward declaration of brw_inst, so name the 
> > struct. */
> >  typedef struct brw_inst {
> > uint64_t data[2];
> > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 
> > b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> > index d5943d2..05f2044 100644
> > --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> > @@ -1136,6 +1136,7 @@ vec4_generator::generate_code(const cfg_t *cfg)
> >brw_set_default_acc_write_control(p, inst->writes_accumulator);
> >  
> >assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF(devinfo->gen));
> > +  assert(inst->mlen <= BRW_MAX_MSG_LENGTH);
> >  
> >unsigned pre_emit_nr_insn = p->nr_insn;
> >  
> > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
> > b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> > index 7f06050..514ccd6 100644
> > --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> > @@ -3320,9 +3320,10 @@ vec4_visitor::emit_vertex()
> > prog_data->vue_map.slot_to_varying[slot]);
> >  
> >   /* If this was max_usable_mrf, we can't fit anything more into 
> > this
> > -  * URB WRITE.
> > +  * URB WRITE. Same thing if we reached the maximum length 
> > available.
> >*/
> > - if (mrf > max_usable_mrf) {
> > + if (mrf > max_usable_mrf ||
> > + align_interleaved_urb_mlen(devinfo, mrf - base_mrf + 1) > 
> > BRW_MAX_MSG_LENGTH) {
> >  slot++;
> >  break;
> >   }
> > 
> 
> Doesn't this hunk need to go before patch 2?  It seems like otherwise
> we'll be emitting URB write messages that are too long until patch 5.

Yeah, you're right. I'll fix that before pushing.

> With that fixed, the series is:
> Reviewed-by: Kenneth Graunke <kenn...@whitecape.org>
> 
> Thanks for doing this, Iago!

Thanks for reviewing it! ;)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir: Don't fuse fmul into ffma if used by more than 4 fadds.

2015-09-23 Thread Iago Toral
On Tue, 2015-09-22 at 15:52 -0700, Matt Turner wrote:
> total instructions in shared programs: 6596689 -> 6595563 (-0.02%)
> instructions in affected programs: 103154 -> 102028 (-1.09%)
> helped:253
> HURT:  217
> 
> It's kind of a wash in terms of programs helped/hurt, but of the
> programs helped 169 are by more than 10%.

Yeah, that is a very significant gain for the helped cases. Out of
curiosity, how bad are hurt programs in comparison?

> ---
> I tried values of 2-6, and 4 seemed to be the best. I can provide
> full shader-db result files if other people want to investigate.

I was wondering if shader-db metrics are the best option for selecting
thresholds such as these, meaning that when you are replacing
instructions that have different latencies simply comparing instruction
counts may not be the best thing to do. However, if ffma operations are
more expensive than fadd, a higher instruction count should always lead
to worse performance, so that should be ok.

Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

>  src/glsl/nir/nir_opt_peephole_ffma.c | 20 
>  1 file changed, 12 insertions(+), 8 deletions(-)
> 
> diff --git a/src/glsl/nir/nir_opt_peephole_ffma.c 
> b/src/glsl/nir/nir_opt_peephole_ffma.c
> index 4f0f0da..3e8a34f 100644
> --- a/src/glsl/nir/nir_opt_peephole_ffma.c
> +++ b/src/glsl/nir/nir_opt_peephole_ffma.c
> @@ -39,7 +39,7 @@ struct peephole_ffma_state {
>  };
>  
>  static inline bool
> -are_all_uses_fadd(nir_ssa_def *def)
> +are_all_uses_fadd(nir_ssa_def *def, unsigned *num_uses)
>  {
> if (!list_empty(>if_uses))
>return false;
> @@ -53,6 +53,7 @@ are_all_uses_fadd(nir_ssa_def *def)
>nir_alu_instr *use_alu = nir_instr_as_alu(use_instr);
>switch (use_alu->op) {
>case nir_op_fadd:
> + (*num_uses)++;
>   break; /* This one's ok */
>  
>case nir_op_imov:
> @@ -60,7 +61,7 @@ are_all_uses_fadd(nir_ssa_def *def)
>case nir_op_fneg:
>case nir_op_fabs:
>   assert(use_alu->dest.dest.is_ssa);
> - if (!are_all_uses_fadd(_alu->dest.dest.ssa))
> + if (!are_all_uses_fadd(_alu->dest.dest.ssa, num_uses))
>  return false;
>   break;
>  
> @@ -101,15 +102,18 @@ get_mul_for_src(nir_alu_src *src, int num_components,
>*abs = true;
>break;
>  
> -   case nir_op_fmul:
> -  /* Only absorb a fmul into a ffma if the fmul is is only used in fadd
> -   * operations.  This prevents us from being too aggressive with our
> -   * fusing which can actually lead to more instructions.
> +   case nir_op_fmul: {
> +  /* Only fuse an fmul into an ffma if its result is used by not more 
> than
> +   * four fadd operations. This prevents us from too aggressively fusing
> +   * operations which can actually lead to more instructions or many ffma
> +   * operations performing the same multiply.
> */
> -  if (!are_all_uses_fadd(>dest.dest.ssa))
> +
> +  unsigned num_uses = 0;
> +  if (!are_all_uses_fadd(>dest.dest.ssa, _uses) || num_uses > 4)
>   return NULL;
>break;
> -
> +   }
> default:
>return NULL;
> }

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 7/7] doc: Set GL_OES_geometry_shader as started

2015-09-23 Thread Iago Toral
On Wed, 2015-09-23 at 14:07 +0200, Eduardo Lima Mitev wrote:
> On 09/23/2015 10:42 AM, Marta Lofstedt wrote:
> > From: Marta Lofstedt 
> > 
> > Signed-off-by: Marta Lofstedt 
> > ---
> >  docs/GL3.txt | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/docs/GL3.txt b/docs/GL3.txt
> > index 92941cf..e1c6049 100644
> > --- a/docs/GL3.txt
> > +++ b/docs/GL3.txt
> > @@ -238,7 +238,7 @@ GLES3.2, GLSL ES 3.2
> >GL_OES_copy_imagenot started (based 
> > on GL_ARB_copy_image, which is done for some drivers)
> >GL_OES_draw_buffers_indexed  not started
> >GL_OES_draw_elements_base_vertex not started (based 
> > on GL_ARB_draw_elements_base_verte, which is done for all drivers)
> > -  GL_OES_geometry_shader   not started (based 
> > on GL_ARB_geometry_shader4, which is done for all drivers)
> > +  GL_OES_geometry_shader   started (Marta)
> >GL_OES_gpu_shader5   not started (based 
> > on parts of GL_ARB_gpu_shader5, which is done for some drivers)
> >GL_OES_primitive_bounding boxnot started
> >GL_OES_sample_shadingnot started (based 
> > on parts of GL_ARB_sample_shading, which is done for some drivers)
> > 
> 
> Not directly related with the patch, but I wonder why the
> GL_OES_geometry_shader entry is in the "GLES3.2 GLSL ES 3.2" section of
> this file. Per
> https://www.khronos.org/registry/gles/extensions/OES/OES_geometry_shader.txt:
> 
> "This specification is written against the OpenGL ES 3.1
>  (March 17, 2014) and OpenGL ES 3.10 Shading Language
>  (March 17, 2014) Specifications."
> 
> Shouldn't this entry be moved to the previous section "GLES3.1, GLSL ES
> 3.1" altogether?

Geometry shaders were introduced with GLES 3.2, they are not part of the
GLES 3.1 spec, I imagine the extension has been there for a while though
and that's why it was written against the 3.1 spec.

Iago

> In any case:
> 
> Reviewed-by: Eduardo Lima Mitev 
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 7/7] doc: Set GL_OES_geometry_shader as started

2015-09-23 Thread Iago Toral
On Wed, 2015-09-23 at 14:34 +, Lofstedt, Marta wrote:
> > -Original Message-
> > From: mesa-dev [mailto:mesa-dev-boun...@lists.freedesktop.org] On
> > Behalf Of Iago Toral
> > Sent: Wednesday, September 23, 2015 3:18 PM
> > To: Eduardo Lima Mitev
> > Cc: mesa-dev@lists.freedesktop.org
> > Subject: Re: [Mesa-dev] [PATCH 7/7] doc: Set GL_OES_geometry_shader as
> > started
> > 
> > On Wed, 2015-09-23 at 14:07 +0200, Eduardo Lima Mitev wrote:
> > > On 09/23/2015 10:42 AM, Marta Lofstedt wrote:
> > > > From: Marta Lofstedt <marta.lofst...@intel.com>
> > > >
> > > > Signed-off-by: Marta Lofstedt <marta.lofst...@linux.intel.com>
> > > > ---
> > > >| 2 +-
> > > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > > >
> > > > diff --git a/docs/GL3.txt b/docs/GL3.txt index 92941cf..e1c6049
> > > > 100644
> > > > --- a/docs/GL3.txt
> > > > +++ b/docs/GL3.txt
> > > > @@ -238,7 +238,7 @@ GLES3.2, GLSL ES 3.2
> > > >GL_OES_copy_imagenot started 
> > > > (based on
> > GL_ARB_copy_image, which is done for some drivers)
> > > >GL_OES_draw_buffers_indexed  not started
> > > >GL_OES_draw_elements_base_vertex not started 
> > > > (based on
> > GL_ARB_draw_elements_base_verte, which is done for all drivers)
> > > > -  GL_OES_geometry_shader   not started 
> > > > (based on
> > GL_ARB_geometry_shader4, which is done for all drivers)
> > > > +  GL_OES_geometry_shader   started (Marta)
> > > >GL_OES_gpu_shader5   not started 
> > > > (based on parts of
> > GL_ARB_gpu_shader5, which is done for some drivers)
> > > >GL_OES_primitive_bounding boxnot started
> > > >GL_OES_sample_shadingnot started 
> > > > (based on parts of
> > GL_ARB_sample_shading, which is done for some drivers)
> > > >
> > >
> > > Not directly related with the patch, but I wonder why the
> > > GL_OES_geometry_shader entry is in the "GLES3.2 GLSL ES 3.2" section
> > > of this file. Per
> > >
> > https://www.khronos.org/registry/gles/extensions/OES/OES_geometry_sh
> > ader.txt:
> > >
> > > "This specification is written against the OpenGL ES 3.1
> > >  (March 17, 2014) and OpenGL ES 3.10 Shading Language
> > >  (March 17, 2014) Specifications."
> > >
> > > Shouldn't this entry be moved to the previous section "GLES3.1, GLSL
> > > ES 3.1" altogether?
> > 
> > Geometry shaders were introduced with GLES 3.2, they are not part of the
> > GLES 3.1 spec, I imagine the extension has been there for a while though and
> > that's why it was written against the 3.1 spec.
> >
> GL_EXT_geometry_shader is also part of the GLES 3.1, Android Extension Pack:
> https://www.khronos.org/registry/gles/extensions/ANDROID/ANDROID_extension_pack_es31a.txt
>  
> So, if some driver wanted that we would still need to expose those extensions 
> under GLES 3.1. 

Yeah, what I mean is that it only became part of the core spec in GL ES
3.2, so that's why I think docs/GL3.txt includes it under GLES 3.2.

> /Marta
> >
> > Iago
> > 
> > > In any case:
> > >
> > > Reviewed-by: Eduardo Lima Mitev <el...@igalia.com>
> > >
> > > ___
> > > mesa-dev mailing list
> > > mesa-dev@lists.freedesktop.org
> > > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> > 
> > 
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/5] Enable up to 24 MRF registers in gen6

2015-09-24 Thread Iago Toral
On Mon, 2015-09-21 at 09:21 -0700, Matt Turner wrote:
> On Mon, Sep 21, 2015 at 8:00 AM, Iago Toral <ito...@igalia.com> wrote:
> > On Mon, 2015-09-21 at 07:49 -0700, Kenneth Graunke wrote:
> >> On Monday, September 21, 2015 09:46:24 AM Mark Janes wrote:
> >> > This series hits an assertion on ILK and G45:
> >> >
> >> > src/mesa/drivers/dri/i965/brw_eu_emit.c:150: brw_set_dest: Assertion
> >> > `dest.nr < (devinfo->gen == 6 ? 24 : 16)' failed.
> >> >
> >> > It triggers about 8k piglit assertions on those platforms.  I'm turning
> >> > off testing for G45 and ILK until it is resolved.
> >> >
> >> > https://bugs.freedesktop.org/show_bug.cgi?id=92066
> >> >
> >> > -Mark
> >>
> >> I've pushed a fix for this:
> >>
> >> commit c1070550c289d48ef389aeb8c564d1abd1123ad1
> >> Author: Kenneth Graunke <kenn...@whitecape.org>
> >> Date:   Mon Sep 21 07:42:27 2015 -0700
> >>
> >> i965: Fix MRF register number assertions for compr4.
> >>
> >> compr4 is represented by setting the high bit on the MRF number.
> >> We need to mask it out before sanity checking the register number.
> >>
> >> Fixes ~8000 assert fails on Ironlake and G45.
> >>
> >> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92066
> >> Signed-off-by: Kenneth Graunke <kenn...@whitecape.org>
> >>
> >> Easy mistake...I always forget about compr4.  Hopefully we should be
> >> good now.
> >
> > that was fast, thanks Ken! I was scratching my head over this...
> >
> > BTW, I just noticed that the ILK docs also say that they have 24 MRFs...
> > (volume 4, part 2, 5.3.3 MRF Registers). Assuming that we don't find any
> > other issues, would we want to extend the fix to ILK too?
> 
> The ILK docs are notoriously bad and often contain more information
> about Sandybridge than Ironlake. I suspect that information is
> actually about SNB, though I suppose it couldn't hurt to try on ILK,
> though I'm doubtful.

For the record: I tested this and it does not seem to work in ILK, so
you're probably right.

In the process, however, I noticed that forcing spilling on the vec4
backend with current master in ILK breaks a lot of tests and also
produces at least one GPU hang, so there is something seriously broken
there. I filed this bug:
https://bugs.freedesktop.org/show_bug.cgi?id=92100

I'll try to see what is going on with that...

Iago


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v5 00/70] ARB_shader_storage_buffer_object (mesa, i965)

2015-09-24 Thread Iago Toral
On Wed, 2015-09-23 at 10:38 -0700, Kristian Høgsberg wrote:
> On Wed, Sep 23, 2015 at 12:06 AM, Samuel Iglesias Gonsálvez
> <sigles...@igalia.com> wrote:
> >
> >
> > On 19/09/15 01:56, Kristian Høgsberg wrote:
> >> On Thu, Sep 10, 2015 at 03:35:16PM +0200, Iago Toral Quiroga wrote:
> >>> Hi,
> >>>
> >>> this is the latest version of the ARB_shader_storage_buffer_object
> >>> implementation. A good part of the frontend bits for this are already in
> >>> master, but this adds some more missing pieces, specifically std430 and
> >>> memory qualifiers. Additionally, this provides the i965 implementation.
> >>
> >> I've gone through all patches in the series and I replied to patches
> >> where I had comments.  Overall, the series look good and with the
> >> comments addressed, I'm ready to give my Reviewed-by for the series.
> >> I want to take a closer look at the atomics lowering in patches 49+,
> >> but I'm done for today.  Based on the quick look-through I did, I don't
> >> expect to find any showstoppers there.
> >>
> >> Here's a summary of what I found:
> >>
> >> [PATCH v5 01/70] mesa: set MAX_SHADER_STORAGE_BUFFERS to 15.
> >>
> >>   Update limit to 16 and drop the comment
> >>
> >> [PATCH v5 02/70] i965: Use 16-byte offset alignment for shader storage 
> >> buffers
> >>
> >>   ctx->Const.ShaderStorageBufferOffsetAlignment should be 64
> >>
> >> [PATCH v5 23/70] glsl: refactor parser processing of an interface block 
> >> definition
> >>
> >>   Clarify that the commit is just moving code.
> >>
> >> [PATCH v5 26/70] glsl: Add parser/compiler support for std430 interface 
> >> packing qualifier
> >>
> >>   Update the error to also mention shader storage blocks, not just ubos?
> >>
> >> [PATCH v5 28/70] glsl: add std430 interface packing support to ssbo 
> >> related operations
> >>
> >>   Why are we passing false for is_std430 here (emit_access in
> >>   handle_rvalue)?  We use handle_rvalue for both UBO and SSBO loads,
> >>   right?  Also, for consistency, I'd prefer if we could just pass
> >>   'packing' around instead of is_std430.
> >>
> >> [PATCH v5 29/70] glsl: a shader storage buffer must be smaller than the 
> >> maximum size allowed
> >>
> >>   Two chunks look like they should be their own patch ("Add unsized
> >>   array support to glsl_type::std140_size" or such).
> >>
> >> [PATCH v5 38/70] i965/nir/vec4: Implement nir_intrinsic_store_ssbo
> >>
> >>   Shouldn't this be 'skipped_channels += num_channels;' to handle write 
> >> mask reg.yw?
> >>
> >> [PATCH v5 40/70] nir: Implement __intrinsic_load_ssbo
> >>
> >>   Refactor handling of cmp instruction for converting to bool
> >>
> >> [PATCH v5 54/70] glsl: First argument to atomic functions must be a buffer 
> >> variable
> >>
> >>   Nitpick: move check that only looks at first element in list to after 
> >> loop
> >>
> >> Also, I expect that before we land this series (though that shouldn't
> >> be far off), we'll have deleted the vec4 GLSL IR visitor so we can
> >> drop these patches (I didn't review them):
> >>
> >> [PATCH v5 16/70] i965/vec4: Implement ir_unop_get_buffer_size
> >> [PATCH v5 39/70] i965/vec4: Implement __intrinsic_store_ssbo
> >> [PATCH v5 43/70] i965/vec4: Implement __intrinsic_load_ssbo
> >> [PATCH v5 53/70] i965/vec4: Implement lowered SSBO atomic intrinsics
> >>
> >> I wrote the initial prototype of the SSBO functionality, but I don't
> >> recall writing:
> >>
> >> [PATCH v5 45/70] glsl: atomic counters can be declared as buffer-qualified 
> >> variables
> >>
> >> I don't think I did anything for atomics in my patch. Feel free to
> >> take ownership of that one and add my Reviewed-by.
> >>
> >
> > OK, I will take the ownership.
> 
> Cool.  With the update of patch 40, the entire series looks good to me:
> 
> Reviewed-by: Kristian Høgsberg <k...@bitplanet.net>
> 
> I think we should try to land this before the end of the week. With
> Mark's Jenkins run showing only a couple of issues that have now been
> addressed, I don't see any reason to not merge this. We're early in
> the release cycle and it's a good time to land this and give it some
> exposure.

Great! we will push this later today.
Thanks to everyone for all the reviews!

Iago

> thanks,
> Kristian
> 
> > Thanks for the review,
> >
> > Sam
> >
> >> thanks,
> >> Kristian
> >>
> 


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/5] Enable up to 24 MRF registers in gen6

2015-09-22 Thread Iago Toral
Hi Mark,

On Mon, 2015-09-21 at 17:45 -0700, Mark Janes wrote:
> Hi Iago,
> 
> According to my tests, this patch series fixes the gles2/gles3
> "functional.uniform_api.random.23" tests in dEQP, on sandybridge.
> 
> Do you see the same results?  Should this patch series be applied to the
> stable branch?

I can try to verify this, but I would not be surprised if that was the
case. This seems to fix a couple of bugs in bugzilla and it fixes a lot
of piglit tests in SNB when we force spilling, so if that test is
triggering spilling, there is a good chance that it had the same problem
and this patch fixed it. I'll try to confirm this today and let you
know.

As for whether this should go to stable, I suppose it depends on whether
we are confident that this won't break anything else... if you have not
seen any more issues with piglit/deqp on any other platform I guess
that's a good sign, but I'll let Ken make that call.

Iago

> thanks,
> 
> Mark
> 
> Iago Toral Quiroga <ito...@igalia.com> writes:
> 
> > There are some bug reports about shaders failing to compile in gen6
> > because MRF 14 is used when we need to spill. For example:
> > https://bugs.freedesktop.org/show_bug.cgi?id=86469
> > https://bugs.freedesktop.org/show_bug.cgi?id=90631
> >
> > Discussion in bugzilla pointed to the fact that gen6 might actually have
> > 24 MRF registers available instead of 16, so we could use other MRF
> > registers and avoid these conflicts (we still need to investigate why
> > some shaders need up to MRF 14 anyway, since this is not expected).
> >
> > Notice that the hardware docs are not clear about this fact:
> >
> > SNB PRM Vol4 Part2's "Table 5-4. MRF Registers Available in Device
> > Hardware" says "Number per Thread" - "24 registers"
> >
> > However, SNB PRM Vol4 Part1, 1.6.1 Message Register File (MRF) says:
> >
> > "Normal threads should construct their messages in m1..m15. (...)
> > Regardless of actual hardware implementation, the thread should
> > not assume that MRF addresses above m15 wrap to legal MRF registers."
> >
> > Therefore experimentation was necessary to evaluate if we had these extra
> > MRF registers available or not. This was tested in gen6 using MRF
> > registers 21..23 for spilling and doing a full piglit run (all.py) forcing
> > spilling of everything on the FS backend. It was also tested by doing
> > spilling of everything on both the FS and the VS backends with a piglit run
> > of shader.py. In both cases no regressions were observed. In fact, many of
> > these tests were helped in the cases where we forced spilling, since that
> > triggered the same underlying problem described in the bug reports. Here are
> > some results using INTEL_DEBUG=spill_fs,spill_vec4 for a shader.py run on
> > gen6 hardware:
> >
> > Using MRFs 13..15 for spilling:
> > crash: 2, fail: 113, pass: 6621, skip: 5461
> > 
> > Using MRFs 21..23 for spilling:
> > crash: 2, fail: 12, pass: 6722, skip: 5461
> >
> > We might want to test this further with other instances of gen6 hardware
> > though... I am not sure that we can safely conclude that all implementations
> > of gen6 hardware have 24 MRF registers from my tests on just one particular
> > SandyBridge laptop.
> >
> > Iago Toral Quiroga (5):
> >   i965: Move MRF register asserts out of brw_reg.h
> >   i965: Turn BRW_MAX_MRF into a macro that accepts a hardware generation
> >   i965/fs: Use MRF registers 21-23 for spilling in gen6
> >   i965/vec4: Use MRF registers 21-23 for spilling in gen6
> >   i965: Maximum allowed size of SEND messages is 15 (4 bits)
> >
> >  src/mesa/drivers/dri/i965/brw_eu_emit.c| 11 +
> >  src/mesa/drivers/dri/i965/brw_fs.cpp   |  4 ++--
> >  src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 15 
> >  src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp  | 27 
> > --
> >  src/mesa/drivers/dri/i965/brw_inst.h   |  3 +++
> >  src/mesa/drivers/dri/i965/brw_ir_vec4.h|  2 +-
> >  src/mesa/drivers/dri/i965/brw_reg.h|  9 
> >  .../drivers/dri/i965/brw_schedule_instructions.cpp |  4 ++--
> >  src/mesa/drivers/dri/i965/brw_vec4_generator.cpp   | 10 +---
> >  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 15 +++-
> >  10 files changed, 61 insertions(+), 39 deletions(-)
> >
> > -- 
> > 1.9.1
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/3] Make pull constant loads in gen6 start at MRFs 16/17

2015-09-23 Thread Iago Toral
On Tue, 2015-09-22 at 14:00 +0200, Iago Toral Quiroga wrote:
> Originally, these could conflict with our spills, but now that we moved the
> latter to MRFs 21..23, that is no longer the case. Still, in gen6 we
> now use MRFs 1..15 for URB writes, so we probably want to make our pull
> constant loads out of that MRF space (currently, they start at MRFs 13/14).
> 
> Even if we do not want to do this for some reason, I still think we should
> at least apply the first patch, since that plugs a hardcoded array size of
> 16 MRF registers. For some reason this only became a problem when I tried
> to move pull constant loads to MRFs > 15 and not when I did the same for
> spills, but it looks like the right thing to do in any case.

I have also just added a 4th patch to this series:

i965/gs/gen6: Maximum allowed size of SEND messages is 15 (4 bits)

That makes sure that the gen6 GS code path has the same changes we did
for VS URB writes so we can use MRFs 13..15.

Iago

> I tested this on SandyBridge and IvyBridge and did not observe any
> regressions in piglit.
> 
> Iago Toral Quiroga (3):
>   i965: Fix remove_duplicate_mrf_writes so it can handle 24 MRFs in gen6
>   i965: make pull constant loads in gen6 start at MRFs 16/17
>   i965: Define FIRST_SPILL_MRF and FIRST_PULL_LOAD_MRF only once and in
> one place
> 
>  src/mesa/drivers/dri/i965/brw_fs.cpp  | 6 +++---
>  src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 2 --
>  src/mesa/drivers/dri/i965/brw_inst.h  | 6 ++
>  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp| 4 +---
>  4 files changed, 10 insertions(+), 8 deletions(-)
> 


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v5 38/70] i965/nir/vec4: Implement nir_intrinsic_store_ssbo

2015-09-21 Thread Iago Toral
On Fri, 2015-09-18 at 13:02 -0700, Kristian Høgsberg wrote:
> On Thu, Sep 10, 2015 at 03:35:54PM +0200, Iago Toral Quiroga wrote:
> > ---
> >  src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 148 
> > +
> >  1 file changed, 148 insertions(+)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
> > b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> > index f47b029..450441d 100644
> > --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> > @@ -23,8 +23,13 @@
> >  
> >  #include "brw_nir.h"
> >  #include "brw_vec4.h"
> > +#include "brw_vec4_builder.h"
> > +#include "brw_vec4_surface_builder.h"
> >  #include "glsl/ir_uniform.h"
> >  
> > +using namespace brw;
> > +using namespace brw::surface_access;
> > +
> >  namespace brw {
> >  
> >  void
> > @@ -556,6 +561,149 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr 
> > *instr)
> >break;
> > }
> >  
> > +   case nir_intrinsic_store_ssbo_indirect:
> > +  has_indirect = true;
> > +  /* fallthrough */
> > +   case nir_intrinsic_store_ssbo: {
> > +  assert(devinfo->gen >= 7);
> > +
> > +  /* Block index */
> > +  src_reg surf_index;
> > +  nir_const_value *const_uniform_block =
> > + nir_src_as_const_value(instr->src[1]);
> > +  if (const_uniform_block) {
> > + unsigned index = prog_data->base.binding_table.ubo_start +
> > +  const_uniform_block->u[0];
> > + surf_index = src_reg(index);
> > + brw_mark_surface_used(_data->base, index);
> > +  } else {
> > + surf_index = src_reg(this, glsl_type::uint_type);
> > + emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[1], 1),
> > +  src_reg(prog_data->base.binding_table.ubo_start)));
> > + surf_index = emit_uniformize(surf_index);
> > +
> > + brw_mark_surface_used(_data->base,
> > +   prog_data->base.binding_table.ubo_start +
> > +   shader_prog->NumUniformBlocks - 1);
> > +  }
> > +
> > +  /* Offset */
> > +  src_reg offset_reg = src_reg(this, glsl_type::uint_type);
> > +  unsigned const_offset_bytes = 0;
> > +  if (has_indirect) {
> > + emit(MOV(dst_reg(offset_reg), get_nir_src(instr->src[2], 1)));
> > +  } else {
> > + const_offset_bytes = instr->const_index[0];
> > + emit(MOV(dst_reg(offset_reg), src_reg(const_offset_bytes)));
> > +  }
> > +
> > +  /* Value */
> > +  src_reg val_reg = get_nir_src(instr->src[0], 4);
> > +
> > +  /* Writemask */
> > +  unsigned write_mask = instr->const_index[1];
> > +
> > +  /* IvyBridge does not have a native SIMD4x2 untyped write message so 
> > untyped
> > +   * writes will use SIMD8 mode. In order to hide this and keep 
> > symmetry across
> > +   * typed and untyped messages and across hardware platforms, the
> > +   * current implementation of the untyped messages will transparently 
> > convert
> > +   * the SIMD4x2 payload into an equivalent SIMD8 payload by 
> > transposing it
> > +   * and enabling only channel X on the SEND instruction.
> > +   *
> > +   * The above, works well for full vector writes, but not for partial 
> > writes
> > +   * where we want to write some channels and not others, like when we 
> > have
> > +   * code such as v.xyw = vec3(1,2,4). Because the untyped write 
> > messages are
> > +   * quite restrictive with regards to the channel enables we can 
> > configure in
> > +   * the message descriptor (not all combinations are allowed) we 
> > cannot simply
> > +   * implement these scenarios with a single message while keeping the
> > +   * aforementioned symmetry in the implementation. For now we de 
> > decided that
> > +   * it is better to keep the symmetry to reduce complexity, so in 
> > situations
> > +   * such as the one described we end up emitting two untyped write 
> > messages
> > +   * (one for xy and another for w).
> > +   *
> > +   * The code below packs consecutive channels into a single write 
> > message,
> > +   * detects gaps in the vector write and if needed, sends a second 
> >

Re: [Mesa-dev] [RFC 0/3] i965: Enable up to 24 MRF registers in gen6

2015-09-22 Thread Iago Toral
On Wed, 2015-09-16 at 12:32 -0700, Kenneth Graunke wrote:
> On Wednesday, September 16, 2015 11:17:53 AM Iago Toral Quiroga wrote:
> > It seems that we have some bugs where we fail to compile shaders in gen6
> > > because we do not have enough MRF registers available (see bugs 86469 and
> > > 90631 for example). That triggered some discussion about the fact that SNB
> > > might actually have 24 MRF registers available, but since the docs were not
> > very clear about this, it was suggested that it would be nice to try and
> > experiment if that was the case.
> > 
> > > This series of patches implements such a test; basically they turn our fixed
> > BRW_MAX_MRF into a macro that accepts the hardware generation and then 
> > changes
> > the spilling code in brw_fs_reg_allocate.cpp to use MRF registers 21-23 for
> > gen6 (something similar can be done for the vec4 code, I just did not do it
> > yet).
> > 
> > The good news is that this seems to work fine, at least I can do a full 
> > piglit
> > run without issues in SNB.
> 
> Sweet!
> 
> > In fact, this seems to help a lot of tests when I
> > force spilling of everything in the FS backend (INTEL_DEBUG=spill_fs):
> > 
> > Using MRF registers 13-15 for spilling:
> > crash: 5, fail 267, pass: 15853, skip: 11679, warn: 3
> > 
> > Using MRF registers 21-23 for spilling:
> > crash: 5, fail 140, pass: 15980, skip: 11679, warn: 3
> > 
> > As you can see, we drop the fail ratio to almost 50%...
> 
> That seems odd - I wouldn't think using m13-15 vs. m21-23 would actually
> make a difference.  Perhaps it's papering over a bug where we're failing
> to notice that MRFs are in use?  If so, we should probably fix that (in
> addition to making this change).

(...)

I've been looking into this. The problem was happening because our UBO loads
(via sampler messages) use MRF14 in gen6, so as soon as we had one of
these, get_used_mrfs in brw_fs_reg_allocate.cpp would detect that MRF14
is used and fail building any shader that needs spilling. This is quite
extreme, since just because MRF14 is used by something else it does not
mean that there is an actual conflict, in fact, the test I was playing
with (fs-bools.shader_test) was a case of a false positive.

I suppose that a better way to do this would be to check if any write to
MRF14 prior to the current instruction needing spilling has been already
consumed by a matching SEND instruction also happening before the
instruction we need to spill for. However, now that we moved spilling to
a different MRF range this can't happen any more, so I wonder if we want
to implement this.

One more thing that this brought to my attention is that we probably
want to move UBO loads in gen6 to MRFs > 15 (and < 21, where we do our
spills). This is because our URB writes in gen6 can now happen in MRF
1..15. The likelihood of a conflict, I guess, is very small anyway,
specially because URB writes are bound to be the last thing to happen,
but maybe there is a rare case where this could bite us so I think we
are probably better off moving our UBO loads out of the conflict zone.

BTW, apparently, this does not happen in gen7+ because that uses GRF
registers for SEND instructions instead of MRFs and this code checks for
MRF destinations.

Iago

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/5] Enable up to 24 MRF registers in gen6

2015-09-22 Thread Iago Toral
On Tue, 2015-09-22 at 08:10 +0200, Iago Toral wrote:
> Hi Mark,
> 
> On Mon, 2015-09-21 at 17:45 -0700, Mark Janes wrote:
> > Hi Iago,
> > 
> > According to my tests, this patch series fixes the gles2/gles3
> > "functional.uniform_api.random.23" tests in dEQP, on sandybridge.
> > 
> > Do you see the same results?  Should this patch series be applied to the
> > stable branch?
> 
> I can try to verify this, but I would not be surprised if that was the
> case. This seems to fix a couple of bugs in bugzilla and it fixes a lot
> of piglit tests in SNB when we force spilling, so if that test is
> triggering spilling, there is a good chance that it had the same problem
> and this patch fixed it. I'll try to confirm this today and let you
> know.

Yes, I see the same result. Also, I verified that the test fails without
my patches and that the reason it failed before is the same as all the
other cases that my patches fix: it fails to link because it can't use
m14 for spilling.

Iago

> As for whether this should go to stable, I suppose it depends on whether
> we are confident that this won't break anything else... if you have not
> seen any more issues with piglit/deqp on any other platform I guess
> that's a good sign, but I'll let Ken make that call.
> 
> Iago
> 
> > thanks,
> > 
> > Mark
> > 
> > Iago Toral Quiroga <ito...@igalia.com> writes:
> > 
> > > There are some bug reports about shaders failing to compile in gen6
> > > because MRF 14 is used when we need to spill. For example:
> > > https://bugs.freedesktop.org/show_bug.cgi?id=86469
> > > https://bugs.freedesktop.org/show_bug.cgi?id=90631
> > >
> > > Discussion in bugzilla pointed to the fact that gen6 might actually have
> > > 24 MRF registers available instead of 16, so we could use other MRF
> > > registers and avoid these conflicts (we still need to investigate why
> > > some shaders need up to MRF 14 anyway, since this is not expected).
> > >
> > > Notice that the hardware docs are not clear about this fact:
> > >
> > > SNB PRM Vol4 Part2's "Table 5-4. MRF Registers Available in Device
> > > Hardware" says "Number per Thread" - "24 registers"
> > >
> > > However, SNB PRM Vol4 Part1, 1.6.1 Message Register File (MRF) says:
> > >
> > > "Normal threads should construct their messages in m1..m15. (...)
> > > Regardless of actual hardware implementation, the thread should
> > > > not assume that MRF addresses above m15 wrap to legal MRF registers."
> > >
> > > Therefore experimentation was necessary to evaluate if we had these extra
> > > MRF registers available or not. This was tested in gen6 using MRF
> > > registers 21..23 for spilling and doing a full piglit run (all.py) forcing
> > > spilling of everything on the FS backend. It was also tested by doing
> > > spilling of everything on both the FS and the VS backends with a piglit 
> > > run
> > > of shader.py. In both cases no regressions were observed. In fact, many of
> > > > these tests were helped in the cases where we forced spilling, since that
> > > triggered the same underlying problem described in the bug reports. Here 
> > > are
> > > some results using INTEL_DEBUG=spill_fs,spill_vec4 for a shader.py run on
> > > gen6 hardware:
> > >
> > > Using MRFs 13..15 for spilling:
> > > crash: 2, fail: 113, pass: 6621, skip: 5461
> > > 
> > > Using MRFs 21..23 for spilling:
> > > crash: 2, fail: 12, pass: 6722, skip: 5461
> > >
> > > We might want to test this further with other instances of gen6 hardware
> > > though... I am not sure that we can safely conclude that all 
> > > implementations
> > > of gen6 hardware have 24 MRF registers from my tests on just one 
> > > particular
> > > SandyBridge laptop.
> > >
> > > Iago Toral Quiroga (5):
> > >   i965: Move MRF register asserts out of brw_reg.h
> > >   i965: Turn BRW_MAX_MRF into a macro that accepts a hardware generation
> > >   i965/fs: Use MRF registers 21-23 for spilling in gen6
> > >   i965/vec4: Use MRF registers 21-23 for spilling in gen6
> > >   i965: Maximum allowed size of SEND messages is 15 (4 bits)
> > >
> > >  src/mesa/drivers/dri/i965/brw_eu_emit.c| 11 +
> > >  src/mesa/drivers/dri/i965/brw_fs.cpp   |  4 ++--
> > >  src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 15 
> > >  src/mesa/dr

Re: [Mesa-dev] [PATCH] glsl: bail out early in _mesa_ShaderSource if no shaderobj

2015-09-22 Thread Iago Toral
On Tue, 2015-09-22 at 14:34 +0300, Tapani Pälli wrote:
> Patch fixes a crash in conformance test that tries out different
> invalid arguments for glShaderSource and glGetShaderSource:
> 
>ES2-CTS.gtf.GL.glGetShaderSource.getshadersource_programhandle
> 
> This is a regression from commit:
>04e201d0c02cd30ace5c6fe80e9f021ebb733682
> 
> Additions in v2 also fix following failing deqp test:
>dEQP-GLES[2|3].functional.negative_api.shader.shader_source

Nice! I wasn't expecting that :)

Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

> v2: cleanup function, do check earlier (Iago Toral)
> 
> Signed-off-by: Tapani Pälli <tapani.pa...@intel.com>
> ---
>  src/mesa/main/shaderapi.c | 20 +---
>  1 file changed, 9 insertions(+), 11 deletions(-)
> 
> diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
> index f31980b..edc23bc 100644
> --- a/src/mesa/main/shaderapi.c
> +++ b/src/mesa/main/shaderapi.c
> @@ -931,13 +931,9 @@ get_shader_source(struct gl_context *ctx, GLuint shader, 
> GLsizei maxLength,
>   * glShaderSource[ARB].
>   */
>  static void
> -shader_source(struct gl_context *ctx, GLuint shader, const GLchar *source)
> +shader_source(struct gl_shader *sh, const GLchar *source)
>  {
> -   struct gl_shader *sh;
> -
> -   sh = _mesa_lookup_shader_err(ctx, shader, "glShaderSource");
> -   if (!sh)
> -  return;
> +   assert(sh);
>  
> /* free old shader source string and install new one */
> free((void *)sh->Source);
> @@ -1639,13 +1635,17 @@ _mesa_ShaderSource(GLhandleARB shaderObj, GLsizei 
> count,
> GLint *offsets;
> GLsizei i, totalLength;
> GLcharARB *source;
> +   struct gl_shader *sh;
>  
>  #if defined(HAVE_SHA1)
> GLcharARB *replacement;
> -   struct gl_shader *sh;
>  #endif /* HAVE_SHA1 */
>  
> -   if (!shaderObj || string == NULL) {
> +   sh = _mesa_lookup_shader_err(ctx, shaderObj, "glShaderSourceARB");
> +   if (!sh)
> +  return;
> +
> +   if (string == NULL) {
>_mesa_error(ctx, GL_INVALID_VALUE, "glShaderSourceARB");
>return;
> }
> @@ -1697,8 +1697,6 @@ _mesa_ShaderSource(GLhandleARB shaderObj, GLsizei count,
> source[totalLength - 2] = '\0';
>  
>  #if defined(HAVE_SHA1)
> -   sh = _mesa_lookup_shader(ctx, shaderObj);
> -
> /* Dump original shader source to MESA_SHADER_DUMP_PATH and replace
>  * if corresponding entry found from MESA_SHADER_READ_PATH.
>  */
> @@ -1711,7 +1709,7 @@ _mesa_ShaderSource(GLhandleARB shaderObj, GLsizei count,
> }
>  #endif /* HAVE_SHA1 */
>  
> -   shader_source(ctx, shaderObj, source);
> +   shader_source(sh, source);
>  
> free(offsets);
>  }


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] i965: fix textureGrad for cubemaps

2015-09-18 Thread Iago Toral
On Fri, 2015-09-18 at 15:02 +0300, Tapani Pälli wrote:
> Fixes bugs exposed by commit
> 2b1cdb0eddb73f62e4848d4b64840067f1f70865 in:
>ES3-CTS.gtf.GL3Tests.shadow.shadow_execution_frag
> 
> No regressions observed in deqp, CTS or Piglit.
> 
> v2: address review feedback from Iago Toral:
>- move rho calculation to else branch
>- optimize dx and dy calculation
>    - fix documentation inconsistencies
> 
> Signed-off-by: Tapani Pälli <tapani.pa...@intel.com>
> Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91114
> Cc: "10.6 11.0" <mesa-sta...@lists.freedesktop.org>
> ---
>  .../dri/i965/brw_lower_texture_gradients.cpp   | 200 
> +++--
>  1 file changed, 181 insertions(+), 19 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp 
> b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
> index 7a5f983..03dc021 100644
> --- a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
> @@ -48,6 +48,7 @@ public:
>  
>  private:
> void emit(ir_variable *, ir_rvalue *);
> +   ir_variable *temp(void *ctx, const glsl_type *type, const char *name);
>  };
>  
>  /**
> @@ -60,6 +61,17 @@ lower_texture_grad_visitor::emit(ir_variable *var, 
> ir_rvalue *value)
> base_ir->insert_before(assign(var, value));
>  }
>  
> +/**
> + * Emit a temporary variable declaration
> + */
> +ir_variable *
> +lower_texture_grad_visitor::temp(void *ctx, const glsl_type *type, const 
> char *name)
> +{
> +   ir_variable *var = new(ctx) ir_variable(type, name, ir_var_temporary);
> +   base_ir->insert_before(var);
> +   return var;
> +}
> +
>  static const glsl_type *
>  txs_type(const glsl_type *type)
>  {
> @@ -144,28 +156,178 @@ lower_texture_grad_visitor::visit_leave(ir_texture *ir)
>new(mem_ctx) ir_variable(grad_type, "dPdy", ir_var_temporary);
> emit(dPdy, mul(size, ir->lod_info.grad.dPdy));
>  
> -   /* Calculate rho from equation 3.20 of the GL 3.0 specification. */
> -   ir_rvalue *rho;
> -   if (dPdx->type->is_scalar()) {
> -  rho = expr(ir_binop_max, expr(ir_unop_abs, dPdx),
> -expr(ir_unop_abs, dPdy));
> -   } else {
> -  rho = expr(ir_binop_max, expr(ir_unop_sqrt, dot(dPdx, dPdx)),
> -expr(ir_unop_sqrt, dot(dPdy, dPdy)));
> -   }
> -
> -   /* lambda_base = log2(rho).  We're ignoring GL state biases for now.
> -*
> -* For cube maps the result of these formulas is giving us a value of rho
> -* that is twice the value we should use, so divide it by 2 or,
> -* alternatively, remove one unit from the result of the log2 computation.
> -*/
> ir->op = ir_txl;
> if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) {
> -  ir->lod_info.lod = expr(ir_binop_add,
> -  expr(ir_unop_log2, rho),
> -  new(mem_ctx) ir_constant(-1.0f));
> +  /* Cubemap texture lookups first generate a texture coordinate 
> normalized
> +   * to [-1, 1] on the appropriate face. The appropriate face is determined
> +   * by which component has largest magnitude and its sign. The texture
> +   * coordinate is the quotient of the remaining texture coordinates 
> against
> +   * that absolute value of the component of largest magnitude. This
> +   * division requires that the computing of the derivative of the texel
> +   * coordinate must use the quotient rule. The high level GLSL code is 
> as
> +   * follows:
> +   *
> +   * Step 1: selection
> +   *
> +   * vec3 abs_p, Q, dQdx, dQdy;
> +   * abs_p = abs(ir->coordinate);
> +   * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
> +   *Q = ir->coordinate.yzx;
> +   *dQdx = ir->lod_info.grad.dPdx.yzx;
> +   *dQdy = ir->lod_info.grad.dPdy.yzx;
> +   * }
> +   * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
> +   *Q = ir->coordinate.xzy;
> +   *dQdx = ir->lod_info.grad.dPdx.xzy;
> +   *dQdy = ir->lod_info.grad.dPdy.xzy;
> +   * }
> +   * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
> +   *Q = ir->coordinate;
> +   *dQdx = ir->lod_info.grad.dPdx;
> +   *dQdy = ir->lod_info.grad.dPdy;
> +   * }
> +   *
> +   * Step 2: use quotient rule to compute derivative. The normalized to
> +   * [-1, 1] texel coordin

Re: [Mesa-dev] [PATCH v5 00/70] ARB_shader_storage_buffer_object (mesa, i965)

2015-09-21 Thread Iago Toral
Hi Kristian,

On Fri, 2015-09-18 at 16:56 -0700, Kristian Høgsberg wrote:
> On Thu, Sep 10, 2015 at 03:35:16PM +0200, Iago Toral Quiroga wrote:
> > Hi,
> > 
> > this is the latest version of the ARB_shader_storage_buffer_object
> > implementation. A good part of the frontend bits for this are already in
> > master, but this adds some more missing pieces, specifically std430 and
> > memory qualifiers. Additionally, this provides the i965 implementation.
> 
> I've gone through all patches in the series and I replied to patches
> where I had comments.  Overall, the series look good and with the
> comments addressed, I'm ready to give my Reviewed-by for the series.
> I want to take a closer look at the atomics lowering in patches 49+,
> but I'm done for today.  Based on the quick look-through I did, I don't
> expect to find any showstoppers there.

great, thanks for reviewing this! We will send new versions of the
patches for which you had comments or reply to your comments otherwise.

Iago

> Here's a summary of what I found:
> 
> [PATCH v5 01/70] mesa: set MAX_SHADER_STORAGE_BUFFERS to 15.
> 
>   Update limit to 16 and drop the comment
> 
> [PATCH v5 02/70] i965: Use 16-byte offset alignment for shader storage buffers
> 
>   ctx->Const.ShaderStorageBufferOffsetAlignment should be 64
> 
> [PATCH v5 23/70] glsl: refactor parser processing of an interface block 
> definition
> 
>   Clarify that the commit is just moving code.
> 
> [PATCH v5 26/70] glsl: Add parser/compiler support for std430 interface 
> packing qualifier
> 
>   Update the error to also mention shader storage blocks, not just ubos?
> 
> [PATCH v5 28/70] glsl: add std430 interface packing support to ssbo related 
> operations
> 
>   Why are we passing false for is_std430 here (emit_access in
>   handle_rvalue)?  We use handle_rvalue for both UBO and SSBO loads,
>   right?  Also, for consistency, I'd prefer if we could just pass
>   'packing' around instead of is_std430.
> 
> [PATCH v5 29/70] glsl: a shader storage buffer must be smaller than the 
> maximum size allowed
> 
>   Two chunks look like they should be their own patch ("Add unsized
>   array support to glsl_type::std140_size" or such).
> 
> [PATCH v5 38/70] i965/nir/vec4: Implement nir_intrinsic_store_ssbo
> 
>   Shouldn't this be 'skipped_channels += num_channels;' to handle write mask 
> reg.yw?
> 
> [PATCH v5 40/70] nir: Implement __intrinsic_load_ssbo
> 
>   Refactor handling of cmp instruction for converting to bool
> 
> [PATCH v5 54/70] glsl: First argument to atomic functions must be a buffer 
> variable
> 
>   Nitpick: move check that only looks at first element in list to after loop
> 
> Also, I expect that before we land this series (though that shouldn't
> be far off), we'll have deleted the vec4 GLSL IR visitor so we can
> drop these patches (I didn't review them):
> 
> [PATCH v5 16/70] i965/vec4: Implement ir_unop_get_buffer_size
> [PATCH v5 39/70] i965/vec4: Implement __intrinsic_store_ssbo
> [PATCH v5 43/70] i965/vec4: Implement __intrinsic_load_ssbo
> [PATCH v5 53/70] i965/vec4: Implement lowered SSBO atomic intrinsics
> 
> I wrote the initial prototype of the SSBO functionality, but I don't
> recall writing:
> 
> [PATCH v5 45/70] glsl: atomic counters can be declared as buffer-qualified 
> variables
> 
> I don't think I did anything for atomics in my patch. Feel free to
> take ownership of that one and add my Reviewed-by.
> 
> thanks,
> Kristian
> 


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/5] Enable up to 24 MRF registers in gen6

2015-09-21 Thread Iago Toral
On Mon, 2015-09-21 at 07:49 -0700, Kenneth Graunke wrote:
> On Monday, September 21, 2015 09:46:24 AM Mark Janes wrote:
> > This series hits an assertion on ILK and G45:
> > 
> > src/mesa/drivers/dri/i965/brw_eu_emit.c:150: brw_set_dest: Assertion
> > `dest.nr < (devinfo->gen == 6 ? 24 : 16)' failed.
> > 
> > It triggers about 8k piglit assertions on those platforms.  I'm turning
> > off testing for G45 and ILK until it is resolved.
> > 
> > https://bugs.freedesktop.org/show_bug.cgi?id=92066
> > 
> > -Mark
> 
> I've pushed a fix for this:
> 
> commit c1070550c289d48ef389aeb8c564d1abd1123ad1
> Author: Kenneth Graunke 
> Date:   Mon Sep 21 07:42:27 2015 -0700
> 
> i965: Fix MRF register number assertions for compr4.
> 
> compr4 is represented by setting the high bit on the MRF number.
> We need to mask it out before sanity checking the register number.
> 
> Fixes ~8000 assert fails on Ironlake and G45.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92066
> Signed-off-by: Kenneth Graunke 
> 
> Easy mistake...I always forget about compr4.  Hopefully we should be
> good now.

that was fast, thanks Ken! I was scratching my head over this...

BTW, I just noticed that the ILK docs also say that they have 24 MRFs...
(volume 4, part 2, 5.3.3 MRF Registers). Assuming that we don't find any
other issues, would we want to extend the fix to ILK too?

Iago

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: do not attempt to dump_shader if no shaderobj

2015-09-21 Thread Iago Toral
On Mon, 2015-09-21 at 09:15 +0300, Tapani Pälli wrote:
> Patch fixes a crash in conformance test that tries out different
> invalid arguments for glShaderSource and glGetShaderSource:
> 
>ES2-CTS.gtf.GL.glGetShaderSource.getshadersource_programhandle
> 
> This is a regression from commit:
>04e201d0c02cd30ace5c6fe80e9f021ebb733682
> 
> Signed-off-by: Tapani Pälli 
> ---
>  src/mesa/main/shaderapi.c | 18 ++
>  1 file changed, 10 insertions(+), 8 deletions(-)
> 
> diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
> index f31980b..7733d02 100644
> --- a/src/mesa/main/shaderapi.c
> +++ b/src/mesa/main/shaderapi.c
> @@ -1699,15 +1699,17 @@ _mesa_ShaderSource(GLhandleARB shaderObj, GLsizei 
> count,
>  #if defined(HAVE_SHA1)
> sh = _mesa_lookup_shader(ctx, shaderObj);

Why not call this earlier in that function (before we even process the
shader string) and return if it is NULL? There is no point in waiting
until this moment to check for that.

Then, when we call shader_source right below this code, we could just
pass the sh object directly instead of having that function call
_mesa_lookup_shader again (we could maybe replace that call with an
assert to make sure that we passed a valid shader object)

Iago

> -   /* Dump original shader source to MESA_SHADER_DUMP_PATH and replace
> -* if corresponding entry found from MESA_SHADER_READ_PATH.
> -*/
> -   dump_shader(sh->Stage, source);
> +   if (sh) {
> +  /* Dump original shader source to MESA_SHADER_DUMP_PATH and replace
> +   * if corresponding entry found from MESA_SHADER_READ_PATH.
> +   */
> +  dump_shader(sh->Stage, source);
>  
> -   replacement = read_shader(sh->Stage, source);
> -   if (replacement) {
> -  free(source);
> -  source = replacement;
> +  replacement = read_shader(sh->Stage, source);
> +  if (replacement) {
> + free(source);
> + source = replacement;
> +  }
> }
>  #endif /* HAVE_SHA1 */
>  


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Make vec4_visitor's destructor virtual

2015-10-05 Thread Iago Toral
On Mon, 2015-10-05 at 13:08 +0300, Francisco Jerez wrote:
> Iago Toral Quiroga <ito...@igalia.com> writes:
> 
> > We need a virtual destructor when at least one of the class' methods is 
> > virtual.
> > Failure to do so leads to undefined behavior when destructing derived 
> > classes.
> > Fixes the following warning:
> >
> > brw_vec4_gs_visitor.cpp: In function 'const unsigned int* 
> > brw::brw_gs_emit(brw_context*, gl_shader_program*, brw_gs_compile*, void*, 
> > unsigned int*)':
> > brw_vec4_gs_visitor.cpp:703:11: warning: deleting object of polymorphic 
> > class type 'brw::vec4_gs_visitor' which has non-virtual destructor might 
> > cause undefined behaviour [-Wdelete-non-virtual-dtor]
> > delete gs;
> 
> I don't think this was leading to an actual bug because AFAICT
> gen6_gs_visitor is the only subclass of vec4_visitor destroyed through a
> pointer of a base class (vec4_gs_visitor *), and its destructor is
> basically the same as its parent's.  Anyway it seems sensible to change
> this so it doesn't bite us in the future.  If you clarify that in the
> commit message this patch is:
> 
> Reviewed-by: Francisco Jerez <curroje...@riseup.net>

Will do, thanks Curro!

Iago

> > ---
> >  src/mesa/drivers/dri/i965/brw_vec4.h | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
> > b/src/mesa/drivers/dri/i965/brw_vec4.h
> > index d1fa095..5e3500c 100644
> > --- a/src/mesa/drivers/dri/i965/brw_vec4.h
> > +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
> > @@ -76,7 +76,7 @@ public:
> > void *mem_ctx,
> >  bool no_spills,
> >  int shader_time_index);
> > -   ~vec4_visitor();
> > +   virtual ~vec4_visitor();
> >  
> > dst_reg dst_null_f()
> > {
> > -- 
> > 1.9.1
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/6] i965: dump scheduling cycle estimates

2015-10-05 Thread Iago Toral
On Fri, 2015-10-02 at 17:43 -0400, Connor Abbott wrote:
> On Fri, Oct 2, 2015 at 5:37 PM, Connor Abbott  wrote:
> > The heuristic we're using is rather lame, since it assumes everything is
> > non-uniform and loops execute 50 times, but it should be enough for
> > measuring improvements in the scheduler that don't result in a change in
> > the number of instructions.

Our spilling code assumes that loops run 10 times for the purpose of
evaluating spilling costs... shouldn't we use the same estimation
everywhere?

Iago 

> > Signed-off-by: Connor Abbott 
> > ---
> >  src/mesa/drivers/dri/i965/brw_cfg.h  |  4 
> >  src/mesa/drivers/dri/i965/brw_fs_generator.cpp   | 11 ++-
> >  .../drivers/dri/i965/brw_schedule_instructions.cpp   | 20 
> > 
> >  src/mesa/drivers/dri/i965/brw_vec4_generator.cpp |  9 +
> >  4 files changed, 35 insertions(+), 9 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h 
> > b/src/mesa/drivers/dri/i965/brw_cfg.h
> > index a094917..d0bdb00 100644
> > --- a/src/mesa/drivers/dri/i965/brw_cfg.h
> > +++ b/src/mesa/drivers/dri/i965/brw_cfg.h
> > @@ -90,6 +90,8 @@ struct bblock_t {
> > struct exec_list parents;
> > struct exec_list children;
> > int num;
> > +
> > +   unsigned cycle_count;
> >  };
> >
> >  static inline struct backend_instruction *
> > @@ -285,6 +287,8 @@ struct cfg_t {
> > int num_blocks;
> >
> > bool idom_dirty;
> > +
> > +   unsigned cycle_count;
> >  };
> >
> >  /* Note that this is implemented with a double for loop -- break will
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
> > b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> > index 6f8b75e..9540012 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> > @@ -2181,9 +2181,9 @@ fs_generator::generate_code(const cfg_t *cfg, int 
> > dispatch_width)
> >
> > if (unlikely(debug_flag)) {
> >fprintf(stderr, "Native code for %s\n"
> > -  "SIMD%d shader: %d instructions. %d loops. %d:%d 
> > spills:fills. Promoted %u constants. Compacted %d to %d"
> > +  "SIMD%d shader: %d instructions. %u cycles. %d loops. %d:%d 
> > spills:fills. Promoted %u constants. Compacted %d to %d"
> >" bytes (%.0f%%)\n",
> > -  shader_name, dispatch_width, before_size / 16, loop_count,
> > +  shader_name, dispatch_width, before_size / 16, 
> > cfg->cycle_count, loop_count,
> >spill_count, fill_count, promoted_constants, before_size, 
> > after_size,
> >100.0f * (before_size - after_size) / before_size);
> >
> > @@ -2193,12 +2193,13 @@ fs_generator::generate_code(const cfg_t *cfg, int 
> > dispatch_width)
> > }
> >
> > compiler->shader_debug_log(log_data,
> > -  "%s SIMD%d shader: %d inst, %d loops, "
> > +  "%s SIMD%d shader: %d inst, %u cycles, %d 
> > loops, "
> >"%d:%d spills:fills, Promoted %u constants, "
> >"compacted %d to %d bytes.\n",
> >stage_abbrev, dispatch_width, before_size / 
> > 16,
> > -  loop_count, spill_count, fill_count,
> > -  promoted_constants, before_size, after_size);
> > +  cfg->cycle_count, loop_count, spill_count,
> > +  fill_count, promoted_constants, before_size,
> > +  after_size);
> >
> > return start_offset;
> >  }
> > diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp 
> > b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
> > index 1652261..22a493f 100644
> > --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
> > @@ -1467,6 +1467,24 @@ 
> > instruction_scheduler::schedule_instructions(bblock_t *block)
> > if (block->end()->opcode == BRW_OPCODE_NOP)
> >block->end()->remove(block);
> > assert(instructions_to_schedule == 0);
> > +
> > +   block->cycle_count = time;
> > +}
> > +
> > +static unsigned get_cycle_count(cfg_t *cfg)
> > +{
> > +   unsigned count = 0, multiplier = 1;
> > +   foreach_block(block, cfg) {
> > +  if (block->start()->opcode == BRW_OPCODE_DO)
> > + multiplier *= 50; /* assume that loops have ~50 instructions */
> 
> Whoops, this should say "assume that loops are run ~50 times"...
> 
> > +
> > +  count += block->cycle_count * multiplier;
> > +
> > +  if (block->end()->opcode == BRW_OPCODE_WHILE)
> > + multiplier /= 50;
> > +   }
> > +
> > +   return count;
> >  }
> >
> >  void
> > @@ -1507,6 +1525,8 @@ instruction_scheduler::run(cfg_t *cfg)
> >post_reg_alloc);
> >bs->dump_instructions();
> > }
> > 

Re: [Mesa-dev] [PATCH v4] i965/fs: Handle nir shared variable store intrinsic

2015-12-01 Thread Iago Toral
Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

On Tue, 2015-12-01 at 14:35 -0800, Jordan Justen wrote:
> v4:
>  * Apply similar optimization for shared variable stores as
>0cb7d7b4b7c32246d4c4225a1d17d7ff79a7526d. This was causing a
>OpenGLES 3.1 CTS failure, but
>867c436ca841b4196b4dde4786f5086c76b20dd7 fixes that.
> 
> Signed-off-by: Jordan Justen <jordan.l.jus...@intel.com>
> Cc: Iago Toral Quiroga <ito...@igalia.com>
> ---
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 48 
> 
>  1 file changed, 48 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 12a8b59..d945cef 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -2472,6 +2472,54 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , 
> nir_intrinsic_instr *instr
>break;
> }
>  
> +   case nir_intrinsic_store_shared_indirect:
> +  has_indirect = true;
> +  /* fallthrough */
> +   case nir_intrinsic_store_shared: {
> +  assert(devinfo->gen >= 7);
> +
> +  /* Block index */
> +  fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM);
> +
> +  /* Value */
> +  fs_reg val_reg = get_nir_src(instr->src[0]);
> +
> +  /* Writemask */
> +  unsigned writemask = instr->const_index[1];
> +
> +  /* Combine groups of consecutive enabled channels in one write
> +   * message. We use ffs to find the first enabled channel and then ffs 
> on
> +   * the bit-inverse, down-shifted writemask to determine the length of
> +   * the block of enabled bits.
> +   */
> +  while (writemask) {
> + unsigned first_component = ffs(writemask) - 1;
> + unsigned length = ffs(~(writemask >> first_component)) - 1;
> + fs_reg offset_reg;
> +
> + if (!has_indirect) {
> +offset_reg = brw_imm_ud(instr->const_index[0] + 4 * 
> first_component);
> + } else {
> +offset_reg = vgrf(glsl_type::uint_type);
> +bld.ADD(offset_reg,
> +retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD),
> +brw_imm_ud(4 * first_component));
> + }
> +
> + emit_untyped_write(bld, surf_index, offset_reg,
> +offset(val_reg, bld, first_component),
> +1 /* dims */, length,
> +BRW_PREDICATE_NONE);
> +
> + /* Clear the bits in the writemask that we just wrote, then try
> +  * again to see if more channels are left.
> +  */
> + writemask &= (15 << (first_component + length));
> +  }
> +
> +  break;
> +   }
> +
> case nir_intrinsic_load_input_indirect:
>has_indirect = true;
>/* fallthrough */


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 00/44] Computer shader shared variables

2015-12-02 Thread Iago Toral
Hi Jordan,

On Tue, 2015-12-01 at 00:19 -0800, Jordan Justen wrote:
> git://people.freedesktop.org/~jljusten/mesa cs-shared-variables-v3
> http://patchwork.freedesktop.org/bundle/jljusten/cs-shared-variables-v3
> 
> I received lots of good suggestions for v2 of the series, and the
> patches needed a rebase. Thanks!
> 
> So far 33 of the 44 patches have a Reviewed-by. The patchwork link
> above shows which patches have been reviewed.

It seems that, with the patch I reviewed today, the entire series has now
been reviewed. If I am not mistaken, shared variables were the only missing
piece for compute shaders in Mesa / i965, right?

Iago

> For reference, here is the cover letter for v2 of the series:
> http://lists.freedesktop.org/archives/mesa-dev/2015-November/100564.html
> 
> Francisco Jerez (16):
>   i965: Define symbolic constants for some useful L3 cache control
> registers.
>   i965: Adjust gen check in can_do_pipelined_register_writes
>   i965: Keep track of whether LRI is allowed in the context struct.
>   i965: Define state flag to signal that the URB size has been altered.
>   i965/gen8: Don't add workaround bits to PIPE_CONTROL stalls if DC
> flush is set.
>   i965: Add slice count to the brw_device_info structure.
>   i965: Import tables enumerating the set of validated L3
> configurations.
>   i965: Implement programming of the L3 configuration.
>   i965/hsw: Enable L3 atomics.
>   i965: Define and use REG_MASK macro to make masked MMIO writes
> slightly more readable.
>   i965: Implement selection of the closest L3 configuration based on a
> vector of weights.
>   i965: Calculate appropriate L3 partition weights for the current
> pipeline state.
>   i965: Implement L3 state atom.
>   i965: Add debug flag to print out the new L3 state during transitions.
>   i965: Work around L3 state leaks during context switches.
>   i965: Hook up L3 partitioning state atom.
> 
> Iago Toral Quiroga (1):
>   glsl: Don't assert on shared variable matrices with 'inherited' layout
> 
> Jordan Justen (27):
>   glsl ubo/ssbo: Use enum to track current buffer access type
>   glsl ubo/ssbo: Split buffer access to insert_buffer_access
>   glsl ubo/ssbo: Add lower_buffer_access class
>   glsl ubo/ssbo: Move is_dereferenced_thing_row_major into
> lower_buffer_access
>   glsl ubo/ssbo: Move common code into
> lower_buffer_access::setup_buffer_access
>   glsl: Remove mem_ctx as member variable in lower_ubo_reference_visitor
>   glsl: Don't lower_variable_index_to_cond_assign for shared variables
>   glsl: Add lowering pass for shared variable references
>   nir: Translate glsl shared var load intrinsic to nir intrinsic
>   nir: Translate glsl shared var store intrinsic to nir intrinsic
>   i965: Disable vector splitting on shared variables
>   i965/fs: Handle nir shared variable load intrinsic
>   i965/fs: Handle nir shared variable store intrinsic
>   i965: Enable shared local memory for CS shared variables
>   i965: Lower shared variable references to intrinsic calls
>   glsl: Allow atomic functions to be used with shared variables
>   glsl: Replace atomic_ssbo and ssbo_atomic with atomic
>   glsl: Check for SSBO variable in SSBO atomic lowering
>   glsl: Check for SSBO variable in check_for_ssbo_store
>   glsl: Translate atomic intrinsic functions on shared variables
>   glsl: Buffer atomics are supported for compute shaders
>   glsl: Disable several optimizations on shared variables
>   nir: Add nir intrinsics for shared variable atomic operations
>   i965/nir: Implement shared variable atomic operations
>   i965: Enable ARB_compute_shader extension on supported hardware
>   docs: Mark ARB_compute_shader as done for i965
>   docs: Add ARB_compute_shader to 11.2.0 release notes
> 
>  docs/GL3.txt   |   4 +-
>  docs/relnotes/11.2.0.html  |   1 +
>  src/glsl/Makefile.sources  |   3 +
>  src/glsl/ast_function.cpp  |  18 +-
>  src/glsl/builtin_functions.cpp | 236 
>  src/glsl/ir_optimization.h |   1 +
>  src/glsl/linker.cpp|   4 +
>  src/glsl/lower_buffer_access.cpp   | 490 +
>  src/glsl/lower_buffer_access.h |  65 +++
>  src/glsl/lower_shared_reference.cpp| 495 +
>  src/glsl/lower_ubo_reference.cpp   | 601 
> -
>  src/glsl/lower_variable_index_to_cond_assign.cpp   |   3 +
>  src/glsl/nir/glsl_to_nir.cpp   | 145 -
>  src/glsl/nir/nir_intrinsics.h  |  31 +-
>  src/glsl/opt_constant_propagation.cp

Re: [Mesa-dev] [PATCH v3 30/44] i965/fs: Handle nir shared variable store intrinsic

2015-12-01 Thread Iago Toral
On Tue, 2015-12-01 at 13:00 -0800, Jordan Justen wrote:
> On 2015-12-01 04:45:05, Iago Toral wrote:
> > On Tue, 2015-12-01 at 00:19 -0800, Jordan Justen wrote:
> > > Signed-off-by: Jordan Justen <jordan.l.jus...@intel.com>
> > > ---
> > >  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 52 
> > > 
> > >  1 file changed, 52 insertions(+)
> > > 
> > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
> > > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > > index 12a8b59..6cbb0e2 100644
> > > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > > @@ -2472,6 +2472,58 @@ fs_visitor::nir_emit_intrinsic(const fs_builder 
> > > &bld, nir_intrinsic_instr *instr
> > >break;
> > > }
> > >  
> > > +   case nir_intrinsic_store_shared_indirect:
> > > +  has_indirect = true;
> > > +  /* fallthrough */
> > > +   case nir_intrinsic_store_shared: {
> > > +  assert(devinfo->gen >= 7);
> > > +
> > > +  /* Block index */
> > > +  fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM);
> > > +
> > > +  /* Offset */
> > > +  fs_reg offset_reg = vgrf(glsl_type::uint_type);
> > > +  unsigned const_offset_bytes = 0;
> > > +  if (has_indirect) {
> > > + bld.MOV(offset_reg, get_nir_src(instr->src[1]));
> > > +  } else {
> > > + const_offset_bytes = instr->const_index[0];
> > > + bld.MOV(offset_reg, brw_imm_ud(const_offset_bytes));
> > > +  }
> > > +
> > > +  /* Value */
> > > +  fs_reg val_reg = get_nir_src(instr->src[0]);
> > > +
> > > +  /* Writemask */
> > > +  unsigned writemask = instr->const_index[1];
> > > +
> > > +  /* Write each component present in the writemask */
> > 
> > I made a comment in v2 that this loop is based on early ssbo code that
> > was not optimized (it always emits a write for each component). The
> > current implementation for ssbo store is better and I think it is only a
> > matter of copying the same loop here, since the implementation is the
> > same as in the case of ssbos.
> 
> Noted (in v2 :)
> 
> http://lists.freedesktop.org/archives/mesa-dev/2015-November/101866.html
> 
> -Jordan

Ah, sorry, I had missed your reply. I have just reviewed v4, which
uses the optimized loop.

Iago

> > 
> > > +  unsigned skipped_channels = 0;
> > > +  for (int i = 0; i < instr->num_components; i++) {
> > > + int component_mask = 1 << i;
> > > + if (writemask & component_mask) {
> > > +if (skipped_channels) {
> > > +   if (!has_indirect) {
> > > +  const_offset_bytes += 4 * skipped_channels;
> > > +  bld.MOV(offset_reg, brw_imm_ud(const_offset_bytes));
> > > +   } else {
> > > +  bld.ADD(offset_reg, offset_reg,
> > > +   brw_imm_ud(4 * skipped_channels));
> > > +   }
> > > +   skipped_channels = 0;
> > > +}
> > > +
> > > +emit_untyped_write(bld, surf_index, offset_reg,
> > > +   offset(val_reg, bld, i),
> > > +   1 /* dims */, 1 /* size */,
> > > +   BRW_PREDICATE_NONE);
> > > + }
> > > +
> > > + skipped_channels++;
> > > +  }
> > > +  break;
> > > +   }
> > > +
> > > case nir_intrinsic_load_input_indirect:
> > >has_indirect = true;
> > >/* fallthrough */
> > 
> > 
> 


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 5/5] i965: Skip execution size adjustment for instructions of width 4

2015-12-09 Thread Iago Toral
On Wed, 2015-12-09 at 08:10 -0800, Jason Ekstrand wrote:
> 
> On Dec 9, 2015 4:16 AM, "Iago Toral Quiroga" <ito...@igalia.com>
> wrote:
> >
> > This code in brw_set_dest adjusts the execution size of any
> instruction
> > with a dst.width < 8. However, we don't want to do this with
> instructions
> > operating on doubles, since these will have a width of 4, but still
> > need an execution size of 8 (for SIMD8). Unfortunately, we can't
> just check
> > the size of the operands involved to detect if we are doing an
> operation on
> > doubles, because we can have instructions that do operations on
> double
> > operands interpreted as UD, operating on any of its 2 32-bit
> components.
> >
> > Previous commits have made it so we never emit instructions with a
> horizontal
> > width of 4 that don't have the correct execution size set for
> gen7/gen8, so
> > we can skip it in this case, avoiding the conflicts with fp64
> requirements.
> >
> > Expanding the same fix to other hardware generations requires many
> more
> > changes but since we are not targeting fp64 support on them
> > we don't really care for now.
> > ---
> >  src/mesa/drivers/dri/i965/brw_eu_emit.c | 14 +-
> >  1 file changed, 13 insertions(+), 1 deletion(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c
> b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> > index 78f2c8c..50a8771 100644
> > --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
> > +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> > @@ -202,8 +202,20 @@ brw_set_dest(struct brw_codegen *p, brw_inst
> *inst, struct brw_reg dest)
> > /* Generators should set a default exec_size of either 8
> (SIMD4x2 or SIMD8)
> >  * or 16 (SIMD16), as that's normally correct.  However, when
> dealing with
> >  * small registers, we automatically reduce it to match the
> register size.
> > +*
> > +* In platforms that support fp64 we can emit instructions with
> a width of
> > +* 4 that need two SIMD8 registers and an exec_size of 8 or 16.
> In these
> > +* cases we need to make sure that these instructions have their
> exec sizes
> > +* set properly when they are emitted and we can't rely on this
> code to fix
> > +* it.
> >  */
> > -   if (dest.width < BRW_EXECUTE_8)
> > +   bool fix_exec_size;
> > +   if (devinfo->gen == 7 || devinfo->gen == 8)
> 
> If we're going to take this approach, we definitely want to make it
> gen > 6 or something so we include future gens.  Really gen > 4 is
> probably doable since the only real problem is the legacy clipping
> code.

Strips and fans is also a problem, but it is certainly doable if we want
to do it.

Iago


> > +  fix_exec_size = dest.width < BRW_EXECUTE_4;
> > +   else
> > +  fix_exec_size = dest.width < BRW_EXECUTE_8;
> > +
> > +   if (fix_exec_size)
> >brw_inst_set_exec_size(devinfo, inst, dest.width);
> >  }
> >
> > --
> > 2.1.4
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 
> 


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 5/5] i965: Skip execution size adjustment for instructions of width 4

2015-12-10 Thread Iago Toral
On Wed, 2015-12-09 at 23:51 -0800, Jason Ekstrand wrote:
> 
> On Dec 9, 2015 11:47 PM, "Iago Toral" <ito...@igalia.com> wrote:
> >
> > On Wed, 2015-12-09 at 08:10 -0800, Jason Ekstrand wrote:
> > >
> > > On Dec 9, 2015 4:16 AM, "Iago Toral Quiroga" <ito...@igalia.com>
> > > wrote:
> > > >
> > > > This code in brw_set_dest adjusts the execution size of any
> > > instruction
> > > > with a dst.width < 8. However, we don't want to do this with
> > > instructions
> > > > operating on doubles, since these will have a width of 4, but
> still
> > > > need an execution size of 8 (for SIMD8). Unfortunately, we can't
> > > just check
> > > > the size of the operands involved to detect if we are doing an
> > > operation on
> > > > doubles, because we can have instructions that do operations on
> > > double
> > > > operands interpreted as UD, operating on any of its 2 32-bit
> > > components.
> > > >
> > > > Previous commits have made it so we never emit instructions with
> a
> > > horizontal
> > > > width of 4 that don't have the correct execution size set for
> > > gen7/gen8, so
> > > > we can skip it in this case, avoiding the conflicts with fp64
> > > requirements.
> > > >
> > > > Expanding the same fix to other hardware generations requires
> many
> > > more
> > > > changes but since we are not targeting fp64 support on them
> > > > we don't really care for now.
> > > > ---
> > > >  src/mesa/drivers/dri/i965/brw_eu_emit.c | 14 +-
> > > >  1 file changed, 13 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c
> > > b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> > > > index 78f2c8c..50a8771 100644
> > > > --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
> > > > +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> > > > @@ -202,8 +202,20 @@ brw_set_dest(struct brw_codegen *p,
> brw_inst
> > > *inst, struct brw_reg dest)
> > > > /* Generators should set a default exec_size of either 8
> > > (SIMD4x2 or SIMD8)
> > > >  * or 16 (SIMD16), as that's normally correct.  However,
> when
> > > dealing with
> > > >  * small registers, we automatically reduce it to match the
> > > register size.
> > > > +*
> > > > +* In platforms that support fp64 we can emit instructions
> with
> > > a width of
> > > > +* 4 that need two SIMD8 registers and an exec_size of 8 or
> 16.
> > > In these
> > > > +* cases we need to make sure that these instructions have
> their
> > > exec sizes
> > > > +* set properly when they are emitted and we can't rely on
> this
> > > code to fix
> > > > +* it.
> > > >  */
> > > > -   if (dest.width < BRW_EXECUTE_8)
> > > > +   bool fix_exec_size;
> > > > +   if (devinfo->gen == 7 || devinfo->gen == 8)
> > >
> > > If we're going to take this approach, we definitely want to make
> it
> > > gen > 6 or something so we include future gens.  Really gen > 4 is
> > > probably doable since the only real problem is the legacy clipping
> > > code.
> >
> > Strips and fans is also a problem, but it is certainly doable if we
> want
> > to do it.
> 
> Yeah, my primary point is that we should make it as little of an
> edge-case as possible.  We could go back to at least gen6 and we
> should go forward.  That said, it'll take a little testing from the
> Intel side.

Sure, this sounds sensible to me. I'll scan the code for gen9 paths that
work with a horizontal width of 4 and include patches to cover gen6 as
well. When we have that I'll send the series for testing.

Thanks Jason!

Iago
> > Iago
> >
> >
> > > > +  fix_exec_size = dest.width < BRW_EXECUTE_4;
> > > > +   else
> > > > +  fix_exec_size = dest.width < BRW_EXECUTE_8;
> > > > +
> > > > +   if (fix_exec_size)
> > > >brw_inst_set_exec_size(devinfo, inst, dest.width);
> > > >  }
> > > >
> > > > --
> > > > 2.1.4
> > > >
> > > > ___
> > > > mesa-dev mailing list
> > > > mesa-dev@lists.freedesktop.org
> > > > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> > >
> > >
> >
> >
> 
> 


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 1/5] i965/eu: set correct execution size in brw_NOP

2015-12-10 Thread Iago Toral
On Wed, 2015-12-09 at 10:38 -0800, Matt Turner wrote:
> On Wed, Dec 9, 2015 at 4:15 AM, Iago Toral Quiroga <ito...@igalia.com> wrote:
> > ---
> >  src/mesa/drivers/dri/i965/brw_eu_emit.c | 1 +
> >  1 file changed, 1 insertion(+)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c 
> > b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> > index f8c0f80..9543d5e 100644
> > --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
> > +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> > @@ -1256,6 +1256,7 @@ brw_F16TO32(struct brw_codegen *p, struct brw_reg 
> > dst, struct brw_reg src)
> >  void brw_NOP(struct brw_codegen *p)
> >  {
> > brw_inst *insn = next_insn(p, BRW_OPCODE_NOP);
> > +   brw_inst_set_exec_size(p->devinfo, insn, BRW_EXECUTE_4);
> 
> I don't follow this change. Was this implicitly set before?

Yes, brw_NOP uses brw_vec4_grf() for both dst and src0, so the code in
brw_set_dest will set an execution size of 4 (otherwise we hit the
assertion in validate_reg that checks execsize >= width). I have changed
this to use brw_vec1_grf and an execsize of 1 and it seems to work fine
though, so I'll merge that change in the patch.

Thanks Matt!

Iago

> At least in newer documentation, NOP is defined to have nearly all
> fields 0 which would mean execution size must be 1.
> 


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] i965/gen8/cs: Gen8 requires 64 byte alignment for push constant data

2015-12-17 Thread Iago Toral
On Wed, 2015-12-16 at 11:39 -0800, Kenneth Graunke wrote:
> On Wednesday, December 16, 2015 10:02:16 AM Iago Toral Quiroga wrote:
> > The BDW PRM Vol2a: Command Reference: Instructions, section 
> > MEDIA_CURBE_LOAD,
> > says that 'CURBE Total Data Length' and 'CURBE Data Start Address' are
> > 64-byte aligned. This is different from previous gens, that were 32-byte
> > aligned.
> > 
> > v2 (Jordan):
> >   - CURBE Data Start Address is also 64-byte aligned.
> >   - The call to brw_state_batch should also use 64-byte alignment.
> >   - Improve PRM reference.
> > 
> > Fixes the following SSBO CTS tests on BDW:
> > ES31-CTS.shader_storage_buffer_object.basic-atomic-case1-cs
> > ES31-CTS.shader_storage_buffer_object.basic-operations-case1-cs
> > ES31-CTS.shader_storage_buffer_object.basic-operations-case2-cs
> > ES31-CTS.shader_storage_buffer_object.basic-stdLayout_UBO_SSBO-case2-cs
> > ES31-CTS.shader_storage_buffer_object.advanced-write-fragment-cs
> > ES31-CTS.shader_storage_buffer_object.advanced-indirectAddressing-case2-cs
> > ES31-CTS.shader_storage_buffer_object.advanced-matrix-cs
> > 
> > And many other CS CTS tests as reported by Marta Lofstedt.
> > ---
> >  src/mesa/drivers/dri/i965/gen7_cs_state.c | 12 
> >  1 file changed, 8 insertions(+), 4 deletions(-)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c 
> > b/src/mesa/drivers/dri/i965/gen7_cs_state.c
> > index 1fde69c..df0f301 100644
> > --- a/src/mesa/drivers/dri/i965/gen7_cs_state.c
> > +++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c
> > @@ -68,7 +68,7 @@ brw_upload_cs_state(struct brw_context *brw)
> >  
> > uint32_t *bind = (uint32_t*) brw_state_batch(brw, 
> > AUB_TRACE_BINDING_TABLE,
> >  
> > prog_data->binding_table.size_bytes,
> > -32, 
> > &stage_state->bind_bo_offset);
> > +64, 
> > &stage_state->bind_bo_offset);
> 
> I don't understand this hunk - binding tables don't have anything to do
> with push constants.  These are for pull constants and UBOs.  At least
> in the 3D pipeline, we only align these to 32B, not 64.
> 
> > unsigned local_id_dwords = 0;
> >  
> > @@ -77,7 +77,8 @@ brw_upload_cs_state(struct brw_context *brw)
> >  
> > unsigned push_constant_data_size =
> >(prog_data->nr_params + local_id_dwords) * sizeof(gl_constant_value);
> > -   unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
> > +   unsigned reg_aligned_constant_size =
> > +  ALIGN(push_constant_data_size, brw->gen < 8 ? 32 : 64);
> > unsigned push_constant_regs = reg_aligned_constant_size / 32;
> > unsigned threads = get_cs_thread_count(cs_prog_data);
> >  
> > @@ -138,11 +139,13 @@ brw_upload_cs_state(struct brw_context *brw)
> > ADVANCE_BATCH();
> >  
> > if (reg_aligned_constant_size > 0) {
> > +  const unsigned aligned_push_const_offset =
> > + ALIGN(stage_state->push_const_offset, brw->gen < 8 ? 32 : 64);
> 
> This is wrong.  What you want is to change:
> 
>   param = (gl_constant_value*)
>  brw_state_batch(brw, type,
>  reg_aligned_constant_size * threads,
> >  32, &stage_state->push_const_offset);
> 
> to use an alignment of 64 instead of 32 on Gen8+.  That way, it'll
> actually upload the data to a portion of the buffer that starts on
> a 64B aligned boundary.
> 
> As is, you're uploading the data to a 32B aligned section and then
> just fudging the pointer to be 64B aligned, possibly skipping over
> the first 32B.  Probably not what you wanted :)
> 
> Maybe you accidentally changed the wrong brw_state_batch call?

Ouch! yeah, I meant to change the other call, sorry about that. Anyway,
I think the patch proposed by Jordan is good so I won't send another
version.

Thanks for the review Ken!

Iago

> >BEGIN_BATCH(4);
> >OUT_BATCH(MEDIA_CURBE_LOAD << 16 | (4 - 2));
> >OUT_BATCH(0);
> >OUT_BATCH(reg_aligned_constant_size * threads);
> > -  OUT_BATCH(stage_state->push_const_offset);
> > +  OUT_BATCH(aligned_push_const_offset);
> >ADVANCE_BATCH();
> > }
> >  
> > @@ -241,7 +244,8 @@ brw_upload_cs_push_constants(struct brw_context *brw,
> >  
> >const unsigned push_constant_data_size =
> >   (local_id_dwords + prog_data->nr_params) * 
> > sizeof(gl_constant_value);
> > -  const unsigned reg_aligned_constant_size = 
> > ALIGN(push_constant_data_size, 32);
> > +  const unsigned reg_aligned_constant_size =
> > + ALIGN(push_constant_data_size, brw->gen < 8 ? 32 : 64);
> >const unsigned param_aligned_count =
> >   reg_aligned_constant_size / sizeof(*param);
> >  
> > 


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] i965/gen8/cs: Gen8 requires 64 byte alignment for push constant data

2015-12-17 Thread Iago Toral
On Wed, 2015-12-16 at 14:48 -0800, Jordan Justen wrote:
> On 2015-12-16 11:39:00, Kenneth Graunke wrote:
> > On Wednesday, December 16, 2015 10:02:16 AM Iago Toral Quiroga wrote:
> > > The BDW PRM Vol2a: Command Reference: Instructions, section 
> > > MEDIA_CURBE_LOAD,
> > > says that 'CURBE Total Data Length' and 'CURBE Data Start Address' are
> > > 64-byte aligned. This is different from previous gens, that were 32-byte
> > > aligned.
> > > 
> > > v2 (Jordan):
> > >   - CURBE Data Start Address is also 64-byte aligned.
> > >   - The call to brw_state_batch should also use 64-byte alignment.
> > >   - Improve PRM reference.
> > > 
> > > Fixes the following SSBO CTS tests on BDW:
> > > ES31-CTS.shader_storage_buffer_object.basic-atomic-case1-cs
> > > ES31-CTS.shader_storage_buffer_object.basic-operations-case1-cs
> > > ES31-CTS.shader_storage_buffer_object.basic-operations-case2-cs
> > > ES31-CTS.shader_storage_buffer_object.basic-stdLayout_UBO_SSBO-case2-cs
> > > ES31-CTS.shader_storage_buffer_object.advanced-write-fragment-cs
> > > ES31-CTS.shader_storage_buffer_object.advanced-indirectAddressing-case2-cs
> > > ES31-CTS.shader_storage_buffer_object.advanced-matrix-cs
> > > 
> > > And many other CS CTS tests as reported by Marta Lofstedt.
> > > ---
> > >  src/mesa/drivers/dri/i965/gen7_cs_state.c | 12 
> > >  1 file changed, 8 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c 
> > > b/src/mesa/drivers/dri/i965/gen7_cs_state.c
> > > index 1fde69c..df0f301 100644
> > > --- a/src/mesa/drivers/dri/i965/gen7_cs_state.c
> > > +++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c
> > > @@ -68,7 +68,7 @@ brw_upload_cs_state(struct brw_context *brw)
> > >  
> > > uint32_t *bind = (uint32_t*) brw_state_batch(brw, 
> > > AUB_TRACE_BINDING_TABLE,
> > >  
> > > prog_data->binding_table.size_bytes,
> > > -32, 
> > > &stage_state->bind_bo_offset);
> > > +64, 
> > > &stage_state->bind_bo_offset);
> > 
> > I don't understand this hunk - binding tables don't have anything to do
> > with push constants.  These are for pull constants and UBOs.  At least
> > in the 3D pipeline, we only align these to 32B, not 64.
> 
> Yeah. I think he wants to update the call you pointed out below in
> brw_upload_cs_push_constants.
> 
> Also, how about consistently applying the alignment change? Either,
> just bump the base and size alignment to 64, or also check the gen to
> align the base to 32 on gen7.
> 
> How about the attached patch?

Yeah, that looks simpler. The patch is:

Tested-by: Iago Toral Quiroga <ito...@igalia.com>
Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>

Thanks Jordan!

> -Jordan
> 
> > > unsigned local_id_dwords = 0;
> > >  
> > > @@ -77,7 +77,8 @@ brw_upload_cs_state(struct brw_context *brw)
> > >  
> > > unsigned push_constant_data_size =
> > >(prog_data->nr_params + local_id_dwords) * 
> > > sizeof(gl_constant_value);
> > > -   unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 
> > > 32);
> > > +   unsigned reg_aligned_constant_size =
> > > +  ALIGN(push_constant_data_size, brw->gen < 8 ? 32 : 64);
> > > unsigned push_constant_regs = reg_aligned_constant_size / 32;
> > > unsigned threads = get_cs_thread_count(cs_prog_data);
> > >  
> > > @@ -138,11 +139,13 @@ brw_upload_cs_state(struct brw_context *brw)
> > > ADVANCE_BATCH();
> > >  
> > > if (reg_aligned_constant_size > 0) {
> > > +  const unsigned aligned_push_const_offset =
> > > + ALIGN(stage_state->push_const_offset, brw->gen < 8 ? 32 : 64);
> > 
> > This is wrong.  What you want is to change:
> > 
> >   param = (gl_constant_value*)
> >  brw_state_batch(brw, type,
> >  reg_aligned_constant_size * threads,
> > >  32, &stage_state->push_const_offset);
> > 
> > to use an alignment of 64 instead of 32 on Gen8+.  That way, it'll
> > actually upload the data to a portion of the buffer that starts on
> > a 64B aligned boundary.
> > 
> > As is, you're uploading the data to a 32B aligned section and then
> > just fudging the pointer 

Re: [Mesa-dev] [PATCH v2 1/2] mesa: Add a _mesa_active_fragment_shader_has_side_effects helper

2015-12-17 Thread Iago Toral
On Thu, 2015-12-17 at 16:29 +0200, Francisco Jerez wrote:
> Iago Toral Quiroga <ito...@igalia.com> writes:
> 
> > Some drivers can disable the FS unit if there is nothing in the shader code
> > that writes to an output (i.e. color, depth, etc). Right now, mesa has
> > a function to check for atomic buffers and the i965 driver also checks for
> > images. Refactor this logic into a generic function that we can use for
> > any source of side effects in a fragment shader. Suggested by Jason.
> > ---
> >  src/mesa/drivers/dri/i965/gen7_wm_state.c |  6 +-
> >  src/mesa/drivers/dri/i965/gen8_ps_state.c |  3 +--
> >  src/mesa/main/mtypes.h| 15 ---
> >  3 files changed, 14 insertions(+), 10 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c 
> > b/src/mesa/drivers/dri/i965/gen7_wm_state.c
> > index 06d5e65..a6d1028 100644
> > --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
> > +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
> > @@ -77,13 +77,9 @@ upload_wm_state(struct brw_context *brw)
> >dw1 |= GEN7_WM_KILL_ENABLE;
> > }
> >  
> > -   if (_mesa_active_fragment_shader_has_atomic_ops(&brw->ctx)) {
> > -  dw1 |= GEN7_WM_DISPATCH_ENABLE;
> > -   }
> > -
> > /* _NEW_BUFFERS | _NEW_COLOR */
> > if (brw_color_buffer_write_enabled(brw) || writes_depth ||
> > -   prog_data->base.nr_image_params ||
> > +   _mesa_active_fragment_shader_has_side_effects(&brw->ctx) ||
> > dw1 & GEN7_WM_KILL_ENABLE) {
> >dw1 |= GEN7_WM_DISPATCH_ENABLE;
> > }
> 
> Hey, it looks like SSBOs are still missing a couple of things that could
> make their side effects rather non-deterministic on i965 hardware: On
> HSW you should probably set the UAV_ONLY WM state bit when there are no
> colour or depth buffer writes as is done for images below in this same
> function, and on all hardware you should set the early depth/stencil
> control field to PSEXEC unless early fragment tests are enabled to make
> sure that the fragment shader is executed regardless of whether
> per-fragment tests pass or not as the spec requires.

Sure, I'll add this and send a new version. Thanks Curro!

BTW, I see that we are doing these two things only for images at the
moment. I guess we should do them for atomic buffers as well, right?

> > diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c 
> > b/src/mesa/drivers/dri/i965/gen8_ps_state.c
> > index 945f710..3cc8c68 100644
> > --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
> > +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
> > @@ -90,8 +90,7 @@ gen8_upload_ps_extra(struct brw_context *brw,
> >  *
> >  * BRW_NEW_FS_PROG_DATA | BRW_NEW_FRAGMENT_PROGRAM | _NEW_BUFFERS | 
> > _NEW_COLOR
> >  */
> > -   if ((_mesa_active_fragment_shader_has_atomic_ops(&brw->ctx) ||
> > -prog_data->base.nr_image_params) &&
> > +   if (_mesa_active_fragment_shader_has_side_effects(&brw->ctx) &&
> > !brw_color_buffer_write_enabled(brw))
> >dw1 |= GEN8_PSX_SHADER_HAS_UAV;
> >  
> > diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
> > index 191a9ea..834ba59 100644
> > --- a/src/mesa/main/mtypes.h
> > +++ b/src/mesa/main/mtypes.h
> > @@ -4538,11 +4538,20 @@ enum _debug
> > DEBUG_INCOMPLETE_FBO = (1 << 3)
> >  };
> >  
> > +/**
> > + * Checks if the active fragment shader program can have side effects due
> > + * to use of things like atomic buffers or images
> > + */
> >  static inline bool
> > -_mesa_active_fragment_shader_has_atomic_ops(const struct gl_context *ctx)
> > +_mesa_active_fragment_shader_has_side_effects(const struct gl_context *ctx)
> >  {
> > -   return ctx->Shader._CurrentFragmentProgram != NULL &&
> > -  
> > ctx->Shader._CurrentFragmentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT]->NumAtomicBuffers
> >  > 0;
> > +   const struct gl_shader *sh;
> > +
> > +   if (!ctx->Shader._CurrentFragmentProgram)
> > +  return false;
> > +
> > +   sh = 
> > ctx->Shader._CurrentFragmentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT];
> > +   return sh->NumAtomicBuffers > 0 || sh->NumImages > 0;
> >  }
> >  
> >  #ifdef __cplusplus
> > -- 
> > 1.9.1
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


<    1   2   3   4   5   6   7   8   9   10   >