date:20150910

Re: [Mesa-dev] [PATCH 2/3] glsl: Use hash tables for brw_fs_vector_splitting().

2015-09-10 Thread Kenneth Graunke

On Saturday, September 05, 2015 08:39:22 PM Timothy Arceri wrote:
> On Sat, 2015-09-05 at 02:21 -0700, Kenneth Graunke wrote:
> > Cuts compile/link time of the fragment shader in #91857 by 25%
> > (21.64 -> 16.28).
> > 
> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91857
> > Signed-off-by: Kenneth Graunke 
> > ---
> >  .../drivers/dri/i965/brw_fs_vector_splitting.cpp   | 48 ---
> > ---
> >  1 file changed, 26 insertions(+), 22 deletions(-)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
> > b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
> > index 96d4f37..ef1ff03 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
> > @@ -43,6 +43,7 @@
> >  #include "glsl/ir_visitor.h"
> >  #include "glsl/ir_rvalue_visitor.h"
> >  #include "glsl/glsl_types.h"
> > +#include "util/hash_table.h"
> >  
> >  static bool debug = false;
> >  
> > @@ -72,11 +73,13 @@ public:
> > ir_vector_reference_visitor(void)
> > {
> >this->mem_ctx = ralloc_context(NULL);
> > -  this->variable_list.make_empty();
> > +  this->ht = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
> > + _mesa_key_pointer_equal);
> > }
> >  
> > ~ir_vector_reference_visitor(void)
> > {
> > +  _mesa_hash_table_destroy(ht, NULL);
> 
> Not a big deal, but you created the table with mem_ctx, so you don't need to do
> this, right?

Oh, good point.  Will drop it.

> >ralloc_free(mem_ctx);
> > }
> >  
> > @@ -89,7 +92,7 @@ public:
> > variable_entry *get_variable_entry(ir_variable *var);
> >  
> > /* List of variable_entry */
> > -   exec_list variable_list;
> > +   struct hash_table *ht;
> >  
> > void *mem_ctx;
> >  };
> > @@ -119,13 +122,12 @@
> > ir_vector_reference_visitor::get_variable_entry(ir_variable *var)
> >break;
> > }
> >  
> > -   foreach_in_list(variable_entry, entry, &this->variable_list) {
> > -  if (entry->var == var)
> > -return entry;
> > -   }
> > +   struct hash_entry *hte = _mesa_hash_table_search(ht, var);
> > +   if (hte)
> > +  return (struct variable_entry *) hte->data;
> >  
> > variable_entry *entry = new(mem_ctx) variable_entry(var);
> > -   this->variable_list.push_tail(entry);
> > +   _mesa_hash_table_insert(ht, var, entry);
> > return entry;
> >  }
> >  
> > @@ -195,9 +197,9 @@
> > ir_vector_reference_visitor::visit_enter(ir_function_signature *ir)
> >  
> >  class ir_vector_splitting_visitor : public ir_rvalue_visitor {
> >  public:
> > -   ir_vector_splitting_visitor(exec_list *vars)
> > +   ir_vector_splitting_visitor(struct hash_table *vars)
> > {
> > -  this->variable_list = vars;
> > +  this->ht = vars;
> > }
> >  
> > virtual ir_visitor_status visit_leave(ir_assignment *);
> > @@ -205,7 +207,7 @@ public:
> > void handle_rvalue(ir_rvalue **rvalue);
> > variable_entry *get_splitting_entry(ir_variable *var);
> >  
> > -   exec_list *variable_list;
> > +   struct hash_table *ht;
> >  };
> >  
> >  variable_entry *
> > @@ -216,13 +218,8 @@
> > ir_vector_splitting_visitor::get_splitting_entry(ir_variable *var)
> > if (!var->type->is_vector())
> >return NULL;
> >  
> > -   foreach_in_list(variable_entry, entry, variable_list) {
> > -  if (entry->var == var) {
> > -return entry;
> > -  }
> > -   }
> > -
> > -   return NULL;
> > +   struct hash_entry *hte = _mesa_hash_table_search(ht, var);
> > +   return hte ? (struct variable_entry *) hte->data : NULL;
> >  }
> >  
> >  void
> > @@ -329,12 +326,16 @@ ir_vector_splitting_visitor::visit_leave(ir_assignment
> > *ir)
> >  bool
> >  brw_do_vector_splitting(exec_list *instructions)
> >  {
> > +   struct hash_entry *hte;
> > +
> > ir_vector_reference_visitor refs;
> >  
> > visit_list_elements(&refs, instructions);
> >  
> > /* Trim out variables we can't split. */
> > -   foreach_in_list_safe(variable_entry, entry, &refs.variable_list) {
> > +   bool ht_empty = true;
> > +   hash_table_foreach(refs.ht, hte) {
> > +  struct variable_entry *entry = (struct variable_entry *) hte->data;
> >if (debug) {
> >  fprintf(stderr, "vector %s@%p: whole_access %d\n",
> >   entry->var->name, (void *) entry->var,
> > @@ -342,11 +343,13 @@ brw_do_vector_splitting(exec_list *instructions)
> >}
> >  
> >if (entry->whole_vector_access) {
> > -entry->remove();
> > + _mesa_hash_table_remove(refs.ht, hte);
> > +  } else {
> > + ht_empty = false;
> >}
> > }
> >  
> > -   if (refs.variable_list.is_empty())
> > +   if (ht_empty)
> 
> hash_table has an entries field I think you could check if its empty like
> this:
> 
> if (refs.ht->entries == 0)
> 
> Other than this and the comment higher up.
> 
> Reviewed-by: Timothy Arceri 

Hey, thanks!  I knew there must be an obvious way to do

Re: [Mesa-dev] [PATCH 2/7] vbo: Add a predraw resolve callback

2015-09-10 Thread Chris Wilson

On Wed, Sep 09, 2015 at 10:19:10AM -0700, Ian Romanick wrote:
> On 09/09/2015 10:10 AM, Kenneth Graunke wrote:
> > On Wednesday, September 09, 2015 02:38:56 PM Chris Wilson wrote:
> >> A common problem with using HiZ and multisampling is that surfaces need
> >> to be resolved prior to use. Currently i965 does this inside its state
> >> update hook, but that is a comparatively heavyweight operation that need
> >> not be performed so frequently. The obvious solution (and therefore
> >> fraught with dragons) is to move the HiZ/color resolves into the
> >> brw_draw_prims() - however, the resolves are performed using meta and
> >> end up re-entering brw_draw_prims() corrupting the context state of the
> >> original call. To avoid the meta recursion, we can add a new callback
> >> (vbo->resolve()) into the vbo pipeline that is called just before
> >> vbo->draw().
> >>
> >> Signed-off-by: Chris Wilson 
> >> Cc: Brian Paul 
> >> Cc: Jordan Justen 
> >> Cc: Jason Ekstrand 
> >> Cc: Kenneth Graunke 
> >> Cc: Francisco Jerez 
> >> ---
> >>  src/mesa/vbo/vbo.h|  1 +
> >>  src/mesa/vbo/vbo_context.c| 19 +++
> >>  src/mesa/vbo/vbo_context.h|  1 +
> >>  src/mesa/vbo/vbo_exec_array.c |  1 +
> >>  src/mesa/vbo/vbo_exec_draw.c  |  5 -
> >>  src/mesa/vbo/vbo_save_draw.c  |  2 ++
> >>  6 files changed, 28 insertions(+), 1 deletion(-)
> > 
> > What problem are you trying to solve with this patch series?
> > Are you trying to fix bugs?  If so, what triggers them?
> > Are you trying to improve performance?  If so, do you have any data
> > demonstrating that it benefits some workload?
> 
> In 0/7 he says, "By moving the current HiZ/color resolves
> we need before drawing from out of the notify and into the draw itself,
> we can save a few percent of overhead in OglBatch7 on Atom class
> devices."  Given the amount of change here, I too would like to see some
> actual performance data.

http://people.freedesktop.org/~ickle/vbo/

ministat 63c4b7e_bench_synmark:OglBatch7 2fbce8b_bench_synmark:OglBatch7
x 63c4b7e_bench_synmark:OglBatch7
+ 2fbce8b_bench_synmark:OglBatch7
+--+
|x   + |
|x   + |
|x   x   + +  +|
|x   x   +++ +++   |
|   x  x x   x x +++ +++   |
|   x  xxx  xx xx+++   |
|  xx x x+  ++ |
|x  x  x   **xx xx  x++ +++|
|  |AM|   |A|  |
+--+
N   Min   MaxMedian   AvgStddev
x  50   8.96925   10.4881   10.2213 10.2036390.22548474
+  50 10.16   10.8186   10.6612 10.6491340.12242131
Difference at 95.0% confidence
0.445495 +/- 0.0719896
4.36604% +/- 0.705529%
(Student's t, pooled s = 0.181425)

Looking at the total number of intel_update_state() vs brw_draw_prims()
for OglBatch7 doesn't alone explain the change as there are nearly the
same number of calls to both. (Though we do see similar changes in the
INTEL_NO_HW versions of OglBatch0 and OglBatch7, but that data is much
more noisy.)
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 91889] Planetary Anihilation: Titans display content of other processes buffers

2015-09-10 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=91889

--- Comment #5 from Albert Freeman  ---
The GPU has its own physical ram that is managed with a completely different
system than system ram.

There are actually two issues here. One is the bug in the game causing strange
behavior. The other is the security issue that just so happens to (sometimes)
arise due to that bug.

This is probably not going to work, but try:
export MESA_EXTENSION_OVERRIDE=GL_ARB_gpu_shader5
then run the game in the same console window.

That seems to get rid of a rather critical warning when I replay the trace.

Strange thing is, when I replay this on mesa and catalyst, the same visual
corruption occurs (in the areas I can remember, exactly the same). Though
catalyst shows no warnings/errors but mesa does. This shouldn't happen as
apitrace simply records GL commands/data before they reach the driver. When
replayed they get sent to my driver for display.

Someone on irc commented that apitrace does not always capture all data needed
to display the replay flawlessly. However the corruption seems to be in areas
which shouldn't (in normal circumstances) be affected by missing data (as far
as I know) (e.g. empty sky OR big things crosscutting many different bits of
geometry/UI, like rectangles).

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [RFC 1/2] nir: Add a new lowering pass nir_lower_vec_and_coalesce

2015-09-10 Thread Eduardo Lima Mitev

On 09/09/2015 07:10 PM, Jason Ekstrand wrote:
> 
> On Sep 8, 2015 23:27, "Eduardo Lima Mitev" wrote:
>>
>> This pass will propagate the destination components of a vecN
> instruction,
>> as destination of the instructions that define its sources; if certain
>> conditions are met.
>>
>> If all the components of the destination register in the vecN instruction
>> can be propagated, the instruction is removed. Otherwise, a new, reduced
>> vecN instruction is emitted with the channels that remained.
>>
>> This effectively coalesces registers and reduces indirection.
>>
>> By now, this pass will only propagate to ALU instructions, but it could
>> be extended to include other instructions like load_input intrinsic.
>>
>> It also propagates to instructions within the same block as the vecN
>> instruction. But it could be made to work cross-block in the future,
>> though there are non-trivial issues with this like considering
>> registers that are written in different branches of a conditional.
>> More analysis is needed to correctly cover these cases.
>>
>> This pass works on a NIR shader in final form (after SSA), and is
>> expected to run before nir_lower_vec_to_movs().
>> ---
>>  src/glsl/Makefile.sources |   1 +
>>  src/glsl/nir/nir.h|   1 +
>>  src/glsl/nir/nir_lower_vec_and_coalesce.c | 301
> ++
>>  3 files changed, 303 insertions(+)
>>  create mode 100644 src/glsl/nir/nir_lower_vec_and_coalesce.c
>>
>> diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
>> index c422303..015f242 100644
>> --- a/src/glsl/Makefile.sources
>> +++ b/src/glsl/Makefile.sources
>> @@ -48,6 +48,7 @@ NIR_FILES = \
>> nir/nir_lower_vars_to_ssa.c \
>> nir/nir_lower_var_copies.c \
>> nir/nir_lower_vec_to_movs.c \
>> +   nir/nir_lower_vec_and_coalesce.c \
>> nir/nir_metadata.c \
>> nir/nir_normalize_cubemap_coords.c \
>> nir/nir_opt_constant_folding.c \
>> diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
>> index 3c375f3..6a89f1d 100644
>> --- a/src/glsl/nir/nir.h
>> +++ b/src/glsl/nir/nir.h
>> @@ -1786,6 +1786,7 @@ void nir_lower_vars_to_ssa(nir_shader *shader);
>>  void nir_remove_dead_variables(nir_shader *shader);
>>
>>  void nir_lower_vec_to_movs(nir_shader *shader);
>> +void nir_lower_vec_and_coalesce(nir_shader *shader);
>>  void nir_lower_alu_to_scalar(nir_shader *shader);
>>  void nir_lower_load_const_to_scalar(nir_shader *shader);
>>
>> diff --git a/src/glsl/nir/nir_lower_vec_and_coalesce.c
> b/src/glsl/nir/nir_lower_vec_and_coalesce.c
>> new file mode 100644
>> index 000..2b21ec1
>> --- /dev/null
>> +++ b/src/glsl/nir/nir_lower_vec_and_coalesce.c
>> @@ -0,0 +1,301 @@
>> +/*
>> + * Copyright © 2015 Intel Corporation
>> + *
>> + * Permission is hereby granted, free of charge, to any person
> obtaining a
>> + * copy of this software and associated documentation files (the
> "Software"),
>> + * to deal in the Software without restriction, including without
> limitation
>> + * the rights to use, copy, modify, merge, publish, distribute,
> sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including
> the next
>> + * paragraph) shall be included in all copies or substantial portions
> of the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
> SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
> OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> OTHER DEALINGS
>> + * IN THE SOFTWARE.
>> + *
>> + * Authors:
>> + *Eduardo Lima Mitev (el...@igalia.com )
>> + *
>> + */
>> +
>> +#include "nir.h"
>> +
>> +/*
>> + * Implements a pass that lowers vecN instructions by propagating the
>> + * components of their destinations, as the destination of the
>> + * instructions that defines the sources of the vecN instruction.
>> + *
>> + * This effectively coalesces registers and reduces indirection.
>> + *
>> + * If all the components of the destination register in the vecN
>> + * instruction can be propagated, the instruction is removed. Otherwise,
>> + * a new, reduced vecN instruction is emitted with the channels that
>> + * remained.
>> + *
>> + * By now, this pass will only propagate to ALU instructions, but it
> could
>> + * be extended to include load_const instructions or some intrinsics like
>> + * load_input.
>> + *
>> + * This pass works on a NIR shader in final form (after SSA), and is
>> + *

Re: [Mesa-dev] [PATCH] mesa: Match MESA_FORMAT_B5G6R5 for a shallow pixel format of GL_RGB

2015-09-10 Thread Erik Faye-Lund

On Wed, Sep 9, 2015 at 12:41 PM, Chris Wilson  wrote:
> On Wed, Sep 09, 2015 at 12:09:40PM +0200, Erik Faye-Lund wrote:
>> On Wed, Sep 9, 2015 at 11:25 AM, Chris Wilson  
>> wrote:
>> > On Wed, Sep 09, 2015 at 11:11:59AM +0200, Erik Faye-Lund wrote:
>> >> On Thu, Sep 3, 2015 at 6:05 PM, Chris Wilson  
>> >> wrote:
>> >> > If the user supplies a pixel format of GL_RGB + GL_UNSIGNED_SHORT_5_6_5
>> >> > and specifies a generic unsized GL_RGB internal format, match that to a
>> >> > texture format of MESA_FORMAT_B5G6R5 if supported by the hardware.
>> >> >
>> >> > Noticed while playing with mesa-demos/teximage:
>> >> >
>> >> >   TexImage(RGB/565 256 x 256): 79.8 images/sec, 10.0 MB/sec
>> >> >   TexSubImage(RGB/565 256 x 256): 3804.9 images/sec, 475.6 MB/sec
>> >> >   GetTexImage(RGB/565 256 x 256): 99.5 images/sec, 12.4 MB/sec
>> >> >
>> >> > becomes
>> >> >
>> >> >   TexImage(RGB/565 256 x 256): 3439.1 images/sec, 429.9 MB/sec
>> >> >   TexSubImage(RGB/565 256 x 256): 3744.1 images/sec, 468.0 MB/sec
>> >> >   GetTexImage(RGB/565 256 x 256): 4713.5 images/sec, 589.2 MB/sec
>> >> >
>> >> > on a puny Baytrail which is still far from what it is capable of. The
>> >> > reason for the disparity is that the teximage demo uses a busy texture
>> >> > which is performs an accelerated pixel conversion from the user's B5G6R5
>> >> > into the native X8B8G8R8. After the patch, no conversion is required
>> >> > allowing use of the blitter and memcpy fast paths.
>> >> >
>> >> > Signed-off-by: Chris Wilson 
>> >> > ---
>> >> >  src/mesa/main/texformat.c | 2 ++
>> >> >  1 file changed, 2 insertions(+)
>> >> >
>> >> > diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c
>> >> > index fd9f335..866c7b3 100644
>> >> > --- a/src/mesa/main/texformat.c
>> >> > +++ b/src/mesa/main/texformat.c
>> >> > @@ -114,6 +114,8 @@ _mesa_choose_tex_format(struct gl_context *ctx, 
>> >> > GLenum target,
>> >> > case GL_RGB:
>> >> >if (type == GL_UNSIGNED_INT_2_10_10_10_REV) {
>> >> >   RETURN_IF_SUPPORTED(MESA_FORMAT_B10G10R10A2_UNORM);
>> >> > +  } else if (type == GL_UNSIGNED_SHORT_5_6_5) {
>> >> > + RETURN_IF_SUPPORTED(MESA_FORMAT_B5G6R5_UNORM);
>> >>
>> >> Shouldn't this be MESA_FORMAT_R5G6B5_UNORM? AFAICT, the reason for
>> >> using BGR above, is the _REV suffix on the type...
>> >
>> > No, it's the first line that's "wrong" since the B10G10R10A2 matches
>> > GL_BGRA + GL_UNSIGNED_INT_2_10_10_10_REV, GL_RGB implies that the
>> > incoming data only has 3 channels (no alpha at all).
>>
>> Good point about the alpha channel. It sounds like it should be
>> changed to MESA_FORMAT_B10G10R10X2_UNORM instead.
>>
>> But according to src/mesa/main/format_pack.c's
>> pack_ubyte_b10g10r10a2_unorm(), mesa's B10G10R10A2 corresponds to
>> OpenGL's UNSIGNED_INT_2_10_10_10_REV. So I think it matches GL_RGBA,
>> not GL_BGRA. The latter would mean another swizzle, AFAICT.
>
> The mapping is (_mesa_format_from_format_and_type):
>
>case GL_UNSIGNED_INT_2_10_10_10_REV:
>   if (format == GL_RGB)
>  return MESA_FORMAT_R10G10B10X2_UNORM;
>   if (format == GL_RGBA)
>  return MESA_FORMAT_R10G10B10A2_UNORM;
>   else if (format == GL_RGBA_INTEGER)
>  return MESA_FORMAT_R10G10B10A2_UINT;
>   else if (format == GL_BGRA)
>  return MESA_FORMAT_B10G10R10A2_UNORM;
>   else if (format == GL_BGRA_INTEGER)
>  return MESA_FORMAT_B10G10R10A2_UINT;
>   break;
>
> The trick is that the packed formats are written as lsb first (or it may
> just be native and my lsb bias is showing).
>
>> > i965 know how to do B5G6R5 and not R5G6B5, but for completeness we could
>> > also add RETURN_IF_SUPPORTED(MESA_FORMAT_R5G6B5_UNORM);
>>
>> I think intel-specific hacks (like preferring B5G6R5 over R5G6B5)
>> shouldn't leak into _mesa_choose_tex_format(),
>
> Hah, did you look at _mesa_choose_tex_format()? :)
> I sent another patch to do the hardware agnostic unswizzled conversions.
>
>> So I think it'd be a
>> good move to add "RETURN_IF_SUPPORTED(MESA_FORMAT_R5G6B5_UNORM);"
>> before the latter return.
>
> Not quite, because the mapping for 565 is:
>
>case GL_UNSIGNED_SHORT_5_6_5:
>   if (format == GL_RGB)
>  return MESA_FORMAT_B5G6R5_UNORM;
>   else if (format == GL_BGR)
>  return MESA_FORMAT_R5G6B5_UNORM;
>   else if (format == GL_RGB_INTEGER)
>  return MESA_FORMAT_B5G6R5_UINT;
>   break;

Ahh, you're right. A closer reading of the OpenGL 4.5 spec shows that
I get confused by the fact that RGB + 565 matches the encoding for
MESA_FORMAT_B5G6R5_UNORM, not MESA_FORMAT_R5G6B5_UNORM as would seem
intuitive. But of course, this confusion comes from OpenGL's pretty
confused "backwards" formats, where the first channel is in the most
significant bits, not in the least significant bits as mesa's R5G6B5
is defined.

This patch does indeed seem

[Mesa-dev] [Bug 91889] Planetary Anihilation: Titans display content of other processes buffers

2015-09-10 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=91889

--- Comment #7 from Albert Freeman  ---
*with the replay

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 91889] Planetary Anihilation: Titans display content of other processes buffers

2015-09-10 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=91889

--- Comment #8 from Albert Freeman  ---
Created attachment 118184
  --> https://bugs.freedesktop.org/attachment.cgi?id=118184&action=edit
Warning messages from apitrace replay before "export
MESA_EXTENSION_OVERRIDE=GL_ARB_gpu_shader5"

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 8/9] tgsi: Add code for handling lodq opcode

2015-09-10 Thread Krzesimir Nowak

On Wed, Sep 9, 2015 at 5:26 PM, Brian Paul  wrote:

> On 09/09/2015 04:35 AM, Krzesimir Nowak wrote:
>
>> This introduces new vfunc in tgsi_sampler just for this opcode. I
>> decided against extending get_samples vfunc to return the mipmap level
>> and LOD - the function's prototype is already too scary and doing the
>> sampling for textureQueryLod would be a waste of time.
>> ---
>>   src/gallium/auxiliary/tgsi/tgsi_exec.c | 44
>> ++
>>   src/gallium/auxiliary/tgsi/tgsi_exec.h | 10 
>>   2 files changed, 54 insertions(+)
>>
>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c
>> b/src/gallium/auxiliary/tgsi/tgsi_exec.c
>> index 9544623..054ad08 100644
>> --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
>> +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
>> @@ -2132,6 +2132,44 @@ exec_tex(struct tgsi_exec_machine *mach,
>>  }
>>   }
>>
>> +static void
>> +exec_lodq(struct tgsi_exec_machine *mach,
>> +  const struct tgsi_full_instruction *inst)
>> +{
>> +   uint unit;
>> +   int dim;
>> +   int i;
>> +   union tgsi_exec_channel coords[4];
>> +   const union tgsi_exec_channel *args[Elements(coords)];
>> +   union tgsi_exec_channel r[2];
>> +
>> +   unit = fetch_sampler_unit(mach, inst, 1);
>> +   dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture, NULL);
>> +   assert(dim <= Elements(coords));
>> +   /* fetch coordinates */
>> +   for (i = 0; i < dim; i++) {
>> +  FETCH(&coords[i], 0, TGSI_CHAN_X + i);
>> +  args[i] = &coords[i];
>> +   }
>> +   for (i = dim; i < Elements(coords); i++) {
>> +  args[i] = &ZeroVec;
>> +   }
>> +   mach->Sampler->query_lod(mach->Sampler, unit, unit,
>> +args[0]->f,
>> +args[1]->f,
>> +args[2]->f,
>> +args[3]->f,
>> +tgsi_sampler_lod_none,
>> +r[0].f,
>> +r[1].f);
>> +
>> +   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
>> +  store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X,
>> TGSI_EXEC_DATA_FLOAT);
>> +   }
>> +   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
>> +  store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y,
>> TGSI_EXEC_DATA_FLOAT);
>> +   }
>> +}
>>
>>   static void
>>   exec_txd(struct tgsi_exec_machine *mach,
>> @@ -4378,6 +4416,12 @@ exec_instruction(
>> exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2);
>> break;
>>
>> +   case TGSI_OPCODE_LODQ:
>> +  /* src[0] = texcoord */
>> +  /* src[1] = sampler unit */
>> +  exec_lodq(mach, inst);
>> +  break;
>> +
>>  case TGSI_OPCODE_UP2H:
>> assert (0);
>> break;
>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h
>> b/src/gallium/auxiliary/tgsi/tgsi_exec.h
>> index 5d56aab..556e0af 100644
>> --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
>> +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
>> @@ -138,6 +138,16 @@ struct tgsi_sampler
>>const int j[TGSI_QUAD_SIZE], const int
>> k[TGSI_QUAD_SIZE],
>>const int lod[TGSI_QUAD_SIZE], const int8_t
>> offset[3],
>>float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
>> +   void (*query_lod)(struct tgsi_sampler *tgsi_sampler,
>>
>
> Can tgsi_sampler be const-qualified?
>

It cannot be. I tried, but later, on softpipe side, we get the sp_sampler
instance and modify it in convert_cube. It is nothing that an ugly cast cannot
"fix", but yeah...

This seems like a hack I could fix in follow-up commits:
http://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/softpipe/sp_tex_sample.h?id=bf58a2c362d5afdba512f40b3eb300154201c7f0#n122


>
>
> + const unsigned sview_index,
>> + const unsigned sampler_index,
>> + const float s[TGSI_QUAD_SIZE],
>> + const float t[TGSI_QUAD_SIZE],
>> + const float p[TGSI_QUAD_SIZE],
>> + const float c0[TGSI_QUAD_SIZE],
>> + enum tgsi_sampler_control control,
>> + float mipmap[TGSI_QUAD_SIZE],
>> + float lod[TGSI_QUAD_SIZE]);
>>   };
>>
>>   #define TGSI_EXEC_NUM_TEMPS   4096
>>
>>
> Reviewed-by: Brian Paul 
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 08/10] tgsi: Add code for handling lodq opcode

2015-09-10 Thread Krzesimir Nowak

This introduces new vfunc in tgsi_sampler just for this opcode. I
decided against extending get_samples vfunc to return the mipmap level
and LOD - the function's prototype is already too scary and doing the
sampling for textureQueryLod would be a waste of time.

v2:
  - split overly long lines

Reviewed-by: Brian Paul 
---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 46 ++
 src/gallium/auxiliary/tgsi/tgsi_exec.h | 10 
 2 files changed, 56 insertions(+)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 9544623..a3a79a0 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -2132,6 +2132,46 @@ exec_tex(struct tgsi_exec_machine *mach,
}
 }
 
+static void
+exec_lodq(struct tgsi_exec_machine *mach,
+  const struct tgsi_full_instruction *inst)
+{
+   uint unit;
+   int dim;
+   int i;
+   union tgsi_exec_channel coords[4];
+   const union tgsi_exec_channel *args[Elements(coords)];
+   union tgsi_exec_channel r[2];
+
+   unit = fetch_sampler_unit(mach, inst, 1);
+   dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture, NULL);
+   assert(dim <= Elements(coords));
+   /* fetch coordinates */
+   for (i = 0; i < dim; i++) {
+      FETCH(&coords[i], 0, TGSI_CHAN_X + i);
+      args[i] = &coords[i];
+   }
+   for (i = dim; i < Elements(coords); i++) {
+      args[i] = &ZeroVec;
+   }
+   mach->Sampler->query_lod(mach->Sampler, unit, unit,
+args[0]->f,
+args[1]->f,
+args[2]->f,
+args[3]->f,
+tgsi_sampler_lod_none,
+r[0].f,
+r[1].f);
+
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+      store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X,
+ TGSI_EXEC_DATA_FLOAT);
+   }
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+      store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y,
+ TGSI_EXEC_DATA_FLOAT);
+   }
+}
 
 static void
 exec_txd(struct tgsi_exec_machine *mach,
@@ -4378,6 +4418,12 @@ exec_instruction(
   exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2);
   break;
 
+   case TGSI_OPCODE_LODQ:
+  /* src[0] = texcoord */
+  /* src[1] = sampler unit */
+  exec_lodq(mach, inst);
+  break;
+
case TGSI_OPCODE_UP2H:
   assert (0);
   break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h 
b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 5d56aab..a07d727 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -138,6 +138,16 @@ struct tgsi_sampler
  const int j[TGSI_QUAD_SIZE], const int k[TGSI_QUAD_SIZE],
  const int lod[TGSI_QUAD_SIZE], const int8_t offset[3],
  float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
+   void (*query_lod)(struct tgsi_sampler *tgsi_sampler,
+ const unsigned sview_index,
+ const unsigned sampler_index,
+ const float s[TGSI_QUAD_SIZE],
+ const float t[TGSI_QUAD_SIZE],
+ const float p[TGSI_QUAD_SIZE],
+ const float c0[TGSI_QUAD_SIZE],
+ const enum tgsi_sampler_control control,
+ float mipmap[TGSI_QUAD_SIZE],
+ float lod[TGSI_QUAD_SIZE]);
 };
 
 #define TGSI_EXEC_NUM_TEMPS   4096
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 10/10] docs: Update wrt. textureQueryLod on softpipe

2015-09-10 Thread Krzesimir Nowak

---
 docs/GL3.txt  | 2 +-
 docs/relnotes/11.1.0.html | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 561f204..8ad1aac 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -116,7 +116,7 @@ GL 4.0, GLSL 4.00 --- all DONE: nvc0, radeonsi
   GL_ARB_texture_buffer_object_rgb32   DONE (i965, r600, 
llvmpipe, softpipe)
   GL_ARB_texture_cube_map_arrayDONE (i965, nv50, r600, 
llvmpipe, softpipe)
   GL_ARB_texture_gatherDONE (i965, nv50, r600, 
llvmpipe, softpipe)
-  GL_ARB_texture_query_lod DONE (i965, nv50, r600)
+  GL_ARB_texture_query_lod DONE (i965, nv50, r600, 
softpipe)
   GL_ARB_transform_feedback2   DONE (i965, nv50, r600, 
llvmpipe, softpipe)
   GL_ARB_transform_feedback3   DONE (i965, nv50, r600, 
llvmpipe, softpipe)
 
diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index 7f80206..4b56f69 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -44,6 +44,7 @@ Note: some of the new features are only available with 
certain drivers.
 
 
 
+GL_ARB_texture_query_lod on softpipe
 TBD.
 
 
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 06/10] softpipe: Split 3D to 2D coords conversion into separate function

2015-09-10 Thread Krzesimir Nowak

This is to avoid tying the conversion to the sampling -
textureQueryLod will need to do the conversion too, but it does not do
any sampling.

So instead of a "get_samples" vfunc, there is just a bool saying
whether the conversion is needed or not. This solution keeps a nice
property of not adding any overhead for the common case (2D textures).

v2:
  - replaced the "convert_coords" vfunc with a "need_cube_convert"
boolean to avoid overhead of copying arrays in common case
  - removed an unused typedef
  - split overly long lines in convert_cube
  - const fixes in convert_cube

Reviewed-by: Brian Paul 
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 81 +++-
 src/gallium/drivers/softpipe/sp_tex_sample.h | 15 +-
 2 files changed, 45 insertions(+), 51 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 4bfb300..d7b656e 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -3003,27 +3003,22 @@ sample_mip(struct sp_sampler_view *sp_sview,
 
 
 /**
- * Use 3D texcoords to choose a cube face, then sample the 2D cube faces.
- * Put face info into the sampler faces[] array.
+ * This function uses cube texture coordinates to choose a face of a cube and
+ * computes the 2D cube face coordinates. Puts face info into the sampler
+ * faces[] array.
  */
 static void
-sample_cube(struct sp_sampler_view *sp_sview,
-struct sp_sampler *sp_samp,
-const float s[TGSI_QUAD_SIZE],
-const float t[TGSI_QUAD_SIZE],
-const float p[TGSI_QUAD_SIZE],
-const float c0[TGSI_QUAD_SIZE],
-const float c1[TGSI_QUAD_SIZE],
-const struct filter_args *filt_args,
-float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
+convert_cube(struct sp_sampler_view *sp_sview,
+ struct sp_sampler *sp_samp,
+ const float s[TGSI_QUAD_SIZE],
+ const float t[TGSI_QUAD_SIZE],
+ const float p[TGSI_QUAD_SIZE],
+ const float c0[TGSI_QUAD_SIZE],
+ float [TGSI_QUAD_SIZE],
+ float [TGSI_QUAD_SIZE],
+ float [TGSI_QUAD_SIZE])
 {
unsigned j;
-   float [4], [4];
-
-   /* Not actually used, but the intermediate steps that do the
-* dereferencing don't know it.
-*/
-   static float [4] = { 0, 0, 0, 0 };
 
[0] = c0[0];
[1] = c0[1];
@@ -3061,8 +3056,9 @@ sample_cube(struct sp_sampler_view *sp_sview,
   const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
 
   if (arx >= ary && arx >= arz) {
- float sign = (rx >= 0.0F) ? 1.0F : -1.0F;
- uint face = (rx >= 0.0F) ? PIPE_TEX_FACE_POS_X : PIPE_TEX_FACE_NEG_X;
+ const float sign = (rx >= 0.0F) ? 1.0F : -1.0F;
+ const uint face = (rx >= 0.0F) ?
+PIPE_TEX_FACE_POS_X : PIPE_TEX_FACE_NEG_X;
  for (j = 0; j < TGSI_QUAD_SIZE; j++) {
 const float ima = -0.5F / fabsf(s[j]);
 [j] = sign *  p[j] * ima + 0.5F;
@@ -3071,8 +3067,9 @@ sample_cube(struct sp_sampler_view *sp_sview,
  }
   }
   else if (ary >= arx && ary >= arz) {
- float sign = (ry >= 0.0F) ? 1.0F : -1.0F;
- uint face = (ry >= 0.0F) ? PIPE_TEX_FACE_POS_Y : PIPE_TEX_FACE_NEG_Y;
+ const float sign = (ry >= 0.0F) ? 1.0F : -1.0F;
+ const uint face = (ry >= 0.0F) ?
+PIPE_TEX_FACE_POS_Y : PIPE_TEX_FACE_NEG_Y;
  for (j = 0; j < TGSI_QUAD_SIZE; j++) {
 const float ima = -0.5F / fabsf(t[j]);
 [j] =-s[j] * ima + 0.5F;
@@ -3081,8 +3078,9 @@ sample_cube(struct sp_sampler_view *sp_sview,
  }
   }
   else {
- float sign = (rz >= 0.0F) ? 1.0F : -1.0F;
- uint face = (rz >= 0.0F) ? PIPE_TEX_FACE_POS_Z : PIPE_TEX_FACE_NEG_Z;
+ const float sign = (rz >= 0.0F) ? 1.0F : -1.0F;
+ const uint face = (rz >= 0.0F) ?
+PIPE_TEX_FACE_POS_Z : PIPE_TEX_FACE_NEG_Z;
  for (j = 0; j < TGSI_QUAD_SIZE; j++) {
 const float ima = -0.5F / fabsf(p[j]);
 [j] = sign * -s[j] * ima + 0.5F;
@@ -3091,8 +3089,6 @@ sample_cube(struct sp_sampler_view *sp_sview,
  }
   }
}
-
-   sample_mip(sp_sview, sp_samp, , , , c0, c1, filt_args, rgba);
 }
 
 
@@ -3411,12 +3407,8 @@ softpipe_create_sampler_view(struct pipe_context *pipe,
  sview->need_swizzle = TRUE;
   }
 
-  if (view->target == PIPE_TEXTURE_CUBE ||
-  view->target == PIPE_TEXTURE_CUBE_ARRAY)
- sview->get_samples = sample_cube;
-  else {
- sview->get_samples = sample_mip;
-  }
+  sview->need_cube_convert = (view->target == PIPE_TEXTURE_CUBE ||
+  view->target == PIPE_TEXTURE_CUBE_ARRAY);
   sview->pot2d = spr->pot &&
  (view->target ==

[Mesa-dev] [PATCH v2 00/10] Reroll of textureQueryLod in softpipe

2015-09-10 Thread Krzesimir Nowak

I tried to address most of the issues and nitpicks you had in the previous
version. Please see the updated commit messages for what has changed.

I also added another commit updating the release notes and the GL3.txt.

Krzesimir Nowak (10):
  tgsi: Remove trailing backslash in comment
  softpipe: Fix textureLod with nonzero GL_TEXTURE_LOD_BIAS value
  softpipe: Split compute_lambda_lod into two functions
  softpipe: Put mip_filter_func inside a struct
  softpipe: Split code getting a filter into separate function
  softpipe: Split 3D to 2D coords conversion into separate function
  softpipe: Add functions for computing relative mipmap level
  tgsi: Add code for handling lodq opcode
  softpipe: Implement and enable textureQueryLod
  docs: Update wrt. textureQueryLod on softpipe

 docs/GL3.txt |   2 +-
 docs/relnotes/11.1.0.html|   1 +
 src/gallium/auxiliary/tgsi/tgsi_exec.c   |  48 +++-
 src/gallium/auxiliary/tgsi/tgsi_exec.h   |  10 +
 src/gallium/drivers/softpipe/sp_screen.c |   2 +-
 src/gallium/drivers/softpipe/sp_tex_sample.c | 403 +--
 src/gallium/drivers/softpipe/sp_tex_sample.h |  23 +-
 7 files changed, 393 insertions(+), 96 deletions(-)

-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 04/10] softpipe: Put mip_filter_func inside a struct

2015-09-10 Thread Krzesimir Nowak

Putting this function pointer into a struct enables grouping of
several related functions in a single place. For now it is just a
single function, but the struct will be later extended with a
mip_level_func for returning relative mip level.

v2:
  - renamed sp_mip struct to sp_filter_funcs
  - renamed sp_filter_funcs instances from mip_foo to funcs_foo
  - split overly long lines
  - sp_sampler now holds a pointer to sp_filter_funcs instead of an
instance of it
  - some const fixes

Reviewed-by: Brian Paul 
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 45 +---
 src/gallium/drivers/softpipe/sp_tex_sample.h |  5 +++-
 2 files changed, 38 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 30c9cb0..8f7cb1a 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -2515,6 +2515,29 @@ mip_filter_linear_2d_linear_repeat_POT(
}
 }
 
+static const struct sp_filter_funcs funcs_linear = {
+   mip_filter_linear
+};
+
+static const struct sp_filter_funcs funcs_nearest = {
+   mip_filter_nearest
+};
+
+static const struct sp_filter_funcs funcs_none = {
+   mip_filter_none
+};
+
+static const struct sp_filter_funcs funcs_none_no_filter_select = {
+   mip_filter_none_no_filter_select
+};
+
+static const struct sp_filter_funcs funcs_linear_aniso = {
+   mip_filter_linear_aniso
+};
+
+static const struct sp_filter_funcs funcs_linear_2d_linear_repeat_POT = {
+   mip_filter_linear_2d_linear_repeat_POT
+};
 
 /**
  * Do shadow/depth comparisons.
@@ -2918,18 +2941,18 @@ sample_mip(struct sp_sampler_view *sp_sview,
const struct filter_args *filt_args,
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
 {
-   mip_filter_func mip_filter;
+   const struct sp_filter_funcs *funcs = NULL;
img_filter_func min_img_filter = NULL;
img_filter_func mag_img_filter = NULL;
 
if (filt_args->control == tgsi_sampler_gather) {
-  mip_filter = mip_filter_nearest;
+  funcs = &funcs_nearest;
   min_img_filter = get_img_filter(sp_sview, _samp->base, 
PIPE_TEX_FILTER_LINEAR, true);
} else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) {
-  mip_filter = mip_filter_linear_2d_linear_repeat_POT;
+  funcs = &funcs_linear_2d_linear_repeat_POT;
}
else {
-  mip_filter = sp_samp->mip_filter;
+  funcs = sp_samp->filter_funcs;
   min_img_filter = get_img_filter(sp_sview, _samp->base, 
sp_samp->min_img_filter, false);
   if (sp_samp->min_mag_equal) {
  mag_img_filter = min_img_filter;
@@ -2939,8 +2962,8 @@ sample_mip(struct sp_sampler_view *sp_sview,
   }
}
 
-   mip_filter(sp_sview, sp_samp, min_img_filter, mag_img_filter,
-  s, t, p, c0, lod, filt_args, rgba);
+   funcs->filter(sp_sview, sp_samp, min_img_filter, mag_img_filter,
+ s, t, p, c0, lod, filt_args, rgba);
 
if (sp_samp->base.compare_mode != PIPE_TEX_COMPARE_NONE) {
   sample_compare(sp_sview, sp_samp, s, t, p, c0, lod, filt_args->control, 
rgba);
@@ -3239,13 +3262,13 @@ softpipe_create_sampler_state(struct pipe_context *pipe,
switch (sampler->min_mip_filter) {
case PIPE_TEX_MIPFILTER_NONE:
   if (sampler->min_img_filter == sampler->mag_img_filter)
- samp->mip_filter = mip_filter_none_no_filter_select;
+ samp->filter_funcs = &funcs_none_no_filter_select;
   else
- samp->mip_filter = mip_filter_none;
+ samp->filter_funcs = &funcs_none;
   break;
 
case PIPE_TEX_MIPFILTER_NEAREST:
-  samp->mip_filter = mip_filter_nearest;
+  samp->filter_funcs = &funcs_nearest;
   break;
 
case PIPE_TEX_MIPFILTER_LINEAR:
@@ -3257,11 +3280,11 @@ softpipe_create_sampler_state(struct pipe_context *pipe,
   sampler->max_anisotropy <= 1) {
  samp->min_mag_equal_repeat_linear = TRUE;
   }
-  samp->mip_filter = mip_filter_linear;
+  samp->filter_funcs = &funcs_linear;
 
   /* Anisotropic filtering extension. */
   if (sampler->max_anisotropy > 1) {
- samp->mip_filter = mip_filter_linear_aniso;
+ samp->filter_funcs = &funcs_linear_aniso;
 
  /* Override min_img_filter:
   * min_img_filter needs to be set to NEAREST since we need to access
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h 
b/src/gallium/drivers/softpipe/sp_tex_sample.h
index 7d1aafc..cee545d 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.h
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.h
@@ -128,6 +128,9 @@ struct sp_sampler_view
 
 };
 
+struct sp_filter_funcs {
+   mip_filter_func filter;
+};
 
 struct sp_sampler {
struct pipe_sampler_state base;
@@ -144,7 +147,7 @@ struct sp_sampler {
wrap_linear_func linear_texcoord_t;
wrap_linear_func linear_texcoord_p;
 
-   mip_filter_func mip_filter;
+   const struct sp_filter_funcs *filter_funcs;
 };
 
 
-- 
2.4.3

Re: [Mesa-dev] [PATCH v2 3/3] glsl/cs: Initialize gl_GlobalInvocationID in main()

2015-09-10 Thread Alejandro Piñeiro

Not a full review, but a comment. See inline.

On 23/08/15 09:09, Jordan Justen wrote:
> We initialize gl_GlobalInvocationID based on the extension spec
> formula:
>
> gl_GlobalInvocationID =
> gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID
>
> https://www.opengl.org/registry/specs/ARB/compute_shader.txt
>
> Signed-off-by: Jordan Justen 
> Cc: Ilia Mirkin 
> ---
>  src/glsl/builtin_variables.cpp  | 58 
> +
>  src/glsl/glsl_parser_extras.cpp |  2 ++
>  src/glsl/ir.h   |  3 +++
>  3 files changed, 63 insertions(+)
>
> diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp
> index 5d9e446..8f8be90 100644
> --- a/src/glsl/builtin_variables.cpp
> +++ b/src/glsl/builtin_variables.cpp
> @@ -22,6 +22,8 @@
>   */
>  
>  #include "ir.h"
> +#include "ir_builder.h"
> +#include "linker.h"
>  #include "glsl_parser_extras.h"
>  #include "glsl_symbol_table.h"
>  #include "main/core.h"
> @@ -1056,6 +1058,7 @@ builtin_variable_generator::generate_cs_special_vars()
>  "gl_LocalInvocationID");
> add_system_value(SYSTEM_VALUE_WORK_GROUP_ID, glsl_type::uvec3_type,
>  "gl_WorkGroupID");
> +   add_variable("gl_GlobalInvocationID", glsl_type::uvec3_type, ir_var_auto, 
> 0);
> /* TODO: finish this. */
>  }
>  
> @@ -1212,3 +1215,58 @@ _mesa_glsl_initialize_variables(exec_list 
> *instructions,
>break;
> }
>  }
> +
> +
> +/**
> + * Initialize compute shader variables with values that are derived from 
> other
> + * compute shader variable.
> + */
> +static void
> +initialize_cs_derived_variables(gl_shader *shader,
> +ir_function_signature *const main_sig)
> +{
> +   assert(shader->Stage == MESA_SHADER_COMPUTE);
> +
> +   ir_variable *gl_GlobalInvocationID =
> +  shader->symbols->get_variable("gl_GlobalInvocationID");
> +   assert(gl_GlobalInvocationID);
> +   ir_variable *gl_WorkGroupID =
> +  shader->symbols->get_variable("gl_WorkGroupID");
> +   assert(gl_WorkGroupID);
> +   ir_variable *gl_WorkGroupSize =
> +  shader->symbols->get_variable("gl_WorkGroupSize");
> +   assert(gl_WorkGroupSize);

This assert seems somewhat too restrictive at this point. After this
commit, the following piglit tests are failing:
  *
http://cgit.freedesktop.org/piglit/tree/tests/spec/arb_compute_shader/compiler/gl_WorkGroupSize_without_layout.comp
  *
http://cgit.freedesktop.org/piglit/tree/tests/spec/arb_compute_shader/linker/no_local_work_size.shader_test

The correct outcome for both tests is an error (a compile error and a link
error, respectively), but the assert causes a crash instead.

> +   ir_variable *gl_LocalInvocationID =
> +  shader->symbols->get_variable("gl_LocalInvocationID");
> +   assert(gl_LocalInvocationID);
> +
> +   /* gl_GlobalInvocationID =
> +*gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID
> +*/
> +   ir_instruction *inst =
> +  ir_builder::assign(gl_GlobalInvocationID,
> + ir_builder::add(ir_builder::mul(gl_WorkGroupID,
> + gl_WorkGroupSize),
> + gl_LocalInvocationID));
> +   main_sig->body.push_head(inst);
> +}
> +
> +
> +/**
> + * Initialize builtin variables with values based on other builtin variables.
> + * These are initialized in the main function.
> + */
> +void
> +_mesa_glsl_initialize_derived_variables(gl_shader *shader)
> +{
> +   /* We only need to set CS variables currently. */
> +   if (shader->Stage != MESA_SHADER_COMPUTE)
> +  return;
> +
> +   ir_function_signature *const main_sig =
> +  _mesa_get_main_function_signature(shader);
> +   if (main_sig == NULL)
> +  return;
> +
> +   initialize_cs_derived_variables(shader, main_sig);
> +}
> diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
> index 6440a96..eefa12a 100644
> --- a/src/glsl/glsl_parser_extras.cpp
> +++ b/src/glsl/glsl_parser_extras.cpp
> @@ -1692,6 +1692,8 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, 
> struct gl_shader *shader,
>}
> }
>  
> +   _mesa_glsl_initialize_derived_variables(shader);
> +
> delete state->symbols;
> ralloc_free(state);
>  }
> diff --git a/src/glsl/ir.h b/src/glsl/ir.h
> index 750321e..4c88144 100644
> --- a/src/glsl/ir.h
> +++ b/src/glsl/ir.h
> @@ -2513,6 +2513,9 @@ _mesa_glsl_initialize_variables(exec_list *instructions,
>   struct _mesa_glsl_parse_state *state);
>  
>  extern void
> +_mesa_glsl_initialize_derived_variables(gl_shader *shader);
> +
> +extern void
>  _mesa_glsl_initialize_functions(_mesa_glsl_parse_state *state);
>  
>  extern void

-- 
Alejandro Piñeiro (apinhe...@igalia.com)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 11/70] glsl: Add parser/compiler support for unsized array's length()

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

The unsized array length is computed with the following formula:

array.length() =
   max((buffer_object_size - offset_of_array) / stride_of_array, 0)

Of these, only the buffer size needs to be provided by the backends; the
frontend already knows the values of the other two variables.

This patch identifies the cases where we need to get the length of an
unsized array, injecting ir_unop_ssbo_unsized_array_length expressions
that will be lowered (in a later patch) to inject the formula mentioned
above.

It also adds the ir_unop_get_buffer_size expression that drivers will
implement to provide the buffer length.

v2:
- Do not define a triop that will force backends to implement the
  entire formula, they should only need to provide the buffer size
  since the other values are known by the frontend (Curro).

Signed-off-by: Samuel Iglesias Gonsalvez 
---
 src/glsl/ast_function.cpp | 13 +
 src/glsl/ir.cpp   |  7 +++
 src/glsl/ir.h | 19 ++-
 src/glsl/ir_validate.cpp  | 11 +++
 src/glsl/link_uniforms.cpp|  8 +++-
 .../drivers/dri/i965/brw_fs_channel_expressions.cpp   |  2 ++
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp|  8 
 src/mesa/program/ir_to_mesa.cpp   |  2 ++
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp|  5 +
 9 files changed, 69 insertions(+), 6 deletions(-)

diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp
index 803edf5..0fb8928 100644
--- a/src/glsl/ast_function.cpp
+++ b/src/glsl/ast_function.cpp
@@ -1593,11 +1593,16 @@ ast_function_expression::handle_method(exec_list 
*instructions,
 
   if (op->type->is_array()) {
  if (op->type->is_unsized_array()) {
-_mesa_glsl_error(, state, "length called on unsized array");
-goto fail;
+if (!state->ARB_shader_storage_buffer_object_enable) {
+   _mesa_glsl_error(, state, "length called on unsized array"
+ " only available with "
+ 
"ARB_shader_storage_buffer_object");
+}
+/* Calculate length of an unsized array in run-time */
+result = new(ctx) ir_expression(ir_unop_ssbo_unsized_array_length, 
op);
+ } else {
+result = new(ctx) ir_constant(op->type->array_size());
  }
-
- result = new(ctx) ir_constant(op->type->array_size());
   } else if (op->type->is_vector()) {
  if (state->ARB_shading_language_420pack_enable) {
 /* .length() returns int. */
diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index 594fc33..60d8770 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -342,6 +342,11 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
   op0->type->vector_elements, 1);
   break;
 
+   case ir_unop_get_buffer_size:
+   case ir_unop_ssbo_unsized_array_length:
+  this->type = glsl_type::int_type;
+  break;
+
default:
   assert(!"not reached: missing automatic type setup for ir_expression");
   this->type = op0->type;
@@ -571,6 +576,8 @@ static const char *const operator_strs[] = {
"noise",
"subroutine_to_int",
"interpolate_at_centroid",
+   "get_buffer_size",
+   "ssbo_unsized_array_length",
"+",
"-",
"*",
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 9559dc4..ca6179c 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -1423,9 +1423,26 @@ enum ir_expression_operation {
ir_unop_interpolate_at_centroid,
 
/**
+* Ask the driver for the total size of a buffer block.
+*
+* operand0 is the ir_constant buffer block index in the linked shader.
+*/
+   ir_unop_get_buffer_size,
+
+   /**
+* Calculate length of an unsized array inside a buffer block.
+* This opcode is going to be replaced in a lowering pass inside
+* the linker.
+*
+* operand0 is the unsized array's ir_value for the calculation
+* of its length.
+*/
+   ir_unop_ssbo_unsized_array_length,
+
+   /**
 * A sentinel marking the last of the unary operations.
 */
-   ir_last_unop = ir_unop_interpolate_at_centroid,
+   ir_last_unop = ir_unop_ssbo_unsized_array_length,
 
ir_binop_add,
ir_binop_sub,
diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp
index 3f0dea7..935571a 100644
--- a/src/glsl/ir_validate.cpp
+++ b/src/glsl/ir_validate.cpp
@@ -409,6 +409,17 @@ ir_validate::visit_leave(ir_expression *ir)
   assert(ir->operands[0]->type->is_float());
   break;
 
+   case ir_unop_get_buffer_size:
+  assert(ir->type == glsl_type::int_type);
+  assert(ir->operands[0]->type == glsl_type::uint_type);
+  break;
+
+   case

[Mesa-dev] [PATCH v5 00/70] ARB_shader_storage_buffer_object (mesa, i965)

2015-09-10 Thread Iago Toral Quiroga

Hi,

this is the latest version of the ARB_shader_storage_buffer_object
implementation. A good part of the frontend bits for this are already in
master, but this adds some more missing pieces, specifically std430 and
memory qualifiers. Additionally, this provides the i965 implementation.

Changes in this version include:

- A new implementation of std430: we no longer have to propagate the layout
qualifier information down to the type of each buffer variable, which makes
the implementation a lot easier.

- Added a Nir/Vec4 implementation for all the pieces: loads, stores, atomics
and unsized arrays.

- A new implementation for computing the length of unsized arrays in SSBO
definitions that pushes less work to the backends.

- A bunch of fixes to various patches throughout the series.

- All the review feedback addressed.

This branch no longer depends on other uncommitted work: the few remaining
patches we needed (a vec4 implementation of Curro's IR builder together with
some helper functions) are now part of this series (patches 33-35).

We have not dropped the old vec4_visitor implementation from the patch set.
However, now that Nir/Vec4 has been enabled by default and there are patches
on the mailing list that address the shader-db regressions, it looks like we
might not be interested in these patches any more. Feel free to skip the
review of these if that is the case. They are:

Patch 16: i965/vec4: Implement ir_unop_get_buffer_size
Patch 39: i965/vec4: Implement __intrinsic_store_ssbo
Patch 43: i965/vec4: Implement __intrinsic_load_ssbo
Patch 53: i965/vec4: Implement lowered SSBO atomic intrinsics

Finally, notice that there are 23 patches in this series that already have
a Reviewed-by.

Development branch:

https://github.com/Igalia/mesa.git
branch itoral-ARB_shader_storage_buffer_object-v5

Piglit:

All SSBO piglit tests are in piglit master under
tests/spec/arb_shader_storage_buffer_object.

Samuel has a modified version of Ian's branch for random testing of UBOs that
he used to verify the implementation of std430. We found a few bugs thanks to
that! If someone wants to play with it, this modified version is here:

https://github.com/Igalia/piglit.git
branch ssbo-random-tests-std430

Antia Puentes (1):
  glsl: Mark as active all elements of shared/std140 block arrays

Francisco Jerez (3):
  i965/vec4: Introduce VEC4 IR builder.
  i965/vec4: Import helpers to convert vectors into arrays and back.
  i965/vec4: Import surface message builder functions.

Iago Toral Quiroga (29):
  i965: Use 16-byte offset alignment for shader storage buffers
  i965: Implement DriverFlags.NewShaderStorageBuffer
  i965: Set MaxShaderStorageBuffers for compute shaders
  i965: Upload Shader Storage Buffer Object surfaces
  i965: handle visiting of ir_var_shader_storage variables
  i965/fs: Do not split buffer variables
  nir: Implement __intrinsic_store_ssbo
  i965/nir/fs: Implement nir_intrinsic_store_ssbo
  i965/nir/vec4: Implement nir_intrinsic_store_ssbo
  i965/vec4: Implement __intrinsic_store_ssbo
  nir: Implement __intrinsic_load_ssbo
  i965/nir/fs: Implement nir_intrinsic_load_ssbo
  i965/nir/vec4: Implement nir_intrinsic_load_ssbo
  i965/vec4: Implement __intrinsic_load_ssbo
  nir/glsl_to_nir: ignore an instruction's dest if it hasn't any
  glsl: Rename atomic counter functions
  glsl: Add atomic functions from ARB_shader_storage_buffer_object
  glsl: lower SSBO atomic intrinsics
  nir: Implement lowered SSBO atomic intrinsics
  i965/nir/fs: Implement nir_intrinsic_ssbo_atomic_*
  i965/nir/vec4: Implement nir_intrinsic_ssbo_atomic_*
  i965/vec4: Implement lowered SSBO atomic intrinsics
  glsl: First argument to atomic functions must be a buffer variable
  mesa: Add queries for GL_SHADER_STORAGE_BUFFER
  glsl: Allow use of memory qualifiers with
ARB_shader_storage_buffer_object.
  glsl: Apply memory qualifiers to buffer variables
  glsl: Do not allow assignments to read-only buffer variables
  glsl: Do not allow reads from write-only buffer variables
  docs: Mark ARB_shader_storage_buffer_object as done for i965

Kristian Høgsberg (1):
  glsl: atomic counters can be declared as buffer-qualified variables

Samuel Iglesias Gonsalvez (36):
  mesa: set MAX_SHADER_STORAGE_BUFFERS to 15.
  i965: set ARB_shader_storage_buffer_object related constant values
  glsl: return error if unsized arrays are found in OpenGL ES
  glsl: add support for unsized arrays in shader storage blocks
  glsl: Add parser/compiler support for unsized array's length()
  glsl: implement unsized array length
  nir: Implement ir_unop_get_buffer_size
  i965/vec4: Implement VS_OPCODE_GET_BUFFER_SIZE
  i965/vec4/nir: implement nir_intrinsic_get_buffer_size
  i965/vec4: Implement ir_unop_get_buffer_size
  i965/fs: Implement FS_OPCODE_GET_BUFFER_SIZE
  i965/fs/nir: implement nir_intrinsic_get_buffer_size
  i965/wm: emit null buffer surfaces when null buffers are attached
  i965/wm: surfaces should have the API buffer size, not the drm

[Mesa-dev] [PATCH v5 06/70] i965: Upload Shader Storage Buffer Object surfaces

2015-09-10 Thread Iago Toral Quiroga

Since these are a special kind of UBO, we emit them together, reusing the
same infrastructure. However, we use a RAW surface so we can reuse the
existing untyped read/write/atomic messages, which include a pixel mask
header that we need to set to obtain correct behavior with helper
invocations of the fragment shader.

Reviewed-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_context.h  |  6 +++
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 64 +++-
 2 files changed, 57 insertions(+), 13 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 41ba769..c46f343 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1778,6 +1778,12 @@ void brw_create_constant_surface(struct brw_context *brw,
  uint32_t size,
  uint32_t *out_offset,
  bool dword_pitch);
+void brw_create_buffer_surface(struct brw_context *brw,
+   drm_intel_bo *bo,
+   uint32_t offset,
+   uint32_t size,
+   uint32_t *out_offset,
+   bool dword_pitch);
 void brw_update_buffer_texture_surface(struct gl_context *ctx,
unsigned unit,
uint32_t *surf_offset);
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 8213f4e..4c96572 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -411,6 +411,29 @@ brw_create_constant_surface(struct brw_context *brw,
 }
 
 /**
+ * Create the buffer surface. Shader buffer variables will be
+ * read from / write to this buffer with Data Port Read/Write
+ * instructions/messages.
+ */
+void
+brw_create_buffer_surface(struct brw_context *brw,
+  drm_intel_bo *bo,
+  uint32_t offset,
+  uint32_t size,
+  uint32_t *out_offset,
+  bool dword_pitch)
+{
+   /* Use a raw surface so we can reuse existing untyped read/write/atomic
+* messages. We need these specifically for the fragment shader since they
+* include a pixel mask header that we need to ensure correct behavior
+* with helper invocations, which cannot write to the buffer.
+*/
+   brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
+   BRW_SURFACEFORMAT_RAW,
+   size, 1, true);
+}
+
+/**
  * Set up a binding table entry for use by stream output logic (transform
  * feedback).
  *
@@ -898,25 +921,40 @@ brw_upload_ubo_surfaces(struct brw_context *brw,
uint32_t *surf_offsets =
   _state->surf_offset[prog_data->binding_table.ubo_start];
 
-   for (unsigned i = 0; i < shader->NumUniformBlocks; i++) {
-  struct gl_uniform_buffer_binding *binding;
+   for (int i = 0; i < shader->NumUniformBlocks; i++) {
   struct intel_buffer_object *intel_bo;
 
-  binding = >UniformBufferBindings[shader->UniformBlocks[i].Binding];
-  intel_bo = intel_buffer_object(binding->BufferObject);
-  drm_intel_bo *bo =
- intel_bufferobj_buffer(brw, intel_bo,
-binding->Offset,
-binding->BufferObject->Size - binding->Offset);
-
   /* Because behavior for referencing outside of the binding's size in the
* glBindBufferRange case is undefined, we can just bind the whole buffer
* glBindBufferBase wants and be a correct implementation.
*/
-  brw_create_constant_surface(brw, bo, binding->Offset,
-  bo->size - binding->Offset,
-  _offsets[i],
-  dword_pitch);
+  if (!shader->UniformBlocks[i].IsShaderStorage) {
+ struct gl_uniform_buffer_binding *binding;
+ binding =
+>UniformBufferBindings[shader->UniformBlocks[i].Binding];
+ intel_bo = intel_buffer_object(binding->BufferObject);
+ drm_intel_bo *bo =
+intel_bufferobj_buffer(brw, intel_bo,
+   binding->Offset,
+   binding->BufferObject->Size - 
binding->Offset);
+ brw_create_constant_surface(brw, bo, binding->Offset,
+ bo->size - binding->Offset,
+ _offsets[i],
+ dword_pitch);
+  } else {
+ struct gl_shader_storage_buffer_binding *binding;
+ binding =
+
>ShaderStorageBufferBindings[shader->UniformBlocks[i].Binding];
+ intel_bo =

[Mesa-dev] [PATCH v5 10/70] glsl: add support for unsized arrays in shader storage blocks

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

They can only be defined as the last member of a shader
storage block.

When an unsized array is used in different shaders, it might be
converted to arrays of different sizes; avoid raising a linker error
in that case.

v2:
- Rework error condition and error messages (Timothy Arceri)

v3:
- Move OpenGL ES check to its own patch.

Signed-off-by: Samuel Iglesias Gonsalvez 
Reviewed-by: Jordan Justen 
---
 src/glsl/ast_array_index.cpp |   3 +-
 src/glsl/ast_to_hir.cpp  |  74 +-
 src/glsl/ir.cpp  |   1 +
 src/glsl/ir.h|  14 ++
 src/glsl/linker.cpp  | 107 ---
 5 files changed, 150 insertions(+), 49 deletions(-)

diff --git a/src/glsl/ast_array_index.cpp b/src/glsl/ast_array_index.cpp
index ae399f0..dfb3107 100644
--- a/src/glsl/ast_array_index.cpp
+++ b/src/glsl/ast_array_index.cpp
@@ -226,7 +226,8 @@ _mesa_ast_array_index_to_hir(void *mem_ctx,
  * by the linker.
  */
  }
- else {
+ else if (array->variable_referenced()->data.mode !=
+  ir_var_shader_storage) {
 _mesa_glsl_error(, state, "unsized array index must be 
constant");
  }
   } else if (array->type->fields.array->is_interface()
diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index b67ae70..92038a6 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -5880,6 +5880,19 @@ private:
bool found;
 };
 
+static bool
+is_unsized_array_last_element(ir_variable *v)
+{
+   const glsl_type *interface_type = v->get_interface_type();
+   int length = interface_type->length;
+
+   assert(v->type->is_unsized_array());
+
+   /* Check if it is the last element of the interface */
+   if (strcmp(interface_type->fields.structure[length-1].name, v->name) == 0)
+  return true;
+   return false;
+}
 
 ir_rvalue *
 ast_interface_block::hir(exec_list *instructions,
@@ -6253,18 +6266,29 @@ ast_interface_block::hir(exec_list *instructions,
  handle_tess_ctrl_shader_output_decl(state, loc, var);
 
   for (unsigned i = 0; i < num_variables; i++) {
- /* From GLSL ES 3.10 spec, section 4.1.9 "Arrays":
-  *
-  * "If an array is declared as the last member of a shader storage
-  * block and the size is not specified at compile-time, it is
-  * sized at run-time. In all other cases, arrays are sized only
-  * at compile-time."
-  */
- if (state->es_shader && fields[i].type->is_unsized_array()) {
- _mesa_glsl_error(, state, "unsized array `%s' definition: "
-  "only last member of a shader storage block "
-  "can be defined as unsized array",
-  fields[i].name);
+ if (fields[i].type->is_unsized_array()) {
+if (var_mode == ir_var_shader_storage) {
+   if (i != (num_variables - 1)) {
+  _mesa_glsl_error(, state, "unsized array `%s' 
definition: "
+   "only last member of a shader storage block 
"
+   "can be defined as unsized array",
+   fields[i].name);
+   }
+} else {
+   /* From GLSL ES 3.10 spec, section 4.1.9 "Arrays":
+   *
+   * "If an array is declared as the last member of a shader 
storage
+   * block and the size is not specified at compile-time, it is
+   * sized at run-time. In all other cases, arrays are sized only
+   * at compile-time."
+   */
+   if (state->es_shader) {
+  _mesa_glsl_error(, state, "unsized array `%s' 
definition: "
+ "only last member of a shader storage block "
+ "can be defined as unsized array",
+ fields[i].name);
+   }
+}
  }
   }
 
@@ -6359,6 +6383,32 @@ ast_interface_block::hir(exec_list *instructions,
  var->data.explicit_binding = this->layout.flags.q.explicit_binding;
  var->data.binding = this->layout.binding;
 
+ if (var->type->is_unsized_array()) {
+if (var->is_in_shader_storage_block()) {
+   if (!is_unsized_array_last_element(var)) {
+  _mesa_glsl_error(, state, "unsized array `%s' 
definition: "
+   "only last member of a shader storage block 
"
+   "can be defined as unsized array",
+   var->name);
+   }
+   var->data.from_ssbo_unsized_array = true;
+} else {
+   /* From GLSL ES 3.10 spec, section 4.1.9 "Arrays":
+

[Mesa-dev] [PATCH v5 04/70] i965: set ARB_shader_storage_buffer_object related constant values

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

v2:
- Add tessellation shader constants assignment

v3:
- Set MaxShaderStorageBufferBindings to 36.

Signed-off-by: Samuel Iglesias Gonsalvez 
Reviewed-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_context.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index c8e8a68..9982049 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -562,6 +562,18 @@ brw_initialize_context_constants(struct brw_context *brw)
ctx->Const.TextureBufferOffsetAlignment = 16;
ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;
 
+   /* FIXME: Tessellation stages are not yet supported in i965, so
+* MaxCombinedShaderStorageBlocks doesn't take them into account.
+*/
+   ctx->Const.Program[MESA_SHADER_VERTEX].MaxShaderStorageBlocks = 12;
+   ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxShaderStorageBlocks = 12;
+   ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxShaderStorageBlocks = 0;
+   ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxShaderStorageBlocks = 0;
+   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks = 12;
+   ctx->Const.Program[MESA_SHADER_COMPUTE].MaxShaderStorageBlocks = 12;
+   ctx->Const.MaxCombinedShaderStorageBlocks = 12 * 3;
+   ctx->Const.MaxShaderStorageBufferBindings = 36;
+
if (brw->gen >= 6) {
   ctx->Const.MaxVarying = 32;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCHv3 1/2] egl/dri2: Close file descriptor on error.

2015-09-10 Thread Emil Velikov

From: Matt Turner 

v2: [Emil Velikov]
Rework the error path to a common goto, close only if we own the fd.
v3: [Emil Velikov]
Always close the fd (we either opened the device or dup'd) (Boyan, Ian)

Signed-off-by: Emil Velikov 
Reviewed-by: Boyan Ding 
---
 src/egl/drivers/dri2/platform_drm.c | 27 ++-
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/src/egl/drivers/dri2/platform_drm.c 
b/src/egl/drivers/dri2/platform_drm.c
index eda5087..7e97280 100644
--- a/src/egl/drivers/dri2/platform_drm.c
+++ b/src/egl/drivers/dri2/platform_drm.c
@@ -623,26 +623,20 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp)
   dri2_dpy->own_device = 1;
   gbm = gbm_create_device(fd);
   if (gbm == NULL)
- return EGL_FALSE;
+ goto cleanup;
}
 
-   if (strcmp(gbm_device_get_backend_name(gbm), "drm") != 0) {
-  free(dri2_dpy);
-  return EGL_FALSE;
-   }
+   if (strcmp(gbm_device_get_backend_name(gbm), "drm") != 0)
+  goto cleanup;
 
dri2_dpy->gbm_dri = gbm_dri_device(gbm);
-   if (dri2_dpy->gbm_dri->base.type != GBM_DRM_DRIVER_TYPE_DRI) {
-  free(dri2_dpy);
-  return EGL_FALSE;
-   }
+   if (dri2_dpy->gbm_dri->base.type != GBM_DRM_DRIVER_TYPE_DRI)
+  goto cleanup;
 
if (fd < 0) {
   fd = fcntl(gbm_device_get_fd(gbm), F_DUPFD_CLOEXEC, 3);
-  if (fd < 0) {
- free(dri2_dpy);
- return EGL_FALSE;
-  }
+  if (fd < 0)
+ goto cleanup;
}
 
dri2_dpy->fd = fd;
@@ -727,4 +721,11 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp)
dri2_dpy->vtbl = _drm_display_vtbl;
 
return EGL_TRUE;
+
+cleanup:
+   if (fd >= 0)
+  close(fd);
+
+   free(dri2_dpy);
+   return EGL_FALSE;
 }
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 05/10] softpipe: Split code getting a filter into separate function

2015-09-10 Thread Krzesimir Nowak

This function will later be used by textureQueryLod. The
img_filter_func arguments are optional, because textureQueryLod will
not need them.

v2:
  - adapted to changes in previous commit (renames)
  - simplified conditions a bit
  - updated docs
  - split overly long lines

Reviewed-by: Brian Paul 
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 58 
 1 file changed, 41 insertions(+), 17 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 8f7cb1a..4bfb300 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -2929,6 +2929,43 @@ get_img_filter(const struct sp_sampler_view *sp_sview,
}
 }
 
+/**
+ * Get mip filter funcs, and optionally both img min filter and img mag
+ * filter. Note that both img filter function pointers must be either non-NULL
+ * or NULL.
+ */
+static void
+get_filters(struct sp_sampler_view *sp_sview,
+struct sp_sampler *sp_samp,
+enum tgsi_sampler_control control,
+const struct sp_filter_funcs **funcs,
+img_filter_func *min,
+img_filter_func *mag)
+{
+   assert(funcs);
+   if (control == tgsi_sampler_gather) {
+  *funcs = _nearest;
+  if (min) {
+ *min = get_img_filter(sp_sview, _samp->base,
+   PIPE_TEX_FILTER_LINEAR, true);
+  }
+   } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) {
+  *funcs = _linear_2d_linear_repeat_POT;
+   } else {
+  *funcs = sp_samp->filter_funcs;
+  if (min) {
+ assert(mag);
+ *min = get_img_filter(sp_sview, _samp->base,
+   sp_samp->min_img_filter, false);
+ if (sp_samp->min_mag_equal) {
+*mag = *min;
+ } else {
+*mag = get_img_filter(sp_sview, _samp->base,
+  sp_samp->base.mag_img_filter, false);
+ }
+  }
+   }
+}
 
 static void
 sample_mip(struct sp_sampler_view *sp_sview,
@@ -2945,28 +2982,15 @@ sample_mip(struct sp_sampler_view *sp_sview,
img_filter_func min_img_filter = NULL;
img_filter_func mag_img_filter = NULL;
 
-   if (filt_args->control == tgsi_sampler_gather) {
-  funcs = _nearest;
-  min_img_filter = get_img_filter(sp_sview, _samp->base, 
PIPE_TEX_FILTER_LINEAR, true);
-   } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) {
-  funcs = _linear_2d_linear_repeat_POT;
-   }
-   else {
-  funcs = sp_samp->filter_funcs;
-  min_img_filter = get_img_filter(sp_sview, _samp->base, 
sp_samp->min_img_filter, false);
-  if (sp_samp->min_mag_equal) {
- mag_img_filter = min_img_filter;
-  }
-  else {
- mag_img_filter = get_img_filter(sp_sview, _samp->base, 
sp_samp->base.mag_img_filter, false);
-  }
-   }
+   get_filters(sp_sview, sp_samp, filt_args->control,
+   , _img_filter, _img_filter);
 
funcs->filter(sp_sview, sp_samp, min_img_filter, mag_img_filter,
  s, t, p, c0, lod, filt_args, rgba);
 
if (sp_samp->base.compare_mode != PIPE_TEX_COMPARE_NONE) {
-  sample_compare(sp_sview, sp_samp, s, t, p, c0, lod, filt_args->control, 
rgba);
+  sample_compare(sp_sview, sp_samp, s, t, p, c0,
+ lod, filt_args->control, rgba);
}
 
if (sp_sview->need_swizzle && filt_args->control != tgsi_sampler_gather) {
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 02/10] softpipe: Fix textureLod with nonzero GL_TEXTURE_LOD_BIAS value

2015-09-10 Thread Krzesimir Nowak

The level-of-detail bias simply wasn't added in the explicit LOD case.
This case seems to be tested only in piglit's
fs-texturequerylod-nearest-biased test, which is currently skipped, as
softpipe does not support textureQueryLod at the moment.

Reviewed-by: Brian Paul 
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 565fca6..19188b0 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -1892,7 +1892,7 @@ compute_lambda_lod(struct sp_sampler_view *sp_sview,
   break;
case tgsi_sampler_lod_explicit:
   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
- lod[i] = CLAMP(lod_in[i], min_lod, max_lod);
+ lod[i] = CLAMP(lod_in[i] + lod_bias, min_lod, max_lod);
   }
   break;
case tgsi_sampler_lod_zero:
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 09/10] softpipe: Implement and enable textureQueryLod

2015-09-10 Thread Krzesimir Nowak

Passes the shader piglit tests and introduces no regressions.

This commit finally makes use of the refactoring in previous
commits.

v2:
  - adapted the code to changes in previous commits (renames,
need_cube_convert stuff)
  - split overly long lines

Reviewed-by: Brian Paul 
---
 src/gallium/drivers/softpipe/sp_screen.c |  2 +-
 src/gallium/drivers/softpipe/sp_tex_sample.c | 55 +++-
 2 files changed, 55 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_screen.c 
b/src/gallium/drivers/softpipe/sp_screen.c
index 0bfd9c3..7ca8a67 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -193,9 +193,9 @@ softpipe_get_param(struct pipe_screen *screen, enum 
pipe_cap param)
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
   return 4;
case PIPE_CAP_TEXTURE_GATHER_SM5:
+   case PIPE_CAP_TEXTURE_QUERY_LOD:
   return 1;
case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
-   case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_SAMPLE_SHADING:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
   return 0;
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 4a4cddf..9f2ba01 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -3603,6 +3603,59 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler,
}
 }
 
+static void
+sp_tgsi_query_lod(struct tgsi_sampler *tgsi_sampler,
+  const unsigned sview_index,
+  const unsigned sampler_index,
+  const float s[TGSI_QUAD_SIZE],
+  const float t[TGSI_QUAD_SIZE],
+  const float p[TGSI_QUAD_SIZE],
+  const float c0[TGSI_QUAD_SIZE],
+  const enum tgsi_sampler_control control,
+  float mipmap[TGSI_QUAD_SIZE],
+  float lod[TGSI_QUAD_SIZE])
+{
+   static const float lod_in[TGSI_QUAD_SIZE] = { 0.0, 0.0, 0.0, 0.0 };
+
+   struct sp_tgsi_sampler *sp_tgsi_samp =
+  (struct sp_tgsi_sampler *)tgsi_sampler;
+   struct sp_sampler_view *sp_sview;
+   struct sp_sampler *sp_samp;
+   const struct sp_filter_funcs *funcs;
+   int i;
+
+   assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
+   assert(sampler_index < PIPE_MAX_SAMPLERS);
+   assert(sp_tgsi_samp->sp_sampler[sampler_index]);
+
+   sp_sview = _tgsi_samp->sp_sview[sview_index];
+   sp_samp = sp_tgsi_samp->sp_sampler[sampler_index];
+   /* always have a view here but texture is NULL if no sampler view was
+* set. */
+   if (!sp_sview->base.texture) {
+  for (i = 0; i < TGSI_QUAD_SIZE; i++) {
+ mipmap[i] = 0.0f;
+ lod[i] = 0.0f;
+  }
+  return;
+   }
+
+   if (sp_sview->need_cube_convert) {
+  float cs[TGSI_QUAD_SIZE];
+  float ct[TGSI_QUAD_SIZE];
+  float cp[TGSI_QUAD_SIZE];
+
+  convert_cube(sp_sview, sp_samp, s, t, p, c0, cs, ct, cp);
+  compute_lambda_lod_unclamped(sp_sview, sp_samp,
+   cs, ct, cp, lod_in, control, lod);
+   } else {
+  compute_lambda_lod_unclamped(sp_sview, sp_samp,
+   s, t, p, lod_in, control, lod);
+   }
+
+   get_filters(sp_sview, sp_samp, control, , NULL, NULL);
+   funcs->relative_level(sp_sview, sp_samp, lod, mipmap);
+}
 
 static void
 sp_tgsi_get_texel(struct tgsi_sampler *tgsi_sampler,
@@ -3639,7 +3692,7 @@ sp_create_tgsi_sampler(void)
samp->base.get_dims = sp_tgsi_get_dims;
samp->base.get_samples = sp_tgsi_get_samples;
samp->base.get_texel = sp_tgsi_get_texel;
+   samp->base.query_lod = sp_tgsi_query_lod;
 
return samp;
 }
-
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 01/10] tgsi: Remove trailing backslash in comment

2015-09-10 Thread Krzesimir Nowak

It clearly is here by accident.

Reviewed-by: Brian Paul 
---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 75cd0d5..9544623 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -2021,7 +2021,7 @@ fetch_sampler_unit(struct tgsi_exec_machine *mach,
 /*
  * execute a texture instruction.
  *
- * modifier is used to control the channel routing for the\
+ * modifier is used to control the channel routing for the
  * instruction variants like proj, lod, and texture with lod bias.
  * sampler indicates which src register the sampler is contained in.
  */
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 03/10] softpipe: Split compute_lambda_lod into two functions

2015-09-10 Thread Krzesimir Nowak

textureQueryLod returns a vec2 containing mipmap information and a
LOD. The latter must not be clamped.

v2:
  - changed the "not_clamped" part to "unclamped"
  - corrected "clamp into" to "clamp to"
  - split overly long lines

Reviewed-by: Brian Paul 
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 57 +++-
 1 file changed, 40 insertions(+), 17 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 19188b0..30c9cb0 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -1855,24 +1855,23 @@ compute_lod(const struct pipe_sampler_state *sampler,
 }
 
 
-/* Calculate level of detail for every fragment.
+/* Calculate level of detail for every fragment. The computed value is not
+ * clamped to lod_min and lod_max.
  * \param lod_in per-fragment lod_bias or explicit_lod.
  * \param lod results per-fragment lod.
  */
 static inline void
-compute_lambda_lod(struct sp_sampler_view *sp_sview,
-   struct sp_sampler *sp_samp,
-   const float s[TGSI_QUAD_SIZE],
-   const float t[TGSI_QUAD_SIZE],
-   const float p[TGSI_QUAD_SIZE],
-   const float lod_in[TGSI_QUAD_SIZE],
-   enum tgsi_sampler_control control,
-   float lod[TGSI_QUAD_SIZE])
+compute_lambda_lod_unclamped(struct sp_sampler_view *sp_sview,
+ struct sp_sampler *sp_samp,
+ const float s[TGSI_QUAD_SIZE],
+ const float t[TGSI_QUAD_SIZE],
+ const float p[TGSI_QUAD_SIZE],
+ const float lod_in[TGSI_QUAD_SIZE],
+ enum tgsi_sampler_control control,
+ float lod[TGSI_QUAD_SIZE])
 {
const struct pipe_sampler_state *sampler = _samp->base;
-   float lod_bias = sampler->lod_bias;
-   float min_lod = sampler->min_lod;
-   float max_lod = sampler->max_lod;
+   const float lod_bias = sampler->lod_bias;
float lambda;
uint i;
 
@@ -1881,24 +1880,22 @@ compute_lambda_lod(struct sp_sampler_view *sp_sview,
   /* XXX FIXME */
case tgsi_sampler_derivs_explicit:
   lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias;
-  lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lambda, min_lod, max_lod);
+  lod[0] = lod[1] = lod[2] = lod[3] = lambda;
   break;
case tgsi_sampler_lod_bias:
   lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias;
   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
  lod[i] = lambda + lod_in[i];
- lod[i] = CLAMP(lod[i], min_lod, max_lod);
   }
   break;
case tgsi_sampler_lod_explicit:
   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
- lod[i] = CLAMP(lod_in[i] + lod_bias, min_lod, max_lod);
+ lod[i] = lod_in[i] + lod_bias;
   }
   break;
case tgsi_sampler_lod_zero:
case tgsi_sampler_gather:
-  /* this is all static state in the sampler really need clamp here? */
-  lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lod_bias, min_lod, max_lod);
+  lod[0] = lod[1] = lod[2] = lod[3] = lod_bias;
   break;
default:
   assert(0);
@@ -1906,6 +1903,32 @@ compute_lambda_lod(struct sp_sampler_view *sp_sview,
}
 }
 
+/* Calculate level of detail for every fragment.
+ * \param lod_in per-fragment lod_bias or explicit_lod.
+ * \param lod results per-fragment lod.
+ */
+static inline void
+compute_lambda_lod(struct sp_sampler_view *sp_sview,
+   struct sp_sampler *sp_samp,
+   const float s[TGSI_QUAD_SIZE],
+   const float t[TGSI_QUAD_SIZE],
+   const float p[TGSI_QUAD_SIZE],
+   const float lod_in[TGSI_QUAD_SIZE],
+   enum tgsi_sampler_control control,
+   float lod[TGSI_QUAD_SIZE])
+{
+   const struct pipe_sampler_state *sampler = _samp->base;
+   const float min_lod = sampler->min_lod;
+   const float max_lod = sampler->max_lod;
+   int i;
+
+   compute_lambda_lod_unclamped(sp_sview, sp_samp,
+s, t, p, lod_in, control, lod);
+   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
+  lod[i] = CLAMP(lod[i], min_lod, max_lod);
+   }
+}
+
 static inline unsigned
 get_gather_component(const float lod_in[TGSI_QUAD_SIZE])
 {
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 07/10] softpipe: Add functions for computing relative mipmap level

2015-09-10 Thread Krzesimir Nowak

These functions will be used by textureQueryLod.

v2:

  - renamed mip_level_* funcs to mip_rel_level_* to indicate that
these functions return mip level relative to base level and
documented them
  - renamed a level member in sp_filter_funcs struct to relative_level
  - changed mip_rel_level_none and mip_rel_level_nearest to return mip
level relative to base level, mip_rel_level_linear already did
that
  - documented clamp_lod function

Reviewed-by: Brian Paul 
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 113 +++
 src/gallium/drivers/softpipe/sp_tex_sample.h |   7 ++
 2 files changed, 120 insertions(+)

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
b/src/gallium/drivers/softpipe/sp_tex_sample.c
index d7b656e..4a4cddf 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -1936,6 +1936,43 @@ get_gather_component(const float lod_in[TGSI_QUAD_SIZE])
return (*(unsigned int *)lod_in) & 0x3;
 }
 
+/**
+ * Clamps given lod to both lod limits and mip level limits. Clamping to the
+ * latter limits is done so that lod is relative to the first (base) level.
+ */
+static void
+clamp_lod(const struct sp_sampler_view *sp_sview,
+  const struct sp_sampler *sp_samp,
+  const float lod[TGSI_QUAD_SIZE],
+  float clamped[TGSI_QUAD_SIZE])
+{
+   const float min_lod = sp_samp->base.min_lod;
+   const float max_lod = sp_samp->base.max_lod;
+   const float min_level = sp_sview->base.u.tex.first_level;
+   const float max_level = sp_sview->base.u.tex.last_level;
+   int i;
+
+   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
+  float cl = lod[i];
+
+  cl = CLAMP(cl, min_lod, max_lod);
+  cl = CLAMP(cl, 0, max_level - min_level);
+  clamped[i] = cl;
+   }
+}
+
+/**
+ * Get mip level relative to base level for linear mip filter
+ */
+static void
+mip_rel_level_linear(struct sp_sampler_view *sp_sview,
+ struct sp_sampler *sp_samp,
+ const float lod[TGSI_QUAD_SIZE],
+ float level[TGSI_QUAD_SIZE])
+{
+   clamp_lod(sp_sview, sp_samp, lod, level);
+}
+
 static void
 mip_filter_linear(struct sp_sampler_view *sp_sview,
   struct sp_sampler *sp_samp,
@@ -1999,6 +2036,25 @@ mip_filter_linear(struct sp_sampler_view *sp_sview,
 
 
 /**
+ * Get mip level relative to base level for nearest mip filter
+ */
+static void
+mip_rel_level_nearest(struct sp_sampler_view *sp_sview,
+  struct sp_sampler *sp_samp,
+  const float lod[TGSI_QUAD_SIZE],
+  float level[TGSI_QUAD_SIZE])
+{
+   int j;
+
+   clamp_lod(sp_sview, sp_samp, lod, level);
+   for (j = 0; j < TGSI_QUAD_SIZE; j++)
+  /* TODO: It should rather be:
+   * level[j] = ceil(level[j] + 0.5F) - 1.0F;
+   */
+  level[j] = (int)(level[j] + 0.5F);
+}
+
+/**
  * Compute nearest mipmap level from texcoords.
  * Then sample the texture level for four elements of a quad.
  * \param c0  the LOD bias factors, or absolute LODs (depending on control)
@@ -2049,6 +2105,22 @@ mip_filter_nearest(struct sp_sampler_view *sp_sview,
 }
 
 
+/**
+ * Get mip level relative to base level for none mip filter
+ */
+static void
+mip_rel_level_none(struct sp_sampler_view *sp_sview,
+   struct sp_sampler *sp_samp,
+   const float lod[TGSI_QUAD_SIZE],
+   float level[TGSI_QUAD_SIZE])
+{
+   int j;
+
+   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+  level[j] = 0;
+   }
+}
+
 static void
 mip_filter_none(struct sp_sampler_view *sp_sview,
 struct sp_sampler *sp_samp,
@@ -2087,6 +2159,18 @@ mip_filter_none(struct sp_sampler_view *sp_sview,
 }
 
 
+/**
+ * Get mip level relative to base level for none mip filter
+ */
+static void
+mip_rel_level_none_no_filter_select(struct sp_sampler_view *sp_sview,
+struct sp_sampler *sp_samp,
+const float lod[TGSI_QUAD_SIZE],
+float level[TGSI_QUAD_SIZE])
+{
+   mip_rel_level_none(sp_sview, sp_samp, lod, level);
+}
+
 static void
 mip_filter_none_no_filter_select(struct sp_sampler_view *sp_sview,
  struct sp_sampler *sp_samp,
@@ -2340,6 +2424,18 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
 
 
 /**
+ * Get mip level relative to base level for linear mip filter
+ */
+static void
+mip_rel_level_linear_aniso(struct sp_sampler_view *sp_sview,
+   struct sp_sampler *sp_samp,
+   const float lod[TGSI_QUAD_SIZE],
+   float level[TGSI_QUAD_SIZE])
+{
+   mip_rel_level_linear(sp_sview, sp_samp, lod, level);
+}
+
+/**
  * Sample 2D texture using an anisotropic filter.
  */
 static void
@@ -2450,6 +2546,17 @@ mip_filter_linear_aniso(struct sp_sampler_view *sp_sview,
}
 }
 
+/**
+ * Get mip level

Re: [Mesa-dev] [PATCH 9/9] softpipe: Implement and enable textureQueryLod

2015-09-10 Thread Krzesimir Nowak

On Wed, Sep 9, 2015 at 5:26 PM, Brian Paul  wrote:

> On 09/09/2015 04:35 AM, Krzesimir Nowak wrote:
>
>> Passes the shader piglit tests and introduces no regressions.
>>
>> This commit finally makes use of the refactoring in previous
>> commits.
>> ---
>>   src/gallium/drivers/softpipe/sp_screen.c |  2 +-
>>   src/gallium/drivers/softpipe/sp_tex_sample.c | 47
>> +++-
>>   2 files changed, 47 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/gallium/drivers/softpipe/sp_screen.c
>> b/src/gallium/drivers/softpipe/sp_screen.c
>> index 0bfd9c3..7ca8a67 100644
>> --- a/src/gallium/drivers/softpipe/sp_screen.c
>> +++ b/src/gallium/drivers/softpipe/sp_screen.c
>> @@ -193,9 +193,9 @@ softpipe_get_param(struct pipe_screen *screen, enum
>> pipe_cap param)
>>  case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
>> return 4;
>>  case PIPE_CAP_TEXTURE_GATHER_SM5:
>> +   case PIPE_CAP_TEXTURE_QUERY_LOD:
>> return 1;
>>  case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
>> -   case PIPE_CAP_TEXTURE_QUERY_LOD:
>>  case PIPE_CAP_SAMPLE_SHADING:
>>  case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
>> return 0;
>> diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c
>> b/src/gallium/drivers/softpipe/sp_tex_sample.c
>> index 6e639e0..499c8f9 100644
>> --- a/src/gallium/drivers/softpipe/sp_tex_sample.c
>> +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
>> @@ -3566,6 +3566,51 @@ sp_tgsi_get_samples(struct tgsi_sampler
>> *tgsi_sampler,
>>  sample_mip(sp_sview, sp_samp, cs, ct, cp, c0, lod, _args, rgba);
>>   }
>>
>> +static void
>> +sp_tgsi_query_lod(struct tgsi_sampler *tgsi_sampler,
>> +  const unsigned sview_index,
>> +  const unsigned sampler_index,
>> +  const float s[TGSI_QUAD_SIZE],
>> +  const float t[TGSI_QUAD_SIZE],
>> +  const float p[TGSI_QUAD_SIZE],
>> +  const float c0[TGSI_QUAD_SIZE],
>> +  enum tgsi_sampler_control control,
>> +  float mipmap[TGSI_QUAD_SIZE],
>> +  float lod[TGSI_QUAD_SIZE])
>> +{
>> +   static const float lod_in[TGSI_QUAD_SIZE] = { 0.0, 0.0, 0.0, 0.0 };
>> +
>> +   struct sp_tgsi_sampler *sp_tgsi_samp = (struct sp_tgsi_sampler
>> *)tgsi_sampler;
>>
>
> Can that be const-qualified, and the tgsi_sampler function parameter?
>

It cannot for now - please see my reply for patch 8/9.


>
> Ideally, we'd also have a cast-wrapper function to use instead of an
> inline cast here and elsewhere.  That could be done as a follow-up.
>
>
>
> +   struct sp_sampler_view *sp_sview;
>> +   struct sp_sampler *sp_samp;
>> +   struct sp_mip *mip;
>> +   int i;
>> +   float cs[TGSI_QUAD_SIZE];
>> +   float ct[TGSI_QUAD_SIZE];
>> +   float cp[TGSI_QUAD_SIZE];
>> +
>> +   assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
>> +   assert(sampler_index < PIPE_MAX_SAMPLERS);
>> +   assert(sp_tgsi_samp->sp_sampler[sampler_index]);
>> +
>> +   sp_sview = _tgsi_samp->sp_sview[sview_index];
>> +   sp_samp = sp_tgsi_samp->sp_sampler[sampler_index];
>> +   /* always have a view here but texture is NULL if no sampler view was
>> set. */
>> +   if (!sp_sview->base.texture) {
>> +  for (i = 0; i < TGSI_QUAD_SIZE; i++) {
>> + mipmap[i] = 0.0f;
>> + lod[i] = 0.0f;
>> +  }
>> +  return;
>> +   }
>> +
>> +   sp_sview->convert_coords(sp_sview, sp_samp, s, t, p, c0, cs, ct, cp);
>> +
>> +   compute_lambda_lod_not_clamped(sp_sview, sp_samp,
>> +  cs, ct, cp, lod_in, control, lod);
>> +   get_filters(sp_sview, sp_samp, control, , NULL, NULL);
>> +   mip->level(sp_sview, sp_samp, lod, mipmap);
>> +}
>>
>>   static void
>>   sp_tgsi_get_texel(struct tgsi_sampler *tgsi_sampler,
>> @@ -3602,7 +3647,7 @@ sp_create_tgsi_sampler(void)
>>  samp->base.get_dims = sp_tgsi_get_dims;
>>  samp->base.get_samples = sp_tgsi_get_samples;
>>  samp->base.get_texel = sp_tgsi_get_texel;
>> +   samp->base.query_lod = sp_tgsi_query_lod;
>>
>>  return samp;
>>   }
>> -
>>
>>
> Reviewed-by: Brian Paul 
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/7] vbo: Add a predraw resolve callback

2015-09-10 Thread Marek Olšák

Hi Chris,

Instead of the nop functions, could you just do:

if (vbo_context(ctx)->resolve)
   vbo_context(ctx)->resolve(ctx);

Marek

On Wed, Sep 9, 2015 at 3:38 PM, Chris Wilson  wrote:
> A common problem with using HiZ and multisampling is that surfaces need
> to be resolved prior to use. Currently i965 does this inside its state
> update hook, but that is a comparatively heavyweight operation that need
> not be performed so frequently. The obvious solution (and therefore
> fraught with dragons) is to move the HiZ/color resolves into the
> brw_draw_prims() - however, the resolves are performed using meta and
> end up re-entering brw_draw_prims() corrupting the context state of the
> original call. To avoid the meta recursion, we can add a new callback
> (vbo->resolve()) into the vbo pipeline that is called just before
> vbo->draw().
>
> Signed-off-by: Chris Wilson 
> Cc: Brian Paul 
> Cc: Jordan Justen 
> Cc: Jason Ekstrand 
> Cc: Kenneth Graunke 
> Cc: Francisco Jerez 
> ---
>  src/mesa/vbo/vbo.h|  1 +
>  src/mesa/vbo/vbo_context.c| 19 +++
>  src/mesa/vbo/vbo_context.h|  1 +
>  src/mesa/vbo/vbo_exec_array.c |  1 +
>  src/mesa/vbo/vbo_exec_draw.c  |  5 -
>  src/mesa/vbo/vbo_save_draw.c  |  2 ++
>  6 files changed, 28 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h
> index 2aaff5d..b64c468 100644
> --- a/src/mesa/vbo/vbo.h
> +++ b/src/mesa/vbo/vbo.h
> @@ -89,6 +89,7 @@ vbo_initialize_save_dispatch(const struct gl_context *ctx,
>   struct _glapi_table *exec);
>
>
> +typedef void (*vbo_resolve_func)( struct gl_context *ctx);
>  typedef void (*vbo_draw_func)( struct gl_context *ctx,
>const struct _mesa_prim *prims,
>GLuint nr_prims,
> diff --git a/src/mesa/vbo/vbo_context.c b/src/mesa/vbo/vbo_context.c
> index e3eb286..1f0b46a 100644
> --- a/src/mesa/vbo/vbo_context.c
> +++ b/src/mesa/vbo/vbo_context.c
> @@ -148,11 +148,30 @@ static void init_mat_currval(struct gl_context *ctx)
>  }
>
>
> +static void nop_resolve(struct gl_context *ctx)
> +{
> +}
> +
> +static void nop_draw(struct gl_context *ctx,
> + const struct _mesa_prim *prims,
> + GLuint nr_prims,
> + const struct _mesa_index_buffer *ib,
> + GLboolean index_bounds_valid,
> + GLuint min_index,
> + GLuint max_index,
> + struct gl_transform_feedback_object *tfb_vertcount,
> + unsigned stream,
> + struct gl_buffer_object *indirect)
> +{
> +}
> +
>  GLboolean _vbo_CreateContext( struct gl_context *ctx )
>  {
> struct vbo_context *vbo = CALLOC_STRUCT(vbo_context);
>
> ctx->vbo_context = vbo;
> +   vbo->draw_prims = nop_draw;
> +   vbo->resolve = nop_resolve;
>
> /* Initialize the arrayelt helper
>  */
> diff --git a/src/mesa/vbo/vbo_context.h b/src/mesa/vbo/vbo_context.h
> index a376efe..c4033ee4 100644
> --- a/src/mesa/vbo/vbo_context.h
> +++ b/src/mesa/vbo/vbo_context.h
> @@ -75,6 +75,7 @@ struct vbo_context {
> /* Callback into the driver.  This must always succeed, the driver
>  * is responsible for initiating any fallback actions required:
>  */
> +   vbo_resolve_func resolve;
> vbo_draw_func draw_prims;
>  };
>
> diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
> index 34d2c1d..d592ae4 100644
> --- a/src/mesa/vbo/vbo_exec_array.c
> +++ b/src/mesa/vbo/vbo_exec_array.c
> @@ -549,6 +549,7 @@ vbo_bind_arrays(struct gl_context *ctx)
> struct vbo_context *vbo = vbo_context(ctx);
> struct vbo_exec_context *exec = >exec;
>
> +   vbo->resolve(ctx);
> vbo_draw_method(vbo, DRAW_ARRAYS);
>
> if (exec->array.recalculate_inputs) {
> diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
> index 2bfb0c3..fa5b06b 100644
> --- a/src/mesa/vbo/vbo_exec_draw.c
> +++ b/src/mesa/vbo/vbo_exec_draw.c
> @@ -388,11 +388,14 @@ vbo_exec_vtx_flush(struct vbo_exec_context *exec, 
> GLboolean keepUnmapped)
>
>if (exec->vtx.copied.nr != exec->vtx.vert_count) {
>  struct gl_context *ctx = exec->ctx;
> +
> + vbo_context(ctx)->resolve( ctx );
>
>  /* Before the update_state() as this may raise _NEW_VARYING_VP_INPUTS
>* from _mesa_set_varying_vp_inputs().
>   */
> -vbo_exec_bind_arrays( ctx );
> + vbo_draw_method( vbo_context(ctx), DRAW_BEGIN_END);
> + vbo_exec_bind_arrays( ctx );
>
>   if (ctx->NewState)
>  _mesa_update_state( ctx );
> diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c
> index b1fd689..2103b8e 100644
> --- a/src/mesa/vbo/vbo_save_draw.c
> +++

[Mesa-dev] [PATCH v5 27/70] glsl: Add std430 interface packing support to program_resource_visitor's member functions

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

They are used to calculate the offset and array stride of uniform/shader storage
buffer variables. Take this info into account to get the right value for std430.

Signed-off-by: Samuel Iglesias Gonsalvez 
---
 src/glsl/link_uniform_blocks.cpp | 19 +++---
 src/glsl/link_uniforms.cpp   | 81 
 src/glsl/linker.h|  6 ++-
 3 files changed, 76 insertions(+), 30 deletions(-)

diff --git a/src/glsl/link_uniform_blocks.cpp b/src/glsl/link_uniform_blocks.cpp
index c891d03..8f65f4a 100644
--- a/src/glsl/link_uniform_blocks.cpp
+++ b/src/glsl/link_uniform_blocks.cpp
@@ -68,14 +68,18 @@ private:
}
 
virtual void enter_record(const glsl_type *type, const char *,
- bool row_major) {
+ bool row_major, const unsigned packing) {
   assert(type->is_record());
-  this->offset = glsl_align(
+  if (packing == GLSL_INTERFACE_PACKING_STD430)
+ this->offset = glsl_align(
+this->offset, type->std430_base_alignment(row_major));
+  else
+ this->offset = glsl_align(
 this->offset, type->std140_base_alignment(row_major));
}
 
virtual void leave_record(const glsl_type *type, const char *,
- bool row_major) {
+ bool row_major, const unsigned packing) {
   assert(type->is_record());
 
   /* If this is the last field of a structure, apply rule #9.  The
@@ -85,12 +89,17 @@ private:
* the member following the sub-structure is rounded up to the next
* multiple of the base alignment of the structure."
*/
-  this->offset = glsl_align(
+  if (packing == GLSL_INTERFACE_PACKING_STD430)
+ this->offset = glsl_align(
+this->offset, type->std430_base_alignment(row_major));
+  else
+ this->offset = glsl_align(
 this->offset, type->std140_base_alignment(row_major));
}
 
virtual void visit_field(const glsl_type *type, const char *name,
 bool row_major, const glsl_type *,
+const unsigned packing,
 bool /* last_field */)
{
   assert(this->index < this->num_variables);
@@ -122,7 +131,7 @@ private:
   unsigned alignment = 0;
   unsigned size = 0;
 
-  if (v->Type->interface_packing == GLSL_INTERFACE_PACKING_STD430) {
+  if (packing == GLSL_INTERFACE_PACKING_STD430) {
  alignment = type->std430_base_alignment(v->RowMajor);
  size = type->std430_size(v->RowMajor);
   } else {
diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp
index fefc1ec..1d678c2 100644
--- a/src/glsl/link_uniforms.cpp
+++ b/src/glsl/link_uniforms.cpp
@@ -63,7 +63,9 @@ program_resource_visitor::process(const glsl_type *type, 
const char *name)
   || type->without_array()->is_interface());
 
char *name_copy = ralloc_strdup(NULL, name);
-   recursion(type, _copy, strlen(name), false, NULL, false);
+   unsigned packing = type->interface_packing;
+
+   recursion(type, _copy, strlen(name), false, NULL, packing, false);
ralloc_free(name_copy);
 }
 
@@ -74,6 +76,10 @@ program_resource_visitor::process(ir_variable *var)
const bool row_major =
   var->data.matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR;
 
+   const unsigned packing = var->get_interface_type() ?
+  var->get_interface_type()->interface_packing :
+  var->type->interface_packing;
+
/* false is always passed for the row_major parameter to the other
 * processing functions because no information is available to do
 * otherwise.  See the warning in linker.h.
@@ -110,7 +116,8 @@ program_resource_visitor::process(ir_variable *var)
   * lowering is only applied to non-uniform interface blocks, so we
   * can safely pass false for row_major.
   */
- recursion(var->type, , new_length, row_major, NULL, false);
+ recursion(var->type, , new_length, row_major, NULL, packing,
+   false);
   }
   ralloc_free(name);
} else if (var->data.from_named_ifc_block_nonarray) {
@@ -134,22 +141,22 @@ program_resource_visitor::process(ir_variable *var)
* is only applied to non-uniform interface blocks, so we can safely
* pass false for row_major.
*/
-  recursion(var->type, , strlen(name), row_major, NULL, false);
+  recursion(var->type, , strlen(name), row_major, NULL, packing, 
false);
   ralloc_free(name);
} else if (t->without_array()->is_record()) {
   char *name = ralloc_strdup(NULL, var->name);
-  recursion(var->type, , strlen(name), row_major, NULL, false);
+  recursion(var->type, , strlen(name), row_major, NULL, packing, 
false);
   ralloc_free(name);
} else if (t->is_interface()) {
   char *name = ralloc_strdup(NULL,

[Mesa-dev] [PATCH v5 38/70] i965/nir/vec4: Implement nir_intrinsic_store_ssbo

2015-09-10 Thread Iago Toral Quiroga

---
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 148 +
 1 file changed, 148 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index f47b029..450441d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -23,8 +23,13 @@
 
 #include "brw_nir.h"
 #include "brw_vec4.h"
+#include "brw_vec4_builder.h"
+#include "brw_vec4_surface_builder.h"
 #include "glsl/ir_uniform.h"
 
+using namespace brw;
+using namespace brw::surface_access;
+
 namespace brw {
 
 void
@@ -556,6 +561,149 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr 
*instr)
   break;
}
 
+   case nir_intrinsic_store_ssbo_indirect:
+  has_indirect = true;
+  /* fallthrough */
+   case nir_intrinsic_store_ssbo: {
+  assert(devinfo->gen >= 7);
+
+  /* Block index */
+  src_reg surf_index;
+  nir_const_value *const_uniform_block =
+ nir_src_as_const_value(instr->src[1]);
+  if (const_uniform_block) {
+ unsigned index = prog_data->base.binding_table.ubo_start +
+  const_uniform_block->u[0];
+ surf_index = src_reg(index);
+ brw_mark_surface_used(_data->base, index);
+  } else {
+ surf_index = src_reg(this, glsl_type::uint_type);
+ emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[1], 1),
+  src_reg(prog_data->base.binding_table.ubo_start)));
+ surf_index = emit_uniformize(surf_index);
+
+ brw_mark_surface_used(_data->base,
+   prog_data->base.binding_table.ubo_start +
+   shader_prog->NumUniformBlocks - 1);
+  }
+
+  /* Offset */
+  src_reg offset_reg = src_reg(this, glsl_type::uint_type);
+  unsigned const_offset_bytes = 0;
+  if (has_indirect) {
+ emit(MOV(dst_reg(offset_reg), get_nir_src(instr->src[2], 1)));
+  } else {
+ const_offset_bytes = instr->const_index[0];
+ emit(MOV(dst_reg(offset_reg), src_reg(const_offset_bytes)));
+  }
+
+  /* Value */
+  src_reg val_reg = get_nir_src(instr->src[0], 4);
+
+  /* Writemask */
+  unsigned write_mask = instr->const_index[1];
+
+  /* IvyBridge does not have a native SIMD4x2 untyped write message so 
untyped
+   * writes will use SIMD8 mode. In order to hide this and keep symmetry 
across
+   * typed and untyped messages and across hardware platforms, the
+   * current implementation of the untyped messages will transparently 
convert
+   * the SIMD4x2 payload into an equivalent SIMD8 payload by transposing it
+   * and enabling only channel X on the SEND instruction.
+   *
+   * The above works well for full vector writes, but not for partial 
writes
+   * where we want to write some channels and not others, like when we have
+   * code such as v.xyw = vec3(1,2,4). Because the untyped write messages 
are
+   * quite restrictive with regards to the channel enables we can 
configure in
+   * the message descriptor (not all combinations are allowed) we cannot 
simply
+   * implement these scenarios with a single message while keeping the
+   * aforementioned symmetry in the implementation. For now we decided 
that
+   * it is better to keep the symmetry to reduce complexity, so in 
situations
+   * such as the one described we end up emitting two untyped write 
messages
+   * (one for xy and another for w).
+   *
+   * The code below packs consecutive channels into a single write message,
+   * detects gaps in the vector write and if needed, sends a second message
+   * with the remaining channels. If in the future we decide that we want 
to
+   * emit a single message at the expense of losing the symmetry in the
+   * implementation we can:
+   *
+   * 1) For IvyBridge: Only use the red channel of the untyped write SIMD8
+   *message payload. In this mode we can write up to 8 offsets and 
dwords
+   *to the red channel only (for the two vec4s in the SIMD4x2 
execution)
+   *and select which of the 8 channels carry data to write by setting 
the
+   *appropriate writemask in the dst register of the SEND instruction.
+   *It would require writing a new generator opcode specifically for
+   *IvyBridge since we would need to prepare a SIMD8 payload that could
+   *use any channel, not just X.
+   *
+   * 2) For Haswell+: Simply send a single write message but set the 
writemask
+   *on the dst of the SEND instruction to select the channels we want 
to
+   *write. It would require modifying the current messages to receive
+   *and honor the writemask provided.
+   */
+  const vec4_builder bld = vec4_builder(this).at_end()
+   .annotate(current_annotation, base_ir);
+
+  int

[Mesa-dev] [PATCH v5 29/70] glsl: a shader storage buffer must be smaller than the maximum size allowed

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

Otherwise, generate a link time error as per the
ARB_shader_storage_buffer_object spec.

v2:
- Fix error message (Jordan)

Signed-off-by: Samuel Iglesias Gonsalvez 
Reviewed-by: Jordan Justen 
---
 src/glsl/glsl_types.cpp  |  9 +++--
 src/glsl/link_uniform_blocks.cpp | 19 +++
 src/glsl/linker.cpp  |  2 +-
 src/glsl/linker.h|  1 +
 4 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp
index d97991a..f8227df 100644
--- a/src/glsl/glsl_types.cpp
+++ b/src/glsl/glsl_types.cpp
@@ -1325,7 +1325,7 @@ glsl_type::std140_size(bool row_major) const
 * rounded up to the next multiple of the base alignment of the
 * structure.
 */
-   if (this->is_record()) {
+   if (this->is_record() || this->is_interface()) {
   unsigned size = 0;
   unsigned max_align = 0;
 
@@ -1341,7 +1341,12 @@ glsl_type::std140_size(bool row_major) const
 
 const struct glsl_type *field_type = this->fields.structure[i].type;
 unsigned align = field_type->std140_base_alignment(field_row_major);
-size = glsl_align(size, align);
+
+ /* Ignore unsized arrays when calculating size */
+ if (field_type->is_unsized_array())
+continue;
+
+ size = glsl_align(size, align);
 size += field_type->std140_size(field_row_major);
 
  max_align = MAX2(align, max_align);
diff --git a/src/glsl/link_uniform_blocks.cpp b/src/glsl/link_uniform_blocks.cpp
index 8f65f4a..7ceffee 100644
--- a/src/glsl/link_uniform_blocks.cpp
+++ b/src/glsl/link_uniform_blocks.cpp
@@ -187,6 +187,7 @@ struct block {
 
 unsigned
 link_uniform_blocks(void *mem_ctx,
+struct gl_context *ctx,
 struct gl_shader_program *prog,
 struct gl_shader **shader_list,
 unsigned num_shaders,
@@ -308,6 +309,15 @@ link_uniform_blocks(void *mem_ctx,
 
 blocks[i].UniformBufferSize = parcel.buffer_size;
 
+/* Check SSBO size is lower than maximum supported size for SSBO */
+if (b->is_shader_storage &&
+parcel.buffer_size > ctx->Const.MaxShaderStorageBlockSize) {
+   linker_error(prog, "shader storage block `%s' has size %d, "
+"which is larger than than the maximum allowed 
(%d)",
+block_type->name,
+parcel.buffer_size,
+ctx->Const.MaxShaderStorageBlockSize);
+}
 blocks[i].NumUniforms =
(unsigned)(ptrdiff_t)([parcel.index] - 
blocks[i].Uniforms);
 
@@ -328,6 +338,15 @@ link_uniform_blocks(void *mem_ctx,
 
  blocks[i].UniformBufferSize = parcel.buffer_size;
 
+ /* Check SSBO size is lower than maximum supported size for SSBO */
+ if (b->is_shader_storage &&
+ parcel.buffer_size > ctx->Const.MaxShaderStorageBlockSize) {
+linker_error(prog, "shader storage block `%s' has size %d, "
+ "which is larger than than the maximum allowed (%d)",
+ block_type->name,
+ parcel.buffer_size,
+ ctx->Const.MaxShaderStorageBlockSize);
+ }
  blocks[i].NumUniforms =
 (unsigned)(ptrdiff_t)([parcel.index] - 
blocks[i].Uniforms);
 
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index e078f86..cf9f1f6 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -2023,7 +2023,7 @@ link_intrastage_shaders(void *mem_ctx,
 
/* Link up uniform blocks defined within this stage. */
const unsigned num_uniform_blocks =
-  link_uniform_blocks(mem_ctx, prog, shader_list, num_shaders,
+  link_uniform_blocks(mem_ctx, ctx, prog, shader_list, num_shaders,
   _blocks);
if (!prog->LinkStatus)
   return NULL;
diff --git a/src/glsl/linker.h b/src/glsl/linker.h
index fb8f81e..953b49f 100644
--- a/src/glsl/linker.h
+++ b/src/glsl/linker.h
@@ -56,6 +56,7 @@ link_uniform_blocks_are_compatible(const gl_uniform_block *a,
 
 extern unsigned
 link_uniform_blocks(void *mem_ctx,
+struct gl_context *ctx,
 struct gl_shader_program *prog,
 struct gl_shader **shader_list,
 unsigned num_shaders,
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 36/70] nir: Implement __intrinsic_store_ssbo

2015-09-10 Thread Iago Toral Quiroga

v2 (Connor):
 - Make the STORE() macro take arguments for the extra sources (and their
   size) and any extra indices required.
---
 src/glsl/nir/glsl_to_nir.cpp  | 36 
 src/glsl/nir/nir_intrinsics.h | 20 
 2 files changed, 48 insertions(+), 8 deletions(-)

diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index 0666250..6f1e20a 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -644,6 +644,8 @@ nir_visitor::visit(ir_call *ir)
  op = nir_intrinsic_memory_barrier;
   } else if (strcmp(ir->callee_name(), "__intrinsic_image_size") == 0) {
  op = nir_intrinsic_image_size;
+  } else if (strcmp(ir->callee_name(), "__intrinsic_store_ssbo") == 0) {
+ op = nir_intrinsic_store_ssbo;
   } else {
  unreachable("not reached");
   }
@@ -740,6 +742,40 @@ nir_visitor::visit(ir_call *ir)
   }
   case nir_intrinsic_memory_barrier:
  break;
+  case nir_intrinsic_store_ssbo: {
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
+ assert(write_mask);
+
+ /* Check if we need the indirect version */
+ ir_constant *const_offset = offset->as_constant();
+ if (!const_offset) {
+op = nir_intrinsic_store_ssbo_indirect;
+ralloc_free(instr);
+instr = nir_intrinsic_instr_create(shader, op);
+instr->src[2] = evaluate_rvalue(offset);
+instr->const_index[0] = 0;
+ } else {
+instr->const_index[0] = const_offset->value.u[0];
+ }
+
+ instr->const_index[1] = write_mask->value.u[0];
+
+ instr->src[0] = evaluate_rvalue(val);
+ instr->num_components = val->type->vector_elements;
+
+ instr->src[1] = evaluate_rvalue(block);
+ break;
+  }
   default:
  unreachable("not reached");
   }
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index 88f57f4..38f22c1 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -179,15 +179,19 @@ LOAD(input, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | 
NIR_INTRINSIC_CAN_REORDER)
 /*
  * Stores work the same way as loads, except now the first register input is
  * the value or array to store and the optional second input is the indirect
- * offset.
+ * offset. SSBO stores are similar, but they accept an extra source for the
+ * block index and an extra index with the writemask to use.
  */
 
-#define STORE(name, num_indices, flags) \
-   INTRINSIC(store_##name, 1, ARR(0), false, 0, 0, num_indices, flags) \
-   INTRINSIC(store_##name##_indirect, 2, ARR(0, 1), false, 0, 0, \
- num_indices, flags) \
+#define STORE(name, extra_srcs, extra_srcs_size, extra_indices, flags) \
+   INTRINSIC(store_##name, 1 + extra_srcs, \
+ ARR(0, extra_srcs_size, extra_srcs_size, extra_srcs_size), \
+ false, 0, 0, 1 + extra_indices, flags) \
+   INTRINSIC(store_##name##_indirect, 2 + extra_srcs, \
+ ARR(0, 1, extra_srcs_size, extra_srcs_size), \
+ false, 0, 0, 1 + extra_indices, flags)
 
-STORE(output, 1, 0)
-/* STORE(ssbo, 2, 0) */
+STORE(output, 0, 0, 0, 0)
+STORE(ssbo, 1, 1, 1, 0)
 
-LAST_INTRINSIC(store_output_indirect)
+LAST_INTRINSIC(store_ssbo_indirect)
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 35/70] i965/vec4: Import surface message builder functions.

2015-09-10 Thread Iago Toral Quiroga

From: Francisco Jerez 

Implement helper functions that can be used to construct and send
untyped and typed surface read, write and atomic messages to the
shared dataport unit.

v2: Split from the FS implementation.
v3: Rewrite to avoid evil array_reg, emit_collect and emit_zip.
---
 .../drivers/dri/i965/brw_vec4_surface_builder.cpp  | 234 +
 .../drivers/dri/i965/brw_vec4_surface_builder.h|  39 
 2 files changed, 273 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
index b77cd74..a7c286d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
@@ -96,3 +96,237 @@ namespace {
   }
}
 }
+
+namespace brw {
+   namespace surface_access {
+  namespace {
+ using namespace array_utils;
+
+ /**
+  * Generate a send opcode for a surface message and return the
+  * result.
+  */
+ src_reg
+ emit_send(const vec4_builder , enum opcode op,
+   const src_reg ,
+   const src_reg , unsigned addr_sz,
+   const src_reg , unsigned src_sz,
+   const src_reg ,
+   unsigned arg, unsigned ret_sz,
+   brw_predicate pred = BRW_PREDICATE_NONE)
+ {
+/* Calculate the total number of components of the payload. */
+const unsigned header_sz = (header.file == BAD_FILE ? 0 : 1);
+const unsigned sz = header_sz + addr_sz + src_sz;
+
+/* Construct the payload. */
+const dst_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz);
+unsigned n = 0;
+
+if (header_sz)
+   bld.exec_all().MOV(offset(payload, n++),
+  retype(header, BRW_REGISTER_TYPE_UD));
+
+for (unsigned i = 0; i < addr_sz; i++)
+   bld.MOV(offset(payload, n++),
+   offset(retype(addr, BRW_REGISTER_TYPE_UD), i));
+
+for (unsigned i = 0; i < src_sz; i++)
+   bld.MOV(offset(payload, n++),
+   offset(retype(src, BRW_REGISTER_TYPE_UD), i));
+
+/* Reduce the dynamically uniform surface index to a single
+ * scalar.
+ */
+const src_reg usurface = bld.emit_uniformize(surface);
+
+/* Emit the message send instruction. */
+const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, ret_sz);
+vec4_instruction *inst =
+   bld.emit(op, dst, src_reg(payload), usurface, arg);
+inst->mlen = sz;
+inst->regs_written = ret_sz;
+inst->header_size = header_sz;
+inst->predicate = pred;
+
+return src_reg(dst);
+ }
+  }
+
+  /**
+   * Emit an untyped surface read opcode.  \p dims determines the number
+   * of components of the address and \p size the number of components of
+   * the returned value.
+   */
+  src_reg
+  emit_untyped_read(const vec4_builder ,
+const src_reg , const src_reg ,
+unsigned dims, unsigned size,
+brw_predicate pred)
+  {
+ return emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_READ, src_reg(),
+  emit_insert(bld, addr, dims, true), 1,
+  src_reg(), 0,
+  surface, size, 1, pred);
+  }
+
+  /**
+   * Emit an untyped surface write opcode.  \p dims determines the number
+   * of components of the address and \p size the number of components of
+   * the argument.
+   */
+  void
+  emit_untyped_write(const vec4_builder , const src_reg ,
+ const src_reg , const src_reg ,
+ unsigned dims, unsigned size,
+ brw_predicate pred)
+  {
+ const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
+   bld.shader->devinfo->is_haswell);
+ emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_WRITE, src_reg(),
+   emit_insert(bld, addr, dims, has_simd4x2),
+   has_simd4x2 ? 1 : dims,
+   emit_insert(bld, src, size, has_simd4x2),
+   has_simd4x2 ? 1 : size,
+   surface, size, 0, pred);
+  }
+
+  /**
+   * Emit an untyped surface atomic opcode.  \p dims determines the number
+   * of components of the address and \p rsize the number of components of
+   * the returned value (either zero or one).
+   */
+  src_reg
+  emit_untyped_atomic(const vec4_builder ,
+  const src_reg , const src_reg ,
+  const src_reg , const src_reg ,
+  unsigned dims, unsigned

[Mesa-dev] [PATCH v5 37/70] i965/nir/fs: Implement nir_intrinsic_store_ssbo

2015-09-10 Thread Iago Toral Quiroga

---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 71 
 1 file changed, 71 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 3c55a12..6ccbf89 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -29,8 +29,10 @@
 #include "brw_fs.h"
 #include "brw_fs_surface_builder.h"
 #include "brw_nir.h"
+#include "brw_fs_surface_builder.h"
 
 using namespace brw;
+using namespace brw::surface_access;
 
 void
 fs_visitor::emit_nir_code()
@@ -1680,6 +1682,75 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , 
nir_intrinsic_instr *instr
   break;
}
 
+   case nir_intrinsic_store_ssbo_indirect:
+  has_indirect = true;
+  /* fallthrough */
+   case nir_intrinsic_store_ssbo: {
+  assert(devinfo->gen >= 7);
+
+  /* Block index */
+  fs_reg surf_index;
+  nir_const_value *const_uniform_block =
+ nir_src_as_const_value(instr->src[1]);
+  if (const_uniform_block) {
+ unsigned index = stage_prog_data->binding_table.ubo_start +
+  const_uniform_block->u[0];
+ surf_index = fs_reg(index);
+ brw_mark_surface_used(prog_data, index);
+  } else {
+ surf_index = vgrf(glsl_type::uint_type);
+ bld.ADD(surf_index, get_nir_src(instr->src[1]),
+  fs_reg(stage_prog_data->binding_table.ubo_start));
+ surf_index = bld.emit_uniformize(surf_index);
+
+ brw_mark_surface_used(prog_data,
+   stage_prog_data->binding_table.ubo_start +
+   shader_prog->NumUniformBlocks - 1);
+  }
+
+  /* Offset */
+  fs_reg offset_reg = vgrf(glsl_type::uint_type);
+  unsigned const_offset_bytes = 0;
+  if (has_indirect) {
+ bld.MOV(offset_reg, get_nir_src(instr->src[2]));
+  } else {
+ const_offset_bytes = instr->const_index[0];
+ bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+  }
+
+  /* Value */
+  fs_reg val_reg = get_nir_src(instr->src[0]);
+
+  /* Writemask */
+  unsigned writemask = instr->const_index[1];
+
+  /* Write each component present in the writemask */
+  unsigned skipped_channels = 0;
+  for (int i = 0; i < instr->num_components; i++) {
+ int component_mask = 1 << i;
+ if (writemask & component_mask) {
+if (skipped_channels) {
+   if (!has_indirect) {
+  const_offset_bytes += 4 * skipped_channels;
+  bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+   } else {
+  bld.ADD(offset_reg, offset_reg,
+   brw_imm_ud(4 * skipped_channels));
+   }
+   skipped_channels = 0;
+}
+
+emit_untyped_write(bld, surf_index, offset_reg,
+   offset(val_reg, bld, i),
+   1 /* dims */, 1 /* size */,
+   BRW_PREDICATE_NONE);
+ }
+
+ skipped_channels++;
+  }
+  break;
+   }
+
case nir_intrinsic_store_output_indirect:
   has_indirect = true;
   /* fallthrough */
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 33/70] i965/vec4: Introduce VEC4 IR builder.

2015-09-10 Thread Iago Toral Quiroga

From: Francisco Jerez 

See "i965/fs: Introduce FS IR builder." for the rationale.

v2: Drop scalarizing VEC4 builder.
v3: Take a backend_shader as constructor argument.  Improve handling
of debug annotations and execution control flags.  Rename "instr"
variable.  Initialize cursor to NULL by default and add method to
explicitly point the builder at the end of the program.
---
 src/mesa/drivers/dri/i965/Makefile.sources   |   1 +
 src/mesa/drivers/dri/i965/brw_vec4_builder.h | 602 +++
 2 files changed, 603 insertions(+)
 create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_builder.h

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index dfdad75..88e64fb 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -115,6 +115,7 @@ i965_FILES = \
brw_urb.c \
brw_util.c \
brw_util.h \
+   brw_vec4_builder.h \
brw_vec4_copy_propagation.cpp \
brw_vec4.cpp \
brw_vec4_cse.cpp \
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_builder.h 
b/src/mesa/drivers/dri/i965/brw_vec4_builder.h
new file mode 100644
index 000..a90cadb
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_builder.h
@@ -0,0 +1,602 @@
+/* -*- c++ -*- */
+/*
+ * Copyright © 2010-2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef BRW_VEC4_BUILDER_H
+#define BRW_VEC4_BUILDER_H
+
+#include "brw_ir_vec4.h"
+#include "brw_ir_allocator.h"
+#include "brw_context.h"
+
+namespace brw {
+   /**
+* Toolbox to assemble a VEC4 IR program out of individual instructions.
+*
+* This object is meant to have an interface consistent with
+* brw::fs_builder.  They cannot be fully interchangeable because
+* brw::fs_builder generates scalar code while brw::vec4_builder generates
+* vector code.
+*/
+   class vec4_builder {
+   public:
+  /** Type used in this IR to represent a source of an instruction. */
+  typedef brw::src_reg src_reg;
+
+  /** Type used in this IR to represent the destination of an instruction. 
*/
+  typedef brw::dst_reg dst_reg;
+
+  /** Type used in this IR to represent an instruction. */
+  typedef vec4_instruction instruction;
+
+  /**
+   * Construct a vec4_builder that inserts instructions into \p shader.
+   */
+  vec4_builder(backend_shader *shader) :
+ shader(shader), block(NULL), cursor(NULL),
+ force_writemask_all(false),
+ annotation()
+  {
+  }
+
+  /**
+   * Construct a vec4_builder that inserts instructions into \p shader
+   * before instruction \p inst in basic block \p block.  The default
+   * execution controls and debug annotation are initialized from the
+   * instruction passed as argument.
+   */
+  vec4_builder(backend_shader *shader, bblock_t *block, instruction *inst) 
:
+ shader(shader), block(block), cursor(inst)
+  {
+ annotation.str = inst->annotation;
+ annotation.ir = inst->ir;
+  }
+
+  /**
+   * Construct a vec4_builder that inserts instructions before \p cursor
+   * in basic block \p block, inheriting other code generation parameters
+   * from this.
+   */
+  vec4_builder
+  at(bblock_t *block, exec_node *cursor) const
+  {
+ vec4_builder bld = *this;
+ bld.block = block;
+ bld.cursor = cursor;
+ return bld;
+  }
+
+  /**
+   * Construct a vec4_builder appending instructions at the end of the
+   * instruction list of the shader, inheriting other code generation
+   * parameters from this.
+   */
+  vec4_builder
+  at_end() const
+  {
+ return at(NULL, (exec_node *)>instructions.tail);
+  }
+
+  /**
+   * Construct a builder

[Mesa-dev] [PATCH v5 31/70] glsl: ignore buffer variables when counting uniform components

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

Signed-off-by: Samuel Iglesias Gonsalvez 
Reviewed-by: Jordan Justen 
---
 src/glsl/link_uniforms.cpp | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp
index 1d678c2..cc38287 100644
--- a/src/glsl/link_uniforms.cpp
+++ b/src/glsl/link_uniforms.cpp
@@ -304,7 +304,7 @@ public:
   : num_active_uniforms(0), num_values(0), num_shader_samplers(0),
 num_shader_images(0), num_shader_uniform_components(0),
 num_shader_subroutines(0),
-is_ubo_var(false), map(map)
+is_ubo_var(false), is_shader_storage(false), map(map)
{
   /* empty */
}
@@ -320,6 +320,7 @@ public:
void process(ir_variable *var)
{
   this->is_ubo_var = var->is_in_buffer_block();
+  this->is_shader_storage = var->is_in_shader_storage_block();
   if (var->is_interface_instance())
  program_resource_visitor::process(var->get_interface_type(),
var->get_interface_type()->name);
@@ -358,6 +359,7 @@ public:
unsigned num_shader_subroutines;
 
bool is_ubo_var;
+   bool is_shader_storage;
 
 private:
virtual void visit_field(const glsl_type *type, const char *name,
@@ -386,13 +388,14 @@ private:
   * components in the default block.  The spec allows image
   * uniforms to use up no more than one scalar slot.
   */
- this->num_shader_uniform_components += values;
+ if(!is_shader_storage)
+this->num_shader_uniform_components += values;
   } else {
 /* Accumulate the total number of uniform slots used by this shader.
  * Note that samplers do not count against this limit because they
  * don't use any storage on current hardware.
  */
-if (!is_ubo_var)
+if (!is_ubo_var && !is_shader_storage)
this->num_shader_uniform_components += values;
   }
 
@@ -1028,8 +1031,10 @@ link_assign_uniform_locations(struct gl_shader_program 
*prog,
   sh->num_combined_uniform_components = sh->num_uniform_components;
 
   for (unsigned i = 0; i < sh->NumUniformBlocks; i++) {
-sh->num_combined_uniform_components +=
-   sh->UniformBlocks[i].UniformBufferSize / 4;
+ if (!sh->UniformBlocks[i].IsShaderStorage) {
+   sh->num_combined_uniform_components +=
+  sh->UniformBlocks[i].UniformBufferSize / 4;
+ }
   }
}
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 32/70] glsl: shader storage blocks use different max block size values than uniforms

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

Signed-off-by: Samuel Iglesias Gonsalvez 
---
 src/glsl/linker.cpp | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index a9a6dc5..323c162 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -2830,13 +2830,23 @@ check_resources(struct gl_context *ctx, struct 
gl_shader_program *prog)
unsigned total_shader_storage_blocks = 0;
 
for (unsigned i = 0; i < prog->NumUniformBlocks; i++) {
-  if (prog->UniformBlocks[i].UniformBufferSize > 
ctx->Const.MaxUniformBlockSize) {
+  /* Don't check SSBOs for Uniform Block Size */
+  if (!prog->UniformBlocks[i].IsShaderStorage &&
+  prog->UniformBlocks[i].UniformBufferSize > 
ctx->Const.MaxUniformBlockSize) {
  linker_error(prog, "Uniform block %s too big (%d/%d)\n",
   prog->UniformBlocks[i].Name,
   prog->UniformBlocks[i].UniformBufferSize,
   ctx->Const.MaxUniformBlockSize);
   }
 
+  if (prog->UniformBlocks[i].IsShaderStorage &&
+  prog->UniformBlocks[i].UniformBufferSize > 
ctx->Const.MaxShaderStorageBlockSize) {
+ linker_error(prog, "Shader storage block %s too big (%d/%d)\n",
+  prog->UniformBlocks[i].Name,
+  prog->UniformBlocks[i].UniformBufferSize,
+  ctx->Const.MaxShaderStorageBlockSize);
+  }
+
   for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) {
 if (prog->UniformBlockStageIndex[j][i] != -1) {
 struct gl_shader *sh = prog->_LinkedShaders[j];
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 34/70] i965/vec4: Import helpers to convert vectors into arrays and back.

2015-09-10 Thread Iago Toral Quiroga

From: Francisco Jerez 

These functions handle the conversion of a vec4 into the form expected
by the dataport unit in message and message return payloads.  The
conversion is not always trivial because some messages don't support
SIMD4x2 for some generations, in which case a strided copy may be
necessary.

v2: Split from the FS implementation.
v3: Rewrite to avoid evil array_reg, emit_collect and emit_zip.
---
 src/mesa/drivers/dri/i965/Makefile.sources |  2 +
 .../drivers/dri/i965/brw_vec4_surface_builder.cpp  | 98 ++
 .../drivers/dri/i965/brw_vec4_surface_builder.h| 30 +++
 3 files changed, 130 insertions(+)
 create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
 create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_surface_builder.h

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index 88e64fb..049e293 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -129,6 +129,8 @@ i965_FILES = \
brw_vec4_nir.cpp \
brw_vec4_gs_nir.cpp \
brw_vec4_reg_allocate.cpp \
+   brw_vec4_surface_builder.cpp \
+   brw_vec4_surface_builder.h \
brw_vec4_visitor.cpp \
brw_vec4_vp.cpp \
brw_vec4_vs_visitor.cpp \
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
new file mode 100644
index 000..b77cd74
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
@@ -0,0 +1,98 @@
+/*
+ * Copyright © 2013-2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_vec4_surface_builder.h"
+
+using namespace brw;
+
+namespace {
+   namespace array_utils {
+  /**
+   * Copy one out of every \p src_stride logical components of the argument
+   * into one out of every \p dst_stride logical components of the result.
+   */
+  src_reg
+  emit_stride(const vec4_builder , const src_reg , unsigned size,
+  unsigned dst_stride, unsigned src_stride)
+  {
+ if (src_stride == 1 && dst_stride == 1) {
+return src;
+ } else {
+const dst_reg dst = bld.vgrf(src.type,
+ DIV_ROUND_UP(size * dst_stride, 4));
+
+for (unsigned i = 0; i < size; ++i)
+   bld.MOV(writemask(offset(dst, i * dst_stride / 4),
+ 1 << (i * dst_stride % 4)),
+   swizzle(offset(src, i * src_stride / 4),
+   brw_swizzle_for_mask(1 << (i * src_stride % 
4;
+
+return src_reg(dst);
+ }
+  }
+
+  /**
+   * Convert a VEC4 into an array of registers with the layout expected by
+   * the recipient shared unit.  If \p has_simd4x2 is true the argument is
+   * left unmodified in SIMD4x2 form, otherwise it will be rearranged into
+   * a SIMD8 vector.
+   */
+  src_reg
+  emit_insert(const vec4_builder , const src_reg ,
+  unsigned n, bool has_simd4x2)
+  {
+ if (src.file == BAD_FILE || n == 0) {
+return src_reg();
+
+ } else {
+/* Pad unused components with zeroes. */
+const unsigned mask = (1 << n) - 1;
+const dst_reg tmp = bld.vgrf(src.type);
+
+bld.MOV(writemask(tmp, mask), src);
+if (n < 4)
+   bld.MOV(writemask(tmp, ~mask), 0);
+
+return emit_stride(bld, src_reg(tmp), n, has_simd4x2 ? 1 : 4, 1);
+ }
+  }
+
+  /**
+   * Convert an array of registers back into a VEC4 according to the
+   * layout expected from some shared unit.  If \p has_simd4x2 is true the
+   * argument is left unmodified in SIMD4x2 form, otherwise it will be
+   *

[Mesa-dev] [PATCH v5 28/70] glsl: add std430 interface packing support to ssbo related operations

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

v2:
- Get interface packing information from interface's type, not the variable 
type.
- Simplify is_std430 condition in emit_access() for readability (Jordan)
- Add a comment explaining why the array of three-component vectors case is
  different in std430 from the rest of the cases.
- Add calls to std430_array_stride().

Signed-off-by: Samuel Iglesias Gonsalvez 
---
 src/glsl/lower_ubo_reference.cpp | 102 ++-
 1 file changed, 78 insertions(+), 24 deletions(-)

diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp
index 8694383..7e45a26 100644
--- a/src/glsl/lower_ubo_reference.cpp
+++ b/src/glsl/lower_ubo_reference.cpp
@@ -147,7 +147,8 @@ public:
 ir_rvalue **offset,
 unsigned *const_offset,
 bool *row_major,
-int *matrix_columns);
+int *matrix_columns,
+unsigned packing);
ir_expression *ubo_load(const struct glsl_type *type,
   ir_rvalue *offset);
ir_call *ssbo_load(const struct glsl_type *type,
@@ -164,7 +165,7 @@ public:
void emit_access(bool is_write, ir_dereference *deref,
 ir_variable *base_offset, unsigned int deref_offset,
 bool row_major, int matrix_columns,
-unsigned write_mask);
+bool is_std430, unsigned write_mask);
 
ir_visitor_status visit_enter(class ir_expression *);
ir_expression *calculate_ssbo_unsized_array_length(ir_expression *expr);
@@ -176,7 +177,8 @@ public:
 ir_variable *);
ir_expression *emit_ssbo_get_buffer_size();
 
-   unsigned calculate_unsized_array_stride(ir_dereference *deref);
+   unsigned calculate_unsized_array_stride(ir_dereference *deref,
+   unsigned packing);
 
void *mem_ctx;
struct gl_shader *shader;
@@ -257,7 +259,8 @@ 
lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
  ir_rvalue **offset,
  unsigned *const_offset,
  bool *row_major,
- int *matrix_columns)
+ int *matrix_columns,
+ unsigned packing)
 {
/* Determine the name of the interface block */
ir_rvalue *nonconst_block_index;
@@ -343,8 +346,15 @@ 
lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
 const bool array_row_major =
is_dereferenced_thing_row_major(deref_array);
 
-array_stride = deref_array->type->std140_size(array_row_major);
-array_stride = glsl_align(array_stride, 16);
+/* The array type will give the correct interface packing
+ * information
+ */
+if (packing == GLSL_INTERFACE_PACKING_STD430) {
+   array_stride = 
deref_array->type->std430_array_stride(array_row_major);
+} else {
+   array_stride = deref_array->type->std140_size(array_row_major);
+   array_stride = glsl_align(array_stride, 16);
+}
  }
 
  ir_rvalue *array_index = deref_array->array_index;
@@ -380,7 +390,12 @@ 
lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
 
 ralloc_free(field_deref);
 
-unsigned field_align = 
type->std140_base_alignment(field_row_major);
+unsigned field_align = 0;
+
+if (packing == GLSL_INTERFACE_PACKING_STD430)
+   field_align = type->std430_base_alignment(field_row_major);
+else
+   field_align = type->std140_base_alignment(field_row_major);
 
 intra_struct_offset = glsl_align(intra_struct_offset, field_align);
 
@@ -388,7 +403,10 @@ 
lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
deref_record->field) == 0)
break;
 
-intra_struct_offset += type->std140_size(field_row_major);
+if (packing == GLSL_INTERFACE_PACKING_STD430)
+   intra_struct_offset += type->std430_size(field_row_major);
+else
+   intra_struct_offset += type->std140_size(field_row_major);
 
 /* If the field just examined was itself a structure, apply rule
  * #9:
@@ -437,13 +455,15 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue 
**rvalue)
unsigned const_offset;
bool row_major;
int matrix_columns;
+   unsigned packing = var->get_interface_type()->interface_packing;
 
/* Compute the offset

[Mesa-dev] [PATCH v5 26/70] glsl: Add parser/compiler support for std430 interface packing qualifier

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

v2:
- Fix a missing check in has_layout()

Signed-off-by: Samuel Iglesias Gonsalvez 
Reviewed-by: Jordan Justen 
---
 src/glsl/ast.h   |  1 +
 src/glsl/ast_to_hir.cpp  | 20 
 src/glsl/ast_type.cpp|  2 ++
 src/glsl/glsl_parser.yy  |  2 ++
 src/glsl/glsl_types.h|  3 ++-
 src/glsl/link_uniform_blocks.cpp | 15 ---
 src/mesa/main/mtypes.h   |  3 ++-
 7 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/src/glsl/ast.h b/src/glsl/ast.h
index cca32b3..4c31436 100644
--- a/src/glsl/ast.h
+++ b/src/glsl/ast.h
@@ -491,6 +491,7 @@ struct ast_type_qualifier {
 /** \name Layout qualifiers for GL_ARB_uniform_buffer_object */
 /** \{ */
  unsigned std140:1;
+ unsigned std430:1;
  unsigned shared:1;
  unsigned packed:1;
  unsigned column_major:1;
diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 92038a6..566cc87 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2920,11 +2920,12 @@ apply_type_qualifier_to_variable(const struct 
ast_type_qualifier *qual,
var->data.depth_layout = ir_depth_layout_none;
 
if (qual->flags.q.std140 ||
+   qual->flags.q.std430 ||
qual->flags.q.packed ||
qual->flags.q.shared) {
   _mesa_glsl_error(loc, state,
-   "uniform block layout qualifiers std140, packed, and "
-   "shared can only be applied to uniform blocks, not "
+   "uniform block layout qualifiers std140, std430, 
packed, "
+   "and shared can only be applied to uniform blocks, not "
"members");
}
 
@@ -5691,12 +5692,14 @@ ast_process_structure_or_interface_block(exec_list 
*instructions,
  const struct ast_type_qualifier *const qual =
 & decl_list->type->qualifier;
  if (qual->flags.q.std140 ||
+ qual->flags.q.std430 ||
  qual->flags.q.packed ||
  qual->flags.q.shared) {
 _mesa_glsl_error(, state,
  "uniform/shader storage block layout qualifiers "
- "std140, packed, and shared can only be applied "
- "to uniform/shader storage blocks, not members");
+ "std140, std430, packed, and shared can only be "
+ "applied to uniform/shader storage blocks, not "
+ "members");
  }
 
  if (qual->flags.q.constant) {
@@ -5908,6 +5911,13 @@ ast_interface_block::hir(exec_list *instructions,
this->block_name);
}
 
+   if (!this->layout.flags.q.buffer &&
+   this->layout.flags.q.std430) {
+  _mesa_glsl_error(, state,
+   "std430 storage block layout qualifier is supported "
+   "only for shader storage blocks");
+   }
+
/* The ast_interface_block has a list of ast_declarator_lists.  We
 * need to turn those into ir_variables with an association
 * with this uniform block.
@@ -5917,6 +5927,8 @@ ast_interface_block::hir(exec_list *instructions,
   packing = GLSL_INTERFACE_PACKING_SHARED;
} else if (this->layout.flags.q.packed) {
   packing = GLSL_INTERFACE_PACKING_PACKED;
+   } else if (this->layout.flags.q.std430) {
+  packing = GLSL_INTERFACE_PACKING_STD430;
} else {
   /* The default layout is std140.
*/
diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp
index a4671e2..08a4504 100644
--- a/src/glsl/ast_type.cpp
+++ b/src/glsl/ast_type.cpp
@@ -65,6 +65,7 @@ ast_type_qualifier::has_layout() const
   || this->flags.q.depth_less
   || this->flags.q.depth_unchanged
   || this->flags.q.std140
+  || this->flags.q.std430
   || this->flags.q.shared
   || this->flags.q.column_major
   || this->flags.q.row_major
@@ -123,6 +124,7 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
ubo_layout_mask.flags.q.std140 = 1;
ubo_layout_mask.flags.q.packed = 1;
ubo_layout_mask.flags.q.shared = 1;
+   ubo_layout_mask.flags.q.std430 = 1;
 
ast_type_qualifier ubo_binding_mask;
ubo_binding_mask.flags.i = 0;
diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index 028974e..4cb018a 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -1199,6 +1199,8 @@ layout_qualifier_id:
 $$.flags.q.std140 = 1;
  } else if (match_layout_qualifier($1, "shared", state) == 0) {
 $$.flags.q.shared = 1;
+ } else if (match_layout_qualifier($1, "std430", state) == 0) {
+$$.flags.q.std430 = 1;
  } else if (match_layout_qualifier($1, "column_major", state) == 0) {
 $$.flags.q.column_major = 1;

[Mesa-dev] [PATCH v5 18/70] i965/fs/nir: implement nir_intrinsic_get_buffer_size

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

Signed-off-by: Samuel Iglesias Gonsalvez 
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 25 +
 1 file changed, 25 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index a6c6a2f..3c55a12 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1702,6 +1702,31 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , 
nir_intrinsic_instr *instr
   emit_barrier();
   break;
 
+   case nir_intrinsic_get_buffer_size: {
+  nir_const_value *const_uniform_block = 
nir_src_as_const_value(instr->src[0]);
+  unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
+  int reg_width = dispatch_width / 8;
+
+  assert(shader->base.UniformBlocks[ubo_index].IsShaderStorage);
+
+  /* Set LOD = 0 */
+  fs_reg source = fs_reg(0);
+
+  int mlen = 1 * reg_width;
+  fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
+  BRW_REGISTER_TYPE_UD);
+  bld.LOAD_PAYLOAD(src_payload, , 1, 0);
+
+  fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start + 
ubo_index);
+  fs_inst *inst = bld.emit(FS_OPCODE_GET_BUFFER_SIZE, dest,
+   src_payload, surf_index);
+  inst->header_size = 0;
+  inst->mlen = mlen;
+  inst->regs_written = 4 * reg_width;
+  bld.emit(inst);
+  break;
+   }
+
default:
   unreachable("unknown intrinsic");
}
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 19/70] i965/wm: emit null buffer surfaces when null buffers are attached

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

Otherwise we can expect odd things to happen if, for example, we ask
for the size of the attached buffer from shader code, since that
might query this value from the surface we uploaded and get random
results.

Signed-off-by: Samuel Iglesias Gonsalvez 
Reviewed-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 44 ++--
 1 file changed, 26 insertions(+), 18 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 4c96572..5b73c28 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -932,28 +932,36 @@ brw_upload_ubo_surfaces(struct brw_context *brw,
  struct gl_uniform_buffer_binding *binding;
  binding =
 >UniformBufferBindings[shader->UniformBlocks[i].Binding];
- intel_bo = intel_buffer_object(binding->BufferObject);
- drm_intel_bo *bo =
-intel_bufferobj_buffer(brw, intel_bo,
-   binding->Offset,
-   binding->BufferObject->Size - 
binding->Offset);
- brw_create_constant_surface(brw, bo, binding->Offset,
- bo->size - binding->Offset,
- _offsets[i],
- dword_pitch);
+ if (binding->BufferObject == ctx->Shared->NullBufferObj) {
+brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, _offsets[i]);
+ } else {
+intel_bo = intel_buffer_object(binding->BufferObject);
+drm_intel_bo *bo =
+   intel_bufferobj_buffer(brw, intel_bo,
+  binding->Offset,
+  binding->BufferObject->Size - 
binding->Offset);
+brw_create_constant_surface(brw, bo, binding->Offset,
+bo->size - binding->Offset,
+_offsets[i],
+dword_pitch);
+ }
   } else {
  struct gl_shader_storage_buffer_binding *binding;
  binding =
 
>ShaderStorageBufferBindings[shader->UniformBlocks[i].Binding];
- intel_bo = intel_buffer_object(binding->BufferObject);
- drm_intel_bo *bo =
-intel_bufferobj_buffer(brw, intel_bo,
-   binding->Offset,
-   binding->BufferObject->Size - 
binding->Offset);
- brw_create_buffer_surface(brw, bo, binding->Offset,
-   bo->size - binding->Offset,
-   _offsets[i],
-   dword_pitch);
+ if (binding->BufferObject == ctx->Shared->NullBufferObj) {
+brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, _offsets[i]);
+ } else {
+intel_bo = intel_buffer_object(binding->BufferObject);
+drm_intel_bo *bo =
+   intel_bufferobj_buffer(brw, intel_bo,
+  binding->Offset,
+  binding->BufferObject->Size - 
binding->Offset);
+brw_create_buffer_surface(brw, bo, binding->Offset,
+  bo->size - binding->Offset,
+  _offsets[i],
+  dword_pitch);
+ }
   }
}
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 24/70] glsl: allow default qualifiers for shader storage block definitions

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

This kind of definition:

layout(xxx) buffer;

was not supported by commit 84fc5fece006.

Signed-off-by: Samuel Iglesias Gonsalvez 
---
 src/glsl/glsl_parser.yy | 46 -
 src/glsl/glsl_parser_extras.cpp | 20 --
 src/glsl/glsl_parser_extras.h   |  7 +++
 3 files changed, 70 insertions(+), 3 deletions(-)

diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index 7f00929..028974e 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -169,6 +169,7 @@ static bool match_layout_qualifier(const char *s1, const 
char *s2,
 %token  IDENTIFIER TYPE_IDENTIFIER NEW_IDENTIFIER
 %type  any_identifier
 %type  instance_name_opt
+%type  buffer_instance_name_opt
 %token  FLOATCONSTANT
 %token  DOUBLECONSTANT
 %token  INTCONSTANT UINTCONSTANT BOOLCONSTANT
@@ -218,6 +219,7 @@ static bool match_layout_qualifier(const char *s1, const 
char *s2,
 %type  subroutine_qualifier
 %type  subroutine_type_list
 %type  interface_qualifier
+%type  buffer_interface_qualifier
 %type  type_specifier
 %type  type_specifier_nonarray
 %type  array_specifier
@@ -2638,6 +2640,17 @@ basic_interface_block:
 
   $$ = block;
}
+   | buffer_interface_qualifier NEW_IDENTIFIER '{' member_list '}' 
buffer_instance_name_opt ';'
+   {
+  ast_interface_block *const block = $6;
+
+  block->block_name = $2;
+  block->declarations.push_degenerate_list_at_head(& $4->link);
+
+  _mesa_ast_process_interface_block(& @1, state, block, $1);
+
+  $$ = block;
+   }
;
 
 interface_qualifier:
@@ -2656,7 +2669,10 @@ interface_qualifier:
   memset(& $$, 0, sizeof($$));
   $$.flags.q.uniform = 1;
}
-   | BUFFER
+   ;
+
+buffer_interface_qualifier:
+   BUFFER
{
   memset(& $$, 0, sizeof($$));
   $$.flags.q.buffer = 1;
@@ -2683,6 +2699,26 @@ instance_name_opt:
}
;
 
+buffer_instance_name_opt:
+   /* empty */
+   {
+  $$ = new(state) 
ast_interface_block(*state->default_shader_storage_qualifier,
+  NULL, NULL);
+   }
+   | NEW_IDENTIFIER
+   {
+  $$ = new(state) 
ast_interface_block(*state->default_shader_storage_qualifier,
+  $1, NULL);
+  $$->set_location(@1);
+   }
+   | NEW_IDENTIFIER array_specifier
+   {
+  $$ = new(state) 
ast_interface_block(*state->default_shader_storage_qualifier,
+  $1, $2);
+  $$->set_location_range(@1, @2);
+   }
+   ;
+
 member_list:
member_declaration
{
@@ -2729,6 +2765,14 @@ layout_defaults:
   $$ = NULL;
}
 
+   | layout_qualifier BUFFER ';'
+   {
+  if (!state->default_shader_storage_qualifier->merge_qualifier(& @1, 
state, $1)) {
+ YYERROR;
+  }
+  $$ = NULL;
+   }
+
| layout_qualifier IN_TOK ';'
{
   $$ = NULL;
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 0a3f745..7a8d430 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -244,6 +244,11 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct 
gl_context *_ctx,
this->default_uniform_qualifier->flags.q.column_major = 1;
this->default_uniform_qualifier->is_default_qualifier = true;
 
+   this->default_shader_storage_qualifier = new(this) ast_type_qualifier();
+   this->default_shader_storage_qualifier->flags.q.shared = 1;
+   this->default_shader_storage_qualifier->flags.q.column_major = 1;
+   this->default_shader_storage_qualifier->is_default_qualifier = true;
+
this->fs_uses_gl_fragcoord = false;
this->fs_redeclares_gl_fragcoord = false;
this->fs_origin_upper_left = false;
@@ -864,7 +869,17 @@ _mesa_ast_process_interface_block(YYLTYPE *locp,
   ast_interface_block *const block,
   const struct ast_type_qualifier q)
 {
-   if (q.flags.q.uniform) {
+   if (q.flags.q.buffer) {
+  if (!state->has_shader_storage_buffer_objects()) {
+ _mesa_glsl_error(locp, state,
+  "#version 430 / GL_ARB_shader_storage_buffer_object "
+  "required for defining shader storage blocks");
+  } else if (state->ARB_shader_storage_buffer_object_warn) {
+ _mesa_glsl_warning(locp, state,
+"#version 430 / 
GL_ARB_shader_storage_buffer_object "
+"required for defining shader storage blocks");
+  }
+   } else if (q.flags.q.uniform) {
   if (!state->has_uniform_buffer_objects()) {
  _mesa_glsl_error(locp, state,
   "#version 140 / GL_ARB_uniform_buffer_object "
@@ -908,7 +923,7 @@ _mesa_ast_process_interface_block(YYLTYPE *locp,
uint64_t interface_type_mask;
struct ast_type_qualifier temp_type_qualifier;
 
-   /* Get a bitmask containing only the in/out/uniform
+   /* Get a

[Mesa-dev] [PATCH v5 30/70] glsl: number of active shader storage blocks must be within allowed limits

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

Notice that we should differentiate between shader storage blocks and
uniform blocks, since they have different limits.

Signed-off-by: Samuel Iglesias Gonsalvez 
Reviewed-by: Jordan Justen 
---
 src/glsl/linker.cpp | 43 +++
 1 file changed, 39 insertions(+), 4 deletions(-)

diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index cf9f1f6..a9a6dc5 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -2826,6 +2826,8 @@ check_resources(struct gl_context *ctx, struct 
gl_shader_program *prog)
 
unsigned blocks[MESA_SHADER_STAGES] = {0};
unsigned total_uniform_blocks = 0;
+   unsigned shader_blocks[MESA_SHADER_STAGES] = {0};
+   unsigned total_shader_storage_blocks = 0;
 
for (unsigned i = 0; i < prog->NumUniformBlocks; i++) {
   if (prog->UniformBlocks[i].UniformBufferSize > 
ctx->Const.MaxUniformBlockSize) {
@@ -2837,8 +2839,15 @@ check_resources(struct gl_context *ctx, struct 
gl_shader_program *prog)
 
   for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) {
 if (prog->UniformBlockStageIndex[j][i] != -1) {
-   blocks[j]++;
-   total_uniform_blocks++;
+struct gl_shader *sh = prog->_LinkedShaders[j];
+int stage_index = prog->UniformBlockStageIndex[j][i];
+if (sh && sh->UniformBlocks[stage_index].IsShaderStorage) {
+   shader_blocks[j]++;
+   total_shader_storage_blocks++;
+} else {
+   blocks[j]++;
+   total_uniform_blocks++;
+}
 }
   }
 
@@ -2859,6 +2868,24 @@ check_resources(struct gl_context *ctx, struct 
gl_shader_program *prog)
}
 }
   }
+
+  if (total_shader_storage_blocks > 
ctx->Const.MaxCombinedShaderStorageBlocks) {
+ linker_error(prog, "Too many combined shader storage blocks 
(%d/%d)\n",
+  total_shader_storage_blocks,
+  ctx->Const.MaxCombinedShaderStorageBlocks);
+  } else {
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+const unsigned max_shader_storage_blocks =
+   ctx->Const.Program[i].MaxShaderStorageBlocks;
+if (shader_blocks[i] > max_shader_storage_blocks) {
+   linker_error(prog, "Too many %s shader storage blocks 
(%d/%d)\n",
+_mesa_shader_stage_to_string(i),
+shader_blocks[i],
+max_shader_storage_blocks);
+   break;
+}
+ }
+  }
}
 }
 
@@ -2913,6 +2940,7 @@ check_image_resources(struct gl_context *ctx, struct 
gl_shader_program *prog)
 {
unsigned total_image_units = 0;
unsigned fragment_outputs = 0;
+   unsigned total_shader_storage_blocks = 0;
 
if (!ctx->Extensions.ARB_shader_image_load_store)
   return;
@@ -2928,6 +2956,12 @@ check_image_resources(struct gl_context *ctx, struct 
gl_shader_program *prog)
 
  total_image_units += sh->NumImages;
 
+ for (unsigned j = 0; j < prog->NumUniformBlocks; j++) {
+int stage_index = prog->UniformBlockStageIndex[i][j];
+if (stage_index != -1 && 
sh->UniformBlocks[stage_index].IsShaderStorage)
+   total_shader_storage_blocks++;
+ }
+
  if (i == MESA_SHADER_FRAGMENT) {
 foreach_in_list(ir_instruction, node, sh->ir) {
ir_variable *var = node->as_variable();
@@ -2941,9 +2975,10 @@ check_image_resources(struct gl_context *ctx, struct 
gl_shader_program *prog)
if (total_image_units > ctx->Const.MaxCombinedImageUniforms)
   linker_error(prog, "Too many combined image uniforms\n");
 
-   if (total_image_units + fragment_outputs >
+   if (total_image_units + fragment_outputs + total_shader_storage_blocks >
ctx->Const.MaxCombinedShaderOutputResources)
-  linker_error(prog, "Too many combined image uniforms and fragment 
outputs\n");
+  linker_error(prog, "Too many combined image uniforms, shader storage "
+ " buffers and fragment outputs\n");
 }
 
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 25/70] glsl: Add std430 related member functions to glsl_type class

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

They are used to calculate size, base alignment and array stride values
for a glsl_type following std430 rules.

Signed-off-by: Samuel Iglesias Gonsalvez 
---
 src/glsl/glsl_types.cpp | 209 
 src/glsl/glsl_types.h   |  19 +
 2 files changed, 228 insertions(+)

diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp
index 755618a..d97991a 100644
--- a/src/glsl/glsl_types.cpp
+++ b/src/glsl/glsl_types.cpp
@@ -1357,6 +1357,215 @@ glsl_type::std140_size(bool row_major) const
return -1;
 }
 
+unsigned
+glsl_type::std430_base_alignment(bool row_major) const
+{
+
+   unsigned N = is_double() ? 8 : 4;
+
+   /* (1) If the member is a scalar consuming <N> basic machine units, the
+* base alignment is <N>.
+*
+* (2) If the member is a two- or four-component vector with components
+* consuming <N> basic machine units, the base alignment is 2<N> or
+* 4<N>, respectively.
+*
+* (3) If the member is a three-component vector with components consuming
+* <N> basic machine units, the base alignment is 4<N>.
+*/
+   if (this->is_scalar() || this->is_vector()) {
+  switch (this->vector_elements) {
+  case 1:
+ return N;
+  case 2:
+ return 2 * N;
+  case 3:
+  case 4:
+ return 4 * N;
+  }
+   }
+
+   /* OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout":
+*
+* "When using the "std430" storage layout, shader storage
+* blocks will be laid out in buffer storage identically to uniform and
+* shader storage blocks using the "std140" layout, except that the base
+* alignment of arrays of scalars and vectors in rule (4) and of structures
+* in rule (9) are not rounded up a multiple of the base alignment of a
+* vec4."
+*/
+
+   /* (1) If the member is a scalar consuming <N> basic machine units, the
+* base alignment is <N>.
+*
+* (2) If the member is a two- or four-component vector with components
+* consuming <N> basic machine units, the base alignment is 2<N> or
+* 4<N>, respectively.
+*
+* (3) If the member is a three-component vector with components consuming
+* <N> basic machine units, the base alignment is 4<N>.
+*/
+   if (this->is_array())
+  return this->fields.array->std430_base_alignment(row_major);
+
+   /* (5) If the member is a column-major matrix with <C> columns and
+* <R> rows, the matrix is stored identically to an array of
+* <C> column vectors with <R> components each, according to
+* rule (4).
+*
+* (7) If the member is a row-major matrix with <C> columns and <R>
+* rows, the matrix is stored identically to an array of <R>
+* row vectors with <C> components each, according to rule (4).
+*/
+   if (this->is_matrix()) {
+  const struct glsl_type *vec_type, *array_type;
+  int c = this->matrix_columns;
+  int r = this->vector_elements;
+
+  if (row_major) {
+ vec_type = get_instance(base_type, c, 1);
+ array_type = glsl_type::get_array_instance(vec_type, r);
+  } else {
+ vec_type = get_instance(base_type, r, 1);
+ array_type = glsl_type::get_array_instance(vec_type, c);
+  }
+
+  return array_type->std430_base_alignment(false);
+   }
+
+  /* (9) If the member is a structure, the base alignment of the
+* structure is <N>, where <N> is the largest base alignment
+* value of any of its members, and rounded up to the base
+* alignment of a vec4. The individual members of this
+* sub-structure are then assigned offsets by applying this set
+* of rules recursively, where the base offset of the first
+* member of the sub-structure is equal to the aligned offset
+* of the structure. The structure may have padding at the end;
+* the base offset of the member following the sub-structure is
+* rounded up to the next multiple of the base alignment of the
+* structure.
+*/
+   if (this->is_record()) {
+  unsigned base_alignment = 0;
+  for (unsigned i = 0; i < this->length; i++) {
+ bool field_row_major = row_major;
+ const enum glsl_matrix_layout matrix_layout =
+glsl_matrix_layout(this->fields.structure[i].matrix_layout);
+ if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) {
+field_row_major = true;
+ } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) {
+field_row_major = false;
+ }
+
+ const struct glsl_type *field_type = this->fields.structure[i].type;
+ base_alignment = MAX2(base_alignment,
+   
field_type->std430_base_alignment(field_row_major));
+  }
+  return base_alignment;
+   }
+   assert(!"not reached");
+   return -1;
+}
+
+unsigned
+glsl_type::std430_array_stride(bool row_major) const
+{
+   unsigned N =

[Mesa-dev] [PATCH v5 21/70] glsl: layout qualifier can appear more than once since OpenGL 4.20

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

This also applies when the GL_ARB_shading_language_420pack extension is enabled.

Signed-off-by: Samuel Iglesias Gonsalvez 
---
 src/glsl/glsl_parser.yy | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index 59e4527..4dd5384 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -2595,9 +2595,15 @@ interface_block:
{
   $$ = $1;
}
-   | layout_qualifier basic_interface_block
+   | layout_qualifier interface_block
{
-  ast_interface_block *block = $2;
+  ast_interface_block *block = (ast_interface_block *) $2;
+
+  if (!state->has_420pack() && block->layout.has_layout()) {
+ _mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers");
+ YYERROR;
+  }
+
   if (!block->layout.merge_qualifier(& @1, state, $1)) {
  YYERROR;
   }
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 22/70] glsl: ignore default qualifier declarations when checking for duplicate layout qualifiers

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

Signed-off-by: Samuel Iglesias Gonsalvez 
---
 src/glsl/ast.h  | 3 +++
 src/glsl/glsl_parser.yy | 5 -
 src/glsl/glsl_parser_extras.cpp | 1 +
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/glsl/ast.h b/src/glsl/ast.h
index d8c6cea..335f426 100644
--- a/src/glsl/ast.h
+++ b/src/glsl/ast.h
@@ -639,6 +639,9 @@ struct ast_type_qualifier {
 */
glsl_base_type image_base_type;
 
+   /** Flag to know if this represents a default value for a qualifier */
+   bool is_default_qualifier;
+
/**
 * Return true if and only if an interpolation qualifier is present.
 */
diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index 4dd5384..42108a3 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -2599,7 +2599,8 @@ interface_block:
{
   ast_interface_block *block = (ast_interface_block *) $2;
 
-  if (!state->has_420pack() && block->layout.has_layout()) {
+  if (!state->has_420pack() && block->layout.has_layout() &&
+  !block->layout.is_default_qualifier) {
  _mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers");
  YYERROR;
   }
@@ -2608,6 +2609,8 @@ interface_block:
  YYERROR;
   }
 
+  block->layout.is_default_qualifier = false;
+
   foreach_list_typed (ast_declarator_list, member, link, 
>declarations) {
  ast_type_qualifier& qualifier = member->type->qualifier;
  if (qualifier.flags.q.stream && qualifier.stream != 
block->layout.stream) {
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 939a03c..20bbe6e 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -242,6 +242,7 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct 
gl_context *_ctx,
this->default_uniform_qualifier = new(this) ast_type_qualifier();
this->default_uniform_qualifier->flags.q.shared = 1;
this->default_uniform_qualifier->flags.q.column_major = 1;
+   this->default_uniform_qualifier->is_default_qualifier = true;
 
this->fs_uses_gl_fragcoord = false;
this->fs_redeclares_gl_fragcoord = false;
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 91889] Planetary Anihilation: Titans display content of other processes buffers

2015-09-10 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=91889

--- Comment #9 from Albert Freeman  ---
When I said "bug in the game", I meant "bug with the game".

Can you upload another trace? Please try to reproduce the bug as quickly as
possible: parts of the trace can't be skipped, because at any time the game
could be uploading resources to the GPU or setting state that is likely to be
used later. Can you also take screenshots of the actual issue, so I can
compare your screenshots to my apitrace replay?

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 7/9] softpipe: Add functions for computing mipmap level

2015-09-10 Thread Krzesimir Nowak

On Wed, Sep 9, 2015 at 8:19 PM, Ilia Mirkin  wrote:

> On Wed, Sep 9, 2015 at 2:17 PM, Roland Scheidegger 
> wrote:
> > Am 09.09.2015 um 12:35 schrieb Krzesimir Nowak:
> >> These functions will be used by textureQueryLod.
> >> ---
> >>  src/gallium/drivers/softpipe/sp_tex_sample.c | 100
> +--
> >>  src/gallium/drivers/softpipe/sp_tex_sample.h |   7 ++
> >>  2 files changed, 101 insertions(+), 6 deletions(-)
> >>
> >> diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c
> b/src/gallium/drivers/softpipe/sp_tex_sample.c
> >> index cdec984..6e639e0 100644
> >> --- a/src/gallium/drivers/softpipe/sp_tex_sample.c
> >> +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
> >> @@ -1937,6 +1937,38 @@ get_gather_component(const float
> lod_in[TGSI_QUAD_SIZE])
> >>  }
> >>
> >>  static void
> >> +clamp_lod(const struct sp_sampler_view *sp_sview,
> >> +  const struct sp_sampler *sp_samp,
> >> +  const float lod[TGSI_QUAD_SIZE],
> >> +  float clamped[TGSI_QUAD_SIZE])
> >> +{
> >> +   const float min_lod = sp_samp->base.min_lod;
> >> +   const float max_lod = sp_samp->base.max_lod;
> >> +   const float min_level = sp_sview->base.u.tex.first_level;
> >> +   const float max_level = sp_sview->base.u.tex.last_level;
> >> +   int i;
> >> +
> >> +   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
> >> +  float cl = lod[i];
> >> +
> >> +  cl = CLAMP(cl, min_lod, max_lod);
> >> +  /* XXX: Is min_level ever different from 0?
> > I think the comment is bogus. min_level can easily be different from 0,
> > at least when using ARB_texture_view afaik (playing around with base
> > level might also cause this to be non-zero, though I'm not too familiar
> > with what the state tracker does for those legacy GL weirdness things).
>
> Either setting the base level or a texture view (or both!) can cause
> the min level to be 0.
>

I believe you wanted to say that setting base level or a texture view can
cause min level to be different from 0?

Anyway, I scrapped the comment.


>
> >
> >> +   */
> >> +  cl = CLAMP(cl, 0, max_level - min_level);
> >> +  clamped[i] = cl;
> >> +   }
> >> +}
> >> +
> >> +static void
> >> +mip_level_linear(struct sp_sampler_view *sp_sview,
> >> + struct sp_sampler *sp_samp,
> >> + const float lod[TGSI_QUAD_SIZE],
> >> + float level[TGSI_QUAD_SIZE])
> >> +{
> >> +   clamp_lod(sp_sview, sp_samp, lod, level);
> >> +}
> >> +
> >> +static void
> >>  mip_filter_linear(struct sp_sampler_view *sp_sview,
> >>struct sp_sampler *sp_samp,
> >>img_filter_func min_filter,
> >> @@ -1998,6 +2030,23 @@ mip_filter_linear(struct sp_sampler_view
> *sp_sview,
> >>  }
> >>
> >>
> >> +static void
> >> +mip_level_nearest(struct sp_sampler_view *sp_sview,
> >> +  struct sp_sampler *sp_samp,
> >> +  const float lod[TGSI_QUAD_SIZE],
> >> +  float level[TGSI_QUAD_SIZE])
> >> +{
> >> +   const int first_level = sp_sview->base.u.tex.first_level;
> >> +   int j;
> >> +
> >> +   clamp_lod(sp_sview, sp_samp, lod, level);
> >> +   for (j = 0; j < TGSI_QUAD_SIZE; j++)
> >> +  /* TODO: It should rather be:
> >> +   * level[j] = first_level + ceil(level[j] + 0.5F) - 1.0F;
> >> +   */
> >> +  level[j] = first_level + (int)(level[j] + 0.5F);
> >> +}
> >> +
> >>  /**
> >>   * Compute nearest mipmap level from texcoords.
> >>   * Then sample the texture level for four elements of a quad.
> >> @@ -2050,6 +2099,19 @@ mip_filter_nearest(struct sp_sampler_view
> *sp_sview,
> >>
> >>
> >>  static void
> >> +mip_level_none(struct sp_sampler_view *sp_sview,
> >> +   struct sp_sampler *sp_samp,
> >> +   const float lod[TGSI_QUAD_SIZE],
> >> +   float level[TGSI_QUAD_SIZE])
> >> +{
> >> +   int j;
> >> +
> >> +   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
> >> +  level[j] = sp_sview->base.u.tex.first_level;
> >> +   }
> >> +}
> >> +
> >> +static void
> >>  mip_filter_none(struct sp_sampler_view *sp_sview,
> >>  struct sp_sampler *sp_samp,
> >>  img_filter_func min_filter,
> >> @@ -2088,6 +2150,15 @@ mip_filter_none(struct sp_sampler_view *sp_sview,
> >>
> >>
> >>  static void
> >> +mip_level_none_no_filter_select(struct sp_sampler_view *sp_sview,
> >> +struct sp_sampler *sp_samp,
> >> +const float lod[TGSI_QUAD_SIZE],
> >> +float level[TGSI_QUAD_SIZE])
> >> +{
> >> +   mip_level_none(sp_sview, sp_samp, lod, level);
> >> +}
> >> +
> >> +static void
> >>  mip_filter_none_no_filter_select(struct sp_sampler_view *sp_sview,
> >>   struct sp_sampler *sp_samp,
> >>   img_filter_func min_filter,
> >> @@ -2339,6 +2410,15 @@ img_filter_2d_ewa(struct sp_sampler_view
> *sp_sview,
> >>  }
> >>
>

Re: [Mesa-dev] [PATCH] ralloc: Use attribute((destructor)) instead of atexit(3)

2015-09-10 Thread Jose Fonseca


On 10/09/15 02:54, Ian Romanick wrote:

On 09/07/2015 06:54 AM, Jose Fonseca wrote:

On 07/09/15 10:17, Jean-Sébastien Pédron wrote:

On 04.09.2015 01:37, Matt Turner wrote:

You need to test for this support in configure.ac. It's as simple as
adding a call to AX_GCC_FUNC_ATTRIBUTE in the existing alphabetized
list and then a little bit of preprocessor in src/util/macros.h.


Should the code fall back on atexit(3) if the attribute is not
supported?


At least on Windows, with MSVC,  atexit should be the right thing to do,
since we statically link MSVC RunTime,



Can I use the HAVE_FUNC_ATTRIBUTE_DESTRUCTOR macro in
ralloc.c for this purpose?


For the record, another alternative (way more portable) is to have a
simple .cpp file with a static destructor:


Arg. :(  We used to have code that used this in several places, but that
idiom has gradually been removed.  See c61bc6ed (although that was using
a constructor to initialize instead of a destructor to free).  I don't
care too much what idiom we pick, but I think we should pick one and
stick with it.


It's not necessary to do it in several places, at least for the 
destructor -- we could have a single C++ module inside src/util, which 
provided a "dl_atexit".


I don't feel too strongly either way, but my impression is that the 
hassle of doing this with non-standard vendor-specific C extensions is 
greater than having a small C++ file somewhere.


For the constructor problem, we could just rely on the standard C11 
once_flag/call_once.


Jose
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/5] nir: Don't insert a fake link if unnecessary.

2015-09-10 Thread Connor Abbott

On Wed, Sep 9, 2015 at 1:37 AM, Kenneth Graunke  wrote:
> On Saturday, September 05, 2015 11:10:58 AM Connor Abbott wrote:
>> On Sat, Sep 5, 2015 at 2:31 AM, Kenneth Graunke  
>> wrote:
>> > On Friday, September 04, 2015 11:56:29 AM Connor Abbott wrote:
>> >> I'm confused as to how this can happen. The fake link is only for the
>> >> situation where we have an infinite loop:
>
> Okay...
>
>> In that case, I would check that this is true:
>>
>> assert(last->successors[0] ==
>> nir_cf_node_as_block(nir_loop_first_cf_node(loop)));
>> assert(last->successors[1] == NULL);
>>
>> That is, the last block of the loop should only have one successor
>> which is the beginning of the loop. You can also check that
>> cf_node_prev(>cf_node) is actually a loop (it seems I was lazy
>> with that).
>
> So, looking at tests/shaders/loopfunc.shader_test, I'm seeing the
> following NIR before opt_dead_cf() runs:
>
> decl_var shader_in  vec4 gl_Vertex (0, 0)
> decl_var shader_out  vec4 gl_FrontColor (1, 0)
> decl_var shader_out  vec4 gl_Position (0, 0)
> decl_overload main returning void
>
> impl main {
> decl_var vec4 const_temp
> decl_var vec4 gl_Position@0
> decl_var vec4 gl_FrontColor@1
> block block_0 (0x7ab640):
> /* preds: */
> vec4 ssa_2 = load_const (0x00000000 /* 0.000000 */, 0x3f800000 /* 
> 1.000000 */, 0x00000000 /* 0.000000 */, 0x3f800000 /* 1.000000 */)
> /* succs: block_1 (0x7ab880) */
> loop {
> block block_1 (0x7ab880):
> /* preds: block_0 (0x7ab640) */
> break
> /* succs: block_2 (0x7a9db0) */
> }
> block block_2 (0x7a9db0):
> /* preds: block_1 (0x7ab880) */
> vec4 ssa_4 = intrinsic load_var () (gl_Vertex) ()
> intrinsic store_var (ssa_4) (gl_Position) ()
> intrinsic store_var (ssa_2) (gl_FrontColor) ()
> /* succs: block_3 (0x7ab760) */
> block block_3:
> }
>
> Because the loop contains an unconditional break, the block does only
> have one successor - but to the block after the loop.  This seems
> reasonable...but it's not an infinite loop.
>
> So, we extract the loop, delete the extracted section...which calls
> unlink_jump on the "break".  This gives block_1 a fake link, so both
> successors then point at block_2, and things go very wrong.
>
> Does this seem like valid NIR to you, Connor?  If so, I think we need
> to adjust the fake-link conditions to account for this...

Oh, ok, I see... it wasn't correctly handling the case where the loop
ends in a break, where we have to add both new edges (i.e. the edge to
the beginning of the loop and the fake edge to after the loop) *after*
unlinking the break. Sigh... so it was broken even without the
fake-edge code, but doing this papered over the problem. Does that
seem right?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 68/70] i965: Enable ARB_shader_storage_buffer_object extension for gen7+

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

Signed-off-by: Samuel Iglesias Gonsalvez 
---
 src/mesa/drivers/dri/i965/intel_extensions.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
b/src/mesa/drivers/dri/i965/intel_extensions.c
index 3c764be..445b61a 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -329,6 +329,7 @@ intelInitExtensions(struct gl_context *ctx)
   ctx->Extensions.ARB_shader_image_size = true;
   ctx->Extensions.ARB_texture_compression_bptc = true;
   ctx->Extensions.ARB_texture_view = true;
+  ctx->Extensions.ARB_shader_storage_buffer_object = true;
 
   if (can_do_pipelined_register_writes(brw)) {
  ctx->Extensions.ARB_draw_indirect = true;
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 65/70] glapi: add ARB_shader_storage_block_buffer_object

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

Signed-off-by: Samuel Iglesias Gonsalvez 
Reviewed-by: Jordan Justen 
---
 .../glapi/gen/ARB_shader_storage_buffer_object.xml | 36 ++
 src/mapi/glapi/gen/GL4x.xml| 18 ++-
 src/mapi/glapi/gen/Makefile.am |  1 +
 src/mapi/glapi/gen/gl_API.xml  |  6 +++-
 4 files changed, 59 insertions(+), 2 deletions(-)
 create mode 100644 src/mapi/glapi/gen/ARB_shader_storage_buffer_object.xml

diff --git a/src/mapi/glapi/gen/ARB_shader_storage_buffer_object.xml 
b/src/mapi/glapi/gen/ARB_shader_storage_buffer_object.xml
new file mode 100644
index 0000000..6901bdf
--- /dev/null
+++ b/src/mapi/glapi/gen/ARB_shader_storage_buffer_object.xml
@@ -0,0 +1,36 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/mapi/glapi/gen/GL4x.xml b/src/mapi/glapi/gen/GL4x.xml
index dee5027..dd48c83 100644
--- a/src/mapi/glapi/gen/GL4x.xml
+++ b/src/mapi/glapi/gen/GL4x.xml
@@ -41,7 +41,23 @@
 
 
 
-  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
 
 
 
diff --git a/src/mapi/glapi/gen/Makefile.am b/src/mapi/glapi/gen/Makefile.am
index 7d8dfcb..a5a26a6 100644
--- a/src/mapi/glapi/gen/Makefile.am
+++ b/src/mapi/glapi/gen/Makefile.am
@@ -153,6 +153,7 @@ API_XML = \
ARB_shader_atomic_counters.xml \
ARB_shader_image_load_store.xml \
ARB_shader_subroutine.xml \
+   ARB_shader_storage_buffer_object.xml \
ARB_sync.xml \
ARB_tessellation_shader.xml \
ARB_texture_barrier.xml \
diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
index f0dcdca..ec83cd4 100644
--- a/src/mapi/glapi/gen/gl_API.xml
+++ b/src/mapi/glapi/gen/gl_API.xml
@@ -8211,7 +8211,11 @@
 
 http://www.w3.org/2001/XInclude"/>
 
-
+
+
+http://www.w3.org/2001/XInclude"/>
+
+
 
 http://www.w3.org/2001/XInclude"/>
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 55/70] mesa: add glShaderStorageBlockBinding()

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

Defined in ARB_shader_storage_buffer_object extension.

Signed-off-by: Samuel Iglesias Gonsalvez 
---
 src/mesa/main/uniforms.c | 52 
 src/mesa/main/uniforms.h |  4 
 2 files changed, 56 insertions(+)

diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c
index 973b877..c491707 100644
--- a/src/mesa/main/uniforms.c
+++ b/src/mesa/main/uniforms.c
@@ -1036,6 +1036,58 @@ _mesa_UniformBlockBinding(GLuint program,
}
 }
 
+void GLAPIENTRY
+_mesa_ShaderStorageBlockBinding(GLuint program,
+   GLuint shaderStorageBlockIndex,
+   GLuint shaderStorageBlockBinding)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct gl_shader_program *shProg;
+
+   if (!ctx->Extensions.ARB_shader_storage_buffer_object) {
+  _mesa_error(ctx, GL_INVALID_OPERATION, "glShaderStorageBlockBinding");
+  return;
+   }
+
+   shProg = _mesa_lookup_shader_program_err(ctx, program,
+   "glShaderStorageBlockBinding");
+   if (!shProg)
+  return;
+
+   if (shaderStorageBlockIndex >= shProg->NumUniformBlocks) {
+  _mesa_error(ctx, GL_INVALID_VALUE,
+ "glShaderStorageBlockBinding(block index %u >= %u)",
+ shaderStorageBlockIndex, shProg->NumUniformBlocks);
+  return;
+   }
+
+   if (shaderStorageBlockBinding >= ctx->Const.MaxShaderStorageBufferBindings) 
{
+  _mesa_error(ctx, GL_INVALID_VALUE,
+ "glShaderStorageBlockBinding(block binding %u >= %u)",
+ shaderStorageBlockBinding,
+  ctx->Const.MaxShaderStorageBufferBindings);
+  return;
+   }
+
+   if (shProg->UniformBlocks[shaderStorageBlockIndex].Binding !=
+   shaderStorageBlockBinding) {
+  int i;
+
+  FLUSH_VERTICES(ctx, 0);
+  ctx->NewDriverState |= ctx->DriverFlags.NewShaderStorageBuffer;
+
+  shProg->UniformBlocks[shaderStorageBlockIndex].Binding = 
shaderStorageBlockBinding;
+
+  for (i = 0; i < MESA_SHADER_STAGES; i++) {
+int stage_index = 
shProg->UniformBlockStageIndex[i][shaderStorageBlockIndex];
+
+if (stage_index != -1) {
+   struct gl_shader *sh = shProg->_LinkedShaders[i];
+   sh->UniformBlocks[stage_index].Binding = shaderStorageBlockBinding;
+}
+  }
+   }
+}
 
 /**
  * Generic program resource property query.
diff --git a/src/mesa/main/uniforms.h b/src/mesa/main/uniforms.h
index c3c9c1e..96172b7 100644
--- a/src/mesa/main/uniforms.h
+++ b/src/mesa/main/uniforms.h
@@ -225,6 +225,10 @@ _mesa_UniformBlockBinding(GLuint program,
  GLuint uniformBlockIndex,
  GLuint uniformBlockBinding);
 void GLAPIENTRY
+_mesa_ShaderStorageBlockBinding(GLuint program,
+GLuint shaderStorageBlockIndex,
+GLuint shaderStorageBlockBinding);
+void GLAPIENTRY
 _mesa_GetActiveAtomicCounterBufferiv(GLuint program, GLuint bufferIndex,
  GLenum pname, GLint *params);
 void GLAPIENTRY
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 64/70] main/tests: add ARB_shader_storage_buffer_object tokens to enum_strings

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

Signed-off-by: Samuel Iglesias Gonsalvez 
---
 src/mesa/main/tests/enum_strings.cpp | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/src/mesa/main/tests/enum_strings.cpp 
b/src/mesa/main/tests/enum_strings.cpp
index 8218cc9..96b2246 100644
--- a/src/mesa/main/tests/enum_strings.cpp
+++ b/src/mesa/main/tests/enum_strings.cpp
@@ -1780,6 +1780,7 @@ const struct enum_info everything[] = {
{ 0x8E5F, "GL_MAX_PROGRAM_TEXTURE_GATHER_OFFSET" },
{ 0x8F36, "GL_COPY_READ_BUFFER" },
{ 0x8F37, "GL_COPY_WRITE_BUFFER" },
+   { 0x8F39, "GL_MAX_COMBINED_SHADER_OUTPUT_RESOURCES" },
{ 0x8F90, "GL_RED_SNORM" },
{ 0x8F91, "GL_RG_SNORM" },
{ 0x8F92, "GL_RGB_SNORM" },
@@ -1797,6 +1798,20 @@ const struct enum_info everything[] = {
{ 0x8F9E, "GL_PRIMITIVE_RESTART_INDEX" },
{ 0x8F9F, "GL_MAX_PROGRAM_TEXTURE_GATHER_COMPONENTS_ARB" },
{ 0x906F, "GL_RGB10_A2UI" },
+   { 0x90D2, "GL_SHADER_STORAGE_BUFFER" },
+   { 0x90D3, "GL_SHADER_STORAGE_BUFFER_BINDING" },
+   { 0x90D4, "GL_SHADER_STORAGE_BUFFER_START" },
+   { 0x90D5, "GL_SHADER_STORAGE_BUFFER_SIZE" },
+   { 0x90D6, "GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS" },
+   { 0x90D7, "GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS" },
+   { 0x90D8, "GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS" },
+   { 0x90D9, "GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS" },
+   { 0x90DA, "GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS" },
+   { 0x90DB, "GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS" },
+   { 0x90DC, "GL_MAX_COMBINED_SHADER_STORAGE_BLOCKS" },
+   { 0x90DD, "GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS" },
+   { 0x90DE, "GL_MAX_SHADER_STORAGE_BLOCK_SIZE" },
+   { 0x90DF, "GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT" },
{ 0x9100, "GL_TEXTURE_2D_MULTISAMPLE" },
{ 0x9101, "GL_PROXY_TEXTURE_2D_MULTISAMPLE" },
{ 0x9102, "GL_TEXTURE_2D_MULTISAMPLE_ARRAY" },
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 67/70] mesa: enable ARB_shader_storage_buffer_object extension for GLES 3.1

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

Signed-off-by: Samuel Iglesias Gonsalvez 
Reviewed-by: Tapani Pälli 
---
 src/glsl/glsl_parser_extras.cpp | 2 +-
 src/glsl/glsl_parser_extras.h   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 7a8d430..beae272 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -608,7 +608,7 @@ static const _mesa_glsl_extension 
_mesa_glsl_supported_extensions[] = {
EXT(ARB_shader_image_size,true,  false, 
ARB_shader_image_size),
EXT(ARB_shader_precision, true,  false, 
ARB_shader_precision),
EXT(ARB_shader_stencil_export,true,  false, 
ARB_shader_stencil_export),
-   EXT(ARB_shader_storage_buffer_object, true,  false, 
ARB_shader_storage_buffer_object),
+   EXT(ARB_shader_storage_buffer_object, true,  true,  
ARB_shader_storage_buffer_object),
EXT(ARB_shader_subroutine,true,  false, 
ARB_shader_subroutine),
EXT(ARB_shader_texture_lod,   true,  false, 
ARB_shader_texture_lod),
EXT(ARB_shading_language_420pack, true,  false, 
ARB_shading_language_420pack),
diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h
index c2ebf16..3f517fa 100644
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -217,7 +217,7 @@ struct _mesa_glsl_parse_state {
 
bool has_shader_storage_buffer_objects() const
{
-  return ARB_shader_storage_buffer_object_enable || is_version(430, 0);
+  return ARB_shader_storage_buffer_object_enable || is_version(430, 310);
}
 
bool has_separate_shader_objects() const
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 69/70] docs: Mark ARB_shader_storage_buffer_object as done for i965

2015-09-10 Thread Iago Toral Quiroga

v2:
- Mark it too for GLES 3.1
---
 docs/GL3.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 561f204..b451e41 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -164,7 +164,7 @@ GL 4.3, GLSL 4.30:
   GL_ARB_program_interface_query   DONE (all drivers)
   GL_ARB_robust_buffer_access_behavior not started
   GL_ARB_shader_image_size DONE (i965)
-  GL_ARB_shader_storage_buffer_object  in progress (Iago 
Toral, Samuel Iglesias)
+  GL_ARB_shader_storage_buffer_object  DONE (i965)
   GL_ARB_stencil_texturing DONE (i965/gen8+, nv50, 
nvc0, r600, radeonsi, llvmpipe, softpipe)
   GL_ARB_texture_buffer_range  DONE (nv50, nvc0, i965, 
r600, radeonsi, llvmpipe)
   GL_ARB_texture_query_levels  DONE (all drivers that 
support GLSL 1.30)
@@ -212,7 +212,7 @@ GLES3.1, GLSL ES 3.1
   GL_ARB_shader_atomic_countersDONE (i965)
   GL_ARB_shader_image_load_store   DONE (i965)
   GL_ARB_shader_image_size DONE (i965)
-  GL_ARB_shader_storage_buffer_object  in progress (Iago 
Toral, Samuel Iglesias)
+  GL_ARB_shader_storage_buffer_object  DONE (i965)
   GL_ARB_shading_language_packing  DONE (all drivers)
   GL_ARB_separate_shader_objects   DONE (all drivers)
   GL_ARB_stencil_texturing DONE (i965/gen8+, nv50, 
nvc0, r600, radeonsi, llvmpipe, softpipe)
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 70/70] glsl: Mark as active all elements of shared/std140 block arrays

2015-09-10 Thread Iago Toral Quiroga

From: Antia Puentes 

Commit 1ca25ab (glsl: Do not eliminate 'shared' or 'std140' blocks
or block members) considered as active 'shared' and 'std140' uniform
blocks and uniform block arrays, but did not include the block array
elements. Because of that, it was possible to have an active uniform
block array without any elements marked as used, making the assertion
   ((b->num_array_elements > 0) == b->type->is_array())
in link_uniform_blocks() fail.

Fixes the following 5 dEQP tests:

 * dEQP-GLES3.functional.ubo.random.nested_structs_instance_arrays.18
 * dEQP-GLES3.functional.ubo.random.nested_structs_instance_arrays.24
 * dEQP-GLES3.functional.ubo.random.nested_structs_arrays_instance_arrays.19
 * dEQP-GLES3.functional.ubo.random.all_per_block_buffers.49
 * dEQP-GLES3.functional.ubo.random.all_shared_buffer.36

Fixes bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=83508
---
 src/glsl/link_uniform_block_active_visitor.cpp | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/src/glsl/link_uniform_block_active_visitor.cpp 
b/src/glsl/link_uniform_block_active_visitor.cpp
index 5102947..fbe79de 100644
--- a/src/glsl/link_uniform_block_active_visitor.cpp
+++ b/src/glsl/link_uniform_block_active_visitor.cpp
@@ -106,6 +106,22 @@ link_uniform_block_active_visitor::visit(ir_variable *var)
assert(b->num_array_elements == 0);
assert(b->array_elements == NULL);
assert(b->type != NULL);
+   assert(!b->type->is_array() || b->has_instance_name);
+
+   /* For uniform block arrays declared with a shared or std140 layout
+* qualifier, mark all its instances as used.
+*/
+   if (b->type->is_array() && b->type->length > 0) {
+  b->num_array_elements = b->type->length;
+  b->array_elements = reralloc(this->mem_ctx,
+   b->array_elements,
+   unsigned,
+   b->num_array_elements);
+
+  for (unsigned i = 0; i < b->num_array_elements; i++) {
+ b->array_elements[i] = i;
+  }
+   }
 
return visit_continue;
 }
@@ -147,6 +163,13 @@ 
link_uniform_block_active_visitor::visit_enter(ir_dereference_array *ir)
assert((b->num_array_elements == 0) == (b->array_elements == NULL));
assert(b->type != NULL);
 
+   /* If the block array was declared with a shared or
+* std140 layout qualifier, all its instances have been already marked
+* as used in link_uniform_block_active_visitor::visit(ir_variable *).
+*/
+   if (var->type->interface_packing != GLSL_INTERFACE_PACKING_PACKED)
+  return visit_continue_with_parent;
+
ir_constant *c = ir->array_index->as_constant();
 
if (c) {
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 66/70] mesa: Add getters for the GL_ARB_shader_storage_buffer_object max constants

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

v2:
- Add tessellation shader constants support

v3:
- Add GLES 3.1 support.

v4:
- Move the getters to the proper place

Signed-off-by: Samuel Iglesias Gonsalvez 
Reviewed-by: Tapani Pälli 
---
 src/mesa/main/get.c  |  7 +++
 src/mesa/main/get_hash_params.py | 16 
 2 files changed, 23 insertions(+)

diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index f40c98b..1d2d5861 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -369,6 +369,12 @@ static const int extra_ARB_compute_shader_es31[] = {
EXTRA_END
 };
 
+static const int extra_ARB_shader_storage_buffer_object_es31[] = {
+   EXT(ARB_shader_storage_buffer_object),
+   EXTRA_API_ES31,
+   EXTRA_END
+};
+
 EXTRA_EXT(ARB_texture_cube_map);
 EXTRA_EXT(EXT_texture_array);
 EXTRA_EXT(NV_fog_distance);
@@ -417,6 +423,7 @@ EXTRA_EXT(EXT_polygon_offset_clamp);
 EXTRA_EXT(ARB_framebuffer_no_attachments);
 EXTRA_EXT(ARB_tessellation_shader);
 EXTRA_EXT(ARB_shader_subroutine);
+EXTRA_EXT(ARB_shader_storage_buffer_object);
 
 static const int
 extra_ARB_color_buffer_float_or_glcore[] = {
diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
index 09e1f0e..063a80a 100644
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -465,6 +465,17 @@ descriptor=[
 
 # GL 4.4 / GLES 3.1
   [ "MAX_VERTEX_ATTRIB_STRIDE", "CONTEXT_ENUM(Const.MaxVertexAttribStride), 
NO_EXTRA" ],
+
+  # GL_ARB_shader_storage_buffer_object / GLES 3.1
+  [ "MAX_VERTEX_SHADER_STORAGE_BLOCKS", 
"CONTEXT_INT(Const.Program[MESA_SHADER_VERTEX].MaxShaderStorageBlocks), 
extra_ARB_shader_storage_buffer_object_es31" ],
+  [ "MAX_FRAGMENT_SHADER_STORAGE_BLOCKS", 
"CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks), 
extra_ARB_shader_storage_buffer_object_es31" ],
+  [ "MAX_COMPUTE_SHADER_STORAGE_BLOCKS", 
"CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxShaderStorageBlocks), 
extra_ARB_shader_storage_buffer_object_es31" ],
+  [ "MAX_COMBINED_SHADER_STORAGE_BLOCKS", 
"CONTEXT_INT(Const.MaxCombinedShaderStorageBlocks), 
extra_ARB_shader_storage_buffer_object_es31" ],
+  [ "MAX_SHADER_STORAGE_BLOCK_SIZE", 
"CONTEXT_INT(Const.MaxShaderStorageBlockSize), 
extra_ARB_shader_storage_buffer_object_es31" ],
+  [ "MAX_SHADER_STORAGE_BUFFER_BINDINGS", 
"CONTEXT_INT(Const.MaxShaderStorageBufferBindings), 
extra_ARB_shader_storage_buffer_object_es31" ],
+  [ "MAX_COMBINED_SHADER_OUTPUT_RESOURCES", 
"CONTEXT_INT(Const.MaxCombinedShaderOutputResources), 
extra_ARB_shader_storage_buffer_object_es31" ],
+  [ "SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT", 
"CONTEXT_INT(Const.ShaderStorageBufferOffsetAlignment), 
extra_ARB_shader_storage_buffer_object_es31" ],
+  [ "SHADER_STORAGE_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, 
extra_ARB_shader_storage_buffer_object_es31" ],
 ]},
 
 # Enums in OpenGL Core profile and ES 3.1
@@ -820,6 +831,11 @@ descriptor=[
 
 # GL_EXT_polygon_offset_clamp
   [ "POLYGON_OFFSET_CLAMP_EXT", "CONTEXT_FLOAT(Polygon.OffsetClamp), 
extra_EXT_polygon_offset_clamp" ],
+
+# GL_ARB_shader_storage_buffer_object
+  [ "MAX_GEOMETRY_SHADER_STORAGE_BLOCKS", 
"CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks), 
extra_ARB_shader_storage_buffer_object" ],
+  [ "MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS", 
"CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxShaderStorageBlocks), 
extra_ARB_shader_storage_buffer_object" ],
+  [ "MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS", 
"CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxShaderStorageBlocks), 
extra_ARB_shader_storage_buffer_object" ],
 ]},
 
 # Enums restricted to OpenGL Core profile
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 03/70] i965: Implement DriverFlags.NewShaderStorageBuffer

2015-09-10 Thread Iago Toral Quiroga

We use the same dirty state for SSBOs and UBOs because they share the
same infrastructure.

Reviewed-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_state_upload.c | 1 +
 src/mesa/drivers/dri/i965/intel_buffer_objects.c | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c 
b/src/mesa/drivers/dri/i965/brw_state_upload.c
index b2ca9c2..0171c29 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -482,6 +482,7 @@ void brw_init_state( struct brw_context *brw )
ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
+   ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c 
b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
index ff05b5c..665303b 100644
--- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
@@ -108,6 +108,8 @@ alloc_buffer_object(struct brw_context *brw,
 */
if (intel_obj->Base.UsageHistory & USAGE_UNIFORM_BUFFER)
   brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER;
+   if (intel_obj->Base.UsageHistory & USAGE_SHADER_STORAGE_BUFFER)
+  brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER;
if (intel_obj->Base.UsageHistory & USAGE_TEXTURE_BUFFER)
   brw->ctx.NewDriverState |= BRW_NEW_TEXTURE_BUFFER;
if (intel_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER)
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 01/70] mesa: set MAX_SHADER_STORAGE_BUFFERS to 15.

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

This patch sets the same value used for uniform buffers.

Signed-off-by: Samuel Iglesias Gonsalvez 
---
 src/mesa/main/config.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h
index b35031d..69acd7d 100644
--- a/src/mesa/main/config.h
+++ b/src/mesa/main/config.h
@@ -171,7 +171,7 @@
 #define MAX_PROGRAM_LOCAL_PARAMS   4096
 #define MAX_UNIFORMS   4096
 #define MAX_UNIFORM_BUFFERS15 /* + 1 default uniform buffer */
-#define MAX_SHADER_STORAGE_BUFFERS 7  /* + 1 default shader storage buffer 
*/
+#define MAX_SHADER_STORAGE_BUFFERS 15  /* + 1 default shader storage 
buffer */
 /* 6 is for vertex, hull, domain, geometry, fragment, and compute shader. */
 #define MAX_COMBINED_UNIFORM_BUFFERS   (MAX_UNIFORM_BUFFERS * 6)
 #define MAX_COMBINED_SHADER_STORAGE_BUFFERS   (MAX_SHADER_STORAGE_BUFFERS * 6)
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 07/70] i965: handle visiting of ir_var_shader_storage variables

2015-09-10 Thread Iago Toral Quiroga

Reviewed-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index f6e59ce..ec41262 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1072,11 +1072,12 @@ vec4_visitor::visit(ir_variable *ir)
   break;
 
case ir_var_uniform:
+   case ir_var_shader_storage:
   reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
 
   /* Thanks to the lower_ubo_reference pass, we will see only
-   * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
-   * variables, so no need for them to be in variable_ht.
+   * ir_binop_{ubo,ssbo}_load expressions and not ir_dereference_variable
+   * for UBO/SSBO variables, so no need for them to be in variable_ht.
*
* Some uniforms, such as samplers and atomic counters, have no actual
* storage, so we should ignore them.
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 14/70] i965/vec4: Implement VS_OPCODE_GET_BUFFER_SIZE

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

Notice that Skylake needs to include a header in the sampler message
so it will need some tweaks to work there.

Signed-off-by: Samuel Iglesias Gonsalvez 
---
 src/mesa/drivers/dri/i965/brw_defines.h  |  3 +++
 src/mesa/drivers/dri/i965/brw_shader.cpp |  3 +++
 src/mesa/drivers/dri/i965/brw_vec4.cpp   |  1 +
 src/mesa/drivers/dri/i965/brw_vec4.h |  6 +
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 31 
 5 files changed, 44 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index a8594af..9695c27 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1083,6 +1083,9 @@ enum opcode {
VS_OPCODE_PULL_CONSTANT_LOAD,
VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
+
+   VS_OPCODE_GET_BUFFER_SIZE,
+
VS_OPCODE_UNPACK_FLAGS_SIMD4X2,
 
/**
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index de1a7fe..991ccc6 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -740,6 +740,9 @@ brw_instruction_name(enum opcode op)
case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
   return "set_simd4x2_header_gen9";
 
+   case VS_OPCODE_GET_BUFFER_SIZE:
+  return "vs_get_buffer_size";
+
case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
   return "unpack_flags_simd4x2";
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 85dc372..d6a562f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -331,6 +331,7 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
case SHADER_OPCODE_TXS:
case SHADER_OPCODE_TG4:
case SHADER_OPCODE_TG4_OFFSET:
+   case VS_OPCODE_GET_BUFFER_SIZE:
   return inst->header_size;
default:
   unreachable("not reached");
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index 01c6e84..c0e46ad 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -559,6 +559,12 @@ private:
  struct brw_reg offset);
void generate_set_simd4x2_header_gen9(vec4_instruction *inst,
  struct brw_reg dst);
+
+   void generate_get_buffer_size(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src,
+ struct brw_reg index);
+
void generate_unpack_flags(struct brw_reg dst);
 
const struct brw_compiler *compiler;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 92050b9..3435cdc 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -1029,6 +1029,32 @@ 
vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
 }
 
 void
+vec4_generator::generate_get_buffer_size(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src,
+ struct brw_reg surf_index)
+{
+   assert(devinfo->gen >= 7);
+   assert(surf_index.type == BRW_REGISTER_TYPE_UD &&
+  surf_index.file == BRW_IMMEDIATE_VALUE);
+
+   brw_SAMPLE(p,
+  dst,
+  inst->base_mrf,
+  src,
+  surf_index.dw1.ud,
+  0,
+  GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO,
+  1, /* response length */
+  inst->mlen,
+  inst->header_size > 0,
+  BRW_SAMPLER_SIMD_MODE_SIMD4X2,
+  BRW_SAMPLER_RETURN_FORMAT_SINT32);
+
+   brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
+}
+
+void
 vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
  struct brw_reg dst,
  struct brw_reg surf_index,
@@ -1401,6 +1427,11 @@ vec4_generator::generate_code(const cfg_t *cfg)
  generate_set_simd4x2_header_gen9(inst, dst);
  break;
 
+
+  case VS_OPCODE_GET_BUFFER_SIZE:
+ generate_get_buffer_size(inst, dst, src[0], src[1]);
+ break;
+
   case GS_OPCODE_URB_WRITE:
  generate_gs_urb_write(inst);
  break;
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 16/70] i965/vec4: Implement ir_unop_get_buffer_size

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

Signed-off-by: Samuel Iglesias Gonsalvez 
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 24 ++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index c186f48..e6515dd 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1793,9 +1793,29 @@ vec4_visitor::visit(ir_expression *ir)
   emit(RNDE(result_dst, op[0]));
   break;
 
-   case ir_unop_get_buffer_size:
-  unreachable("not reached: not implemented");
+   case ir_unop_get_buffer_size: {
+  ir_constant *const_uniform_block = ir->operands[0]->as_constant();
+  unsigned ubo_index = const_uniform_block->value.u[0];
+  assert(shader->base.UniformBlocks[ubo_index].IsShaderStorage);
+
+  src_reg surf_index = src_reg(prog_data->base.binding_table.ubo_start +
+   ubo_index);
+  vec4_instruction *inst = new(mem_ctx)
+ vec4_instruction(VS_OPCODE_GET_BUFFER_SIZE, result_dst);
+
+  inst->base_mrf = 2;
+  inst->mlen = 1; /* always at least one */
+  inst->src[1] = src_reg(surf_index);
+
+  /* MRF for the first parameter */
+  src_reg lod = src_reg(0);
+  int param_base = inst->base_mrf;
+  int writemask = WRITEMASK_X;
+  emit(MOV(dst_reg(MRF, param_base, glsl_type::int_type, writemask), lod));
+
+  emit(inst);
   break;
+   }
 
case ir_binop_min:
   emit_minmax(BRW_CONDITIONAL_L, result_dst, op[0], op[1]);
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 05/70] i965: Set MaxShaderStorageBuffers for compute shaders

2015-09-10 Thread Iago Toral Quiroga

v2:
- Set it after the driver's MaxShaderStorageBuffers value assignment.

Reviewed-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_context.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 9982049..73acb57 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -574,6 +574,9 @@ brw_initialize_context_constants(struct brw_context *brw)
ctx->Const.MaxCombinedShaderStorageBlocks = 12 * 3;
ctx->Const.MaxShaderStorageBufferBindings = 36;
 
+   if (_mesa_extension_override_enables.ARB_compute_shader)
+  ctx->Const.MaxShaderStorageBufferBindings += 12;
+
if (brw->gen >= 6) {
   ctx->Const.MaxVarying = 32;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 20/70] i965/wm: surfaces should have the API buffer size, not the drm buffer size

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

The returned drm buffer object has a size multiple of 4096 but that should not
be exposed to the API user, which is working with a different size.

As far as I can see this problem is only visible in the calculation of the
length of unsized arrays used in SSBOs, as the implementation of this needs
to query the underlying buffer size via a message.

Signed-off-by: Samuel Iglesias Gonsalvez 
Reviewed-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 5b73c28..395bbe4 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -941,7 +941,7 @@ brw_upload_ubo_surfaces(struct brw_context *brw,
   binding->Offset,
   binding->BufferObject->Size - 
binding->Offset);
 brw_create_constant_surface(brw, bo, binding->Offset,
-bo->size - binding->Offset,
+binding->BufferObject->Size - 
binding->Offset,
 &surf_offsets[i],
 dword_pitch);
  }
@@ -958,7 +958,7 @@ brw_upload_ubo_surfaces(struct brw_context *brw,
   binding->Offset,
   binding->BufferObject->Size - 
binding->Offset);
 brw_create_buffer_surface(brw, bo, binding->Offset,
-  bo->size - binding->Offset,
+  binding->BufferObject->Size - 
binding->Offset,
   &surf_offsets[i],
   dword_pitch);
  }
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 15/70] i965/vec4/nir: implement nir_intrinsic_get_buffer_size

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

Signed-off-by: Samuel Iglesias Gonsalvez 
---
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 751ec73..f47b029 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -530,6 +530,32 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr 
*instr)
   break;
}
 
+   case nir_intrinsic_get_buffer_size: {
+  nir_const_value *const_uniform_block = 
nir_src_as_const_value(instr->src[0]);
+  unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
+
+  assert(shader->base.UniformBlocks[ubo_index].IsShaderStorage);
+
+  src_reg surf_index = src_reg(prog_data->base.binding_table.ubo_start +
+   ubo_index);
+  dst_reg result_dst = get_nir_dest(instr->dest);
+  vec4_instruction *inst = new(mem_ctx)
+ vec4_instruction(VS_OPCODE_GET_BUFFER_SIZE, result_dst);
+
+  inst->base_mrf = 2;
+  inst->mlen = 1; /* always at least one */
+  inst->src[1] = src_reg(surf_index);
+
+  /* MRF for the first parameter */
+  src_reg lod = src_reg(0);
+  int param_base = inst->base_mrf;
+  int writemask = WRITEMASK_X;
+  emit(MOV(dst_reg(MRF, param_base, glsl_type::int_type, writemask), lod));
+
+  emit(inst);
+  break;
+   }
+
case nir_intrinsic_load_vertex_id:
   unreachable("should be lowered by lower_vertex_id()");
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 12/70] glsl: implement unsized array length

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

v2:
- Reduce the number of lines over 80 character line width
  limit. (Thomas Hellan)

v3:
- Inject the formula to compute the array length in the IR, backends
  only need to provide the buffer size (Curro)
- Create an auxiliary function to simplify code (Jordan Justen)
- Rename variables (Jordan Justen)

Signed-off-by: Samuel Iglesias Gonsalvez 
---
 src/glsl/lower_ubo_reference.cpp | 182 +++
 1 file changed, 182 insertions(+)

diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp
index 8b08107..8694383 100644
--- a/src/glsl/lower_ubo_reference.cpp
+++ b/src/glsl/lower_ubo_reference.cpp
@@ -166,6 +166,18 @@ public:
 bool row_major, int matrix_columns,
 unsigned write_mask);
 
+   ir_visitor_status visit_enter(class ir_expression *);
+   ir_expression *calculate_ssbo_unsized_array_length(ir_expression *expr);
+   void check_ssbo_unsized_array_length_expression(class ir_expression *);
+   void check_ssbo_unsized_array_length_assignment(ir_assignment *ir);
+
+   ir_expression *process_ssbo_unsized_array_length(ir_rvalue **,
+ir_dereference *,
+ir_variable *);
+   ir_expression *emit_ssbo_get_buffer_size();
+
+   unsigned calculate_unsized_array_stride(ir_dereference *deref);
+
void *mem_ctx;
struct gl_shader *shader;
struct gl_uniform_buffer_variable *ubo_var;
@@ -738,6 +750,175 @@ 
lower_ubo_reference_visitor::write_to_memory(ir_dereference *deref,
row_major, matrix_columns, write_mask);
 }
 
+ir_visitor_status
+lower_ubo_reference_visitor::visit_enter(ir_expression *ir)
+{
+   check_ssbo_unsized_array_length_expression(ir);
+   return rvalue_visit(ir);
+}
+
+ir_expression *
+lower_ubo_reference_visitor::calculate_ssbo_unsized_array_length(ir_expression 
*expr)
+{
+   if (expr->operation !=
+   ir_expression_operation(ir_unop_ssbo_unsized_array_length))
+  return NULL;
+
+   ir_rvalue *rvalue = expr->operands[0]->as_rvalue();
+   if (!rvalue ||
+   !rvalue->type->is_array() || !rvalue->type->is_unsized_array())
+  return NULL;
+
+   ir_dereference *deref = expr->operands[0]->as_dereference();
+   if (!deref)
+  return NULL;
+
+   ir_variable *var = expr->operands[0]->variable_referenced();
+   if (!var || !var->is_in_shader_storage_block())
+  return NULL;
+   return process_ssbo_unsized_array_length(&rvalue, deref, var);
+}
+
+void
+lower_ubo_reference_visitor::check_ssbo_unsized_array_length_expression(ir_expression
 *ir)
+{
+   if (ir->operation ==
+   ir_expression_operation(ir_unop_ssbo_unsized_array_length)) {
+ /* Don't replace this unop if it is found alone. It is going to be
+  * removed by the optimization passes or replaced if it is part of
+  * an ir_assignment or another ir_expression.
+  */
+ return;
+   }
+
+   for (unsigned i = 0; i < ir->get_num_operands(); i++) {
+  if (ir->operands[i]->ir_type != ir_type_expression)
+ continue;
+  ir_expression *expr = (ir_expression *) ir->operands[i];
+  ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
+  if (!temp)
+ continue;
+
+  delete expr;
+  ir->operands[i] = temp;
+   }
+}
+
+void
+lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assignment
 *ir)
+{
+   if (!ir->rhs || ir->rhs->ir_type != ir_type_expression)
+  return;
+
+   ir_expression *expr = (ir_expression *) ir->rhs;
+   ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
+   if (!temp)
+  return;
+
+   delete expr;
+   ir->rhs = temp;
+   return;
+}
+
+ir_expression *
+lower_ubo_reference_visitor::emit_ssbo_get_buffer_size()
+{
+   ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
+   return new(mem_ctx) ir_expression(ir_unop_get_buffer_size,
+ glsl_type::int_type,
+ block_ref);
+}
+
+unsigned
+lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference 
*deref)
+{
+   unsigned array_stride = 0;
+
+   switch (deref->ir_type) {
+   case ir_type_dereference_variable:
+   {
+  ir_dereference_variable *deref_var = (ir_dereference_variable *)deref;
+  const struct glsl_type *unsized_array_type = NULL;
+  /* An unsized array can be sized by other lowering passes, so pick
+   * the first field of the array which has the data type of the unsized
+   * array.
+   */
+  unsized_array_type = deref_var->var->type->fields.array;
+
+  /* Whether or not the field is row-major (because it might be a
+   * bvec2 or something) does not affect the array itself. We need
+   * to know whether an array element in its entirety is row-major.
+   */
+  const bool array_row_major =
+

[Mesa-dev] [PATCH v5 08/70] i965/fs: Do not split buffer variables

2015-09-10 Thread Iago Toral Quiroga

Buffer variables are the same as uniforms, except that they are read/write,
so we want the same treatment.

Reviewed-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
index 96d4f37..b8eb3fd 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
@@ -104,6 +104,7 @@ ir_vector_reference_visitor::get_variable_entry(ir_variable 
*var)
 
switch (var->data.mode) {
case ir_var_uniform:
+   case ir_var_shader_storage:
case ir_var_shader_in:
case ir_var_shader_out:
case ir_var_system_value:
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 02/70] i965: Use 16-byte offset alignment for shader storage buffers

2015-09-10 Thread Iago Toral Quiroga

This is the same as we do for other things like uniforms, because it ensures
optimal performance.

Reviewed-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_context.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 907b2a0..c8e8a68 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -558,6 +558,7 @@ brw_initialize_context_constants(struct brw_context *brw)
 * However, unaligned accesses are slower, so enforce buffer alignment.
 */
ctx->Const.UniformBufferOffsetAlignment = 16;
+   ctx->Const.ShaderStorageBufferOffsetAlignment = 16;
ctx->Const.TextureBufferOffsetAlignment = 16;
ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 23/70] glsl: refactor parser processing of an interface block definition

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

---
 src/glsl/ast.h  |   5 ++
 src/glsl/glsl_parser.yy | 127 +---
 src/glsl/glsl_parser_extras.cpp | 122 ++
 3 files changed, 128 insertions(+), 126 deletions(-)

diff --git a/src/glsl/ast.h b/src/glsl/ast.h
index 335f426..cca32b3 100644
--- a/src/glsl/ast.h
+++ b/src/glsl/ast.h
@@ -1172,4 +1172,9 @@ extern void
 check_builtin_array_max_size(const char *name, unsigned size,
  YYLTYPE loc, struct _mesa_glsl_parse_state 
*state);
 
+extern void _mesa_ast_process_interface_block(YYLTYPE *locp,
+  _mesa_glsl_parse_state *state,
+  ast_interface_block *const block,
+  const struct ast_type_qualifier 
q);
+
 #endif /* AST_H */
diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index 42108a3..7f00929 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -2634,132 +2634,7 @@ basic_interface_block:
   block->block_name = $2;
   block->declarations.push_degenerate_list_at_head(& $4->link);
 
-  if ($1.flags.q.buffer) {
- if (!state->has_shader_storage_buffer_objects()) {
-_mesa_glsl_error(& @1, state,
- "#version 430 / 
GL_ARB_shader_storage_buffer_object "
- "required for defining shader storage blocks");
- } else if (state->ARB_shader_storage_buffer_object_warn) {
-_mesa_glsl_warning(& @1, state,
-   "#version 430 / 
GL_ARB_shader_storage_buffer_object "
-   "required for defining shader storage blocks");
- }
-  } else if ($1.flags.q.uniform) {
- if (!state->has_uniform_buffer_objects()) {
-_mesa_glsl_error(& @1, state,
- "#version 140 / GL_ARB_uniform_buffer_object "
- "required for defining uniform blocks");
- } else if (state->ARB_uniform_buffer_object_warn) {
-_mesa_glsl_warning(& @1, state,
-   "#version 140 / GL_ARB_uniform_buffer_object "
-   "required for defining uniform blocks");
- }
-  } else {
- if (state->es_shader || state->language_version < 150) {
-_mesa_glsl_error(& @1, state,
- "#version 150 required for using "
- "interface blocks");
- }
-  }
-
-  /* From the GLSL 1.50.11 spec, section 4.3.7 ("Interface Blocks"):
-   * "It is illegal to have an input block in a vertex shader
-   *  or an output block in a fragment shader"
-   */
-  if ((state->stage == MESA_SHADER_VERTEX) && $1.flags.q.in) {
- _mesa_glsl_error(& @1, state,
-  "`in' interface block is not allowed for "
-  "a vertex shader");
-  } else if ((state->stage == MESA_SHADER_FRAGMENT) && $1.flags.q.out) {
- _mesa_glsl_error(& @1, state,
-  "`out' interface block is not allowed for "
-  "a fragment shader");
-  }
-
-  /* Since block arrays require names, and both features are added in
-   * the same language versions, we don't have to explicitly
-   * version-check both things.
-   */
-  if (block->instance_name != NULL) {
- state->check_version(150, 300, & @1, "interface blocks with "
-   "an instance name are not allowed");
-  }
-
-  uint64_t interface_type_mask;
-  struct ast_type_qualifier temp_type_qualifier;
-
-  /* Get a bitmask containing only the in/out/uniform/buffer
-   * flags, allowing us to ignore other irrelevant flags like
-   * interpolation qualifiers.
-   */
-  temp_type_qualifier.flags.i = 0;
-  temp_type_qualifier.flags.q.uniform = true;
-  temp_type_qualifier.flags.q.buffer = true;
-  temp_type_qualifier.flags.q.in = true;
-  temp_type_qualifier.flags.q.out = true;
-  interface_type_mask = temp_type_qualifier.flags.i;
-
-  /* Get the block's interface qualifier.  The interface_qualifier
-   * production rule guarantees that only one bit will be set (and
-   * it will be in/out/uniform).
-   */
-  uint64_t block_interface_qualifier = $1.flags.i;
-
-  block->layout.flags.i |= block_interface_qualifier;
-
-  if (state->stage == MESA_SHADER_GEOMETRY &&
-  state->has_explicit_attrib_stream()) {
- /* Assign global layout's stream value. */
- block->layout.flags.q.stream = 1;
- block->layout.flags.q.explicit_stream = 0;
- block->layout.stream = state->out_qualifier->stream;
-  }
-
-  foreach_list_typed (ast_declarator_list, member, link,

[Mesa-dev] [PATCH v5 17/70] i965/fs: Implement FS_OPCODE_GET_BUFFER_SIZE

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

Signed-off-by: Samuel Iglesias Gonsalvez 
---
 src/mesa/drivers/dri/i965/brw_defines.h|  1 +
 src/mesa/drivers/dri/i965/brw_fs.cpp   |  1 +
 src/mesa/drivers/dri/i965/brw_fs.h |  3 ++
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 47 ++
 src/mesa/drivers/dri/i965/brw_shader.cpp   |  3 ++
 5 files changed, 55 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 9695c27..37fe308 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1066,6 +1066,7 @@ enum opcode {
FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7,
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD,
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
+   FS_OPCODE_GET_BUFFER_SIZE,
FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
FS_OPCODE_DISCARD_JUMP,
FS_OPCODE_SET_SAMPLE_ID,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index d240371..576c61a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -880,6 +880,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
   return 1;
case FS_OPCODE_FB_WRITE:
   return 2;
+   case FS_OPCODE_GET_BUFFER_SIZE:
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
case SHADER_OPCODE_GEN4_SCRATCH_READ:
   return 1;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index dd0526a..b48134e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -427,6 +427,9 @@ private:
 struct brw_reg *src);
void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
  struct brw_reg sampler_index);
+   void generate_get_buffer_size(fs_inst *inst, struct brw_reg dst,
+ struct brw_reg src,
+ struct brw_reg surf_index);
void generate_math_gen6(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src0,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index c86ca04..a8943a4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -542,6 +542,50 @@ fs_generator::generate_math_g45(fs_inst *inst,
 }
 
 void
+fs_generator::generate_get_buffer_size(fs_inst *inst,
+   struct brw_reg dst,
+   struct brw_reg src,
+   struct brw_reg surf_index)
+{
+   assert(devinfo->gen >= 7);
+   assert(surf_index.file == BRW_IMMEDIATE_VALUE);
+
+   uint32_t simd_mode;
+   int rlen = 4;
+
+   switch (inst->exec_size) {
+   case 8:
+  simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
+  break;
+   case 16:
+  simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
+  break;
+   default:
+  unreachable("Invalid width for texture instruction");
+   }
+
+   if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
+  rlen = 8;
+  dst = vec16(dst);
+   }
+
+   brw_SAMPLE(p,
+  retype(dst, BRW_REGISTER_TYPE_UW),
+  inst->base_mrf,
+  src,
+  surf_index.dw1.ud,
+  0,
+  GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO,
+  rlen, /* response length */
+  inst->mlen,
+  inst->header_size > 0,
+  simd_mode,
+  BRW_SAMPLER_RETURN_FORMAT_SINT32);
+
+   brw_mark_surface_used(prog_data, surf_index.dw1.ud);
+}
+
+void
 fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg 
src,
struct brw_reg sampler_index)
 {
@@ -1908,6 +1952,9 @@ fs_generator::generate_code(const cfg_t *cfg, int 
dispatch_width)
  src[0].subnr = 4 * type_sz(src[0].type);
  brw_MOV(p, dst, stride(src[0], 8, 4, 1));
  break;
+  case FS_OPCODE_GET_BUFFER_SIZE:
+ generate_get_buffer_size(inst, dst, src[0], src[1]);
+ break;
   case SHADER_OPCODE_TEX:
   case FS_OPCODE_TXB:
   case SHADER_OPCODE_TXD:
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 991ccc6..9abf060 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -692,6 +692,9 @@ brw_instruction_name(enum opcode op)
case FS_OPCODE_PIXEL_Y:
   return "pixel_y";
 
+   case FS_OPCODE_GET_BUFFER_SIZE:
+  return "fs_get_buffer_size";
+
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
   return "uniform_pull_const";
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 09/70] glsl: return error if unsized arrays are found in OpenGL ES

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

Signed-off-by: Samuel Iglesias Gonsalvez 
---
 src/glsl/ast_to_hir.cpp | 16 
 1 file changed, 16 insertions(+)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 72c6459..b67ae70 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -6252,6 +6252,22 @@ ast_interface_block::hir(exec_list *instructions,
   else if (state->stage == MESA_SHADER_TESS_CTRL && var_mode == 
ir_var_shader_out)
  handle_tess_ctrl_shader_output_decl(state, loc, var);
 
+  for (unsigned i = 0; i < num_variables; i++) {
+ /* From GLSL ES 3.10 spec, section 4.1.9 "Arrays":
+  *
+  * "If an array is declared as the last member of a shader storage
+  * block and the size is not specified at compile-time, it is
+  * sized at run-time. In all other cases, arrays are sized only
+  * at compile-time."
+  */
+ if (state->es_shader && fields[i].type->is_unsized_array()) {
+ _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+  "only last member of a shader storage block "
+  "can be defined as unsized array",
+  fields[i].name);
+ }
+  }
+
   if (ir_variable *earlier =
   state->symbols->get_variable(this->instance_name)) {
  if (!redeclaring_per_vertex) {
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 13/70] nir: Implement ir_unop_get_buffer_size

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

This is how backends provide the buffer size required to compute
the size of unsized arrays in the previous patch

Signed-off-by: Samuel Iglesias Gonsalvez 
---
 src/glsl/nir/glsl_to_nir.cpp  | 10 ++
 src/glsl/nir/nir_intrinsics.h |  7 +++
 2 files changed, 17 insertions(+)

diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index ad7d7dd..0666250 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -1315,6 +1315,16 @@ nir_visitor::visit(ir_expression *ir)
  unreachable("not reached");
   }
   break;
+   case ir_unop_get_buffer_size: {
+  nir_intrinsic_instr *load = nir_intrinsic_instr_create(
+ this->shader,
+ nir_intrinsic_get_buffer_size);
+  load->num_components = ir->type->vector_elements;
+  load->src[0] = evaluate_rvalue(ir->operands[0]);
+  add_instr(&load->instr, ir->type->vector_elements);
+  return;
+   }
+
case ir_binop_add:
case ir_binop_sub:
case ir_binop_mul:
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index ed309b6..88f57f4 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -62,6 +62,13 @@ INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0,
   NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 
 /*
+ * Ask the driver for the size of a given buffer. It takes the buffer index
+ * as source.
+ */
+INTRINSIC(get_buffer_size, 1, ARR(1), true, 1, 0, 0,
+  NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+
+/*
  * a barrier is an intrinsic with no inputs/outputs but which can't be moved
  * around/optimized in general
  */
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2.1/2] egl/dri2: don't leak the fd on dri2_terminate

2015-09-10 Thread Emil Velikov

Currently the check was incorrect as it did not consider the (unlikely)
case of fd == 0. In order to fix this we should first correctly
initialize it to -1, as the swrast implementations leave it set to zero
(props to calloc()).

Signed-off-by: Emil Velikov 
---

Noticed while checking the code wrt patch 1/2.

-Emil

 src/egl/drivers/dri2/egl_dri2.c | 2 +-
 src/egl/drivers/dri2/platform_wayland.c | 1 +
 src/egl/drivers/dri2/platform_x11.c | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index eb56113..1740ee3 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -786,7 +786,7 @@ dri2_terminate(_EGLDriver *drv, _EGLDisplay *disp)
 
if (dri2_dpy->own_dri_screen)
   dri2_dpy->core->destroyScreen(dri2_dpy->dri_screen);
-   if (dri2_dpy->fd)
+   if (dri2_dpy->fd >= 0)
   close(dri2_dpy->fd);
if (dri2_dpy->driver)
   dlclose(dri2_dpy->driver);
diff --git a/src/egl/drivers/dri2/platform_wayland.c 
b/src/egl/drivers/dri2/platform_wayland.c
index dbc64ba..6cf5461 100644
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -1804,6 +1804,7 @@ dri2_initialize_wayland_swrast(_EGLDriver *drv, 
_EGLDisplay *disp)
if (roundtrip(dri2_dpy) < 0 || dri2_dpy->formats == 0)
   goto cleanup_shm;
 
+   dri2_dpy->fd = -1;
dri2_dpy->driver_name = strdup("swrast");
if (!dri2_load_driver_swrast(disp))
   goto cleanup_shm;
diff --git a/src/egl/drivers/dri2/platform_x11.c 
b/src/egl/drivers/dri2/platform_x11.c
index bf7d2be..7991fc2 100644
--- a/src/egl/drivers/dri2/platform_x11.c
+++ b/src/egl/drivers/dri2/platform_x11.c
@@ -1161,6 +1161,7 @@ dri2_initialize_x11_swrast(_EGLDriver *drv, _EGLDisplay 
*disp)
 * Every hardware driver_name is set using strdup. Doing the same in
 * here will allow is to simply free the memory at dri2_terminate().
 */
+   dri2_dpy->fd = -1;
dri2_dpy->driver_name = strdup("swrast");
if (!dri2_load_driver_swrast(disp))
   goto cleanup_conn;
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 62/70] glsl: Do not allow reads from write-only buffer variables

2015-09-10 Thread Iago Toral Quiroga

The error location won't be right, but fixing that would require to check
for this as we process each type of AST node that can involve a variable
read.

v2:
  - Limit the check to buffer variables, image variables have different
semantics involved.
---
 src/glsl/ast_to_hir.cpp | 56 +
 1 file changed, 56 insertions(+)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index fed5a47..feaee9f 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -67,6 +67,48 @@ static void
 remove_per_vertex_blocks(exec_list *instructions,
  _mesa_glsl_parse_state *state, ir_variable_mode mode);
 
+/**
+ * Visitor class that finds the first instance of any write-only variable that
+ * is ever read, if any
+ */
+class read_from_write_only_variable_visitor : public ir_hierarchical_visitor
+{
+public:
+   read_from_write_only_variable_visitor() : found(NULL)
+   {
+   }
+
+   virtual ir_visitor_status visit(ir_dereference_variable *ir)
+   {
+  if (this->in_assignee)
+ return visit_continue;
+
+  ir_variable *var = ir->variable_referenced();
+  /* We can have image_write_only set on both images and buffer variables,
+   * but in the former there is a distinction between reads from
+   * the variable itself (write_only) and from the memory they point to
+   * (image_write_only), while in the case of buffer variables there is
+   * no such distinction, that is why this check here is limited to
+   * buffer variables alone.
+   */
+  if (!var || var->data.mode != ir_var_shader_storage)
+ return visit_continue;
+
+  if (var->data.image_write_only) {
+ found = var;
+ return visit_stop;
+  }
+
+  return visit_continue;
+   }
+
+   ir_variable *get_variable() {
+  return found;
+   }
+
+private:
+   ir_variable *found;
+};
 
 void
 _mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state)
@@ -162,6 +204,20 @@ _mesa_ast_to_hir(exec_list *instructions, struct 
_mesa_glsl_parse_state *state)
 */
remove_per_vertex_blocks(instructions, state, ir_var_shader_in);
remove_per_vertex_blocks(instructions, state, ir_var_shader_out);
+
+   /* Check that we don't have reads from write-only variables */
+   read_from_write_only_variable_visitor v;
+   v.run(instructions);
+   ir_variable *error_var = v.get_variable();
+   if (error_var) {
+  /* It would be nice to have proper location information, but for that
+   * we would need to check this as we process each kind of AST node
+   */
+  YYLTYPE loc;
+  memset(&loc, 0, sizeof(loc));
+  _mesa_glsl_error(&loc, state, "Read from write-only variable `%s'",
+   error_var->name);
+   }
 }
 
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 58/70] glsl: Allow use of memory qualifiers with ARB_shader_storage_buffer_object.

2015-09-10 Thread Iago Toral Quiroga

---
 src/glsl/glsl_lexer.ll | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/glsl/glsl_lexer.ll b/src/glsl/glsl_lexer.ll
index 90e84ed..2142817 100644
--- a/src/glsl/glsl_lexer.ll
+++ b/src/glsl/glsl_lexer.ll
@@ -406,11 +406,11 @@ image2DShadow   KEYWORD(130, 300, 0, 0, 
IMAGE2DSHADOW);
 image1DArrayShadow  KEYWORD(130, 300, 0, 0, IMAGE1DARRAYSHADOW);
 image2DArrayShadow  KEYWORD(130, 300, 0, 0, IMAGE2DARRAYSHADOW);
 
-coherent   KEYWORD_WITH_ALT(420, 300, 420, 310, 
yyextra->ARB_shader_image_load_store_enable, COHERENT);
-volatile   KEYWORD_WITH_ALT(110, 100, 420, 310, 
yyextra->ARB_shader_image_load_store_enable, VOLATILE);
-restrict   KEYWORD_WITH_ALT(420, 300, 420, 310, 
yyextra->ARB_shader_image_load_store_enable, RESTRICT);
-readonly   KEYWORD_WITH_ALT(420, 300, 420, 310, 
yyextra->ARB_shader_image_load_store_enable, READONLY);
-writeonly  KEYWORD_WITH_ALT(420, 300, 420, 310, 
yyextra->ARB_shader_image_load_store_enable, WRITEONLY);
+coherent   KEYWORD_WITH_ALT(420, 300, 420, 310, 
yyextra->ARB_shader_image_load_store_enable || 
yyextra->ARB_shader_storage_buffer_object_enable, COHERENT);
+volatile   KEYWORD_WITH_ALT(110, 100, 420, 310, 
yyextra->ARB_shader_image_load_store_enable || 
yyextra->ARB_shader_storage_buffer_object_enable, VOLATILE);
+restrict   KEYWORD_WITH_ALT(420, 300, 420, 310, 
yyextra->ARB_shader_image_load_store_enable || 
yyextra->ARB_shader_storage_buffer_object_enable, RESTRICT);
+readonly   KEYWORD_WITH_ALT(420, 300, 420, 310, 
yyextra->ARB_shader_image_load_store_enable || 
yyextra->ARB_shader_storage_buffer_object_enable, READONLY);
+writeonly  KEYWORD_WITH_ALT(420, 300, 420, 310, 
yyextra->ARB_shader_image_load_store_enable || 
yyextra->ARB_shader_storage_buffer_object_enable, WRITEONLY);
 
 atomic_uint KEYWORD_WITH_ALT(420, 300, 420, 310, 
yyextra->ARB_shader_atomic_counters_enable, ATOMIC_UINT);
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 63/70] main: Add SHADER_STORAGE_BLOCK and BUFFER_VARIABLE support for ARB_program_interface_query

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

Including TOP_LEVEL_ARRAY_SIZE and TOP_LEVEL_ARRAY_STRIDE queries.

v2:
- Use std430_array_stride() to get top level array stride following std430's 
rules.

Signed-off-by: Samuel Iglesias Gonsalvez 
Reviewed-by: Tapani Pälli 
---
 src/glsl/ir_uniform.h|   5 +
 src/glsl/link_uniforms.cpp   |   3 +
 src/glsl/linker.cpp  |  10 +-
 src/mesa/main/program_resource.c |   7 +-
 src/mesa/main/shader_query.cpp   | 265 +--
 5 files changed, 278 insertions(+), 12 deletions(-)

diff --git a/src/glsl/ir_uniform.h b/src/glsl/ir_uniform.h
index 0b6f720..858a7da 100644
--- a/src/glsl/ir_uniform.h
+++ b/src/glsl/ir_uniform.h
@@ -194,6 +194,11 @@ struct gl_uniform_storage {
 * This is a built-in uniform that should not be modified through any gl 
API.
 */
bool builtin;
+
+   /**
+* This is a shader storage buffer variable, not a uniform.
+*/
+   bool is_shader_storage;
 };
 
 #ifdef __cplusplus
diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp
index cc38287..10f287b 100644
--- a/src/glsl/link_uniforms.cpp
+++ b/src/glsl/link_uniforms.cpp
@@ -689,6 +689,9 @@ private:
   if (!this->uniforms[id].builtin)
  this->uniforms[id].storage = this->values;
 
+  this->uniforms[id].is_shader_storage =
+ current_var->is_in_shader_storage_block();
+
   if (this->ubo_block_index != -1) {
 this->uniforms[id].block_index = this->ubo_block_index;
 
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 323c162..9142265 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -3361,14 +3361,18 @@ build_program_resource_list(struct gl_shader_program 
*shProg)
  }
   }
 
-  if (!add_program_resource(shProg, GL_UNIFORM,
+  bool is_shader_storage =  shProg->UniformStorage[i].is_shader_storage;
+  GLenum type = is_shader_storage ? GL_BUFFER_VARIABLE : GL_UNIFORM;
+  if (!add_program_resource(shProg, type,
 >UniformStorage[i], stageref))
  return;
}
 
-   /* Add program uniform blocks. */
+   /* Add program uniform blocks and shader storage blocks. */
for (unsigned i = 0; i < shProg->NumUniformBlocks; i++) {
-  if (!add_program_resource(shProg, GL_UNIFORM_BLOCK,
+  bool is_shader_storage = shProg->UniformBlocks[i].IsShaderStorage;
+  GLenum type = is_shader_storage ? GL_SHADER_STORAGE_BLOCK : 
GL_UNIFORM_BLOCK;
+  if (!add_program_resource(shProg, type,
   >UniformBlocks[i], 0))
  return;
}
diff --git a/src/mesa/main/program_resource.c b/src/mesa/main/program_resource.c
index 23d2b4d..c609abe 100644
--- a/src/mesa/main/program_resource.c
+++ b/src/mesa/main/program_resource.c
@@ -41,6 +41,8 @@ supported_interface_enum(struct gl_context *ctx, GLenum iface)
case GL_PROGRAM_OUTPUT:
case GL_TRANSFORM_FEEDBACK_VARYING:
case GL_ATOMIC_COUNTER_BUFFER:
+   case GL_BUFFER_VARIABLE:
+   case GL_SHADER_STORAGE_BLOCK:
   return true;
case GL_VERTEX_SUBROUTINE:
case GL_FRAGMENT_SUBROUTINE:
@@ -58,8 +60,6 @@ supported_interface_enum(struct gl_context *ctx, GLenum iface)
case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
   return _mesa_has_tessellation(ctx) && _mesa_has_shader_subroutine(ctx);
-   case GL_BUFFER_VARIABLE:
-   case GL_SHADER_STORAGE_BLOCK:
default:
   return false;
}
@@ -121,6 +121,7 @@ _mesa_GetProgramInterfaceiv(GLuint program, GLenum 
programInterface,
case GL_MAX_NUM_ACTIVE_VARIABLES:
   switch (programInterface) {
   case GL_UNIFORM_BLOCK:
+  case GL_SHADER_STORAGE_BLOCK:
  for (i = 0, *params = 0; i < shProg->NumProgramResourceList; i++) {
 if (shProg->ProgramResourceList[i].Type == programInterface) {
struct gl_uniform_block *block =
@@ -247,8 +248,10 @@ _mesa_GetProgramResourceIndex(GLuint program, GLenum 
programInterface,
case GL_PROGRAM_INPUT:
case GL_PROGRAM_OUTPUT:
case GL_UNIFORM:
+   case GL_BUFFER_VARIABLE:
case GL_TRANSFORM_FEEDBACK_VARYING:
case GL_UNIFORM_BLOCK:
+   case GL_SHADER_STORAGE_BLOCK:
   res = _mesa_program_resource_find_name(shProg, programInterface, name,
  _index);
   if (!res || array_index > 0)
diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp
index ee73202..99d9e10 100644
--- a/src/mesa/main/shader_query.cpp
+++ b/src/mesa/main/shader_query.cpp
@@ -431,6 +431,7 @@ _mesa_program_resource_name(struct gl_program_resource *res)
const ir_variable *var;
switch (res->Type) {
case GL_UNIFORM_BLOCK:
+   case GL_SHADER_STORAGE_BLOCK:
   return RESOURCE_UBO(res)->Name;
case GL_TRANSFORM_FEEDBACK_VARYING:
   return RESOURCE_XFB(res)->Name;
@@ -445,6 +446,7 @@ _mesa_program_resource_name(struct gl_program_resource

[Mesa-dev] [PATCH v5 49/70] glsl: lower SSBO atomic intrinsics

2015-09-10 Thread Iago Toral Quiroga

The first argument to SSBO atomics is a reference to an SSBO buffer variable,
so we want to compute its block index and offset and provide these values
to an internal version of the intrinsic that takes them instead of the
buffer variable reference.

v2:
- Support single components of integer vectors to be passed in as arguments.
- Get interface packing information from interface's type.
---
 src/glsl/lower_ubo_reference.cpp | 159 +++
 1 file changed, 159 insertions(+)

diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp
index 368dbfa..2d77680 100644
--- a/src/glsl/lower_ubo_reference.cpp
+++ b/src/glsl/lower_ubo_reference.cpp
@@ -180,6 +180,10 @@ public:
unsigned calculate_unsized_array_stride(ir_dereference *deref,
unsigned packing);
 
+   ir_call *lower_ssbo_atomic_intrinsic(ir_call *ir);
+   ir_call *check_for_ssbo_atomic_intrinsic(ir_call *ir);
+   ir_visitor_status visit_enter(ir_call *ir);
+
void *mem_ctx;
struct gl_shader *shader;
struct gl_uniform_buffer_variable *ubo_var;
@@ -242,7 +246,12 @@ interface_field_name(void *mem_ctx, char *base_name, 
ir_rvalue *d,
 
  break;
   }
+  case ir_type_swizzle: {
+ ir_swizzle *s = (ir_swizzle *) d;
 
+ d = s->val->as_dereference();
+ break;
+  }
   default:
  assert(!"Should not get here.");
  break;
@@ -427,6 +436,16 @@ 
lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
  break;
   }
 
+  case ir_type_swizzle: {
+ ir_swizzle *deref_swizzle = (ir_swizzle *) deref;
+
+ assert(deref_swizzle->mask.num_components == 1);
+
+ *const_offset += deref_swizzle->mask.x * sizeof(int);
+ deref = deref_swizzle->val->as_dereference();
+ break;
+  }
+
   default:
  assert(!"not reached");
  deref = NULL;
@@ -1017,6 +1036,146 @@ lower_ubo_reference_visitor::visit_enter(ir_assignment 
*ir)
return rvalue_visit(ir);
 }
 
+/* Lowers the intrinsic call to a new internal intrinsic that swaps the
+ * access to the buffer variable in the first parameter by an offset
+ * and block index. This involves creating the new internal intrinsic
+ * (i.e. the new function signature).
+ */
+ir_call *
+lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir)
+{
+   /* SSBO atomics usually have 2 parameters, the buffer variable and an
+* integer argument. The exception is CompSwap, that has an additional
+* integer parameter.
+*/
+   int param_count = ir->actual_parameters.length();
+   assert(param_count == 2 || param_count == 3);
+
+   /* First argument must be a scalar integer buffer variable */
+   exec_node *param = ir->actual_parameters.get_head();
+   ir_instruction *inst = (ir_instruction *) param;
+   assert(inst->ir_type == ir_type_dereference_variable ||
+  inst->ir_type == ir_type_dereference_array ||
+  inst->ir_type == ir_type_dereference_record ||
+  inst->ir_type == ir_type_swizzle);
+
+   ir_rvalue *deref = (ir_rvalue *) inst;
+   assert(deref->type->is_scalar() && deref->type->is_integer());
+
+   ir_variable *var = deref->variable_referenced();
+   assert(var);
+
+   /* Compute the offset to the start of the dereference and the
+* block index
+*/
+   mem_ctx = ralloc_parent(shader->ir);
+
+   ir_rvalue *offset = NULL;
+   unsigned const_offset;
+   bool row_major;
+   int matrix_columns;
+   unsigned packing = var->get_interface_type()->interface_packing;
+
+   setup_for_load_or_store(var, deref,
+   &offset, &const_offset,
+   &row_major, &matrix_columns,
+   packing);
+   assert(offset);
+   assert(!row_major);
+   assert(matrix_columns == 1);
+
+   ir_rvalue *deref_offset =
+  add(offset, new(mem_ctx) ir_constant(const_offset));
+   ir_rvalue *block_index = this->uniform_block->clone(mem_ctx, NULL);
+
+   /* Create the new internal function signature that will take a block
+* index and offset instead of a buffer variable
+*/
+   exec_list sig_params;
+   ir_variable *sig_param = new(mem_ctx)
+  ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
+   sig_params.push_tail(sig_param);
+
+   sig_param = new(mem_ctx)
+  ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
+   sig_params.push_tail(sig_param);
+
+   const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ?
+  glsl_type::int_type : glsl_type::uint_type;
+   param = param->get_next();
+   sig_param = new(mem_ctx)
+ ir_variable(type, "data1", ir_var_function_in);
+   sig_params.push_tail(sig_param);
+
+   if (param_count == 3) {
+  param = param->get_next();
+  sig_param = new(mem_ctx)
+ir_variable(type, "data2", ir_var_function_in);
+  sig_params.push_tail(sig_param);
+   }
+
+   ir_function_signature *sig =
+  new(mem_ctx)

[Mesa-dev] [PATCH v5 54/70] glsl: First argument to atomic functions must be a buffer variable

2015-09-10 Thread Iago Toral Quiroga

v2:
  - Add "ssbo" to the names of the static functions so it is clear that this
is specific to SSBO atomics.
---
 src/glsl/ast_function.cpp | 37 +
 1 file changed, 37 insertions(+)

diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp
index 0fb8928..1b588f7 100644
--- a/src/glsl/ast_function.cpp
+++ b/src/glsl/ast_function.cpp
@@ -142,6 +142,31 @@ verify_image_parameter(YYLTYPE *loc, 
_mesa_glsl_parse_state *state,
return true;
 }
 
+static bool
+verify_first_atomic_ssbo_parameter(YYLTYPE *loc, _mesa_glsl_parse_state *state,
+   ir_variable *var)
+{
+   if (!var || !var->is_in_shader_storage_block()) {
+  _mesa_glsl_error(loc, state, "First argument to atomic function"
+   " must be a buffer variable");
+  return false;
+   }
+   return true;
+}
+
+static bool
+is_atomic_ssbo_function(const char *func_name)
+{
+   return !strcmp(func_name, "atomicAdd") ||
+  !strcmp(func_name, "atomicMin") ||
+  !strcmp(func_name, "atomicMax") ||
+  !strcmp(func_name, "atomicAnd") ||
+  !strcmp(func_name, "atomicOr") ||
+  !strcmp(func_name, "atomicXor") ||
+  !strcmp(func_name, "atomicExchange") ||
+  !strcmp(func_name, "atomicCompSwap");
+}
+
 /**
  * Verify that 'out' and 'inout' actual parameters are lvalues.  Also, verify
  * that 'const_in' formal parameters (an extension in our IR) correspond to
@@ -156,6 +181,10 @@ verify_parameter_modes(_mesa_glsl_parse_state *state,
exec_node *actual_ir_node  = actual_ir_parameters.head;
exec_node *actual_ast_node = actual_ast_parameters.head;
 
+   const char *func_name = sig->function_name();
+   bool is_atomic_ssbo = is_atomic_ssbo_function(func_name);
+
+   bool is_first_param = true;
foreach_in_list(const ir_variable, formal, >parameters) {
   /* The lists must be the same length. */
   assert(!actual_ir_node->is_tail_sentinel());
@@ -170,6 +199,13 @@ verify_parameter_modes(_mesa_glsl_parse_state *state,
*/
   YYLTYPE loc = actual_ast->get_location();
 
+  /* The first parameter of atomic functions must be a buffer variable */
+  if (is_atomic_ssbo && is_first_param &&
+  !verify_first_atomic_ssbo_parameter(, state,
+  actual->variable_referenced())) {
+ return false;
+  }
+
   /* Verify that 'const_in' parameters are ir_constants. */
   if (formal->data.mode == ir_var_const_in &&
  actual->ir_type != ir_type_constant) {
@@ -255,6 +291,7 @@ verify_parameter_modes(_mesa_glsl_parse_state *state,
 
   actual_ir_node  = actual_ir_node->next;
   actual_ast_node = actual_ast_node->next;
+  is_first_param = false;
}
return true;
 }
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 61/70] glsl: Do not allow assignments to read-only buffer variables

2015-09-10 Thread Iago Toral Quiroga

v2:
  - Merge the error check for the readonly qualifier with the already
existing check for variables flagged as readonly (Timothy).
  - Limit the check to buffer variables, image variables have different
semantics involved (Curro).
---
 src/glsl/ast_to_hir.cpp | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 61318ef..fed5a47 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -820,7 +820,16 @@ do_assignment(exec_list *instructions, struct 
_mesa_glsl_parse_state *state,
   "assignment to %s",
   non_lvalue_description);
  error_emitted = true;
-  } else if (lhs_var != NULL && lhs_var->data.read_only) {
+  } else if (lhs_var != NULL && (lhs_var->data.read_only ||
+ (lhs_var->data.mode == ir_var_shader_storage &&
+  lhs_var->data.image_read_only))) {
+ /* We can have image_read_only set on both images and buffer 
variables,
+  * but in the former there is a distinction between assignments to
+  * the variable itself (read_only) and to the memory they point to
+  * (image_read_only), while in the case of buffer variables there is
+  * no such distinction, that is why this check here is limited to
+  * buffer variables alone.
+  */
  _mesa_glsl_error(_loc, state,
   "assignment to read-only variable '%s'",
   lhs_var->name);
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 59/70] glsl: Apply memory qualifiers to buffer variables

2015-09-10 Thread Iago Toral Quiroga

v2:
  - Save memory qualifier info in the top level members of a shader
storage block.
  - Add checks to record_compare(), which is used when comparing
shader storage buffer declarations in different shaders.
  - Always report an error for incompatible readonly/writeonly
definitions, whether they are present at block or field level.
---
 src/glsl/ast_to_hir.cpp | 63 ++---
 src/glsl/glsl_types.cpp | 20 
 src/glsl/glsl_types.h   | 11 +
 3 files changed, 91 insertions(+), 3 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index a364aae..61318ef 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -5605,10 +5605,19 @@ ast_process_structure_or_interface_block(exec_list 
*instructions,
  bool is_interface,
  enum glsl_matrix_layout matrix_layout,
  bool allow_reserved_names,
- ir_variable_mode var_mode)
+ ir_variable_mode var_mode,
+ ast_type_qualifier *layout)
 {
unsigned decl_count = 0;
 
+   /* For blocks that accept memory qualifiers (i.e. shader storage), verify
+* that we don't have incompatible qualifiers
+*/
+   if (layout && layout->flags.q.read_only && layout->flags.q.write_only) {
+  _mesa_glsl_error(, state,
+   "Interface block sets both readonly and writeonly");
+   }
+
/* Make an initial pass over the list of fields to determine how
 * many there are.  Each element in this list is an ast_declarator_list.
 * This means that we actually need to count the number of elements in the
@@ -5770,6 +5779,44 @@ ast_process_structure_or_interface_block(exec_list 
*instructions,
|| fields[i].matrix_layout == 
GLSL_MATRIX_LAYOUT_COLUMN_MAJOR);
  }
 
+ /* Image qualifiers are allowed on buffer variables, which can only
+  * be defined inside shader storage buffer objects
+  */
+ if (layout && var_mode == ir_var_shader_storage) {
+if (qual->flags.q.read_only && qual->flags.q.write_only) {
+   _mesa_glsl_error(, state,
+"buffer variable `%s' can't be "
+"readonly and writeonly.", fields[i].name);
+}
+
+/* For readonly and writeonly qualifiers the field definition,
+ * if set, overwrites the layout qualifier.
+ */
+bool read_only = layout->flags.q.read_only;
+bool write_only = layout->flags.q.write_only;
+
+if (qual->flags.q.read_only) {
+   read_only = true;
+   write_only = false;
+} else if (qual->flags.q.write_only) {
+   read_only = false;
+   write_only = true;
+}
+
+fields[i].image_read_only = read_only;
+fields[i].image_write_only = write_only;
+
+/* For other qualifiers, we set the flag if either the layout
+ * qualifier or the field qualifier are set
+ */
+fields[i].image_coherent = qual->flags.q.coherent ||
+layout->flags.q.coherent;
+fields[i].image_volatile = qual->flags.q._volatile ||
+layout->flags.q._volatile;
+fields[i].image_restrict = qual->flags.q.restrict_flag ||
+layout->flags.q.restrict_flag;
+ }
+
  i++;
   }
}
@@ -5824,7 +5871,8 @@ ast_struct_specifier::hir(exec_list *instructions,
false,
GLSL_MATRIX_LAYOUT_INHERITED,
false /* allow_reserved_names 
*/,
-   ir_var_auto);
+   ir_var_auto,
+   NULL);
 
validate_identifier(this->name, loc, state);
 
@@ -5979,7 +6027,8 @@ ast_interface_block::hir(exec_list *instructions,
true,
matrix_layout,
redeclaring_per_vertex,
-   var_mode);
+   var_mode,
+   >layout);
 
state->struct_specifier_depth--;
 
@@ -6363,6 +6412,14 @@ ast_interface_block::hir(exec_list *instructions,
 
  var->data.stream = this->layout.stream;
 
+ if (var->data.mode == ir_var_shader_storage) {
+var->data.image_read_only = fields[i].image_read_only;
+

[Mesa-dev] [PATCH v5 53/70] i965/vec4: Implement lowered SSBO atomic intrinsics

2015-09-10 Thread Iago Toral Quiroga

The original GLSL IR intrinsics have been lowered to an internal
version that accepts a block index and an offset instead of an
SSBO reference.
---
 src/mesa/drivers/dri/i965/brw_vec4.h   |   1 +
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 103 +
 2 files changed, 104 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index d152713..feb9a30 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -402,6 +402,7 @@ public:
void dump_instruction(backend_instruction *inst, FILE *file);
 
void visit_atomic_counter_intrinsic(ir_call *ir);
+   void visit_atomic_intrinsic(ir_call *ir);
 
void visit_store_ssbo_intrinsic(ir_call *ir);
void visit_load_ssbo_intrinsic(ir_call *ir);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 7ab6df0..9039aee 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -2643,6 +2643,100 @@ vec4_visitor::visit_store_ssbo_intrinsic(ir_call *ir)
 }
 
 void
+vec4_visitor::visit_atomic_intrinsic(ir_call *ir)
+{
+   int param_count = ir->actual_parameters.length();
+   assert(param_count == 3 || param_count == 4);
+
+   /* block index */
+   exec_node *param = ir->actual_parameters.get_head();
+   ir_instruction *inst = (ir_instruction *) param;
+   ir_constant *const_uniform_block = inst->as_constant();
+   src_reg surface;
+   if (const_uniform_block) {
+  unsigned surf_index = prog_data->base.binding_table.ubo_start +
+const_uniform_block->value.u[0];
+  surface = src_reg(surf_index);
+  brw_mark_surface_used(_data->base, surf_index);
+   } else {
+  inst->accept(this);
+  surface = this->result;
+  emit(ADD(dst_reg(surface), surface,
+   src_reg(prog_data->base.binding_table.ubo_start)));
+
+  /* Assume this may touch any UBO. This is the same we do for other
+   * UBO/SSBO accesses with non-constant surface.
+   */
+  brw_mark_surface_used(_data->base,
+prog_data->base.binding_table.ubo_start +
+shader_prog->NumUniformBlocks - 1);
+   }
+
+   /* offset */
+   param = param->get_next();
+   inst = (ir_instruction *) param;
+   inst->accept(this);
+   src_reg offset = this->result;
+
+   /* data1 parameter (this is always present) */
+   param = param->get_next();
+   inst = (ir_instruction *) param;
+   assert(inst);
+   inst->accept(this);
+   src_reg data1 = this->result;
+   src_reg data2;
+
+   /* Emit the actual atomic operation */
+   const char *callee = ir->callee->function_name();
+   dst_reg dst = get_assignment_lhs(ir->return_deref, this);
+
+   const vec4_builder bld = vec4_builder(this).at_end()
+.annotate(current_annotation, base_ir);
+
+   unsigned atomic_op;
+   if (!strcmp("__intrinsic_ssbo_atomic_add_internal", callee)) {
+  atomic_op = BRW_AOP_ADD;
+   } else if (!strcmp("__intrinsic_ssbo_atomic_and_internal", callee)) {
+  atomic_op = BRW_AOP_AND;
+   } else if (!strcmp("__intrinsic_ssbo_atomic_or_internal", callee)) {
+  atomic_op = BRW_AOP_OR;
+   } else if (!strcmp("__intrinsic_ssbo_atomic_xor_internal", callee)) {
+  atomic_op = BRW_AOP_XOR;
+   } else if (!strcmp("__intrinsic_ssbo_atomic_min_internal", callee)) {
+  if (dst.type == BRW_REGISTER_TYPE_D)
+ atomic_op = BRW_AOP_IMIN;
+  else
+ atomic_op = BRW_AOP_UMIN;
+   } else if (!strcmp("__intrinsic_ssbo_atomic_max_internal", callee)) {
+  if (dst.type == BRW_REGISTER_TYPE_D)
+ atomic_op = BRW_AOP_IMAX;
+  else
+ atomic_op = BRW_AOP_UMAX;
+   } else if (!strcmp("__intrinsic_ssbo_atomic_exchange_internal", callee)) {
+  atomic_op = BRW_AOP_MOV;
+   } else if (!strcmp("__intrinsic_ssbo_atomic_comp_swap_internal", callee)) {
+  /* Needs data2 parameter */
+  assert(param_count == 4);
+  param = param->get_next();
+  inst = (ir_instruction *) param;
+  assert(inst);
+  inst->accept(this);
+  data2 = this->result;
+  atomic_op = BRW_AOP_CMPWR;
+   } else {
+  unreachable("Unsupported atomic intrinsic");
+   }
+
+   src_reg atomic_result =
+  surface_access::emit_untyped_atomic(bld, surface, offset,
+  data1, data2,
+  1 /* dims */, 1 /* rsize */,
+  atomic_op,
+  BRW_PREDICATE_NONE);
+   emit(MOV(dst, atomic_result));
+}
+
+void
 vec4_visitor::visit_load_ssbo_intrinsic(ir_call *ir)
 {
const glsl_type *type = ir->return_deref->var->type;
@@ -2729,6 +2823,15 @@ vec4_visitor::visit(ir_call *ir)
   visit_store_ssbo_intrinsic(ir);
} else if (!strcmp("__intrinsic_load_ssbo", callee)) {

[Mesa-dev] [PATCH v5 56/70] mesa: Add queries for GL_SHADER_STORAGE_BUFFER

2015-09-10 Thread Iago Toral Quiroga

These handle querying the buffer name attached to a given binding point
as well as the start offset and size of that buffer.
---
 src/mesa/main/get.c | 31 +++
 1 file changed, 31 insertions(+)

diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index d5df530..24442f3 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -1001,6 +1001,10 @@ find_custom_value(struct gl_context *ctx, const struct 
value_desc *d, union valu
case GL_UNIFORM_BUFFER_BINDING:
   v->value_int = ctx->UniformBuffer->Name;
   break;
+   /* GL_ARB_shader_storage_buffer_object */
+   case GL_SHADER_STORAGE_BUFFER_BINDING:
+  v->value_int = ctx->ShaderStorageBuffer->Name;
+  break;
/* GL_ARB_timer_query */
case GL_TIMESTAMP:
   if (ctx->Driver.GetTimestamp) {
@@ -1935,6 +1939,33 @@ find_value_indexed(const char *func, GLenum pname, 
GLuint index, union value *v)
   v->value_int = ctx->UniformBufferBindings[index].Size;
   return TYPE_INT;
 
+   /* ARB_shader_storage_buffer_object */
+   case GL_SHADER_STORAGE_BUFFER_BINDING:
+  if (index >= ctx->Const.MaxShaderStorageBufferBindings)
+ goto invalid_value;
+  if (!ctx->Extensions.ARB_shader_storage_buffer_object)
+ goto invalid_enum;
+  v->value_int = 
ctx->ShaderStorageBufferBindings[index].BufferObject->Name;
+  return TYPE_INT;
+
+   case GL_SHADER_STORAGE_BUFFER_START:
+  if (index >= ctx->Const.MaxShaderStorageBufferBindings)
+ goto invalid_value;
+  if (!ctx->Extensions.ARB_shader_storage_buffer_object)
+ goto invalid_enum;
+  v->value_int = ctx->ShaderStorageBufferBindings[index].Offset < 0 ? 0 :
+ ctx->ShaderStorageBufferBindings[index].Offset;
+  return TYPE_INT;
+
+   case GL_SHADER_STORAGE_BUFFER_SIZE:
+  if (index >= ctx->Const.MaxShaderStorageBufferBindings)
+ goto invalid_value;
+  if (!ctx->Extensions.ARB_shader_storage_buffer_object)
+ goto invalid_enum;
+  v->value_int = ctx->ShaderStorageBufferBindings[index].Size < 0 ? 0 :
+ ctx->ShaderStorageBufferBindings[index].Size;
+  return TYPE_INT;
+
/* ARB_texture_multisample / GL3.2 */
case GL_SAMPLE_MASK_VALUE:
   if (index != 0)
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 51/70] i965/nir/fs: Implement nir_intrinsic_ssbo_atomic_*

2015-09-10 Thread Iago Toral Quiroga

---
 src/mesa/drivers/dri/i965/brw_fs.h   |  2 +
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 77 
 2 files changed, 79 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index b48134e..2016887 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -255,6 +255,8 @@ public:
nir_ssa_undef_instr *instr);
void nir_emit_intrinsic(const brw::fs_builder ,
nir_intrinsic_instr *instr);
+   void nir_emit_ssbo_atomic(const brw::fs_builder ,
+ int op, nir_intrinsic_instr *instr);
void nir_emit_texture(const brw::fs_builder ,
  nir_tex_instr *instr);
void nir_emit_jump(const brw::fs_builder ,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 7fe8062..7833e3d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1835,6 +1835,37 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , 
nir_intrinsic_instr *instr
   emit_barrier();
   break;
 
+   case nir_intrinsic_ssbo_atomic_add:
+  nir_emit_ssbo_atomic(bld, BRW_AOP_ADD, instr);
+  break;
+   case nir_intrinsic_ssbo_atomic_min:
+  if (dest.type == BRW_REGISTER_TYPE_D)
+ nir_emit_ssbo_atomic(bld, BRW_AOP_IMIN, instr);
+  else
+ nir_emit_ssbo_atomic(bld, BRW_AOP_UMIN, instr);
+  break;
+   case nir_intrinsic_ssbo_atomic_max:
+  if (dest.type == BRW_REGISTER_TYPE_D)
+ nir_emit_ssbo_atomic(bld, BRW_AOP_IMAX, instr);
+  else
+ nir_emit_ssbo_atomic(bld, BRW_AOP_UMAX, instr);
+  break;
+   case nir_intrinsic_ssbo_atomic_and:
+  nir_emit_ssbo_atomic(bld, BRW_AOP_AND, instr);
+  break;
+   case nir_intrinsic_ssbo_atomic_or:
+  nir_emit_ssbo_atomic(bld, BRW_AOP_OR, instr);
+  break;
+   case nir_intrinsic_ssbo_atomic_xor:
+  nir_emit_ssbo_atomic(bld, BRW_AOP_XOR, instr);
+  break;
+   case nir_intrinsic_ssbo_atomic_exchange:
+  nir_emit_ssbo_atomic(bld, BRW_AOP_MOV, instr);
+  break;
+   case nir_intrinsic_ssbo_atomic_comp_swap:
+  nir_emit_ssbo_atomic(bld, BRW_AOP_CMPWR, instr);
+  break;
+
case nir_intrinsic_get_buffer_size: {
   nir_const_value *const_uniform_block = 
nir_src_as_const_value(instr->src[0]);
   unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
@@ -1866,6 +1897,52 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , 
nir_intrinsic_instr *instr
 }
 
 void
+fs_visitor::nir_emit_ssbo_atomic(const fs_builder ,
+ int op, nir_intrinsic_instr *instr)
+{
+   fs_reg dest;
+   if (nir_intrinsic_infos[instr->intrinsic].has_dest)
+  dest = get_nir_dest(instr->dest);
+
+   fs_reg surface;
+   nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]);
+   if (const_surface) {
+  unsigned surf_index = stage_prog_data->binding_table.ubo_start +
+const_surface->u[0];
+  surface = fs_reg(surf_index);
+  brw_mark_surface_used(prog_data, surf_index);
+   } else {
+  surface = vgrf(glsl_type::uint_type);
+  bld.ADD(surface, get_nir_src(instr->src[0]),
+  fs_reg(stage_prog_data->binding_table.ubo_start));
+
+  /* Assume this may touch any UBO. This is the same we do for other
+   * UBO/SSBO accesses with non-constant surface.
+   */
+  brw_mark_surface_used(prog_data,
+stage_prog_data->binding_table.ubo_start +
+shader_prog->NumUniformBlocks - 1);
+   }
+
+   fs_reg offset = get_nir_src(instr->src[1]);
+   fs_reg data1 = get_nir_src(instr->src[2]);
+   fs_reg data2;
+   if (op == BRW_AOP_CMPWR)
+  data2 = get_nir_src(instr->src[3]);
+
+   /* Emit the actual atomic operation */
+
+   fs_reg atomic_result =
+  surface_access::emit_untyped_atomic(bld, surface, offset,
+  data1, data2,
+  1 /* dims */, 1 /* rsize */,
+  op,
+  BRW_PREDICATE_NONE);
+   dest.type = atomic_result.type;
+   bld.MOV(dest, atomic_result);
+}
+
+void
 fs_visitor::nir_emit_texture(const fs_builder , nir_tex_instr *instr)
 {
unsigned sampler = instr->sampler_index;
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 50/70] nir: Implement lowered SSBO atomic intrinsics

2015-09-10 Thread Iago Toral Quiroga

The original GLSL IR intrinsics have been lowered to an internal
version that accepts a block index and an offset instead of a
SSBO reference.

v2 (Connor):
  - Document the sources used by the atomic intrinsics.

Reviewed-by: Connor Abbott 
---
 src/glsl/nir/glsl_to_nir.cpp  | 55 +++
 src/glsl/nir/nir_intrinsics.h | 26 
 2 files changed, 81 insertions(+)

diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index a387a54..f800817 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -648,6 +648,22 @@ nir_visitor::visit(ir_call *ir)
  op = nir_intrinsic_store_ssbo;
   } else if (strcmp(ir->callee_name(), "__intrinsic_load_ssbo") == 0) {
  op = nir_intrinsic_load_ssbo;
+  } else if (strcmp(ir->callee_name(), 
"__intrinsic_ssbo_atomic_add_internal") == 0) {
+ op = nir_intrinsic_ssbo_atomic_add;
+  } else if (strcmp(ir->callee_name(), 
"__intrinsic_ssbo_atomic_and_internal") == 0) {
+ op = nir_intrinsic_ssbo_atomic_and;
+  } else if (strcmp(ir->callee_name(), 
"__intrinsic_ssbo_atomic_or_internal") == 0) {
+ op = nir_intrinsic_ssbo_atomic_or;
+  } else if (strcmp(ir->callee_name(), 
"__intrinsic_ssbo_atomic_xor_internal") == 0) {
+ op = nir_intrinsic_ssbo_atomic_xor;
+  } else if (strcmp(ir->callee_name(), 
"__intrinsic_ssbo_atomic_min_internal") == 0) {
+ op = nir_intrinsic_ssbo_atomic_min;
+  } else if (strcmp(ir->callee_name(), 
"__intrinsic_ssbo_atomic_max_internal") == 0) {
+ op = nir_intrinsic_ssbo_atomic_max;
+  } else if (strcmp(ir->callee_name(), 
"__intrinsic_ssbo_atomic_exchange_internal") == 0) {
+ op = nir_intrinsic_ssbo_atomic_exchange;
+  } else if (strcmp(ir->callee_name(), 
"__intrinsic_ssbo_atomic_comp_swap_internal") == 0) {
+ op = nir_intrinsic_ssbo_atomic_comp_swap;
   } else {
  unreachable("not reached");
   }
@@ -838,7 +854,46 @@ nir_visitor::visit(ir_call *ir)
 nir_instr_insert_after_cf_list(this->cf_node_list,
_ssbo_compare->instr);
  }
+ break;
+  }
+  case nir_intrinsic_ssbo_atomic_add:
+  case nir_intrinsic_ssbo_atomic_min:
+  case nir_intrinsic_ssbo_atomic_max:
+  case nir_intrinsic_ssbo_atomic_and:
+  case nir_intrinsic_ssbo_atomic_or:
+  case nir_intrinsic_ssbo_atomic_xor:
+  case nir_intrinsic_ssbo_atomic_exchange:
+  case nir_intrinsic_ssbo_atomic_comp_swap: {
+ int param_count = ir->actual_parameters.length();
+ assert(param_count == 3 || param_count == 4);
+
+ /* Block index */
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_instruction *inst = (ir_instruction *) param;
+ instr->src[0] = evaluate_rvalue(inst->as_rvalue());
 
+ /* Offset */
+ param = param->get_next();
+ inst = (ir_instruction *) param;
+ instr->src[1] = evaluate_rvalue(inst->as_rvalue());
+
+ /* data1 parameter (this is always present) */
+ param = param->get_next();
+ inst = (ir_instruction *) param;
+ instr->src[2] = evaluate_rvalue(inst->as_rvalue());
+
+ /* data2 parameter (only with atomic_comp_swap) */
+ if (param_count == 4) {
+assert(op == nir_intrinsic_ssbo_atomic_comp_swap);
+param = param->get_next();
+inst = (ir_instruction *) param;
+instr->src[3] = evaluate_rvalue(inst->as_rvalue());
+ }
+
+ /* Atomic result */
+ assert(ir->return_deref);
+ nir_ssa_dest_init(>instr, >dest,
+   ir->return_deref->type->vector_elements, NULL);
  break;
   }
   default:
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index 53066c6..676322f 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -133,6 +133,32 @@ INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), 
true, 1, 1, 0, 0)
 INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0,
   NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 
+/*
+ * SSBO atomic intrinsics
+ *
+ * All of the SSBO atomic memory operations read a value from memory,
+ * compute a new value using one of the operations below, write the new
+ * value to memory, and return the original value read.
+ *
+ * All operations take 3 sources except CompSwap that takes 4. These
+ * sources represent:
+ *
+ * 0: The SSBO buffer index.
+ * 1: The offset into the SSBO buffer of the variable that the atomic
+ *operation will operate on.
+ * 2: The data parameter to the atomic function (i.e. the value to add
+ *in ssbo_atomic_add, etc).
+ * 3: For CompSwap only: the second data parameter.
+ */
+INTRINSIC(ssbo_atomic_add, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_min, 3, ARR(1, 1, 1), true, 1, 0,

[Mesa-dev] [PATCH v5 57/70] glsl: fix UNIFORM_BUFFER_START or UNIFORM_BUFFER_SIZE query when no buffer object is bound

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

According to ARB_uniform_buffer_object spec:

"If the parameter (starting offset or size) was not specified when the
 buffer object was bound (e.g. if bound with BindBufferBase), or if no
 buffer object is bound to <index>, zero is returned."

Signed-off-by: Samuel Iglesias Gonsalvez 
Reviewed-by: Tapani Pälli 
---
 src/mesa/main/get.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index 24442f3..f40c98b 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -1928,7 +1928,8 @@ find_value_indexed(const char *func, GLenum pname, GLuint 
index, union value *v)
 goto invalid_value;
   if (!ctx->Extensions.ARB_uniform_buffer_object)
 goto invalid_enum;
-  v->value_int = ctx->UniformBufferBindings[index].Offset;
+  v->value_int = ctx->UniformBufferBindings[index].Offset < 0 ? 0 :
+ ctx->UniformBufferBindings[index].Offset;
   return TYPE_INT;
 
case GL_UNIFORM_BUFFER_SIZE:
@@ -1936,7 +1937,8 @@ find_value_indexed(const char *func, GLenum pname, GLuint 
index, union value *v)
 goto invalid_value;
   if (!ctx->Extensions.ARB_uniform_buffer_object)
 goto invalid_enum;
-  v->value_int = ctx->UniformBufferBindings[index].Size;
+  v->value_int = ctx->UniformBufferBindings[index].Size < 0 ? 0 :
+ ctx->UniformBufferBindings[index].Size;
   return TYPE_INT;
 
/* ARB_shader_storage_buffer_object */
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 60/70] glsl: Allow memory qualifiers on shader storage buffer blocks

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

v2:
  - Memory qualifiers on shader storage buffer objects do not come in the form
of layout qualifiers, they are block-level qualifiers.
---
 src/glsl/glsl_parser.yy | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index 4cb018a..f0abeb0 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -2628,6 +2628,20 @@ interface_block:
   }
   $$ = block;
}
+   | memory_qualifier interface_block
+   {
+  ast_interface_block *block = (ast_interface_block *)$2;
+
+  if (!block->layout.flags.q.buffer) {
+_mesa_glsl_error(& @1, state,
+ "memory qualifiers can only be used in the "
+ "declaration of shader storage blocks");
+  }
+  if (!block->layout.merge_qualifier(& @1, state, $1)) {
+ YYERROR;
+  }
+  $$ = block;
+   }
;
 
 basic_interface_block:
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 52/70] i965/nir/vec4: Implement nir_intrinsic_ssbo_atomic_*

2015-09-10 Thread Iago Toral Quiroga

---
 src/mesa/drivers/dri/i965/brw_vec4.h   |  1 +
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 78 ++
 2 files changed, 79 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index 082e209..d152713 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -426,6 +426,7 @@ public:
virtual void nir_emit_alu(nir_alu_instr *instr);
virtual void nir_emit_jump(nir_jump_instr *instr);
virtual void nir_emit_texture(nir_tex_instr *instr);
+   virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);
 
dst_reg get_nir_dest(nir_dest dest, enum brw_reg_type type);
dst_reg get_nir_dest(nir_dest dest, nir_alu_type type);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 77a2414..eceb37e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -758,6 +758,37 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr 
*instr)
   break;
}
 
+   case nir_intrinsic_ssbo_atomic_add:
+  nir_emit_ssbo_atomic(BRW_AOP_ADD, instr);
+  break;
+   case nir_intrinsic_ssbo_atomic_min:
+  if (dest.type == BRW_REGISTER_TYPE_D)
+ nir_emit_ssbo_atomic(BRW_AOP_IMIN, instr);
+  else
+ nir_emit_ssbo_atomic(BRW_AOP_UMIN, instr);
+  break;
+   case nir_intrinsic_ssbo_atomic_max:
+  if (dest.type == BRW_REGISTER_TYPE_D)
+ nir_emit_ssbo_atomic(BRW_AOP_IMAX, instr);
+  else
+ nir_emit_ssbo_atomic(BRW_AOP_UMAX, instr);
+  break;
+   case nir_intrinsic_ssbo_atomic_and:
+  nir_emit_ssbo_atomic(BRW_AOP_AND, instr);
+  break;
+   case nir_intrinsic_ssbo_atomic_or:
+  nir_emit_ssbo_atomic(BRW_AOP_OR, instr);
+  break;
+   case nir_intrinsic_ssbo_atomic_xor:
+  nir_emit_ssbo_atomic(BRW_AOP_XOR, instr);
+  break;
+   case nir_intrinsic_ssbo_atomic_exchange:
+  nir_emit_ssbo_atomic(BRW_AOP_MOV, instr);
+  break;
+   case nir_intrinsic_ssbo_atomic_comp_swap:
+  nir_emit_ssbo_atomic(BRW_AOP_CMPWR, instr);
+  break;
+
case nir_intrinsic_load_vertex_id:
   unreachable("should be lowered by lower_vertex_id()");
 
@@ -887,6 +918,53 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr 
*instr)
}
 }
 
+void
+vec4_visitor::nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr)
+{
+   dst_reg dest;
+   if (nir_intrinsic_infos[instr->intrinsic].has_dest)
+  dest = get_nir_dest(instr->dest);
+
+   src_reg surface;
+   nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]);
+   if (const_surface) {
+  unsigned surf_index = prog_data->base.binding_table.ubo_start +
+const_surface->u[0];
+  surface = src_reg(surf_index);
+  brw_mark_surface_used(_data->base, surf_index);
+   } else {
+  surface = src_reg(this, glsl_type::uint_type);
+  emit(ADD(dst_reg(surface), get_nir_src(instr->src[0]),
+   src_reg(prog_data->base.binding_table.ubo_start)));
+
+  /* Assume this may touch any UBO. This is the same as we do for other
+   * UBO/SSBO accesses with non-constant surface.
+   */
+  brw_mark_surface_used(_data->base,
+prog_data->base.binding_table.ubo_start +
+shader_prog->NumUniformBlocks - 1);
+   }
+
+   src_reg offset = get_nir_src(instr->src[1], 1);
+   src_reg data1 = get_nir_src(instr->src[2], 1);
+   src_reg data2;
+   if (op == BRW_AOP_CMPWR)
+  data2 = get_nir_src(instr->src[3], 1);
+
+   /* Emit the actual atomic operation */
+   const vec4_builder bld =
+  vec4_builder(this).at_end().annotate(current_annotation, base_ir);
+
+   src_reg atomic_result =
+  surface_access::emit_untyped_atomic(bld, surface, offset,
+  data1, data2,
+  1 /* dims */, 1 /* rsize */,
+  op,
+  BRW_PREDICATE_NONE);
+   dest.type = atomic_result.type;
+   bld.MOV(dest, atomic_result);
+}
+
 static unsigned
 brw_swizzle_for_nir_swizzle(uint8_t swizzle[4])
 {
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 39/70] i965/vec4: Implement __intrinsic_store_ssbo

2015-09-10 Thread Iago Toral Quiroga

---
 src/mesa/drivers/dri/i965/brw_vec4.h   |   2 +
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 157 +
 2 files changed, 159 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index c0e46ad..ab62be2 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -403,6 +403,8 @@ public:
 
void visit_atomic_counter_intrinsic(ir_call *ir);
 
+   void visit_store_ssbo_intrinsic(ir_call *ir);
+
bool is_high_sampler(src_reg sampler);
 
virtual void emit_nir_code();
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index e6515dd..a64f224 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -25,6 +25,11 @@
 #include "brw_cfg.h"
 #include "glsl/ir_uniform.h"
 #include "program/sampler.h"
+#include "brw_vec4_builder.h"
+#include "brw_vec4_surface_builder.h"
+
+using namespace brw;
+using namespace brw::surface_access;
 
 namespace brw {
 
@@ -2488,6 +2493,156 @@ vec4_visitor::visit_atomic_counter_intrinsic(ir_call 
*ir)
 }
 
 void
+vec4_visitor::visit_store_ssbo_intrinsic(ir_call *ir)
+{
+   /* Block index */
+   exec_node *param = ir->actual_parameters.get_head();
+   ir_rvalue *block_param = ((ir_instruction *)param)->as_rvalue();
+   ir_constant *const_uniform_block = block_param->as_constant();
+   src_reg surf_index;
+   if (const_uniform_block) {
+  unsigned index = prog_data->base.binding_table.ubo_start +
+   const_uniform_block->value.u[0];
+  surf_index = src_reg(index);
+
+  brw_mark_surface_used(_data->base, index);
+   } else {
+  block_param->accept(this);
+  src_reg block_reg = this->result;
+  surf_index = src_reg(this, glsl_type::uint_type);
+  emit(ADD(dst_reg(surf_index), block_reg,
+   src_reg(prog_data->base.binding_table.ubo_start)));
+
+  brw_mark_surface_used(_data->base,
+prog_data->base.binding_table.ubo_start +
+shader_prog->NumUniformBlocks - 1);
+   }
+
+   /* Offset */
+   param = param->get_next();
+   ir_rvalue *offset_param = ((ir_instruction *)param)->as_rvalue();
+   ir_constant *const_offset_ir = offset_param->as_constant();
+   unsigned const_offset_bytes =
+  const_offset_ir ? const_offset_ir->value.u[0] : 0;
+   src_reg offset = src_reg(this, glsl_type::uint_type);
+   offset_param->accept(this);
+   emit(MOV(dst_reg(offset), this->result));
+
+   /* Value */
+   param = param->get_next();
+   ir_rvalue *val_param = ((ir_instruction *)param)->as_rvalue();
+   src_reg val_reg = src_reg(this, glsl_type::vec4_type);
+   val_param->accept(this);
+   val_reg.type = this->result.type;
+   emit(MOV(dst_reg(val_reg), this->result));
+
+   /* Writemask */
+   param = param->get_next();
+   ir_rvalue *writemask_param = ((ir_instruction *)param)->as_rvalue();
+   ir_constant *const_writemask = writemask_param->as_constant();
+   assert(const_writemask);
+   unsigned write_mask = const_writemask->value.u[0];
+
+   /* IvyBridge does not have a native SIMD4x2 untyped write message so untyped
+* writes will use SIMD8 mode. In order to hide this and keep symmetry 
across
+* typed and untyped messages and across hardware platforms, the
+* current implementation of the untyped messages will transparently convert
+* the SIMD4x2 payload into an equivalent SIMD8 payload by transposing it
+* and enabling only channel X on the SEND instruction.
+*
+* The above, works well for full vector writes, but not for partial writes
+* where we want to write some channels and not others, like when we have
+* code such as v.xyw = vec3(1,2,4). Because the untyped write messages are
+* quite restrictive with regards to the channel enables we can configure in
+* the message descriptor (not all combinations are allowed) we cannot 
simply
+* implement these scenarios with a single message while keeping the
+* aforementioned symmetry in the implementation. For now we decided that
+* it is better to keep the symmetry to reduce complexity, so in situations
+* such as the one described we end up emitting two untyped write messages
+* (one for xy and another for w).
+*
+* The code below packs consecutive channels into a single write message,
+* detects gaps in the vector write and if needed, sends a second message
+* with the remaining channels. If in the future we decide that we want to
+* emit a single message at the expense of losing the symmetry in the
+* implementation we can:
+*
+* 1) For IvyBridge: Only use the red channel of the untyped write SIMD8
+*message payload. In this mode we can write up to 8 offsets and dwords
+*to the red channel only (for the two vec4s in the SIMD4x2 execution)
+*and select which of the 8

[Mesa-dev] [PATCH v5 43/70] i965/vec4: Implement __intrinsic_load_ssbo

2015-09-10 Thread Iago Toral Quiroga

---
 src/mesa/drivers/dri/i965/brw_vec4.h   |  1 +
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 76 ++
 2 files changed, 77 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index ab62be2..082e209 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -404,6 +404,7 @@ public:
void visit_atomic_counter_intrinsic(ir_call *ir);
 
void visit_store_ssbo_intrinsic(ir_call *ir);
+   void visit_load_ssbo_intrinsic(ir_call *ir);
 
bool is_high_sampler(src_reg sampler);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index a64f224..7ab6df0 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -2643,6 +2643,80 @@ vec4_visitor::visit_store_ssbo_intrinsic(ir_call *ir)
 }
 
 void
+vec4_visitor::visit_load_ssbo_intrinsic(ir_call *ir)
+{
+   const glsl_type *type = ir->return_deref->var->type;
+   assert(type->is_vector() || type->is_scalar());
+
+   /* Block */
+   exec_node *param = ir->actual_parameters.get_head();
+   ir_rvalue *block_param = ((ir_instruction *)param)->as_rvalue();
+   ir_constant *const_uniform_block = block_param->as_constant();
+   src_reg surf_index;
+
+   /* Storage for our result.  Ideally for an assignment we'd be using
+* the actual storage for the result here, instead.
+*/
+   dst_reg result_dst = get_assignment_lhs(ir->return_deref, this);
+   src_reg result_src(result_dst);
+   this->result = result_src;
+
+   if (const_uniform_block) {
+  /* The block index is a constant, so just emit the binding table entry
+   * as an immediate.
+   */
+  unsigned index = prog_data->base.binding_table.ubo_start +
+   const_uniform_block->value.u[0];
+  surf_index = src_reg(index);
+
+  brw_mark_surface_used(_data->base, index);
+   } else {
+  /* The block index is not a constant. Evaluate the index expression
+   * per-channel and add the base UBO index; we have to select a value
+   * from any live channel.
+   */
+  block_param->accept(this);
+  src_reg block_reg = this->result;
+
+  surf_index = src_reg(this, glsl_type::uint_type);
+  emit(ADD(dst_reg(surf_index), block_reg,
+   src_reg(prog_data->base.binding_table.ubo_start)));
+  surf_index = emit_uniformize(surf_index);
+
+  /* Assume this may touch any UBO. It would be nice to provide
+   * a tighter bound, but the array information is already lowered away.
+   */
+  brw_mark_surface_used(_data->base,
+prog_data->base.binding_table.ubo_start +
+shader_prog->NumUniformBlocks - 1);
+   }
+
+   /* Offset */
+   param = param->get_next();
+   ir_rvalue *offset_param = ((ir_instruction *)param)->as_rvalue();
+   offset_param->accept(this);
+   src_reg offset = src_reg(this, glsl_type::uint_type);
+   emit(MOV(dst_reg(offset), this->result));
+
+   /* Read the vector */
+   const vec4_builder bld = vec4_builder(this).at_end()
+  .annotate(current_annotation, base_ir);
+
+   src_reg read_result = emit_untyped_read(bld, surf_index, offset,
+   1 /* dims */, 4 /* size*/,
+   BRW_PREDICATE_NONE);
+   read_result.type = result_dst.type;
+   read_result.swizzle = brw_swizzle_for_size(type->vector_elements);
+
+   /* SSBO/UBO bools are any nonzero int.  We need to convert them to 0/~0. */
+   if (type->base_type == GLSL_TYPE_BOOL) {
+  emit(CMP(result_dst, read_result, src_reg(0u), BRW_CONDITIONAL_NZ));
+   } else {
+  emit(MOV(result_dst, read_result));
+   }
+}
+
+void
 vec4_visitor::visit(ir_call *ir)
 {
const char *callee = ir->callee->function_name();
@@ -2653,6 +2727,8 @@ vec4_visitor::visit(ir_call *ir)
   visit_atomic_counter_intrinsic(ir);
} else if (!strcmp("__intrinsic_store_ssbo", callee)) {
   visit_store_ssbo_intrinsic(ir);
+   } else if (!strcmp("__intrinsic_load_ssbo", callee)) {
+  visit_load_ssbo_intrinsic(ir);
} else {
   unreachable("Unsupported intrinsic.");
}
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 44/70] nir/glsl_to_nir: ignore an instruction's dest if it hasn't any

2015-09-10 Thread Iago Toral Quiroga

Reviewed-by: Connor Abbott 
---
 src/glsl/nir/glsl_to_nir.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index cb7b196..a387a54 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -1032,7 +1032,8 @@ nir_visitor::add_instr(nir_instr *instr, unsigned 
num_components)
 {
nir_dest *dest = get_instr_dest(instr);
 
-   nir_ssa_dest_init(instr, dest, num_components, NULL);
+   if (dest)
+  nir_ssa_dest_init(instr, dest, num_components, NULL);
 
nir_instr_insert_after_cf_list(this->cf_node_list, instr);
this->result = instr;
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 40/70] nir: Implement __intrinsic_load_ssbo

2015-09-10 Thread Iago Toral Quiroga

v2:
- Fix ssbo loads with boolean variables.

Reviewed-by: Connor Abbott 
---
 src/glsl/nir/glsl_to_nir.cpp| 80 -
 src/glsl/nir/nir_intrinsics.h   |  2 +-
 src/glsl/nir/nir_lower_phis_to_scalar.c |  2 +
 3 files changed, 81 insertions(+), 3 deletions(-)

diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index 6f1e20a..cb7b196 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -646,11 +646,14 @@ nir_visitor::visit(ir_call *ir)
  op = nir_intrinsic_image_size;
   } else if (strcmp(ir->callee_name(), "__intrinsic_store_ssbo") == 0) {
  op = nir_intrinsic_store_ssbo;
+  } else if (strcmp(ir->callee_name(), "__intrinsic_load_ssbo") == 0) {
+ op = nir_intrinsic_load_ssbo;
   } else {
  unreachable("not reached");
   }
 
   nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op);
+  nir_alu_instr *load_ssbo_compare;
 
   switch (op) {
   case nir_intrinsic_atomic_counter_read_var:
@@ -776,11 +779,75 @@ nir_visitor::visit(ir_call *ir)
  instr->src[1] = evaluate_rvalue(block);
  break;
   }
+  case nir_intrinsic_load_ssbo: {
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+
+ /* Check if we need the indirect version */
+ ir_constant *const_offset = offset->as_constant();
+ if (!const_offset) {
+op = nir_intrinsic_load_ssbo_indirect;
+ralloc_free(instr);
+instr = nir_intrinsic_instr_create(shader, op);
+instr->src[1] = evaluate_rvalue(offset);
+instr->const_index[0] = 0;
+ } else {
+instr->const_index[0] = const_offset->value.u[0];
+ }
+
+ instr->src[0] = evaluate_rvalue(block);
+
+ const glsl_type *type = ir->return_deref->var->type;
+ instr->num_components = type->vector_elements;
+
+ /* Setup destination register */
+ nir_ssa_dest_init(>instr, >dest,
+   type->vector_elements, NULL);
+
+ /* Insert the created nir instruction now since in the case of boolean
+  * result we will need to emit another instruction after it
+  */
+ nir_instr_insert_after_cf_list(this->cf_node_list, >instr);
+
+ /*
+  * In SSBO/UBO's, a true boolean value is any non-zero value, but we
+  * consider a true boolean to be ~0. Fix this up with a != 0
+  * comparison.
+  */
+ if (type->base_type == GLSL_TYPE_BOOL) {
+nir_load_const_instr *const_zero =
+   nir_load_const_instr_create(shader, 1);
+const_zero->value.u[0] = 0;
+nir_instr_insert_after_cf_list(this->cf_node_list,
+   _zero->instr);
+
+load_ssbo_compare = nir_alu_instr_create(shader, nir_op_ine);
+load_ssbo_compare->src[0].src.is_ssa = true;
+load_ssbo_compare->src[0].src.ssa = >dest.ssa;
+load_ssbo_compare->src[1].src.is_ssa = true;
+load_ssbo_compare->src[1].src.ssa = _zero->def;
+for (unsigned i = 0; i < type->vector_elements; i++)
+   load_ssbo_compare->src[1].swizzle[i] = 0;
+nir_ssa_dest_init(_ssbo_compare->instr,
+  _ssbo_compare->dest.dest,
+  type->vector_elements, NULL);
+load_ssbo_compare->dest.write_mask = (1 << type->vector_elements) 
- 1;
+nir_instr_insert_after_cf_list(this->cf_node_list,
+   _ssbo_compare->instr);
+ }
+
+ break;
+  }
   default:
  unreachable("not reached");
   }
 
-  nir_instr_insert_after_cf_list(this->cf_node_list, >instr);
+  /* nir_intrinsic_load_ssbo{_indirect} were already emitted */
+  if (op != nir_intrinsic_load_ssbo && op != 
nir_intrinsic_load_ssbo_indirect)
+ nir_instr_insert_after_cf_list(this->cf_node_list, >instr);
 
   if (ir->return_deref) {
  nir_intrinsic_instr *store_instr =
@@ -789,7 +856,16 @@ nir_visitor::visit(ir_call *ir)
 
  store_instr->variables[0] =
 evaluate_deref(_instr->instr, ir->return_deref);
- store_instr->src[0] = nir_src_for_ssa(>dest.ssa);
+
+ /* If nir_intrinsic_load_ssbo{_indirect} is loading a boolean 
variable,
+  * the value is on load_ssbo_compare's dest. Use it instead.
+  */
+ if ((op == nir_intrinsic_load_ssbo || op == 
nir_intrinsic_load_ssbo_indirect) &&
+ ir->return_deref->var->type->base_type == GLSL_TYPE_BOOL) {
+store_instr->src[0] = 
nir_src_for_ssa(_ssbo_compare->dest.dest.ssa);

[Mesa-dev] [PATCH v5 45/70] glsl: atomic counters can be declared as buffer-qualified variables

2015-09-10 Thread Iago Toral Quiroga

From: Kristian Høgsberg 

---
 src/glsl/ast_to_hir.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 566cc87..a364aae 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2789,7 +2789,7 @@ apply_type_qualifier_to_variable(const struct 
ast_type_qualifier *qual,
}
 
if (var->type->contains_atomic()) {
-  if (var->data.mode == ir_var_uniform) {
+  if (var->data.mode == ir_var_uniform || var->data.mode == 
ir_var_shader_storage) {
  if (var->data.explicit_binding) {
 unsigned *offset =
>atomic_counter_offsets[var->data.binding];
@@ -2807,8 +2807,8 @@ apply_type_qualifier_to_variable(const struct 
ast_type_qualifier *qual,
  }
   } else if (var->data.mode != ir_var_function_in) {
  _mesa_glsl_error(loc, state, "atomic counters may only be declared as 
"
-  "function parameters or uniform-qualified "
-  "global variables");
+  "function parameters, uniform-qualified or "
+  "buffer-qualified global variables");
   }
}
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 47/70] glsl: Add atomic functions from ARB_shader_storage_buffer_object

2015-09-10 Thread Iago Toral Quiroga

---
 src/glsl/builtin_functions.cpp | 185 +
 1 file changed, 185 insertions(+)

diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index 73a2074..d09cc23 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -401,6 +401,12 @@ shader_atomic_counters(const _mesa_glsl_parse_state *state)
 }
 
 static bool
+shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
+{
+   return state->ARB_shader_storage_buffer_object_enable;
+}
+
+static bool
 shader_trinary_minmax(const _mesa_glsl_parse_state *state)
 {
return state->AMD_shader_trinary_minmax_enable;
@@ -732,6 +738,17 @@ private:
ir_function_signature *_atomic_counter_op(const char *intrinsic,
  builtin_available_predicate 
avail);
 
+   ir_function_signature *_atomic_ssbo_intrinsic2(builtin_available_predicate 
avail,
+  const glsl_type *type);
+   ir_function_signature *_atomic_ssbo_op2(const char *intrinsic,
+   builtin_available_predicate avail,
+   const glsl_type *type);
+   ir_function_signature *_atomic_ssbo_intrinsic3(builtin_available_predicate 
avail,
+  const glsl_type *type);
+   ir_function_signature *_atomic_ssbo_op3(const char *intrinsic,
+   builtin_available_predicate avail,
+   const glsl_type *type);
+
B1(min3)
B1(max3)
B1(mid3)
@@ -869,6 +886,55 @@ builtin_builder::create_intrinsics()
 _atomic_counter_intrinsic(shader_atomic_counters),
 NULL);
 
+   add_function("__intrinsic_ssbo_atomic_add",
+_atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+glsl_type::uint_type),
+_atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+glsl_type::int_type),
+NULL);
+   add_function("__intrinsic_ssbo_atomic_min",
+_atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+glsl_type::uint_type),
+_atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+glsl_type::int_type),
+NULL);
+   add_function("__intrinsic_ssbo_atomic_max",
+_atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+glsl_type::uint_type),
+_atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+glsl_type::int_type),
+NULL);
+   add_function("__intrinsic_ssbo_atomic_and",
+_atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+glsl_type::uint_type),
+_atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+glsl_type::int_type),
+NULL);
+   add_function("__intrinsic_ssbo_atomic_or",
+_atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+glsl_type::uint_type),
+_atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+glsl_type::int_type),
+NULL);
+   add_function("__intrinsic_ssbo_atomic_xor",
+_atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+glsl_type::uint_type),
+_atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+glsl_type::int_type),
+NULL);
+   add_function("__intrinsic_ssbo_atomic_exchange",
+_atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+glsl_type::uint_type),
+_atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+glsl_type::int_type),
+NULL);
+   add_function("__intrinsic_ssbo_atomic_comp_swap",
+_atomic_ssbo_intrinsic3(shader_storage_buffer_object,
+glsl_type::uint_type),
+_atomic_ssbo_intrinsic3(shader_storage_buffer_object,
+glsl_type::int_type),
+NULL);
+
add_image_functions(false);
 
add_function("__intrinsic_memory_barrier",
@@ -2531,6 +2597,71 @@ builtin_builder::create_builtins()
shader_atomic_counters),
 NULL);
 
+   add_function("atomicAdd",
+_atomic_ssbo_op2("__intrinsic_ssbo_atomic_add",
+ shader_storage_buffer_object,
+ glsl_type::uint_type),
+

[Mesa-dev] [PATCH v5 46/70] glsl: Rename atomic counter functions

2015-09-10 Thread Iago Toral Quiroga

Shader Storage Buffer Object will add new atomic functions that are not
associated with counters, so better have atomic counter-specific functions
explicitly include the word "counter" in their names.

Reviewed-by: Timothy Arceri 
---
 src/glsl/builtin_functions.cpp | 30 +++---
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index 3b4a9df..73a2074 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -728,9 +728,9 @@ private:
B1(interpolateAtOffset)
B1(interpolateAtSample)
 
-   ir_function_signature *_atomic_intrinsic(builtin_available_predicate avail);
-   ir_function_signature *_atomic_op(const char *intrinsic,
- builtin_available_predicate avail);
+   ir_function_signature 
*_atomic_counter_intrinsic(builtin_available_predicate avail);
+   ir_function_signature *_atomic_counter_op(const char *intrinsic,
+ builtin_available_predicate 
avail);
 
B1(min3)
B1(max3)
@@ -860,13 +860,13 @@ void
 builtin_builder::create_intrinsics()
 {
add_function("__intrinsic_atomic_read",
-_atomic_intrinsic(shader_atomic_counters),
+_atomic_counter_intrinsic(shader_atomic_counters),
 NULL);
add_function("__intrinsic_atomic_increment",
-_atomic_intrinsic(shader_atomic_counters),
+_atomic_counter_intrinsic(shader_atomic_counters),
 NULL);
add_function("__intrinsic_atomic_predecrement",
-_atomic_intrinsic(shader_atomic_counters),
+_atomic_counter_intrinsic(shader_atomic_counters),
 NULL);
 
add_image_functions(false);
@@ -2519,16 +2519,16 @@ builtin_builder::create_builtins()
 NULL);
 
add_function("atomicCounter",
-_atomic_op("__intrinsic_atomic_read",
-   shader_atomic_counters),
+_atomic_counter_op("__intrinsic_atomic_read",
+   shader_atomic_counters),
 NULL);
add_function("atomicCounterIncrement",
-_atomic_op("__intrinsic_atomic_increment",
-   shader_atomic_counters),
+_atomic_counter_op("__intrinsic_atomic_increment",
+   shader_atomic_counters),
 NULL);
add_function("atomicCounterDecrement",
-_atomic_op("__intrinsic_atomic_predecrement",
-   shader_atomic_counters),
+_atomic_counter_op("__intrinsic_atomic_predecrement",
+   shader_atomic_counters),
 NULL);
 
add_function("min3",
@@ -4798,7 +4798,7 @@ builtin_builder::_interpolateAtSample(const glsl_type 
*type)
 }
 
 ir_function_signature *
-builtin_builder::_atomic_intrinsic(builtin_available_predicate avail)
+builtin_builder::_atomic_counter_intrinsic(builtin_available_predicate avail)
 {
ir_variable *counter = in_var(glsl_type::atomic_uint_type, "counter");
MAKE_INTRINSIC(glsl_type::uint_type, avail, 1, counter);
@@ -4806,8 +4806,8 @@ 
builtin_builder::_atomic_intrinsic(builtin_available_predicate avail)
 }
 
 ir_function_signature *
-builtin_builder::_atomic_op(const char *intrinsic,
-builtin_available_predicate avail)
+builtin_builder::_atomic_counter_op(const char *intrinsic,
+builtin_available_predicate avail)
 {
ir_variable *counter = in_var(glsl_type::atomic_uint_type, 
"atomic_counter");
MAKE_SIG(glsl_type::uint_type, avail, 1, counter);
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 48/70] glsl: use ir_rvalue instead of ir_dereference in auxiliary functions

2015-09-10 Thread Iago Toral Quiroga

From: Samuel Iglesias Gonsalvez 

In a later commit we will need to handle ir_swizzle nodes too, which are
not an ir_dereference. That can happen, for example, when we pass a
component of an integer vector as argument to any of the SSBO atomic
functions.

Signed-off-by: Samuel Iglesias Gonsalvez 
---
 src/glsl/lower_ubo_reference.cpp | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp
index 7e45a26..368dbfa 100644
--- a/src/glsl/lower_ubo_reference.cpp
+++ b/src/glsl/lower_ubo_reference.cpp
@@ -57,7 +57,7 @@ using namespace ir_builder;
  * thing referenced is row-major.
  */
 static bool
-is_dereferenced_thing_row_major(const ir_dereference *deref)
+is_dereferenced_thing_row_major(const ir_rvalue *deref)
 {
bool matrix = false;
const ir_rvalue *ir = deref;
@@ -143,7 +143,7 @@ public:
ir_visitor_status visit_enter(ir_assignment *ir);
 
void setup_for_load_or_store(ir_variable *var,
-ir_dereference *deref,
+ir_rvalue *deref,
 ir_rvalue **offset,
 unsigned *const_offset,
 bool *row_major,
@@ -196,7 +196,7 @@ public:
  * \c UniformBlocks array.
  */
 static const char *
-interface_field_name(void *mem_ctx, char *base_name, ir_dereference *d,
+interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d,
  ir_rvalue **nonconst_block_index)
 {
ir_rvalue *previous_index = NULL;
@@ -255,7 +255,7 @@ interface_field_name(void *mem_ctx, char *base_name, 
ir_dereference *d,
 
 void
 lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
- ir_dereference *deref,
+ ir_rvalue *deref,
  ir_rvalue **offset,
  unsigned *const_offset,
  bool *row_major,
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 42/70] i965/nir/vec4: Implement nir_intrinsic_load_ssbo

2015-09-10 Thread Iago Toral Quiroga

---
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 54 ++
 1 file changed, 54 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 450441d..77a2414 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -704,6 +704,60 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr 
*instr)
   break;
}
 
+   case nir_intrinsic_load_ssbo_indirect:
+  has_indirect = true;
+  /* fallthrough */
+   case nir_intrinsic_load_ssbo: {
+  assert(devinfo->gen >= 7);
+
+  nir_const_value *const_uniform_block =
+ nir_src_as_const_value(instr->src[0]);
+
+  src_reg surf_index;
+  if (const_uniform_block) {
+ unsigned index = prog_data->base.binding_table.ubo_start +
+  const_uniform_block->u[0];
+ surf_index = src_reg(index);
+
+ brw_mark_surface_used(&prog_data->base, index);
+  } else {
+ surf_index = src_reg(this, glsl_type::uint_type);
+ emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[0], 1),
+  src_reg(prog_data->base.binding_table.ubo_start)));
+ surf_index = emit_uniformize(surf_index);
+
+ /* Assume this may touch any UBO. It would be nice to provide
+  * a tighter bound, but the array information is already lowered away.
+  */
+ brw_mark_surface_used(&prog_data->base,
+   prog_data->base.binding_table.ubo_start +
+   shader_prog->NumUniformBlocks - 1);
+  }
+
+  src_reg offset_reg = src_reg(this, glsl_type::uint_type);
+  unsigned const_offset_bytes = 0;
+  if (has_indirect) {
+ emit(MOV(dst_reg(offset_reg), get_nir_src(instr->src[1], 1)));
+  } else {
+ const_offset_bytes = instr->const_index[0];
+ emit(MOV(dst_reg(offset_reg), src_reg(const_offset_bytes)));
+  }
+
+  /* Read the vector */
+  const vec4_builder bld = vec4_builder(this).at_end()
+ .annotate(current_annotation, base_ir);
+
+  src_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
+  1 /* dims */, 4 /* size*/,
+  BRW_PREDICATE_NONE);
+  dst_reg dest = get_nir_dest(instr->dest);
+  read_result.type = dest.type;
+  read_result.swizzle = brw_swizzle_for_size(instr->num_components);
+  emit(MOV(dest, read_result));
+
+  break;
+   }
+
case nir_intrinsic_load_vertex_id:
   unreachable("should be lowered by lower_vertex_id()");
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 41/70] i965/nir/fs: Implement nir_intrinsic_load_ssbo

2015-09-10 Thread Iago Toral Quiroga

---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 62 
 1 file changed, 62 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 6ccbf89..7fe8062 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1548,6 +1548,68 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, 
nir_intrinsic_instr *instr
   break;
}
 
+   case nir_intrinsic_load_ssbo_indirect:
+  has_indirect = true;
+  /* fallthrough */
+   case nir_intrinsic_load_ssbo: {
+  assert(devinfo->gen >= 7);
+
+  nir_const_value *const_uniform_block =
+ nir_src_as_const_value(instr->src[0]);
+
+  fs_reg surf_index;
+  if (const_uniform_block) {
+ unsigned index = stage_prog_data->binding_table.ubo_start +
+  const_uniform_block->u[0];
+ surf_index = fs_reg(index);
+ brw_mark_surface_used(prog_data, index);
+  } else {
+ surf_index = vgrf(glsl_type::uint_type);
+ bld.ADD(surf_index, get_nir_src(instr->src[0]),
+ fs_reg(stage_prog_data->binding_table.ubo_start));
+ surf_index = bld.emit_uniformize(surf_index);
+
+ /* Assume this may touch any UBO. It would be nice to provide
+  * a tighter bound, but the array information is already lowered away.
+  */
+ brw_mark_surface_used(prog_data,
+   stage_prog_data->binding_table.ubo_start +
+   shader_prog->NumUniformBlocks - 1);
+  }
+
+  /* Get the offset to read from */
+  fs_reg offset_reg = vgrf(glsl_type::uint_type);
+  unsigned const_offset_bytes = 0;
+  if (has_indirect) {
+ bld.MOV(offset_reg, get_nir_src(instr->src[1]));
+  } else {
+ const_offset_bytes = instr->const_index[0];
+ bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+  }
+
+  /* Read the vector */
+  for (int i = 0; i < instr->num_components; i++) {
+ fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
+1 /* dims */, 1 /* size */,
+BRW_PREDICATE_NONE);
+ read_result.type = dest.type;
+ bld.MOV(dest, read_result);
+ dest = offset(dest, bld, 1);
+
+ /* Vector components are stored contiguous in memory */
+ if (i < instr->num_components) {
+if (!has_indirect) {
+   const_offset_bytes += 4;
+   bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+} else {
+   bld.ADD(offset_reg, offset_reg, brw_imm_ud(4));
+}
+ }
+  }
+
+  break;
+   }
+
case nir_intrinsic_load_input_indirect:
   has_indirect = true;
   /* fallthrough */
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 0/3] Resolve further GCC sign-compare warnings

2015-09-10 Thread Emil Velikov

On 8 September 2015 at 16:52, Rhys Kidd  wrote:
> Continued progress towards addressing all GCC sign-compare warnings across
> Mesa (at least as compiled for i965 hw).
>
> As a new contributor, I am continuing to focus on manageable, easily reviewed,
> discrete improvements. This patchset resolves a number of warnings reported
> by GCC when configured to be pedantic.
>
> Unlikely to be any performance improvements, however code quality is enhanced.
>
> $ gcc --version
> gcc (Ubuntu 4.9.2-10ubuntu13) 4.9.2
>
> No piglit regressions on Ironlake.
>
> v2:
> - Squash series into three related patches.
>
> Rhys Kidd (3):
>   glsl: Resolve GCC sign-compare warning.
>   mesa: Resolve GCC sign-compare warning.
>   i965: Resolve GCC sign-compare warning.
>
All four patches should be in master now.

Thank you Rhys.
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

1 2 3 >

1 - 100 of 239 matches

Mail list logo