[Mesa-dev] [PATCH 11/13] glsl: Add an option to clamp block indices when lowering UBO/SSBOs

2016-05-19 Thread Jason Ekstrand
This prevents array overflow when the block is actually an array of UBOs or
SSBOs.  On some hardware such as i965, such overflows can cause GPU hangs.
---
 src/compiler/glsl/ir_optimization.h   |  2 +-
 src/compiler/glsl/linker.cpp  |  3 ++-
 src/compiler/glsl/lower_ubo_reference.cpp | 36 +++
 src/mesa/drivers/dri/i965/brw_compiler.c  |  1 +
 src/mesa/main/mtypes.h|  3 +++
 5 files changed, 39 insertions(+), 6 deletions(-)

diff --git a/src/compiler/glsl/ir_optimization.h 
b/src/compiler/glsl/ir_optimization.h
index 5fc2740..4afa37e 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -123,7 +123,7 @@ bool lower_clip_distance(gl_shader *shader);
 void lower_output_reads(unsigned stage, exec_list *instructions);
 bool lower_packing_builtins(exec_list *instructions, int op_mask);
 void lower_shared_reference(struct gl_shader *shader, unsigned *shared_size);
-void lower_ubo_reference(struct gl_shader *shader);
+void lower_ubo_reference(struct gl_shader *shader, bool clamp_block_indices);
 void lower_packed_varyings(void *mem_ctx,
unsigned locations_used, ir_variable_mode mode,
unsigned gs_input_vertices, gl_shader *shader,
diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index 71a71df..07c8263 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -4879,7 +4879,8 @@ link_shaders(struct gl_context *ctx, struct 
gl_shader_program *prog)
  &ctx->Const.ShaderCompilerOptions[i];
 
   if (options->LowerBufferInterfaceBlocks)
- lower_ubo_reference(prog->_LinkedShaders[i]);
+ lower_ubo_reference(prog->_LinkedShaders[i],
+ options->ClampBlockIndicesToArrayBounds);
 
   if (options->LowerShaderSharedVariables)
  lower_shared_reference(prog->_LinkedShaders[i],
diff --git a/src/compiler/glsl/lower_ubo_reference.cpp 
b/src/compiler/glsl/lower_ubo_reference.cpp
index 1a0140f..749deed 100644
--- a/src/compiler/glsl/lower_ubo_reference.cpp
+++ b/src/compiler/glsl/lower_ubo_reference.cpp
@@ -44,8 +44,10 @@ namespace {
 class lower_ubo_reference_visitor :
   public lower_buffer_access::lower_buffer_access {
 public:
-   lower_ubo_reference_visitor(struct gl_shader *shader)
-   : shader(shader), struct_field(NULL), variable(NULL)
+   lower_ubo_reference_visitor(struct gl_shader *shader,
+   bool clamp_block_indices)
+   : shader(shader), clamp_block_indices(clamp_block_indices),
+ struct_field(NULL), variable(NULL)
{
}
 
@@ -104,6 +106,7 @@ public:
ir_visitor_status visit_enter(ir_call *ir);
 
struct gl_shader *shader;
+   bool clamp_block_indices;
struct gl_uniform_buffer_variable *ubo_var;
const struct glsl_struct_field *struct_field;
ir_variable *variable;
@@ -242,6 +245,26 @@ interface_field_name(void *mem_ctx, char *base_name, 
ir_rvalue *d,
return NULL;
 }
 
+static ir_rvalue *
+clamp_to_array_bounds(void *mem_ctx, ir_rvalue *index, const glsl_type *type)
+{
+   assert(type->is_array());
+
+   const unsigned array_size = type->arrays_of_arrays_size();
+
+   ir_constant *max_index = new(mem_ctx) ir_constant(array_size - 1);
+   max_index->type = index->type;
+
+   ir_constant *zero = new(mem_ctx) ir_constant(0);
+   zero->type = index->type;
+
+   if (index->type->base_type == GLSL_TYPE_INT)
+  index = max2(index, zero);
+   index = min2(index, max_index);
+
+   return index;
+}
+
 void
 lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,
  ir_variable *var,
@@ -258,6 +281,11 @@ lower_ubo_reference_visitor::setup_for_load_or_store(void 
*mem_ctx,
   interface_field_name(mem_ctx, (char *) var->get_interface_type()->name,
deref, &nonconst_block_index);
 
+   if (nonconst_block_index && clamp_block_indices) {
+  nonconst_block_index =
+ clamp_to_array_bounds(mem_ctx, nonconst_block_index, var->type);
+   }
+
/* Locate the block by interface name */
unsigned num_blocks;
struct gl_uniform_block **blocks;
@@ -1062,9 +1090,9 @@ lower_ubo_reference_visitor::visit_enter(ir_call *ir)
 } /* unnamed namespace */
 
 void
-lower_ubo_reference(struct gl_shader *shader)
+lower_ubo_reference(struct gl_shader *shader, bool clamp_block_indices)
 {
-   lower_ubo_reference_visitor v(shader);
+   lower_ubo_reference_visitor v(shader, clamp_block_indices);
 
/* Loop over the instructions lowering references, because we take
 * a deref of a UBO array using a UBO dereference as the index will
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c 
b/src/mesa/drivers/dri/i965/brw_compiler.c
index 82131db..3f17589 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.c
+++ b/src/mesa/drivers/dri/i965/brw_compiler.c
@@ -188,6 +188,7 @@ brw_compiler_create(void *mem_ctx, const stru

Re: [Mesa-dev] [PATCH 11/13] glsl: Add an option to clamp block indices when lowering UBO/SSBOs

2016-05-19 Thread Ian Romanick
So... what did we decide for arrays of atomic counters?  Do we need an
extra pass for that or ... ?

Also... how does this handle the possibly unsized (actually
draw-time-sized) array at the end of an SSBO?

For UBOs, I think this patch is definitely sufficient, and I think it
improves things quite a lot for SSBOs.  We may need some more, but this
patch is

Reviewed-by: Ian Romanick 

On 05/19/2016 12:21 AM, Jason Ekstrand wrote:
> This prevents array overflow when the block is actually an array of UBOs or
> SSBOs.  On some hardware such as i965, such overflows can cause GPU hangs.
> ---
>  src/compiler/glsl/ir_optimization.h   |  2 +-
>  src/compiler/glsl/linker.cpp  |  3 ++-
>  src/compiler/glsl/lower_ubo_reference.cpp | 36 
> +++
>  src/mesa/drivers/dri/i965/brw_compiler.c  |  1 +
>  src/mesa/main/mtypes.h|  3 +++
>  5 files changed, 39 insertions(+), 6 deletions(-)
> 
> diff --git a/src/compiler/glsl/ir_optimization.h 
> b/src/compiler/glsl/ir_optimization.h
> index 5fc2740..4afa37e 100644
> --- a/src/compiler/glsl/ir_optimization.h
> +++ b/src/compiler/glsl/ir_optimization.h
> @@ -123,7 +123,7 @@ bool lower_clip_distance(gl_shader *shader);
>  void lower_output_reads(unsigned stage, exec_list *instructions);
>  bool lower_packing_builtins(exec_list *instructions, int op_mask);
>  void lower_shared_reference(struct gl_shader *shader, unsigned *shared_size);
> -void lower_ubo_reference(struct gl_shader *shader);
> +void lower_ubo_reference(struct gl_shader *shader, bool clamp_block_indices);
>  void lower_packed_varyings(void *mem_ctx,
> unsigned locations_used, ir_variable_mode mode,
> unsigned gs_input_vertices, gl_shader *shader,
> diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
> index 71a71df..07c8263 100644
> --- a/src/compiler/glsl/linker.cpp
> +++ b/src/compiler/glsl/linker.cpp
> @@ -4879,7 +4879,8 @@ link_shaders(struct gl_context *ctx, struct 
> gl_shader_program *prog)
>   &ctx->Const.ShaderCompilerOptions[i];
>  
>if (options->LowerBufferInterfaceBlocks)
> - lower_ubo_reference(prog->_LinkedShaders[i]);
> + lower_ubo_reference(prog->_LinkedShaders[i],
> + options->ClampBlockIndicesToArrayBounds);
>  
>if (options->LowerShaderSharedVariables)
>   lower_shared_reference(prog->_LinkedShaders[i],
> diff --git a/src/compiler/glsl/lower_ubo_reference.cpp 
> b/src/compiler/glsl/lower_ubo_reference.cpp
> index 1a0140f..749deed 100644
> --- a/src/compiler/glsl/lower_ubo_reference.cpp
> +++ b/src/compiler/glsl/lower_ubo_reference.cpp
> @@ -44,8 +44,10 @@ namespace {
>  class lower_ubo_reference_visitor :
>public lower_buffer_access::lower_buffer_access {
>  public:
> -   lower_ubo_reference_visitor(struct gl_shader *shader)
> -   : shader(shader), struct_field(NULL), variable(NULL)
> +   lower_ubo_reference_visitor(struct gl_shader *shader,
> +   bool clamp_block_indices)
> +   : shader(shader), clamp_block_indices(clamp_block_indices),
> + struct_field(NULL), variable(NULL)
> {
> }
>  
> @@ -104,6 +106,7 @@ public:
> ir_visitor_status visit_enter(ir_call *ir);
>  
> struct gl_shader *shader;
> +   bool clamp_block_indices;
> struct gl_uniform_buffer_variable *ubo_var;
> const struct glsl_struct_field *struct_field;
> ir_variable *variable;
> @@ -242,6 +245,26 @@ interface_field_name(void *mem_ctx, char *base_name, 
> ir_rvalue *d,
> return NULL;
>  }
>  
> +static ir_rvalue *
> +clamp_to_array_bounds(void *mem_ctx, ir_rvalue *index, const glsl_type *type)
> +{
> +   assert(type->is_array());
> +
> +   const unsigned array_size = type->arrays_of_arrays_size();
> +
> +   ir_constant *max_index = new(mem_ctx) ir_constant(array_size - 1);
> +   max_index->type = index->type;
> +
> +   ir_constant *zero = new(mem_ctx) ir_constant(0);
> +   zero->type = index->type;
> +
> +   if (index->type->base_type == GLSL_TYPE_INT)
> +  index = max2(index, zero);
> +   index = min2(index, max_index);
> +
> +   return index;
> +}
> +
>  void
>  lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,
>   ir_variable *var,
> @@ -258,6 +281,11 @@ 
> lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,
>interface_field_name(mem_ctx, (char *) var->get_interface_type()->name,
> deref, &nonconst_block_index);
>  
> +   if (nonconst_block_index && clamp_block_indices) {
> +  nonconst_block_index =
> + clamp_to_array_bounds(mem_ctx, nonconst_block_index, var->type);
> +   }
> +
> /* Locate the block by interface name */
> unsigned num_blocks;
> struct gl_uniform_block **blocks;
> @@ -1062,9 +1090,9 @@ lower_ubo_reference_visitor::visit_enter(ir_call *ir)
>  } /* unnamed namespace */
>  
>  void
> -lo

Re: [Mesa-dev] [PATCH 11/13] glsl: Add an option to clamp block indices when lowering UBO/SSBOs

2016-05-19 Thread Jason Ekstrand
On Thu, May 19, 2016 at 10:07 AM, Ian Romanick  wrote:

> So... what did we decide for arrays of atomic counters?  Do we need an
> extra pass for that or ... ?
>
> Also... how does this handle the possibly unsized (actually
> draw-time-sized) array at the end of an SSBO?
>

This patch only matters for the block index not the offset into the
buffer.  For atomics, the "block index" isn't allowed to be indirected
(Maybe they don't even have one?).  For UBOs and SSBOs, we set the correct
buffer size in the surface state so the hardware won't let it go outside.
This should also work for SSBO unsized arrays.


> For UBOs, I think this patch is definitely sufficient, and I think it
> improves things quite a lot for SSBOs.  We may need some more, but this
> patch is
>
> Reviewed-by: Ian Romanick 
>
> On 05/19/2016 12:21 AM, Jason Ekstrand wrote:
> > This prevents array overflow when the block is actually an array of UBOs
> or
> > SSBOs.  On some hardware such as i965, such overflows can cause GPU
> hangs.
> > ---
> >  src/compiler/glsl/ir_optimization.h   |  2 +-
> >  src/compiler/glsl/linker.cpp  |  3 ++-
> >  src/compiler/glsl/lower_ubo_reference.cpp | 36
> +++
> >  src/mesa/drivers/dri/i965/brw_compiler.c  |  1 +
> >  src/mesa/main/mtypes.h|  3 +++
> >  5 files changed, 39 insertions(+), 6 deletions(-)
> >
> > diff --git a/src/compiler/glsl/ir_optimization.h
> b/src/compiler/glsl/ir_optimization.h
> > index 5fc2740..4afa37e 100644
> > --- a/src/compiler/glsl/ir_optimization.h
> > +++ b/src/compiler/glsl/ir_optimization.h
> > @@ -123,7 +123,7 @@ bool lower_clip_distance(gl_shader *shader);
> >  void lower_output_reads(unsigned stage, exec_list *instructions);
> >  bool lower_packing_builtins(exec_list *instructions, int op_mask);
> >  void lower_shared_reference(struct gl_shader *shader, unsigned
> *shared_size);
> > -void lower_ubo_reference(struct gl_shader *shader);
> > +void lower_ubo_reference(struct gl_shader *shader, bool
> clamp_block_indices);
> >  void lower_packed_varyings(void *mem_ctx,
> > unsigned locations_used, ir_variable_mode
> mode,
> > unsigned gs_input_vertices, gl_shader
> *shader,
> > diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
> > index 71a71df..07c8263 100644
> > --- a/src/compiler/glsl/linker.cpp
> > +++ b/src/compiler/glsl/linker.cpp
> > @@ -4879,7 +4879,8 @@ link_shaders(struct gl_context *ctx, struct
> gl_shader_program *prog)
> >   &ctx->Const.ShaderCompilerOptions[i];
> >
> >if (options->LowerBufferInterfaceBlocks)
> > - lower_ubo_reference(prog->_LinkedShaders[i]);
> > + lower_ubo_reference(prog->_LinkedShaders[i],
> > + options->ClampBlockIndicesToArrayBounds);
> >
> >if (options->LowerShaderSharedVariables)
> >   lower_shared_reference(prog->_LinkedShaders[i],
> > diff --git a/src/compiler/glsl/lower_ubo_reference.cpp
> b/src/compiler/glsl/lower_ubo_reference.cpp
> > index 1a0140f..749deed 100644
> > --- a/src/compiler/glsl/lower_ubo_reference.cpp
> > +++ b/src/compiler/glsl/lower_ubo_reference.cpp
> > @@ -44,8 +44,10 @@ namespace {
> >  class lower_ubo_reference_visitor :
> >public lower_buffer_access::lower_buffer_access {
> >  public:
> > -   lower_ubo_reference_visitor(struct gl_shader *shader)
> > -   : shader(shader), struct_field(NULL), variable(NULL)
> > +   lower_ubo_reference_visitor(struct gl_shader *shader,
> > +   bool clamp_block_indices)
> > +   : shader(shader), clamp_block_indices(clamp_block_indices),
> > + struct_field(NULL), variable(NULL)
> > {
> > }
> >
> > @@ -104,6 +106,7 @@ public:
> > ir_visitor_status visit_enter(ir_call *ir);
> >
> > struct gl_shader *shader;
> > +   bool clamp_block_indices;
> > struct gl_uniform_buffer_variable *ubo_var;
> > const struct glsl_struct_field *struct_field;
> > ir_variable *variable;
> > @@ -242,6 +245,26 @@ interface_field_name(void *mem_ctx, char
> *base_name, ir_rvalue *d,
> > return NULL;
> >  }
> >
> > +static ir_rvalue *
> > +clamp_to_array_bounds(void *mem_ctx, ir_rvalue *index, const glsl_type
> *type)
> > +{
> > +   assert(type->is_array());
> > +
> > +   const unsigned array_size = type->arrays_of_arrays_size();
> > +
> > +   ir_constant *max_index = new(mem_ctx) ir_constant(array_size - 1);
> > +   max_index->type = index->type;
> > +
> > +   ir_constant *zero = new(mem_ctx) ir_constant(0);
> > +   zero->type = index->type;
> > +
> > +   if (index->type->base_type == GLSL_TYPE_INT)
> > +  index = max2(index, zero);
> > +   index = min2(index, max_index);
> > +
> > +   return index;
> > +}
> > +
> >  void
> >  lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,
> >   ir_variable *var,
> > @@ -258,6 +281,11 @@
> lower_ubo_reference_visitor::setup_fo