date:20150911

Re: [Mesa-dev] [PATCH v2] glsl: add packed varyings to program resource list

2015-09-11 Thread Tapani Pälli




On 09/11/2015 09:32 AM, Ilia Mirkin wrote:

On Fri, Sep 11, 2015 at 2:18 AM, Tapani Pälli  wrote:

On 09/11/2015 09:15 AM, Ilia Mirkin wrote:


On Fri, Sep 11, 2015 at 2:12 AM, Tapani Pälli 
wrote:


This makes sure that user is still able to query properties about
variables that have gotten packed by lower_packed_varyings pass.

Fixes following OpenGL ES 3.1 test:
 ES31-CTS.program_interface_query.separate-programs-vertex

v2: fix 'name included in packed list' check (Ilia Mirkin)

Signed-off-by: Tapani Pälli 
---
   src/glsl/linker.cpp | 74
++---
   1 file changed, 70 insertions(+), 4 deletions(-)

diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 94f847e..d8afb26 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -3114,6 +3114,29 @@ add_program_resource(struct gl_shader_program
*prog, GLenum type,
  return true;
   }

+/* Function checks if a variable var is a packed varying and
+ * if given name is part of packed varying's list.
+ *
+ * If a variable is a packed varying, it has a name like
+ * 'packed:a,b,c' where a, b and c are separate variables.
+ */
+static bool
+included_in_packed_varying(ir_variable *var, const char *name)
+{
+   if (strncmp(var->name, "packed:", 7) != 0)
+  return false;
+
+   const char *name_in_list = strstr(var->name, name);
+   const char *head = name_in_list - 1;
+   const char *tail = name_in_list + strlen(name);
+
+   if (name_in_list &&
+   (*head == ':' || *head == ',') &&
+   (*tail == '\0' || *tail == ','))



So... in the case "ab,b" and you search for 'b', it'll find 'ab' and
reject it.



Huh, this seems more complex than I thought .. almost a case for a proper
parser rather than a helper function.


FWIW there's strtok_r(). BTW, make sure to skip the "packed:" bit of
var->name. And be thankful that variable names can't contain commas in
them :)


I could also iterate over the instances using strstr, but strtok_r seems
cleaner. The only downside is that it requires a strdup, but that happens
only with packed varyings, so overall it is quite a corner case. I will send
a strtok_r version.


// Tapani
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3] glsl: add packed varyings to program resource list

2015-09-11 Thread Tapani Pälli

This makes sure that user is still able to query properties about
variables that have gotten packed by lower_packed_varyings pass.

Fixes following OpenGL ES 3.1 test:
   ES31-CTS.program_interface_query.separate-programs-vertex

v2: fix 'name included in packed list' check (Ilia Mirkin)
v3: iterate over instances of name using strtok_r (Ilia Mirkin)

Signed-off-by: Tapani Pälli 
---
 src/glsl/linker.cpp | 80 ++---
 1 file changed, 76 insertions(+), 4 deletions(-)

diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 94f847e..de499e2 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -3114,6 +3114,35 @@ add_program_resource(struct gl_shader_program *prog, 
GLenum type,
return true;
 }
 
+/* Function checks if a variable var is a packed varying and
+ * if given name is part of packed varying's list.
+ *
+ * If a variable is a packed varying, it has a name like
+ * 'packed:a,b,c' where a, b and c are separate variables.
+ */
+static bool
+included_in_packed_varying(ir_variable *var, const char *name)
+{
+   if (strncmp(var->name, "packed:", 7) != 0)
+  return false;
+
+   char *list = strdup(var->name + 7);
+   assert(list);
+
+   bool found = false;
+   char *saveptr;
+   char *token = strtok_r(list, ",", &saveptr);
+   while (token) {
+  if (strcmp(token, name) == 0) {
+ found = true;
+ break;
+  }
+  token = strtok_r(NULL, ",", &saveptr);
+   }
+   free(list);
+   return found;
+}
+
 /**
  * Function builds a stage reference bitmask from variable name.
  */
@@ -3141,6 +3170,11 @@ build_stageref(struct gl_shader_program *shProg, const 
char *name,
  if (var) {
 unsigned baselen = strlen(var->name);
 
+if (included_in_packed_varying(var, name)) {
+  stages |= (1 << i);
+  break;
+}
+
 /* Type needs to match if specified, otherwise we might
  * pick a variable with same name but different interface.
  */
@@ -3166,9 +3200,9 @@ build_stageref(struct gl_shader_program *shProg, const 
char *name,
 
 static bool
 add_interface_variables(struct gl_shader_program *shProg,
-struct gl_shader *sh, GLenum programInterface)
+exec_list *ir, GLenum programInterface)
 {
-   foreach_in_list(ir_instruction, node, sh->ir) {
+   foreach_in_list(ir_instruction, node, ir) {
   ir_variable *var = node->as_variable();
   uint8_t mask = 0;
 
@@ -3203,6 +3237,12 @@ add_interface_variables(struct gl_shader_program *shProg,
  continue;
   };
 
+  /* Skip packed varyings, packed varyings are handled separately
+   * by add_packed_varyings.
+   */
+  if (strncmp(var->name, "packed:", 7) == 0)
+ continue;
+
   if (!add_program_resource(shProg, programInterface, var,
 build_stageref(shProg, var->name,
var->data.mode) | mask))
@@ -3211,6 +3251,33 @@ add_interface_variables(struct gl_shader_program *shProg,
return true;
 }
 
+static bool
+add_packed_varyings(struct gl_shader_program *shProg, int stage)
+{
+   struct gl_shader *sh = shProg->_LinkedShaders[stage];
+   GLenum iface;
+
+   if (!sh || !sh->packed_varyings)
+  return true;
+
+   foreach_in_list(ir_instruction, node, sh->packed_varyings) {
+  ir_variable *var = node->as_variable();
+  if (var) {
+ switch (var->data.mode) {
+ case ir_var_shader_in:
+iface = GL_PROGRAM_INPUT;
+ case ir_var_shader_out:
+iface = GL_PROGRAM_OUTPUT;
+ }
+ if (!add_program_resource(shProg, iface, var,
+   build_stageref(shProg, var->name,
+  var->data.mode)))
+return false;
+  }
+   }
+   return true;
+}
+
 /**
  * Builds up a list of program resources that point to existing
  * resource data.
@@ -3243,12 +3310,17 @@ build_program_resource_list(struct gl_shader_program 
*shProg)
if (input_stage == MESA_SHADER_STAGES && output_stage == 0)
   return;
 
+   if (!add_packed_varyings(shProg, input_stage))
+  return;
+   if (!add_packed_varyings(shProg, output_stage))
+  return;
+
/* Add inputs and outputs to the resource list. */
-   if (!add_interface_variables(shProg, shProg->_LinkedShaders[input_stage],
+   if (!add_interface_variables(shProg, 
shProg->_LinkedShaders[input_stage]->ir,
 GL_PROGRAM_INPUT))
   return;
 
-   if (!add_interface_variables(shProg, shProg->_LinkedShaders[output_stage],
+   if (!add_interface_variables(shProg, 
shProg->_LinkedShaders[output_stage]->ir,
 GL_PROGRAM_OUTPUT))
   return;
 
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/lis

Re: [Mesa-dev] [PATCH 1/3] mesa: enable query of PROGRAM_PIPELINE_BINDING for ES 3.1

2015-09-11 Thread Lofstedt, Marta

Reviewed-by: Marta Lofstedt 

> -Original Message-
> From: mesa-dev [mailto:mesa-dev-boun...@lists.freedesktop.org] On
> Behalf Of Tapani Pälli
> Sent: Tuesday, September 1, 2015 12:54 PM
> To: mesa-dev@lists.freedesktop.org
> Cc: Romanick, Ian D
> Subject: [Mesa-dev] [PATCH 1/3] mesa: enable query of
> PROGRAM_PIPELINE_BINDING for ES 3.1
> 
> Specified in OpenGL ES 3.1 spec, Table 23.32: Program Object State.
> 
> Signed-off-by: Tapani Pälli 
> ---
>  src/mesa/main/get_hash_params.py | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/src/mesa/main/get_hash_params.py
> b/src/mesa/main/get_hash_params.py
> index dc5ba6f..e2e3d0d 100644
> --- a/src/mesa/main/get_hash_params.py
> +++ b/src/mesa/main/get_hash_params.py
> @@ -458,6 +458,9 @@ descriptor=[
> 
>  # GL_ARB_explicit_uniform_location / GLES 3.1
>[ "MAX_UNIFORM_LOCATIONS",
> "CONTEXT_INT(Const.MaxUserAssignableUniformLocations),
> extra_ARB_explicit_uniform_location" ],
> +
> +# GL_ARB_separate_shader_objects / GLES 3.1
> +  [ "PROGRAM_PIPELINE_BINDING", "LOC_CUSTOM, TYPE_INT,
> +GL_PROGRAM_PIPELINE_BINDING, NO_EXTRA" ],
>  ]},
> 
>  # Enums in OpenGL Core profile and ES 3.1 @@ -799,9 +802,6 @@
> descriptor=[  # GL_ARB_texture_gather
>[ "MAX_PROGRAM_TEXTURE_GATHER_COMPONENTS_ARB",
> "CONTEXT_INT(Const.MaxProgramTextureGatherComponents),
> extra_ARB_texture_gather"],
> 
> -# GL_ARB_separate_shader_objects
> -  [ "PROGRAM_PIPELINE_BINDING", "LOC_CUSTOM, TYPE_INT,
> GL_PROGRAM_PIPELINE_BINDING, NO_EXTRA" ],
> -
>  # GL_ARB_shader_atomic_counters
>[ "MAX_GEOMETRY_ATOMIC_COUNTER_BUFFERS",
> "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuff
> ers), extra_ARB_shader_atomic_counters_and_geometry_shader" ],
>[ "MAX_GEOMETRY_ATOMIC_COUNTERS",
> "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCou
> nters), extra_ARB_shader_atomic_counters_and_geometry_shader" ],
> --
> 2.4.3
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] nir: Fix output variable names

2015-09-11 Thread Eduardo Lima Mitev

Commit 1dbe4af9c9e318525fc082b542b93fb7f1e5efba
"nir: Add a pass to lower outputs to temporary variables" messed up output
variable names. The issue can be reproduced by dumping the NIR shaders
with INTEL_DEBUG="vs,fs".
---
 src/glsl/nir/nir_lower_outputs_to_temporaries.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/glsl/nir/nir_lower_outputs_to_temporaries.c 
b/src/glsl/nir/nir_lower_outputs_to_temporaries.c
index b730cad..e9c4c0d 100644
--- a/src/glsl/nir/nir_lower_outputs_to_temporaries.c
+++ b/src/glsl/nir/nir_lower_outputs_to_temporaries.c
@@ -87,12 +87,13 @@ nir_lower_outputs_to_temporaries(nir_shader *shader)
foreach_list_typed(nir_variable, var, node, &state.old_outputs) {
   nir_variable *output = ralloc(shader, nir_variable);
   memcpy(output, var, sizeof *output);
+  output->name = ralloc_strdup(output, var->name);
 
   /* The orignal is now the temporary */
   nir_variable *temp = var;
 
   /* Give the output a new name with @out-temp appended */
-  temp->name = ralloc_asprintf(var, "%s@out-temp", output->name);
+  temp->name = ralloc_asprintf(output, "%s@out-temp", output->name);
   temp->data.mode = nir_var_global;
   temp->constant_initializer = NULL;
 
-- 
2.4.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 4/7] i965/cs: Reserve local invocation id in payload regs

2015-09-11 Thread Iago Toral

On Mon, 2015-08-03 at 23:00 -0700, Jordan Justen wrote:
> Signed-off-by: Jordan Justen 
> ---
>  src/mesa/drivers/dri/i965/brw_cs.cpp | 29 +
>  src/mesa/drivers/dri/i965/brw_cs.h   |  5 +
>  src/mesa/drivers/dri/i965/brw_fs.cpp | 11 +++
>  src/mesa/drivers/dri/i965/brw_fs.h   |  1 +
>  4 files changed, 46 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp 
> b/src/mesa/drivers/dri/i965/brw_cs.cpp
> index b566b92..541151a 100644
> --- a/src/mesa/drivers/dri/i965/brw_cs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_cs.cpp
> @@ -444,6 +444,35 @@ const struct brw_tracked_state brw_cs_state = {
>  
> 
>  /**
> + * We are building the local ID push constant data using the simplest 
> possible
> + * method. We simply push the local IDs directly as they should appear in the
> + * registers for the uvec3 gl_LocalInvocationID variable.
> + *
> + * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6
> + * registers worth of push constant space.
> + *
> + * FINISHME: There are a few easy optimizations to consider.
> + *
> + * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is
> + *no need for using push constant space for that dimension.
> + *
> + * 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can
> + *easily use 16-bit words rather than 32-bit dwords in the push constant
> + *data.
> + *
> + * 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for
> + *conveying the data, and thereby reduce push constant usage.
> + *
> + */
> +unsigned
> +brw_cs_prog_local_id_payload_size(const struct gl_program *prog,
> +  unsigned dispatch_width)
> +{
> +   return 3 * dispatch_width * sizeof(uint32_t);
> +}
> +
> +
> +/**
>   * Creates a region containing the push constants for the CS on gen7+.
>   *
>   * Push constants are constant values (such as GLSL uniforms) that are
> diff --git a/src/mesa/drivers/dri/i965/brw_cs.h 
> b/src/mesa/drivers/dri/i965/brw_cs.h
> index 8404aa3..5738918 100644
> --- a/src/mesa/drivers/dri/i965/brw_cs.h
> +++ b/src/mesa/drivers/dri/i965/brw_cs.h
> @@ -42,6 +42,11 @@ void
>  brw_upload_cs_prog(struct brw_context *brw);
>  
>  #ifdef __cplusplus
> +
> +unsigned
> +brw_cs_prog_local_id_payload_size(const struct gl_program *prog,
> +  unsigned dispatch_width);
> +
>  }
>  #endif
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 15fe364..b72eb76 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -42,6 +42,7 @@
>  #include "brw_eu.h"
>  #include "brw_wm.h"
>  #include "brw_fs.h"
> +#include "brw_cs.h"
>  #include "brw_cfg.h"
>  #include "brw_dead_control_flow.h"
>  #include "main/uniforms.h"
> @@ -4624,6 +4625,16 @@ fs_visitor::setup_cs_payload()
> assert(devinfo->gen >= 7);
>  
> payload.num_regs = 1;
> +
> +   if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
> +  const unsigned local_id_size =
> + brw_cs_prog_local_id_payload_size(prog, dispatch_width);
> +  const unsigned local_id_regs = ALIGN(local_id_size, 32) / 32;

Isn't this guaranteed to be 32-byte aligned? In any case, I suppose this
is okay to prepare the ground for some of the potential optimizations
you mentioned above.

> +  if (local_id_regs > 0) {
> + payload.local_invocation_id_reg = payload.num_regs;
> + payload.num_regs += local_id_regs;
> +  }
> +   }

As it is now, local_id_regs can't be zero. I suppose that it could be
possible for it to be zero in the future if we end up implementing the
first of the optimizations you suggest above for the case where all the
components are 1 though... is that why you decided to go with a
condition here instead of an assert? In that case maybe it would be
worth adding a comment explaining when this could be zero.

Reviewed-by: Iago Toral Quiroga 

>  }
>  
>  void
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
> b/src/mesa/drivers/dri/i965/brw_fs.h
> index 4749c47..b2266b2 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -368,6 +368,7 @@ public:
>uint8_t sample_pos_reg;
>uint8_t sample_mask_in_reg;
>uint8_t barycentric_coord_reg[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
> +  uint8_t local_invocation_id_reg;
>  
>/** The number of thread payload registers the hardware will supply. */
>uint8_t num_regs;


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/3] mesa: return initial value for PROGRAM_SEPARABLE when not linked

2015-09-11 Thread Lofstedt, Marta

Hi Tapani,

For spec. reference I believe you could go for section 7.12:
"Most properties set within program objects are specified not to take effect 
until
the next call to LinkProgram or ProgramBinary. Some properties further require
a successful call to either of these commands before taking effect.
GetProgramiv returns the properties currently in effect for
program, which may differ from the properties set within
program since the most recent call to LinkProgram or ProgramBinary,
which have not yet taken effect. If there has been no such call putting
changes to pname into effect, initial values are returned."

and from table 20.20 we see that PROGRAM_SEPARABLE is initialized to FALSE.

You have my R.b if you update.

/Marta

From: mesa-dev [mesa-dev-boun...@lists.freedesktop.org] on behalf of Tapani 
Pälli [tapani.pa...@intel.com]
Sent: Tuesday, September 01, 2015 12:53 PM
To: mesa-dev@lists.freedesktop.org
Cc: Romanick, Ian D
Subject: [Mesa-dev] [PATCH 2/3] mesa: return initial value for  
PROGRAM_SEPARABLE when not linked

I haven't found clear spec evidence of this behaviour but this is
expected by a conformance test that changes the value with
glProgramParameteri but does not link the program. Test says:

"The query for PROGRAM_SEPARABLE must query latched state. In other
words, the state of the binary after it was linked. So in the tests
below, the queries should return the default state GL_FALSE since the
program has no linked binary."

Signed-off-by: Tapani Pälli 
---
 src/mesa/main/shaderapi.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index 0e0e0d6..fb82543 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -773,7 +773,8 @@ get_programiv(struct gl_context *ctx, GLuint program, 
GLenum pname,
   return;
}
case GL_PROGRAM_SEPARABLE:
-  *params = shProg->SeparateShader;
+  /* If the program has not been linked, return initial value 0. */
+  *params = (shProg->LinkStatus == GL_FALSE) ? 0 : shProg->SeparateShader;
   return;

/* ARB_tessellation_shader */
--
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/3] mesa: return initial value for VALIDATE_STATUS if pipe not bound

2015-09-11 Thread Lofstedt, Marta

Reviewed-by: Marta Lofstedt 


From: mesa-dev [mesa-dev-boun...@lists.freedesktop.org] on behalf of Tapani 
Pälli [tapani.pa...@intel.com]
Sent: Tuesday, September 01, 2015 12:53 PM
To: mesa-dev@lists.freedesktop.org
Cc: Romanick, Ian D
Subject: [Mesa-dev] [PATCH 3/3] mesa: return initial value for  VALIDATE_STATUS 
if pipe not bound

From OpenGL 4.5 Core spec (7.13):

"If pipeline is a name that has been generated (without subsequent
deletion) by GenProgramPipelines, but refers to a program pipeline
object that has not been previously bound, the GL first creates a
new state vector in the same manner as when BindProgramPipeline
creates a new program pipeline object."

I interpret this as "If GetProgramPipelineiv gets called without a
bound (but valid) pipeline object, the state should reflect initial
state of a new pipeline object." This is also expected behaviour by
ES31-CTS.sepshaderobjs.PipelineApi conformance test.

Signed-off-by: Tapani Pälli 
---
 src/mesa/main/pipelineobj.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/main/pipelineobj.c b/src/mesa/main/pipelineobj.c
index 07acbf1..c2e1d29 100644
--- a/src/mesa/main/pipelineobj.c
+++ b/src/mesa/main/pipelineobj.c
@@ -614,7 +614,8 @@ _mesa_GetProgramPipelineiv(GLuint pipeline, GLenum pname, 
GLint *params)
   *params = pipe->InfoLog ? strlen(pipe->InfoLog) + 1 : 0;
   return;
case GL_VALIDATE_STATUS:
-  *params = pipe->Validated;
+  /* If pipeline is not bound, return initial value 0. */
+  *params = (ctx->_Shader->Name != pipe->Name) ? 0 : pipe->Validated;
   return;
case GL_VERTEX_SHADER:
   *params = pipe->CurrentProgram[MESA_SHADER_VERTEX]
--
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/3] mesa: return initial value for PROGRAM_SEPARABLE when not linked

2015-09-11 Thread Tapani Pälli




On 09/11/2015 10:53 AM, Lofstedt, Marta wrote:

Hi Tapani,

For spec. reference I believe you could go for section 7.12:
"Most properties set within program objects are specified not to take effect 
until
the next call to LinkProgram or ProgramBinary. Some properties further require
a successful call to either of these commands before taking effect.
GetProgramiv returns the properties currently in effect for
program, which may differ from the properties set within
program since the most recent call to LinkProgram or ProgramBinary,
which have not yet taken effect. If there has been no such call putting
changes to pname into effect, initial values are returned."


True, thanks for finding this!


and from table 20.20 we see that PROGRAM_SEPARABLE is initialized to FALSE.

You have my R.b if you update.

/Marta

From: mesa-dev [mesa-dev-boun...@lists.freedesktop.org] on behalf of Tapani 
Pälli [tapani.pa...@intel.com]
Sent: Tuesday, September 01, 2015 12:53 PM
To: mesa-dev@lists.freedesktop.org
Cc: Romanick, Ian D
Subject: [Mesa-dev] [PATCH 2/3] mesa: return initial value for  
PROGRAM_SEPARABLE when not linked

I haven't found clear spec evidence of this behaviour but this is
expected by a conformance test that changes the value with
glProgramParameteri but does not link the program. Test says:

"The query for PROGRAM_SEPARABLE must query latched state. In other
words, the state of the binary after it was linked. So in the tests
below, the queries should return the default state GL_FALSE since the
program has no linked binary."

Signed-off-by: Tapani Pälli 
---
  src/mesa/main/shaderapi.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index 0e0e0d6..fb82543 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -773,7 +773,8 @@ get_programiv(struct gl_context *ctx, GLuint program, 
GLenum pname,
return;
 }
 case GL_PROGRAM_SEPARABLE:
-  *params = shProg->SeparateShader;
+  /* If the program has not been linked, return initial value 0. */
+  *params = (shProg->LinkStatus == GL_FALSE) ? 0 : shProg->SeparateShader;
return;

 /* ARB_tessellation_shader */
--
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 01/20] i965: Define gather push constants opcodes

2015-09-11 Thread Abdiel Janulgue

Signed-off-by: Abdiel Janulgue 
---
 src/mesa/drivers/dri/i965/brw_defines.h | 20 
 1 file changed, 20 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index a8594af..7b07c50 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -2396,6 +2396,26 @@ enum brw_wm_barycentric_interp_mode {
 #define _3DSTATE_CONSTANT_HS  0x7819 /* GEN7+ */
 #define _3DSTATE_CONSTANT_DS  0x781A /* GEN7+ */
 
+/* Resource streamer gather constants */
+#define _3DSTATE_GATHER_POOL_ALLOC0x791A /* GEN7.5+ */
+#define _3DSTATE_GATHER_CONSTANT_VS   0x7834
+#define _3DSTATE_GATHER_CONSTANT_GS   0x7835
+#define _3DSTATE_GATHER_CONSTANT_HS   0x7836
+#define _3DSTATE_GATHER_CONSTANT_DS   0x7837
+#define _3DSTATE_GATHER_CONSTANT_PS   0x7838
+/* Only required in HSW */
+#define HSW_GATHER_CONSTANTS_RESERVED (3 << 4)
+
+#define BRW_GATHER_CONSTANTS_ENABLE   (1 << 11) /* GEN7.5+ */
+#define BRW_GATHER_BUFFER_VALID_SHIFT 16
+#define BRW_GATHER_BUFFER_VALID_MASK  INTEL_MASK(31, 16)
+#define BRW_GATHER_BINDING_TABLE_BLOCK_SHIFT  12
+#define BRW_GATHER_BINDING_TABLE_BLOCK_MASK   INTEL_MASK(15, 12)
+#define BRW_GATHER_CONST_BUFFER_OFFSET_SHIFT  8
+#define BRW_GATHER_CONST_BUFFER_OFFSET_MASK   INTEL_MASK(15, 8)
+#define BRW_GATHER_CHANNEL_MASK_SHIFT 4
+#define BRW_GATHER_CHANNEL_MASK_MASK  INTEL_MASK(7, 4)
+
 #define _3DSTATE_STREAMOUT0x781e /* GEN7+ */
 /* DW1 */
 # define SO_FUNCTION_ENABLE(1 << 31)
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] i965: Turn UBOs as push constants

2015-09-11 Thread Abdiel Janulgue

Some updated figures first:

Ue4 Reflections Subway demo
x  fs gather constants disabled
+  fs gather constants enabled

HSW:
    N       Min       Max    Median       Avg        Stddev
x  10   5.09739   6.48963   6.33357  6.197985    0.40742969
+  10   6.56139   6.86579   6.80952  6.758516    0.10267153
Difference at 95.0% confidence
0.560531 ± 0.279157
9.04376% ± 4.50399%
(Student's t, pooled s = 0.297103)

BDW:
    N       Min       Max    Median       Avg        Stddev
x  10   3.64474   3.69746   3.68596   3.67842   0.015452422
+  10   3.77998   3.80967   3.79824  3.795633  0.0079241642
Difference at 95.0% confidence
0.117213 ± 0.0115377
3.1865% ± 0.31366%
(Student's t, pooled s = 0.0122794)

This patch series is taken from my initial gather constants series last April.
Now that the basic i965 resource streamer infrastructure is in place, these are
the remaining bits to enable the gather constants hardware[*]. I've tried to
address the comments from the reviews that happened since then.

The major change from the last posting is that we now support GEN8. Also, the
vec4 backend gather constant support is probably no longer relevant because of
the switch to NIR, but I've included it here so that people interested in
implementing it on the vec4-NIR backend have a reference point.

The series has no piglit regressions.

[PATCH 01/20] i965: Define gather push constants opcodes
[PATCH 02/20] i965: Enable gather push constants
[PATCH 03/20] i965: Allocate space on the gather pool for plain
[PATCH 04/20] i965: Allocate space on the gather pool for UBO entries
[PATCH 05/20] i965: Store gather table information in the program
[PATCH 06/20] i965: Assign hw-binding table index for each UBO
[PATCH 07/20] i965: Assign hw-binding table index for uniform
[PATCH 08/20] nir: Add glsl_get_array_size() wrapper.
[PATCH 09/20] nir: Add glsl_get_type_without_array() wrapper
[PATCH 10/20] i965: Include UBO parameter sizes in push constant
[PATCH 11/20] i965/fs: Append uniform entries to the gather table
[PATCH 12/20] i965/fs/nir: Append nir_intrinsic_load_ubo entries to
[PATCH 13/20] i965/fs: Pack UBO registers right after uniform
[PATCH 14/20] i965/vec4: Append uniform entries to the gather table
[PATCH 15/20] i965/vec4: Append ir_binop_ubo_load entries to the
[PATCH 16/20] i965/vec4: Pack UBO registers right after uniform
[PATCH 17/20] i965: Upload UBO surfaces before emitting constant
[PATCH 18/20] i965: Program the push constants state using the gather
[PATCH 19/20] i965: Disable gather push constants for null constants
[PATCH 20/20] i965: Enable push constants for UBOs

-Abdiel

--
[*] http://lists.freedesktop.org/archives/mesa-dev/2015-April/082991.html
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 02/20] i965: Enable gather push constants

2015-09-11 Thread Abdiel Janulgue

The 3DSTATE_GATHER_POOL_ALLOC is used to enable or disable the gather
push constants feature within a context. This patch provides the toggle
functionality of using gather push constants to program constant data
within a batch.

Using gather push constants requires that a gather pool be allocated so
that the resource streamer can flush the packed constants it gathered.
The pool is later referenced by the 3DSTATE_CONSTANT_* command to
program the push constant data.

Also introduce INTEL_UBO_GATHER to selectively enable which shader stage
uses gather constants for ubo fetches.

v2: GEN8 support.

Signed-off-by: Abdiel Janulgue 
---
 src/mesa/drivers/dri/i965/brw_binding_tables.c | 59 ++
 src/mesa/drivers/dri/i965/brw_context.c| 39 -
 src/mesa/drivers/dri/i965/brw_context.h| 10 +
 src/mesa/drivers/dri/i965/brw_state.h  |  1 +
 4 files changed, 108 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c 
b/src/mesa/drivers/dri/i965/brw_binding_tables.c
index 508f1f0..aa16b6d 100644
--- a/src/mesa/drivers/dri/i965/brw_binding_tables.c
+++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c
@@ -276,6 +276,60 @@ gen7_update_binding_table_from_array(struct brw_context 
*brw,
ADVANCE_BATCH();
 }
 
+static void
+gen7_init_gather_pool(struct brw_context *brw)
+{
+   if (!brw->use_resource_streamer ||
+   (!brw->vs_ubo_gather && !brw->gs_ubo_gather && !brw->fs_ubo_gather))
+  return;
+
+   if (!brw->gather_pool.bo) {
+  brw->gather_pool.bo = drm_intel_bo_alloc(brw->bufmgr, "gather_pool",
+   brw->gather_pool.size, 4096);
+  brw->gather_pool.next_offset = 0;
+   }
+}
+
+void
+gen7_toggle_gather_constants(struct brw_context *brw, bool enable)
+{
+   if (!brw->use_resource_streamer ||
+   (enable && !(brw->vs_ubo_gather || brw->gs_ubo_gather ||
+brw->fs_ubo_gather)))
+  return;
+
+   uint32_t dw1 = enable ? BRW_GATHER_CONSTANTS_ENABLE : 0;
+   if (brw->is_haswell) {
+  dw1 |= HSW_GATHER_CONSTANTS_RESERVED | (enable ? GEN7_MOCS_L3 : 0);
+   } else if (brw->gen == 8) {
+  dw1 |= (enable ? BDW_MOCS_WB : 0);
+   }
+   int pkt_len = brw->gen >= 8 ? 4 : 3;
+
+   BEGIN_BATCH(pkt_len);
+   OUT_BATCH(_3DSTATE_GATHER_POOL_ALLOC << 16 | (pkt_len - 2));
+   if (brw->gen >=8 ) {
+  if (enable) {
+ OUT_RELOC64(brw->gather_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
+ OUT_BATCH(brw->gather_pool.bo->size);
+  } else {
+ OUT_BATCH(dw1);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+  }
+   } else {
+  if (enable) {
+ OUT_RELOC(brw->gather_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
+ OUT_RELOC(brw->gather_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0,
+   brw->gather_pool.bo->size);
+  } else {
+ OUT_BATCH(dw1);
+ OUT_BATCH(0);
+  }
+   }
+   ADVANCE_BATCH();
+}
+
 /**
  * Disable hardware binding table support, falling back to the
  * older software-generated binding table mechanism.
@@ -294,6 +348,7 @@ gen7_disable_hw_binding_tables(struct brw_context *brw)
brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
 
int pkt_len = brw->gen >= 8 ? 4 : 3;
+   gen7_toggle_gather_constants(brw, false);
 
BEGIN_BATCH(pkt_len);
OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (pkt_len - 2));
@@ -360,12 +415,16 @@ gen7_enable_hw_binding_tables(struct brw_context *brw)
  brw->hw_bt_pool.bo->size);
}
ADVANCE_BATCH();
+
+   gen7_init_gather_pool(brw);
+   gen7_toggle_gather_constants(brw, true);
 }
 
 void
 gen7_reset_hw_bt_pool_offsets(struct brw_context *brw)
 {
brw->hw_bt_pool.next_offset = 0;
+   brw->gather_pool.next_offset = 0;
 }
 
 const struct brw_tracked_state gen7_hw_binding_tables = {
diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 7c1c133..8079465 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -68,6 +68,7 @@
 #include "tnl/tnl.h"
 #include "tnl/t_pipeline.h"
 #include "util/ralloc.h"
+#include "util/u_atomic.h"
 
 /***
  * Mesa's Driver Functions
@@ -687,6 +688,26 @@ brw_process_driconf_options(struct brw_context *brw)
   driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
 }
 
+static void
+brw_process_intel_gather_variable(struct brw_context *brw)
+{
+   uint64_t INTEL_UBO_GATHER = 0;
+
+   static const struct dri_debug_control gather_control[] = {
+  { "vs", 1 << MESA_SHADER_VERTEX },
+  { "gs", 1 << MESA_SHADER_GEOMETRY },
+  { "fs", 1 << MESA_SHADER_FRAGMENT },
+  { NULL, 0 }
+   };
+   uint64_t intel_ubo_gather = driParseDebugString(getenv("INTEL_UBO_GATHER"),
+   gather_control);
+   (void) p_atomic_cmpxchg(&INTEL_UBO_GATHER, 0, intel_ubo_gather);
+
+   brw->vs_ubo

[Mesa-dev] [PATCH 09/20] nir: Add glsl_get_type_without_array() wrapper

2015-09-11 Thread Abdiel Janulgue

Signed-off-by: Abdiel Janulgue 
---
 src/glsl/nir/nir_types.cpp | 6 ++
 src/glsl/nir/nir_types.h   | 2 ++
 2 files changed, 8 insertions(+)

diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp
index a0caf0e..332da8f 100644
--- a/src/glsl/nir/nir_types.cpp
+++ b/src/glsl/nir/nir_types.cpp
@@ -171,3 +171,9 @@ glsl_get_array_size(const struct glsl_type *type)
 {
return type->array_size();
 }
+
+const struct glsl_type *
+glsl_get_type_without_array(const struct glsl_type *type)
+{
+   return type->without_array();
+}
diff --git a/src/glsl/nir/nir_types.h b/src/glsl/nir/nir_types.h
index 72b980a..da9544c 100644
--- a/src/glsl/nir/nir_types.h
+++ b/src/glsl/nir/nir_types.h
@@ -49,6 +49,8 @@ const struct glsl_type *glsl_get_array_element(const struct 
glsl_type *type);
 
 const struct glsl_type *glsl_get_column_type(const struct glsl_type *type);
 
+const struct glsl_type *glsl_get_type_without_array(const struct glsl_type 
*type);
+
 enum glsl_base_type glsl_get_base_type(const struct glsl_type *type);
 
 unsigned glsl_get_array_size(const struct glsl_type *type);
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 07/20] i965: Assign hw-binding table index for uniform constant buffer block

2015-09-11 Thread Abdiel Janulgue

Assign the uploaded uniform block with hardware binding table indices.
This is indexed by the resource streamer to fetch the constant buffers
referred to by our gather table entries.

Signed-off-by: Abdiel Janulgue 
---
 src/mesa/drivers/dri/i965/gen6_vs_state.c | 13 +++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c 
b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index 843df94..d43af5b 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -72,9 +72,18 @@ gen6_upload_push_constants(struct brw_context *brw,
   gl_constant_value *param;
   unsigned i;
 
-  param = brw_state_batch(brw, type,
- prog_data->nr_params * sizeof(gl_constant_value),
+  const uint32_t size = prog_data->nr_params * sizeof(gl_constant_value);
+  param = brw_state_batch(brw, type, size,
  32, &stage_state->push_const_offset);
+  if (brw->gather_pool.bo != NULL) {
+ uint32_t surf_offset = 0;
+ brw_create_constant_surface(brw, brw->batch.bo,
+ stage_state->push_const_offset,
+ size, &surf_offset, false);
+ gen7_edit_hw_binding_table_entry(brw, stage_state->stage,
+  BRW_UNIFORM_GATHER_INDEX_START,
+  surf_offset);
+  }
 
   STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 03/20] i965: Allocate space on the gather pool for plain uniforms

2015-09-11 Thread Abdiel Janulgue

Reserve space in the gather pool where the gathered uniforms are flushed.

Signed-off-by: Abdiel Janulgue 
---
 src/mesa/drivers/dri/i965/gen6_vs_state.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c 
b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index 6653a6d..b78166e 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -120,6 +120,14 @@ gen6_upload_push_constants(struct brw_context *brw,
*/
   assert(stage_state->push_const_size <= 32);
}
+   /* Allocate gather pool space for uniform and UBO entries in 512-bit 
chunks*/
+   if (brw->gather_pool.bo != NULL) {
+  if (prog_data->nr_params > 0) {
+ int num_consts = ALIGN(prog_data->nr_params, 4) / 4;
+ stage_state->push_const_offset = brw->gather_pool.next_offset;
+ brw->gather_pool.next_offset += (ALIGN(num_consts, 4) / 4) * 64;
+  }
+   }
 }
 
 static void
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 06/20] i965: Assign hw-binding table index for each UBO constant buffer.

2015-09-11 Thread Abdiel Janulgue

To be able to refer to a constant buffer, the resource streamer needs
to index it with a hardware binding table entry. This blankets the ubo
buffers with hardware binding table indices.

The gather-constants hardware fetches in 16-entry binding table blocks,
so we need to use a block that is otherwise unused.

Signed-off-by: Abdiel Janulgue 
---
 src/mesa/drivers/dri/i965/brw_context.h  | 11 +++
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c |  6 ++
 2 files changed, 17 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index de0db5a..58edaf4 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -762,6 +762,17 @@ struct brw_vs_prog_data {
 
 #define SURF_INDEX_GEN6_SOL_BINDING(t) (t)
 
+/** Start of hardware binding table index for uniform gather constant entries.
+ *  This must be aligned to the start of a hardware binding table block (a 
block
+ *  is a group of 16 binding table entries).
+ */
+#define BRW_UNIFORM_GATHER_INDEX_START 32
+
+/** Appended to the end of the binding table index for uniform constant buffers
+ *  to indicate start of the UBO gather constant binding table.
+ */
+#define BRW_UBO_GATHER_INDEX_APPEND 2
+
 /* Note: brw_gs_prog_data_compare() must be updated when adding fields to
  * this struct!
  */
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 8213f4e..fab553b 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -917,6 +917,12 @@ brw_upload_ubo_surfaces(struct brw_context *brw,
   bo->size - binding->Offset,
   &surf_offsets[i],
   dword_pitch);
+  if (brw->gather_pool.bo) {
+ int bt_idx = BRW_UNIFORM_GATHER_INDEX_START +
+BRW_UBO_GATHER_INDEX_APPEND + i;
+ gen7_edit_hw_binding_table_entry(brw, stage_state->stage,
+  bt_idx, surf_offsets[i]);
+  }
}
 
if (shader->NumUniformBlocks)
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 08/20] nir: Add glsl_get_array_size() wrapper.

2015-09-11 Thread Abdiel Janulgue

Signed-off-by: Abdiel Janulgue 
---
 src/glsl/nir/nir_types.cpp | 6 ++
 src/glsl/nir/nir_types.h   | 2 ++
 2 files changed, 8 insertions(+)

diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp
index 940c676..a0caf0e 100644
--- a/src/glsl/nir/nir_types.cpp
+++ b/src/glsl/nir/nir_types.cpp
@@ -165,3 +165,9 @@ glsl_array_type(const glsl_type *base, unsigned elements)
 {
return glsl_type::get_array_instance(base, elements);
 }
+
+unsigned
+glsl_get_array_size(const struct glsl_type *type)
+{
+   return type->array_size();
+}
diff --git a/src/glsl/nir/nir_types.h b/src/glsl/nir/nir_types.h
index a8ff8f2..72b980a 100644
--- a/src/glsl/nir/nir_types.h
+++ b/src/glsl/nir/nir_types.h
@@ -51,6 +51,8 @@ const struct glsl_type *glsl_get_column_type(const struct 
glsl_type *type);
 
 enum glsl_base_type glsl_get_base_type(const struct glsl_type *type);
 
+unsigned glsl_get_array_size(const struct glsl_type *type);
+
 unsigned glsl_get_vector_elements(const struct glsl_type *type);
 
 unsigned glsl_get_components(const struct glsl_type *type);
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 12/20] i965/fs/nir: Append nir_intrinsic_load_ubo entries to the gather table

2015-09-11 Thread Abdiel Janulgue

Append nir_intrinsic_load_ubo entries to the gather table when the const
block and offset are immediate values. Otherwise, just fall back to the
previous method of uploading the UBO constant data to GRF using pull
constants.

Cc: kenn...@whitecape.org
Cc: ja...@jlekstrand.net
Signed-off-by: Abdiel Janulgue 
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 17 +++
 src/mesa/drivers/dri/i965/brw_fs.h   |  6 +++
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 68 
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp |  6 ++-
 4 files changed, 96 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index e39d821..ad084af 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1865,6 +1865,7 @@ fs_visitor::assign_constant_locations()
 
stage_prog_data->nr_pull_params = num_pull_constants;
stage_prog_data->nr_params = 0;
+   stage_prog_data->nr_ubo_params = ubo_uniforms;
 
unsigned const_reg_access[uniforms];
memset(const_reg_access, 0, sizeof(const_reg_access));
@@ -1899,6 +1900,20 @@ fs_visitor::assign_constant_locations()
   stage_prog_data->gather_table[p].channel_mask =
  const_reg_access[i];
}
+
+   for (unsigned i = 0; i < this->nr_ubo_gather_table; i++) {
+  int p = stage_prog_data->nr_gather_table++;
+  stage_prog_data->gather_table[p].reg = this->ubo_gather_table[i].reg;
+  stage_prog_data->gather_table[p].channel_mask =
+ this->ubo_gather_table[i].channel_mask;
+  stage_prog_data->gather_table[p].const_block =
+ this->ubo_gather_table[i].const_block;
+  stage_prog_data->gather_table[p].const_offset =
+ this->ubo_gather_table[i].const_offset;
+  stage_prog_data->max_ubo_const_block =
+ MAX2(stage_prog_data->max_ubo_const_block,
+  this->ubo_gather_table[i].const_block);
+   }
 }
 
 /**
@@ -5171,6 +5186,7 @@ brw_wm_fs_emit(struct brw_context *brw,
fs_visitor v(brw->intelScreen->compiler, brw,
 mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
 prog, &fp->Base, 8, st_index8);
+   v.use_gather_constants = brw->fs_ubo_gather && brw->use_resource_streamer;
if (!v.run_fs(false /* do_rep_send */)) {
   if (prog) {
  prog->LinkStatus = false;
@@ -5187,6 +5203,7 @@ brw_wm_fs_emit(struct brw_context *brw,
fs_visitor v2(brw->intelScreen->compiler, brw,
  mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
  prog, &fp->Base, 16, st_index16);
+   v2.use_gather_constants = v.use_gather_constants;
if (likely(!(INTEL_DEBUG & DEBUG_NO16) || brw->use_rep_send)) {
   if (!v.simd16_unsupported) {
  /* Try a SIMD16 compile */
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index dd0526a..ded007a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -293,6 +293,9 @@ public:
  unsigned n);
 
int implied_mrf_writes(fs_inst *inst);
+   bool nir_generate_ubo_gather_table(const brw::fs_builder &bld,
+  nir_intrinsic_instr *instr, fs_reg &dest,
+  bool has_indirect);
 
virtual void dump_instructions();
virtual void dump_instructions(const char *name);
@@ -316,6 +319,9 @@ public:
/** Number of uniform variable components visited. */
unsigned uniforms;
 
+   /** Number of ubo uniform variable components visited. */
+   unsigned ubo_uniforms;
+
/** Byte-offset for the next available spot in the scratch space buffer. */
unsigned last_scratch;
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index a6c6a2f..9a50b99 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1488,6 +1488,9 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, 
nir_intrinsic_instr *instr
   has_indirect = true;
   /* fallthrough */
case nir_intrinsic_load_ubo: {
+  if (nir_generate_ubo_gather_table(bld, instr, dest, has_indirect))
+ break;
+
   nir_const_value *const_index = nir_src_as_const_value(instr->src[0]);
   fs_reg surf_index;
 
@@ -1874,3 +1877,68 @@ fs_visitor::nir_emit_jump(const fs_builder &bld, 
nir_jump_instr *instr)
   unreachable("unknown jump");
}
 }
+
+bool
+fs_visitor::nir_generate_ubo_gather_table(const brw::fs_builder &bld,
+  nir_intrinsic_instr *instr,
+  fs_reg &dest,
+  bool has_indirect)
+{
+   const nir_const_value *const_index = nir_src_as_const_value(instr->src[0]);
+
+   if (!const_index || has_indirect || !use_gather_constants)
+  return false;
+
+   /* Only allow 16 registers (128 uniform components) as push constants.
+*/
+   static const unsigned max_push_comp

[Mesa-dev] [PATCH 10/20] i965: Include UBO parameter sizes in push constant parameters

2015-09-11 Thread Abdiel Janulgue

Now that we consider UBO constants as push constants, we need to include
the sizes of the UBO's constant slots in the visitor's uniform slot sizes.
This information is needed to properly pack vector constants tightly next to
each other.

Signed-off-by: Abdiel Janulgue 
---
 src/mesa/drivers/dri/i965/brw_gs.c  |  1 +
 src/mesa/drivers/dri/i965/brw_program.h |  3 +++
 src/mesa/drivers/dri/i965/brw_vs.c  |  3 +++
 src/mesa/drivers/dri/i965/brw_wm.c  | 22 ++
 4 files changed, 29 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_gs.c 
b/src/mesa/drivers/dri/i965/brw_gs.c
index 17e87b8..7641cc5 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -72,6 +72,7 @@ brw_codegen_gs_prog(struct brw_context *brw,
   rzalloc_array(NULL, struct brw_image_param, gs->NumImages);
c.prog_data.base.base.nr_params = param_count;
c.prog_data.base.base.nr_image_params = gs->NumImages;
+   c.prog_data.base.base.nr_ubo_params = brw_count_ubo_params(gs);
c.prog_data.base.base.nr_gather_table = 0;
c.prog_data.base.base.gather_table =
   rzalloc_size(NULL, sizeof(*c.prog_data.base.base.gather_table) *
diff --git a/src/mesa/drivers/dri/i965/brw_program.h 
b/src/mesa/drivers/dri/i965/brw_program.h
index 00e8f3f..20f5371 100644
--- a/src/mesa/drivers/dri/i965/brw_program.h
+++ b/src/mesa/drivers/dri/i965/brw_program.h
@@ -182,6 +182,9 @@ void
 brw_dump_ir(const char *stage, struct gl_shader_program *shader_prog,
 struct gl_shader *shader, struct gl_program *prog);
 
+int
+brw_count_ubo_params(struct gl_shader *fs);
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c 
b/src/mesa/drivers/dri/i965/brw_vs.c
index 8501796..1ec2bc9 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -141,6 +141,9 @@ brw_codegen_vs_prog(struct brw_context *brw,
 stage_prog_data->nr_image_params);
stage_prog_data->nr_params = param_count;
 
+   stage_prog_data->nr_ubo_params = 0;
+   if (vs)
+  stage_prog_data->nr_ubo_params = brw_count_ubo_params(vs);
stage_prog_data->nr_gather_table = 0;
stage_prog_data->gather_table =
   rzalloc_size(NULL, sizeof(*stage_prog_data->gather_table) *
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c 
b/src/mesa/drivers/dri/i965/brw_wm.c
index 204baa6..44efba0 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -33,6 +33,7 @@
 #include "main/framebuffer.h"
 #include "program/prog_parameter.h"
 #include "program/program.h"
+#include "glsl/nir/nir_types.h"
 #include "intel_mipmap_tree.h"
 
 #include "util/ralloc.h"
@@ -149,6 +150,23 @@ brw_wm_prog_data_compare(const void *in_a, const void 
*in_b)
return true;
 }
 
+int
+brw_count_ubo_params(struct gl_shader *shader)
+{
+   int nr_ubo = 0;
+   for (int i = 0; i < shader->NumUniformBlocks; i++) {
+  for (int p = 0; p < shader->UniformBlocks[i].NumUniforms; p++) {
+ const struct glsl_type *type = 
shader->UniformBlocks[i].Uniforms[p].Type;
+ int array_sz = glsl_get_array_size(type);
+ array_sz = MAX2(array_sz, 1);
+ int components = 
glsl_get_components(glsl_get_type_without_array(type));
+ nr_ubo += components * array_sz;
+  }
+   }
+
+   return nr_ubo;
+}
+
 /**
  * All Mesa program -> GPU code generation goes through this function.
  * Depending on the instructions used (i.e. flow control instructions)
@@ -208,6 +226,10 @@ brw_codegen_wm_prog(struct brw_context *brw,
 prog_data.base.nr_image_params);
prog_data.base.nr_params = param_count;
 
+   prog_data.base.nr_ubo_params = 0;
+   if (fs)
+  prog_data.base.nr_ubo_params = brw_count_ubo_params(fs);
+
prog_data.base.nr_gather_table = 0;
prog_data.base.gather_table =
   rzalloc_size(NULL, sizeof(*prog_data.base.gather_table) *
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 13/20] i965/fs: Pack UBO registers right after uniform registers

2015-09-11 Thread Abdiel Janulgue

We now have two sources of constant buffers: UBOs and ordinary uniforms.
After assigning a block of push constant hw-registers to normal uniforms,
just pack the UBO push constant registers right after it.

Signed-off-by: Abdiel Janulgue 
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 20 
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index ad084af..6abe367 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -942,6 +942,8 @@ fs_visitor::import_uniforms(fs_visitor *v)
this->pull_constant_loc = v->pull_constant_loc;
this->uniforms = v->uniforms;
this->param_size = v->param_size;
+   this->nr_ubo_gather_table = v->nr_ubo_gather_table;
+   this->ubo_gather_table = v->ubo_gather_table;
 }
 
 void
@@ -1362,7 +1364,8 @@ fs_visitor::assign_curb_setup()
   }
}
 
-   prog_data->curb_read_length = ALIGN(stage_prog_data->nr_params, 8) / 8;
+   prog_data->curb_read_length = ALIGN(stage_prog_data->nr_params + 
stage_prog_data->nr_ubo_params,
+   8) / 8;
 
/* Map the offsets in the UNIFORM file to fixed HW regs. */
foreach_block_and_inst(block, fs_inst, inst, cfg) {
@@ -1370,7 +1373,7 @@ fs_visitor::assign_curb_setup()
 if (inst->src[i].file == UNIFORM) {
 int uniform_nr = inst->src[i].reg + inst->src[i].reg_offset;
 int constant_nr;
-if (uniform_nr >= 0 && uniform_nr < (int) uniforms) {
+if (uniform_nr >= 0 && uniform_nr < (int) (uniforms + 
ubo_uniforms)) {
constant_nr = push_constant_loc[uniform_nr];
 } else {
/* Section 5.11 of the OpenGL 4.1 spec says:
@@ -1788,10 +1791,11 @@ fs_visitor::assign_constant_locations()
 
unsigned int num_pull_constants = 0;
 
-   pull_constant_loc = ralloc_array(mem_ctx, int, uniforms);
-   memset(pull_constant_loc, -1, sizeof(pull_constant_loc[0]) * uniforms);
+   unsigned int total_uniforms = uniforms + ubo_uniforms;
+   pull_constant_loc = ralloc_array(mem_ctx, int, total_uniforms);
+   memset(pull_constant_loc, -1, sizeof(pull_constant_loc[0]) * 
total_uniforms);
 
-   bool is_live[uniforms];
+   bool is_live[total_uniforms];
memset(is_live, 0, sizeof(is_live));
 
/* First, we walk through the instructions and do two things:
@@ -1823,7 +1827,7 @@ fs_visitor::assign_constant_locations()
  } else {
 /* Mark the the one accessed uniform as live */
 int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
-if (constant_nr >= 0 && constant_nr < (int) uniforms)
+if (constant_nr >= 0 && constant_nr < (int) total_uniforms)
is_live[constant_nr] = true;
  }
   }
@@ -1840,9 +1844,9 @@ fs_visitor::assign_constant_locations()
unsigned int max_push_components = 16 * 8;
unsigned int num_push_constants = 0;
 
-   push_constant_loc = ralloc_array(mem_ctx, int, uniforms);
+   push_constant_loc = ralloc_array(mem_ctx, int, total_uniforms);
 
-   for (unsigned int i = 0; i < uniforms; i++) {
+   for (unsigned int i = 0; i < total_uniforms; i++) {
   if (!is_live[i] || pull_constant_loc[i] != -1) {
  /* This UNIFORM register is either dead, or has already been demoted
   * to a pull const.  Mark it as no longer living in the param[] array.
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 05/20] i965: Store gather table information in the program data

2015-09-11 Thread Abdiel Janulgue

The resource streamer is able to gather and pack sparsely-located
constant data from any buffer object by referring to a gather table.
This patch adds support for keeping track of these constant data
fetches in a gather table.

The gather table is generated from two sources. Ordinary uniform fetches
are stored first. These are then combined with a separate table containing
UBO entries. The separate entry for UBOs is needed to make it easier to
generate the gather mask when combining and packing the constant data.

Signed-off-by: Abdiel Janulgue 
---
 src/mesa/drivers/dri/i965/brw_context.h  | 11 +++
 src/mesa/drivers/dri/i965/brw_gs.c   |  5 +
 src/mesa/drivers/dri/i965/brw_program.c  |  5 +
 src/mesa/drivers/dri/i965/brw_shader.cpp |  5 -
 src/mesa/drivers/dri/i965/brw_shader.h   | 11 +++
 src/mesa/drivers/dri/i965/brw_vs.c   |  6 ++
 src/mesa/drivers/dri/i965/brw_wm.c   |  5 +
 7 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 33c49b7..de0db5a 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -364,9 +364,12 @@ struct brw_stage_prog_data {
GLuint nr_params;   /**< number of float params/constants */
GLuint nr_pull_params;
unsigned nr_image_params;
+   unsigned nr_ubo_params;
+   unsigned nr_gather_table;
 
unsigned curb_read_length;
unsigned total_scratch;
+   unsigned max_ubo_const_block;
 
/**
 * Register where the thread expects to find input data from the URB
@@ -385,6 +388,14 @@ struct brw_stage_prog_data {
const gl_constant_value **param;
const gl_constant_value **pull_param;
 
+   /** Combined gather table containing uniform and UBO entries */
+   struct {
+  int reg;
+  unsigned channel_mask;
+  unsigned const_block;
+  unsigned const_offset;
+   } *gather_table;
+
/**
 * Image metadata passed to the shader as uniforms.  This is deliberately
 * ignored by brw_stage_prog_data_compare() because its contents don't have
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c 
b/src/mesa/drivers/dri/i965/brw_gs.c
index 16ea684..17e87b8 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -72,6 +72,11 @@ brw_codegen_gs_prog(struct brw_context *brw,
   rzalloc_array(NULL, struct brw_image_param, gs->NumImages);
c.prog_data.base.base.nr_params = param_count;
c.prog_data.base.base.nr_image_params = gs->NumImages;
+   c.prog_data.base.base.nr_gather_table = 0;
+   c.prog_data.base.base.gather_table =
+  rzalloc_size(NULL, sizeof(*c.prog_data.base.base.gather_table) *
+   (c.prog_data.base.base.nr_params +
+c.prog_data.base.base.nr_ubo_params));
 
if (brw->gen >= 7) {
   if (gp->program.OutputType == GL_POINTS) {
diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
b/src/mesa/drivers/dri/i965/brw_program.c
index 1ac0ed2..aa805be 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -558,6 +558,10 @@ brw_stage_prog_data_compare(const struct 
brw_stage_prog_data *a,
if (memcmp(a->pull_param, b->pull_param, a->nr_pull_params * sizeof(void 
*)))
   return false;
 
+   if (memcmp(a->gather_table, b->gather_table,
+  a->nr_gather_table * sizeof(*a->gather_table)))
+  return false;
+
return true;
 }
 
@@ -568,6 +572,7 @@ brw_stage_prog_data_free(const void *p)
 
ralloc_free(prog_data->param);
ralloc_free(prog_data->pull_param);
+   ralloc_free(prog_data->gather_table);
 }
 
 void
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index de1a7fe..9d45cfe 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -917,7 +917,10 @@ backend_shader::backend_shader(const struct brw_compiler 
*compiler,
  stage_prog_data(stage_prog_data),
  mem_ctx(mem_ctx),
  cfg(NULL),
- stage(stage)
+ stage(stage),
+ use_gather_constants(false),
+ nr_ubo_gather_table(0),
+ ubo_gather_table(NULL)
 {
debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage);
stage_name = _mesa_shader_stage_to_string(stage);
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h 
b/src/mesa/drivers/dri/i965/brw_shader.h
index f4d..f0afce5 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -275,6 +275,17 @@ public:
  unsigned n) = 0;
void setup_image_uniform_values(unsigned param_offset,
const gl_uniform_storage *storage);
+   bool use_gather_constants;
+   unsigned nr_ubo_gather_table;
+
+   /** Gather table for UBO entries only */
+   struct gather_table {
+  int reg;
+  unsigned channel_mask;
+  unsigned const_block;
+  unsigned

[Mesa-dev] [PATCH 14/20] i965/vec4: Append uniform entries to the gather table

2015-09-11 Thread Abdiel Janulgue

Signed-off-by: Abdiel Janulgue 
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp | 12 
 src/mesa/drivers/dri/i965/brw_vec4.h   |  1 +
 2 files changed, 13 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 85dc372..f2b03f8 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -595,6 +595,17 @@ vec4_visitor::pack_uniform_registers()
}
 }
 
+void
+vec4_visitor::generate_gather_table()
+{
+   int num_consts = ALIGN(stage_prog_data->nr_params, 4) / 4;
+   for (int i = 0; i < num_consts; i++) {
+  int p = stage_prog_data->nr_gather_table++;
+  stage_prog_data->gather_table[p].reg = -1;
+  stage_prog_data->gather_table[p].channel_mask = 0xf;
+   }
+}
+
 /**
  * Does algebraic optimizations (0 * a = 0, 1 * a = a, a + 0 = a).
  *
@@ -1832,6 +1843,7 @@ vec4_visitor::run()
   return false;
 
setup_payload();
+   generate_gather_table();
 
if (unlikely(INTEL_DEBUG & DEBUG_SPILL_VEC4)) {
   /* Debug of register spilling: Go spill everything. */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index 01c6e84..534f1b1 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -206,6 +206,7 @@ public:
bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
void opt_set_dependency_control();
void opt_schedule_instructions();
+   void generate_gather_table();
 
vec4_instruction *emit(vec4_instruction *inst);
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 11/20] i965/fs: Append uniform entries to the gather table

2015-09-11 Thread Abdiel Janulgue

This patch generates the gather table entries for ordinary uniforms
if they are present. The uniform constants here will later be packed
together with UBO constants.

Signed-off-by: Abdiel Janulgue 
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 18 +-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index d240371..e39d821 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1863,8 +1863,11 @@ fs_visitor::assign_constant_locations()
   }
}
 
-   stage_prog_data->nr_params = num_push_constants;
stage_prog_data->nr_pull_params = num_pull_constants;
+   stage_prog_data->nr_params = 0;
+
+   unsigned const_reg_access[uniforms];
+   memset(const_reg_access, 0, sizeof(const_reg_access));
 
/* Up until now, the param[] array has been indexed by reg + reg_offset
 * of UNIFORM registers.  Move pull constants into pull_param[] and
@@ -1881,8 +1884,21 @@ fs_visitor::assign_constant_locations()
  stage_prog_data->pull_param[pull_constant_loc[i]] = value;
   } else if (push_constant_loc[i] != -1) {
  stage_prog_data->param[push_constant_loc[i]] = value;
+ int p = stage_prog_data->nr_params++;
+
+ /* access table for uniform registers*/
+ const_reg_access[(ALIGN(prog_data->nr_params, 4) / 4) - 1] |=
+(1 << (p % 4));
   }
}
+
+   int num_consts = ALIGN(prog_data->nr_params, 4) / 4;
+   for (int i = 0; i < num_consts; i++) {
+  int p = stage_prog_data->nr_gather_table++;
+  stage_prog_data->gather_table[p].reg = -1;
+  stage_prog_data->gather_table[p].channel_mask =
+ const_reg_access[i];
+   }
 }
 
 /**
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 04/20] i965: Allocate space on the gather pool for UBO entries

2015-09-11 Thread Abdiel Janulgue

If there are UBO constant entries, append them to stage_state->push_const_size.
The gather pool contains the combined entries of both ordinary uniforms
and UBO constants.

Signed-off-by: Abdiel Janulgue 
---
 src/mesa/drivers/dri/i965/gen6_vs_state.c | 20 ++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c 
b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index b78166e..843df94 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -59,7 +59,9 @@ gen6_upload_push_constants(struct brw_context *brw,
struct gl_context *ctx = &brw->ctx;
 
if (prog_data->nr_params == 0) {
-  stage_state->push_const_size = 0;
+  if (prog_data->nr_ubo_params == 0) {
+ stage_state->push_const_size = 0;
+  }
} else {
   /* Updates the ParamaterValues[i] pointers for all parameters of the
* basic type of PROGRAM_STATE_VAR.
@@ -122,10 +124,24 @@ gen6_upload_push_constants(struct brw_context *brw,
}
/* Allocate gather pool space for uniform and UBO entries in 512-bit 
chunks*/
if (brw->gather_pool.bo != NULL) {
+  unsigned gather_pool_next_offset = brw->gather_pool.next_offset;
+
   if (prog_data->nr_params > 0) {
  int num_consts = ALIGN(prog_data->nr_params, 4) / 4;
+ gather_pool_next_offset += (ALIGN(num_consts, 4) / 4) * 64;
+  }
+
+  if (prog_data->nr_ubo_params > 0) {
+ stage_state->push_const_size = ALIGN(prog_data->nr_params + 
prog_data->nr_ubo_params, 8) / 8;
+ uint32_t num_constants = ALIGN(prog_data->nr_ubo_params, 4) / 4;
+ gather_pool_next_offset += (ALIGN(num_constants, 4) / 4) * 64;
+  }
+
+  if (gather_pool_next_offset > brw->gather_pool.next_offset) {
  stage_state->push_const_offset = brw->gather_pool.next_offset;
- brw->gather_pool.next_offset += (ALIGN(num_consts, 4) / 4) * 64;
+ brw->gather_pool.next_offset = gather_pool_next_offset;
+ assert(brw->gather_pool.next_offset < brw->gather_pool.bo->size);
+ assert(stage_state->push_const_offset < brw->gather_pool.next_offset);
   }
}
 }
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 19/20] i965: Disable gather push constants for null constants

2015-09-11 Thread Abdiel Janulgue

Programming null constants with gather constant tables seems to
be unsupported and results in a GPU lockup even with the prescribed
GPU workarounds in the bspec. Trial and error showed that disabling
HW gather constants when the constant state for a stage needs to be
nullified is the only way to work around the issue.

Signed-off-by: Abdiel Janulgue 
---
 src/mesa/drivers/dri/i965/gen7_disable.c  | 4 
 src/mesa/drivers/dri/i965/gen7_vs_state.c | 7 +++
 src/mesa/drivers/dri/i965/gen8_disable.c  | 4 
 3 files changed, 15 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/gen7_disable.c 
b/src/mesa/drivers/dri/i965/gen7_disable.c
index bb50969..259accc 100644
--- a/src/mesa/drivers/dri/i965/gen7_disable.c
+++ b/src/mesa/drivers/dri/i965/gen7_disable.c
@@ -29,6 +29,8 @@
 static void
 disable_stages(struct brw_context *brw)
 {
+   gen7_toggle_gather_constants(brw, false);
+
/* Disable the HS Unit */
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_CONSTANT_HS << 16 | (7 - 2));
@@ -87,6 +89,8 @@ disable_stages(struct brw_context *brw)
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_DS << 16 | (2 - 2));
OUT_BATCH(brw->hw_bt_pool.next_offset);
ADVANCE_BATCH();
+
+   gen7_toggle_gather_constants(brw, true);
 }
 
 const struct brw_tracked_state gen7_disable_stages = {
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c 
b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index cf07658..4daf2bc 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -87,6 +87,13 @@ gen7_upload_constant_state(struct brw_context *brw,
int const_loc = use_gather ? 16 : 0;
int dwords = brw->gen >= 8 ? 11 : 7;
 
+   /* Disable gather constants when zeroing constant states */
+   bool gather_switched_off = false;
+   if (use_gather && !active) {
+  gen7_toggle_gather_constants(brw, false);
+  gather_switched_off = true;
+   }
+
struct brw_stage_prog_data *prog_data = stage_state->prog_data;
if (prog_data && use_gather && active) {
   gen7_submit_gather_table(brw, stage_state, prog_data, gather_opcode);
diff --git a/src/mesa/drivers/dri/i965/gen8_disable.c 
b/src/mesa/drivers/dri/i965/gen8_disable.c
index 32508e3..617ed76 100644
--- a/src/mesa/drivers/dri/i965/gen8_disable.c
+++ b/src/mesa/drivers/dri/i965/gen8_disable.c
@@ -29,6 +29,8 @@
 static void
 disable_stages(struct brw_context *brw)
 {
+   gen7_toggle_gather_constants(brw, false);
+
BEGIN_BATCH(5);
OUT_BATCH(_3DSTATE_WM_HZ_OP << 16 | (5 - 2));
OUT_BATCH(0);
@@ -104,6 +106,8 @@ disable_stages(struct brw_context *brw)
OUT_BATCH(brw->hw_bt_pool.next_offset);
ADVANCE_BATCH();
 
+   gen7_toggle_gather_constants(brw, true);
+
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_WM_CHROMAKEY << 16 | (2 - 2));
OUT_BATCH(0);
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 20/20] i965: Enable push constants for UBOs

2015-09-11 Thread Abdiel Janulgue

Switches on push constants whenever we have UBO entries.

Signed-off-by: Abdiel Janulgue 
---
 src/mesa/drivers/dri/i965/gen7_wm_state.c | 2 +-
 src/mesa/drivers/dri/i965/gen8_ps_state.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c 
b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index fd6dab5..e8c5347 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -173,7 +173,7 @@ gen7_upload_ps_state(struct brw_context *brw,
 
dw4 |= (brw->max_wm_threads - 1) << max_threads_shift;
 
-   if (prog_data->base.nr_params > 0)
+   if (prog_data->base.nr_params > 0 || prog_data->base.nr_ubo_params > 0)
   dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE;
 
/* From the IVB PRM, volume 2 part 1, page 287:
diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c 
b/src/mesa/drivers/dri/i965/gen8_ps_state.c
index a686fed..487e414 100644
--- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
@@ -183,7 +183,7 @@ gen8_upload_ps_state(struct brw_context *brw,
else
   dw6 |= (64 - 2) << HSW_PS_MAX_THREADS_SHIFT;
 
-   if (prog_data->base.nr_params > 0)
+   if (prog_data->base.nr_params > 0  || prog_data->base.nr_ubo_params > 0)
   dw6 |= GEN7_PS_PUSH_CONSTANT_ENABLE;
 
/* From the documentation for this packet:
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 16/20] i965/vec4: Pack UBO registers right after uniform registers

2015-09-11 Thread Abdiel Janulgue

Since we now consider UBOs as push constants, we need to lay out
our push constant register space in such a way that UBO registers
are packed right after uniform registers.

Signed-off-by: Abdiel Janulgue 
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp | 38 ++
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 549fcd3..5bc1f10 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -516,9 +516,10 @@ vec4_visitor::split_uniform_registers()
 void
 vec4_visitor::pack_uniform_registers()
 {
-   bool uniform_used[this->uniforms];
-   int new_loc[this->uniforms];
-   int new_chan[this->uniforms];
+   int total_uniforms = this->uniforms + this->ubo_uniforms;
+   bool uniform_used[total_uniforms];
+   int new_loc[total_uniforms];
+   int new_chan[total_uniforms];
 
memset(uniform_used, 0, sizeof(uniform_used));
memset(new_loc, 0, sizeof(new_loc));
@@ -542,7 +543,7 @@ vec4_visitor::pack_uniform_registers()
/* Now, figure out a packing of the live uniform vectors into our
 * push constants.
 */
-   for (int src = 0; src < uniforms; src++) {
+   for (int src = 0; src < total_uniforms; src++) {
   assert(src < uniform_array_size);
   int size = this->uniform_vector_size[src];
 
@@ -552,9 +553,16 @@ vec4_visitor::pack_uniform_registers()
   }
 
   int dst;
-  /* Find the lowest place we can slot this uniform in. */
+  /* Find the lowest place we can slot this uniform in. However, when
+   * our constants come from a mix of UBO and uniform sources, don't allow 
registers
+   * assigned to UBOs fall into half-filled uniform slots when repacking,
+   * otherwise we could mix up uniform and UBO register fetches in one 
vec4.
+   */
   for (dst = 0; dst < src; dst++) {
-if (this->uniform_vector_size[dst] + size <= 4)
+ bool allow_repack = ((src >= uniforms && dst >= uniforms) ||
+  (src < uniforms && dst < uniforms)   ||
+  this->uniform_vector_size[dst] == 0);
+if (this->uniform_vector_size[dst] + size <= 4 && allow_repack)
break;
   }
 
@@ -565,17 +573,20 @@ vec4_visitor::pack_uniform_registers()
 new_loc[src] = dst;
 new_chan[src] = this->uniform_vector_size[dst];
 
-/* Move the references to the data */
-for (int j = 0; j < size; j++) {
-   stage_prog_data->param[dst * 4 + new_chan[src] + j] =
-  stage_prog_data->param[src * 4 + j];
-}
+/* Move the references only for uniform data */
+ if (src < uniforms) {
+for (int j = 0; j < size; j++) {
+   stage_prog_data->param[dst * 4 + new_chan[src] + j] =
+  stage_prog_data->param[src * 4 + j];
+}
+ }
 
 this->uniform_vector_size[dst] += size;
 this->uniform_vector_size[src] = 0;
   }
 
-  new_uniform_count = MAX2(new_uniform_count, dst + 1);
+  if (src < uniforms)
+ new_uniform_count = MAX2(new_uniform_count, dst + 1);
}
 
this->uniforms = new_uniform_count;
@@ -1616,7 +1627,8 @@ vec4_visitor::setup_uniforms(int reg)
   this->uniforms++;
   reg++;
} else {
-  reg += ALIGN(uniforms, 2) / 2;
+  int ubo_regs = ALIGN(ubo_uniforms, 4) / 4;
+  reg += ALIGN(ubo_regs + uniforms, 2) / 2;
}
 
stage_prog_data->nr_params = this->uniforms * 4;
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 15/20] i965/vec4: Append ir_binop_ubo_load entries to the gather table

2015-09-11 Thread Abdiel Janulgue

Append ir_binop_ubo_load entries to the gather table when the const
block and offset are immediate values. Otherwise, just fall back to the
previous method of uploading the UBO constant data to GRF using pull
constants.

Signed-off-by: Abdiel Janulgue 
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp| 13 
 src/mesa/drivers/dri/i965/brw_vec4.h  |  2 +
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp |  2 +
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp| 75 +++
 4 files changed, 92 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index f2b03f8..549fcd3 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -604,6 +604,18 @@ vec4_visitor::generate_gather_table()
   stage_prog_data->gather_table[p].reg = -1;
   stage_prog_data->gather_table[p].channel_mask = 0xf;
}
+
+   for (unsigned i = 0; i < this->nr_ubo_gather_table; i++) {
+  int p = stage_prog_data->nr_gather_table++;
+  stage_prog_data->gather_table[p].reg = this->ubo_gather_table[i].reg;
+  stage_prog_data->gather_table[p].channel_mask = 
this->ubo_gather_table[i].channel_mask;
+  stage_prog_data->gather_table[p].const_block = 
this->ubo_gather_table[i].const_block;
+  stage_prog_data->gather_table[p].const_offset = 
this->ubo_gather_table[i].const_offset;
+  stage_prog_data->max_ubo_const_block = 
MAX2(stage_prog_data->max_ubo_const_block,
+  
this->ubo_gather_table[i].const_block);
+   }
+
+   stage_prog_data->nr_ubo_params = ubo_uniforms;
 }
 
 /**
@@ -1991,6 +2003,7 @@ brw_vs_emit(struct brw_context *brw,
 vp, prog, brw_select_clip_planes(&brw->ctx),
 mem_ctx, st_index,
 !_mesa_is_gles3(&brw->ctx));
+  v.use_gather_constants = brw->vs_ubo_gather && 
brw->use_resource_streamer;
   if (!v.run()) {
  if (prog) {
 prog->LinkStatus = false;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index 534f1b1..0888ec7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -163,6 +163,7 @@ public:
int *uniform_vector_size;
int uniform_array_size; /*< Size of uniform_[vector_]size arrays */
int uniforms;
+   int ubo_uniforms;
 
src_reg shader_start_time;
 
@@ -403,6 +404,7 @@ public:
void dump_instruction(backend_instruction *inst, FILE *file);
 
void visit_atomic_counter_intrinsic(ir_call *ir);
+   bool generate_ubo_gather_table(ir_expression *ir, const dst_reg 
&result_dst);
 
bool is_high_sampler(src_reg sampler);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index b9694f6..5a85a21 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -679,6 +679,8 @@ brw_gs_emit(struct brw_context *brw,
 
  vec4_gs_visitor v(brw->intelScreen->compiler, brw,
c, prog, mem_ctx, true /* no_spills */, st_index);
+ v.use_gather_constants = brw->gs_ubo_gather &&
+brw->use_resource_streamer;
  if (v.run()) {
 return generate_assembly(brw, prog, &c->gp->program.Base,
  &c->prog_data.base, mem_ctx, v.cfg,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index f6e59ce..4bba4a5 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1828,6 +1828,12 @@ vec4_visitor::visit(ir_expression *ir)
   break;
 
case ir_binop_ubo_load: {
+  /* Use gather push constants if at all possible, otherwise just
+   * fall back to pull constants for UBOs
+   */
+  if (generate_ubo_gather_table(ir, result_dst))
+ break;
+
   ir_constant *const_uniform_block = ir->operands[0]->as_constant();
   ir_constant *const_offset_ir = ir->operands[1]->as_constant();
   unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 
0;
@@ -3688,6 +3694,67 @@ vec4_visitor::resolve_bool_comparison(ir_rvalue *rvalue, 
src_reg *reg)
*reg = neg_result;
 }
 
+bool
+vec4_visitor::generate_ubo_gather_table(ir_expression *ir, const dst_reg 
&result_dst)
+{
+   ir_constant *const_uniform_block = ir->operands[0]->as_constant();
+   ir_constant *const_offset_ir = ir->operands[1]->as_constant();
+   unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
+
+   if (ir->operation != ir_binop_ubo_load ||
+   !use_gather_constants  ||
+   !const_uniform_block   ||
+   !const_offset_ir)
+  return false;
+
+   /* Only allow 32 registers (256 uniform components) as push constants,
+*/
+   int max_uniform_components = 32 * 8;
+   int param_index = uniforms + ubo_unif

[Mesa-dev] [PATCH 17/20] i965: Upload UBO surfaces before emitting constant state packet

2015-09-11 Thread Abdiel Janulgue

Now that UBOs are uploaded as push constants, we need to obtain and
append the number of push constant entries generated by the UBO entry
fetches to the 3DSTATE_CONSTANT_* packets.

v2: GEN8 support

Signed-off-by: Abdiel Janulgue 
---
 src/mesa/drivers/dri/i965/brw_state_upload.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c 
b/src/mesa/drivers/dri/i965/brw_state_upload.c
index b2ca9c2..280340f 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -197,6 +197,9 @@ static const struct brw_tracked_state *gen7_render_atoms[] =
&brw_vs_image_surfaces, /* Before vs push/pull constants and binding table 
*/
&brw_gs_image_surfaces, /* Before gs push/pull constants and binding table 
*/
&brw_wm_image_surfaces, /* Before wm push/pull constants and binding table 
*/
+   &brw_vs_ubo_surfaces,
+   &brw_gs_ubo_surfaces,
+   &brw_wm_ubo_surfaces,
 
&gen6_vs_push_constants, /* Before vs_state */
&gen6_gs_push_constants, /* Before gs_state */
@@ -206,13 +209,10 @@ static const struct brw_tracked_state 
*gen7_render_atoms[] =
 * table upload must be last.
 */
&brw_vs_pull_constants,
-   &brw_vs_ubo_surfaces,
&brw_vs_abo_surfaces,
&brw_gs_pull_constants,
-   &brw_gs_ubo_surfaces,
&brw_gs_abo_surfaces,
&brw_wm_pull_constants,
-   &brw_wm_ubo_surfaces,
&brw_wm_abo_surfaces,
&gen6_renderbuffer_surfaces,
&brw_texture_surfaces,
@@ -281,6 +281,9 @@ static const struct brw_tracked_state *gen8_render_atoms[] =
&brw_vs_image_surfaces, /* Before vs push/pull constants and binding table 
*/
&brw_gs_image_surfaces, /* Before gs push/pull constants and binding table 
*/
&brw_wm_image_surfaces, /* Before wm push/pull constants and binding table 
*/
+   &brw_vs_ubo_surfaces,
+   &brw_gs_ubo_surfaces,
+   &brw_wm_ubo_surfaces,
 
&gen6_vs_push_constants, /* Before vs_state */
&gen6_gs_push_constants, /* Before gs_state */
@@ -290,13 +293,10 @@ static const struct brw_tracked_state 
*gen8_render_atoms[] =
 * table upload must be last.
 */
&brw_vs_pull_constants,
-   &brw_vs_ubo_surfaces,
&brw_vs_abo_surfaces,
&brw_gs_pull_constants,
-   &brw_gs_ubo_surfaces,
&brw_gs_abo_surfaces,
&brw_wm_pull_constants,
-   &brw_wm_ubo_surfaces,
&brw_wm_abo_surfaces,
&gen6_renderbuffer_surfaces,
&brw_texture_surfaces,
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 18/20] i965: Program the push constants state using the gather table

2015-09-11 Thread Abdiel Janulgue

Use the gather table generated from the uniform uploads and
ir_binop_ubo_load to gather and pack the constants to the gather pool.

Note that the 3DSTATE_CONSTANT_* packet now refers to the gather
pool generated by the resource streamer instead of the constant buffer
pointed to by an offset of the dynamic state base address.

v2: Support GEN8 + non-trivial rebase.

Signed-off-by: Abdiel Janulgue 
---
 src/mesa/drivers/dri/i965/brw_state.h |  2 +-
 src/mesa/drivers/dri/i965/gen6_gs_state.c |  2 +-
 src/mesa/drivers/dri/i965/gen6_vs_state.c |  2 +-
 src/mesa/drivers/dri/i965/gen6_wm_state.c |  2 +-
 src/mesa/drivers/dri/i965/gen7_vs_state.c | 84 +++
 5 files changed, 79 insertions(+), 13 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
b/src/mesa/drivers/dri/i965/brw_state.h
index c7c7e0b..a1e6c73 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -361,7 +361,7 @@ brw_upload_pull_constants(struct brw_context *brw,
 void
 gen7_upload_constant_state(struct brw_context *brw,
const struct brw_stage_state *stage_state,
-   bool active, unsigned opcode);
+   bool active, unsigned opcode, unsigned gather_op);
 
 void gen7_rs_control(struct brw_context *brw, int enable);
 
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c 
b/src/mesa/drivers/dri/i965/gen6_gs_state.c
index eb4c586..79a899e 100644
--- a/src/mesa/drivers/dri/i965/gen6_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_gs_state.c
@@ -48,7 +48,7 @@ gen6_upload_gs_push_constants(struct brw_context *brw)
}
 
if (brw->gen >= 7)
-  gen7_upload_constant_state(brw, stage_state, gp, _3DSTATE_CONSTANT_GS);
+  gen7_upload_constant_state(brw, stage_state, gp, _3DSTATE_CONSTANT_GS, 
_3DSTATE_GATHER_CONSTANT_GS);
 }
 
 const struct brw_tracked_state gen6_gs_push_constants = {
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c 
b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index d43af5b..bb375b9 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -174,7 +174,7 @@ gen6_upload_vs_push_constants(struct brw_context *brw)
  gen7_emit_vs_workaround_flush(brw);
 
   gen7_upload_constant_state(brw, stage_state, true /* active */,
- _3DSTATE_CONSTANT_VS);
+ _3DSTATE_CONSTANT_VS, 
_3DSTATE_GATHER_CONSTANT_VS);
}
 }
 
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c 
b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index d1748ba..c7b37c6 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -51,7 +51,7 @@ gen6_upload_wm_push_constants(struct brw_context *brw)
 
if (brw->gen >= 7) {
   gen7_upload_constant_state(brw, &brw->wm.base, true,
- _3DSTATE_CONSTANT_PS);
+ _3DSTATE_CONSTANT_PS, 
_3DSTATE_GATHER_CONSTANT_PS);
}
 }
 
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c 
b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index b7e4858..cf07658 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -28,19 +28,70 @@
 #include "program/prog_parameter.h"
 #include "program/prog_statevars.h"
 #include "intel_batchbuffer.h"
+#include "glsl/glsl_parser_extras.h"
 
+static void
+gen7_submit_gather_table(struct brw_context* brw,
+ const struct brw_stage_state *stage_state,
+ const struct brw_stage_prog_data *prog_data,
+ unsigned gather_opcode)
+{
+   uint32_t gather_dwords = 3 + prog_data->nr_gather_table;
+
+   /* Ordinary uniforms are assigned to the first constant buffer slot */
+   unsigned cb_valid = 1;
+   /* Assign subsequent constant buffer slots to UBOs if any */
+   cb_valid |= (prog_data->nr_ubo_params > 0) ?
+  (2 << (BRW_UBO_GATHER_INDEX_APPEND + prog_data->max_ubo_const_block)) - 
1 : 0;
+
+   assert(cb_valid < 0x);
+
+   BEGIN_BATCH(gather_dwords);
+   OUT_BATCH(gather_opcode << 16 | (gather_dwords - 2));
+   OUT_BATCH(SET_FIELD(cb_valid, BRW_GATHER_BUFFER_VALID) |
+ SET_FIELD(BRW_UNIFORM_GATHER_INDEX_START / 16,
+   BRW_GATHER_BINDING_TABLE_BLOCK));
+   OUT_BATCH(stage_state->push_const_offset);
+   for (int i = 0; i < prog_data->nr_gather_table; i++) {
+  /* Which bo are we referring to? The uniform constant buffer or
+   * the UBO block?
+   */
+  bool is_uniform = prog_data->gather_table[i].reg == -1;
+  int cb_offset = is_uniform ? i :
+ (prog_data->gather_table[i].const_offset / 16);
+  int bt_offset = is_uniform ? 0 :
+ (prog_data->gather_table[i].const_block + 
BRW_UBO_GATHER_INDEX_APPEND);
+
+  assert(cb_offset < 256);
+  assert(bt_offset < 16);
+
+  OUT_BATCH(SET_FIELD(cb_offset, BRW_GATHER_CONST_BUFFER_O

Re: [Mesa-dev] [PATCH 0/4] gallium: add support for retrieving number of texture samples

2015-09-11 Thread Edward O'Callaghan

This series is,

Reviewed-by: Edward O'Callaghan 

-- 
  Edward O'Callaghan
  edward.ocallag...@koparo.com

On Fri, Sep 11, 2015, at 01:15 PM, Ilia Mirkin wrote:
> My hope was (as you can see in the last patch) to enable this for all
> drivers that support MS textures. I've got nv50/nvc0/r600g covered.
> 
> RadeonSI will have to read the data from the texture descriptor. I'm
> totally unfamiliar with LLVM, the descriptor format, etc. Hopefully
> someone will be able to write an appropriate patch.
> 
> SVGA seems to support ms textures, but I didn't see anything in its
> virtual ISA that would fit. VMWare folk -- is this going to be easy to
> add? Or do I need to add a new PIPE_CAP?
> 
> Ilia Mirkin (4):
>   tgsi: add a TXQS opcode to retrieve the number of texture samples
>   nv50/ir: add support for TXQS tgsi opcode
>   r600g: add support for TXQS tgsi opcode
>   st/mesa: emit TXQS, support ARB_shader_texture_image_samples
> 
>  src/gallium/auxiliary/tgsi/tgsi_info.c|  3 ++-
>  src/gallium/docs/source/tgsi.rst  | 12 +++-
>  .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 17
>  -
>  .../drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp | 19
>  +++
>  .../drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp |  2 ++
>  src/gallium/drivers/r600/r600_shader.c| 16
>  
>  src/gallium/include/pipe/p_shader_tokens.h|  1 +
>  src/mesa/state_tracker/st_extensions.c|  1 +
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp|  6 +-
>  9 files changed, 65 insertions(+), 12 deletions(-)
> 
> -- 
> 2.4.6
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] r600g: lower number of driver const buffers

2015-09-11 Thread Glenn Kennard


Series is:

Reviewed-by: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] r600g: lower number of driver const buffers

2015-09-11 Thread Edward O'Callaghan

This series is,

Reviewed-by: Edward O'Callaghan 

-- 
  Edward O'Callaghan
  edward.ocallag...@koparo.com

On Fri, Sep 11, 2015, at 08:09 PM, Glenn Kennard wrote:
> Series is:
> 
> Reviewed-by: Glenn Kennard 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] r600: define some values for the fetch constant offsets.

2015-09-11 Thread Edward O'Callaghan

Reviewed-by: Edward O'Callaghan 

-- 
  Edward O'Callaghan
  edward.ocallag...@koparo.com

On Fri, Sep 11, 2015, at 02:43 PM, Dave Airlie wrote:
> From: Dave Airlie 
> 
> This just puts these in one place and #defines them.
> 
> Signed-off-by: Dave Airlie 
> ---
>  src/gallium/drivers/r600/evergreen_state.c | 30
>  +-
>  src/gallium/drivers/r600/evergreend.h  |  8 
>  src/gallium/drivers/r600/r600_state.c  | 25
>  +++--
>  src/gallium/drivers/r600/r600d.h   | 11 +++
>  4 files changed, 47 insertions(+), 27 deletions(-)
> 
> diff --git a/src/gallium/drivers/r600/evergreen_state.c
> b/src/gallium/drivers/r600/evergreen_state.c
> index 0c54a3f..52f4dc8 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -1888,12 +1888,12 @@ static void evergreen_emit_vertex_buffers(struct
> r600_context *rctx,
>  
>  static void evergreen_fs_emit_vertex_buffers(struct r600_context *rctx,
>  struct r600_atom * atom)
>  {
> -   evergreen_emit_vertex_buffers(rctx, &rctx->vertex_buffer_state,
> 992, 0);
> +   evergreen_emit_vertex_buffers(rctx, &rctx->vertex_buffer_state,
> EG_FETCH_CONSTANTS_OFFSET_FS, 0);
>  }
>  
>  static void evergreen_cs_emit_vertex_buffers(struct r600_context *rctx,
>  struct r600_atom * atom)
>  {
> -   evergreen_emit_vertex_buffers(rctx,
> &rctx->cs_vertex_buffer_state, 816,
> +   evergreen_emit_vertex_buffers(rctx,
> &rctx->cs_vertex_buffer_state, EG_FETCH_CONSTANTS_OFFSET_CS,
> RADEON_CP_PACKET3_COMPUTE_MODE);
>  }
>  
> @@ -1963,7 +1963,8 @@ static void evergreen_emit_constant_buffers(struct
> r600_context *rctx,
>  
>  static void evergreen_emit_vs_constant_buffers(struct r600_context
>  *rctx, struct r600_atom *atom)
>  {
> -   evergreen_emit_constant_buffers(rctx,
> &rctx->constbuf_state[PIPE_SHADER_VERTEX], 176,
> +   evergreen_emit_constant_buffers(rctx,
> &rctx->constbuf_state[PIPE_SHADER_VERTEX],
> +   EG_FETCH_CONSTANTS_OFFSET_VS,
>   R_028180_ALU_CONST_BUFFER_SIZE_VS_0,
>   R_028980_ALU_CONST_CACHE_VS_0,
>   0 /* PKT3 flags */);
> @@ -1971,7 +1972,8 @@ static void
> evergreen_emit_vs_constant_buffers(struct r600_context *rctx, struct
>  
>  static void evergreen_emit_gs_constant_buffers(struct r600_context
>  *rctx, struct r600_atom *atom)
>  {
> -   evergreen_emit_constant_buffers(rctx,
> &rctx->constbuf_state[PIPE_SHADER_GEOMETRY], 336,
> +   evergreen_emit_constant_buffers(rctx,
> &rctx->constbuf_state[PIPE_SHADER_GEOMETRY],
> +   EG_FETCH_CONSTANTS_OFFSET_GS,
>   R_0281C0_ALU_CONST_BUFFER_SIZE_GS_0,
>   R_0289C0_ALU_CONST_CACHE_GS_0,
>   0 /* PKT3 flags */);
> @@ -1979,15 +1981,17 @@ static void
> evergreen_emit_gs_constant_buffers(struct r600_context *rctx, struct
>  
>  static void evergreen_emit_ps_constant_buffers(struct r600_context
>  *rctx, struct r600_atom *atom)
>  {
> -   evergreen_emit_constant_buffers(rctx,
> &rctx->constbuf_state[PIPE_SHADER_FRAGMENT], 0,
> - 
> R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
> -  R_028940_ALU_CONST_CACHE_PS_0,
> -  0 /* PKT3 flags */);
> +   evergreen_emit_constant_buffers(rctx,
> &rctx->constbuf_state[PIPE_SHADER_FRAGMENT],
> +   EG_FETCH_CONSTANTS_OFFSET_PS,
> +  
> R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
> +   R_028940_ALU_CONST_CACHE_PS_0,
> +   0 /* PKT3 flags */);
>  }
>  
>  static void evergreen_emit_cs_constant_buffers(struct r600_context
>  *rctx, struct r600_atom *atom)
>  {
> -   evergreen_emit_constant_buffers(rctx,
> &rctx->constbuf_state[PIPE_SHADER_COMPUTE], 816,
> +   evergreen_emit_constant_buffers(rctx,
> &rctx->constbuf_state[PIPE_SHADER_COMPUTE],
> +   EG_FETCH_CONSTANTS_OFFSET_CS,
>   R_028FC0_ALU_CONST_BUFFER_SIZE_LS_0,
>   R_028F40_ALU_CONST_CACHE_LS_0,
>   RADEON_CP_PACKET3_COMPUTE_MODE);
> @@ -2031,25 +2035,25 @@ static void evergreen_emit_sampler_views(struct
> r600_context *rctx,
>  static void evergreen_emit_vs_sampler_views(struct r600_context *rctx,
>  struct r600_atom *atom)
>  {
>   evergreen_emit_sampler_views(rctx, 
> &rctx->samplers[PIPE_SHADER_VERTEX].views,
> -176 + R600_MAX_CONST_BUFFERS, 0);
> +EG_FETCH_CONSTANTS_OFFSET_VS +
> R600_MAX_CONST_BUFFERS

Re: [Mesa-dev] [PATCH 08/11] i965/vec4_nir: Use partial SSA form rather than full non-SSA

2015-09-11 Thread Eduardo Lima Mitev

On 09/10/2015 02:50 AM, Jason Ekstrand wrote:
> We made this switch in the FS backend some time ago and it seems to make a
> number of things a bit easier.
>

The commit log could be a bit less abstract, for example by mentioning
why this change helps the subsequent passes. In any case, even if it were
only for consistency with FS, this change is beneficial.

I also tested it with full dEQP GLES3 suite, apart from piglit, on HWS.
No regressions.

Reviewed-by: Eduardo Lima Mitev 

> ---
>  src/mesa/drivers/dri/i965/brw_nir.c|  2 +-
>  src/mesa/drivers/dri/i965/brw_vec4.h   |  1 +
>  src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 21 ++---
>  3 files changed, 20 insertions(+), 4 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_nir.c 
> b/src/mesa/drivers/dri/i965/brw_nir.c
> index 8f3edc5..f326b23 100644
> --- a/src/mesa/drivers/dri/i965/brw_nir.c
> +++ b/src/mesa/drivers/dri/i965/brw_nir.c
> @@ -183,7 +183,7 @@ brw_create_nir(struct brw_context *brw,
>nir_print_shader(nir, stderr);
> }
>  
> -   nir_convert_from_ssa(nir, is_scalar);
> +   nir_convert_from_ssa(nir, true);
> nir_validate_shader(nir);
>  
> if (!is_scalar) {
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
> b/src/mesa/drivers/dri/i965/brw_vec4.h
> index 01c6e84..de74ec9 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.h
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
> @@ -423,6 +423,7 @@ public:
> virtual void nir_emit_alu(nir_alu_instr *instr);
> virtual void nir_emit_jump(nir_jump_instr *instr);
> virtual void nir_emit_texture(nir_tex_instr *instr);
> +   virtual void nir_emit_undef(nir_ssa_undef_instr *instr);
>  
> dst_reg get_nir_dest(nir_dest dest, enum brw_reg_type type);
> dst_reg get_nir_dest(nir_dest dest, nir_alu_type type);
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> index 751ec73..87a7941 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> @@ -367,6 +367,10 @@ vec4_visitor::nir_emit_instr(nir_instr *instr)
>nir_emit_texture(nir_instr_as_tex(instr));
>break;
>  
> +   case nir_instr_type_ssa_undef:
> +  nir_emit_undef(nir_instr_as_ssa_undef(instr));
> +  break;
> +
> default:
>fprintf(stderr, "VS instruction not yet implemented by NIR->vec4\n");
>break;
> @@ -393,9 +397,14 @@ dst_reg_for_nir_reg(vec4_visitor *v, nir_register 
> *nir_reg,
>  dst_reg
>  vec4_visitor::get_nir_dest(nir_dest dest)
>  {
> -   assert(!dest.is_ssa);
> -   return dst_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset,
> -  dest.reg.indirect);
> +   if (dest.is_ssa) {
> +  dst_reg dst = dst_reg(GRF, alloc.allocate(1));
> +  nir_ssa_values[dest.ssa.index] = dst;
> +  return dst;
> +   } else {
> +  return dst_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset,
> + dest.reg.indirect);
> +   }
>  }
>  
>  dst_reg
> @@ -1528,4 +1537,10 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
>  mcs, is_cube_array, sampler, sampler_reg);
>  }
>  
> +void
> +vec4_visitor::nir_emit_undef(nir_ssa_undef_instr *instr)
> +{
> +   nir_ssa_values[instr->def.index] = dst_reg(GRF, alloc.allocate(1));
> +}
> +
>  }
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v1] i915: fixing driver crashes if too few vertices are submitted

2015-09-11 Thread Predut, Marius



> -Original Message-
> From: Eirik Byrkjeflot Anonsen [mailto:ei...@eirikba.org]
> Sent: Wednesday, September 09, 2015 9:18 PM
> To: Predut, Marius; mesa-dev@lists.freedesktop.org
> Subject: Re: [Mesa-dev] [PATCH v1] i915: fixing driver crashes if too few
> vertices are submitted
> 
> Marius Predut  writes:
> 
> > Comparison with a signed expression and unsigned value is converted to
> > unsigned value, reason for minus value is interpreted as a big
> > unsigned value. For this case the "for" loop is going into unexpected
> > behavior.
> >
> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38109
> > Signed-off-by: Marius Predut 
> > ---
> >  src/mesa/tnl_dd/t_dd_dmatmp.h | 4 +++-
> >  1 file changed, 3 insertions(+), 1 deletion(-)
> >
> > diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h
> > b/src/mesa/tnl_dd/t_dd_dmatmp.h index 7be3954..88ecc78 100644
> > --- a/src/mesa/tnl_dd/t_dd_dmatmp.h
> > +++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
> > @@ -627,6 +627,8 @@ static void TAG(render_quads_verts)( struct gl_context
> *ctx,
> >LOCAL_VARS;
> >GLuint j;
> >
> > +  if(count%4 != 0) return;
> > +
> 
> Seems to me that does a bit more than just fixing the crash. I would think
> 
> if (count < 4)
> 
> would fix the crash only. (And then quite possibly you won't need the next
> hunk?) But I have no idea whether the side effect is desired.
> 
> (Actually, what if count is 0? Or is that impossible?)

No, count can't be 0, because that case is checked and filtered out beforehand in validate_render.

> 
> eirik
> 
> >INIT(GL_TRIANGLES);
> >
> >for (j = start; j < count-3; j += 4) { @@ -1248,7 +1250,7 @@
> > static GLboolean TAG(validate_render)( struct gl_context *ctx,
> > ok = (GLint) count < GET_SUBSEQUENT_VB_MAX_ELTS();
> >  }
> >  else {
> > -   ok = HAVE_TRIANGLES; /* flatshading is ok. */
> > +   ok = HAVE_TRIANGLES && (count%4 == 0); /* flatshading is ok. */
> >  }
> >  break;
> >default:
> > --
> > 1.9.1
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] i915: fixing driver crashes if too few vertices are submitted

2015-09-11 Thread Predut, Marius



> -Original Message-
> From: Ian Romanick [mailto:i...@freedesktop.org]
> Sent: Wednesday, September 09, 2015 8:54 PM
> To: Predut, Marius; mesa-dev@lists.freedesktop.org
> Subject: Re: [Mesa-dev] [PATCH v2] i915: fixing driver crashes if too few
> vertices are submitted
> 
> On 09/09/2015 11:16 AM, Marius Predut wrote:
> > Comparison with a signed expression and unsigned value
> > is converted to unsigned value, reason for minus value is interpreted
> > as a big unsigned value. For this case the "for" loop
> > is going into unexpected behavior.
> >
> > v1:Brian Paul: code style fix.
> 
> I don't think you really did...
> 
> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38109
> > Signed-off-by: Marius Predut 
> > ---
> >  src/mesa/tnl_dd/t_dd_dmatmp.h | 5 -
> >  1 file changed, 4 insertions(+), 1 deletion(-)
> >
> > diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
> > index 7be3954..79de224 100644
> > --- a/src/mesa/tnl_dd/t_dd_dmatmp.h
> > +++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
> > @@ -627,6 +627,9 @@ static void TAG(render_quads_verts)( struct gl_context
> *ctx,
> >LOCAL_VARS;
> >GLuint j;
> >
> > +  if(count % 4 != 0)
>^
> ...because I'm quite sure Brian's code had a space here, per Mesa's
> coding standards.

 So code style is:   if( count % 4 != 0 )? 
I think Brian used:  if(count % 4 != 0)

> 
> Also, this change is incorrect.  If an application does
> glDrawArrays(GL_QUADS, 0, (n * 4) + 3), this change will cause zero
> quads to be drawn when n quads should be drawn.
> 
> Page 18 (page 32 of the PDF) of the OpenGL 2.1 spec says:
> 
> "The total number of vertices between Begin and End is 4n + k,
> where 0 ≤ k ≤ 3; if k is not zero, the final k vertices are
> ignored."
> 
> We probably don't have a piglit test for this scenario, so one should be
> added.  You can CC me on that patch. :)


Ok

> 
> I think the correct change is to trim count such that (count % 4) == 0.
>  If the modified value of count is zero, bail out.  With that change,
> the other hunk (below) is unnecessary.
> 
> > +  return;
> > +
> >INIT(GL_TRIANGLES);
> >
> >for (j = start; j < count-3; j += 4) {
> > @@ -1248,7 +1251,7 @@ static GLboolean TAG(validate_render)( struct
> gl_context *ctx,
> > ok = (GLint) count < GET_SUBSEQUENT_VB_MAX_ELTS();
> >  }
> >  else {
> > -   ok = HAVE_TRIANGLES; /* flatshading is ok. */
> > +   ok = HAVE_TRIANGLES && (count % 4 == 0); /* flatshading is ok. */
> >  }
> >  break;
> >default:
> >

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 1/2] r600g: Support I2D/U2D/D2I/D2U

2015-09-11 Thread Glenn Kennard

Only for Cypress/Cayman/Aruba, older chips have only partial fp64 support.
Uses float intermediate values so only accurate for int24 range, which
matches what the blob does.

Signed-off-by: Glenn Kennard 
---
Changes since v1:
 Split into two functions
 Make names a bit clearer which chips they apply to
 Fix mixup of INT_TO_FLT/UINT_TO_FLT for eg opcode table
 Updated commit message

 src/gallium/drivers/r600/r600_shader.c | 106 ++---
 1 file changed, 98 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index f2c9e16..41cb226 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -3058,6 +3058,96 @@ static int tgsi_dfracexp(struct r600_shader_ctx *ctx)
return 0;
 }
 
+
+static int egcm_int_to_double(struct r600_shader_ctx *ctx)
+{
+   struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction;
+   struct r600_bytecode_alu alu;
+   int i, r;
+   int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+
+   assert(inst->Instruction.Opcode == TGSI_OPCODE_I2D ||
+   inst->Instruction.Opcode == TGSI_OPCODE_U2D);
+
+   for (i = 0; i <= (lasti+1)/2; i++) {
+   memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+   alu.op = ctx->inst_info->op;
+
+   r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+   alu.dst.sel = ctx->temp_reg;
+   alu.dst.chan = i;
+   alu.dst.write = 1;
+   alu.last = 1;
+
+   r = r600_bytecode_add_alu(ctx->bc, &alu);
+   if (r)
+   return r;
+   }
+
+   for (i = 0; i <= lasti; i++) {
+   memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+   alu.op = ALU_OP1_FLT32_TO_FLT64;
+
+   alu.src[0].chan = i/2;
+   if (i%2 == 0)
+   alu.src[0].sel = ctx->temp_reg;
+   else {
+   alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
+   alu.src[0].value = 0x0;
+   }
+   tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+   alu.last = i == lasti;
+
+   r = r600_bytecode_add_alu(ctx->bc, &alu);
+   if (r)
+   return r;
+   }
+
+   return 0;
+}
+
+static int egcm_double_to_int(struct r600_shader_ctx *ctx)
+{
+   struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction;
+   struct r600_bytecode_alu alu;
+   int i, r;
+   int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+
+   assert(inst->Instruction.Opcode == TGSI_OPCODE_D2I ||
+   inst->Instruction.Opcode == TGSI_OPCODE_D2U);
+
+   for (i = 0; i <= lasti; i++) {
+   memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+   alu.op = ALU_OP1_FLT64_TO_FLT32;
+
+   r600_bytecode_src(&alu.src[0], &ctx->src[0], fp64_switch(i));
+   alu.dst.chan = i;
+   alu.dst.sel = ctx->temp_reg;
+   alu.dst.write = i%2 == 0;
+   alu.last = i == lasti;
+
+   r = r600_bytecode_add_alu(ctx->bc, &alu);
+   if (r)
+   return r;
+   }
+
+   for (i = 0; i <= (lasti+1)/2; i++) {
+   memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+   alu.op = ctx->inst_info->op;
+
+   alu.src[0].chan = i*2;
+   alu.src[0].sel = ctx->temp_reg;
+   tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
+   alu.last = 1;
+
+   r = r600_bytecode_add_alu(ctx->bc, &alu);
+   if (r)
+   return r;
+   }
+
+   return 0;
+}
+
 static int cayman_emit_double_instr(struct r600_shader_ctx *ctx)
 {
struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction;
@@ -8150,10 +8240,10 @@ static const struct r600_shader_tgsi_instruction 
eg_shader_tgsi_instruction[] =
[TGSI_OPCODE_DFRAC] = { ALU_OP1_FRACT_64, tgsi_op2_64},
[TGSI_OPCODE_DLDEXP]= { ALU_OP2_LDEXP_64, tgsi_op2_64},
[TGSI_OPCODE_DFRACEXP]  = { ALU_OP1_FREXP_64, tgsi_dfracexp},
-   [TGSI_OPCODE_D2I]   = { ALU_OP0_NOP, tgsi_unsupported},
-   [TGSI_OPCODE_I2D]   = { ALU_OP0_NOP, tgsi_unsupported},
-   [TGSI_OPCODE_D2U]   = { ALU_OP0_NOP, tgsi_unsupported},
-   [TGSI_OPCODE_U2D]   = { ALU_OP0_NOP, tgsi_unsupported},
+   [TGSI_OPCODE_D2I]   = { ALU_OP1_FLT_TO_INT, egcm_double_to_int},
+   [TGSI_OPCODE_I2D]   = { ALU_OP1_INT_TO_FLT, egcm_int_to_double},
+   [TGSI_OPCODE_D2U]   = { ALU_OP1_FLT_TO_UINT, egcm_double_to_int},
+   [TGSI_OPCODE_U2D]   = { ALU_OP1_UINT_TO_FLT, egcm_int_to_double},
[TGSI_OPCODE_DRSQ]  = { ALU_OP2_RECIPSQRT_64, 
cayman_emit_double_instr},
[TGSI_OPCODE_LAS

[Mesa-dev] [PATCH v2 2/2] r600: Enable fp64 on chips with native support

2015-09-11 Thread Glenn Kennard

Cypress/Cayman/Aruba, earlier r6xx/r7xx chips only support a subset
of the needed fp64 ops, and don't do GL4 anyway.

Signed-off-by: Glenn Kennard 
---
Changes since v1:
 Updated commit message

 docs/GL3.txt | 4 ++--
 docs/relnotes/11.1.0.html| 2 +-
 src/gallium/drivers/r600/r600_pipe.c | 3 +++
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 8ad1aac..7247eb6 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -109,7 +109,7 @@ GL 4.0, GLSL 4.00 --- all DONE: nvc0, radeonsi
   - Enhanced per-sample shadingDONE (r600)
   - Interpolation functionsDONE (r600)
   - New overload resolution rules  DONE
-  GL_ARB_gpu_shader_fp64   DONE (llvmpipe, 
softpipe)
+  GL_ARB_gpu_shader_fp64   DONE (r600, llvmpipe, 
softpipe)
   GL_ARB_sample_shadingDONE (i965, nv50, r600)
   GL_ARB_shader_subroutine DONE (i965, nv50, r600, 
llvmpipe, softpipe)
   GL_ARB_tessellation_shader   DONE ()
@@ -127,7 +127,7 @@ GL 4.1, GLSL 4.10 --- all DONE: nvc0, radeonsi
   GL_ARB_get_program_binaryDONE (0 binary formats)
   GL_ARB_separate_shader_objects   DONE (all drivers)
   GL_ARB_shader_precision  DONE (all drivers that 
support GLSL 4.10)
-  GL_ARB_vertex_attrib_64bit   DONE (llvmpipe, 
softpipe)
+  GL_ARB_vertex_attrib_64bit   DONE (r600, llvmpipe, 
softpipe)
   GL_ARB_viewport_arrayDONE (i965, nv50, r600, 
llvmpipe)
 
 
diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index 4b56f69..f7ff74a 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -45,7 +45,7 @@ Note: some of the new features are only available with 
certain drivers.
 
 
 GL_ARB_texture_query_lod on softpipe
-TBD.
+GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips
 
 
 Bug fixes
diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index fd9c16c..a18ec49 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -500,6 +500,9 @@ static int r600_get_shader_param(struct pipe_screen* 
pscreen, unsigned shader, e
return PIPE_SHADER_IR_TGSI;
}
case PIPE_SHADER_CAP_DOUBLES:
+   if (rscreen->b.family == CHIP_CYPRESS ||
+   rscreen->b.family == CHIP_CAYMAN || rscreen->b.family 
== CHIP_ARUBA)
+   return 1;
return 0;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] ralloc: Use attribute((destructor)) instead of atexit(3)

2015-09-11 Thread Jose Fonseca

On 11/09/15 00:35, Erik Faye-Lund wrote:

On Mon, Sep 7, 2015 at 3:54 PM, Jose Fonseca  wrote:

On 07/09/15 10:17, Jean-Sébastien Pédron wrote:

On 04.09.2015 01:37, Matt Turner wrote:

You need to test for this support in configure.ac. It's as simple as
adding a call to AX_GCC_FUNC_ATTRIBUTE in the existing alphabetized
list and then a little bit of preprocessor in src/util/macros.h.

Should the code fall back on atexit(3) if the attribute is not
supported?

At least on Windows, with MSVC,  atexit should be the right thing to do,
since we statically link MSVC RunTime,

Can I use the HAVE_FUNC_ATTRIBUTE_DESTRUCTOR macro in

ralloc.c for this purpose?

For the record, another alternative (way more portable) is to have a
simple .cpp file with a static destructor:

   class AtExit
   {
   public:
  ~AtExit() {
  // do what must be done
   }
   };

   AtExit atExit();

After all, it seems wrong to use non-standard C to replicate what standard
C++ can do.

That sounds nice and all, until you stumble over problems like
STB_GNU_UNIQUE, which renders C++ static destructors completely
unusable on Linux if you have even a single static variable in a
method in your C++ code.

Read up on it at
http://gcc.gnu.org/ml/gcc-help/2011-05/msg00450.html if you feel like
being depressed.

I read up to https://gcc.gnu.org/ml/gcc-help/2011-05/msg00450.html but 
it sounded like a problem in dlclose.

> Unfortunately, C++ and its many questionable implementations make up
> an unpredictable beast that keeps on throwing curve-balls that make
> it no fun at all for library development.

The irony is that __attribute__((destructor)) is one of those sources 
of unpredictability (the order destructors get called is almost 
unpredictable), which is probably why it was never added to standard C.

I can't look at using C with non-standard extensions in a good light, 
and using extended C (ie, C++) in a bad light.  Using and abusing 
non-standard extensions will surely throw curve-balls too.  At least 
those curve-balls thrown by C++ are shared by a lot of other people, more 
likely to get sorted out without our intervention.

Finally, dlclose'ing shared objects is inherently like walking on a mine 
field, no matter what language one uses.  IMHO, if we can't support 
dlclose safely/robustly, might as well slap -Wl,-z,nodelete on our 
shared objects, ie. just pretend everything was statically linked.

Jose
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] nir: Fix output variable names

2015-09-11 Thread Iago Toral

On Fri, 2015-09-11 at 09:24 +0200, Eduardo Lima Mitev wrote:
> Commit 1dbe4af9c9e318525fc082b542b93fb7f1e5efba
> "nir: Add a pass to lower outputs to temporary variables" messed up output
> variable names. The issue can be reproduced by dumping the NIR shaders
> with INTEL_DEBUG="vs,fs".
> ---
>  src/glsl/nir/nir_lower_outputs_to_temporaries.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/src/glsl/nir/nir_lower_outputs_to_temporaries.c 
> b/src/glsl/nir/nir_lower_outputs_to_temporaries.c
> index b730cad..e9c4c0d 100644
> --- a/src/glsl/nir/nir_lower_outputs_to_temporaries.c
> +++ b/src/glsl/nir/nir_lower_outputs_to_temporaries.c
> @@ -87,12 +87,13 @@ nir_lower_outputs_to_temporaries(nir_shader *shader)
> foreach_list_typed(nir_variable, var, node, &state.old_outputs) {
>nir_variable *output = ralloc(shader, nir_variable);
>memcpy(output, var, sizeof *output);
> +  output->name = ralloc_strdup(output, var->name);
>  
>/* The orignal is now the temporary */
>nir_variable *temp = var;
>  
>/* Give the output a new name with @out-temp appended */
> -  temp->name = ralloc_asprintf(var, "%s@out-temp", output->name);
> +  temp->name = ralloc_asprintf(output, "%s@out-temp", output->name);
>temp->data.mode = nir_var_global;
>temp->constant_initializer = NULL;

I think I saw some instances of this happening today... however, isn't
the problem bigger than just the variable name? I see that nir_variable
has other fields that are also dynamically allocated so they are bound
to point to trashed memory as soon as some opt pass kills the old
variable (which is what is happening with the name).
 
Iago

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/4] svga: remove useless MAX2() call

2015-09-11 Thread Iago Toral

Reviewed-by: Iago Toral Quiroga 

On Thu, 2015-09-10 at 09:04 -0600, Brian Paul wrote:
> The sum of two unsigned ints is always >= 0.  Found with Coverity.
> ---
>  src/gallium/drivers/svga/svga_state_tss.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/svga/svga_state_tss.c 
> b/src/gallium/drivers/svga/svga_state_tss.c
> index a13980d..5991da1 100644
> --- a/src/gallium/drivers/svga/svga_state_tss.c
> +++ b/src/gallium/drivers/svga/svga_state_tss.c
> @@ -90,7 +90,7 @@ emit_tex_binding_unit(struct svga_context *svga,
>}
>else {
>   last_level = MIN2(sv->u.tex.last_level, sv->texture->last_level);
> - min_lod = MAX2(0, (s->view_min_lod + sv->u.tex.first_level));
> + min_lod = s->view_min_lod + sv->u.tex.first_level;
>   min_lod = MIN2(min_lod, last_level);
>   max_lod = MIN2(s->view_max_lod + sv->u.tex.first_level, last_level);
>}


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] i965/vec4: check writemask when bailing out at register coalesce

2015-09-11 Thread Alejandro Piñeiro

opt_register_coalesce stopped checking previous instructions to
coalesce with if somebody else was writing to the same
destination. This can be optimized to check if somebody else was
writing to the same channels of the same destination using the
writemask.

Shader DB results (taking into account only vec4):

total instructions in shared programs: 1781593 -> 1734957 (-2.62%)
instructions in affected programs: 1238390 -> 1191754 (-3.77%)
helped:12782
HURT:  0
GAINED:0
LOST:  0
---

piglit run on both IR and NIR path without any regression.


 src/mesa/drivers/dri/i965/brw_vec4.cpp | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 85dc372..998dbf2 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1089,11 +1089,12 @@ vec4_visitor::opt_register_coalesce()
 if (interfered)
break;
 
- /* If somebody else writes our destination here, we can't coalesce
-  * before that.
+ /* If somebody else writes the same channels of our destination here,
+  * we can't coalesce before that.
   */
- if (inst->dst.in_range(scan_inst->dst, scan_inst->regs_written))
-   break;
+ if ((inst->dst.in_range(scan_inst->dst, scan_inst->regs_written) &&
+  (inst->dst.writemask & scan_inst->dst.writemask)))
+break;
 
  /* Check for reads of the register we're trying to coalesce into.  We
   * can't go rewriting instructions above that to put some other value
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] nir: Fix output variable names

2015-09-11 Thread Eduardo Lima Mitev

Oops, I missed a patch from Jason from two days ago, fixing exactly the
same thing.

http://lists.freedesktop.org/archives/mesa-dev/2015-September/094107.html

Please ignore this one.

Eduardo

On 09/11/2015 09:24 AM, Eduardo Lima Mitev wrote:
> Commit 1dbe4af9c9e318525fc082b542b93fb7f1e5efba
> "nir: Add a pass to lower outputs to temporary variables" messed up output
> variable names. The issue can be reproduced by dumping the NIR shaders
> with INTEL_DEBUG="vs,fs".
> ---
>  src/glsl/nir/nir_lower_outputs_to_temporaries.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/src/glsl/nir/nir_lower_outputs_to_temporaries.c 
> b/src/glsl/nir/nir_lower_outputs_to_temporaries.c
> index b730cad..e9c4c0d 100644
> --- a/src/glsl/nir/nir_lower_outputs_to_temporaries.c
> +++ b/src/glsl/nir/nir_lower_outputs_to_temporaries.c
> @@ -87,12 +87,13 @@ nir_lower_outputs_to_temporaries(nir_shader *shader)
> foreach_list_typed(nir_variable, var, node, &state.old_outputs) {
>nir_variable *output = ralloc(shader, nir_variable);
>memcpy(output, var, sizeof *output);
> +  output->name = ralloc_strdup(output, var->name);
>  
>/* The orignal is now the temporary */
>nir_variable *temp = var;
>  
>/* Give the output a new name with @out-temp appended */
> -  temp->name = ralloc_asprintf(var, "%s@out-temp", output->name);
> +  temp->name = ralloc_asprintf(output, "%s@out-temp", output->name);
>temp->data.mode = nir_var_global;
>temp->constant_initializer = NULL;
>  
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] nir/lower_outputs_to_temporaries: Reparent the output name

2015-09-11 Thread Eduardo Lima Mitev

Reviewed-by: Eduardo Lima Mitev 

On 09/10/2015 10:58 PM, Jason Ekstrand wrote:
> We copy the output, make the old output the temporary, and give the
> temporary a new name.  The copy keeps the pointer to the old name.  This
> works just fine up until the point where we lower things to SSA and delete
> the old variable and, with it, the name.  Instead, we should re-parent to
> the copy.
> ---
>  src/glsl/nir/nir_lower_outputs_to_temporaries.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/src/glsl/nir/nir_lower_outputs_to_temporaries.c 
> b/src/glsl/nir/nir_lower_outputs_to_temporaries.c
> index fb8070c..9d3a913 100644
> --- a/src/glsl/nir/nir_lower_outputs_to_temporaries.c
> +++ b/src/glsl/nir/nir_lower_outputs_to_temporaries.c
> @@ -97,6 +97,9 @@ nir_lower_outputs_to_temporaries(nir_shader *shader)
>/* The orignal is now the temporary */
>nir_variable *temp = var;
>  
> +  /* Reparent the name to the new variable */
> +  ralloc_steal(output, output->name);
> +
>/* Give the output a new name with @out-temp appended */
>temp->name = ralloc_asprintf(var, "%s@out-temp", output->name);
>temp->data.mode = nir_var_global;
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 0/4] gallium: add support for retrieving number of texture samples

2015-09-11 Thread Roland Scheidegger

Am 11.09.2015 um 05:15 schrieb Ilia Mirkin:
> My hope was (as you can see in the last patch) to enable this for all
> drivers that support MS textures. I've got nv50/nvc0/r600g covered.
> 
> RadeonSI will have to read the data from the texture descriptor. I'm
> totally unfamiliar with LLVM, the descriptor format, etc. Hopefully
> someone will be able to write an appropriate patch.
> 
> SVGA seems to support ms textures, but I didn't see anything in its
> virtual ISA that would fit. VMWare folk -- is this going to be easy to
> add? Or do I need to add a new PIPE_CAP?
Note the d3d equivalent (sample_info) is d3d10.1 and svga3d opcodes only
cover d3d10, so yes this isn't doable. So I guess for the time being,
either a cap bit or emulation (stick it into a uniform) should be done,
though Brian probably knows better.

Roland


> 
> Ilia Mirkin (4):
>   tgsi: add a TXQS opcode to retrieve the number of texture samples
>   nv50/ir: add support for TXQS tgsi opcode
>   r600g: add support for TXQS tgsi opcode
>   st/mesa: emit TXQS, support ARB_shader_texture_image_samples
> 
>  src/gallium/auxiliary/tgsi/tgsi_info.c|  3 ++-
>  src/gallium/docs/source/tgsi.rst  | 12 +++-
>  .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 17 -
>  .../drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp | 19 
> +++
>  .../drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp |  2 ++
>  src/gallium/drivers/r600/r600_shader.c| 16 
>  src/gallium/include/pipe/p_shader_tokens.h|  1 +
>  src/mesa/state_tracker/st_extensions.c|  1 +
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp|  6 +-
>  9 files changed, 65 insertions(+), 12 deletions(-)
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/4] tgsi: add a TXQS opcode to retrieve the number of texture samples

2015-09-11 Thread Roland Scheidegger

Am 11.09.2015 um 05:15 schrieb Ilia Mirkin:
> Signed-off-by: Ilia Mirkin 
> ---
>  src/gallium/auxiliary/tgsi/tgsi_info.c |  3 ++-
>  src/gallium/docs/source/tgsi.rst   | 12 +++-
>  src/gallium/include/pipe/p_shader_tokens.h |  1 +
>  3 files changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c 
> b/src/gallium/auxiliary/tgsi/tgsi_info.c
> index fb29ea0..3b40c3d 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_info.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
> @@ -141,7 +141,7 @@ static const struct tgsi_opcode_info 
> opcode_info[TGSI_OPCODE_LAST] =
> { 0, 0, 0, 1, 1, 0, NONE, "ENDLOOP", TGSI_OPCODE_ENDLOOP },
> { 0, 0, 0, 0, 1, 0, NONE, "ENDSUB", TGSI_OPCODE_ENDSUB },
> { 1, 1, 1, 0, 0, 0, OTHR, "TXQ_LZ", TGSI_OPCODE_TXQ_LZ },
> -   { 0, 0, 0, 0, 0, 0, NONE, "", 104 }, /* removed */
> +   { 1, 1, 1, 0, 0, 0, OTHR, "TXQS", TGSI_OPCODE_TXQS },
> { 0, 0, 0, 0, 0, 0, NONE, "", 105 }, /* removed */
> { 0, 0, 0, 0, 0, 0, NONE, "", 106 }, /* removed */
> { 0, 0, 0, 0, 0, 0, NONE, "NOP", TGSI_OPCODE_NOP },
> @@ -331,6 +331,7 @@ tgsi_opcode_infer_type( uint opcode )
> case TGSI_OPCODE_SAD: /* XXX some src args may be signed for SAD ? */
> case TGSI_OPCODE_TXQ:
> case TGSI_OPCODE_TXQ_LZ:
> +   case TGSI_OPCODE_TXQS:
> case TGSI_OPCODE_F2U:
> case TGSI_OPCODE_UDIV:
> case TGSI_OPCODE_UMAD:
> diff --git a/src/gallium/docs/source/tgsi.rst 
> b/src/gallium/docs/source/tgsi.rst
> index 314c9ca..e773e9d 100644
> --- a/src/gallium/docs/source/tgsi.rst
> +++ b/src/gallium/docs/source/tgsi.rst
> @@ -960,7 +960,6 @@ XXX doesn't look like most of the opcodes really belong 
> here.
>For components which don't return a resource dimension, their value
>is undefined.
>  
> -
>  .. math::
>  
>lod = src0.x
> @@ -973,6 +972,17 @@ XXX doesn't look like most of the opcodes really belong 
> here.
>  
>dst.w = texture\_levels(unit)
>  
> +
> +.. opcode:: TXQS - Texture Samples Query
> +
> +  This retrieves the number of samples in the sampler, and stores it
in the texture maybe? Not that it really makes a difference...

> +  into the x component. The other components are undefined.
> +
> +.. math::
> +
> +  dst.x = texture\_samples(unit)
> +
> +
>  .. opcode:: TG4 - Texture Gather
>  
>As per ARB_texture_gather, gathers the four texels to be used in a 
> bi-linear
> diff --git a/src/gallium/include/pipe/p_shader_tokens.h 
> b/src/gallium/include/pipe/p_shader_tokens.h
> index 6e07b2c..b36e0a3 100644
> --- a/src/gallium/include/pipe/p_shader_tokens.h
> +++ b/src/gallium/include/pipe/p_shader_tokens.h
> @@ -402,6 +402,7 @@ struct tgsi_property_data {
>  #define TGSI_OPCODE_ENDLOOP 101
>  #define TGSI_OPCODE_ENDSUB  102
>  #define TGSI_OPCODE_TXQ_LZ  103 /* TXQ for mipmap level 0 */
> +#define TGSI_OPCODE_TXQS104
>  /* gap */
>  #define TGSI_OPCODE_NOP 107
>  
> 

Reviewed-by: Roland Scheidegger 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] r600g: Support I2D/U2D/D2I/D2U

2015-09-11 Thread Roland Scheidegger

Am 11.09.2015 um 05:14 schrieb Dave Airlie:
> On 11 September 2015 at 12:37, Roland Scheidegger  wrote:
>> Just on a very quick glance, seems somewhat odd this function is named
>> "cypress" whereas the other 64bit ones are named "cayman".
>> In any case, saying enabling support for chips which have native support
>> isn't quite correct, since there's a couple more which should be able to
>> do it (rv670, rv770, maybe more) as far as I know (no idea though how
>> complex it would be to implement, maybe they are missing some things...)
> 
> probably should say evergreen native,
> 
> I doubt anyone will care about rv670/770, their fp64 support is pretty lacking
> compared to evergreen. We really only care on evergreen because GL4.0,
> and none of the older gpus can do GL4.0
> 

True enough. I'm kind of surprised the blob gets away with the
uber-cheap cheated implementation of the int to double / double to int
conversion, needless to say that probably doesn't get you quite the
results you'd expect and doesn't look very conformant to me... At least
for int to double, a better implementation wouldn't be all that tricky,
just convert upper and lower bits separately and add them back together
(rescaled of course), albeit of course quite a few more instructions.
The reverse probably could work similarly too, but anyway if the blob is
happy with the cheat I guess that's good enough...

Roland

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] i965/vec4: check writemask when bailing out at register coalesce

2015-09-11 Thread Matt Turner

On Fri, Sep 11, 2015 at 5:18 AM, Alejandro Piñeiro  wrote:
> opt_register_coalesce stopped to check previous instructions to
> coalesce with if somebody else was writing on the same
> destination. This can be optimized to check if somebody else was
> writing to the same channels of the same destination using the
> writemask.
>
> Shader DB results (taking into account only vec4):
>
> total instructions in shared programs: 1781593 -> 1734957 (-2.62%)
> instructions in affected programs: 1238390 -> 1191754 (-3.77%)
> helped:12782

Heh, yeah, that seems like a pretty obvious improvement. Nice :)

> HURT:  0
> GAINED:0
> LOST:  0
> ---
>
> piglit run on both IR and NIR path without any regression.
>
>
>  src/mesa/drivers/dri/i965/brw_vec4.cpp | 9 +
>  1 file changed, 5 insertions(+), 4 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index 85dc372..998dbf2 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -1089,11 +1089,12 @@ vec4_visitor::opt_register_coalesce()
>  if (interfered)
> break;
>
> - /* If somebody else writes our destination here, we can't coalesce
> -  * before that.
> + /* If somebody else writes the same channels of our destination 
> here,
> +  * we can't coalesce before that.
>*/
> - if (inst->dst.in_range(scan_inst->dst, scan_inst->regs_written))
> -   break;
> + if ((inst->dst.in_range(scan_inst->dst, scan_inst->regs_written) &&
> +  (inst->dst.writemask & scan_inst->dst.writemask)))

Too many parentheses, and the indentation is a little off I think. How about:

  if (inst->dst.in_range(scan_inst->dst, scan_inst->regs_written) &&
  (inst->dst.writemask & scan_inst->dst.writemask) != 0)


With that,

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] vc4: Try to pair up instructions when only one of them has PM bit

2015-09-11 Thread Boyan Ding

2015-08-30 15:07 GMT+08:00 Boyan Ding :
> Instructions with difference in PM field can actually be paired up if
> the one without PM doesn't do packing/unpacking and non-NOP
> packing/unpacking operations from PM instruction aren't added to the
> other without PM.
>
> total instructions in shared programs: 48209 -> 47460 (-1.55%)
> instructions in affected programs: 11688 -> 10939 (-6.41%)
>
> Signed-off-by: Boyan Ding 

Ping.

I ran piglit with the patch today on my rpi2 and found no regression.

Regards,
Boyan Ding

> ---
> Use 'git diff -w' for easier review
>
>  src/gallium/drivers/vc4/vc4_qpu.c | 123 
> +++---
>  1 file changed, 76 insertions(+), 47 deletions(-)
>
> diff --git a/src/gallium/drivers/vc4/vc4_qpu.c 
> b/src/gallium/drivers/vc4/vc4_qpu.c
> index f67e3f8..d6bc804 100644
> --- a/src/gallium/drivers/vc4/vc4_qpu.c
> +++ b/src/gallium/drivers/vc4/vc4_qpu.c
> @@ -454,8 +454,7 @@ qpu_merge_inst(uint64_t a, uint64_t b)
>  QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG));
>
>  /* Misc fields that have to match exactly. */
> -ok = ok && merge_fields(&merge, a, b, QPU_SF | QPU_PM,
> -~0);
> +ok = ok && merge_fields(&merge, a, b, QPU_SF, ~0);
>
>  if (!merge_fields(&merge, a, b, QPU_RADDR_A_MASK,
>QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A))) {
> @@ -493,64 +492,94 @@ qpu_merge_inst(uint64_t a, uint64_t b)
>  return 0;
>  }
>
> -/* packing: Make sure that non-NOP packs agree, then deal with
> - * special-case failing of adding a non-NOP pack to something with a
> - * NOP pack.
> - */
> -if (!merge_fields(&merge, a, b, QPU_PACK_MASK, 0))
> -return 0;
> -bool new_a_pack = (QPU_GET_FIELD(a, QPU_PACK) !=
> -   QPU_GET_FIELD(merge, QPU_PACK));
> -bool new_b_pack = (QPU_GET_FIELD(b, QPU_PACK) !=
> -   QPU_GET_FIELD(merge, QPU_PACK));
> -if (!(merge & QPU_PM)) {
> -/* Make sure we're not going to be putting a new
> - * a-file packing on either half.
> +if (!merge_fields(&merge, a, b, QPU_PM, ~0)) {
> +/* If one instruction has PM bit set and the other not, the
> + * one without PM shouldn't do packing/unpacking, and we
> + * have to make sure non-NOP packing/unpacking from PM
> + * instruction aren't added to it.
>   */
> -if (new_a_pack && writes_a_file(a))
> -return 0;
> +uint64_t temp;
>
> -if (new_b_pack && writes_a_file(b))
> -return 0;
> -} else {
> -/* Make sure we're not going to be putting new MUL packing on
> - * either half.
> - */
> -if (new_a_pack && QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP)
> -return 0;
> +/* Let a be the one with PM bit */
> +if (!(a & QPU_PM)) {
> +temp = a;
> +a = b;
> +b = temp;
> +}
>
> -if (new_b_pack && QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
> +if ((b & (QPU_PACK_MASK | QPU_UNPACK_MASK)) != 0)
>  return 0;
> -}
>
> -/* unpacking: Make sure that non-NOP unpacks agree, then deal with
> - * special-case failing of adding a non-NOP unpack to something with 
> a
> - * NOP unpack.
> - */
> -if (!merge_fields(&merge, a, b, QPU_UNPACK_MASK, 0))
> -return 0;
> -bool new_a_unpack = (QPU_GET_FIELD(a, QPU_UNPACK) !=
> - QPU_GET_FIELD(merge, QPU_UNPACK));
> -bool new_b_unpack = (QPU_GET_FIELD(b, QPU_UNPACK) !=
> - QPU_GET_FIELD(merge, QPU_UNPACK));
> -if (!(merge & QPU_PM)) {
> -/* Make sure we're not going to be putting a new
> - * a-file packing on either half.
> - */
> -if (new_a_unpack && QPU_GET_FIELD(a, QPU_RADDR_A) != 
> QPU_R_NOP)
> +if ((a & QPU_PACK_MASK) != 0 &&
> +QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
>  return 0;
>
> -if (new_b_unpack && QPU_GET_FIELD(b, QPU_RADDR_A) != 
> QPU_R_NOP)
> +if ((a & QPU_UNPACK_MASK) != 0 && reads_r4(b))
>  return 0;
>  } else {
> -/* Make sure we're not going to be putting new r4 unpack on
> - * either half.
> +/* packing: Make sure that non-NOP packs agree, then deal 
> with
> + * special-case failing of adding a non-NOP pack to something
> + * with

Re: [Mesa-dev] [PATCH 1/4] tgsi: add a TXQS opcode to retrieve the number of texture samples

2015-09-11 Thread Brian Paul


On 09/10/2015 09:15 PM, Ilia Mirkin wrote:

Signed-off-by: Ilia Mirkin 
---
  src/gallium/auxiliary/tgsi/tgsi_info.c |  3 ++-
  src/gallium/docs/source/tgsi.rst   | 12 +++-
  src/gallium/include/pipe/p_shader_tokens.h |  1 +
  3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c 
b/src/gallium/auxiliary/tgsi/tgsi_info.c
index fb29ea0..3b40c3d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -141,7 +141,7 @@ static const struct tgsi_opcode_info 
opcode_info[TGSI_OPCODE_LAST] =
 { 0, 0, 0, 1, 1, 0, NONE, "ENDLOOP", TGSI_OPCODE_ENDLOOP },
 { 0, 0, 0, 0, 1, 0, NONE, "ENDSUB", TGSI_OPCODE_ENDSUB },
 { 1, 1, 1, 0, 0, 0, OTHR, "TXQ_LZ", TGSI_OPCODE_TXQ_LZ },
-   { 0, 0, 0, 0, 0, 0, NONE, "", 104 }, /* removed */
+   { 1, 1, 1, 0, 0, 0, OTHR, "TXQS", TGSI_OPCODE_TXQS },
 { 0, 0, 0, 0, 0, 0, NONE, "", 105 }, /* removed */
 { 0, 0, 0, 0, 0, 0, NONE, "", 106 }, /* removed */
 { 0, 0, 0, 0, 0, 0, NONE, "NOP", TGSI_OPCODE_NOP },
@@ -331,6 +331,7 @@ tgsi_opcode_infer_type( uint opcode )
 case TGSI_OPCODE_SAD: /* XXX some src args may be signed for SAD ? */
 case TGSI_OPCODE_TXQ:
 case TGSI_OPCODE_TXQ_LZ:
+   case TGSI_OPCODE_TXQS:
 case TGSI_OPCODE_F2U:
 case TGSI_OPCODE_UDIV:
 case TGSI_OPCODE_UMAD:
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 314c9ca..e773e9d 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -960,7 +960,6 @@ XXX doesn't look like most of the opcodes really belong 
here.
For components which don't return a resource dimension, their value
is undefined.

-
  .. math::

lod = src0.x
@@ -973,6 +972,17 @@ XXX doesn't look like most of the opcodes really belong 
here.

dst.w = texture\_levels(unit)

+
+.. opcode:: TXQS - Texture Samples Query
+
+  This retrieves the number of samples in the sampler, and stores it


I'd replace 'sampler' with 'texture', as Roland suggested.



+  into the x component. The other components are undefined.
+
+.. math::
+
+  dst.x = texture\_samples(unit)
+
+
  .. opcode:: TG4 - Texture Gather

As per ARB_texture_gather, gathers the four texels to be used in a bi-linear
diff --git a/src/gallium/include/pipe/p_shader_tokens.h 
b/src/gallium/include/pipe/p_shader_tokens.h
index 6e07b2c..b36e0a3 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -402,6 +402,7 @@ struct tgsi_property_data {
  #define TGSI_OPCODE_ENDLOOP 101
  #define TGSI_OPCODE_ENDSUB  102
  #define TGSI_OPCODE_TXQ_LZ  103 /* TXQ for mipmap level 0 */
+#define TGSI_OPCODE_TXQS104
  /* gap */
  #define TGSI_OPCODE_NOP 107




Do you also need to add TXQS in the switch in tgsi_util.c's 
tgsi_util_get_inst_usage_mask()?  Looks like several other recently 
added opcodes (like TXQ, TXQ_LZ) aren't present there either though.


And what about an addition to tgsi_opcode_tmp.h?

Looks good otherwise.

-Brian

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 4/4] st/mesa: emit TXQS, support ARB_shader_texture_image_samples

2015-09-11 Thread Brian Paul


On 09/10/2015 09:15 PM, Ilia Mirkin wrote:

This is enabled for all drivers that support MS textures. The image
component of the ext is a no-op since there is no image support in
gallium (yet).

Signed-off-by: Ilia Mirkin 
---
  src/mesa/state_tracker/st_extensions.c | 1 +
  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 6 +-
  2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_extensions.c 
b/src/mesa/state_tracker/st_extensions.c
index 884761c..d554e4d 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -449,6 +449,7 @@ void st_init_extensions(struct pipe_screen *screen,
{ o(ARB_point_sprite), PIPE_CAP_POINT_SPRITE
 },
{ o(ARB_seamless_cube_map),PIPE_CAP_SEAMLESS_CUBE_MAP   
 },
{ o(ARB_shader_stencil_export),PIPE_CAP_SHADER_STENCIL_EXPORT   
 },
+  { o(ARB_shader_texture_image_samples), PIPE_CAP_TEXTURE_MULTISAMPLE  
},
{ o(ARB_shader_texture_lod),   PIPE_CAP_SM3 
 },
{ o(ARB_shadow),   PIPE_CAP_TEXTURE_SHADOW_MAP  
 },
{ o(ARB_texture_buffer_object),PIPE_CAP_TEXTURE_BUFFER_OBJECTS  
 },
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 625c4e9..c3a8c11 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3229,7 +3229,8 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
opcode = TGSI_OPCODE_LODQ;
break;
 case ir_texture_samples:
-  unreachable("unexpected texture op");
+  opcode = TGSI_OPCODE_TXQS;
+  break;
 }

 if (ir->projector) {
@@ -3339,6 +3340,8 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
   emit_asm(ir, TGSI_OPCODE_MOV, result_dst, levels_src);
} else
   inst = emit_asm(ir, opcode, result_dst, lod_info);
+   } else if (opcode == TGSI_OPCODE_TXQS) {
+  inst = emit_asm(ir, opcode, result_dst);
 } else if (opcode == TGSI_OPCODE_TXF) {
inst = emit_asm(ir, opcode, result_dst, coord);
 } else if (opcode == TGSI_OPCODE_TXL2 || opcode == TGSI_OPCODE_TXB2) {
@@ -5030,6 +5033,7 @@ compile_tgsi_instruction(struct st_translate *t,
 case TGSI_OPCODE_TXL:
 case TGSI_OPCODE_TXP:
 case TGSI_OPCODE_TXQ:
+   case TGSI_OPCODE_TXQS:
 case TGSI_OPCODE_TXF:
 case TGSI_OPCODE_TEX2:
 case TGSI_OPCODE_TXB2:



Reviewed-by: Brian Paul 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] nv3x xfce4 compositing issue, making good progress, need help / input

2015-09-11 Thread Hans de Goede


Hi,

I've been working on trying to fix this one:

https://bugs.freedesktop.org/show_bug.cgi?id=90871

And today I've more or less root caused this, it seems
that some code is making glTexImage2D calls with npot
width / height, which fails on nv3x (whereas it works
on nv4x).

The bug has a simple reproducer attached, but that is
not directly calling glTexImage2D, so it seems that
the npot values are coming from some helper library
used (glXBindTexImageEXT  ?).

2 questions:

1) Does anyone know / suspect where the glTexImage2D call
is originating from (see the test-program attachment
in bugzilla)?

2) Is this a bug in glXBindTexImageEXT (assuming that is
the culprit), or should the test program take into account
that the card does not support npot when calling this ?

Thanks & Regards

Hans
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 0/4] gallium: add support for retrieving number of texture samples

2015-09-11 Thread Brian Paul


On 09/11/2015 07:26 AM, Roland Scheidegger wrote:

Am 11.09.2015 um 05:15 schrieb Ilia Mirkin:

My hope was (as you can see in the last patch) to enable this for all
drivers that support MS textures. I've got nv50/nvc0/r600g covered.

RadeonSI will have to read the data from the texture descriptor. I'm
totally unfamiliar with LLVM, the descriptor format, etc. Hopefully
someone will be able to write an appropriate patch.

SVGA seems to support ms textures, but I didn't see anything in its
virtual ISA that would fit. VMWare folk -- is this going to be easy to
add? Or do I need to add a new PIPE_CAP?

Note the d3d equivalent (sample_info) is d3d10.1 and svga3d opcodes only
cover d3d10, so yes this isn't doable. So I guess for the time being,
either a cap bit or emulation (stick it into a uniform) should be done,
though Brian probably knows better.


Yeah, we could emulate it with an extra constant, but I don't think 
I'll have time to look at it for a while.  Can we use a new CAP for now?


-Brian




Roland




Ilia Mirkin (4):
   tgsi: add a TXQS opcode to retrieve the number of texture samples
   nv50/ir: add support for TXQS tgsi opcode
   r600g: add support for TXQS tgsi opcode
   st/mesa: emit TXQS, support ARB_shader_texture_image_samples

  src/gallium/auxiliary/tgsi/tgsi_info.c|  3 ++-
  src/gallium/docs/source/tgsi.rst  | 12 +++-
  .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 17 -
  .../drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp | 19 +++
  .../drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp |  2 ++
  src/gallium/drivers/r600/r600_shader.c| 16 
  src/gallium/include/pipe/p_shader_tokens.h|  1 +
  src/mesa/state_tracker/st_extensions.c|  1 +
  src/mesa/state_tracker/st_glsl_to_tgsi.cpp|  6 +-
  9 files changed, 65 insertions(+), 12 deletions(-)



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2] i965/vec4: check writemask when bailing out at register coalesce

2015-09-11 Thread Alejandro Piñeiro

opt_register_coalesce stopped checking previous instructions to
coalesce with if somebody else was writing to the same
destination. This can be optimized to check if somebody else was
writing to the same channels of the same destination using the
writemask.

Shader DB results (taking into account only vec4):

total instructions in shared programs: 1781593 -> 1734957 (-2.62%)
instructions in affected programs: 1238390 -> 1191754 (-3.77%)
helped:12782
HURT:  0
GAINED:0
LOST:  0

v2: removed some parenthesis, fixed indentation, as suggested by
Matt Turner

Reviewed-by: Matt Turner 
---

Patch updated after Matt review.

 src/mesa/drivers/dri/i965/brw_vec4.cpp | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 85dc372..587a782 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1089,11 +1089,12 @@ vec4_visitor::opt_register_coalesce()
 if (interfered)
break;
 
- /* If somebody else writes our destination here, we can't coalesce
-  * before that.
+ /* If somebody else writes the same channels of our destination here,
+  * we can't coalesce before that.
   */
- if (inst->dst.in_range(scan_inst->dst, scan_inst->regs_written))
-   break;
+ if (inst->dst.in_range(scan_inst->dst, scan_inst->regs_written) &&
+ (inst->dst.writemask & scan_inst->dst.writemask) != 0)
+break;
 
  /* Check for reads of the register we're trying to coalesce into.  We
   * can't go rewriting instructions above that to put some other value
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 09/11] nir/lower_vec_to_movs: Get rid of start_idx and swizzle compacting

2015-09-11 Thread Eduardo Lima Mitev

On 09/10/2015 02:50 AM, Jason Ekstrand wrote:
> Previously, we did this thing with keeping track of a separate start_idx
> which was different from the iteration variable.  I think this was a relic
> of the way that GLSL IR implements writemasks.  In NIR, if a given bit in
> the writemask is unset then that channel is just "unused", not missing.  In
> particular, a vec4 operation with a writemask of 0xd will use sources 0, 2,
> and 3 and leave source 1 alone.  We can simplify things a good deal (and
> make them correct) by removing this "compacting" step.
> 

Indeed, much clearer now.

Reviewed-by: Eduardo Lima Mitev 

> Cc: Eric Anholt 
> ---
>  src/glsl/nir/nir_lower_vec_to_movs.c | 33 +
>  1 file changed, 13 insertions(+), 20 deletions(-)
> 
> diff --git a/src/glsl/nir/nir_lower_vec_to_movs.c 
> b/src/glsl/nir/nir_lower_vec_to_movs.c
> index 7d31e36..ed8ec9b 100644
> --- a/src/glsl/nir/nir_lower_vec_to_movs.c
> +++ b/src/glsl/nir/nir_lower_vec_to_movs.c
> @@ -58,29 +58,25 @@ src_matches_dest_reg(nir_dest *dest, nir_src *src)
>   * which ones have been processed.
>   */
>  static unsigned
> -insert_mov(nir_alu_instr *vec, unsigned start_channel,
> -unsigned start_src_idx, nir_shader *shader)
> +insert_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
>  {
> -   unsigned src_idx = start_src_idx;
> -   assert(src_idx < nir_op_infos[vec->op].num_inputs);
> +   assert(start_idx < nir_op_infos[vec->op].num_inputs);
>  
> nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_imov);
> -   nir_alu_src_copy(&mov->src[0], &vec->src[src_idx], mov);
> +   nir_alu_src_copy(&mov->src[0], &vec->src[start_idx], mov);
> nir_alu_dest_copy(&mov->dest, &vec->dest, mov);
>  
> -   mov->dest.write_mask = (1u << start_channel);
> -   mov->src[0].swizzle[start_channel] = vec->src[src_idx].swizzle[0];
> -   src_idx++;
> +   mov->dest.write_mask = (1u << start_idx);
> +   mov->src[0].swizzle[start_idx] = vec->src[start_idx].swizzle[0];
>  
> -   for (unsigned i = start_channel + 1; i < 4; i++) {
> +   for (unsigned i = start_idx + 1; i < 4; i++) {
>if (!(vec->dest.write_mask & (1 << i)))
>   continue;
>  
> -  if (nir_srcs_equal(vec->src[src_idx].src, 
> vec->src[start_src_idx].src)) {
> +  if (nir_srcs_equal(vec->src[i].src, vec->src[start_idx].src)) {
>   mov->dest.write_mask |= (1 << i);
> - mov->src[0].swizzle[i] = vec->src[src_idx].swizzle[0];
> + mov->src[0].swizzle[i] = vec->src[i].swizzle[0];
>}
> -  src_idx++;
> }
>  
> nir_instr_insert_before(&vec->instr, &mov->instr);
> @@ -129,26 +125,23 @@ lower_vec_to_movs_block(nir_block *block, void 
> *void_state)
> * destination reg, in case other values we're populating in the dest
> * might overwrite them.
> */
> -  for (unsigned i = 0, src_idx = 0; i < 4; i++) {
> +  for (unsigned i = 0; i < 4; i++) {
>   if (!(vec->dest.write_mask & (1 << i)))
>  continue;
>  
> - if (src_matches_dest_reg(&vec->dest.dest, &vec->src[src_idx].src)) {
> -finished_write_mask |= insert_mov(vec, i, src_idx, 
> state->shader);
> + if (src_matches_dest_reg(&vec->dest.dest, &vec->src[i].src)) {
> +finished_write_mask |= insert_mov(vec, i, state->shader);
>  break;
>   }
> - src_idx++;
>}
>  
>/* Now, emit MOVs for all the other src channels. */
> -  for (unsigned i = 0, src_idx = 0; i < 4; i++) {
> +  for (unsigned i = 0; i < 4; i++) {
>   if (!(vec->dest.write_mask & (1 << i)))
>  continue;
>  
>   if (!(finished_write_mask & (1 << i)))
> -finished_write_mask |= insert_mov(vec, i, src_idx, 
> state->shader);
> -
> - src_idx++;
> +finished_write_mask |= insert_mov(vec, i, state->shader);
>}
>  
>nir_instr_remove(&vec->instr);
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] nir: add nir_swizzle

2015-09-11 Thread Rob Clark

On Thu, Sep 10, 2015 at 8:03 PM, Erik Faye-Lund  wrote:
> On Thu, Sep 10, 2015 at 10:08 PM, Rob Clark  wrote:
>> From: Rob Clark 
>>
>> Rather than make yet another copy of channel(), let's move it into nir.
>>
>> Signed-off-by: Rob Clark 
>> ---
>>  src/glsl/nir/nir_builder.h  |  6 ++
>>  src/glsl/nir/nir_lower_tex_projector.c  | 24 +---
>>  src/glsl/nir/nir_normalize_cubemap_coords.c | 20 +++-
>>  3 files changed, 22 insertions(+), 28 deletions(-)
>>
>> diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h
>> index ba988d7..6568493 100644
>> --- a/src/glsl/nir/nir_builder.h
>> +++ b/src/glsl/nir/nir_builder.h
>> @@ -216,6 +216,12 @@ nir_swizzle(nir_builder *build, nir_ssa_def *src, 
>> unsigned swiz[4],
>>   nir_imov_alu(build, alu_src, num_components);
>>  }
>>
>> +static inline nir_ssa_def *
>> +nir_channel(nir_builder *b, nir_ssa_def *def, int c)
>> +{
>> +   return nir_swizzle(b, def, (unsigned[4]){c, c, c, c}, 1, false);
>> +}
>> +
>
> The subject is "add nir_swizzle", but you seem to rename channel to
> nir_channel instead... Old subject, perhaps?

hmm, yeah, subject is totally bogus.. I'll fix that ;-)

BR,
-R
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [RFC] nir: add lowering stage for user-clip-planes

2015-09-11 Thread Rob Clark

On Fri, Sep 11, 2015 at 1:20 AM, Connor Abbott  wrote:
> On Thu, Sep 10, 2015 at 10:54 PM, Rob Clark  wrote:
>>
>> On Sep 10, 2015 7:39 PM, "Jason Ekstrand"  wrote:
>>>
>>> On Thu, Sep 10, 2015 at 2:39 PM, Rob Clark  wrote:
>>> > From: Rob Clark 
>>> >
>>> > So this is basically working as a lowering pass for handling user-clip-
>>> > planes, and frag-shader emulation of CLIPDIST for hardware that needs
>>> > to handle this in the shader.
>>> >
>>> > For user-clip-planes, instructions are inserted to calculate CLIPDIST
>>> > in the VS.  And in both cases, discard_if's are inserted in FS to
>>> > handle the clipping.
>>> >
>>> > NOTE: This currently requires a bit of a hack in nir_validate, which
>>> > is unhappy about things like:
>>> >
>>> > decl_var uniform  vec4[10] uniform_0 (0, 0)
>>> > decl_var shader_out  vec4 out_0 (0, 0)
>>> > decl_var shader_out  vec4 clipdist_1 (17, 1)
>>> > decl_var shader_out  vec4 clipdist_2 (18, 2)
>>> > decl_overload main returning void
>>> >
>>> > impl main {
>>> > block block_0:
>>> > /* preds: */
>>> > ...
>>> > /* succs: block_1 */
>>> > block block_1:
>>> > /* preds: block_0 */
>>> > ... inserted CLIPDIST calculations ...
>>> > /* succs: block_2 */
>>> > block block_2:
>>> > }
>>> >
>>> > The thing is, repurposing the end_block and creating a new end_block,
>>> > without having to consider all the different potential successors,
>>> > seems like by far the easiest way to insert code at the end of a
>>> > shader.  So probably useful in other cases.  (I do something similar
>>> > internally in ir3 for transform-feedback).
>>> >
>>> > Seems like the easiest thing would be to relax the restriction in
>>> > nir_validate, but I'm not sure if this will break other assumptions
>>> > elsewhere.
>>>
>>> Yes it does break assumptions.  In particular, it breaks the
>>> assumption that the end-block is empty :-)  In all seriousness though,
>>> the idea is that once you jump to the end, you're done.  The end block
>>> is the place where returns go.  You can't put code after a return
>>> (Ok, yeah, exception handling and destructors etc...).
>>
>> No, I insert a new end_block, so that particular assumption should be
>> safe... I was just wondering if there was some less obvious assumption..
>
> In principle there's no reason you can't have two basic blocks next to
> each other, but it would be pointless -- in that case, they could
> always be combined into a single basic block. That being said, I think
> you're trying to fit a square peg in a round hole. The fundamental
> restriction, as Jason said, is that you can't have code that gets
> executed after the return jump, since (on some architectures) the
> return jump corresponds to an actual instruction, whereas if you can
> execute stuff after the return you're essentially turning the return
> instruction into a more general-purpose jump -- you might as well just
> add a nir_jump_branch and call it a day. The end block is only there
> for bookkeeping purposes, to make representing post-dominance easier
> (if we want to add it) and to make it easy to enumerate all the return
> jumps in a function.
>
> So if you do have to insert stuff that always runs before the shader
> ends, and you're worried about doing the right thing, I would simply
> iterate over all the predecessors of the end block and do your thing
> at the end of each of them. As Jason said, in TGSI all the early
> returns have been lowered away anyways so the issue is kind of
> irrelevant for you now, but at least it will make things more
> future-proof.

So I was starting to think of adding a nir_cursor fxn to dtrt when a
lowering pass wants to insert what is basically like a "finally" block
(ie. just insert into last block before end_block if end_block only
has one predecessor, or insert a new block and fix up end_block
otherwise).  At least this way we don't have to duplicate that logic in
other lowering passes which want to do something similar.  (For
example, it would be nice to eventually convert my stream-out handling
to a generic nir pass.)

I guess that new nir_cursor function could fix up return's to be jumps
too.. although that makes things a bit more complicated if you have
multiple returns.  I guess the returns will always be the last
instruction in the N predecessors of the end_block, so maybe it isn't
so bad.

BR,
-R

>>
>>> That said, you're in TGSI land where function inlining and early
>>> return lowering has already happened so there are no early returns.
>>> This means that the end block has exactly one predecessor and you can
>>> put your stuff there.  If you want to make it obvious, feel free to
>>> add a nir_end_of_impl constructor for nir_cursor which asserts that
>>> there is exactly one predecessor and calls nir_after_block_before_jump
>>> on that blo

[Mesa-dev] [PATCH v3] i915: fixing driver crashes if too few vertices are submitted

2015-09-11 Thread Marius Predut

When a signed expression is compared with an unsigned value, it is
converted to unsigned, so a negative value is interpreted
as a big unsigned value. In this case the "for" loop
exhibits unexpected behavior.

v1: Brian Paul: code style fix.
v2: Ian Romanick: glDrawArrays(GL_QUADS, 0, (n * 4) + k) fail , k < 4.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38109
Signed-off-by: Marius Predut 
---
 src/mesa/tnl_dd/t_dd_dmatmp.h | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
index 7be3954..f99d977 100644
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -627,6 +627,13 @@ static void TAG(render_quads_verts)( struct gl_context 
*ctx,
   LOCAL_VARS;
   GLuint j;
 
+  /* Page 18 (page 32 of the PDF) of the OpenGL 2.1 spec says:
+   * The total number of vertices between Begin and End is 4n + k,
+   * where 0 ≤ k ≤ 3; if k is not zero, the final k vertices are  
ignored.
+   */
+  count = (count / 4) * 4;
+  if(count == 0) return;
+
   INIT(GL_TRIANGLES);
 
   for (j = start; j < count-3; j += 4) {
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 07/13] nir/lower_vec_to_movs: Handle partially SSA shaders

2015-09-11 Thread Jason Ekstrand

v2 (Jason Ekstrand):
 - Use nir_instr_rewrite_dest
 - Pass impl directly into lower_vec_to_movs
---
 src/glsl/nir/nir_lower_vec_to_movs.c | 21 +++--
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/src/glsl/nir/nir_lower_vec_to_movs.c 
b/src/glsl/nir/nir_lower_vec_to_movs.c
index 25a6f7d..993a108 100644
--- a/src/glsl/nir/nir_lower_vec_to_movs.c
+++ b/src/glsl/nir/nir_lower_vec_to_movs.c
@@ -84,8 +84,11 @@ insert_mov(nir_alu_instr *vec, unsigned start_channel,
 }
 
 static bool
-lower_vec_to_movs_block(nir_block *block, void *shader)
+lower_vec_to_movs_block(nir_block *block, void *void_impl)
 {
+   nir_function_impl *impl = void_impl;
+   nir_shader *shader = impl->overload->function->shader;
+
nir_foreach_instr_safe(block, instr) {
   if (instr->type != nir_instr_type_alu)
  continue;
@@ -101,8 +104,16 @@ lower_vec_to_movs_block(nir_block *block, void *shader)
  continue; /* The loop */
   }
 
-  /* Since we insert multiple MOVs, we have to be non-SSA. */
-  assert(!vec->dest.dest.is_ssa);
+  if (vec->dest.dest.is_ssa) {
+ /* Since we insert multiple MOVs, we have a register destination. */
+ nir_register *reg = nir_local_reg_create(impl);
+ reg->num_components = vec->dest.dest.ssa.num_components;
+
+ nir_ssa_def_rewrite_uses(&vec->dest.dest.ssa, nir_src_for_reg(reg));
+
+ nir_instr_rewrite_dest(&vec->instr, &vec->dest.dest,
+nir_dest_for_reg(reg));
+  }
 
   unsigned finished_write_mask = 0;
 
@@ -142,9 +153,7 @@ lower_vec_to_movs_block(nir_block *block, void *shader)
 static void
 nir_lower_vec_to_movs_impl(nir_function_impl *impl)
 {
-   nir_shader *shader = impl->overload->function->shader;
-
-   nir_foreach_block(impl, lower_vec_to_movs_block, shader);
+   nir_foreach_block(impl, lower_vec_to_movs_block, impl);
 }
 
 void
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 10.1/11] nir: Add a fdot instruction that replicates the result to a vec4

2015-09-11 Thread Jason Ekstrand

Fortunately, nir_constant_expr already auto-splats if "dst" never shows up
in the constant expression field so we don't need to do anything there.

Cc: Connor Abbott 
---
 src/glsl/nir/nir.h| 6 ++
 src/glsl/nir/nir_opcodes.py   | 3 +++
 src/glsl/nir/nir_opt_algebraic.py | 3 +++
 3 files changed, 12 insertions(+)

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 3f693b1..4e4543a 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1434,6 +1434,12 @@ typedef struct nir_shader_compiler_options {
/* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */
bool lower_scmp;
 
+   /* Does the native fdot instruction replicate its result for four
+* components?  If so, then opt_algebraic_late will turn all fdotN
+* instructions into fdot_replicatedN instructions.
+*/
+   bool fdot_replicates;
+
/**
 * Does the driver support real 32-bit integers?  (Otherwise, integers
 * are simulated by floats.)
diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py
index df5b7e2..495d109 100644
--- a/src/glsl/nir/nir_opcodes.py
+++ b/src/glsl/nir/nir_opcodes.py
@@ -453,6 +453,9 @@ binop("fxor", tfloat, commutative,
 binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
  "{src}")
 
+binop_reduce("fdot_replicated", 4, tfloat, tfloat,
+ "{src0} * {src1}", "{src0} + {src1}", "{src}")
+
 binop("fmin", tfloat, "", "fminf(src0, src1)")
 binop("imin", tint, commutative + associative, "src1 > src0 ? src0 : src1")
 binop("umin", tunsigned, commutative + associative, "src1 > src0 ? src0 : 
src1")
diff --git a/src/glsl/nir/nir_opt_algebraic.py 
b/src/glsl/nir/nir_opt_algebraic.py
index 226e0a8..acc3b04 100644
--- a/src/glsl/nir/nir_opt_algebraic.py
+++ b/src/glsl/nir/nir_opt_algebraic.py
@@ -240,6 +240,9 @@ late_optimizations = [
(('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
(('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
(('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
+   (('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'),
+   (('fdot3', a, b), ('fdot_replicated3', a, b), 'options->fdot_replicates'),
+   (('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'),
 ]
 
 print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render()
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 11/11] nir/lower_vec_to_movs: Coalesce into destinations of fdot instructions

2015-09-11 Thread Jason Ekstrand

Now that we have a replicating fdot instruction, we can actually coalesce
into the destinations of vec4 instructions.  We couldn't really do this
before because, if the destination had to end up in .z, we couldn't
reswizzle the instruction.  With a replicated destination, the result ends
up in all channels so we can just set the writemask and we're done.

Shader-db results for vec4 programs on Haswell:

   total instructions in shared programs: 1778849 -> 1751223 (-1.55%)
   instructions in affected programs: 763104 -> 735478 (-3.62%)
   helped:7067
   HURT:  26

It turns out that dot-products matter...

Cc: Eduardo Lima Mitev 
---
 src/glsl/nir/nir_lower_vec_to_movs.c | 49 ++--
 1 file changed, 36 insertions(+), 13 deletions(-)

diff --git a/src/glsl/nir/nir_lower_vec_to_movs.c 
b/src/glsl/nir/nir_lower_vec_to_movs.c
index 9ff86ea..2cb0457 100644
--- a/src/glsl/nir/nir_lower_vec_to_movs.c
+++ b/src/glsl/nir/nir_lower_vec_to_movs.c
@@ -79,6 +79,14 @@ insert_mov(nir_alu_instr *vec, unsigned start_idx, 
nir_shader *shader)
return mov->dest.write_mask;
 }
 
+static bool
+has_replicated_dest(nir_alu_instr *alu)
+{
+   return alu->op == nir_op_fdot_replicated2 ||
+  alu->op == nir_op_fdot_replicated3 ||
+  alu->op == nir_op_fdot_replicated4;
+}
+
 /* Attempts to coalesce the "move" from the given source of the vec to the
  * destination of the instruction generating the value. If, for whatever
  * reason, we cannot coalesce the mmove, it does nothing and returns 0.  We
@@ -116,19 +124,28 @@ try_coalesce(nir_alu_instr *vec, unsigned start_idx, 
nir_shader *shader)
nir_alu_instr *src_alu =
   nir_instr_as_alu(vec->src[start_idx].src.ssa->parent_instr);
 
-   /* We only care about being able to re-swizzle the instruction if it is
-* something that we can reswizzle.  It must be per-component.
-*/
-   if (nir_op_infos[src_alu->op].output_size != 0)
-  return 0;
-
-   /* If we are going to reswizzle the instruction, we can't have any
-* non-per-component sources either.
-*/
-   for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
-  if (nir_op_infos[src_alu->op].input_sizes[j] != 0)
+   if (has_replicated_dest(src_alu)) {
+  /* The fdot instruction is special: It replicates its result to all
+   * components.  This means that we can always rewrite its destination
+   * and we don't need to swizzle anything.
+   */
+   } else {
+  /* We only care about being able to re-swizzle the instruction if it is
+   * something that we can reswizzle.  It must be per-component.  The one
+   * exception to this is the fdotN instructions which implicitly splat
+   * their result out to all channels.
+   */
+  if (nir_op_infos[src_alu->op].output_size != 0)
  return 0;
 
+  /* If we are going to reswizzle the instruction, we can't have any
+   * non-per-component sources either.
+   */
+  for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
+ if (nir_op_infos[src_alu->op].input_sizes[j] != 0)
+return 0;
+   }
+
/* Stash off all of the ALU instruction's swizzles. */
uint8_t swizzles[4][4];
for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
@@ -148,8 +165,14 @@ try_coalesce(nir_alu_instr *vec, unsigned start_idx, 
nir_shader *shader)
* instruction so we can re-swizzle that component to match.
*/
   write_mask |= 1 << i;
-  for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
- src_alu->src[j].swizzle[i] = swizzles[j][vec->src[i].swizzle[0]];
+  if (has_replicated_dest(src_alu)) {
+ /* Since the destination is a single replicated value, we don't need
+  * to do any reswizzling
+  */
+  } else {
+ for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
+src_alu->src[j].swizzle[i] = swizzles[j][vec->src[i].swizzle[0]];
+  }
 
   /* Clear the no longer needed vec source */
   nir_instr_rewrite_src(&vec->instr, &vec->src[i].src, NIR_SRC_INIT);
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 10.2/11] i965/vec4: Use the replicated fdot instruction in NIR

2015-09-11 Thread Jason Ekstrand

Cc: Connor Abbott 
---
 src/mesa/drivers/dri/i965/brw_shader.cpp   | 8 
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 6 +++---
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index cf9aa23..eed73fb 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -96,6 +96,14 @@ brw_compiler_create(void *mem_ctx, const struct 
brw_device_info *devinfo)
 */
nir_options->lower_ffma = true;
nir_options->lower_sub = true;
+   /* In the vec4 backend, our dpN instruction replicates its result to all
+* the components of a vec4.  We would like NIR to give us replicated fdot
+* instructions because it can optimize better for us.
+*
+* For the FS backend, it should be lowered away by the scalarizing pass so
+* we should never see fdot anyway.
+*/
+   nir_options->fdot_replicates = true;
 
/* We want the GLSL compiler to emit code that uses condition codes */
for (int i = 0; i < MESA_SHADER_STAGES; i++) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index ae65d8c..6526295 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -1254,17 +1254,17 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
   inst->predicate = BRW_PREDICATE_NORMAL;
   break;
 
-   case nir_op_fdot2:
+   case nir_op_fdot_replicated2:
   inst = emit(BRW_OPCODE_DP2, dst, op[0], op[1]);
   inst->saturate = instr->dest.saturate;
   break;
 
-   case nir_op_fdot3:
+   case nir_op_fdot_replicated3:
   inst = emit(BRW_OPCODE_DP3, dst, op[0], op[1]);
   inst->saturate = instr->dest.saturate;
   break;
 
-   case nir_op_fdot4:
+   case nir_op_fdot_replicated4:
   inst = emit(BRW_OPCODE_DP4, dst, op[0], op[1]);
   inst->saturate = instr->dest.saturate;
   break;
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 06/11] nir/lower_vec_to_movs: Pass the shader around directly

2015-09-11 Thread Jason Ekstrand

Previously, we were passing the shader around, we were just calling it
"mem_ctx".  However, the nir_shader is (and must be for the purposes of
mark-and-sweep) the mem_ctx so we might as well pass it around explicitly.
---
 src/glsl/nir/nir_lower_vec_to_movs.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/glsl/nir/nir_lower_vec_to_movs.c 
b/src/glsl/nir/nir_lower_vec_to_movs.c
index b7f096d..25a6f7d 100644
--- a/src/glsl/nir/nir_lower_vec_to_movs.c
+++ b/src/glsl/nir/nir_lower_vec_to_movs.c
@@ -54,12 +54,12 @@ src_matches_dest_reg(nir_dest *dest, nir_src *src)
  */
 static unsigned
 insert_mov(nir_alu_instr *vec, unsigned start_channel,
-unsigned start_src_idx, void *mem_ctx)
+   unsigned start_src_idx, nir_shader *shader)
 {
unsigned src_idx = start_src_idx;
assert(src_idx < nir_op_infos[vec->op].num_inputs);
 
-   nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
+   nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_imov);
nir_alu_src_copy(&mov->src[0], &vec->src[src_idx], mov);
nir_alu_dest_copy(&mov->dest, &vec->dest, mov);
 
@@ -84,7 +84,7 @@ insert_mov(nir_alu_instr *vec, unsigned start_channel,
 }
 
 static bool
-lower_vec_to_movs_block(nir_block *block, void *mem_ctx)
+lower_vec_to_movs_block(nir_block *block, void *shader)
 {
nir_foreach_instr_safe(block, instr) {
   if (instr->type != nir_instr_type_alu)
@@ -115,7 +115,7 @@ lower_vec_to_movs_block(nir_block *block, void *mem_ctx)
 continue;
 
  if (src_matches_dest_reg(&vec->dest.dest, &vec->src[src_idx].src)) {
-finished_write_mask |= insert_mov(vec, i, src_idx, mem_ctx);
+finished_write_mask |= insert_mov(vec, i, src_idx, shader);
 break;
  }
  src_idx++;
@@ -127,7 +127,7 @@ lower_vec_to_movs_block(nir_block *block, void *mem_ctx)
 continue;
 
  if (!(finished_write_mask & (1 << i)))
-finished_write_mask |= insert_mov(vec, i, src_idx, mem_ctx);
+finished_write_mask |= insert_mov(vec, i, src_idx, shader);
 
  src_idx++;
   }
@@ -142,7 +142,9 @@ lower_vec_to_movs_block(nir_block *block, void *mem_ctx)
 static void
 nir_lower_vec_to_movs_impl(nir_function_impl *impl)
 {
-   nir_foreach_block(impl, lower_vec_to_movs_block, ralloc_parent(impl));
+   nir_shader *shader = impl->overload->function->shader;
+
+   nir_foreach_block(impl, lower_vec_to_movs_block, shader);
 }
 
 void
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] nir/lower_outputs_to_temporaries: Reparent the output name

2015-09-11 Thread Jason Ekstrand

On Fri, Sep 11, 2015 at 5:45 AM, Eduardo Lima Mitev  wrote:
> Reviewed-by: Eduardo Lima Mitev 

One side-note: Could you please reply-all when reviewing patches?
That way you keep the Cc list alive.  While I still get it, my e-mail
client flags things that are specifically Cc'd to me so I notice them.
Thanks!
--Jason

> On 09/10/2015 10:58 PM, Jason Ekstrand wrote:
>> We copy the output, make the old output the temporary, and give the
>> temporary a new name.  The copy keeps the pointer to the old name.  This
>> works just fine up until the point where we lower things to SSA and delete
>> the old variable and, with it, the name.  Instead, we should re-parent to
>> the copy.
>> ---
>>  src/glsl/nir/nir_lower_outputs_to_temporaries.c | 3 +++
>>  1 file changed, 3 insertions(+)
>>
>> diff --git a/src/glsl/nir/nir_lower_outputs_to_temporaries.c 
>> b/src/glsl/nir/nir_lower_outputs_to_temporaries.c
>> index fb8070c..9d3a913 100644
>> --- a/src/glsl/nir/nir_lower_outputs_to_temporaries.c
>> +++ b/src/glsl/nir/nir_lower_outputs_to_temporaries.c
>> @@ -97,6 +97,9 @@ nir_lower_outputs_to_temporaries(nir_shader *shader)
>>/* The orignal is now the temporary */
>>nir_variable *temp = var;
>>
>> +  /* Reparent the name to the new variable */
>> +  ralloc_steal(output, output->name);
>> +
>>/* Give the output a new name with @out-temp appended */
>>temp->name = ralloc_asprintf(var, "%s@out-temp", output->name);
>>temp->data.mode = nir_var_global;
>>
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 10.1/11] nir: Add a fdot instruction that replicates the result to a vec4

2015-09-11 Thread Connor Abbott

On Fri, Sep 11, 2015 at 11:52 AM, Jason Ekstrand  wrote:
> Fortunately, nir_constant_expr already auto-splats if "dst" never shows up
> in the constant expression field so we don't need to do anything there.
>
> Cc: Connor Abbott 
> ---
>  src/glsl/nir/nir.h| 6 ++
>  src/glsl/nir/nir_opcodes.py   | 3 +++
>  src/glsl/nir/nir_opt_algebraic.py | 3 +++
>  3 files changed, 12 insertions(+)
>
> diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
> index 3f693b1..4e4543a 100644
> --- a/src/glsl/nir/nir.h
> +++ b/src/glsl/nir/nir.h
> @@ -1434,6 +1434,12 @@ typedef struct nir_shader_compiler_options {
> /* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */
> bool lower_scmp;
>
> +   /* Does the native fdot instruction replicate its result for four
> +* components?  If so, then opt_algebraic_late will turn all fdotN
> +* instructions into fdot_replicatedN instructions.
> +*/
> +   bool fdot_replicates;
> +
> /**
>  * Does the driver support real 32-bit integers?  (Otherwise, integers
>  * are simulated by floats.)
> diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py
> index df5b7e2..495d109 100644
> --- a/src/glsl/nir/nir_opcodes.py
> +++ b/src/glsl/nir/nir_opcodes.py
> @@ -453,6 +453,9 @@ binop("fxor", tfloat, commutative,
>  binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
>   "{src}")
>
> +binop_reduce("fdot_replicated", 4, tfloat, tfloat,
> + "{src0} * {src1}", "{src0} + {src1}", "{src}")
> +

The {}'s are a relic of when the constant-folding stuff was
implemented using Python string formatting. We shouldn't add more of
them, and we should probably fix the ones above too. Other than that,
these 2 patches have my R-b.

>  binop("fmin", tfloat, "", "fminf(src0, src1)")
>  binop("imin", tint, commutative + associative, "src1 > src0 ? src0 : src1")
>  binop("umin", tunsigned, commutative + associative, "src1 > src0 ? src0 : 
> src1")
> diff --git a/src/glsl/nir/nir_opt_algebraic.py 
> b/src/glsl/nir/nir_opt_algebraic.py
> index 226e0a8..acc3b04 100644
> --- a/src/glsl/nir/nir_opt_algebraic.py
> +++ b/src/glsl/nir/nir_opt_algebraic.py
> @@ -240,6 +240,9 @@ late_optimizations = [
> (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
> (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
> (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
> +   (('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'),
> +   (('fdot3', a, b), ('fdot_replicated3', a, b), 'options->fdot_replicates'),
> +   (('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'),
>  ]
>
>  print nir_algebraic.AlgebraicPass("nir_opt_algebraic", 
> optimizations).render()
> --
> 2.5.0.400.gff86faf
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 10.1/11] nir: Add a fdot instruction that replicates the result to a vec4

2015-09-11 Thread Jason Ekstrand

On Fri, Sep 11, 2015 at 9:00 AM, Connor Abbott  wrote:
> On Fri, Sep 11, 2015 at 11:52 AM, Jason Ekstrand  wrote:
>> Fortunately, nir_constant_expr already auto-splats if "dst" never shows up
>> in the constant expression field so we don't need to do anything there.
>>
>> Cc: Connor Abbott 
>> ---
>>  src/glsl/nir/nir.h| 6 ++
>>  src/glsl/nir/nir_opcodes.py   | 3 +++
>>  src/glsl/nir/nir_opt_algebraic.py | 3 +++
>>  3 files changed, 12 insertions(+)
>>
>> diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
>> index 3f693b1..4e4543a 100644
>> --- a/src/glsl/nir/nir.h
>> +++ b/src/glsl/nir/nir.h
>> @@ -1434,6 +1434,12 @@ typedef struct nir_shader_compiler_options {
>> /* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */
>> bool lower_scmp;
>>
>> +   /* Does the native fdot instruction replicate its result for four
>> +* components?  If so, then opt_algebraic_late will turn all fdotN
>> +* instructions into fdot_replicatedN instructions.
>> +*/
>> +   bool fdot_replicates;
>> +
>> /**
>>  * Does the driver support real 32-bit integers?  (Otherwise, integers
>>  * are simulated by floats.)
>> diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py
>> index df5b7e2..495d109 100644
>> --- a/src/glsl/nir/nir_opcodes.py
>> +++ b/src/glsl/nir/nir_opcodes.py
>> @@ -453,6 +453,9 @@ binop("fxor", tfloat, commutative,
>>  binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + 
>> {src1}",
>>   "{src}")
>>
>> +binop_reduce("fdot_replicated", 4, tfloat, tfloat,
>> + "{src0} * {src1}", "{src0} + {src1}", "{src}")
>> +
>
> The {}'s are a relic of when the constant-folding stuff was
> implemented using Python string formatting. We shouldn't add more of
> them, and we should probably fix the ones above too. Other than that,
> these 2 patches have my R-b.

As I said on IRC, the {}'s are still needed for binop_reduce.  It uses
them to re-construct the actual expression which then doesn't contain
any {}'s.

--Jason

>>  binop("fmin", tfloat, "", "fminf(src0, src1)")
>>  binop("imin", tint, commutative + associative, "src1 > src0 ? src0 : src1")
>>  binop("umin", tunsigned, commutative + associative, "src1 > src0 ? src0 : 
>> src1")
>> diff --git a/src/glsl/nir/nir_opt_algebraic.py 
>> b/src/glsl/nir/nir_opt_algebraic.py
>> index 226e0a8..acc3b04 100644
>> --- a/src/glsl/nir/nir_opt_algebraic.py
>> +++ b/src/glsl/nir/nir_opt_algebraic.py
>> @@ -240,6 +240,9 @@ late_optimizations = [
>> (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
>> (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
>> (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
>> +   (('fdot2', a, b), ('fdot_replicated2', a, b), 
>> 'options->fdot_replicates'),
>> +   (('fdot3', a, b), ('fdot_replicated3', a, b), 
>> 'options->fdot_replicates'),
>> +   (('fdot4', a, b), ('fdot_replicated4', a, b), 
>> 'options->fdot_replicates'),
>>  ]
>>
>>  print nir_algebraic.AlgebraicPass("nir_opt_algebraic", 
>> optimizations).render()
>> --
>> 2.5.0.400.gff86faf
>>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] i965/vec4: check writemask when bailing out at register coalesce

2015-09-11 Thread Jason Ekstrand

Cc'ing stable

On Fri, Sep 11, 2015 at 8:13 AM, Alejandro Piñeiro  wrote:
> opt_register_coalesce stopped checking previous instructions to
> coalesce with if somebody else was writing to the same
> destination. This can be optimized to check if somebody else was
> writing to the same channels of the same destination using the
> writemask.
>
> Shader DB results (taking into account only vec4):
>
> total instructions in shared programs: 1781593 -> 1734957 (-2.62%)
> instructions in affected programs: 1238390 -> 1191754 (-3.77%)
> helped:12782
> HURT:  0
> GAINED:0
> LOST:  0
>
> v2: removed some parentheses, fixed indentation, as suggested by
> Matt Turner
>
> Reviewed-by: Matt Turner 
> ---
>
> Patch updated after Matt review.
>
>  src/mesa/drivers/dri/i965/brw_vec4.cpp | 9 +
>  1 file changed, 5 insertions(+), 4 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index 85dc372..587a782 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -1089,11 +1089,12 @@ vec4_visitor::opt_register_coalesce()
>  if (interfered)
> break;
>
> - /* If somebody else writes our destination here, we can't coalesce
> -  * before that.
> + /* If somebody else writes the same channels of our destination 
> here,
> +  * we can't coalesce before that.
>*/
> - if (inst->dst.in_range(scan_inst->dst, scan_inst->regs_written))
> -   break;
> + if (inst->dst.in_range(scan_inst->dst, scan_inst->regs_written) &&
> + (inst->dst.writemask & scan_inst->dst.writemask) != 0)
> +break;
>
>   /* Check for reads of the register we're trying to coalesce into.  
> We
>* can't go rewriting instructions above that to put some other 
> value
> --
> 2.1.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] nir: Fix output variable names

2015-09-11 Thread Michael Schellenberger Costa

Hi,

Didn't Jason just send a similar patch to the list?

[Mesa-dev] [PATCH] nir/lower_outputs_to_temporaries: Reparent the output
name

Regards
Michael

Am 11/09/2015 um 09:24 schrieb Eduardo Lima Mitev:
> Commit 1dbe4af9c9e318525fc082b542b93fb7f1e5efba
> "nir: Add a pass to lower outputs to temporary variables" messed up output
> variable names. The issue can be reproduced by dumping the NIR shaders
> with INTEL_DEBUG="vs,fs".
> ---
>  src/glsl/nir/nir_lower_outputs_to_temporaries.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/src/glsl/nir/nir_lower_outputs_to_temporaries.c 
> b/src/glsl/nir/nir_lower_outputs_to_temporaries.c
> index b730cad..e9c4c0d 100644
> --- a/src/glsl/nir/nir_lower_outputs_to_temporaries.c
> +++ b/src/glsl/nir/nir_lower_outputs_to_temporaries.c
> @@ -87,12 +87,13 @@ nir_lower_outputs_to_temporaries(nir_shader *shader)
> foreach_list_typed(nir_variable, var, node, &state.old_outputs) {
>nir_variable *output = ralloc(shader, nir_variable);
>memcpy(output, var, sizeof *output);
> +  output->name = ralloc_strdup(output, var->name);
>  
>/* The orignal is now the temporary */
>nir_variable *temp = var;
>  
>/* Give the output a new name with @out-temp appended */
> -  temp->name = ralloc_asprintf(var, "%s@out-temp", output->name);
> +  temp->name = ralloc_asprintf(output, "%s@out-temp", output->name);
>temp->data.mode = nir_var_global;
>temp->constant_initializer = NULL;
>  
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] nv3x xfce4 compositing issue, making good progress, need help / input

2015-09-11 Thread Ilia Mirkin

On Fri, Sep 11, 2015 at 10:46 AM, Hans de Goede  wrote:
> Hi,
>
> I've been working on trying to fix this one:
>
> https://bugs.freedesktop.org/show_bug.cgi?id=90871
>
> And today I've more or less root caused this, it seems
> that some code is making glTexImage2D calls with npot
> width / height, which fails on nv3x (whereas it works
> on nv4x).
>
> The bug has a simple reproducer attached, but that is
> not directly calling glTexImage2D, so it seems that
> the npot values are coming from some helper library
> used (glXBindTexImageEXT  ?).
>
> 2 questions:
>
> 1) Does anyone know / suspect where the glTexImage2D call
> is originating from (see the test-program attachment
> in bugzilla)?
>
> 2) Is this a bug in glXBindTexImageEXT (assuming that is
> the culprit), or should the test program take into account
> that the card does not support npot when calling this ?

Without directly answering your questions (as I don't know the
answers), without NPOT support (which nv3x doesn't have), you can only
use non-power-of-two textures with GL_TEXTURE_RECTANGLE, not
GL_TEXTURE_2D. The program that you have does appear to detect this
though, and uses the rect target if ARB_texture_rectangle is
available, which it should be. I guess it should just bail if both
ARB_texture_rectangle and ARB_texture_non_power_of_two aren't
available...

Perhaps code in src/glx messes up? You could put a breakpoint in
_mesa_error and see where it's coming from...

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH V2 1/8] i965: Add a helper function intel_get_tile_dims()

2015-09-11 Thread Anuj Phogat

On Thu, Sep 10, 2015 at 12:20 PM, Chad Versace 
wrote:

> On Wed 19 Aug 2015, Anuj Phogat wrote:
> > V2:
> > - Do the tile width/height computations in the new helper
> >   function and use it later in intel_miptree_get_tile_masks().
> > - Change the name to intel_get_tile_dims().
> >
> > Cc: Ben Widawsky 
> > Signed-off-by: Anuj Phogat 
> > ---
> >  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 81
> +++
> >  src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  4 ++
> >  2 files changed, 63 insertions(+), 22 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > index e85c3f0..c282e94 100644
> > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > @@ -563,35 +563,15 @@ static unsigned long
> >  intel_get_yf_ys_bo_size(struct intel_mipmap_tree *mt, unsigned
> *alignment,
> >  unsigned long *pitch)
> >  {
> > -   const uint32_t bpp = mt->cpp * 8;
> > -   const uint32_t aspect_ratio = (bpp == 16 || bpp == 64) ? 2 : 1;
> > uint32_t tile_width, tile_height;
> > unsigned long stride, size, aligned_y;
> >
> > assert(mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE);
> > -
> > -   switch (bpp) {
> > -   case 8:
> > -  tile_height = 64;
> > -  break;
> > -   case 16:
> > -   case 32:
> > -  tile_height = 32;
> > -  break;
> > -   case 64:
> > -   case 128:
> > -  tile_height = 16;
> > -  break;
> > -   default:
> > -  unreachable("not reached");
> > -   }
> > -
> > -   if (mt->tr_mode == INTEL_MIPTREE_TRMODE_YS)
> > -  tile_height *= 4;
> > +   intel_get_tile_dims(mt->tiling, mt->tr_mode, mt->cpp,
> > +   &tile_width, &tile_height);
> >
> > aligned_y = ALIGN(mt->total_height, tile_height);
> > stride = mt->total_width * mt->cpp;
> > -   tile_width = tile_height * mt->cpp * aspect_ratio;
> > stride = ALIGN(stride, tile_width);
> > size = stride * aligned_y;
> >
> > @@ -1081,6 +1061,63 @@ intel_miptree_get_image_offset(const struct
> intel_mipmap_tree *mt,
> > *y = mt->level[level].slice[slice].y_offset;
> >  }
> >
> > +
> > +/**
> > + * This function computes the width and height in bytes of different
> tiling
> > + * patterns. If the BO is untiled, the dimensions are set to cpp.
> > + */
>
> Is the tile_w parameter in units of bytes or pixels? That should be
> documented at the top of the function.
>
It's in bytes. I'll document it.


>
> Also, just to be clear, "tile height" is always unitless. The hw docs
> sometime express it in units of "rows". But "rows" itself is unitless.
>
Right. I'm returning the tile_h in bytes in this function, so I need to
fix it.
It didn't break anything because it isn't used anywhere.

> > +void
> > +intel_get_tile_dims(uint32_t tiling, uint32_t tr_mode, uint32_t cpp,
> > +uint32_t *tile_w, uint32_t *tile_h)
> > +{
> > +   if (tr_mode == INTEL_MIPTREE_TRMODE_NONE) {
> > +  switch (tiling) {
> > +  case I915_TILING_X:
> > + *tile_w = 512;
> > + *tile_h = 8 * cpp;
>
> For legacy tiling formats, the height of a tile is independent of the
> pixel size,  because the height is unitless. For Tile X, it's always
> 2^3. For Tile Y Legacy, it's always 2^5.
>
Right. I'll fix it.


> If tile_w is in units of bytes, then it's also independent of pixel
> size. If tile_w is in units of pixels, though, then
>
> tile_w_pixels = tile_w_bytes / cpp
>
>
> > + break;
> > +  case I915_TILING_Y:
> > + *tile_w = 128;
> > + *tile_h = 32 * cpp;
> > + break;
> > +  case I915_TILING_NONE:
> > + *tile_w = cpp;
> > + *tile_h = cpp;
> > + break;
> > +  default:
> > + unreachable("not reached");
> > +  }
> > +   } else {
> > +  uint32_t aspect_ratio = 1;
> > +  assert(_mesa_is_pow_two(cpp));
> > +
> > +  switch (cpp) {
> > +  case 1:
> > + *tile_h = 64 * cpp;
>
> I'm still reading the docs for the non-legacy tiling formats Yf and Ys.
> So I can't comment on this part of the patch.
>
> > + break;
> > +  case 2:
> > +  case 4:
> > + *tile_h = 32 * cpp;
> > + break;
> > +  case 8:
> > +  case 16:
> > + *tile_h = 16 * cpp;
> > + break;
> > +  default:
> > + unreachable("not reached");
> > +  }
> > +
> > +  if (cpp == 2 || cpp == 8)
> > + aspect_ratio = 2;
> > +
> > +  if (tr_mode == INTEL_MIPTREE_TRMODE_YS)
> > + *tile_h *= 4;
> > +
> > +  *tile_w = *tile_h * aspect_ratio;
> > +   }
> > +}
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] i965: L3 cache partitioning.

2015-09-11 Thread Ben Widawsky

On Sun, Sep 06, 2015 at 06:12:38PM +0200, Francisco Jerez wrote:
> This series implements dynamic partitioning of the L3 cache space
> among its clients, the purpose is multiple:
> 
>  - Steal a chunk of L3 space when necessary and reserve it for SLM as
>required to support compute shaders with shared variables.
> 
>  - Allow L3 caching of dataport DC memory access where the default L3
>partitioning doesn't have any space reserved for it (pre-Gen8) --
>Should improve performance of scratch access (register spills and
>fills and some forms of indirect array indexing), atomic counters
>and images.
> 
>  - Allow dynamic changes of the L3 configuration for work-loads that
>could benefit from a partitioning other than the default
>(e.g. reduce URB size to gain some additional cache space on
>heavily fragment-bound workloads, or split the L3 allocation of
>different clients to reduce thrashing).  The basic infrastructure
>to achieve this is implemented here but no specific heuristics are
>included yet in this series.

I admit to not knowing how this stuff works pre-GEN8, but it was my impression that
on GEN8+ these kinds of tweaks will make no difference to 3D clients other than
for constant buffers, and scratch space. Every other client of the L3 uses a
fixed size. Therefore I am skeptical of your last claim and I'd very much like
it if you could help me find where the theory came from and certainly some
amount of performance data would be very welcome as well. 

I certainly believe the partitioning is critical for optimal usage of SLM, and
as you mention, ensuring that other users of the dynamic partitioning don't
screw us over. It's the rest that I'm unsure of.

> 
> The series can be found here in a testable form:
> http://cgit.freedesktop.org/~currojerez/mesa/log/?h=i965-l3-partitioning
> 
> [PATCH 01/13] i965: Define symbolic constants for some useful L3 cache 
> control registers.
> [PATCH 02/13] i965: Keep track of whether LRI is allowed in the context 
> struct.
> [PATCH 03/13] i965: Define state flag to signal that the URB size has been 
> altered.
> [PATCH 04/13] i965/gen8: Don't add workaround bits to PIPE_CONTROL stalls if 
> DC flush is set.
> [PATCH 05/13] i965: Import tables enumerating the set of validated L3 
> configurations.
> [PATCH 06/13] i965: Implement programming of the L3 configuration.
> [PATCH 07/13] i965/hsw: Enable L3 atomics.
> [PATCH 08/13] i965: Implement selection of the closest L3 configuration based 
> on a vector of weights.
> [PATCH 09/13] i965: Calculate appropriate L3 partition weights for the 
> current pipeline state.
> [PATCH 10/13] i965: Implement L3 state atom.
> [PATCH 11/13] i965: Add debug flag to print out the new L3 state during 
> transitions.
> [PATCH 12/13] i965: Work around L3 state leaks during context switches.
> [PATCH 13/13] i965: Hook up L3 partitioning state atom.
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v3] i915: fixing driver crashes if too few vertices are submitted

2015-09-11 Thread Ilia Mirkin

On Fri, Sep 11, 2015 at 12:36 PM, Marius Predut  wrote:
> When a signed expression is compared with an unsigned value, it
> is converted to unsigned, so a negative value is interpreted
> as a big unsigned value. In this case the "for" loop
> exhibits unexpected behavior.
>
> v1: Brian Paul: code style fix.
> v2: Ian Romanick: glDrawArrays(GL_QUADS, 0, (n * 4) + k) fail , k < 4.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38109
> Signed-off-by: Marius Predut 
> ---
>  src/mesa/tnl_dd/t_dd_dmatmp.h | 7 +++
>  1 file changed, 7 insertions(+)
>
> diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
> index 7be3954..f99d977 100644
> --- a/src/mesa/tnl_dd/t_dd_dmatmp.h
> +++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
> @@ -627,6 +627,13 @@ static void TAG(render_quads_verts)( struct gl_context 
> *ctx,
>LOCAL_VARS;
>GLuint j;
>
> +  /* Page 18 (page 32 of the PDF) of the OpenGL 2.1 spec says:
> +   * The total number of vertices between Begin and End is 4n + k,
> +   * where 0 ≤ k ≤ 3; if k is not zero, the final k vertices are  
> ignored.
> +   */
> +  count = (count / 4) * 4;

Might be just me, but I'd find

count &= ~0x3

to be a lot clearer. Don't know if the compiler can make such an
optimization. However this seems wrong... you're supposed to draw
start..count, so that's the value that has to be div-by-4. Further up,
when there's native quad support, the logic does:

  /* Emit whole number of quads in total.  dmasz is already a multiple
   * of 4.
   */
  count -= (count-start)%4;

Which seems more accurate.

> +  if(count == 0) return;

if (count == 0)

note the space. That's the style used in all of mesa.

$ git grep '\sif(' | wc -l
1076
$ git grep '\sif (' | wc -l
58071

I guess a few 'if(' instances snuck through, mostly in src/gallium.
But the overwhelming majority are of the 'if (' style.

> +
>INIT(GL_TRIANGLES);
>
>for (j = start; j < count-3; j += 4) {
> --
> 1.9.1
>
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 4/7] softpipe: Constify sampler and view parameters in img filters

2015-09-11 Thread Krzesimir Nowak

Those functions actually could always take them as constants.
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 68 ++--
 src/gallium/drivers/softpipe/sp_tex_sample.h |  4 +-
 2 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
b/src/gallium/drivers/softpipe/sp_tex_sample.c
index ba292c4..a2f18a4 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -1017,8 +1017,8 @@ print_sample_4(const char *function, float 
rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZ
 /* Some image-filter fastpaths:
  */
 static inline void
-img_filter_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview,
-struct sp_sampler *sp_samp,
+img_filter_2d_linear_repeat_POT(const struct sp_sampler_view *sp_sview,
+const struct sp_sampler *sp_samp,
 const struct img_filter_args *args,
 float *rgba)
 {
@@ -1071,8 +1071,8 @@ img_filter_2d_linear_repeat_POT(struct sp_sampler_view 
*sp_sview,
 
 
 static inline void
-img_filter_2d_nearest_repeat_POT(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+img_filter_2d_nearest_repeat_POT(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
  const struct img_filter_args *args,
  float rgba[TGSI_QUAD_SIZE])
 {
@@ -1105,8 +1105,8 @@ img_filter_2d_nearest_repeat_POT(struct sp_sampler_view 
*sp_sview,
 
 
 static inline void
-img_filter_2d_nearest_clamp_POT(struct sp_sampler_view *sp_sview,
-struct sp_sampler *sp_samp,
+img_filter_2d_nearest_clamp_POT(const struct sp_sampler_view *sp_sview,
+const struct sp_sampler *sp_samp,
 const struct img_filter_args *args,
 float rgba[TGSI_QUAD_SIZE])
 {
@@ -1147,8 +1147,8 @@ img_filter_2d_nearest_clamp_POT(struct sp_sampler_view 
*sp_sview,
 
 
 static void
-img_filter_1d_nearest(struct sp_sampler_view *sp_sview,
-  struct sp_sampler *sp_samp,
+img_filter_1d_nearest(const struct sp_sampler_view *sp_sview,
+  const struct sp_sampler *sp_samp,
   const struct img_filter_args *args,
   float rgba[TGSI_QUAD_SIZE])
 {
@@ -1179,8 +1179,8 @@ img_filter_1d_nearest(struct sp_sampler_view *sp_sview,
 
 
 static void
-img_filter_1d_array_nearest(struct sp_sampler_view *sp_sview,
-struct sp_sampler *sp_samp,
+img_filter_1d_array_nearest(const struct sp_sampler_view *sp_sview,
+const struct sp_sampler *sp_samp,
 const struct img_filter_args *args,
 float *rgba)
 {
@@ -1213,8 +1213,8 @@ img_filter_1d_array_nearest(struct sp_sampler_view 
*sp_sview,
 
 
 static void
-img_filter_2d_nearest(struct sp_sampler_view *sp_sview,
-  struct sp_sampler *sp_samp,
+img_filter_2d_nearest(const struct sp_sampler_view *sp_sview,
+  const struct sp_sampler *sp_samp,
   const struct img_filter_args *args,
   float *rgba)
 {
@@ -1248,8 +1248,8 @@ img_filter_2d_nearest(struct sp_sampler_view *sp_sview,
 
 
 static void
-img_filter_2d_array_nearest(struct sp_sampler_view *sp_sview,
-struct sp_sampler *sp_samp,
+img_filter_2d_array_nearest(const struct sp_sampler_view *sp_sview,
+const struct sp_sampler *sp_samp,
 const struct img_filter_args *args,
 float *rgba)
 {
@@ -1285,8 +1285,8 @@ img_filter_2d_array_nearest(struct sp_sampler_view 
*sp_sview,
 
 
 static void
-img_filter_cube_nearest(struct sp_sampler_view *sp_sview,
-struct sp_sampler *sp_samp,
+img_filter_cube_nearest(const struct sp_sampler_view *sp_sview,
+const struct sp_sampler *sp_samp,
 const struct img_filter_args *args,
 float *rgba)
 {
@@ -1330,8 +1330,8 @@ img_filter_cube_nearest(struct sp_sampler_view *sp_sview,
 }
 
 static void
-img_filter_cube_array_nearest(struct sp_sampler_view *sp_sview,
-  struct sp_sampler *sp_samp,
+img_filter_cube_array_nearest(const struct sp_sampler_view *sp_sview,
+  const struct sp_sampler *sp_samp,
   const struct img_filter_args *args,
   float *rgba)
 {
@@ -1367,8 +1367,8 @@ img_filter_cube_array_nearest(struct sp_sampler_view 
*sp_sview,
 }
 
 static void
-img_filter_3d_nearest(struct sp_sampler_view *sp_sview,
-  struct sp_sampler *sp_samp,
+img_filter

[Mesa-dev] [PATCH 3/7] tgsi, softpipe: Constify tgsi_sampler in query_lod vfunc

2015-09-11 Thread Krzesimir Nowak

A followup from previous commit - since all functions called by
query_lod take pointers to const sp_sampler_view and const sp_sampler,
which are taken from tgsi_sampler subclass, we can take the tgsi_sampler as
const itself now.
---
 src/gallium/auxiliary/tgsi/tgsi_exec.h   | 2 +-
 src/gallium/drivers/softpipe/sp_tex_sample.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h 
b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 5fc276c..a371aa9 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -139,7 +139,7 @@ struct tgsi_sampler
  const int j[TGSI_QUAD_SIZE], const int k[TGSI_QUAD_SIZE],
  const int lod[TGSI_QUAD_SIZE], const int8_t offset[3],
  float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
-   void (*query_lod)(struct tgsi_sampler *tgsi_sampler,
+   void (*query_lod)(const struct tgsi_sampler *tgsi_sampler,
  const unsigned sview_index,
  const unsigned sampler_index,
  const float s[TGSI_QUAD_SIZE],
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
b/src/gallium/drivers/softpipe/sp_tex_sample.c
index f5a1264..ba292c4 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -3612,7 +3612,7 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler,
 }
 
 static void
-sp_tgsi_query_lod(struct tgsi_sampler *tgsi_sampler,
+sp_tgsi_query_lod(const struct tgsi_sampler *tgsi_sampler,
   const unsigned sview_index,
   const unsigned sampler_index,
   const float s[TGSI_QUAD_SIZE],
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/7] softpipe: Move the faces array from view to filter_args

2015-09-11 Thread Krzesimir Nowak

With that, sp_sampler_view instances are not abused anymore as a local
storage, so we can later make them constant.
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 36 +---
 src/gallium/drivers/softpipe/sp_tex_sample.h |  4 +---
 2 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 489cae7..d5a7ed6 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -2003,7 +2003,7 @@ mip_filter_linear(struct sp_sampler_view *sp_sview,
   args.s = s[j];
   args.t = t[j];
   args.p = p[j];
-  args.face_id = sp_sview->faces[j];
+  args.face_id = filt_args->faces[j];
 
   if (lod[j] < 0.0) {
  args.level = psview->u.tex.first_level;
@@ -2087,7 +2087,7 @@ mip_filter_nearest(struct sp_sampler_view *sp_sview,
   args.s = s[j];
   args.t = t[j];
   args.p = p[j];
-  args.face_id = sp_sview->faces[j];
+  args.face_id = filt_args->faces[j];
 
   if (lod[j] < 0.0) {
  args.level = psview->u.tex.first_level;
@@ -2148,7 +2148,7 @@ mip_filter_none(struct sp_sampler_view *sp_sview,
   args.s = s[j];
   args.t = t[j];
   args.p = p[j];
-  args.face_id = sp_sview->faces[j];
+  args.face_id = filt_args->faces[j];
   if (lod[j] < 0.0) {
  mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
   }
@@ -2193,7 +2193,7 @@ mip_filter_none_no_filter_select(struct sp_sampler_view 
*sp_sview,
   args.s = s[j];
   args.t = t[j];
   args.p = p[j];
-  args.face_id = sp_sview->faces[j];
+  args.face_id = filt_args->faces[j];
   mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
}
 }
@@ -2239,6 +2239,7 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
   const float s[TGSI_QUAD_SIZE],
   const float t[TGSI_QUAD_SIZE],
   const float p[TGSI_QUAD_SIZE],
+  const float faces[TGSI_QUAD_SIZE],
   unsigned level,
   const float dudx, const float dvdx,
   const float dudy, const float dvdy,
@@ -2319,7 +2320,7 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
   float num[4] = {0.0F, 0.0F, 0.0F, 0.0F};
   buffer_next = 0;
   den = 0;
-  args.face_id = sp_sview->faces[j];
+  args.face_id = faces[j];
 
   U = u0 - tex_u;
   for (v = v0; v <= v1; ++v) {
@@ -2528,7 +2529,7 @@ mip_filter_linear_aniso(struct sp_sampler_view *sp_sview,
  args.t = t[j];
  args.p = p[j];
  args.level = psview->u.tex.last_level;
- args.face_id = sp_sview->faces[j];
+ args.face_id = filt_args->faces[j];
  min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
   }
}
@@ -2537,7 +2538,7 @@ mip_filter_linear_aniso(struct sp_sampler_view *sp_sview,
* seem to be worth the extra running time.
*/
   img_filter_2d_ewa(sp_sview, sp_samp, min_filter, mag_filter,
-s, t, p, level0,
+s, t, p, filt_args->faces, level0,
 dudx, dvdx, dudy, dvdy, rgba);
}
 
@@ -2590,7 +2591,7 @@ mip_filter_linear_2d_linear_repeat_POT(
   args.s = s[j];
   args.t = t[j];
   args.p = p[j];
-  args.face_id = sp_sview->faces[j];
+  args.face_id = filt_args->faces[j];
   args.offset = filt_args->offset;
   args.gather_only = filt_args->control == TGSI_SAMPLER_GATHER;
   if ((unsigned)level0 >= psview->u.tex.last_level) {
@@ -3129,7 +3130,8 @@ convert_cube(struct sp_sampler_view *sp_sview,
  const float c0[TGSI_QUAD_SIZE],
  float ssss[TGSI_QUAD_SIZE],
  float tttt[TGSI_QUAD_SIZE],
- float pppp[TGSI_QUAD_SIZE])
+ float pppp[TGSI_QUAD_SIZE],
+ float faces[TGSI_QUAD_SIZE])
 {
unsigned j;
 
@@ -3176,7 +3178,7 @@ convert_cube(struct sp_sampler_view *sp_sview,
 const float ima = -0.5F / fabsf(s[j]);
 ssss[j] = sign *  p[j] * ima + 0.5F;
 tttt[j] = t[j] * ima + 0.5F;
-sp_sview->faces[j] = face;
+faces[j] = face;
  }
   }
   else if (ary >= arx && ary >= arz) {
@@ -3187,7 +3189,7 @@ convert_cube(struct sp_sampler_view *sp_sview,
 const float ima = -0.5F / fabsf(t[j]);
 ssss[j] = -s[j] * ima + 0.5F;
 tttt[j] = sign * -p[j] * ima + 0.5F;
-sp_sview->faces[j] = face;
+faces[j] = face;
  }
   }
   else {
@@ -3198,7 +3200,7 @@ convert_cube(struct sp_sampler_view *sp_sview,
 const float ima = -0.5F / fabsf(p[j]);
 ssss[j] = sign * -s[j] * ima + 0.5F;
 tttt[j] = t[j] * ima + 0.5F;
-sp_sview->faces[j] = face;
+faces[j] = face;
  }
   }
}
@@ -3594,11 +3596,16 @@ sp_tgsi_get_samples(s

[Mesa-dev] [PATCH 2/7] softpipe: Constify some sampler and view parameters

2015-09-11 Thread Krzesimir Nowak

This is to prepare for making tgsi_sampler parameter in query_lod a
const too. These functions do not modify anything in either sampler or
view anymore.
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 51 ++--
 src/gallium/drivers/softpipe/sp_tex_sample.h |  4 +--
 2 files changed, 28 insertions(+), 27 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
b/src/gallium/drivers/softpipe/sp_tex_sample.c
index d5a7ed6..f5a1264 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -1861,8 +1861,8 @@ compute_lod(const struct pipe_sampler_state *sampler,
  * \param lod results per-fragment lod.
  */
 static inline void
-compute_lambda_lod_unclamped(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+compute_lambda_lod_unclamped(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
  const float s[TGSI_QUAD_SIZE],
  const float t[TGSI_QUAD_SIZE],
  const float p[TGSI_QUAD_SIZE],
@@ -1965,8 +1965,8 @@ clamp_lod(const struct sp_sampler_view *sp_sview,
  * Get mip level relative to base level for linear mip filter
  */
 static void
-mip_rel_level_linear(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+mip_rel_level_linear(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
  const float lod[TGSI_QUAD_SIZE],
  float level[TGSI_QUAD_SIZE])
 {
@@ -2039,8 +2039,8 @@ mip_filter_linear(struct sp_sampler_view *sp_sview,
  * Get mip level relative to base level for nearest mip filter
  */
 static void
-mip_rel_level_nearest(struct sp_sampler_view *sp_sview,
-  struct sp_sampler *sp_samp,
+mip_rel_level_nearest(const struct sp_sampler_view *sp_sview,
+  const struct sp_sampler *sp_samp,
   const float lod[TGSI_QUAD_SIZE],
   float level[TGSI_QUAD_SIZE])
 {
@@ -2109,8 +2109,8 @@ mip_filter_nearest(struct sp_sampler_view *sp_sview,
  * Get mip level relative to base level for none mip filter
  */
 static void
-mip_rel_level_none(struct sp_sampler_view *sp_sview,
-   struct sp_sampler *sp_samp,
+mip_rel_level_none(const struct sp_sampler_view *sp_sview,
+   const struct sp_sampler *sp_samp,
const float lod[TGSI_QUAD_SIZE],
float level[TGSI_QUAD_SIZE])
 {
@@ -2163,8 +2163,8 @@ mip_filter_none(struct sp_sampler_view *sp_sview,
  * Get mip level relative to base level for none mip filter
  */
 static void
-mip_rel_level_none_no_filter_select(struct sp_sampler_view *sp_sview,
-struct sp_sampler *sp_samp,
+mip_rel_level_none_no_filter_select(const struct sp_sampler_view *sp_sview,
+const struct sp_sampler *sp_samp,
 const float lod[TGSI_QUAD_SIZE],
 float level[TGSI_QUAD_SIZE])
 {
@@ -2428,8 +2428,8 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
  * Get mip level relative to base level for linear mip filter
  */
 static void
-mip_rel_level_linear_aniso(struct sp_sampler_view *sp_sview,
-   struct sp_sampler *sp_samp,
+mip_rel_level_linear_aniso(const struct sp_sampler_view *sp_sview,
+   const struct sp_sampler *sp_samp,
const float lod[TGSI_QUAD_SIZE],
float level[TGSI_QUAD_SIZE])
 {
@@ -2551,10 +2551,11 @@ mip_filter_linear_aniso(struct sp_sampler_view 
*sp_sview,
  * Get mip level relative to base level for linear mip filter
  */
 static void
-mip_rel_level_linear_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview,
-  struct sp_sampler *sp_samp,
-  const float lod[TGSI_QUAD_SIZE],
-  float level[TGSI_QUAD_SIZE])
+mip_rel_level_linear_2d_linear_repeat_POT(
+   const struct sp_sampler_view *sp_sview,
+   const struct sp_sampler *sp_samp,
+   const float lod[TGSI_QUAD_SIZE],
+   float level[TGSI_QUAD_SIZE])
 {
mip_rel_level_linear(sp_sview, sp_samp, lod, level);
 }
@@ -3049,9 +3050,9 @@ get_img_filter(const struct sp_sampler_view *sp_sview,
  * or NULL.
  */
 static void
-get_filters(struct sp_sampler_view *sp_sview,
-struct sp_sampler *sp_samp,
-enum tgsi_sampler_control control,
+get_filters(const struct sp_sampler_view *sp_sview,
+const struct sp_sampler *sp_samp,
+const enum tgsi_sampler_control control,
 const struct sp_filter_funcs **funcs,
 img_filter_func *min,
 img_filter_func *mag)
@@ -3122,8 +3123,8 @@ sample_mip(struct sp_

[Mesa-dev] [PATCH 0/7] Const fixes/cleanups in softpipe's tex samplers

2015-09-11 Thread Krzesimir Nowak

This stops abusing sp_sampler_view as local storage and thus
allows putting some more const modifiers in front of some types.

I might have gotten carried away a bit too much with the last commit. :) So
if you don't like it, then please ignore it.

Tested it with piglit shader tests and got no regressions.

Krzesimir Nowak (7):
  softpipe: Move the faces array from view to filter_args
  softpipe: Constify some sampler and view parameters
  tgsi, softpipe: Constify tgsi_sampler in query_lod vfunc
  softpipe: Constify sampler and view parameters in img filters
  softpipe: Constify sampler and view parameters in mip filters
  softpipe: Constify sp_tgsi_sampler
  softpipe: Constify variables

 src/gallium/auxiliary/tgsi/tgsi_exec.h   |   2 +-
 src/gallium/drivers/softpipe/sp_tex_sample.c | 663 +--
 src/gallium/drivers/softpipe/sp_tex_sample.h |  16 +-
 3 files changed, 328 insertions(+), 353 deletions(-)

-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 5/7] softpipe: Constify sampler and view parameters in mip filters

2015-09-11 Thread Krzesimir Nowak

Those functions actually could always take them as constants.
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 32 ++--
 src/gallium/drivers/softpipe/sp_tex_sample.h |  4 ++--
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
b/src/gallium/drivers/softpipe/sp_tex_sample.c
index a2f18a4..c91288e 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -1908,8 +1908,8 @@ compute_lambda_lod_unclamped(const struct sp_sampler_view 
*sp_sview,
  * \param lod results per-fragment lod.
  */
 static inline void
-compute_lambda_lod(struct sp_sampler_view *sp_sview,
-   struct sp_sampler *sp_samp,
+compute_lambda_lod(const struct sp_sampler_view *sp_sview,
+   const struct sp_sampler *sp_samp,
const float s[TGSI_QUAD_SIZE],
const float t[TGSI_QUAD_SIZE],
const float p[TGSI_QUAD_SIZE],
@@ -1974,8 +1974,8 @@ mip_rel_level_linear(const struct sp_sampler_view 
*sp_sview,
 }
 
 static void
-mip_filter_linear(struct sp_sampler_view *sp_sview,
-  struct sp_sampler *sp_samp,
+mip_filter_linear(const struct sp_sampler_view *sp_sview,
+  const struct sp_sampler *sp_samp,
   img_filter_func min_filter,
   img_filter_func mag_filter,
   const float s[TGSI_QUAD_SIZE],
@@ -2060,8 +2060,8 @@ mip_rel_level_nearest(const struct sp_sampler_view 
*sp_sview,
  * \param c0  the LOD bias factors, or absolute LODs (depending on control)
  */
 static void
-mip_filter_nearest(struct sp_sampler_view *sp_sview,
-   struct sp_sampler *sp_samp,
+mip_filter_nearest(const struct sp_sampler_view *sp_sview,
+   const struct sp_sampler *sp_samp,
img_filter_func min_filter,
img_filter_func mag_filter,
const float s[TGSI_QUAD_SIZE],
@@ -2122,8 +2122,8 @@ mip_rel_level_none(const struct sp_sampler_view *sp_sview,
 }
 
 static void
-mip_filter_none(struct sp_sampler_view *sp_sview,
-struct sp_sampler *sp_samp,
+mip_filter_none(const struct sp_sampler_view *sp_sview,
+const struct sp_sampler *sp_samp,
 img_filter_func min_filter,
 img_filter_func mag_filter,
 const float s[TGSI_QUAD_SIZE],
@@ -2172,8 +2172,8 @@ mip_rel_level_none_no_filter_select(const struct 
sp_sampler_view *sp_sview,
 }
 
 static void
-mip_filter_none_no_filter_select(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+mip_filter_none_no_filter_select(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
  img_filter_func min_filter,
  img_filter_func mag_filter,
  const float s[TGSI_QUAD_SIZE],
@@ -2232,8 +2232,8 @@ create_filter_table(void)
  * "Fundamentals of Texture Mapping and Image Warping" (1989)
  */
 static void
-img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
-  struct sp_sampler *sp_samp,
+img_filter_2d_ewa(const struct sp_sampler_view *sp_sview,
+  const struct sp_sampler *sp_samp,
   img_filter_func min_filter,
   img_filter_func mag_filter,
   const float s[TGSI_QUAD_SIZE],
@@ -2440,8 +2440,8 @@ mip_rel_level_linear_aniso(const struct sp_sampler_view 
*sp_sview,
  * Sample 2D texture using an anisotropic filter.
  */
 static void
-mip_filter_linear_aniso(struct sp_sampler_view *sp_sview,
-struct sp_sampler *sp_samp,
+mip_filter_linear_aniso(const struct sp_sampler_view *sp_sview,
+const struct sp_sampler *sp_samp,
 img_filter_func min_filter,
 img_filter_func mag_filter,
 const float s[TGSI_QUAD_SIZE],
@@ -2566,8 +2566,8 @@ mip_rel_level_linear_2d_linear_repeat_POT(
  */
 static void
 mip_filter_linear_2d_linear_repeat_POT(
-   struct sp_sampler_view *sp_sview,
-   struct sp_sampler *sp_samp,
+   const struct sp_sampler_view *sp_sview,
+   const struct sp_sampler *sp_samp,
img_filter_func min_filter,
img_filter_func mag_filter,
const float s[TGSI_QUAD_SIZE],
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h 
b/src/gallium/drivers/softpipe/sp_tex_sample.h
index e8a0051..83ee3a3 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.h
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.h
@@ -75,8 +75,8 @@ struct filter_args {
const float *faces;
 };
 
-typedef void (*mip_filter_func)(struct sp_sampler_view *sp_sview,
-struct sp_sampler *sp_samp,
+typedef void (*mip_filter_func)(const struct sp_sampler_view *sp_sview,
+c

[Mesa-dev] [PATCH 6/7] softpipe: Constify sp_tgsi_sampler

2015-09-11 Thread Krzesimir Nowak

Add a small inline function doing the casting - this is to make sure
we don't do a cast from some completely unrelated type. This commit
does not make tgsi_sampler parameters const in vfuncs themselves for
now - probably llvmpipe would need looking at before making such a
change.
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 35 ++--
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
b/src/gallium/drivers/softpipe/sp_tex_sample.c
index c91288e..b0c4989 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -2658,8 +2658,8 @@ static const struct sp_filter_funcs 
funcs_linear_2d_linear_repeat_POT = {
  * Do shadow/depth comparisons.
  */
 static void
-sample_compare(struct sp_sampler_view *sp_sview,
-   struct sp_sampler *sp_samp,
+sample_compare(const struct sp_sampler_view *sp_sview,
+   const struct sp_sampler *sp_samp,
const float s[TGSI_QUAD_SIZE],
const float t[TGSI_QUAD_SIZE],
const float p[TGSI_QUAD_SIZE],
@@ -3083,8 +3083,8 @@ get_filters(const struct sp_sampler_view *sp_sview,
 }
 
 static void
-sample_mip(struct sp_sampler_view *sp_sview,
-   struct sp_sampler *sp_samp,
+sample_mip(const struct sp_sampler_view *sp_sview,
+   const struct sp_sampler *sp_samp,
const float s[TGSI_QUAD_SIZE],
const float t[TGSI_QUAD_SIZE],
const float p[TGSI_QUAD_SIZE],
@@ -3209,7 +3209,8 @@ convert_cube(const struct sp_sampler_view *sp_sview,
 
 
 static void
-sp_get_dims(struct sp_sampler_view *sp_sview, int level,
+sp_get_dims(const struct sp_sampler_view *sp_sview,
+int level,
 int dims[4])
 {
const struct pipe_sampler_view *view = &sp_sview->base;
@@ -3267,7 +3268,7 @@ sp_get_dims(struct sp_sampler_view *sp_sview, int level,
  * coords to the texture image size.
  */
 static void
-sp_get_texels(struct sp_sampler_view *sp_sview,
+sp_get_texels(const struct sp_sampler_view *sp_sview,
   const int v_i[TGSI_QUAD_SIZE],
   const int v_j[TGSI_QUAD_SIZE],
   const int v_k[TGSI_QUAD_SIZE],
@@ -3537,12 +3538,20 @@ softpipe_create_sampler_view(struct pipe_context *pipe,
 }
 
 
+static inline const struct sp_tgsi_sampler *
+sp_tgsi_sampler_cast_c(const struct tgsi_sampler *sampler)
+{
+   return (const struct sp_tgsi_sampler *)sampler;
+}
+
+
 static void
 sp_tgsi_get_dims(struct tgsi_sampler *tgsi_sampler,
  const unsigned sview_index,
  int level, int dims[4])
 {
-   struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
+   const struct sp_tgsi_sampler *sp_samp =
+  sp_tgsi_sampler_cast_c(tgsi_sampler);
 
assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
/* always have a view here but texture is NULL if no sampler view was set. 
*/
@@ -3568,9 +3577,10 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler,
 enum tgsi_sampler_control control,
 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
 {
-   struct sp_tgsi_sampler *sp_tgsi_samp = (struct sp_tgsi_sampler 
*)tgsi_sampler;
-   struct sp_sampler_view *sp_sview;
-   struct sp_sampler *sp_samp;
+   const struct sp_tgsi_sampler *sp_tgsi_samp =
+  sp_tgsi_sampler_cast_c(tgsi_sampler);
+   const struct sp_sampler_view *sp_sview;
+   const struct sp_sampler *sp_samp;
struct filter_args filt_args;
 
assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
@@ -3626,7 +3636,7 @@ sp_tgsi_query_lod(const struct tgsi_sampler *tgsi_sampler,
static const float lod_in[TGSI_QUAD_SIZE] = { 0.0, 0.0, 0.0, 0.0 };
 
const struct sp_tgsi_sampler *sp_tgsi_samp =
-  (const struct sp_tgsi_sampler *)tgsi_sampler;
+  sp_tgsi_sampler_cast_c(tgsi_sampler);
const struct sp_sampler_view *sp_sview;
const struct sp_sampler *sp_samp;
const struct sp_filter_funcs *funcs;
@@ -3674,7 +3684,8 @@ sp_tgsi_get_texel(struct tgsi_sampler *tgsi_sampler,
   const int lod[TGSI_QUAD_SIZE], const int8_t offset[3],
   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
 {
-   struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
+   const struct sp_tgsi_sampler *sp_samp =
+  sp_tgsi_sampler_cast_c(tgsi_sampler);
 
assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
/* always have a view here but texture is NULL if no sampler view was set. 
*/
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 7/7] softpipe: Constify variables

2015-09-11 Thread Krzesimir Nowak

This commit makes a lot of variables constant - this is basically done
by moving the computation to variable definition. Some of them are
moved into lower scopes (like in img_filter_2d_ewa).
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 441 ---
 1 file changed, 199 insertions(+), 242 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
b/src/gallium/drivers/softpipe/sp_tex_sample.c
index b0c4989..dfe38af 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -135,7 +135,7 @@ wrap_nearest_repeat(float s, unsigned size, int offset, int 
*icoord)
 {
/* s limited to [0,1) */
/* i limited to [0,size-1] */
-   int i = util_ifloor(s * size);
+   const int i = util_ifloor(s * size);
*icoord = repeat(i + offset, size);
 }
 
@@ -280,7 +280,7 @@ static void
 wrap_linear_repeat(float s, unsigned size, int offset,
int *icoord0, int *icoord1, float *w)
 {
-   float u = s * size - 0.5F;
+   const float u = s * size - 0.5F;
*icoord0 = repeat(util_ifloor(u) + offset, size);
*icoord1 = repeat(*icoord0 + 1, size);
*w = frac(u);
@@ -291,9 +291,8 @@ static void
 wrap_linear_clamp(float s, unsigned size, int offset,
   int *icoord0, int *icoord1, float *w)
 {
-   float u = CLAMP(s * size + offset, 0.0F, (float)size);
+   const float u = CLAMP(s * size + offset, 0.0F, (float)size) - 0.5f;
 
-   u = u - 0.5f;
*icoord0 = util_ifloor(u);
*icoord1 = *icoord0 + 1;
*w = frac(u);
@@ -304,8 +303,7 @@ static void
 wrap_linear_clamp_to_edge(float s, unsigned size, int offset,
   int *icoord0, int *icoord1, float *w)
 {
-   float u = CLAMP(s * size + offset, 0.0F, (float)size);
-   u = u - 0.5f;
+   const float u = CLAMP(s * size + offset, 0.0F, (float)size) - 0.5f;
*icoord0 = util_ifloor(u);
*icoord1 = *icoord0 + 1;
if (*icoord0 < 0)
@@ -322,8 +320,7 @@ wrap_linear_clamp_to_border(float s, unsigned size, int 
offset,
 {
const float min = -0.5F;
const float max = (float)size + 0.5F;
-   float u = CLAMP(s * size + offset, min, max);
-   u = u - 0.5f;
+   const float u = CLAMP(s * size + offset, min, max) - 0.5f;
*icoord0 = util_ifloor(u);
*icoord1 = *icoord0 + 1;
*w = frac(u);
@@ -391,12 +388,8 @@ wrap_linear_mirror_clamp_to_border(float s, unsigned size, 
int offset,
 {
const float min = -0.5F;
const float max = size + 0.5F;
-   float u = fabsf(s * size + offset);
-   if (u <= min)
-  u = min;
-   else if (u >= max)
-  u = max;
-   u -= 0.5F;
+   const float t = fabsf(s * size + offset);
+   const float u = CLAMP(t, min, max) - 0.5F;
*icoord0 = util_ifloor(u);
*icoord1 = *icoord0 + 1;
*w = frac(u);
@@ -409,7 +402,7 @@ wrap_linear_mirror_clamp_to_border(float s, unsigned size, 
int offset,
 static void
 wrap_nearest_unorm_clamp(float s, unsigned size, int offset, int *icoord)
 {
-   int i = util_ifloor(s);
+   const int i = util_ifloor(s);
*icoord = CLAMP(i + offset, 0, (int) size-1);
 }
 
@@ -442,7 +435,7 @@ wrap_linear_unorm_clamp(float s, unsigned size, int offset,
 int *icoord0, int *icoord1, float *w)
 {
/* Not exactly what the spec says, but it matches NVIDIA output */
-   float u = CLAMP(s + offset - 0.5F, 0.0f, (float) size - 1.0f);
+   const float u = CLAMP(s + offset - 0.5F, 0.0f, (float) size - 1.0f);
*icoord0 = util_ifloor(u);
*icoord1 = *icoord0 + 1;
*w = frac(u);
@@ -456,8 +449,7 @@ static void
 wrap_linear_unorm_clamp_to_border(float s, unsigned size, int offset,
   int *icoord0, int *icoord1, float *w)
 {
-   float u = CLAMP(s + offset, -0.5F, (float) size + 0.5F);
-   u -= 0.5F;
+   const float u = CLAMP(s + offset, -0.5F, (float) size + 0.5F) - 0.5F;
*icoord0 = util_ifloor(u);
*icoord1 = *icoord0 + 1;
if (*icoord1 > (int) size - 1)
@@ -473,8 +465,7 @@ static void
 wrap_linear_unorm_clamp_to_edge(float s, unsigned size, int offset,
 int *icoord0, int *icoord1, float *w)
 {
-   float u = CLAMP(s + offset, +0.5F, (float) size - 0.5F);
-   u -= 0.5F;
+   const float u = CLAMP(s + offset, +0.5F, (float) size - 0.5F) - 0.5F;
*icoord0 = util_ifloor(u);
*icoord1 = *icoord0 + 1;
if (*icoord1 > (int) size - 1)
@@ -489,7 +480,7 @@ wrap_linear_unorm_clamp_to_edge(float s, unsigned size, int 
offset,
 static inline int
 coord_to_layer(float coord, unsigned first_layer, unsigned last_layer)
 {
-   int c = util_ifloor(coord + 0.5F);
+   const int c = util_ifloor(coord + 0.5F);
return CLAMP(c, (int)first_layer, (int)last_layer);
 }
 
@@ -505,9 +496,9 @@ compute_lambda_1d(const struct sp_sampler_view *sview,
   const float p[TGSI_QUAD_SIZE])
 {
const struct pipe_resource *texture = sview->base.texture;
-   float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
-   float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_

Re: [Mesa-dev] [PATCH 0/4] gallium: add support for retrieving number of texture samples

2015-09-11 Thread Ilia Mirkin

On Fri, Sep 11, 2015 at 10:47 AM, Brian Paul  wrote:
> On 09/11/2015 07:26 AM, Roland Scheidegger wrote:
>>
>> Am 11.09.2015 um 05:15 schrieb Ilia Mirkin:
>>>
>>> My hope was (as you can see in the last patch) to enable this for all
>>> drivers that support MS textures. I've got nv50/nvc0/r600g covered.
>>>
>>> RadeonSI will have to read the data from the texture descriptor. I'm
>>> totally unfamiliar with LLVM, the descriptor format, etc. Hopefully
>>> someone will be able to write an appropriate patch.
>>>
>>> SVGA seems to support ms textures, but I didn't see anything in its
>>> virtual ISA that would fit. VMWare folk -- is this going to be easy to
>>> add? Or do I need to add a new PIPE_CAP?
>>
>> Note the d3d equivalent (sample_info) is d3d10.1 and svga3d opcodes only
>> cover d3d10, so yes this isn't doable. So I guess for the time being,
>> either a cap bit or emulation (stick it into a uniform) should be done,
>> though Brian probably knows better.
>
>
> Yeah, we could emulate it with an extra constant, but I don't think I'll
> have time to look at it for a while.  Can we use a new CAP for now?

Bah humbug. OK, cap it is.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/4] tgsi: add a TXQS opcode to retrieve the number of texture samples

2015-09-11 Thread Ilia Mirkin

On Fri, Sep 11, 2015 at 10:42 AM, Brian Paul  wrote:
> On 09/10/2015 09:15 PM, Ilia Mirkin wrote:
>>
>> Signed-off-by: Ilia Mirkin 
>> ---
>>   src/gallium/auxiliary/tgsi/tgsi_info.c |  3 ++-
>>   src/gallium/docs/source/tgsi.rst   | 12 +++-
>>   src/gallium/include/pipe/p_shader_tokens.h |  1 +
>>   3 files changed, 14 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c
>> b/src/gallium/auxiliary/tgsi/tgsi_info.c
>> index fb29ea0..3b40c3d 100644
>> --- a/src/gallium/auxiliary/tgsi/tgsi_info.c
>> +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
>> @@ -141,7 +141,7 @@ static const struct tgsi_opcode_info
>> opcode_info[TGSI_OPCODE_LAST] =
>>  { 0, 0, 0, 1, 1, 0, NONE, "ENDLOOP", TGSI_OPCODE_ENDLOOP },
>>  { 0, 0, 0, 0, 1, 0, NONE, "ENDSUB", TGSI_OPCODE_ENDSUB },
>>  { 1, 1, 1, 0, 0, 0, OTHR, "TXQ_LZ", TGSI_OPCODE_TXQ_LZ },
>> -   { 0, 0, 0, 0, 0, 0, NONE, "", 104 }, /* removed */
>> +   { 1, 1, 1, 0, 0, 0, OTHR, "TXQS", TGSI_OPCODE_TXQS },
>>  { 0, 0, 0, 0, 0, 0, NONE, "", 105 }, /* removed */
>>  { 0, 0, 0, 0, 0, 0, NONE, "", 106 }, /* removed */
>>  { 0, 0, 0, 0, 0, 0, NONE, "NOP", TGSI_OPCODE_NOP },
>> @@ -331,6 +331,7 @@ tgsi_opcode_infer_type( uint opcode )
>>  case TGSI_OPCODE_SAD: /* XXX some src args may be signed for SAD ? */
>>  case TGSI_OPCODE_TXQ:
>>  case TGSI_OPCODE_TXQ_LZ:
>> +   case TGSI_OPCODE_TXQS:
>>  case TGSI_OPCODE_F2U:
>>  case TGSI_OPCODE_UDIV:
>>  case TGSI_OPCODE_UMAD:
>> diff --git a/src/gallium/docs/source/tgsi.rst
>> b/src/gallium/docs/source/tgsi.rst
>> index 314c9ca..e773e9d 100644
>> --- a/src/gallium/docs/source/tgsi.rst
>> +++ b/src/gallium/docs/source/tgsi.rst
>> @@ -960,7 +960,6 @@ XXX doesn't look like most of the opcodes really
>> belong here.
>> For components which don't return a resource dimension, their value
>> is undefined.
>>
>> -
>>   .. math::
>>
>> lod = src0.x
>> @@ -973,6 +972,17 @@ XXX doesn't look like most of the opcodes really
>> belong here.
>>
>> dst.w = texture\_levels(unit)
>>
>> +
>> +.. opcode:: TXQS - Texture Samples Query
>> +
>> +  This retrieves the number of samples in the sampler, and stores it
>
>
> I'd replace 'sampler' with 'texture', as Roland suggested.

Will do. I said 'sampler' since it takes a SAMP[] argument, not a
SVIEW[] arg. But it's a minor point, and 'texture' is definitely
clearer as to the intent.

>
>
>> +  into the x component. The other components are undefined.
>> +
>> +.. math::
>> +
>> +  dst.x = texture\_samples(unit)
>> +
>> +
>>   .. opcode:: TG4 - Texture Gather
>>
>> As per ARB_texture_gather, gathers the four texels to be used in a
>> bi-linear
>> diff --git a/src/gallium/include/pipe/p_shader_tokens.h
>> b/src/gallium/include/pipe/p_shader_tokens.h
>> index 6e07b2c..b36e0a3 100644
>> --- a/src/gallium/include/pipe/p_shader_tokens.h
>> +++ b/src/gallium/include/pipe/p_shader_tokens.h
>> @@ -402,6 +402,7 @@ struct tgsi_property_data {
>>   #define TGSI_OPCODE_ENDLOOP 101
>>   #define TGSI_OPCODE_ENDSUB  102
>>   #define TGSI_OPCODE_TXQ_LZ  103 /* TXQ for mipmap level 0 */
>> +#define TGSI_OPCODE_TXQS104
>>   /* gap */
>>   #define TGSI_OPCODE_NOP 107
>>
>>
>
> Do you also need to add TXQS in the switch in tgsi_util.c's
> tgsi_util_get_inst_usage_mask()?  Looks like several other recently added
> opcodes (like TXQ, TXQ_LZ) aren't present there either though.

tgsi_util_get_inst_usage_mask is about usage of sources. TXQ_LZ and
TXQS don't have sources. TXQ should probably be added in there... not
sure what cares about this though.

>
> And what about an addition to tgsi_opcode_tmp.h?

Meh. When someone wants to use it with ureg, they can add it. I don't
think I've been adding stuff in there for most new opcodes. None of
the ARB_gs5 stuff is there, nor is the fp64 stuff (which Dave added).

>
> Looks good otherwise.

Thanks for looking!

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] i965: L3 cache partitioning.

2015-09-11 Thread Ben Widawsky

On Fri, Sep 11, 2015 at 10:24:29AM -0700, Ben Widawsky wrote:
> On Sun, Sep 06, 2015 at 06:12:38PM +0200, Francisco Jerez wrote:
> > This series implements dynamic partitioning of the L3 cache space
> > among its clients, the purpose is multiple:
> > 
> >  - Steal a chunk of L3 space when necessary and reserve it for SLM as
> >required to support compute shaders with shared variables.
> > 
> >  - Allow L3 caching of dataport DC memory access where the default L3
> >partitioning doesn't have any space reserved for it (pre-Gen8) --
> >Should improve performance of scratch access (register spills and
> >fills and some forms of indirect array indexing), atomic counters
> >and images.
> > 
> >  - Allow dynamic changes of the L3 configuration for work-loads that
> >could benefit from a partitioning other than the default
> >(e.g. reduce URB size to gain some additional cache space on
> >heavily fragment-bound workloads, or split the L3 allocation of
> >different clients to reduce thrashing).  The basic infrastructure
> >to achieve this is implemented here but no specific heuristics are
> >included yet in this series.
> 
> I admit to not knowing how this stuff works pre-GEN8, but it was my impression 
> that
> on GEN8+ these kind of tweaks will make no difference to 3D clients other than
> for constant buffers, and scratch space. Every other client of the L3 uses a
> fixed size. Therefore I am skeptical of your last claim and I'd very much like
> it if you could help me find where the theory came from and certainly some
> amount of performance data would be very welcome as well. 
> 
> I certainly believe the partitioning is critical for optimal usage of SLM, and
> as you mention, ensuring that other users of the dynamic partitioning don't
> screw us over. It's the rest that I'm unsure of.
> 

Interesting. My information seems to be GEN9+. GEN8 does seem to have a balance
with the L3 D$

> > 
> > The series can be found here in a testable form:
> > http://cgit.freedesktop.org/~currojerez/mesa/log/?h=i965-l3-partitioning
> > 
> > [PATCH 01/13] i965: Define symbolic constants for some useful L3 cache 
> > control registers.
> > [PATCH 02/13] i965: Keep track of whether LRI is allowed in the context 
> > struct.
> > [PATCH 03/13] i965: Define state flag to signal that the URB size has been 
> > altered.
> > [PATCH 04/13] i965/gen8: Don't add workaround bits to PIPE_CONTROL stalls 
> > if DC flush is set.
> > [PATCH 05/13] i965: Import tables enumerating the set of validated L3 
> > configurations.
> > [PATCH 06/13] i965: Implement programming of the L3 configuration.
> > [PATCH 07/13] i965/hsw: Enable L3 atomics.
> > [PATCH 08/13] i965: Implement selection of the closest L3 configuration 
> > based on a vector of weights.
> > [PATCH 09/13] i965: Calculate appropriate L3 partition weights for the 
> > current pipeline state.
> > [PATCH 10/13] i965: Implement L3 state atom.
> > [PATCH 11/13] i965: Add debug flag to print out the new L3 state during 
> > transitions.
> > [PATCH 12/13] i965: Work around L3 state leaks during context switches.
> > [PATCH 13/13] i965: Hook up L3 partitioning state atom.
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] i965: L3 cache partitioning.

2015-09-11 Thread Ben Widawsky

On Fri, Sep 11, 2015 at 11:37:21AM -0700, Ben Widawsky wrote:
> On Fri, Sep 11, 2015 at 10:24:29AM -0700, Ben Widawsky wrote:
> > On Sun, Sep 06, 2015 at 06:12:38PM +0200, Francisco Jerez wrote:
> > > This series implements dynamic partitioning of the L3 cache space
> > > among its clients, the purpose is multiple:
> > > 
> > >  - Steal a chunk of L3 space when necessary and reserve it for SLM as
> > >required to support compute shaders with shared variables.
> > > 
> > >  - Allow L3 caching of dataport DC memory access where the default L3
> > >partitioning doesn't have any space reserved for it (pre-Gen8) --
> > >Should improve performance of scratch access (register spills and
> > >fills and some forms of indirect array indexing), atomic counters
> > >and images.
> > > 
> > >  - Allow dynamic changes of the L3 configuration for work-loads that
> > >could benefit from a partitioning other than the default
> > >(e.g. reduce URB size to gain some additional cache space on
> > >heavily fragment-bound workloads, or split the L3 allocation of
> > >different clients to reduce thrashing).  The basic infrastructure
> > >to achieve this is implemented here but no specific heuristics are
> > >included yet in this series.
> > 
> > I admit to not knowing how this stuff works pre-GEN8, but it was my impression 
> > that
> > on GEN8+ these kind of tweaks will make no difference to 3D clients other 
> > than
> > for constant buffers, and scratch space. Every other client of the L3 uses a
> > fixed size. Therefore I am skeptical of your last claim and I'd very much 
> > like
> > it if you could help me find where the theory came from and certainly some
> > amount of performance data would be very welcome as well. 
> > 
> > I certainly believe the partitioning is critical for optimal usage of SLM, 
> > and
> > as you mention, ensuring that other users of the dynamic partitioning don't
> > screw us over. It's the rest that I'm unsure of.
> > 
> 
> Interesting. My information seems to be GEN9+. GEN8 does seem to have a 
> balance
> with the L3 D$

Yeah, I retract my statement now. I think there is value in having this
programmability. I looked at this before too, not sure how I forgot we can
actually split the DC and RO.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] i965/skl: Use larger URB size where available.

2015-09-11 Thread Jordan Justen

On 2015-09-10 16:59:12, Ben Widawsky wrote:
> All SKL SKUs except the lowest one which has half the L3 size actually have 
> 384K

These commit message lines seem to wrap a bit long. This first line is
80 characters.

> of URB per slice.
> 
> For once, I can explain how this mistake was made and how it was missed in
> review...  Historically when we enable a platform and put the production 
> sizes,
> you can simply look at the "smallest" SKU and see what its URB size is (and we
> assumed it was the 1 slice variant). Since on newer platforms the URB sizes 
> are
> scaled automatically by HW, this was sufficient. On SKL, this is a bit 
> different
> as the lowest SKU actually has half of the L3 fused off. GT2 is the 1 slice 
> (not
> GT1) variant and it has 384K.
> 
> There are no Jenkins tests fixed (or regressions) and we don't expect any 
> fixes
> here because you can always run with less URB size - this potentially improves
> performance.

It would be nice if we were able to find a benchmark that improves
from this change. If we can't then maybe we should just remove this
paragraph. It seems like the right change regardless.

Reviewed-by: Jordan Justen 

> Thanks to Sarah for bringing this to my attention.

Reported-by?

> Cc: "10.6 11.0" 
> Cc: Neil Roberts 
> Cc: Sarah Sharp 
> Cc: Jordan Justen 
> Signed-off-by: Ben Widawsky 
> ---
>  src/mesa/drivers/dri/i965/brw_device_info.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c 
> b/src/mesa/drivers/dri/i965/brw_device_info.c
> index 7ad3a2f..a6a3bb6 100644
> --- a/src/mesa/drivers/dri/i965/brw_device_info.c
> +++ b/src/mesa/drivers/dri/i965/brw_device_info.c
> @@ -314,7 +314,7 @@ static const struct brw_device_info brw_device_info_chv = 
> {
> .max_wm_threads = 64 * 6,\
> .max_cs_threads = 56,\
> .urb = { \
> -  .size = 192,  \
> +  .size = 384,  \
>.min_vs_entries = 64, \
>.max_vs_entries = 1856,   \
>.max_hs_entries = 672,\
> @@ -324,6 +324,7 @@ static const struct brw_device_info brw_device_info_chv = 
> {
>  
>  static const struct brw_device_info brw_device_info_skl_gt1 = {
> GEN9_FEATURES, .gt = 1,
> +   .urb.size = 192,
>  };
>  
>  static const struct brw_device_info brw_device_info_skl_gt2 = {
> -- 
> 2.5.1
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] i965/skl: Use larger URB size where available.

2015-09-11 Thread Ben Widawsky

On Fri, Sep 11, 2015 at 12:12:15PM -0700, Jordan Justen wrote:
> On 2015-09-10 16:59:12, Ben Widawsky wrote:
> > All SKL SKUs except the lowest one which has half the L3 size actually have 
> > 384K
> 
> These commit message lines seem to wrap a bit long. This first line is
> 80 characters.
> 
> > of URB per slice.
> > 
> > For once, I can explain how this mistake was made and how it was missed in
> > review...  Historically when we enable a platform and put the production 
> > sizes,
> > you can simply look at the "smallest" SKU and see what its URB size is (and 
> > we
> > assumed it was the 1 slice variant). Since on newer platforms the URB sizes 
> > are
> > scaled automatically by HW, this was sufficient. On SKL, this is a bit 
> > different
> > as the lowest SKU actually has half of the L3 fused off. GT2 is the 1 slice 
> > (not
> > GT1) variant and it has 384K.
> > 
> > There are no Jenkins tests fixed (or regressions) and we don't expect any 
> > fixes
> > here because you can always run with less URB size - this potentially 
> > improves
> > performance.
> 
> It would be nice if we were able to find a benchmark that improves
> from this change. If we can't then maybe we should just remove this
> paragraph. It seems like the right change regardless.
> 
> Reviewed-by: Jordan Justen 

I think what I'd like to do is run the perf data to make sure there are at least
no regressions since I am proposing it for stable... Maybe if I don't get around
to that before the next stable release, we'll bail on it for 10.6

> 
> > Thanks to Sarah for bringing this to my attention.
> 
> Reported-by?
> 
> > Cc: "10.6 11.0" 
> > Cc: Neil Roberts 
> > Cc: Sarah Sharp 
> > Cc: Jordan Justen 
> > Signed-off-by: Ben Widawsky 
> > ---
> >  src/mesa/drivers/dri/i965/brw_device_info.c | 3 ++-
> >  1 file changed, 2 insertions(+), 1 deletion(-)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c 
> > b/src/mesa/drivers/dri/i965/brw_device_info.c
> > index 7ad3a2f..a6a3bb6 100644
> > --- a/src/mesa/drivers/dri/i965/brw_device_info.c
> > +++ b/src/mesa/drivers/dri/i965/brw_device_info.c
> > @@ -314,7 +314,7 @@ static const struct brw_device_info brw_device_info_chv 
> > = {
> > .max_wm_threads = 64 * 6,\
> > .max_cs_threads = 56,\
> > .urb = { \
> > -  .size = 192,  \
> > +  .size = 384,  \
> >.min_vs_entries = 64, \
> >.max_vs_entries = 1856,   \
> >.max_hs_entries = 672,\
> > @@ -324,6 +324,7 @@ static const struct brw_device_info brw_device_info_chv 
> > = {
> >  
> >  static const struct brw_device_info brw_device_info_skl_gt1 = {
> > GEN9_FEATURES, .gt = 1,
> > +   .urb.size = 192,
> >  };
> >  
> >  static const struct brw_device_info brw_device_info_skl_gt2 = {
> > -- 
> > 2.5.1
> > 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] mesa/texcompress: add function to determine compressed format type

2015-09-11 Thread Anuj Phogat

On Fri, Aug 28, 2015 at 9:09 AM, Nanley Chery  wrote:

> From: Nanley Chery 
>
> Determines if a compressed format is non-palettized and specific.
>
> Signed-off-by: Nanley Chery 
> ---
>  src/mesa/main/texcompress.c | 10 ++
>  src/mesa/main/texcompress.h |  3 +++
>  2 files changed, 13 insertions(+)
>
> diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c
> index 84973d3..c8598bd 100644
> --- a/src/mesa/main/texcompress.c
> +++ b/src/mesa/main/texcompress.c
> @@ -547,6 +547,16 @@ _mesa_glenum_to_compressed_format(GLenum format)
> }
>  }
>
> +/**
> + * Return whether or not the format is a specific, non-palettized
> compressed
> + * format. This works because all MESA_FORMATs for compressed GL formats
> + * correspond to non-generic compressed formats.
> + */
> +bool
> +_mesa_is_specific_np_compressed_format(GLenum format)
> +{
> +   return _mesa_glenum_to_compressed_format(format) != MESA_FORMAT_NONE;
> +}
>
>  /**
>   * Given a compressed MESA_FORMAT_x value, return the corresponding
> diff --git a/src/mesa/main/texcompress.h b/src/mesa/main/texcompress.h
> index b00924d..b088dd2 100644
> --- a/src/mesa/main/texcompress.h
> +++ b/src/mesa/main/texcompress.h
> @@ -39,6 +39,9 @@ _mesa_get_compressed_formats(struct gl_context *ctx,
> GLint *formats);
>  extern mesa_format
>  _mesa_glenum_to_compressed_format(GLenum format);
>
> +extern bool
> +_mesa_is_specific_np_compressed_format(GLenum format);
> +
>  extern GLenum
>  _mesa_compressed_format_to_glenum(struct gl_context *ctx, mesa_format
> mesaFormat);
>
> --
> 2.5.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>

LGTM. Both patches are:
Reviewed-by: Anuj Phogat 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 0/7] Const fixes/cleanups in softpipe's tex samplers

2015-09-11 Thread Brian Paul


On 09/11/2015 12:07 PM, Krzesimir Nowak wrote:

This stops abusing sp_sampler_view as local storage, which in turn
allows putting some more const modifiers in front of some types.

I might have gotten carried away a bit with the last commit. :) So
if you don't like it, then please ignore it.

Tested it with piglit shader tests and got no regressions.

Krzesimir Nowak (7):
   softpipe: Move the faces array from view to filter_args
   softpipe: Constify some sampler and view parameters
   tgsi, softpipe: Constify tgsi_sampler in query_lod vfunc
   softpipe: Constify sampler and view parameters in img filters
   softpipe: Constify sampler and view parameters in mip filters
   softpipe: Constify sp_tgsi_sampler
   softpipe: Constify variables

  src/gallium/auxiliary/tgsi/tgsi_exec.h   |   2 +-
  src/gallium/drivers/softpipe/sp_tex_sample.c | 663 +--
  src/gallium/drivers/softpipe/sp_tex_sample.h |  16 +-
  3 files changed, 328 insertions(+), 353 deletions(-)



Series, LGTM.  I'll commit/push with my R-b after a little bit of 
testing here.


Thanks!

-Brian

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 7/7] softpipe: Constify variables

2015-09-11 Thread Roland Scheidegger

Honestly, I don't see much point in constifying simple one-time-assigned
local variables (won't make a difference to the compiler anyway, and
even for reading imho doesn't add much). I don't object changing it
though, and the rest of the series definitely looks good to me, I'm glad
you got rid of the faces storage hack.
For the series:
Reviewed-by: Roland Scheidegger 

Am 11.09.2015 um 20:07 schrieb Krzesimir Nowak:
> This commit makes a lot of variables constant - this is basically done
> by moving the computation to variable definition. Some of them are
> moved into lower scopes (like in img_filter_2d_ewa).
> ---
>  src/gallium/drivers/softpipe/sp_tex_sample.c | 441 
> ---
>  1 file changed, 199 insertions(+), 242 deletions(-)
> 
> diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
> b/src/gallium/drivers/softpipe/sp_tex_sample.c
> index b0c4989..dfe38af 100644
> --- a/src/gallium/drivers/softpipe/sp_tex_sample.c
> +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
> @@ -135,7 +135,7 @@ wrap_nearest_repeat(float s, unsigned size, int offset, 
> int *icoord)
>  {
> /* s limited to [0,1) */
> /* i limited to [0,size-1] */
> -   int i = util_ifloor(s * size);
> +   const int i = util_ifloor(s * size);
> *icoord = repeat(i + offset, size);
>  }
>  
> @@ -280,7 +280,7 @@ static void
>  wrap_linear_repeat(float s, unsigned size, int offset,
> int *icoord0, int *icoord1, float *w)
>  {
> -   float u = s * size - 0.5F;
> +   const float u = s * size - 0.5F;
> *icoord0 = repeat(util_ifloor(u) + offset, size);
> *icoord1 = repeat(*icoord0 + 1, size);
> *w = frac(u);
> @@ -291,9 +291,8 @@ static void
>  wrap_linear_clamp(float s, unsigned size, int offset,
>int *icoord0, int *icoord1, float *w)
>  {
> -   float u = CLAMP(s * size + offset, 0.0F, (float)size);
> +   const float u = CLAMP(s * size + offset, 0.0F, (float)size) - 0.5f;
>  
> -   u = u - 0.5f;
> *icoord0 = util_ifloor(u);
> *icoord1 = *icoord0 + 1;
> *w = frac(u);
> @@ -304,8 +303,7 @@ static void
>  wrap_linear_clamp_to_edge(float s, unsigned size, int offset,
>int *icoord0, int *icoord1, float *w)
>  {
> -   float u = CLAMP(s * size + offset, 0.0F, (float)size);
> -   u = u - 0.5f;
> +   const float u = CLAMP(s * size + offset, 0.0F, (float)size) - 0.5f;
> *icoord0 = util_ifloor(u);
> *icoord1 = *icoord0 + 1;
> if (*icoord0 < 0)
> @@ -322,8 +320,7 @@ wrap_linear_clamp_to_border(float s, unsigned size, int 
> offset,
>  {
> const float min = -0.5F;
> const float max = (float)size + 0.5F;
> -   float u = CLAMP(s * size + offset, min, max);
> -   u = u - 0.5f;
> +   const float u = CLAMP(s * size + offset, min, max) - 0.5f;
> *icoord0 = util_ifloor(u);
> *icoord1 = *icoord0 + 1;
> *w = frac(u);
> @@ -391,12 +388,8 @@ wrap_linear_mirror_clamp_to_border(float s, unsigned 
> size, int offset,
>  {
> const float min = -0.5F;
> const float max = size + 0.5F;
> -   float u = fabsf(s * size + offset);
> -   if (u <= min)
> -  u = min;
> -   else if (u >= max)
> -  u = max;
> -   u -= 0.5F;
> +   const float t = fabsf(s * size + offset);
> +   const float u = CLAMP(t, min, max) - 0.5F;
> *icoord0 = util_ifloor(u);
> *icoord1 = *icoord0 + 1;
> *w = frac(u);
> @@ -409,7 +402,7 @@ wrap_linear_mirror_clamp_to_border(float s, unsigned 
> size, int offset,
>  static void
>  wrap_nearest_unorm_clamp(float s, unsigned size, int offset, int *icoord)
>  {
> -   int i = util_ifloor(s);
> +   const int i = util_ifloor(s);
> *icoord = CLAMP(i + offset, 0, (int) size-1);
>  }
>  
> @@ -442,7 +435,7 @@ wrap_linear_unorm_clamp(float s, unsigned size, int 
> offset,
>  int *icoord0, int *icoord1, float *w)
>  {
> /* Not exactly what the spec says, but it matches NVIDIA output */
> -   float u = CLAMP(s + offset - 0.5F, 0.0f, (float) size - 1.0f);
> +   const float u = CLAMP(s + offset - 0.5F, 0.0f, (float) size - 1.0f);
> *icoord0 = util_ifloor(u);
> *icoord1 = *icoord0 + 1;
> *w = frac(u);
> @@ -456,8 +449,7 @@ static void
>  wrap_linear_unorm_clamp_to_border(float s, unsigned size, int offset,
>int *icoord0, int *icoord1, float *w)
>  {
> -   float u = CLAMP(s + offset, -0.5F, (float) size + 0.5F);
> -   u -= 0.5F;
> +   const float u = CLAMP(s + offset, -0.5F, (float) size + 0.5F) - 0.5F;
> *icoord0 = util_ifloor(u);
> *icoord1 = *icoord0 + 1;
> if (*icoord1 > (int) size - 1)
> @@ -473,8 +465,7 @@ static void
>  wrap_linear_unorm_clamp_to_edge(float s, unsigned size, int offset,
>  int *icoord0, int *icoord1, float *w)
>  {
> -   float u = CLAMP(s + offset, +0.5F, (float) size - 0.5F);
> -   u -= 0.5F;
> +   const float u = CLAMP(s + offset, +0.5F, (float) size - 0.5F) - 0.5F;
> *icoord0 = util_ifloor(u);
> *icoord1 = *icoord0 + 1;
> if (*icoord1 > (

Re: [Mesa-dev] [PATCH 1/7] softpipe: Move the faces array from view to filter_args

2015-09-11 Thread Emil Velikov

On 11 September 2015 at 19:07, Krzesimir Nowak  wrote:
> With that, sp_sampler_view instances are not abused anymore as a local
> storage, so we can later make them constant.
> ---
>  src/gallium/drivers/softpipe/sp_tex_sample.c | 36 
> +---
>  src/gallium/drivers/softpipe/sp_tex_sample.h |  4 +---
>  2 files changed, 23 insertions(+), 17 deletions(-)
>
> diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
> b/src/gallium/drivers/softpipe/sp_tex_sample.c
> index 489cae7..d5a7ed6 100644
> --- a/src/gallium/drivers/softpipe/sp_tex_sample.c
> +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c

> @@ -3594,11 +3596,16 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler,
>float cs[TGSI_QUAD_SIZE];
>float ct[TGSI_QUAD_SIZE];
>float cp[TGSI_QUAD_SIZE];
> +  float faces[TGSI_QUAD_SIZE];
>
> -  convert_cube(sp_sview, sp_samp, s, t, p, c0, cs, ct, cp);
> +  convert_cube(sp_sview, sp_samp, s, t, p, c0, cs, ct, cp, faces);
>
> +  filt_args.faces = faces;
If I remember correctly, the contents of faces will become invalid
as we exit the function, so any attempt to use them afterwards (via
filt_args.faces) will make things go crazy.

>sample_mip(sp_sview, sp_samp, cs, ct, cp, c0, lod, &filt_args, rgba);
> } else {
> +  static const float zero_faces[TGSI_QUAD_SIZE] = {0.0f, 0.0f, 0.0f, 
> 0.0f};
> +
> +  filt_args.faces = zero_faces;
Here we should be safe due to the static qualifier.

>sample_mip(sp_sview, sp_samp, s, t, p, c0, lod, &filt_args, rgba);
> }
>  }

> diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h 
> b/src/gallium/drivers/softpipe/sp_tex_sample.h
> index 72b4a1a..6743b7e 100644
> --- a/src/gallium/drivers/softpipe/sp_tex_sample.h
> +++ b/src/gallium/drivers/softpipe/sp_tex_sample.h
> @@ -72,6 +72,7 @@ typedef void (*img_filter_func)(struct sp_sampler_view 
> *sp_sview,
>  struct filter_args {
> enum tgsi_sampler_control control;
> const int8_t *offset;
> +   const float *faces;
Afaict during calculation of face (in convert_cube) uint type is used.
Won't this cause unnecessary int <> float conversions ?

Cheers,
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/6] mesa/glformats: restrict ETC1_RGB8 support to GLES1/2

2015-09-11 Thread Anuj Phogat

On Fri, Aug 28, 2015 at 7:50 AM, Nanley Chery  wrote:

> From: Nanley Chery 
>
> According to the extensions table and our glext headers,
> OES_compressed_ETC1_RGB8_texture is only supported in
> GLES1 and GLES2.
>
> Signed-off-by: Nanley Chery 
> ---
>  src/mesa/main/teximage.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
> index 33b3def..39d1281 100644
> --- a/src/mesa/main/teximage.c
> +++ b/src/mesa/main/teximage.c
> @@ -558,7 +558,8 @@ _mesa_base_tex_format( struct gl_context *ctx, GLint
> internalFormat )
>}
> }
>
> -   if (ctx->Extensions.OES_compressed_ETC1_RGB8_texture) {
> +   if (_mesa_is_gles(ctx) &&
> +  ctx->Extensions.OES_compressed_ETC1_RGB8_texture) {
>switch (internalFormat) {
>case GL_ETC1_RGB8_OES:
>   return GL_RGB;
> --
> 2.5.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>

Reviewed-by: Anuj Phogat 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH V3 1/6] glsl: order indices for samplers inside a struct array

2015-09-11 Thread Timothy Arceri

On Thu, 2015-09-10 at 21:36 -0700, Jason Ekstrand wrote:
> On Thu, Sep 10, 2015 at 7:43 PM, Timothy Arceri <
> t_arc...@yahoo.com.au> wrote:
> > On Tue, 2015-09-08 at 14:14 -0700, Jason Ekstrand wrote:
> > > On Mon, Sep 7, 2015 at 3:09 PM, Timothy Arceri <
> > > t_arc...@yahoo.com.au
> > > > wrote:
> > > > On Mon, 2015-09-07 at 11:24 -0700, Jason Ekstrand wrote:
> > > > > On Tue, Sep 1, 2015 at 7:44 PM, Timothy Arceri <
> > > > > t_arc...@yahoo.com.au>
> > > > > wrote:
> > > > > > This allows the correct offset to be easily calculated for
> > > > > > indirect
> > > > > > indexing when a struct array contains multiple samplers, or
> > > > > > any
> > > > > > crazy
> > > > > > nesting.
> > > > > > 
> > > > > > The indices for the following struct will now look like this:
> > > > > > Sampler index: 0 Name: s[0].tex
> > > > > > Sampler index: 1 Name: s[1].tex
> > > > > > Sampler index: 2 Name: s[0].si.tex
> > > > > > Sampler index: 3 Name: s[1].si.tex
> > > > > > Sampler index: 4 Name: s[0].si.tex2
> > > > > > Sampler index: 5 Name: s[1].si.tex2
> > > > > > 
> > > > > > Before this change it looked like this:
> > > > > > Sampler index: 0 Name: s[0].tex
> > > > > > Sampler index: 3 Name: s[1].tex
> > > > > > Sampler index: 1 Name: s[0].si.tex
> > > > > > Sampler index: 4 Name: s[1].si.tex
> > > > > > Sampler index: 2 Name: s[0].si.tex2
> > > > > > Sampler index: 5 Name: s[1].si.tex2
> > > > > > 
> > > > > > struct S_inner {
> > > > > >sampler2D tex;
> > > > > >sampler2D tex2;
> > > > > > };
> > > > > > 
> > > > > > struct S {
> > > > > >sampler2D tex;
> > > > > >S_inner si;
> > > > > > };
> > > > > > 
> > > > > > uniform S s[2];
> > > > > > 
> > > > > > V2: rename struct array counter to have better name
> > > > > > ---
> > > > > >  src/glsl/link_uniforms.cpp | 112
> > > > > > ++--
> > > > > > -
> > > > > >  src/glsl/linker.h  |   4 +-
> > > > > >  2 files changed, 98 insertions(+), 18 deletions(-)
> > > > > > 
> > > > > > diff --git a/src/glsl/link_uniforms.cpp
> > > > > > b/src/glsl/link_uniforms.cpp
> > > > > > index 254086d..5402c99 100644
> > > > > > --- a/src/glsl/link_uniforms.cpp
> > > > > > +++ b/src/glsl/link_uniforms.cpp
> > > > > > @@ -28,6 +28,7 @@
> > > > > >  #include "glsl_symbol_table.h"
> > > > > >  #include "program/hash_table.h"
> > > > > >  #include "program.h"
> > > > > > +#include "util/hash_table.h"
> > > > > > 
> > > > > >  /**
> > > > > >   * \file link_uniforms.cpp
> > > > > > @@ -63,14 +64,17 @@ program_resource_visitor::process(const
> > > > > > glsl_type
> > > > > > *type, const char *name)
> > > > > > assert(type->without_array()->is_record()
> > > > > >|| type->without_array()->is_interface());
> > > > > > 
> > > > > > +   unsigned record_array_count = 1;
> > > > > > char *name_copy = ralloc_strdup(NULL, name);
> > > > > > -   recursion(type, &name_copy, strlen(name), false, NULL,
> > > > > > false);
> > > > > > +   recursion(type, &name_copy, strlen(name), false, NULL,
> > > > > > false,
> > > > > > + record_array_count);
> > > > > > ralloc_free(name_copy);
> > > > > >  }
> > > > > > 
> > > > > >  void
> > > > > >  program_resource_visitor::process(ir_variable *var)
> > > > > >  {
> > > > > > +   unsigned record_array_count = 1;
> > > > > > const glsl_type *t = var->type;
> > > > > > const bool row_major =
> > > > > >var->data.matrix_layout ==
> > > > > > GLSL_MATRIX_LAYOUT_ROW_MAJOR;
> > > > > > @@ -111,7 +115,8 @@
> > > > > > program_resource_visitor::process(ir_variable *var)
> > > > > >* lowering is only applied to non-uniform
> > > > > > interface
> > > > > > blocks, so
> > > > > > we
> > > > > >* can safely pass false for row_major.
> > > > > >*/
> > > > > > - recursion(var->type, &name, new_length,
> > > > > > row_major,
> > > > > > NULL, false);
> > > > > > + recursion(var->type, &name, new_length,
> > > > > > row_major,
> > > > > > NULL, false,
> > > > > > +   record_array_count);
> > > > > >}
> > > > > >ralloc_free(name);
> > > > > > } else if (var->data.from_named_ifc_block_nonarray) {
> > > > > > @@ -135,19 +140,23 @@
> > > > > > program_resource_visitor::process(ir_variable *var)
> > > > > > * is only applied to non-uniform interface blocks,
> > > > > > so
> > > > > > we can
> > > > > > safely
> > > > > > * pass false for row_major.
> > > > > > */
> > > > > > -  recursion(var->type, &name, strlen(name), row_major,
> > > > > > NULL, false);
> > > > > > +  recursion(var->type, &name, strlen(name), row_major,
> > > > > > NULL, false,
> > > > > > +record_array_count);
> > > > > >ralloc_free(name);
> > > > > > } else if (t->without_array()->is_record()) {
> > > > > >char *name = ralloc_strdup(NULL, var->name);
> > > > > > -  recursion(var->type, &name, strlen(name), row_major,
> > > > > > NULL, false);

[Mesa-dev] [PATCH v2 4/5] r600g: add support for TXQS tgsi opcode

2015-09-11 Thread Ilia Mirkin

Signed-off-by: Ilia Mirkin 
Reviewed-by: Glenn Kennard 
---
 src/gallium/drivers/r600/r600_pipe.c   |  2 +-
 src/gallium/drivers/r600/r600_shader.c | 16 
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index dfbf0e5..0d8c61e 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -273,6 +273,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+   case PIPE_CAP_TGSI_TXQS:
return 1;
 
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
@@ -341,7 +342,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
-   case PIPE_CAP_TGSI_TXQS:
return 0;
 
/* Stream output. */
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index f2c9e16..8132b6a 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -5582,6 +5582,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
/* Texture fetch instructions can only use gprs as source.
 * Also they cannot negate the source or take the absolute value */
const boolean src_requires_loading = (inst->Instruction.Opcode != 
TGSI_OPCODE_TXQ_LZ &&
+ inst->Instruction.Opcode != 
TGSI_OPCODE_TXQS &&
   
tgsi_tex_src_requires_loading(ctx, 0)) ||
 read_compressed_msaa || 
txf_add_offsets;
 
@@ -6326,6 +6327,12 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
tex.dst_sel_z = 7;
tex.dst_sel_w = 7;
}
+   else if (inst->Instruction.Opcode == TGSI_OPCODE_TXQS) {
+   tex.dst_sel_x = 3;
+   tex.dst_sel_y = 7;
+   tex.dst_sel_z = 7;
+   tex.dst_sel_w = 7;
+   }
else {
tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
@@ -6334,7 +6341,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
}
 
 
-   if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ) {
+   if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ||
+   inst->Instruction.Opcode == TGSI_OPCODE_TXQS) {
tex.src_sel_x = 4;
tex.src_sel_y = 4;
tex.src_sel_z = 4;
@@ -7842,7 +7850,7 @@ static const struct r600_shader_tgsi_instruction 
r600_shader_tgsi_instruction[]
[TGSI_OPCODE_ENDLOOP]   = { ALU_OP0_NOP, tgsi_endloop},
[TGSI_OPCODE_ENDSUB]= { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_TXQ_LZ]= { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
-   [104]   = { ALU_OP0_NOP, tgsi_unsupported},
+   [TGSI_OPCODE_TXQS]  = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
[105]   = { ALU_OP0_NOP, tgsi_unsupported},
[106]   = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_NOP]   = { ALU_OP0_NOP, tgsi_unsupported},
@@ -8041,7 +8049,7 @@ static const struct r600_shader_tgsi_instruction 
eg_shader_tgsi_instruction[] =
[TGSI_OPCODE_ENDLOOP]   = { ALU_OP0_NOP, tgsi_endloop},
[TGSI_OPCODE_ENDSUB]= { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_TXQ_LZ]= { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
-   [104]   = { ALU_OP0_NOP, tgsi_unsupported},
+   [TGSI_OPCODE_TXQS]  = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
[105]   = { ALU_OP0_NOP, tgsi_unsupported},
[106]   = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_NOP]   = { ALU_OP0_NOP, tgsi_unsupported},
@@ -8263,7 +8271,7 @@ static const struct r600_shader_tgsi_instruction 
cm_shader_tgsi_instruction[] =
[TGSI_OPCODE_ENDLOOP]   = { ALU_OP0_NOP, tgsi_endloop},
[TGSI_OPCODE_ENDSUB]= { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_TXQ_LZ]= { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
-   [104]   = { ALU_OP0_NOP, tgsi_unsupported},
+   [TGSI_OPCODE_TXQS]  = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
[105]   = { ALU_OP0_NOP, tgsi_unsupported},
[106]   = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_NOP]   = { ALU_OP0_NOP, tgsi_unsupported},
-- 
2.4.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 1/5] tgsi: add a TXQS opcode to retrieve the number of texture samples

2015-09-11 Thread Ilia Mirkin

Signed-off-by: Ilia Mirkin 
Reviewed-by: Roland Scheidegger 
Reviewed-by: Edward O'Callaghan 
---
 src/gallium/auxiliary/tgsi/tgsi_info.c |  3 ++-
 src/gallium/docs/source/tgsi.rst   | 12 +++-
 src/gallium/include/pipe/p_shader_tokens.h |  1 +
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c 
b/src/gallium/auxiliary/tgsi/tgsi_info.c
index fb29ea0..3b40c3d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -141,7 +141,7 @@ static const struct tgsi_opcode_info 
opcode_info[TGSI_OPCODE_LAST] =
{ 0, 0, 0, 1, 1, 0, NONE, "ENDLOOP", TGSI_OPCODE_ENDLOOP },
{ 0, 0, 0, 0, 1, 0, NONE, "ENDSUB", TGSI_OPCODE_ENDSUB },
{ 1, 1, 1, 0, 0, 0, OTHR, "TXQ_LZ", TGSI_OPCODE_TXQ_LZ },
-   { 0, 0, 0, 0, 0, 0, NONE, "", 104 }, /* removed */
+   { 1, 1, 1, 0, 0, 0, OTHR, "TXQS", TGSI_OPCODE_TXQS },
{ 0, 0, 0, 0, 0, 0, NONE, "", 105 }, /* removed */
{ 0, 0, 0, 0, 0, 0, NONE, "", 106 }, /* removed */
{ 0, 0, 0, 0, 0, 0, NONE, "NOP", TGSI_OPCODE_NOP },
@@ -331,6 +331,7 @@ tgsi_opcode_infer_type( uint opcode )
case TGSI_OPCODE_SAD: /* XXX some src args may be signed for SAD ? */
case TGSI_OPCODE_TXQ:
case TGSI_OPCODE_TXQ_LZ:
+   case TGSI_OPCODE_TXQS:
case TGSI_OPCODE_F2U:
case TGSI_OPCODE_UDIV:
case TGSI_OPCODE_UMAD:
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 314c9ca..314fe1b 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -960,7 +960,6 @@ XXX doesn't look like most of the opcodes really belong 
here.
   For components which don't return a resource dimension, their value
   is undefined.
 
-
 .. math::
 
   lod = src0.x
@@ -973,6 +972,17 @@ XXX doesn't look like most of the opcodes really belong 
here.
 
   dst.w = texture\_levels(unit)
 
+
+.. opcode:: TXQS - Texture Samples Query
+
+  This retrieves the number of samples in the texture, and stores it
+  into the x component. The other components are undefined.
+
+.. math::
+
+  dst.x = texture\_samples(unit)
+
+
 .. opcode:: TG4 - Texture Gather
 
   As per ARB_texture_gather, gathers the four texels to be used in a bi-linear
diff --git a/src/gallium/include/pipe/p_shader_tokens.h 
b/src/gallium/include/pipe/p_shader_tokens.h
index 6e07b2c..b36e0a3 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -402,6 +402,7 @@ struct tgsi_property_data {
 #define TGSI_OPCODE_ENDLOOP 101
 #define TGSI_OPCODE_ENDSUB  102
 #define TGSI_OPCODE_TXQ_LZ  103 /* TXQ for mipmap level 0 */
+#define TGSI_OPCODE_TXQS104
 /* gap */
 #define TGSI_OPCODE_NOP 107
 
-- 
2.4.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 3/5] nv50/ir: add support for TXQS tgsi opcode

2015-09-11 Thread Ilia Mirkin

Signed-off-by: Ilia Mirkin 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir.h  |  4 ++--
 .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  | 22 +-
 .../nouveau/codegen/nv50_ir_lowering_nv50.cpp  | 20 
 src/gallium/drivers/nouveau/nv50/nv50_screen.c |  2 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c |  2 +-
 5 files changed, 41 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index ba1b085..f6e9308 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -824,8 +824,8 @@ private:
 
 enum TexQuery
 {
-   TXQ_DIMS,
-   TXQ_TYPE,
+   TXQ_DIMS, /* x, y, z, levels */
+   TXQ_TYPE, /* ?, ?, samples, ? */
TXQ_SAMPLE_POSITION,
TXQ_FILTER,
TXQ_LOD,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index f153674..c8efaf5 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -631,6 +631,7 @@ static nv50_ir::operation translateOpcode(uint opcode)
NV50_IR_OPCODE_CASE(SAD, SAD);
NV50_IR_OPCODE_CASE(TXF, TXF);
NV50_IR_OPCODE_CASE(TXQ, TXQ);
+   NV50_IR_OPCODE_CASE(TXQS, TXQ);
NV50_IR_OPCODE_CASE(TG4, TXG);
NV50_IR_OPCODE_CASE(LODQ, TXLQ);
 
@@ -1324,7 +1325,7 @@ private:
void setTexRS(TexInstruction *, unsigned int& s, int R, int S);
void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy);
void handleTXF(Value *dst0[4], int R, int L_M);
-   void handleTXQ(Value *dst0[4], enum TexQuery);
+   void handleTXQ(Value *dst0[4], enum TexQuery, int R);
void handleLIT(Value *dst0[4]);
void handleUserClipPlanes();
 
@@ -1795,7 +1796,7 @@ Converter::setTexRS(TexInstruction *tex, unsigned int& s, 
int R, int S)
 }
 
 void
-Converter::handleTXQ(Value *dst0[4], enum TexQuery query)
+Converter::handleTXQ(Value *dst0[4], enum TexQuery query, int R)
 {
TexInstruction *tex = new_TexInstruction(func, OP_TXQ);
tex->tex.query = query;
@@ -1807,9 +1808,12 @@ Converter::handleTXQ(Value *dst0[4], enum TexQuery query)
   tex->tex.mask |= 1 << c;
   tex->setDef(d++, dst0[c]);
}
-   tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level
+   if (query == TXQ_DIMS)
+  tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level
+   else
+  tex->setSrc((c = 0), zero);
 
-   setTexRS(tex, ++c, 1, -1);
+   setTexRS(tex, ++c, R, -1);
 
bb->insertTail(tex);
 }
@@ -2764,7 +2768,15 @@ Converter::handleInstruction(const struct 
tgsi_full_instruction *insn)
   break;
case TGSI_OPCODE_TXQ:
case TGSI_OPCODE_SVIEWINFO:
-  handleTXQ(dst0, TXQ_DIMS);
+  handleTXQ(dst0, TXQ_DIMS, 1);
+  break;
+   case TGSI_OPCODE_TXQS:
+  // The TXQ_TYPE query returns samples in its 3rd arg, but we need it to
+  // be in .x
+  dst0[1] = dst0[2] = dst0[3] = NULL;
+  std::swap(dst0[0], dst0[2]);
+  handleTXQ(dst0, TXQ_TYPE, 0);
+  std::swap(dst0[0], dst0[2]);
   break;
case TGSI_OPCODE_F2I:
case TGSI_OPCODE_F2U:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
index d87cdff..eec502b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
@@ -617,6 +617,7 @@ private:
bool handleTXL(TexInstruction *); // hate
bool handleTXD(TexInstruction *); // these 3
bool handleTXLQ(TexInstruction *);
+   bool handleTXQ(TexInstruction *);
 
bool handleCALL(Instruction *);
bool handlePRECONT(Instruction *);
@@ -975,6 +976,23 @@ NV50LoweringPreSSA::handleTXLQ(TexInstruction *i)
 }
 
 bool
+NV50LoweringPreSSA::handleTXQ(TexInstruction *i)
+{
+   Value *ms, *ms_x, *ms_y;
+   if (i->tex.query == TXQ_DIMS)
+  return true;
+   assert(i->tex.query == TXQ_TYPE);
+   assert(i->tex.mask == 4);
+
+   loadTexMsInfo(i->tex.r * 4 * 2, &ms, &ms_x, &ms_y);
+   bld.mkOp2(OP_SHL, TYPE_U32, i->getDef(0), bld.loadImm(NULL, 1), ms);
+   i->bb->remove(i);
+
+   return true;
+}
+
+
+bool
 NV50LoweringPreSSA::handleSET(Instruction *i)
 {
if (i->dType == TYPE_F32) {
@@ -1333,6 +1351,8 @@ NV50LoweringPreSSA::visit(Instruction *i)
   return handleTXD(i->asTex());
case OP_TXLQ:
   return handleTXLQ(i->asTex());
+   case OP_TXQ:
+  return handleTXQ(i->asTex());
case OP_EX2:
   bld.mkOp1(OP_PREEX2, TYPE_F32, i->getDef(0), i->getSrc(0));
   i->setSrc(0, i->getDef(0));
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 6b7f250..9068ae1 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -179,6 +179,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum

[Mesa-dev] [PATCH v2 0/5] gallium: add support for retrieving number of texture samples

2015-09-11 Thread Ilia Mirkin

OK, I've added a PIPE_CAP to control it now. I've left the relevant
R-b's from v1 in place despite a few changes; I think they're minor
enough.

Ilia Mirkin (5):
  tgsi: add a TXQS opcode to retrieve the number of texture samples
  gallium: add PIPE_CAP_TGSI_TXQS to let st know if TXQS is supported
  nv50/ir: add support for TXQS tgsi opcode
  r600g: add support for TXQS tgsi opcode
  st/mesa: emit TXQS, support ARB_shader_texture_image_samples

 docs/GL3.txt   |  2 +-
 docs/relnotes/11.1.0.html  |  2 +-
 src/gallium/auxiliary/tgsi/tgsi_info.c |  3 ++-
 src/gallium/docs/source/screen.rst |  1 +
 src/gallium/docs/source/tgsi.rst   | 12 +++-
 src/gallium/drivers/freedreno/freedreno_screen.c   |  1 +
 src/gallium/drivers/i915/i915_screen.c |  1 +
 src/gallium/drivers/ilo/ilo_screen.c   |  1 +
 src/gallium/drivers/llvmpipe/lp_screen.c   |  1 +
 src/gallium/drivers/nouveau/codegen/nv50_ir.h  |  4 ++--
 .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  | 22 +-
 .../nouveau/codegen/nv50_ir_lowering_nv50.cpp  | 20 
 src/gallium/drivers/nouveau/nv30/nv30_screen.c |  1 +
 src/gallium/drivers/nouveau/nv50/nv50_screen.c |  1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c |  1 +
 src/gallium/drivers/r300/r300_screen.c |  1 +
 src/gallium/drivers/r600/r600_pipe.c   |  1 +
 src/gallium/drivers/r600/r600_shader.c | 16 
 src/gallium/drivers/radeonsi/si_pipe.c |  1 +
 src/gallium/drivers/softpipe/sp_screen.c   |  1 +
 src/gallium/drivers/svga/svga_screen.c |  1 +
 src/gallium/drivers/vc4/vc4_screen.c   |  1 +
 src/gallium/include/pipe/p_defines.h   |  1 +
 src/gallium/include/pipe/p_shader_tokens.h |  1 +
 src/mesa/state_tracker/st_extensions.c |  1 +
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |  6 +-
 26 files changed, 88 insertions(+), 16 deletions(-)

-- 
2.4.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 5/5] st/mesa: emit TXQS, support ARB_shader_texture_image_samples

2015-09-11 Thread Ilia Mirkin

The image component of the ext is a no-op since there is no image support
in gallium (yet).

Signed-off-by: Ilia Mirkin 
Reviewed-by: Edward O'Callaghan 
Reviewed-by: Brian Paul 
---
 docs/GL3.txt   | 2 +-
 docs/relnotes/11.1.0.html  | 2 +-
 src/mesa/state_tracker/st_extensions.c | 1 +
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 6 +-
 4 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 2535002..1029b18 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -194,7 +194,7 @@ GL 4.5, GLSL 4.50:
   GL_ARB_derivative_controlDONE (i965, nv50, nvc0, 
r600, radeonsi)
   GL_ARB_direct_state_access   DONE (all drivers)
   GL_ARB_get_texture_sub_image DONE (all drivers)
-  GL_ARB_shader_texture_image_samples  DONE (i965)
+  GL_ARB_shader_texture_image_samples  DONE (i965, nv50, nvc0, 
r600)
   GL_ARB_texture_barrier   DONE (nv50, nvc0, r600, 
radeonsi)
   GL_KHR_context_flush_control DONE (all - but needs 
GLX/EGL extension to be useful)
   GL_KHR_robust_buffer_access_behavior not started
diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index 603b06f..69a35a7 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -44,7 +44,7 @@ Note: some of the new features are only available with 
certain drivers.
 
 
 
-GL_ARB_shader_texture_image_samples on i965
+GL_ARB_shader_texture_image_samples on i965, nv50, nvc0, r600
 GL_ARB_texture_query_lod on softpipe
 
 
diff --git a/src/mesa/state_tracker/st_extensions.c 
b/src/mesa/state_tracker/st_extensions.c
index 884761c..e290292 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -449,6 +449,7 @@ void st_init_extensions(struct pipe_screen *screen,
   { o(ARB_point_sprite), PIPE_CAP_POINT_SPRITE 
},
   { o(ARB_seamless_cube_map),PIPE_CAP_SEAMLESS_CUBE_MAP
},
   { o(ARB_shader_stencil_export),PIPE_CAP_SHADER_STENCIL_EXPORT
},
+  { o(ARB_shader_texture_image_samples), PIPE_CAP_TGSI_TXQS
},
   { o(ARB_shader_texture_lod),   PIPE_CAP_SM3  
},
   { o(ARB_shadow),   PIPE_CAP_TEXTURE_SHADOW_MAP   
},
   { o(ARB_texture_buffer_object),PIPE_CAP_TEXTURE_BUFFER_OBJECTS   
},
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 625c4e9..c3a8c11 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3229,7 +3229,8 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
   opcode = TGSI_OPCODE_LODQ;
   break;
case ir_texture_samples:
-  unreachable("unexpected texture op");
+  opcode = TGSI_OPCODE_TXQS;
+  break;
}
 
if (ir->projector) {
@@ -3339,6 +3340,8 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
  emit_asm(ir, TGSI_OPCODE_MOV, result_dst, levels_src);
   } else
  inst = emit_asm(ir, opcode, result_dst, lod_info);
+   } else if (opcode == TGSI_OPCODE_TXQS) {
+  inst = emit_asm(ir, opcode, result_dst);
} else if (opcode == TGSI_OPCODE_TXF) {
   inst = emit_asm(ir, opcode, result_dst, coord);
} else if (opcode == TGSI_OPCODE_TXL2 || opcode == TGSI_OPCODE_TXB2) {
@@ -5030,6 +5033,7 @@ compile_tgsi_instruction(struct st_translate *t,
case TGSI_OPCODE_TXL:
case TGSI_OPCODE_TXP:
case TGSI_OPCODE_TXQ:
+   case TGSI_OPCODE_TXQS:
case TGSI_OPCODE_TXF:
case TGSI_OPCODE_TEX2:
case TGSI_OPCODE_TXB2:
-- 
2.4.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 2/5] gallium: add PIPE_CAP_TGSI_TXQS to let st know if TXQS is supported

2015-09-11 Thread Ilia Mirkin

Signed-off-by: Ilia Mirkin 
---
 src/gallium/docs/source/screen.rst   | 1 +
 src/gallium/drivers/freedreno/freedreno_screen.c | 1 +
 src/gallium/drivers/i915/i915_screen.c   | 1 +
 src/gallium/drivers/ilo/ilo_screen.c | 1 +
 src/gallium/drivers/llvmpipe/lp_screen.c | 1 +
 src/gallium/drivers/nouveau/nv30/nv30_screen.c   | 1 +
 src/gallium/drivers/nouveau/nv50/nv50_screen.c   | 1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   | 1 +
 src/gallium/drivers/r300/r300_screen.c   | 1 +
 src/gallium/drivers/r600/r600_pipe.c | 1 +
 src/gallium/drivers/radeonsi/si_pipe.c   | 1 +
 src/gallium/drivers/softpipe/sp_screen.c | 1 +
 src/gallium/drivers/svga/svga_screen.c   | 1 +
 src/gallium/drivers/vc4/vc4_screen.c | 1 +
 src/gallium/include/pipe/p_defines.h | 1 +
 15 files changed, 15 insertions(+)

diff --git a/src/gallium/docs/source/screen.rst 
b/src/gallium/docs/source/screen.rst
index 2c0da01..e780047 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -267,6 +267,7 @@ The integer capabilities:
 * ``PIPE_CAP_DEPTH_BOUNDS_TEST``: Whether bounds_test, bounds_min, and
   bounds_max states of pipe_depth_stencil_alpha_state behave according
   to the GL_EXT_depth_bounds_test specification.
+* ``PIPE_CAP_TGSI_TXQS``: Whether the `TXQS` opcode is supported
 
 
 .. _pipe_capf:
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c 
b/src/gallium/drivers/freedreno/freedreno_screen.c
index 17dd47c..8000279 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -235,6 +235,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
+   case PIPE_CAP_TGSI_TXQS:
return 0;
 
case PIPE_CAP_MAX_VIEWPORTS:
diff --git a/src/gallium/drivers/i915/i915_screen.c 
b/src/gallium/drivers/i915/i915_screen.c
index 19a94a8..51c64ed 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -247,6 +247,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap 
cap)
case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
+   case PIPE_CAP_TGSI_TXQS:
   return 0;
 
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
diff --git a/src/gallium/drivers/ilo/ilo_screen.c 
b/src/gallium/drivers/ilo/ilo_screen.c
index ab4d137..9e37e24 100644
--- a/src/gallium/drivers/ilo/ilo_screen.c
+++ b/src/gallium/drivers/ilo/ilo_screen.c
@@ -469,6 +469,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap 
param)
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
+   case PIPE_CAP_TGSI_TXQS:
   return 0;
 
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c 
b/src/gallium/drivers/llvmpipe/lp_screen.c
index 14eeab0..697e3d9 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -296,6 +296,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum 
pipe_cap param)
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
+   case PIPE_CAP_TGSI_TXQS:
   return 0;
}
/* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c 
b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index efa3a59..806d4e6 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -169,6 +169,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+   case PIPE_CAP_TGSI_TXQS:
   return 0;
 
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 30e6e04..6b7f250 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -214,6 +214,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+   case PIPE_CAP_TGSI_TXQS:
   return 0;
 
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index ab19b26..220c2aa 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -200,6 +200,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap para

Re: [Mesa-dev] i965: Turn UBOs as push constants

2015-09-11 Thread Ben Widawsky

On Fri, Sep 11, 2015 at 11:33:14AM +0300, Abdiel Janulgue wrote:
> Some updated figures first:
> 
> Ue4 Reflections Subway demo
> x  fs gather constants disabled
> +  fs gather constants enabled
> 
> HSW:
> N   Min   MaxMedian   AvgStddev
> x  10   5.09739   6.48963   6.33357  6.1979850.40742969
> +  10   6.56139   6.86579   6.80952  6.7585160.10267153
> Difference at 95.0% confidence
> 0.560531 ± 0.279157
> 9.04376% ± 4.50399%
> (Student's t, pooled s = 0.297103)
> 
> BDW:
> N   Min   MaxMedian   AvgStddev
> x  10   3.64474   3.69746   3.68596   3.67842   0.015452422
> +  10   3.77998   3.80967   3.79824  3.795633  0.0079241642
> Difference at 95.0% confidence
> 0.117213 ± 0.0115377
> 3.1865% ± 0.31366%
> (Student's t, pooled s = 0.0122794)

Does this imply that's the only benchmark which improved, or that it was the
only one which was run?

> 
> This patch series is taken from my initial gather constants series last April.
> Now that the basic i965 resource streamer infrastructure is in place, these are
> the remaining bits to enable the gather constants hardware[*]. I've tried to
> address the comments from the reviews that happened since then.
> 
> The major change from the last posting is that we now support GEN8. Also, the
> vec4 backend gather constant support is probably no longer relevant because
> of the switch to NIR, but I've included it here so people interested in
> implementing it on the vec4-NIR backend can have a reference point.
> 
> The series has no piglit regressions.
> 
> [PATCH 01/20] i965: Define gather push constants opcodes
> [PATCH 02/20] i965: Enable gather push constants
> [PATCH 03/20] i965: Allocate space on the gather pool for plain
> [PATCH 04/20] i965: Allocate space on the gather pool for UBO entries
> [PATCH 05/20] i965: Store gather table information in the program
> [PATCH 06/20] i965: Assign hw-binding table index for each UBO
> [PATCH 07/20] i965: Assign hw-binding table index for uniform
> [PATCH 08/20] nir: Add glsl_get_array_size() wrapper.
> [PATCH 09/20] nir: Add glsl_get_type_without_array() wrapper
> [PATCH 10/20] i965: Include UBO parameter sizes in push constant
> [PATCH 11/20] i965/fs: Append uniform entries to the gather table
> [PATCH 12/20] i965/fs/nir: Append nir_intrinsic_load_ubo entries to
> [PATCH 13/20] i965/fs: Pack UBO registers right after uniform
> [PATCH 14/20] i965/vec4: Append uniform entries to the gather table
> [PATCH 15/20] i965/vec4: Append ir_binop_ubo_load entries to the
> [PATCH 16/20] i965/vec4: Pack UBO registers right after uniform
> [PATCH 17/20] i965: Upload UBO surfaces before emitting constant
> [PATCH 18/20] i965: Program the push constants state using the gather
> [PATCH 19/20] i965: Disable gather push constants for null constants
> [PATCH 20/20] i965: Enable push constants for UBOs
> 
> -Abdiel
> 
> --
> [*] http://lists.freedesktop.org/archives/mesa-dev/2015-April/082991.html
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/7] softpipe: Move the faces array from view to filter_args

2015-09-11 Thread Krzesimir Nowak

On Fri, Sep 11, 2015 at 11:57 PM, Emil Velikov 
wrote:

> On 11 September 2015 at 19:07, Krzesimir Nowak 
> wrote:
> > With that, sp_sampler_view instances are not abused anymore as a local
> > storage, so we can later make them constant.
> > ---
> >  src/gallium/drivers/softpipe/sp_tex_sample.c | 36
> +---
> >  src/gallium/drivers/softpipe/sp_tex_sample.h |  4 +---
> >  2 files changed, 23 insertions(+), 17 deletions(-)
> >
> > diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c
> b/src/gallium/drivers/softpipe/sp_tex_sample.c
> > index 489cae7..d5a7ed6 100644
> > --- a/src/gallium/drivers/softpipe/sp_tex_sample.c
> > +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
>
> > @@ -3594,11 +3596,16 @@ sp_tgsi_get_samples(struct tgsi_sampler
> *tgsi_sampler,
> >float cs[TGSI_QUAD_SIZE];
> >float ct[TGSI_QUAD_SIZE];
> >float cp[TGSI_QUAD_SIZE];
> > +  float faces[TGSI_QUAD_SIZE];
> >
> > -  convert_cube(sp_sview, sp_samp, s, t, p, c0, cs, ct, cp);
> > +  convert_cube(sp_sview, sp_samp, s, t, p, c0, cs, ct, cp, faces);
> >
> > +  filt_args.faces = faces;
> If I remember correctly, the contents of faces will become invalid
> as we exit the function, so any attempt to use them afterwards (via
> filt_args.faces) will make things go crazy.
>

And that's fine - the filt_args variable itself goes out of scope when you
exit the function, and we do not store the pointer to faces anywhere for
later reuse.


>
> >sample_mip(sp_sview, sp_samp, cs, ct, cp, c0, lod, &filt_args,
> rgba);
> > } else {
> > +  static const float zero_faces[TGSI_QUAD_SIZE] = {0.0f, 0.0f,
> 0.0f, 0.0f};
> > +
> > +  filt_args.faces = zero_faces;
> Here we should be safe due to the static qualifier.
>
> >sample_mip(sp_sview, sp_samp, s, t, p, c0, lod, &filt_args, rgba);
> > }
> >  }
>
> > diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h
> b/src/gallium/drivers/softpipe/sp_tex_sample.h
> > index 72b4a1a..6743b7e 100644
> > --- a/src/gallium/drivers/softpipe/sp_tex_sample.h
> > +++ b/src/gallium/drivers/softpipe/sp_tex_sample.h
> > @@ -72,6 +72,7 @@ typedef void (*img_filter_func)(struct sp_sampler_view
> *sp_sview,
> >  struct filter_args {
> > enum tgsi_sampler_control control;
> > const int8_t *offset;
> > +   const float *faces;
> AFAICT, the face is calculated (in convert_cube) using the uint type.
> Won't this cause unnecessary int <-> float conversions?
>
>
Good point, I hadn't noticed that.


> Cheers,
> Emil
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

1 2 >

1 - 100 of 113 matches

Mail list logo