Re: [Mesa-dev] [PATCH 2/8] mesa: rename gl_vertex_attrib_array::VertexBinding

2016-10-12 Thread Mathias Fröhlich
On Tuesday, 11 October 2016 20:10:27 CEST Brian Paul wrote:
> Rename to gl_vertex_attrib_array::BufferBindingIndex because this field
> is an index into the array of buffer binding points.  This makes some
> code a little easier to follow since there's also a "VertexBinding" field
> in gl_vertex_array_object.

What about BindingIndex instead?
Either way is ok with me, but may be this helps not to get too long lines 
including the benefits you are aiming for?

Mathias


> ---
>  src/mesa/main/api_arrayelt.c  | 20 ++--
>  src/mesa/main/arrayobj.c  |  8 
>  src/mesa/main/mtypes.h|  2 +-
>  src/mesa/main/varray.c| 16 
>  src/mesa/vbo/vbo_exec_array.c |  6 +++---
>  5 files changed, 26 insertions(+), 26 deletions(-)
> 
> diff --git a/src/mesa/main/api_arrayelt.c b/src/mesa/main/api_arrayelt.c
> index 15fbb8c..cc7dee3 100644
> --- a/src/mesa/main/api_arrayelt.c
> +++ b/src/mesa/main/api_arrayelt.c
> @@ -1566,7 +1566,7 @@ _ae_update_state(struct gl_context *ctx)
> /* conventional vertex arrays */
> if (vao->VertexAttrib[VERT_ATTRIB_COLOR_INDEX].Enabled) {
>aa->array = &vao->VertexAttrib[VERT_ATTRIB_COLOR_INDEX];
> -  aa->binding = &vao->VertexBinding[aa->array->VertexBinding];
> +  aa->binding = &vao->VertexBinding[aa->array->BufferBindingIndex];
>aa->offset = IndexFuncs[TYPE_IDX(aa->array->Type)];
>check_vbo(actx, aa->binding->BufferObj);
>aa++;
> @@ -1574,7 +1574,7 @@ _ae_update_state(struct gl_context *ctx)
>  
> if (vao->VertexAttrib[VERT_ATTRIB_EDGEFLAG].Enabled) {
>aa->array = &vao->VertexAttrib[VERT_ATTRIB_EDGEFLAG];
> -  aa->binding = &vao->VertexBinding[aa->array->VertexBinding];
> +  aa->binding = &vao->VertexBinding[aa->array->BufferBindingIndex];
>aa->offset = _gloffset_EdgeFlagv;
>check_vbo(actx, aa->binding->BufferObj);
>aa++;
> @@ -1582,7 +1582,7 @@ _ae_update_state(struct gl_context *ctx)
>  
> if (vao->VertexAttrib[VERT_ATTRIB_NORMAL].Enabled) {
>aa->array = &vao->VertexAttrib[VERT_ATTRIB_NORMAL];
> -  aa->binding = &vao->VertexBinding[aa->array->VertexBinding];
> +  aa->binding = &vao->VertexBinding[aa->array->BufferBindingIndex];
>aa->offset = NormalFuncs[TYPE_IDX(aa->array->Type)];
>check_vbo(actx, aa->binding->BufferObj);
>aa++;
> @@ -1590,7 +1590,7 @@ _ae_update_state(struct gl_context *ctx)
>  
> if (vao->VertexAttrib[VERT_ATTRIB_COLOR0].Enabled) {
>aa->array = &vao->VertexAttrib[VERT_ATTRIB_COLOR0];
> -  aa->binding = &vao->VertexBinding[aa->array->VertexBinding];
> +  aa->binding = &vao->VertexBinding[aa->array->BufferBindingIndex];
>aa->offset = ColorFuncs[aa->array->Size-3][TYPE_IDX(aa->array-
>Type)];
>check_vbo(actx, aa->binding->BufferObj);
>aa++;
> @@ -1598,7 +1598,7 @@ _ae_update_state(struct gl_context *ctx)
>  
> if (vao->VertexAttrib[VERT_ATTRIB_COLOR1].Enabled) {
>aa->array = &vao->VertexAttrib[VERT_ATTRIB_COLOR1];
> -  aa->binding = &vao->VertexBinding[aa->array->VertexBinding];
> +  aa->binding = &vao->VertexBinding[aa->array->BufferBindingIndex];
>aa->offset = SecondaryColorFuncs[TYPE_IDX(aa->array->Type)];
>check_vbo(actx, aa->binding->BufferObj);
>aa++;
> @@ -1606,7 +1606,7 @@ _ae_update_state(struct gl_context *ctx)
>  
> if (vao->VertexAttrib[VERT_ATTRIB_FOG].Enabled) {
>aa->array = &vao->VertexAttrib[VERT_ATTRIB_FOG];
> -  aa->binding = &vao->VertexBinding[aa->array->VertexBinding];
> +  aa->binding = &vao->VertexBinding[aa->array->BufferBindingIndex];
>aa->offset = FogCoordFuncs[TYPE_IDX(aa->array->Type)];
>check_vbo(actx, aa->binding->BufferObj);
>aa++;
> @@ -1620,7 +1620,7 @@ _ae_update_state(struct gl_context *ctx)
>* If we ever remove GL_NV_vertex_program this will have to 
change.
>*/
>   at->array = attribArray;
> - at->binding = &vao->VertexBinding[attribArray->VertexBinding];
> + at->binding = &vao->VertexBinding[attribArray-
>BufferBindingIndex];
>   assert(!at->array->Normalized);
>   at->func = AttribFuncsNV[at->array->Normalized]
>   [at->array->Size-1]
> @@ -1638,7 +1638,7 @@ _ae_update_state(struct gl_context *ctx)
>if (attribArray->Enabled) {
>   GLint intOrNorm;
>   at->array = attribArray;
> - at->binding = &vao->VertexBinding[attribArray->VertexBinding];
> + at->binding = &vao->VertexBinding[attribArray-
>BufferBindingIndex];
>   /* Note: we can't grab the _glapi_Dispatch->VertexAttrib1fvNV
>* function pointer here (for float arrays) since the pointer may
>* change from one execution of _ae_ArrayElement() to
> @@ -1669,7 +1669,7 @@ _ae_update_state(struct gl_context *ctx)
> * issued as the last (provoking) attribute).
> */
>aa->array = &vao-

Re: [Mesa-dev] [PATCH 1/8] mesa: rename some vars in arrayobj.c

2016-10-12 Thread Mathias Fröhlich
Hi,

On Tuesday, 11 October 2016 20:10:26 CEST Brian Paul wrote:
> Use 'vao' instead of 'obj' to be consistent with other code.
> Plus, add a comment.

One question in patch #2.
Either way, the series is:

Reviewed-by: Mathias Fröhlich 




> ---
>  src/mesa/main/arrayobj.c | 55 ++
+-
>  1 file changed, 31 insertions(+), 24 deletions(-)
> 
> diff --git a/src/mesa/main/arrayobj.c b/src/mesa/main/arrayobj.c
> index 2d3b69c..fe11686 100644
> --- a/src/mesa/main/arrayobj.c
> +++ b/src/mesa/main/arrayobj.c
> @@ -223,13 +223,20 @@ _mesa_reference_vao_(struct gl_context *ctx,
>  }
>  
>  
> -
> +/**
> + * Initialize attribtes of a vertex array within a vertex array object.
> + * \param vao  the container vertex array object
> + * \param index  which array in the VAO to initialize
> + * \param size  number of components (1, 2, 3 or 4) per attribute
> + * \param type  datatype of the attribute (GL_FLOAT, GL_INT, etc).
> + */
>  static void
>  init_array(struct gl_context *ctx,
> -   struct gl_vertex_array_object *obj, GLuint index, GLint size, 
GLint type)
> +   struct gl_vertex_array_object *vao,
> +   GLuint index, GLint size, GLint type)
>  {
> -   struct gl_vertex_attrib_array *array = &obj->VertexAttrib[index];
> -   struct gl_vertex_buffer_binding *binding = &obj->VertexBinding[index];
> +   struct gl_vertex_attrib_array *array = &vao->VertexAttrib[index];
> +   struct gl_vertex_buffer_binding *binding = &vao->VertexBinding[index];
>  
> array->Size = size;
> array->Type = type;
> @@ -260,47 +267,47 @@ init_array(struct gl_context *ctx,
>   */
>  void
>  _mesa_initialize_vao(struct gl_context *ctx,
> - struct gl_vertex_array_object *obj,
> + struct gl_vertex_array_object *vao,
>   GLuint name)
>  {
> GLuint i;
>  
> -   obj->Name = name;
> +   vao->Name = name;
>  
> -   mtx_init(&obj->Mutex, mtx_plain);
> -   obj->RefCount = 1;
> +   mtx_init(&vao->Mutex, mtx_plain);
> +   vao->RefCount = 1;
>  
> /* Init the individual arrays */
> -   for (i = 0; i < ARRAY_SIZE(obj->VertexAttrib); i++) {
> +   for (i = 0; i < ARRAY_SIZE(vao->VertexAttrib); i++) {
>switch (i) {
>case VERT_ATTRIB_WEIGHT:
> - init_array(ctx, obj, VERT_ATTRIB_WEIGHT, 1, GL_FLOAT);
> + init_array(ctx, vao, VERT_ATTRIB_WEIGHT, 1, GL_FLOAT);
>   break;
>case VERT_ATTRIB_NORMAL:
> - init_array(ctx, obj, VERT_ATTRIB_NORMAL, 3, GL_FLOAT);
> + init_array(ctx, vao, VERT_ATTRIB_NORMAL, 3, GL_FLOAT);
>   break;
>case VERT_ATTRIB_COLOR1:
> - init_array(ctx, obj, VERT_ATTRIB_COLOR1, 3, GL_FLOAT);
> + init_array(ctx, vao, VERT_ATTRIB_COLOR1, 3, GL_FLOAT);
>   break;
>case VERT_ATTRIB_FOG:
> - init_array(ctx, obj, VERT_ATTRIB_FOG, 1, GL_FLOAT);
> + init_array(ctx, vao, VERT_ATTRIB_FOG, 1, GL_FLOAT);
>   break;
>case VERT_ATTRIB_COLOR_INDEX:
> - init_array(ctx, obj, VERT_ATTRIB_COLOR_INDEX, 1, GL_FLOAT);
> + init_array(ctx, vao, VERT_ATTRIB_COLOR_INDEX, 1, GL_FLOAT);
>   break;
>case VERT_ATTRIB_EDGEFLAG:
> - init_array(ctx, obj, VERT_ATTRIB_EDGEFLAG, 1, GL_BOOL);
> + init_array(ctx, vao, VERT_ATTRIB_EDGEFLAG, 1, GL_BOOL);
>   break;
>case VERT_ATTRIB_POINT_SIZE:
> - init_array(ctx, obj, VERT_ATTRIB_POINT_SIZE, 1, GL_FLOAT);
> + init_array(ctx, vao, VERT_ATTRIB_POINT_SIZE, 1, GL_FLOAT);
>   break;
>default:
> - init_array(ctx, obj, i, 4, GL_FLOAT);
> + init_array(ctx, vao, i, 4, GL_FLOAT);
>   break;
>}
> }
>  
> -   _mesa_reference_buffer_object(ctx, &obj->IndexBufferObj,
> +   _mesa_reference_buffer_object(ctx, &vao->IndexBufferObj,
>   ctx->Shared->NullBufferObj);
>  }
>  
> @@ -309,11 +316,11 @@ _mesa_initialize_vao(struct gl_context *ctx,
>   * Add the given array object to the array object pool.
>   */
>  static void
> -save_array_object( struct gl_context *ctx, struct gl_vertex_array_object 
*obj )
> +save_array_object(struct gl_context *ctx, struct gl_vertex_array_object 
*vao)
>  {
> -   if (obj->Name > 0) {
> +   if (vao->Name > 0) {
>/* insert into hash table */
> -  _mesa_HashInsert(ctx->Array.Objects, obj->Name, obj);
> +  _mesa_HashInsert(ctx->Array.Objects, vao->Name, vao);
> }
>  }
>  
> @@ -323,11 +330,11 @@ save_array_object( struct gl_context *ctx, struct 
gl_vertex_array_object *obj )
>   * Do not deallocate the array object though.
>   */
>  static void
> -remove_array_object( struct gl_context *ctx, struct gl_vertex_array_object 
*obj )
> +remove_array_object(struct gl_context *ctx, struct gl_vertex_array_object 
*vao)
>  {
> -   if (obj->Name > 0) {
> +   if (vao->Name > 0) {
>/* remove from hash table */
> -  _mesa_HashRemove(ctx->Array.Objects, obj->Name);
> +

Re: [Mesa-dev] [PATCH 2/2] [RFC] radv: add scratch support for spilling.

2016-10-12 Thread Nicolai Hähnle

On 12.10.2016 01:49, Tom Stellard wrote:

On Tue, Oct 11, 2016 at 03:21:24PM +0200, Nicolai Hähnle wrote:

On 11.10.2016 07:36, Dave Airlie wrote:

On 11 October 2016 at 12:13, Dave Airlie  wrote:

On 11 October 2016 at 11:42, Dave Airlie  wrote:

On 11 October 2016 at 05:50, Dave Airlie  wrote:

On 10 October 2016 at 21:45, Arsenault, Matthew
 wrote:

I don't like adding explicit IR arguments for ABI arguments, especially this
one. Adding a special case for the first index feels dirty. The rest of llvm
also won't be aware of the specialness of the argument. It would be
problematic because bugpoint would eliminate the unused argument and then
codegen would have to fail in some way when the argument is missing


That's a good point, but is there an alternative without burning two
userdata SGPRs?

One possibility is to define an ABI that says:

1. SGPR0/1 points to an extra data region; it is reserved independently
from the shader arguments.
2. The first 64 bits of that extra data region point to the scratch buffer.
3. The main shader code can retrieve SGPR0/1 using an intrinsic.

This can be made to look somewhat similar to what HSA does.



What if we stored all shader inputs in the 'extra data region', with an
ABI that defined fixed offsets in the 'extra data region' for each
input.

Then as an optimization we could have the compiler map the values that
it needed from the 'extra data region' into user sgprs and communicate
this back to the driver.

This gets us something that works very quickly and still allows us to do
optimizations in the future.


That sounds overly complicated to me. I think it's a good thing that the 
driver can control the layout as much as possible.


We're likely to run into situations where we want to change some fixed 
aspect of the layout, and then changes need to be coordinated between 
the driver and LLVM.


Also, I'd mostly expect layout optimizations to be aimed at making state 
changes cheaper, and the driver is really the piece of code that has the 
information to do that, not the compiler.


Nicolai



-Tom




We should just hardcode the behaviour and switch both radv/radeonsi
over in one go?

I'll try and code up, using the first 64-bits of the first buffer
pointed to by userdata 0/1,
to store things.


I've looked at doing a dword fetch from the first two words of the 0/1 userdata,

It's not optimal for vulkan unfortunately, since the idea I had was per command
buffer I just allocate one scratch buffer of the size required at the end, and
patch it in at the start of the command buffer. However in the first
slot I was going
to use the push constants/dynamic buffer to store the value, however it looks
like I need to keep a list of everyone of these buffers I emit, and
backpatch them
all. It might not be too insane, just a slight bump in the keeping it simple.


I'm probably losing te plot here, but I'm considering a double indirection,

we load the 64-bit address from the first two dwords, then load the
64-bits dword
from that address to get the value.

This saves me allocating scratch bo's for secondary command buffers,
and also having to allocating ever increasing scratch bo's as shaders that
need more scratch get bound to the pipeline.
I'm not sure how much of an effect this should have for GL though.


I've posted a patch to this affect to the llvm phabricator.

It definitely is cleaner for the radv driver.


I still think it would be nice to have the level of indirection or
whatever one wants to call it as a function attribute. This would allow
you to change your mind about e.g. just sticking the scratch pointer
directly into SGPR0/1. radeonsi and radv don't have to be identical in
that regard.

Cheers
Nicolai
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv: Return correct result in EnumeratePhysicalDevices

2016-10-12 Thread Emil Velikov
Hi Nicolas,

On 6 October 2016 at 20:21, Nicolas Koch  wrote:
> If pPhysicalDevices is too small for all physical devices,
> the driver must return VK_INCOMPLETE.
> Since only a single physical device is supported, this is only the case
> when pPhysicalDeviceCount == 0 && pPhysicalDevices != NULL.
> ---
>  src/intel/vulkan/anv_device.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index c7b9979..76cbb69 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -385,6 +385,8 @@ VkResult anv_EnumeratePhysicalDevices(
> } else if (*pPhysicalDeviceCount >= 1) {
>pPhysicalDevices[0] = 
> anv_physical_device_to_handle(&instance->physicalDevice);
>*pPhysicalDeviceCount = 1;
> +   } else if (*pPhysicalDeviceCount < instance->physicalDeviceCount) {
> +  return VK_INCOMPLETE;
Looks like RADV could use the exact same fix
(s/intel/amd/;s/anv/radv/). Can you spin a patch for it ?

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 01/25] configure.ac: Don't search llvm-config if it's known

2016-10-12 Thread Emil Velikov
On 12 October 2016 at 00:02, Tobias Droste  wrote:
> This way LLVM_CONFIG can bet set from an env variable if it's outside
> the $llvm_prefix.
>
> This is not a must, but it helps testing.
>
Indeed. Nicely done !

> Signed-off-by: Tobias Droste 
> ---
>  configure.ac | 10 ++
>  1 file changed, 6 insertions(+), 4 deletions(-)
>
> diff --git a/configure.ac b/configure.ac
> index b414edd..bdd46bc 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -2207,10 +2207,12 @@ if test "x$enable_gallium_llvm" = xauto; then
>  esac
>  fi
>  if test "x$enable_gallium_llvm" = xyes || test "x$HAVE_RADEON_VULKAN" = 
> xyes; then
> -if test -n "$llvm_prefix"; then
> -AC_PATH_TOOL([LLVM_CONFIG], [llvm-config], [no], 
> ["$llvm_prefix/bin"])
> -else
> -AC_PATH_TOOL([LLVM_CONFIG], [llvm-config], [no])
> +if test -z "${LLVM_CONFIG}"; then
Nit: Please drop the {}.

With that
Reviewed-by: Emil Velikov 

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 02/25] configure.ac: Add helper function for targets/components

2016-10-12 Thread Emil Velikov
On 12 October 2016 at 00:02, Tobias Droste  wrote:
> Add functions to add and check targets/components.
> Not used in this patch.
>
> The error message in llvm_add_component is disabled until it doesn't break 
> the build anymore.
> This is the same functionality as before where the components were added 
> without a check.
>
> Signed-off-by: Tobias Droste 
> ---
>  configure.ac | 35 +++
>  1 file changed, 35 insertions(+)
>
> diff --git a/configure.ac b/configure.ac
> index bdd46bc..69421ff 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -2196,7 +2196,42 @@ llvm_check_version_for() {
>  fi
>  }
>
> +llvm_add_default_components() {
> +driver_name=$1
>
> +# Required default components
> +llvm_add_component "engine" $driver_name
> +llvm_add_component "bitwriter" $driver_name
> +llvm_add_component "mcjit" $driver_name
> +llvm_add_component "mcdisassembler" $driver_name
> +
> +# Optional default components
> +if $LLVM_CONFIG --components | grep -iqw inteljitevents ; then
> +LLVM_COMPONENTS="${LLVM_COMPONENTS} inteljitevents"
> +fi
> +}
> +
> +llvm_add_component() {
Nit: move this function before its user (llvm_add_default_components)
Idea: call this ...components (plural) and feed the all components at once.

> +new_llvm_component=$1
> +driver_name=$2
> +
> +if $LLVM_CONFIG --components | grep -iqw $new_llvm_component ; then
> +LLVM_COMPONENTS="${LLVM_COMPONENTS} ${new_llvm_component}"
> +#else
> +#AC_MSG_ERROR([LLVM component '$new_llvm_component' not enabled in 
> your LLVM build. Required by $driver_name.])
This function adds required components, correct ? Then the above two
lines should not be commented out.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 03/25] configure.ac: Use new llvm_add_default_components

2016-10-12 Thread Emil Velikov
On 12 October 2016 at 00:02, Tobias Droste  wrote:
> Signed-off-by: Tobias Droste 
> ---
>  configure.ac | 6 +-
>  1 file changed, 1 insertion(+), 5 deletions(-)
>
> diff --git a/configure.ac b/configure.ac
> index 69421ff..e48ed57 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -2282,11 +2282,7 @@ if test "x$enable_gallium_llvm" = xyes || test 
> "x$HAVE_RADEON_VULKAN" = xyes; th
>  AC_MSG_ERROR([LLVM 
> $LLVM_REQUIRED_VERSION_MAJOR.$LLVM_REQUIRED_VERSION_MINOR or newer is 
> required])
>  fi
>
> -LLVM_COMPONENTS="engine bitwriter mcjit mcdisassembler"
> -
> -if $LLVM_CONFIG --components | grep -q inteljitevents ; then
> -LLVM_COMPONENTS="${LLVM_COMPONENTS} inteljitevents"
> -fi
> +llvm_add_default_components
>
$1/$driver_name seems empty - worth throwing "Gallium" or alike for
the time being ?

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 04/25] configure.ac: Move oCL checks out of LLVM version check

2016-10-12 Thread Emil Velikov
On 12 October 2016 at 00:02, Tobias Droste  wrote:
> The openCL checks don't need to be inside the LLVM version check.
> "llvm_check_version_for" also works if LLVM wasn't found.
>
> Signed-off-by: Tobias Droste 
> ---
>  configure.ac | 33 +
>  1 file changed, 17 insertions(+), 16 deletions(-)
>
> diff --git a/configure.ac b/configure.ac
> index e48ed57..dcd59fc 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -2284,24 +2284,8 @@ if test "x$enable_gallium_llvm" = xyes || test 
> "x$HAVE_RADEON_VULKAN" = xyes; th
>
>  llvm_add_default_components
>
> -if test "x$enable_opencl" = xyes; then
> -llvm_check_version_for "3" "6" "0" "opencl"
> -
> -LLVM_COMPONENTS="${LLVM_COMPONENTS} all-targets ipo linker 
> instrumentation"
> -LLVM_COMPONENTS="${LLVM_COMPONENTS} irreader option objcarcopts 
> profiledata"
> -fi
>  DEFINES="${DEFINES} -DHAVE_LLVM=0x0$LLVM_VERSION_INT 
> -DMESA_LLVM_VERSION_PATCH=$LLVM_VERSION_PATCH"
>  MESA_LLVM=1
> -
> -dnl Check for Clang internal headers
> -if test "x$enable_opencl" = xyes; then
> -if test -z "$CLANG_LIBDIR"; then
> -CLANG_LIBDIR=${LLVM_LIBDIR}
> -fi
> -CLANG_RESOURCE_DIR=$CLANG_LIBDIR/clang/${LLVM_VERSION}
> -AS_IF([test ! -f "$CLANG_RESOURCE_DIR/include/stddef.h"],
> -[AC_MSG_ERROR([Could not find clang internal header stddef.h 
> in $CLANG_RESOURCE_DIR Use --with-clang-libdir to specify the correct path to 
> the clang libraries.])])
> -fi
>  else
>  MESA_LLVM=0
>  LLVM_VERSION_INT=0
> @@ -2315,6 +2299,23 @@ else
>  fi
>  fi
>
> +if test "x$enable_opencl" = xyes; then
> +llvm_check_version_for "3" "6" "0" "opencl"
> +
> +LLVM_COMPONENTS="${LLVM_COMPONENTS} all-targets ipo linker 
> instrumentation"
> +LLVM_COMPONENTS="${LLVM_COMPONENTS} irreader option objcarcopts 
> profiledata"
> +fi
> +
> +dnl Check for Clang internal headers
> +if test "x$enable_opencl" = xyes; then
Nit: drop the second if test, yet preserve the comment ?
Disclaimer: haven't looked if later patches depend on the split.

With the above
Reviewed-by: Emil Velikov 

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 05/25] configure.ac: Remove useless oCL LLVM check

2016-10-12 Thread Emil Velikov
On 12 October 2016 at 00:02, Tobias Droste  wrote:
> This is handled by "llvm_check_version_for" a few lines below.
>
> Signed-off-by: Tobias Droste 
Reviewed-by: Emil Velikov 

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 07/25] configure.ac: Move gallium checks out of LLVM version check

2016-10-12 Thread Emil Velikov
On 12 October 2016 at 00:02, Tobias Droste  wrote:
> The gallium checks don't need to be inside the LLVM version check.
> If "enable-gallium-llvm" ist set this is called after the LLVM version check.
>
> Signed-off-by: Tobias Droste 
> ---
>  configure.ac | 18 --
>  1 file changed, 12 insertions(+), 6 deletions(-)
>
> diff --git a/configure.ac b/configure.ac
> index 0e4af6d..933e7b5 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -2276,12 +2276,6 @@ if test "x$enable_gallium_llvm" = xyes || test 
> "x$HAVE_RADEON_VULKAN" = xyes; th
>  LLVM_VERSION_INT=`echo $LLVM_VERSION | sed -e 
> 's/\([[0-9]]\)\.\([[0-9]]\)/\10\2/g'`
>  fi
>
> -LLVM_REQUIRED_VERSION_MAJOR="3"
> -LLVM_REQUIRED_VERSION_MINOR="3"
> -if test "$LLVM_VERSION_INT" -lt 
> "${LLVM_REQUIRED_VERSION_MAJOR}0${LLVM_REQUIRED_VERSION_MINOR}"; then
> -AC_MSG_ERROR([LLVM 
> $LLVM_REQUIRED_VERSION_MAJOR.$LLVM_REQUIRED_VERSION_MINOR or newer is 
> required])
> -fi
> -
>  llvm_add_default_components
>
>  DEFINES="${DEFINES} -DHAVE_LLVM=0x0$LLVM_VERSION_INT 
> -DMESA_LLVM_VERSION_PATCH=$LLVM_VERSION_PATCH"
> @@ -2295,6 +2289,18 @@ else
>  LLVM_VERSION_INT=0
>  fi
>
> +gallium_llvm_check() {
> +LLVM_REQUIRED_VERSION_MAJOR="3"
> +LLVM_REQUIRED_VERSION_MINOR="3"
> +if test "$LLVM_VERSION_INT" -lt 
> "${LLVM_REQUIRED_VERSION_MAJOR}0${LLVM_REQUIRED_VERSION_MINOR}"; then
> +AC_MSG_ERROR([LLVM 
> $LLVM_REQUIRED_VERSION_MAJOR.$LLVM_REQUIRED_VERSION_MINOR or newer is 
> required])
> +fi
> +}
> +
> +if test "x$enable_gallium_llvm" = xyes; then
> +gallium_llvm_check
Reuse llvm_check_version_for "3" "3" "0" "Gallium" and update the
commit message ?

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nvc0/ir: fix overwriting of value backing non-constant gather offset

2016-10-12 Thread Samuel Pitoiset

Unfortunately, this introduces some regressions:

spec/arb_gpu_shader5/texturegatheroffset/fs-r-none-shadow-2d: fail
spec/arb_gpu_shader5/texturegatheroffset/fs-r-none-shadow-2darray: fail
spec/arb_gpu_shader5/texturegatheroffset/fs-r-none-shadow-2drect: fail
spec/arb_gpu_shader5/texturegatheroffset/vs-r-none-shadow-2d: fail
spec/arb_gpu_shader5/texturegatheroffset/vs-r-none-shadow-2darray: fail
spec/arb_gpu_shader5/texturegatheroffset/vs-r-none-shadow-2drect: fail

Can you have a look?

On 10/10/2016 06:12 PM, Ilia Mirkin wrote:

Normally the value is an immediate, which is moved to some temporary, so
there's no problem. In the case of a non-constant offset (as allowed by
ARB_gpu_shader5), we have to take care to copy it first before using it
to build up the bits.

This fixes a compilation error observed in F1 2015.

Signed-off-by: Ilia Mirkin 
Cc: mesa-sta...@lists.freedesktop.org
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 3c3d611..4c013c4 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -820,11 +820,11 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
  // Either there is 1 offset, which goes into the 2 low bytes of the
  // first source, or there are 4 offsets, which go into 2 sources (8
  // values, 1 byte each).
- Value *offs[2] = {NULL, NULL};
+ Value *offs[2] = {bld.getScratch(), bld.getScratch()};
  for (n = 0; n < i->tex.useOffsets; n++) {
 for (c = 0; c < 2; ++c) {
if ((n % 2) == 0 && c == 0)
-  offs[n / 2] = i->offset[n][c].get();
+  bld.mkMov(offs[n / 2], i->offset[n][c].get());
else
   bld.mkOp3(OP_INSBF, TYPE_U32,
 offs[n / 2],



--
-Samuel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 15/22] anv: Use blorp for ClearDepthStencilImage

2016-10-12 Thread Pohjolainen, Topi
On Fri, Oct 07, 2016 at 09:41:13PM -0700, Jason Ekstrand wrote:
> Signed-off-by: Jason Ekstrand 
> ---
>  src/intel/vulkan/anv_blorp.c  |  60 
>  src/intel/vulkan/anv_meta_clear.c | 141 
> --
>  2 files changed, 60 insertions(+), 141 deletions(-)
> 
> diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> index 699032b..546737b 100644
> --- a/src/intel/vulkan/anv_blorp.c
> +++ b/src/intel/vulkan/anv_blorp.c
> @@ -809,6 +809,66 @@ void anv_CmdClearColorImage(
> blorp_batch_finish(&batch);
>  }
>  
> +void anv_CmdClearDepthStencilImage(
> +VkCommandBuffer commandBuffer,
> +VkImage image_h,
> +VkImageLayout   imageLayout,
> +const VkClearDepthStencilValue* pDepthStencil,
> +uint32_trangeCount,
> +const VkImageSubresourceRange*  pRanges)
> +{
> +   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
> +   ANV_FROM_HANDLE(anv_image, image, image_h);
> +
> +   struct blorp_batch batch;
> +   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);
> +
> +   struct blorp_surf depth, stencil;
> +   if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
> +  get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_DEPTH_BIT,
> +   &depth);
> +   } else {
> +  memset(&depth, 0, sizeof(depth));
> +   }
> +
> +   if (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
> +  get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_STENCIL_BIT,
> +   &stencil);
> +   } else {
> +  memset(&stencil, 0, sizeof(stencil));
> +   }
> +
> +   for (unsigned r = 0; r < rangeCount; r++) {
> +  if (pRanges[r].aspectMask == 0)
> + continue;
> +
> +  bool clear_depth = pRanges[r].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
> +  bool clear_stencil = pRanges[r].aspectMask & 
> VK_IMAGE_ASPECT_STENCIL_BIT;
> +
> +  unsigned base_layer = pRanges[r].baseArrayLayer;
> +  unsigned layer_count = pRanges[r].layerCount;
> +
> +  for (unsigned i = 0; i < pRanges[r].levelCount; i++) {
> + const unsigned level = pRanges[r].baseMipLevel + i;
> + const unsigned level_width = anv_minify(image->extent.width, level);
> + const unsigned level_height = anv_minify(image->extent.height, 
> level);
> +
> + if (image->type == VK_IMAGE_TYPE_3D) {
> +base_layer = 0;

In the current pass below we only override the layer_count leaving base_layer
alone. Otherwise this looks functionally the same. With some explanation or
comment added:

Reviewed-by: Topi Pohjolainen 

> +layer_count = anv_minify(image->extent.depth, level);
> + }
> +
> + blorp_clear_depth_stencil(&batch, &depth, &stencil,
> +   level, base_layer, layer_count,
> +   0, 0, level_width, level_height,
> +   clear_depth, pDepthStencil->depth,
> +   clear_stencil, pDepthStencil->stencil);
> +  }
> +   }
> +
> +   blorp_batch_finish(&batch);
> +}
> +
>  static void
>  resolve_image(struct blorp_batch *batch,
>const struct anv_image *src_image,
> diff --git a/src/intel/vulkan/anv_meta_clear.c 
> b/src/intel/vulkan/anv_meta_clear.c
> index 11b471f..ce7f8a7 100644
> --- a/src/intel/vulkan/anv_meta_clear.c
> +++ b/src/intel/vulkan/anv_meta_clear.c
> @@ -752,147 +752,6 @@ anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer 
> *cmd_buffer)
> meta_clear_end(&saved_state, cmd_buffer);
>  }
>  
> -void anv_CmdClearDepthStencilImage(
> -VkCommandBuffer commandBuffer,
> -VkImage image_h,
> -VkImageLayout   imageLayout,
> -const VkClearDepthStencilValue* pDepthStencil,
> -uint32_trangeCount,
> -const VkImageSubresourceRange*  pRanges)
> -{
> -   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
> -   ANV_FROM_HANDLE(anv_image, image, image_h);
> -   struct anv_meta_saved_state saved_state;
> -
> -   meta_clear_begin(&saved_state, cmd_buffer);
> -
> -   VkDevice device_h = anv_device_to_handle(cmd_buffer->device);
> -
> -   for (uint32_t r = 0; r < rangeCount; r++) {
> -  const VkImageSubresourceRange *range = &pRanges[r];
> -  for (uint32_t l = 0; l < anv_get_levelCount(image, range); ++l) {
> - const uint32_t layer_count = image->type == VK_IMAGE_TYPE_3D ?
> -  anv_minify(image->extent.depth, l) :
> -  anv_get_layerCount(image, range);
> - for (uint32_t s = 0; s < layer_count; ++s) {
> -struct anv_image_view iview;
> -anv_image_view_ini

Re: [Mesa-dev] [PATCH v3 08/25] configure.ac: Move LLVM version check to the top

2016-10-12 Thread Emil Velikov
On 12 October 2016 at 00:02, Tobias Droste  wrote:
> A function with the LLVM version checked is moved to the top.
> The function is called where the old code was.
Replace the second line with "... in order to reuse/rework X"

> No functional change.
>
> Signed-off-by: Tobias Droste 
> ---
>  configure.ac | 91 
> 
>  1 file changed, 49 insertions(+), 42 deletions(-)
>
> diff --git a/configure.ac b/configure.ac
> index 933e7b5..0f19a4f 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -861,6 +861,54 @@ fi
>  AC_SUBST([SELINUX_CFLAGS])
>  AC_SUBST([SELINUX_LIBS])
>
> +dnl
> +dnl LLVM
> +dnl
> +llvm_get_version() {
Nit: The code below does a lot more than "get_version" -
get_environment/set_variables/other

With the above
Reviewed-by: Emil Velikov 

> +if test -z "${LLVM_CONFIG}"; then
> +if test -n "$llvm_prefix"; then
> +AC_PATH_TOOL([LLVM_CONFIG], [llvm-config], [no], 
> ["$llvm_prefix/bin"])
> +else
> +AC_PATH_TOOL([LLVM_CONFIG], [llvm-config], [no])
> +fi
> +fi
> +
> +if test "x$LLVM_CONFIG" != xno; then
> +LLVM_VERSION=`$LLVM_CONFIG --version | egrep -o '^[[0-9.]]+'`
...
> +else
> +MESA_LLVM=0
> +LLVM_VERSION_INT=0
Just realised that we should error out in this case. After all one
requests llvm, so silently ignoring that they're missing llvm-config
isn't a smart idea. Something like below (be that as a preparatory,
in-between or at the end of the series) would be great.

if test "x$LLVM_CONFIG" = xno; then
   AC_MSG_ERROR([...])
fi

LLVM_VERSION=...
...

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nv50/ir: copy over value's register id when resolving merge of a phi

2016-10-12 Thread Samuel Pitoiset

Sounds reasonable.

Reviewed-by: Samuel Pitoiset 

On 10/12/2016 02:51 AM, Ilia Mirkin wrote:

The offset needs to be properly copied over to the phi value, otherwise
it will get assigned to the base of the merge instead of the proper
location.

Signed-off-by: Ilia Mirkin 
Cc: mesa-sta...@lists.freedesktop.org
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 7e64f7c..d36c853 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -1905,8 +1905,10 @@ GCRA::resolveSplitsAndMerges()
  // their registers should be identical.
  if (v->getInsn()->op == OP_PHI || v->getInsn()->op == OP_UNION) {
 Instruction *phi = v->getInsn();
-for (int phis = 0; phi->srcExists(phis); ++phis)
+for (int phis = 0; phi->srcExists(phis); ++phis) {
phi->getSrc(phis)->join = v;
+   phi->getSrc(phis)->reg.data.id = v->reg.data.id;
+}
  }
  reg += v->reg.size;
   }



--
-Samuel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 17/22] anv: Use blorp for ClearAttachments

2016-10-12 Thread Pohjolainen, Topi
On Fri, Oct 07, 2016 at 09:41:15PM -0700, Jason Ekstrand wrote:
> Signed-off-by: Jason Ekstrand 
> ---
>  src/intel/vulkan/anv_blorp.c  | 113 
> ++
>  src/intel/vulkan/anv_meta_clear.c |  24 
>  2 files changed, 113 insertions(+), 24 deletions(-)
> 
> diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> index 546737b..4279f62 100644
> --- a/src/intel/vulkan/anv_blorp.c
> +++ b/src/intel/vulkan/anv_blorp.c
> @@ -870,6 +870,119 @@ void anv_CmdClearDepthStencilImage(
>  }
>  
>  static void
> +clear_color_attachment(struct anv_cmd_buffer *cmd_buffer,
> +   struct blorp_batch *batch,
> +   const VkClearAttachment *attachment,
> +   uint32_t rectCount, const VkClearRect *pRects)
> +{
> +   const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
> +   const struct anv_subpass *subpass = cmd_buffer->state.subpass;
> +   const uint32_t att = attachment->colorAttachment;
> +   const struct anv_image_view *iview =
> +  fb->attachments[subpass->color_attachments[att]];
> +   const struct anv_image *image = iview->image;
> +
> +   struct blorp_surf surf;
> +   get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT, &surf);
> +
> +   union isl_color_value clear_color;
> +   memcpy(clear_color.u32, attachment->clearValue.color.uint32,
> +  sizeof(clear_color.u32));
> +
> +   static const bool color_write_disable[4] = { false, false, false, false };
> +
> +   for (uint32_t r = 0; r < rectCount; ++r) {
> +  const VkOffset2D offset = pRects[r].rect.offset;
> +  const VkExtent2D extent = pRects[r].rect.extent;
> +  blorp_clear(batch, &surf, iview->isl.format, iview->isl.swizzle,
> +  iview->isl.base_level,
> +  iview->isl.base_array_layer + pRects[r].baseArrayLayer,

I'm not that familiar with vulkan yet and looking emit_color_clear() does not
directly tell me if "pRects[r].baseArrayLayer" is relative to the view base
layer. If that is the case then this patch looks good to me:

Reviewed-by: Topi Pohjolainen 

> +  pRects[r].layerCount,
> +  offset.x, offset.y,
> +  offset.x + extent.width, offset.y + extent.height,
> +  clear_color, color_write_disable);
> +   }
> +}
> +
> +static void
> +clear_depth_stencil_attachment(struct anv_cmd_buffer *cmd_buffer,
> +   struct blorp_batch *batch,
> +   const VkClearAttachment *attachment,
> +   uint32_t rectCount, const VkClearRect *pRects)
> +{
> +   const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
> +   const struct anv_subpass *subpass = cmd_buffer->state.subpass;
> +   const struct anv_image_view *iview =
> +  fb->attachments[subpass->depth_stencil_attachment];
> +   const struct anv_image *image = iview->image;
> +
> +   bool clear_depth = attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
> +   bool clear_stencil = attachment->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;

These two could be const.

> +
> +   struct blorp_surf depth, stencil;
> +   if (clear_depth) {
> +  get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_DEPTH_BIT,
> +   &depth);
> +   } else {
> +  memset(&depth, 0, sizeof(depth));
> +   }
> +
> +   if (clear_stencil) {
> +  get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_STENCIL_BIT,
> +   &stencil);
> +   } else {
> +  memset(&stencil, 0, sizeof(stencil));
> +   }
> +
> +   for (uint32_t r = 0; r < rectCount; ++r) {
> +  const VkOffset2D offset = pRects[r].rect.offset;
> +  const VkExtent2D extent = pRects[r].rect.extent;
> +  VkClearDepthStencilValue value = attachment->clearValue.depthStencil;
> +  blorp_clear_depth_stencil(batch, &depth, &stencil,
> +iview->isl.base_level,
> +iview->isl.base_array_layer +
> +   pRects[r].baseArrayLayer,
> +pRects[r].layerCount,
> +offset.x, offset.y,
> +offset.x + extent.width,
> +offset.y + extent.height,
> +clear_depth, value.depth,
> +clear_stencil, value.stencil);
> +   }
> +}
> +
> +void anv_CmdClearAttachments(
> +VkCommandBuffer commandBuffer,
> +uint32_tattachmentCount,
> +const VkClearAttachment*pAttachments,
> +uint32_trectCount,
> +const VkClearRect*  pRects)
> +{
> +   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
> +
> +   /* Because this gets called within a render pass, we tell blorp not 

[Mesa-dev] [PATCH 1/2] st/mesa: Take local references for sync object fences

2016-10-12 Thread Michel Dänzer
From: Michel Dänzer 

Fixes a race condition:

Thread AThread B


Test if so->fence != NULL
=> true
Set so->fence = NULL
Dereference so->fence
=> NULL dereference

Also, taking a reference prevents the fence from being destroyed.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98172
Cc: mesa-sta...@lists.freedesktop.org
Signed-off-by: Michel Dänzer 
---
 src/mesa/state_tracker/st_cb_syncobj.c | 19 ++-
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_syncobj.c 
b/src/mesa/state_tracker/st_cb_syncobj.c
index 123925a..de01880 100644
--- a/src/mesa/state_tracker/st_cb_syncobj.c
+++ b/src/mesa/state_tracker/st_cb_syncobj.c
@@ -81,17 +81,22 @@ static void st_check_sync(struct gl_context *ctx, struct 
gl_sync_object *obj)
struct pipe_context *pipe = st_context(ctx)->pipe;
struct pipe_screen *screen = pipe->screen;
struct st_sync_object *so = (struct st_sync_object*)obj;
+   struct pipe_fence_handle *fence = NULL;
+
+   screen->fence_reference(screen, &fence, so->fence);
 
/* If the fence doesn't exist, assume it's signalled. */
-   if (!so->fence) {
+   if (!fence) {
   so->b.StatusFlag = GL_TRUE;
   return;
}
 
-   if (screen->fence_finish(screen, pipe, so->fence, 0)) {
+   if (screen->fence_finish(screen, pipe, fence, 0)) {
   screen->fence_reference(screen, &so->fence, NULL);
   so->b.StatusFlag = GL_TRUE;
}
+
+   screen->fence_reference(screen, &fence, NULL);
 }
 
 static void st_client_wait_sync(struct gl_context *ctx,
@@ -101,9 +106,12 @@ static void st_client_wait_sync(struct gl_context *ctx,
struct pipe_context *pipe = st_context(ctx)->pipe;
struct pipe_screen *screen = pipe->screen;
struct st_sync_object *so = (struct st_sync_object*)obj;
+   struct pipe_fence_handle *fence = NULL;
+
+   screen->fence_reference(screen, &fence, so->fence);
 
/* If the fence doesn't exist, assume it's signalled. */
-   if (!so->fence) {
+   if (!fence) {
   so->b.StatusFlag = GL_TRUE;
   return;
}
@@ -120,11 +128,12 @@ static void st_client_wait_sync(struct gl_context *ctx,
 * Assume GL_SYNC_FLUSH_COMMANDS_BIT is always set, because applications
 * forget to set it.
 */
-   if (so->fence &&
-   screen->fence_finish(screen, pipe, so->fence, timeout)) {
+   if (screen->fence_finish(screen, pipe, fence, timeout)) {
   screen->fence_reference(screen, &so->fence, NULL);
   so->b.StatusFlag = GL_TRUE;
}
+
+   screen->fence_reference(screen, &fence, NULL);
 }
 
 static void st_server_wait_sync(struct gl_context *ctx,
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] st/mesa: Refactor st_check_sync to use st_client_wait_sync

2016-10-12 Thread Michel Dänzer
From: Michel Dänzer 

Duplicate code cleanup, no functional change intended.

Signed-off-by: Michel Dänzer 
---
 src/mesa/state_tracker/st_cb_syncobj.c | 28 +---
 1 file changed, 5 insertions(+), 23 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_syncobj.c 
b/src/mesa/state_tracker/st_cb_syncobj.c
index de01880..6e2cb26 100644
--- a/src/mesa/state_tracker/st_cb_syncobj.c
+++ b/src/mesa/state_tracker/st_cb_syncobj.c
@@ -76,29 +76,6 @@ static void st_fence_sync(struct gl_context *ctx, struct 
gl_sync_object *obj,
pipe->flush(pipe, &so->fence, PIPE_FLUSH_DEFERRED);
 }
 
-static void st_check_sync(struct gl_context *ctx, struct gl_sync_object *obj)
-{
-   struct pipe_context *pipe = st_context(ctx)->pipe;
-   struct pipe_screen *screen = pipe->screen;
-   struct st_sync_object *so = (struct st_sync_object*)obj;
-   struct pipe_fence_handle *fence = NULL;
-
-   screen->fence_reference(screen, &fence, so->fence);
-
-   /* If the fence doesn't exist, assume it's signalled. */
-   if (!fence) {
-  so->b.StatusFlag = GL_TRUE;
-  return;
-   }
-
-   if (screen->fence_finish(screen, pipe, fence, 0)) {
-  screen->fence_reference(screen, &so->fence, NULL);
-  so->b.StatusFlag = GL_TRUE;
-   }
-
-   screen->fence_reference(screen, &fence, NULL);
-}
-
 static void st_client_wait_sync(struct gl_context *ctx,
 struct gl_sync_object *obj,
 GLbitfield flags, GLuint64 timeout)
@@ -136,6 +113,11 @@ static void st_client_wait_sync(struct gl_context *ctx,
screen->fence_reference(screen, &fence, NULL);
 }
 
+static void st_check_sync(struct gl_context *ctx, struct gl_sync_object *obj)
+{
+   st_client_wait_sync(ctx, obj, 0, 0);
+}
+  
 static void st_server_wait_sync(struct gl_context *ctx,
 struct gl_sync_object *obj,
 GLbitfield flags, GLuint64 timeout)
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 15/22] anv: Use blorp for ClearDepthStencilImage

2016-10-12 Thread Pohjolainen, Topi
On Wed, Oct 12, 2016 at 12:11:01PM +0300, Pohjolainen, Topi wrote:
> On Fri, Oct 07, 2016 at 09:41:13PM -0700, Jason Ekstrand wrote:
> > Signed-off-by: Jason Ekstrand 
> > ---
> >  src/intel/vulkan/anv_blorp.c  |  60 
> >  src/intel/vulkan/anv_meta_clear.c | 141 
> > --
> >  2 files changed, 60 insertions(+), 141 deletions(-)
> > 
> > diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> > index 699032b..546737b 100644
> > --- a/src/intel/vulkan/anv_blorp.c
> > +++ b/src/intel/vulkan/anv_blorp.c
> > @@ -809,6 +809,66 @@ void anv_CmdClearColorImage(
> > blorp_batch_finish(&batch);
> >  }
> >  
> > +void anv_CmdClearDepthStencilImage(
> > +VkCommandBuffer commandBuffer,
> > +VkImage image_h,
> > +VkImageLayout   imageLayout,
> > +const VkClearDepthStencilValue* pDepthStencil,
> > +uint32_trangeCount,
> > +const VkImageSubresourceRange*  pRanges)
> > +{
> > +   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
> > +   ANV_FROM_HANDLE(anv_image, image, image_h);
> > +
> > +   struct blorp_batch batch;
> > +   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);
> > +
> > +   struct blorp_surf depth, stencil;
> > +   if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
> > +  get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_DEPTH_BIT,
> > +   &depth);
> > +   } else {
> > +  memset(&depth, 0, sizeof(depth));
> > +   }
> > +
> > +   if (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
> > +  get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_STENCIL_BIT,
> > +   &stencil);
> > +   } else {
> > +  memset(&stencil, 0, sizeof(stencil));
> > +   }
> > +
> > +   for (unsigned r = 0; r < rangeCount; r++) {
> > +  if (pRanges[r].aspectMask == 0)
> > + continue;

And this early bailout looks added functionality. It makes sense, just noting.

> > +
> > +  bool clear_depth = pRanges[r].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
> > +  bool clear_stencil = pRanges[r].aspectMask & 
> > VK_IMAGE_ASPECT_STENCIL_BIT;
> > +
> > +  unsigned base_layer = pRanges[r].baseArrayLayer;
> > +  unsigned layer_count = pRanges[r].layerCount;
> > +
> > +  for (unsigned i = 0; i < pRanges[r].levelCount; i++) {
> > + const unsigned level = pRanges[r].baseMipLevel + i;
> > + const unsigned level_width = anv_minify(image->extent.width, 
> > level);
> > + const unsigned level_height = anv_minify(image->extent.height, 
> > level);
> > +
> > + if (image->type == VK_IMAGE_TYPE_3D) {
> > +base_layer = 0;
> 
> In the current pass below we only override the layer_count leaving base_layer
> alone. Otherwise this looks functionally the same. With some explanation or
> comment added:
> 
> Reviewed-by: Topi Pohjolainen 
> 
> > +layer_count = anv_minify(image->extent.depth, level);
> > + }
> > +
> > + blorp_clear_depth_stencil(&batch, &depth, &stencil,
> > +   level, base_layer, layer_count,
> > +   0, 0, level_width, level_height,
> > +   clear_depth, pDepthStencil->depth,
> > +   clear_stencil, pDepthStencil->stencil);
> > +  }
> > +   }
> > +
> > +   blorp_batch_finish(&batch);
> > +}
> > +
> >  static void
> >  resolve_image(struct blorp_batch *batch,
> >const struct anv_image *src_image,
> > diff --git a/src/intel/vulkan/anv_meta_clear.c 
> > b/src/intel/vulkan/anv_meta_clear.c
> > index 11b471f..ce7f8a7 100644
> > --- a/src/intel/vulkan/anv_meta_clear.c
> > +++ b/src/intel/vulkan/anv_meta_clear.c
> > @@ -752,147 +752,6 @@ anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer 
> > *cmd_buffer)
> > meta_clear_end(&saved_state, cmd_buffer);
> >  }
> >  
> > -void anv_CmdClearDepthStencilImage(
> > -VkCommandBuffer commandBuffer,
> > -VkImage image_h,
> > -VkImageLayout   imageLayout,
> > -const VkClearDepthStencilValue* pDepthStencil,
> > -uint32_trangeCount,
> > -const VkImageSubresourceRange*  pRanges)
> > -{
> > -   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
> > -   ANV_FROM_HANDLE(anv_image, image, image_h);
> > -   struct anv_meta_saved_state saved_state;
> > -
> > -   meta_clear_begin(&saved_state, cmd_buffer);
> > -
> > -   VkDevice device_h = anv_device_to_handle(cmd_buffer->device);
> > -
> > -   for (uint32_t r = 0; r < rangeCount; r++) {
> > -  const VkImageSubresourceRange *range = &pRanges[r];
> > -  for (uint32_t l = 0; l < anv_get_levelCou

[Mesa-dev] [Bug 98172] Concurrent call to glClientWaitSync results in segfault in one of the waiters.

2016-10-12 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=98172

--- Comment #4 from Michel Dänzer  ---
Shinji-san,

(In reply to shinji.suzuki from comment #3)
> Yes, it does! Thank you for your interest and effort in solving the issue.

Great, thank you for testing the patch.


> My itch is that I can't see why your patch eliminates the race.

Does the commit log of https://patchwork.freedesktop.org/patch/115219/ clarify
it?

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 18/22] anv: Use blorp for subpass clears

2016-10-12 Thread Pohjolainen, Topi
On Fri, Oct 07, 2016 at 09:41:16PM -0700, Jason Ekstrand wrote:
> Signed-off-by: Jason Ekstrand 
> ---
>  src/intel/vulkan/anv_blorp.c  |  81 +++
>  src/intel/vulkan/anv_meta_clear.c | 298 
> --
>  2 files changed, 81 insertions(+), 298 deletions(-)
> 
> diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> index 4279f62..968c887 100644
> --- a/src/intel/vulkan/anv_blorp.c
> +++ b/src/intel/vulkan/anv_blorp.c
> @@ -982,6 +982,87 @@ void anv_CmdClearAttachments(
> blorp_batch_finish(&batch);
>  }
>  
> +static bool
> +subpass_needs_clear(const struct anv_cmd_buffer *cmd_buffer)
> +{
> +   const struct anv_cmd_state *cmd_state = &cmd_buffer->state;
> +   uint32_t ds = cmd_state->subpass->depth_stencil_attachment;
> +
> +   for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
> +  uint32_t a = cmd_state->subpass->color_attachments[i];
> +  if (cmd_state->attachments[a].pending_clear_aspects) {
> + return true;
> +  }
> +   }
> +
> +   if (ds != VK_ATTACHMENT_UNUSED &&
> +   cmd_state->attachments[ds].pending_clear_aspects) {
> +  return true;
> +   }
> +
> +   return false;
> +}
> +
> +void
> +anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer)
> +{
> +   const struct anv_cmd_state *cmd_state = &cmd_buffer->state;
> +
> +   if (!subpass_needs_clear(cmd_buffer))
> +  return;
> +
> +   /* Because this gets called within a render pass, we tell blorp not to
> +* trash our depth and stencil buffers.
> +*/
> +   struct blorp_batch batch;
> +   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
> +BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);
> +
> +   VkClearRect clear_rect = {

Could be const.

> +  .rect = cmd_buffer->state.render_area,
> +  .baseArrayLayer = 0,
> +  .layerCount = cmd_buffer->state.framebuffer->layers,
> +   };
> +
> +   for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
> +  uint32_t a = cmd_state->subpass->color_attachments[i];

As well.

> +
> +  if (!cmd_state->attachments[a].pending_clear_aspects)
> + continue;
> +
> +  assert(cmd_state->attachments[a].pending_clear_aspects ==
> + VK_IMAGE_ASPECT_COLOR_BIT);
> +
> +  VkClearAttachment clear_att = {

And this.

> + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
> + .colorAttachment = i, /* Use attachment index relative to subpass */
> + .clearValue = cmd_state->attachments[a].clear_value,
> +  };
> +
> +  clear_color_attachment(cmd_buffer, &batch, &clear_att, 1, &clear_rect);
> +
> +  cmd_state->attachments[a].pending_clear_aspects = 0;
> +   }
> +
> +   uint32_t ds = cmd_state->subpass->depth_stencil_attachment;

And this.

> +
> +   if (ds != VK_ATTACHMENT_UNUSED &&
> +   cmd_state->attachments[ds].pending_clear_aspects) {
> +
> +  VkClearAttachment clear_att = {

And this.

All in all looks quite a bit simpler:

Reviewed-by: Topi Pohjolainen 

> + .aspectMask = cmd_state->attachments[ds].pending_clear_aspects,
> + .clearValue = cmd_state->attachments[ds].clear_value,
> +  };
> +
> +  clear_depth_stencil_attachment(cmd_buffer, &batch,
> + &clear_att, 1, &clear_rect);
> +
> +  cmd_state->attachments[ds].pending_clear_aspects = 0;
> +   }
> +
> +   blorp_batch_finish(&batch);
> +}
> +
>  static void
>  resolve_image(struct blorp_batch *batch,
>const struct anv_image *src_image,
> diff --git a/src/intel/vulkan/anv_meta_clear.c 
> b/src/intel/vulkan/anv_meta_clear.c
> index 6802229..2bc718b 100644
> --- a/src/intel/vulkan/anv_meta_clear.c
> +++ b/src/intel/vulkan/anv_meta_clear.c
> @@ -41,26 +41,6 @@ struct depthstencil_clear_vattrs {
>  };
>  
>  static void
> -meta_clear_begin(struct anv_meta_saved_state *saved_state,
> - struct anv_cmd_buffer *cmd_buffer)
> -{
> -   anv_meta_save(saved_state, cmd_buffer,
> - (1 << VK_DYNAMIC_STATE_VIEWPORT) |
> - (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE) |
> - (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK));
> -
> -   /* Avoid uploading more viewport states than necessary */
> -   cmd_buffer->state.dynamic.viewport.count = 0;
> -}
> -
> -static void
> -meta_clear_end(struct anv_meta_saved_state *saved_state,
> -   struct anv_cmd_buffer *cmd_buffer)
> -{
> -   anv_meta_restore(saved_state, cmd_buffer);
> -}
> -
> -static void
>  build_color_shaders(struct nir_shader **out_vs,
>  struct nir_shader **out_fs,
>  uint32_t frag_output)
> @@ -337,80 +317,6 @@ anv_device_finish_meta_clear_state(struct anv_device 
> *device)
>  }
>  
>  static void
> -emit_color_clear(struct anv_cmd_buffer *cmd_buffer,
> - const VkClearAttachment *clear_att,
> - const VkClearRect *clear_rect)
> -{
> -   struct anv_device *device = cmd_buffer->device;
> -   c

Re: [Mesa-dev] [PATCH v3 12/25] configure.ac: Move gallium LLVM checks

2016-10-12 Thread Emil Velikov
On 12 October 2016 at 00:02, Tobias Droste  wrote:
> There's already a function for gallium LLVM checks.
> Move the remaining code to this function and call it for every driver
> that uses LLVM.
>
> Signed-off-by: Tobias Droste 
> ---
>  configure.ac | 31 +--
>  1 file changed, 13 insertions(+), 18 deletions(-)
>
> diff --git a/configure.ac b/configure.ac
> index 283e553..61a0253 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -2310,18 +2310,6 @@ if test "x$enable_gallium_llvm" = xauto; then
>  esac
>  fi
>
> -gallium_llvm_check() {
> -LLVM_REQUIRED_VERSION_MAJOR="3"
> -LLVM_REQUIRED_VERSION_MINOR="3"
> -if test "$LLVM_VERSION_INT" -lt 
> "${LLVM_REQUIRED_VERSION_MAJOR}0${LLVM_REQUIRED_VERSION_MINOR}"; then
> -AC_MSG_ERROR([LLVM 
> $LLVM_REQUIRED_VERSION_MAJOR.$LLVM_REQUIRED_VERSION_MINOR or newer is 
> required])
> -fi
> -}
> -
> -if test "x$enable_gallium_llvm" = xyes; then
> -gallium_llvm_check
> -fi
> -
>  dnl Directory for XVMC libs
>  AC_ARG_WITH([xvmc-libdir],
>  [AS_HELP_STRING([--with-xvmc-libdir=DIR],
> @@ -2378,12 +2366,16 @@ gallium_require_drm() {
>  }
>
>  gallium_require_llvm() {
> -if test "x$MESA_LLVM" = x0; then
> -case "$host" in *gnux32) return;; esac
> -case "$host_cpu" in
> -i*86|x86_64|amd64) AC_MSG_ERROR([LLVM is required to build $1 on x86 
> and x86_64]);;
> -esac
> -fi
> +case "$host" in *gnux32) return;; esac
> +case "$host_cpu" in
> +i*86|x86_64|amd64)
> +LLVM_REQUIRED_VERSION_MAJOR="3"
> +LLVM_REQUIRED_VERSION_MINOR="3"
> +if test "$LLVM_VERSION_INT" -lt 
> "${LLVM_REQUIRED_VERSION_MAJOR}0${LLVM_REQUIRED_VERSION_MINOR}"; then
> +AC_MSG_ERROR([LLVM 
> $LLVM_REQUIRED_VERSION_MAJOR.$LLVM_REQUIRED_VERSION_MINOR or newer is 
> required])
> +fi
> +;;
> +esac
>  }
>
The function it quite "ugly" as-is and this patch changes things in a fun way.

Namely: before you'll get the minimum required version check
regardless of host_cpu for everyone, while now you get the opposite -
everyone is 'constrained' by the host_cpu check. Admittedly I've have
not idea if llvmpipe is a thing on outside x86 land.

Checking the gallium_require_llvm users we want:
 - swr -> bail out if host_cpu !x86/x86-64 or gallium-llvm toggle is
off || llvm version req. is not met
 - r300 -> on host_cpu eq. x86/x86-64, check the gallium-llvm toggle
(this is 'premature' optimisation which we might want to rework/drop
in the long run)

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 20/22] anv: Get rid of graphics_pipeline_create_info_extra

2016-10-12 Thread Pohjolainen, Topi
On Fri, Oct 07, 2016 at 09:41:18PM -0700, Jason Ekstrand wrote:
> Now that we no longer have meta, all pipelines get created via the normal
> Vulkan pipeline creation mechanics.  There is no more need for this bit of
> extra magic data that we've been passing around.
> 
> Signed-off-by: Jason Ekstrand 
> ---
>  src/intel/vulkan/anv_genX.h   |  1 -
>  src/intel/vulkan/anv_pipeline.c   | 55 
> ---
>  src/intel/vulkan/anv_private.h| 15 --
>  src/intel/vulkan/gen7_pipeline.c  | 13 -
>  src/intel/vulkan/gen8_pipeline.c  | 16 +-
>  src/intel/vulkan/genX_pipeline.c  |  2 --
>  src/intel/vulkan/genX_pipeline_util.h | 34 +++---
>  7 files changed, 35 insertions(+), 101 deletions(-)
> 
> diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
> index 81ebbaa..b862c06 100644
> --- a/src/intel/vulkan/anv_genX.h
> +++ b/src/intel/vulkan/anv_genX.h
> @@ -62,7 +62,6 @@ VkResult
>  genX(graphics_pipeline_create)(VkDevice _device,
> struct anv_pipeline_cache *cache,
> const VkGraphicsPipelineCreateInfo 
> *pCreateInfo,
> -   const struct 
> anv_graphics_pipeline_create_info *extra,
> const VkAllocationCallbacks *alloc,
> VkPipeline *pPipeline);
>  
> diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
> index e835b14..4b5c603 100644
> --- a/src/intel/vulkan/anv_pipeline.c
> +++ b/src/intel/vulkan/anv_pipeline.c
> @@ -267,7 +267,6 @@ populate_gs_prog_key(const struct gen_device_info 
> *devinfo,
>  static void
>  populate_wm_prog_key(const struct gen_device_info *devinfo,
>   const VkGraphicsPipelineCreateInfo *info,
> - const struct anv_graphics_pipeline_create_info *extra,
>   struct brw_wm_prog_key *key)
>  {
> ANV_FROM_HANDLE(anv_render_pass, render_pass, info->renderPass);
> @@ -284,12 +283,8 @@ populate_wm_prog_key(const struct gen_device_info 
> *devinfo,
> /* XXX Vulkan doesn't appear to specify */
> key->clamp_fragment_color = false;
>  
> -   if (extra && extra->color_attachment_count >= 0) {
> -  key->nr_color_regions = extra->color_attachment_count;
> -   } else {
> -  key->nr_color_regions =
> - render_pass->subpasses[info->subpass].color_count;
> -   }
> +   key->nr_color_regions =
> +  render_pass->subpasses[info->subpass].color_count;
>  
> key->replicate_alpha = key->nr_color_regions > 1 &&
>info->pMultisampleState &&
> @@ -605,7 +600,6 @@ static VkResult
>  anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
>  struct anv_pipeline_cache *cache,
>  const VkGraphicsPipelineCreateInfo *info,
> -const struct anv_graphics_pipeline_create_info 
> *extra,
>  struct anv_shader_module *module,
>  const char *entrypoint,
>  const VkSpecializationInfo *spec_info)
> @@ -617,7 +611,7 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
> struct anv_shader_bin *bin = NULL;
> unsigned char sha1[20];
>  
> -   populate_wm_prog_key(&pipeline->device->info, info, extra, &key);
> +   populate_wm_prog_key(&pipeline->device->info, info, &key);
>  
> if (cache) {
>anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
> @@ -675,11 +669,6 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
>   num_rts += array_len;
>}
>  
> -  if (pipeline->use_repclear) {
> - assert(num_rts == 1);
> - key.nr_color_regions = 1;
> -  }
> -
>if (num_rts == 0) {
>   /* If we have no render targets, we need a null render target */
>   rt_bindings[0] = (struct anv_pipeline_binding) {
> @@ -707,8 +696,7 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
>unsigned code_size;
>const unsigned *shader_code =
>   brw_compile_fs(compiler, NULL, mem_ctx, &key, &prog_data, nir,
> -NULL, -1, -1, true, pipeline->use_repclear,
> -&code_size, NULL);
> +NULL, -1, -1, true, false, &code_size, NULL);
>if (shader_code == NULL) {
>   ralloc_free(mem_ctx);
>   return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
> @@ -1013,7 +1001,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
>struct anv_device *device,
>struct anv_pipeline_cache *cache,
>const VkGraphicsPipelineCreateInfo *pCreateInfo,
> -  const struct anv_graphics_pipeline_create_info *extra,
>const VkAllocationCallbacks *alloc)
>  {
> VkResult result;
> @@ -1041,8 +1028,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
> p

Re: [Mesa-dev] [PATCH v3 24/25] configure.ac: Only add default LLVM components if needed

2016-10-12 Thread Emil Velikov
On 12 October 2016 at 00:02, Tobias Droste  wrote:
> Each driver has to expllicitly call llvm_add_default_components to
> add the shared components.
> This way we can fail the build if a component is not found and avoid
> the recursive solution from a previous version of the pach series.
>
s/pach/patch/

Does this mean that the "default" components are required only by the
gallivm module ? Please rename the function to reflect that.

Don't recall if swr driver is/was using any of it, but the nv30 path
of nouveau does use it, iirc. In the latter you want to call the
function if --enable-gallium-llvm is set. Alternatively keep
llvm_add_default_components within the "test enable_gallium_llvm !=
xno" block.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] vbo: Correctly handle attribute offsets in dlist draw.

2016-10-12 Thread Mathias Fröhlich
On Wednesday, 24 August 2016 08:32:10 CEST mathias.froehl...@gmx.net wrote:
> From: Mathias Fröhlich 
> 
> Hi all,
> 
> kind of a ping with a rephrased commit message
> and the style change.
> Please review.

Ping.

best
Mathias

> 
> Thanks!
> 
> Mathias
> 
> 
> When executing a display list draw, for the offset
> list to be correct, the offset computation needs to
> accumulate all attribute size values in order.
> Specifically, if we are shuffling around the position
> and generic0 attributes, we may violate the order or
> if we do not walk the generic vbo attributes we may
> skip some of the attributes.
> Even if this is an unlikely usecase we can fix this
> by precomputing the offsets on the full attribute list
> and store the full offset list in the display list node.
> 
> v2: Formatting fix
> 
> Signed-off-by: Mathias Fröhlich 
> ---
>  src/mesa/vbo/vbo_save.h  |  1 +
>  src/mesa/vbo/vbo_save_api.c  |  5 +
>  src/mesa/vbo/vbo_save_draw.c | 35 +--
>  3 files changed, 23 insertions(+), 18 deletions(-)
> 
> diff --git a/src/mesa/vbo/vbo_save.h b/src/mesa/vbo/vbo_save.h
> index 2843b3c..a61973f 100644
> --- a/src/mesa/vbo/vbo_save.h
> +++ b/src/mesa/vbo/vbo_save.h
> @@ -64,6 +64,7 @@ struct vbo_save_vertex_list {
> GLbitfield64 enabled; /**< mask of enabled vbo arrays. */
> GLubyte attrsz[VBO_ATTRIB_MAX];
> GLenum attrtype[VBO_ATTRIB_MAX];
> +   GLushort offsets[VBO_ATTRIB_MAX];
> GLuint vertex_size;  /**< size in GLfloats */
>  
> /* Copy of the final vertex from node->vertex_store->bufferobj.
> diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c
> index f648ccc..36af426 100644
> --- a/src/mesa/vbo/vbo_save_api.c
> +++ b/src/mesa/vbo/vbo_save_api.c
> @@ -415,6 +415,7 @@ _save_compile_vertex_list(struct gl_context *ctx)
>  {
> struct vbo_save_context *save = &vbo_context(ctx)->save;
> struct vbo_save_vertex_list *node;
> +   GLushort offset = 0;
>  
> /* Allocate space for this structure in the display list currently
>  * being compiled.
> @@ -436,6 +437,10 @@ _save_compile_vertex_list(struct gl_context *ctx)
> node->vertex_size = save->vertex_size;
> node->buffer_offset =
>(save->buffer - save->vertex_store->buffer) * sizeof(GLfloat);
> +   for (unsigned i = 0; i < VBO_ATTRIB_MAX; ++i) {
> +  node->offsets[i] = offset;
> +  offset += node->attrsz[i] * sizeof(GLfloat);
> +   }
> node->count = save->vert_count;
> node->wrap_count = save->copied.nr;
> node->dangling_attr_ref = save->dangling_attr_ref;
> diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c
> index 507ab82..e69c108 100644
> --- a/src/mesa/vbo/vbo_save_draw.c
> +++ b/src/mesa/vbo/vbo_save_draw.c
> @@ -26,6 +26,7 @@
>   *Keith Whitwell 
>   */
>  
> +#include 
>  #include "main/glheader.h"
>  #include "main/bufferobj.h"
>  #include "main/context.h"
> @@ -136,15 +137,10 @@ static void vbo_bind_vertex_list(struct gl_context 
*ctx,
> struct vbo_context *vbo = vbo_context(ctx);
> struct vbo_save_context *save = &vbo->save;
> struct gl_client_array *arrays = save->arrays;
> -   GLuint buffer_offset = node->buffer_offset;
> const GLuint *map;
> GLuint attr;
> -   GLubyte node_attrsz[VBO_ATTRIB_MAX];  /* copy of node->attrsz[] */
> -   GLenum node_attrtype[VBO_ATTRIB_MAX];  /* copy of node->attrtype[] */
> GLbitfield64 varying_inputs = 0x0;
> -
> -   memcpy(node_attrsz, node->attrsz, sizeof(node->attrsz));
> -   memcpy(node_attrtype, node->attrtype, sizeof(node->attrtype));
> +   bool generic_from_pos = false;
>  
> /* Install the default (ie Current) attributes first, then overlay
>  * all active ones.
> @@ -176,10 +172,7 @@ static void vbo_bind_vertex_list(struct gl_context 
*ctx,
> */
>if ((ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_POS) == 
0 &&
>(ctx->VertexProgram._Current->Base.InputsRead & 
VERT_BIT_GENERIC0)) {
> - save->inputs[VERT_ATTRIB_GENERIC0] = save->inputs[0];
> - node_attrsz[VERT_ATTRIB_GENERIC0] = node_attrsz[0];
> - node_attrtype[VERT_ATTRIB_GENERIC0] = node_attrtype[0];
> - node_attrsz[0] = 0;
> + generic_from_pos = true;
>}
>break;
> default:
> @@ -188,30 +181,36 @@ static void vbo_bind_vertex_list(struct gl_context 
*ctx,
>  
> for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
>const GLuint src = map[attr];
> +  const GLubyte size = node->attrsz[src];
>  
> -  if (node_attrsz[src]) {
> +  if (size) {
>   /* override the default array set above */
>   save->inputs[attr] = &arrays[attr];
>  
> -  arrays[attr].Ptr = (const GLubyte *) NULL + buffer_offset;
> -  arrays[attr].Size = node_attrsz[src];
> + const uintptr_t buffer_offset = node->buffer_offset;
> + arrays[attr].Ptr = ADD_POINTERS(buffer_offset, node-
>offsets[src]);
> + arrays[attr].Size = size;
>arrays[attr].St

[Mesa-dev] [Bug 97260] R9 290 low performance in Linux 4.7

2016-10-12 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=97260

--- Comment #63 from alvarex  ---
If I remove the line "adev->ddev->mode_config.async_page_flip = true;" from the
files on comment 17, and boot with radeon.dpm=0 radeon.runpm=0. That will
trigger the bug, on Tombraider for expample it will run at 9FPS when previously
running at 75FPS, on amdgpu.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 25/25] configure.ac: Add required LLVM versions to the top

2016-10-12 Thread Emil Velikov
On 12 October 2016 at 00:02, Tobias Droste  wrote:
> Consolidate the required LLVM versions at the top where the other
> versions for dependencies are listed.
> Rework the LLVM_VERSION and LLVM_VERSION_INT calculation to make the
> format "x.y.z." possible.
>
Not sure how others feel, but I love this.

Disclaimer: there's a lot of "follow-up" suggestion below. They are
just ideas that come to mind, not something that you are expected to
do :-)

> LLVM_VERSION_INT is now including LLVM_VERSION_PATCH.
> For the C define HAVE_LLVM is set now which does not include
> LLVM_VERSION_PATCH.
>
> Signed-off-by: Tobias Droste 
> ---
>  configure.ac | 47 +--
>  1 file changed, 33 insertions(+), 14 deletions(-)
>
> diff --git a/configure.ac b/configure.ac
> index 4847704..0d1530c 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -93,6 +93,15 @@ XVMC_REQUIRED=1.0.6
>  PYTHON_MAKO_REQUIRED=0.8.0
>  LIBSENSORS_REQUIRED=4.0.0
>
> +dnl LLVM versions
> +LLVM_VERSION_REQUIRED_GALLIUM=3.3.0
> +LLVM_VERSION_REQUIRED_LLVMPIPE=3.6.0
Not exactly sure why/how llvmpipe got 3.6 since the driver reuses the
"gallium" one.

> +LLVM_VERSION_REQUIRED_OPENCL=3.6.0
> +LLVM_VERSION_REQUIRED_R600=3.6.0
> +LLVM_VERSION_REQUIRED_RADEONSI=3.6.0

There's a small related gotcha: as-is at build time we get the
different codepaths thus, as people build against shared LLVM (hello
Archlinux, I'm looking at you) and update their LLVM without
rebuilding mesa (Arch I'm looking at you again) things go funny.

Tl;Dr; We really want to enable static linking by default and prod
distros to use it.

As an alternative (or in parallel really) we could bump to 3.6.2...

> +LLVM_VERSION_REQUIRED_RADV=3.9.0
> +LLVM_VERSION_REQUIRED_SWR=3.6.0
... and 3.6.1 + drop the older codepaths. Just a follow-up idea.

> +
Nit: Drop _VERSION part ?

Follow-up: worth checking with VMWare guys (Jose et al.) it they're
still using pre-3.6 llvm and doing code/configure cleanups.

>  dnl Check for progs
>  AC_PROG_CPP
>  AC_PROG_CC
> @@ -935,29 +944,38 @@ llvm_get_version() {
>  [#include "${LLVM_INCLUDEDIR}/llvm/Config/llvm-config.h"])
>  AC_COMPUTE_INT([LLVM_VERSION_MINOR], [LLVM_VERSION_MINOR],
>  [#include "${LLVM_INCLUDEDIR}/llvm/Config/llvm-config.h"])
> +AC_COMPUTE_INT([LLVM_VERSION_PATCH], [LLVM_VERSION_PATCH],
> +[#include "${LLVM_INCLUDEDIR}/llvm/Config/llvm-config.h"])
> +
> +if test -z "$LLVM_VERSION_PATCH"; then
> +LLVM_VERSION_PATCH=`echo $LLVM_VERSION | cut -d. -f3 | egrep -o 
> '^[[0-9]]+'`
> +fi
>
IIRC the LLVM_VERSION parsing was added by the AMD folks since old
versions did not have the define. Might be worth checking with them
(and llvm headers/codebase) if that's no longer the case given their
min. required version.

Just an idea, in parallel to the above "require !=0 minor for swr/radeonsi" .

> -LLVM_VERSION_PATCH=`echo $LLVM_VERSION | cut -d. -f3 | egrep -o 
> '^[[0-9]]+'`
>  if test -z "$LLVM_VERSION_PATCH"; then
>  LLVM_VERSION_PATCH=0
>  fi
>
>  if test -n "${LLVM_VERSION_MAJOR}"; then
> -LLVM_VERSION_INT="${LLVM_VERSION_MAJOR}0${LLVM_VERSION_MINOR}"
> -else
> -LLVM_VERSION_INT=`echo $LLVM_VERSION | sed -e 
> 's/\([[0-9]]\)\.\([[0-9]]\)/\10\2/g'`
> +
> LLVM_VERSION="${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}"
>  fi
>
Follow-up idea: just bail out if the LLVM_VERSION_* defines are not
set, and reuse those to construct HAVE_LLVM amongst others (dropping
the sed 'magic') or use them all together in favour of our local
macros ?

> -DEFINES="${DEFINES} -DHAVE_LLVM=0x0$LLVM_VERSION_INT 
> -DMESA_LLVM_VERSION_PATCH=$LLVM_VERSION_PATCH"
> +LLVM_VERSION_INT=`echo $LLVM_VERSION | sed -e 
> 's/\.\([[1-9]][[0-9]]\)\.\([[0-9]]\)/\1\2/' -e 
> 's/\.\([[0-9]]\)\.\([[0-9]]\)/0\1\2/' -e 's/\.\([[1-9]][[0-9]]\)/\10/' -e 
> 's/\.\([[0-9]]\)/0\10/'`
> +HAVE_LLVM=`echo $LLVM_VERSION | sed -e 
> 's/\.\([[1-9]][[0-9]]\)\.\([[0-9]]\)/\1/' -e 
> 's/\.\([[0-9]]\)\.\([[0-9]]\)/0\1/' -e 's/\.\([[1-9]][[0-9]]\)/\1/' -e 
> 's/\.\([[0-9]]\)/0\1/'`
> +
The sed patterns have changed, please elaborate [a bit] why/how in the
commit msg.


>  llvm_check_version_for() {
> -if test "${LLVM_VERSION_INT}${LLVM_VERSION_PATCH}" -lt "${1}0${2}${3}"; 
> then
> -AC_MSG_ERROR([LLVM $1.$2.$3 or newer is required for $4])
> +llvm_target_version=`echo $1 | sed -e 
> 's/\.\([[1-9]][[0-9]]\)\.\([[0-9]]\)/\1\2/' -e 
> 's/\.\([[0-9]]\)\.\([[0-9]]\)/0\1\2/' -e 's/\.\([[1-9]][[0-9]]\)/\10/' -e 
> 's/\.\([[0-9]]\)/0\10/'`
> +
A something as simple as the following should do it as well, shouldn't it ?
  llvm_target_version=`echo $1 | sed 's/\.//g'`

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 24/25] configure.ac: Only add default LLVM components if needed

2016-10-12 Thread Emil Velikov
On 12 October 2016 at 10:57, Emil Velikov  wrote:
> On 12 October 2016 at 00:02, Tobias Droste  wrote:
>> Each driver has to expllicitly call llvm_add_default_components to
>> add the shared components.
>> This way we can fail the build if a component is not found and avoid
>> the recursive solution from a previous version of the pach series.
>>
> s/pach/patch/
>
> Does this mean that the "default" components are required only by the
> gallivm module ? Please rename the function to reflect that.
>
> Don't recall if swr driver is/was using any of it, but the nv30 path
> of nouveau does use it, iirc. In the latter you want to call the
> function if --enable-gallium-llvm is set. Alternatively keep
> llvm_add_default_components within the "test enable_gallium_llvm !=
> xno" block.
>
In case you're wondering how the above might happen:

Some drivers (i915g, softpipe/llvmpipe, nv30, r300, svga?... ) use the
aux/draw module. The latter of which has LLVM codepaths which get
build if --enable-gallium-llvm is set.

Search "\https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] radeonsi: use TC write-back instead of full cache invalidation

2016-10-12 Thread Nicolai Hähnle

That's a nice improvement. For the series:

Reviewed-by: Nicolai Hähnle 

On 11.10.2016 16:48, Marek Olšák wrote:

From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_compute.c|  2 +-
 src/gallium/drivers/radeonsi/si_state.c  | 12 +++-
 src/gallium/drivers/radeonsi/si_state_draw.c |  6 +++---
 3 files changed, 7 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index 632839f..e785106 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -694,21 +694,21 @@ static void si_launch_grid(

/* Add buffer sizes for memory checking in need_cs_space. */
r600_context_add_resource_size(ctx, &program->shader.bo->b.b);
/* TODO: add the scratch buffer */

if (info->indirect) {
r600_context_add_resource_size(ctx, info->indirect);

/* The hw doesn't read the indirect buffer via TC L2. */
if (r600_resource(info->indirect)->TC_L2_dirty) {
-   sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+   sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
r600_resource(info->indirect)->TC_L2_dirty = false;
}
}

si_need_cs_space(sctx);

if (!sctx->cs_shader_state.initialized)
si_initialize_compute(sctx);

if (sctx->b.flags)
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 34f3ed7..ad65fc2 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3390,35 +3390,29 @@ static void si_memory_barrier(struct pipe_context *ctx, 
unsigned flags)
 * automatically at end of shader, but the contents of other
 * L1 caches might still be stale. */
sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1;
}

if (flags & PIPE_BARRIER_INDEX_BUFFER) {
/* Indices are read through TC L2 since VI.
 * L1 isn't used.
 */
if (sctx->screen->b.chip_class <= CIK)
-   sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+   sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
}

if (flags & PIPE_BARRIER_FRAMEBUFFER)
sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;

if (flags & (PIPE_BARRIER_FRAMEBUFFER |
-PIPE_BARRIER_INDIRECT_BUFFER)) {
-   /* Not sure if INV_GLOBAL_L2 is the best thing here.
-*
-* We need to make sure that TC L1 & L2 are written back to
-* memory, because CB fetches don't consider TC, but there's
-* no need to invalidate any TC cache lines. */
-   sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
-   }
+PIPE_BARRIER_INDIRECT_BUFFER))
+   sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
 }

 static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
 {
struct pipe_blend_state blend;

memset(&blend, 0, sizeof(blend));
blend.independent_blend_enable = true;
blend.rt[0].colormask = 0xf;
return si_create_blend_state_mode(&sctx->b.b, &blend, mode);
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c 
b/src/gallium/drivers/radeonsi/si_state_draw.c
index 33b6b23..c14e852 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1040,32 +1040,32 @@ void si_draw_vbo(struct pipe_context *ctx, const struct 
pipe_draw_info *info)
if (!ib.buffer)
return;
/* info->start will be added by the drawing code */
ib.offset -= start_offset;
}
}

/* VI reads index buffers through TC L2. */
if (info->indexed && sctx->b.chip_class <= CIK &&
r600_resource(ib.buffer)->TC_L2_dirty) {
-   sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+   sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
r600_resource(ib.buffer)->TC_L2_dirty = false;
}

if (info->indirect && r600_resource(info->indirect)->TC_L2_dirty) {
-   sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+   sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
r600_resource(info->indirect)->TC_L2_dirty = false;
}

if (info->indirect_params &&
r600_resource(info->indirect_params)->TC_L2_dirty) {
-   sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+   sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
r600_resource(info->indirect_params)->TC_L2_dirty = false;
}

/* Add buffer sizes for memory checking in need_cs_space. */
if (sctx->emi

Re: [Mesa-dev] [PATCH 2/2] winsys/amdgpu: fix infinite loop w/ RADEON_NOOP=1 caused by unsubmitted fences

2016-10-12 Thread Nicolai Hähnle

For the series:

Reviewed-by: Nicolai Hähnle 

On 11.10.2016 17:00, Marek Olšák wrote:

From: Marek Olšák 

---
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index c0e810c..2b86827 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -30,20 +30,22 @@
  *  Marek Olšák 
  */

 #include "amdgpu_cs.h"
 #include "os/os_time.h"
 #include 
 #include 

 #include "amd/common/sid.h"

+DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", false)
+
 /* FENCES */

 static struct pipe_fence_handle *
 amdgpu_fence_create(struct amdgpu_ctx *ctx, unsigned ip_type,
 unsigned ip_instance, unsigned ring)
 {
struct amdgpu_fence *fence = CALLOC_STRUCT(amdgpu_fence);

fence->reference.count = 1;
fence->ctx = ctx;
@@ -136,20 +138,23 @@ static bool amdgpu_fence_wait_rel_timeout(struct 
radeon_winsys *rws,
 {
return amdgpu_fence_wait(fence, timeout, false);
 }

 static struct pipe_fence_handle *
 amdgpu_cs_get_next_fence(struct radeon_winsys_cs *rcs)
 {
struct amdgpu_cs *cs = amdgpu_cs(rcs);
struct pipe_fence_handle *fence = NULL;

+   if (debug_get_option_noop())
+  return NULL;
+
if (cs->next_fence) {
   amdgpu_fence_reference(&fence, cs->next_fence);
   return fence;
}

fence = amdgpu_fence_create(cs->ctx,
cs->csc->request.ip_type,
cs->csc->request.ip_instance,
cs->csc->request.ring);
if (!fence)
@@ -1062,22 +1067,20 @@ cleanup:
 void amdgpu_cs_sync_flush(struct radeon_winsys_cs *rcs)
 {
struct amdgpu_cs *cs = amdgpu_cs(rcs);
struct amdgpu_winsys *ws = cs->ctx->ws;

/* Wait for any pending ioctl of this CS to complete. */
if (util_queue_is_initialized(&ws->cs_queue))
   util_queue_job_wait(&cs->flush_completed);
 }

-DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", false)
-
 static int amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
unsigned flags,
struct pipe_fence_handle **fence)
 {
struct amdgpu_cs *cs = amdgpu_cs(rcs);
struct amdgpu_winsys *ws = cs->ctx->ws;
int error_code = 0;

rcs->current.max_dw += amdgpu_cs_epilog_dws(cs->ring_type);



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium/os: Use unsigned integers for size computation

2016-10-12 Thread Nicolai Hähnle
Specifically, the point is that size_t is for CPU sizes, while this code 
is about GPU sizes. The patch is


Reviewed-by: Nicolai Hähnle 

On 11.10.2016 19:57, Axel Davy wrote:

size_t could be uint32_t when compiling 32 bits.

However that would lead to overflow, which was the initial reason of the
patch
218459771a1801d7ad20dd340ac35a50f2b5b81a

On 11/10/2016 19:43, Chuck Atkins wrote:

Shouldn't' the size argument itself be size_t instead of uint64_t?

- Chuck


On Tue, Oct 11, 2016 at 12:59 PM, Axel Davy mailto:axel.d...@ens.fr>> wrote:

Use uint64_t instead of int64_t in the calculation,
as the result is uint64_t.

Signed-off-by: Axel Davy mailto:axel.d...@ens.fr>>
---
This corrects the small mistake from
218459771a1801d7ad20dd340ac35a50f2b5b81a
as reported by Emil.

 src/gallium/auxiliary/os/os_misc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/os/os_misc.c
b/src/gallium/auxiliary/os/os_misc.c
index a32a9e5..09d4400 100644
--- a/src/gallium/auxiliary/os/os_misc.c
+++ b/src/gallium/auxiliary/os/os_misc.c
@@ -131,7 +131,7 @@ os_get_total_physical_memory(uint64_t *size)
if (phys_pages <= 0 || page_size <= 0)
   return false;

-   *size = (int64_t)phys_pages * (int64_t)page_size;
+   *size = (uint64_t)phys_pages * (uint64_t)page_size;
return true;
 #elif defined(PIPE_OS_APPLE) || defined(PIPE_OS_BSD)
size_t len = sizeof(*size);
@@ -159,7 +159,7 @@ os_get_total_physical_memory(uint64_t *size)
if (ret != B_OK || info.max_pages <= 0)
   return false;

-   *size = (int64_t)info.max_pages * (int64_t)B_PAGE_SIZE;
+   *size = (uint64_t)info.max_pages * (uint64_t)B_PAGE_SIZE;
return true;
 #elif defined(PIPE_OS_WINDOWS)
MEMORYSTATUSEX status;
--
2.10.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org 
https://lists.freedesktop.org/mailman/listinfo/mesa-dev







___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv: Return correct result in EnumeratePhysicalDevices

2016-10-12 Thread Nicolas Koch
On Wed, Oct 12, 2016 at 10:32 AM, Emil Velikov  wrote:
> Hi Nicolas,
>
> On 6 October 2016 at 20:21, Nicolas Koch  wrote:
>> If pPhysicalDevices is too small for all physical devices,
>> the driver must return VK_INCOMPLETE.
>> Since only a single physical device is supported, this is only the case
>> when pPhysicalDeviceCount == 0 && pPhysicalDevices != NULL.
>> ---
>>  src/intel/vulkan/anv_device.c | 2 ++
>>  1 file changed, 2 insertions(+)
>>
>> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
>> index c7b9979..76cbb69 100644
>> --- a/src/intel/vulkan/anv_device.c
>> +++ b/src/intel/vulkan/anv_device.c
>> @@ -385,6 +385,8 @@ VkResult anv_EnumeratePhysicalDevices(
>> } else if (*pPhysicalDeviceCount >= 1) {
>>pPhysicalDevices[0] = 
>> anv_physical_device_to_handle(&instance->physicalDevice);
>>*pPhysicalDeviceCount = 1;
>> +   } else if (*pPhysicalDeviceCount < instance->physicalDeviceCount) {
>> +  return VK_INCOMPLETE;
> Looks like RADV could use the exact same fix
> (s/intel/amd/;s/anv/radv/). Can you spin a patch for it ?
>
> -Emil

Sure, will do!
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 98172] Concurrent call to glClientWaitSync results in segfault in one of the waiters.

2016-10-12 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=98172

--- Comment #5 from shinji.suz...@gmail.com ---
Hello Michael-san.
I'm afraid my testing was not enough and I spoke too soon. Today I've run my
app remotely through ssh session by pointing DISPLAY to :0 as I'm away from my
home machine. Now the app crashes, though much less often, even with modified
library containing your patch. Remoting and screen being blanked seems to
affect some timing as I get around 25fps only v.s. around 35fps that I get when
I work on the console.
Here is the call stack. (assert() is my lame attempt in trying to understand
what code path is being taken, which is apparently compiled to no-op.)

state_tracker/st_cb_syncobj.c
118if (screen->fence_finish(screen, fence, timeout)) {
│
   │119   assert(0);  
│
  >│120   screen->fence_reference(screen, &so->fence, NULL);  
│
   │121   so->b.StatusFlag = GL_TRUE; 
│
   │122}  
│

r600_pipe_common.c
766 if (pipe_reference(&(*rdst)->reference, &rsrc->reference))
│
  >│767 ws->fence_reference(&(*rdst)->gfx, NULL); 
│
   │768 ws->fence_reference(&(*rdst)->sdma, NULL);
│
   │769 FREE(*rdst);  
│

radeon_drm_cs.c
667 static void radeon_fence_reference(struct pipe_fence_handle **dst,
│
   │668struct pipe_fence_handle *src) 
│
   │669 { 
│
  >│670 pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
│
   │671 } 
│

src/gallium/auxiliary/pipebuffer/pb_buffer.h

235 static inline void
│
   │236 pb_reference(struct pb_buffer **dst,  
│
   │237  struct pb_buffer *src)   
│
   │238 { 
│
  >│239struct pb_buffer *old = *dst;  
│
   │240   
│
   │241if (pipe_reference(&(*dst)->reference, &src->reference))   
│
   │242   pb_destroy( old );  
│
   │243*dst = src;
│
   │244 } 
│

(gdb) p old
$1 = 
(gdb) p dst
$2 = (struct pb_buffer **) 0x8
(gdb) 

I'll attach the diff that corresponds to your patch to make sure I'm not making
mistake in applying your patch by hand as some hunks were rejected by 'patch'.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 98172] Concurrent call to glClientWaitSync results in segfault in one of the waiters.

2016-10-12 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=98172

--- Comment #6 from shinji.suz...@gmail.com ---
Created attachment 127237
  --> https://bugs.freedesktop.org/attachment.cgi?id=127237&action=edit
Diff that went into my copy of mesa-source

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] st/mesa: Take local references for sync object fences

2016-10-12 Thread Nicolai Hähnle

On 12.10.2016 11:31, Michel Dänzer wrote:

From: Michel Dänzer 

Fixes a race condition:

Thread AThread B


Test if so->fence != NULL
=> true
Set so->fence = NULL
Dereference so->fence
=> NULL dereference

Also, taking a reference prevents the fence from being destroyed.


Nice :)



Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98172
Cc: mesa-sta...@lists.freedesktop.org
Signed-off-by: Michel Dänzer 
---
 src/mesa/state_tracker/st_cb_syncobj.c | 19 ++-
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_syncobj.c 
b/src/mesa/state_tracker/st_cb_syncobj.c
index 123925a..de01880 100644
--- a/src/mesa/state_tracker/st_cb_syncobj.c
+++ b/src/mesa/state_tracker/st_cb_syncobj.c
@@ -81,17 +81,22 @@ static void st_check_sync(struct gl_context *ctx, struct 
gl_sync_object *obj)
struct pipe_context *pipe = st_context(ctx)->pipe;
struct pipe_screen *screen = pipe->screen;
struct st_sync_object *so = (struct st_sync_object*)obj;
+   struct pipe_fence_handle *fence = NULL;
+
+   screen->fence_reference(screen, &fence, so->fence);


This should probably really use p_atomic_read (or some kind of READ_ONCE 
macro if we had it) to make the intention clear, but I'm not sure 
whether that would compile everywhere since so->fence is a pointer.


In practice it doesn't matter, because the function call is a sufficient 
barrier. So either way,


Reviewed-by: Nicolai Hähnle 



/* If the fence doesn't exist, assume it's signalled. */
-   if (!so->fence) {
+   if (!fence) {
   so->b.StatusFlag = GL_TRUE;
   return;
}

-   if (screen->fence_finish(screen, pipe, so->fence, 0)) {
+   if (screen->fence_finish(screen, pipe, fence, 0)) {
   screen->fence_reference(screen, &so->fence, NULL);
   so->b.StatusFlag = GL_TRUE;
}
+
+   screen->fence_reference(screen, &fence, NULL);
 }

 static void st_client_wait_sync(struct gl_context *ctx,
@@ -101,9 +106,12 @@ static void st_client_wait_sync(struct gl_context *ctx,
struct pipe_context *pipe = st_context(ctx)->pipe;
struct pipe_screen *screen = pipe->screen;
struct st_sync_object *so = (struct st_sync_object*)obj;
+   struct pipe_fence_handle *fence = NULL;
+
+   screen->fence_reference(screen, &fence, so->fence);

/* If the fence doesn't exist, assume it's signalled. */
-   if (!so->fence) {
+   if (!fence) {
   so->b.StatusFlag = GL_TRUE;
   return;
}
@@ -120,11 +128,12 @@ static void st_client_wait_sync(struct gl_context *ctx,
 * Assume GL_SYNC_FLUSH_COMMANDS_BIT is always set, because applications
 * forget to set it.
 */
-   if (so->fence &&
-   screen->fence_finish(screen, pipe, so->fence, timeout)) {
+   if (screen->fence_finish(screen, pipe, fence, timeout)) {
   screen->fence_reference(screen, &so->fence, NULL);
   so->b.StatusFlag = GL_TRUE;
}
+
+   screen->fence_reference(screen, &fence, NULL);
 }

 static void st_server_wait_sync(struct gl_context *ctx,


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] st/mesa: Refactor st_check_sync to use st_client_wait_sync

2016-10-12 Thread Nicolai Hähnle

Reviewed-by: Nicolai Hähnle 

On 12.10.2016 11:31, Michel Dänzer wrote:

From: Michel Dänzer 

Duplicate code cleanup, no functional change intended.

Signed-off-by: Michel Dänzer 
---
 src/mesa/state_tracker/st_cb_syncobj.c | 28 +---
 1 file changed, 5 insertions(+), 23 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_syncobj.c 
b/src/mesa/state_tracker/st_cb_syncobj.c
index de01880..6e2cb26 100644
--- a/src/mesa/state_tracker/st_cb_syncobj.c
+++ b/src/mesa/state_tracker/st_cb_syncobj.c
@@ -76,29 +76,6 @@ static void st_fence_sync(struct gl_context *ctx, struct 
gl_sync_object *obj,
pipe->flush(pipe, &so->fence, PIPE_FLUSH_DEFERRED);
 }

-static void st_check_sync(struct gl_context *ctx, struct gl_sync_object *obj)
-{
-   struct pipe_context *pipe = st_context(ctx)->pipe;
-   struct pipe_screen *screen = pipe->screen;
-   struct st_sync_object *so = (struct st_sync_object*)obj;
-   struct pipe_fence_handle *fence = NULL;
-
-   screen->fence_reference(screen, &fence, so->fence);
-
-   /* If the fence doesn't exist, assume it's signalled. */
-   if (!fence) {
-  so->b.StatusFlag = GL_TRUE;
-  return;
-   }
-
-   if (screen->fence_finish(screen, pipe, fence, 0)) {
-  screen->fence_reference(screen, &so->fence, NULL);
-  so->b.StatusFlag = GL_TRUE;
-   }
-
-   screen->fence_reference(screen, &fence, NULL);
-}
-
 static void st_client_wait_sync(struct gl_context *ctx,
 struct gl_sync_object *obj,
 GLbitfield flags, GLuint64 timeout)
@@ -136,6 +113,11 @@ static void st_client_wait_sync(struct gl_context *ctx,
screen->fence_reference(screen, &fence, NULL);
 }

+static void st_check_sync(struct gl_context *ctx, struct gl_sync_object *obj)
+{
+   st_client_wait_sync(ctx, obj, 0, 0);
+}
+
 static void st_server_wait_sync(struct gl_context *ctx,
 struct gl_sync_object *obj,
 GLbitfield flags, GLuint64 timeout)


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: replace gl_framebuffer::_IntegerColor wih _IntegerBuffers

2016-10-12 Thread Marek Olšák
Reviewed-by: Marek Olšák 

On Oct 12, 2016 4:08 AM, "Brian Paul"  wrote:

> Use a bitmask to indicate which color buffers are integer-valued, rather
> than a bool.  Also, the old field was mis-computed.  If an integer buffer
> was followed by a non-integer buffer, the _IntegerColor field was wrongly
> set to false.
>
> This fixes the new piglit gl-3.1-mixed-int-float-fbo test.
> ---
>  src/mesa/drivers/common/meta.c   |  2 +-
>  src/mesa/main/api_validate.c |  2 +-
>  src/mesa/main/blend.c|  2 +-
>  src/mesa/main/fbobject.c | 10 ++
>  src/mesa/main/get.c  |  4 
>  src/mesa/main/get_hash_params.py |  2 +-
>  src/mesa/main/mtypes.h   |  3 +--
>  7 files changed, 15 insertions(+), 10 deletions(-)
>
> diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/
> meta.c
> index fdc4748..890e98a 100644
> --- a/src/mesa/drivers/common/meta.c
> +++ b/src/mesa/drivers/common/meta.c
> @@ -1750,7 +1750,7 @@ meta_clear(struct gl_context *ctx, GLbitfield
> buffers, bool glsl)
>z = invert_z(ctx->Depth.Clear);
> }
>
> -   if (fb->_IntegerColor) {
> +   if (fb->_IntegerBuffers) {
>assert(glsl);
>_mesa_meta_use_program(ctx, clear->IntegerShaderProg);
>_mesa_Uniform4iv(0, 1, ctx->Color.ClearColor.i);
> diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c
> index c3c5a69..d3b4cab 100644
> --- a/src/mesa/main/api_validate.c
> +++ b/src/mesa/main/api_validate.c
> @@ -152,7 +152,7 @@ _mesa_valid_to_render(struct gl_context *ctx, const
> char *where)
>   /* If drawing to integer-valued color buffers, there must be an
>* active fragment shader (GL_EXT_texture_integer).
>*/
> - if (ctx->DrawBuffer && ctx->DrawBuffer->_IntegerColor) {
> + if (ctx->DrawBuffer && ctx->DrawBuffer->_IntegerBuffers) {
>  _mesa_error(ctx, GL_INVALID_OPERATION,
>  "%s(integer format but no fragment shader)",
> where);
>  return GL_FALSE;
> diff --git a/src/mesa/main/blend.c b/src/mesa/main/blend.c
> index ad79ee0..0322799 100644
> --- a/src/mesa/main/blend.c
> +++ b/src/mesa/main/blend.c
> @@ -945,7 +945,7 @@ _mesa_update_clamp_fragment_color(struct gl_context
> *ctx,
>  * - there is an integer colorbuffer
>  */
> if (!drawFb || !drawFb->_HasSNormOrFloatColorBuffer ||
> -   drawFb->_IntegerColor)
> +   drawFb->_IntegerBuffers)
>ctx->Color._ClampFragmentColor = GL_FALSE;
> else
>ctx->Color._ClampFragmentColor =
> diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
> index 3b55e79..9204606 100644
> --- a/src/mesa/main/fbobject.c
> +++ b/src/mesa/main/fbobject.c
> @@ -970,6 +970,7 @@ _mesa_test_framebuffer_completeness(struct gl_context
> *ctx,
> fb->_AllColorBuffersFixedPoint = GL_TRUE;
> fb->_HasSNormOrFloatColorBuffer = GL_FALSE;
> fb->_HasAttachments = true;
> +   fb->_IntegerBuffers = 0;
>
> /* Start at -2 to more easily loop over all attachment points.
>  *  -2: depth buffer
> @@ -1090,13 +1091,14 @@ _mesa_test_framebuffer_completeness(struct
> gl_context *ctx,
>   continue;
>}
>
> -  /* check if integer color */
> -  fb->_IntegerColor = _mesa_is_format_integer_color(attFormat);
> -
> -  /* Update _AllColorBuffersFixedPoint and
> _HasSNormOrFloatColorBuffer. */
> +  /* Update flags describing color buffer datatypes */
>if (i >= 0) {
>   GLenum type = _mesa_get_format_datatype(attFormat);
>
> + /* check if integer color */
> + if (_mesa_is_format_integer_color(attFormat))
> +fb->_IntegerBuffers |= (1 << i);
> +
>   fb->_AllColorBuffersFixedPoint =
>  fb->_AllColorBuffersFixedPoint &&
>  (type == GL_UNSIGNED_NORMALIZED || type ==
> GL_SIGNED_NORMALIZED);
> diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
> index bd85bef..c11bcde 100644
> --- a/src/mesa/main/get.c
> +++ b/src/mesa/main/get.c
> @@ -1076,6 +1076,10 @@ find_custom_value(struct gl_context *ctx, const
> struct value_desc *d, union valu
> case GL_SAMPLE_BUFFERS:
>v->value_int = _mesa_geometric_samples(ctx->DrawBuffer) > 0;
>break;
> +   /* GL_EXT_textrue_integer */
> +   case GL_RGBA_INTEGER_MODE_EXT:
> +  v->value_int = (ctx->DrawBuffer->_IntegerBuffers != 0);
> +  break;
> /* GL_ATI_meminfo & GL_NVX_gpu_memory_info */
> case GL_VBO_FREE_MEMORY_ATI:
> case GL_TEXTURE_FREE_MEMORY_ATI:
> diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_
> params.py
> index 6849b5b..3c6b712 100644
> --- a/src/mesa/main/get_hash_params.py
> +++ b/src/mesa/main/get_hash_params.py
> @@ -890,7 +890,7 @@ descriptor=[
>[ "TEXTURE_CUBE_MAP_SEAMLESS", "CONTEXT_BOOL(Texture.CubeMapSeamless),
> extra_ARB_seamless_cube_map" ],
>
>  # GL_EXT_texture_integer
> -  [ "RGBA_INTEGER_MODE_EXT", "BUFFER_BOOL(_IntegerColor),
> extra_EXT_texture_integer_and_new_buffers" 

[Mesa-dev] [PATCH] radv: Return correct result in EnumeratePhysicalDevices

2016-10-12 Thread Nicolas Koch
If pPhysicalDevices is too small for all physical devices,
the driver must return VK_INCOMPLETE. Since only a single
physical device is supported, this is only the case when
pPhysicalDeviceCount == 0 && pPhysicalDevices != NULL.
---
 src/amd/vulkan/radv_device.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 6e06863..71b1481 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -295,6 +295,8 @@ VkResult radv_EnumeratePhysicalDevices(
} else if (*pPhysicalDeviceCount >= 1) {
pPhysicalDevices[0] = 
radv_physical_device_to_handle(&instance->physicalDevice);
*pPhysicalDeviceCount = 1;
+   } else if (*pPhysicalDeviceCount < instance->physicalDeviceCount) {
+   return VK_INCOMPLETE;
} else {
*pPhysicalDeviceCount = 0;
}
-- 
2.10.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] radeonsi: use TC write-back instead of full cache invalidation

2016-10-12 Thread Edward O'Callaghan
yep!

Reviewed-by: Edward O'Callaghan 

On 10/12/2016 10:04 PM, Nicolai Hähnle wrote:
> That's a nice improvement. For the series:
> 
> Reviewed-by: Nicolai Hähnle 
> 
> On 11.10.2016 16:48, Marek Olšák wrote:
>> From: Marek Olšák 
>>
>> ---
>>  src/gallium/drivers/radeonsi/si_compute.c|  2 +-
>>  src/gallium/drivers/radeonsi/si_state.c  | 12 +++-
>>  src/gallium/drivers/radeonsi/si_state_draw.c |  6 +++---
>>  3 files changed, 7 insertions(+), 13 deletions(-)
>>
>> diff --git a/src/gallium/drivers/radeonsi/si_compute.c
>> b/src/gallium/drivers/radeonsi/si_compute.c
>> index 632839f..e785106 100644
>> --- a/src/gallium/drivers/radeonsi/si_compute.c
>> +++ b/src/gallium/drivers/radeonsi/si_compute.c
>> @@ -694,21 +694,21 @@ static void si_launch_grid(
>>
>>  /* Add buffer sizes for memory checking in need_cs_space. */
>>  r600_context_add_resource_size(ctx, &program->shader.bo->b.b);
>>  /* TODO: add the scratch buffer */
>>
>>  if (info->indirect) {
>>  r600_context_add_resource_size(ctx, info->indirect);
>>
>>  /* The hw doesn't read the indirect buffer via TC L2. */
>>  if (r600_resource(info->indirect)->TC_L2_dirty) {
>> -sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
>> +sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
>>  r600_resource(info->indirect)->TC_L2_dirty = false;
>>  }
>>  }
>>
>>  si_need_cs_space(sctx);
>>
>>  if (!sctx->cs_shader_state.initialized)
>>  si_initialize_compute(sctx);
>>
>>  if (sctx->b.flags)
>> diff --git a/src/gallium/drivers/radeonsi/si_state.c
>> b/src/gallium/drivers/radeonsi/si_state.c
>> index 34f3ed7..ad65fc2 100644
>> --- a/src/gallium/drivers/radeonsi/si_state.c
>> +++ b/src/gallium/drivers/radeonsi/si_state.c
>> @@ -3390,35 +3390,29 @@ static void si_memory_barrier(struct
>> pipe_context *ctx, unsigned flags)
>>   * automatically at end of shader, but the contents of other
>>   * L1 caches might still be stale. */
>>  sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1;
>>  }
>>
>>  if (flags & PIPE_BARRIER_INDEX_BUFFER) {
>>  /* Indices are read through TC L2 since VI.
>>   * L1 isn't used.
>>   */
>>  if (sctx->screen->b.chip_class <= CIK)
>> -sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
>> +sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
>>  }
>>
>>  if (flags & PIPE_BARRIER_FRAMEBUFFER)
>>  sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
>>
>>  if (flags & (PIPE_BARRIER_FRAMEBUFFER |
>> - PIPE_BARRIER_INDIRECT_BUFFER)) {
>> -/* Not sure if INV_GLOBAL_L2 is the best thing here.
>> - *
>> - * We need to make sure that TC L1 & L2 are written back to
>> - * memory, because CB fetches don't consider TC, but there's
>> - * no need to invalidate any TC cache lines. */
>> -sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
>> -}
>> + PIPE_BARRIER_INDIRECT_BUFFER))
>> +sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
>>  }
>>
>>  static void *si_create_blend_custom(struct si_context *sctx, unsigned
>> mode)
>>  {
>>  struct pipe_blend_state blend;
>>
>>  memset(&blend, 0, sizeof(blend));
>>  blend.independent_blend_enable = true;
>>  blend.rt[0].colormask = 0xf;
>>  return si_create_blend_state_mode(&sctx->b.b, &blend, mode);
>> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c
>> b/src/gallium/drivers/radeonsi/si_state_draw.c
>> index 33b6b23..c14e852 100644
>> --- a/src/gallium/drivers/radeonsi/si_state_draw.c
>> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
>> @@ -1040,32 +1040,32 @@ void si_draw_vbo(struct pipe_context *ctx,
>> const struct pipe_draw_info *info)
>>  if (!ib.buffer)
>>  return;
>>  /* info->start will be added by the drawing code */
>>  ib.offset -= start_offset;
>>  }
>>  }
>>
>>  /* VI reads index buffers through TC L2. */
>>  if (info->indexed && sctx->b.chip_class <= CIK &&
>>  r600_resource(ib.buffer)->TC_L2_dirty) {
>> -sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
>> +sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
>>  r600_resource(ib.buffer)->TC_L2_dirty = false;
>>  }
>>
>>  if (info->indirect && r600_resource(info->indirect)->TC_L2_dirty) {
>> -sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
>> +sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
>>  r600_resource(info->indirect)->TC_L2_dirty = false;
>>  }
>>
>>  if (info->indirect_params &&
>>  r600_resource(info->indirect_params)->TC_L2_dirty) {
>> -sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
>> +sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
>>  r600_resource(info->indirect_params)->TC_L2_dirty = false;
>>  }
>>
>>  /* Add buffer sizes for memory checking in need_cs_space. */
>>  if (

Re: [Mesa-dev] [PATCH 06/15] ralloc: add a linear allocator as a child node of ralloc

2016-10-12 Thread Nicolai Hähnle

On 08.10.2016 12:58, Marek Olšák wrote:

From: Marek Olšák 


It would be useful to hook this up with Valgrind and address sanitizer. 
Some drivers already do that; basically, there are macros 
VALGRIND_MAKE_MEM_{NOACCESS, UNDEFINED, DEFINED} in valgrind/memcheck.h, 
and similarly address sanitizer has 
__asan_{poison,unpoison}_memory_region intrinsics.


The linear_size_chunks can serve as red zones helping to catch array 
overflows.


This doesn't necessarily have to be in the same commit, but it would be 
good to do it soon.





---
 src/util/ralloc.c | 355 ++
 src/util/ralloc.h |  84 -
 2 files changed, 435 insertions(+), 4 deletions(-)

diff --git a/src/util/ralloc.c b/src/util/ralloc.c
index bf21ac3..a8e660f 100644
--- a/src/util/ralloc.c
+++ b/src/util/ralloc.c
@@ -522,10 +522,365 @@ ralloc_vasprintf_rewrite_tail(char **str, size_t *start, 
const char *fmt,

ptr = resize(*str, *start + new_length + 1);
if (unlikely(ptr == NULL))
   return false;

vsnprintf(ptr + *start, new_length + 1, fmt, args);
*str = ptr;
*start += new_length;
return true;
 }
+
+/***
+ * Linear allocator for short-lived allocations.
+ ***
+ *
+ * The allocator consists of a parent node (2K buffer), which requires
+ * a ralloc parent, and child nodes (allocations). Child nodes can't be freed
+ * directly, because the parent doesn't track them. You have to release
+ * the parent node in order to release all its children.
+ *
+ * The allocator uses a fixed-sized buffer with a monotonically increasing
+ * offset after each allocation. If the buffer is all used, another buffer
+ * is allocated, sharing the same ralloc parent, so all buffers are at
+ * the same level in the ralloc hierarchy.
+ *
+ * The linear parent node is always the first buffer and keeps track of all
+ * other buffers.
+ */
+
+#define ALIGN(x, y) (((x) + (y) - 1) & ~((y) - 1))


Please rename this to ALIGN_POT or similar.



+#define MIN_LINEAR_BUFSIZE 2048
+#define LMAGIC 0x87b9c7d3
+
+struct linear_header {
+#ifdef DEBUG
+   unsigned magic;   /* for debugging */
+#endif
+   unsigned offset;  /* points to the first unused byte in the buffer */
+   unsigned size;/* size of the buffer */
+   void *ralloc_parent;  /* new buffers will use this */
+   struct linear_header *next;   /* next buffer if we have more */
+   struct linear_header *latest; /* the only buffer that has free space */
+
+   /* After this structure, the buffer begins.
+* Each suballocation consists of linear_size_chunk as its header followed
+* by the suballocation, so it goes:
+*
+* - linear_size_chunk
+* - allocated space
+* - linear_size_chunk
+* - allocated space
+* etc.
+*
+* linear_size_chunk is only needed by linear_realloc.
+*/
+};
+
+struct linear_size_chunk {
+   unsigned size; /* for realloc */
+   unsigned _padding;
+};
+
+typedef struct linear_header linear_header;
+typedef struct linear_size_chunk linear_size_chunk;
+
+#define LINEAR_PARENT_TO_HEADER(parent) \
+   (linear_header*) \
+   ((char*)(parent) - sizeof(linear_size_chunk) - sizeof(linear_header))
+
+/* Allocate the linear buffer with its header. */
+static linear_header *
+create_linear_node(void *ralloc_ctx, unsigned min_size)
+{
+   linear_header *node;
+
+   min_size += sizeof(linear_size_chunk);
+
+   if (likely(min_size < MIN_LINEAR_BUFSIZE))
+  min_size = MIN_LINEAR_BUFSIZE;
+
+   node = ralloc_size(ralloc_ctx, sizeof(linear_header) + min_size);
+   if (unlikely(!node))
+  return NULL;
+
+#ifdef DEBUG
+   node->magic = LMAGIC;
+#endif
+   node->offset = 0;
+   node->size = min_size;
+   node->ralloc_parent = ralloc_ctx;
+   node->next = NULL;
+   node->latest = node;
+   return node;
+}
+
+void *
+linear_alloc_child(void *parent, unsigned size)
+{
+   linear_header *first = LINEAR_PARENT_TO_HEADER(parent);
+   linear_header *latest = first->latest;
+   linear_header *new_node;
+   linear_size_chunk *ptr;
+   unsigned full_size;
+
+   assert(first->magic == LMAGIC);
+   assert(!latest->next);
+
+   size = ALIGN(size, 8);


sizeof(size_t) instead of 8? In a similar vein, you could use size_t in 
linear_size_chunk.




+   full_size = sizeof(linear_size_chunk) + size;
+
+   if (likely(latest->offset + full_size <= latest->size)) {
+alloc:


Please avoid the goto by inverting the logic here.



+  ptr = (linear_size_chunk *)((char*)&latest[1] + latest->offset);
+  ptr->size = size;
+  latest->offset += full_size;
+  return &ptr[1];
+   }
+
+   /* allocate a new node */
+   new_node = create_linear_node(latest->ralloc_parent, size);
+   if (unlikely(!new_node))
+  return NULL;
+
+   first->latest = new_node;
+   latest->latest = new_node;
+   latest->next = new_node;
+   latest = new_node;
+
+   goto alloc;
+

Re: [Mesa-dev] [PATCH 07/15] glcpp: use the linear allocator for most objects

2016-10-12 Thread Nicolai Hähnle

On 08.10.2016 12:58, Marek Olšák wrote:

From: Marek Olšák 

---
 src/compiler/glsl/glcpp/glcpp-lex.l   |   2 +-
 src/compiler/glsl/glcpp/glcpp-parse.y | 203 +++---
 src/compiler/glsl/glcpp/glcpp.h   |   1 +
 3 files changed, 94 insertions(+), 112 deletions(-)

diff --git a/src/compiler/glsl/glcpp/glcpp-lex.l 
b/src/compiler/glsl/glcpp/glcpp-lex.l
index d09441a..f4a6876 100644
--- a/src/compiler/glsl/glcpp/glcpp-lex.l
+++ b/src/compiler/glsl/glcpp/glcpp-lex.l

[snip]

@@ -1002,51 +994,50 @@ _token_list_append_list(token_list_t *list, token_list_t 
*tail)
   list->head = tail->head;
} else {
   list->tail->next = tail->head;
}

list->tail = tail->tail;
list->non_space_tail = tail->non_space_tail;
 }

 static token_list_t *
-_token_list_copy(void *ctx, token_list_t *other)
+_token_list_copy(glcpp_parser_t *parser, token_list_t *other)
 {
token_list_t *copy;
token_node_t *node;

if (other == NULL)
   return NULL;

-   copy = _token_list_create (ctx);
+   copy = _token_list_create (parser);
for (node = other->head; node; node = node->next) {
-  token_t *new_token = ralloc (copy, token_t);
+  token_t *new_token = linear_alloc_child(parser->linalloc, 
sizeof(token_t));
   *new_token = *node->token;
-  _token_list_append (copy, new_token);
+  _token_list_append (parser, copy, new_token);
}

return copy;
 }

 static void
 _token_list_trim_trailing_space(token_list_t *list)
 {
token_node_t *tail, *next;

if (list->non_space_tail) {
   tail = list->non_space_tail->next;
   list->non_space_tail->next = NULL;
   list->tail = list->non_space_tail;

   while (tail) {
  next = tail->next;
- ralloc_free (tail);
  tail = next;
   }


This whole loop can be dropped now.



}
 }

 static int
 _token_list_is_empty_ignoring_space(token_list_t *l)
 {
token_node_t *n;

@@ -1177,69 +1168,70 @@ _token_print(char **out, size_t *len, token_t *token)
case PLACEHOLDER:
   /* Nothing to print. */
   break;
default:
   assert(!"Error: Don't know how to print token.");

   break;
}
 }

-/* Return a new token (ralloc()ed off of 'token') formed by pasting
- * 'token' and 'other'. Note that this function may return 'token' or
- * 'other' directly rather than allocating anything new.
+/* Return a new token formed by pasting 'token' and 'other'. Note that this
+ * function may return 'token' or 'other' directly rather than allocating
+ * anything new.
  *
  * Caution: Only very cursory error-checking is performed to see if
  * the final result is a valid single token. */
 static token_t *
-_token_paste(glcpp_parser_t *parser, token_t *token, token_t *other)
+_token_paste(glcpp_parser_t *parser, token_list_t *list, token_t *token,
+ token_t *other)


I seem to be blind... where is list used?

Nicolai


 {
token_t *combined = NULL;

/* Pasting a placeholder onto anything makes no change. */
if (other->type == PLACEHOLDER)
   return token;

/* When 'token' is a placeholder, just return 'other'. */
if (token->type == PLACEHOLDER)
   return other;

/* A very few single-character punctuators can be combined
 * with another to form a multi-character punctuator. */
switch (token->type) {
case '<':
   if (other->type == '<')
- combined = _token_create_ival (token, LEFT_SHIFT, LEFT_SHIFT);
+ combined = _token_create_ival (parser, LEFT_SHIFT, LEFT_SHIFT);
   else if (other->type == '=')
- combined = _token_create_ival (token, LESS_OR_EQUAL, LESS_OR_EQUAL);
+ combined = _token_create_ival (parser, LESS_OR_EQUAL, LESS_OR_EQUAL);
   break;
case '>':
   if (other->type == '>')
- combined = _token_create_ival (token, RIGHT_SHIFT, RIGHT_SHIFT);
+ combined = _token_create_ival (parser, RIGHT_SHIFT, RIGHT_SHIFT);
   else if (other->type == '=')
- combined = _token_create_ival (token, GREATER_OR_EQUAL, 
GREATER_OR_EQUAL);
+ combined = _token_create_ival (parser, GREATER_OR_EQUAL, 
GREATER_OR_EQUAL);
   break;
case '=':
   if (other->type == '=')
- combined = _token_create_ival (token, EQUAL, EQUAL);
+ combined = _token_create_ival (parser, EQUAL, EQUAL);
   break;
case '!':
   if (other->type == '=')
- combined = _token_create_ival (token, NOT_EQUAL, NOT_EQUAL);
+ combined = _token_create_ival (parser, NOT_EQUAL, NOT_EQUAL);
   break;
case '&':
   if (other->type == '&')
- combined = _token_create_ival (token, AND, AND);
+ combined = _token_create_ival (parser, AND, AND);
   break;
case '|':
   if (other->type == '|')
- combined = _token_create_ival (token, OR, OR);
+ combined = _token_create_ival (parser, OR, OR);
   break;
}

if (combined != NULL) {
   /* Inherit the location from the first token */

Re: [Mesa-dev] [PATCH 00/15] GLSL memory allocation rework for faster compilation

2016-10-12 Thread Nicolai Hähnle

Nice work! I sent some comments on patches 6 & 7. Patches 1-4 and 7-15 are

Reviewed-by: Nicolai Hähnle 

assuming you checked them with a Piglit run in addition to the shader-db.

Something that LLVM does for its intermediate representations is using 
Recycler objects. Instructions are allocated from a linear allocator, 
but when they are removed they are neither returned to the heap nor 
simply forgotten. Instead, the memory block is added to a linked list 
managed by the Recycler object, so that the next instruction allocation 
can be served from there.


I suspect that this could also help here because it's still very fast 
but keeps the cache footprint smaller.


Cheers,
Nicolai

On 08.10.2016 12:58, Marek Olšák wrote:

Hi,

This patch series reduces the number of malloc calls in the GLSL
compiler by 63%. That leads to better compile times and less heap
thrashing.

It's done by switching memory allocations in the GLSL compiler to my
new linear allocator that allocates out of a fixed-sized buffer with
a monotonically increasing offset. If more buffers are needed, it
chains them.

The new allocator is used in all places where short-lived allocations
are used with a high number of malloc calls. The series also contains
other improvements not related to the new allocator that also improve
compile times. The results are below.

I tested my shader-db with shaders only being compiled to TGSI.
(noop gallium driver)


master + libc's malloc:

 real   0m54.182s
 user   3m33.640s
 sys0m0.620s
 maxmem 275 MB


master + jemalloc preloaded:

 real   0m45.044s
 user   2m56.356s
 sys0m1.652s
 maxmem 284 MB


the series + libc's malloc:

 real   0m46.221s
 user   3m2.080s
 sys0m0.544s
 maxmem 270 MB


the series + jemalloc preloaded:

 real   0m40.729s
 user   2m39.564s
 sys0m1.232s
 maxmem 284 MB


The series without jemalloc almost caught up with jemalloc + master.
However, jemalloc also benefits.

Current Mesa needs 54.182s and it drops to 40.729s with my series and
jemalloc. The total change in compile time is -25% if we incorporate
both. Without jemalloc, the difference is only -14.7%.

With radeonsi, the improvement is approx. slightly more than 1/2 of that
(if you add the LLVM time). However, radeonsi also has asynchronous
shader compilation hiding LLVM overhead in some cases, so it depends.

Drivers with faster compiler backends will benefit more than radeonsi,
but will probably not reach -25% or -14.7% (except softpipe, which uses
TGSI as-is).

The memory usage looks reasonable in all tested cases.

Note: One of the first patches moves memset from ralloc to rzalloc.
I tested and fixed the GLSL source -> TGSI path, but other codepaths
may break, and you need to use valgrind to find all uninitialized
variables that relied on ralloc doing memset (if there are any).

You can also find it here:
https://cgit.freedesktop.org/~mareko/mesa/log/?h=glsl-alloc-rework

Please review.

 src/compiler/glsl/ast.h |   4 +-
 src/compiler/glsl/ast_to_hir.cpp|   4 +-
 src/compiler/glsl/ast_type.cpp  |  13 ++-
 src/compiler/glsl/glcpp/glcpp-lex.l |   2 +-
 src/compiler/glsl/glcpp/glcpp-parse.y   | 203 
+-
 src/compiler/glsl/glcpp/glcpp.h |   1 +
 src/compiler/glsl/glsl_lexer.ll |  16 +--
 src/compiler/glsl/glsl_parser.yy| 202 
+++---
 src/compiler/glsl/glsl_parser_extras.cpp|   6 +-
 src/compiler/glsl/glsl_parser_extras.h  |   4 +-
 src/compiler/glsl/glsl_symbol_table.cpp |  19 ++--
 src/compiler/glsl/glsl_symbol_table.h   |   1 +
 src/compiler/glsl/ir.cpp|   4 +
 src/compiler/glsl/ir.h  |  13 ++-
 src/compiler/glsl/link_uniform_blocks.cpp   |   2 +-
 src/compiler/glsl/list.h|   2 +-
 src/compiler/glsl/lower_packed_varyings.cpp |   8 +-
 src/compiler/glsl/opt_constant_propagation.cpp  |  14 ++-
 src/compiler/glsl/opt_copy_propagation.cpp  |   7 +-
 src/compiler/glsl/opt_copy_propagation_elements.cpp |  19 ++--
 src/compiler/glsl/opt_dead_code_local.cpp   |  12 ++-
 src/compiler/glsl_types.cpp |  38 +--
 src/compiler/glsl_types.h   |   6 +-
 src/compiler/nir/nir.c  |   8 +-
 src/compiler/spirv/vtn_variables.c  |   3 +-
 src/gallium/drivers/freedreno/ir3/ir3.c |   2 +-
 src/gallium/drivers/vc4/vc4_cl.c|   2 +-
 src/gallium/drivers/vc4/vc4_program.c   |   2 +-
 src/gallium/drivers/vc4/vc4_simulator.c |   5 +-
 src/mesa/drivers/dri/i965/brw_state_batch.c |   5 +-
 src/util/ralloc.c   | 392 
+++

Re: [Mesa-dev] [PATCH 3/3] radeonsi: Use the new image load/store intrinsic signatures

2016-10-12 Thread Nicolai Hähnle

On 11.10.2016 23:45, Tom Stellard wrote:

---
 src/gallium/drivers/radeonsi/si_shader.c | 59 +---
 1 file changed, 46 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 4e07317..1f1fdf2 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3575,16 +3575,29 @@ static void image_append_args(
const struct tgsi_full_instruction *inst = emit_data->inst;
LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
-
-   emit_data->args[emit_data->arg_count++] = i1false; /* r128 */
-   emit_data->args[emit_data->arg_count++] =
-   tgsi_is_array_image(target) ? i1true : i1false; /* da */
-   if (!atomic) {
-   emit_data->args[emit_data->arg_count++] =
-   inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | 
TGSI_MEMORY_VOLATILE) ?
-   i1true : i1false; /* glc */
+   LLVMValueRef r128 = i1false;
+   LLVMValueRef da = tgsi_is_array_image(target) ? i1true : i1false;
+   LLVMValueRef glc =
+   inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | 
TGSI_MEMORY_VOLATILE) ?
+   i1true : i1false;
+   LLVMValueRef slc = i1false;
+   LLVMValueRef lwe = i1false;
+
+   if (atomic || (HAVE_LLVM <= 0x0309)) {
+   emit_data->args[emit_data->arg_count++] = r128;
+   emit_data->args[emit_data->arg_count++] = da;
+   if (!atomic) {
+   emit_data->args[emit_data->arg_count++] = glc;
+   }
+   emit_data->args[emit_data->arg_count++] = slc;
+   return;
}
-   emit_data->args[emit_data->arg_count++] = i1false; /* slc */
+
+   /* HAVE_LLVM >= 0x0400 */
+   emit_data->args[emit_data->arg_count++] = glc;
+   emit_data->args[emit_data->arg_count++] = slc;
+   emit_data->args[emit_data->arg_count++] = lwe;
+   emit_data->args[emit_data->arg_count++] = da;
 }

 /**
@@ -3761,7 +3774,9 @@ static void load_emit_memory(
 }

 static void get_image_intr_name(const char *base_name,
+   LLVMTypeRef data_type,
LLVMTypeRef coords_type,
+   LLVMTypeRef rsrc_type,
char *out_name, unsigned out_len)
 {
char coords_type_name[8];
@@ -3769,7 +3784,21 @@ static void get_image_intr_name(const char *base_name,
build_type_name_for_intr(coords_type, coords_type_name,
sizeof(coords_type_name));

+#if HAVE_LLVM <= 0x0309
snprintf(out_name, out_len, "%s.%s", base_name, coords_type_name);
+#else
+   {
+   char data_type_name[8];
+   char rsrc_type_name[8];
+
+   build_type_name_for_intr(data_type, data_type_name,
+   sizeof(data_type_name));
+   build_type_name_for_intr(rsrc_type, rsrc_type_name,
+   sizeof(rsrc_type_name));
+   snprintf(out_name, out_len, "%s.%s.%s.%s", base_name,
+data_type_name, coords_type_name, rsrc_type_name);
+   }
+#endif


Can this also be a regular if-statement rather than preprocessor macros?

Apart from that, the series is

Reviewed-by: Nicolai Hähnle 


 }

 static void load_emit(
@@ -3781,7 +3810,7 @@ static void load_emit(
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
const struct tgsi_full_instruction * inst = emit_data->inst;
-   char intrinsic_name[32];
+   char intrinsic_name[64];

if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) {
load_emit_memory(ctx, emit_data);
@@ -3804,7 +3833,9 @@ static void load_emit(
LLVMReadOnlyAttribute);
} else {
get_image_intr_name("llvm.amdgcn.image.load",
-   LLVMTypeOf(emit_data->args[0]),
+   emit_data->dst_type, /* vdata */
+   LLVMTypeOf(emit_data->args[0]), /* coords */
+   LLVMTypeOf(emit_data->args[1]), /* rsrc */
intrinsic_name, sizeof(intrinsic_name));

emit_data->output[emit_data->chan] =
@@ -3981,7 +4012,7 @@ static void store_emit(
LLVMBuilderRef builder = gallivm->builder;
const struct tgsi_full_instruction * inst = emit_data->inst;
unsigned target = inst->Memory.Texture;
-   char intrinsic_name[32];
+   char intrinsic_name[64];

if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) {
store_emit_memory(ctx, emit_data);
@@ -4003,7 +4034,9 @@ static void store_emit(
emit_data->arg_count, 0);
 

[Mesa-dev] [PATCH] gallium: add PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY

2016-10-12 Thread Marek Olšák
From: Marek Olšák 

For performance tuning in drivers. It filters out window system
framebuffers and OpenGL renderbuffers.

radeonsi will use this to guess whether a depth buffer will be read
by a shader. There is no guarantee about what will actually happen.

This is a departure from PIPE_BIND flags which are defined to be strict
but they are useless in practice.
---
 src/gallium/include/pipe/p_defines.h | 1 +
 src/mesa/state_tracker/st_texture.c  | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index db96c51..037ed92 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -443,20 +443,21 @@ enum pipe_flush_flags
 #define PIPE_BIND_SCANOUT (1 << 19) /*  */
 #define PIPE_BIND_SHARED  (1 << 20) /* get_texture_handle ??? */
 #define PIPE_BIND_LINEAR  (1 << 21)
 
 
 /**
  * Flags for the driver about resource behaviour:
  */
 #define PIPE_RESOURCE_FLAG_MAP_PERSISTENT (1 << 0)
 #define PIPE_RESOURCE_FLAG_MAP_COHERENT   (1 << 1)
+#define PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY (1 << 2)
 #define PIPE_RESOURCE_FLAG_DRV_PRIV(1 << 16) /* driver/winsys private */
 #define PIPE_RESOURCE_FLAG_ST_PRIV (1 << 24) /* state-tracker/winsys 
private */
 
 /**
  * Hint about the expected lifecycle of a resource.
  * Sorted according to GPU vs CPU access.
  */
 enum pipe_resource_usage {
PIPE_USAGE_DEFAULT,/* fast GPU access */
PIPE_USAGE_IMMUTABLE,  /* fast GPU access, immutable */
diff --git a/src/mesa/state_tracker/st_texture.c 
b/src/mesa/state_tracker/st_texture.c
index a2c36ac..7b72ffd 100644
--- a/src/mesa/state_tracker/st_texture.c
+++ b/src/mesa/state_tracker/st_texture.c
@@ -84,21 +84,22 @@ st_texture_create(struct st_context *st,
memset(&pt, 0, sizeof(pt));
pt.target = target;
pt.format = format;
pt.last_level = last_level;
pt.width0 = width0;
pt.height0 = height0;
pt.depth0 = depth0;
pt.array_size = layers;
pt.usage = PIPE_USAGE_DEFAULT;
pt.bind = bind;
-   pt.flags = 0;
+   /* only set this for OpenGL textures, not renderbuffers */
+   pt.flags = PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY;
pt.nr_samples = nr_samples;
 
newtex = screen->resource_create(screen, &pt);
 
assert(!newtex || pipe_is_referenced(&newtex->reference));
 
return newtex;
 }
 
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeonsi: implement TC-compatible HTILE

2016-10-12 Thread Marek Olšák
From: Marek Olšák 

so that decompress blits aren't needed and depth texturing needs less
memory bandwidth.

Z16 and Z24 are promoted to Z32_FLOAT by the driver, because TC-compatible
HTILE only supports Z32_FLOAT. This doubles memory footprint for Z16.
The format promotion is not visible to state trackers.

This is part of TC-compatible renderbuffer compression, which has 3 parts:
DCC, HTILE, FMASK. Only TC-compatible FMASK compression is missing now.

I don't see a measurable increase in performance though.

(I tested Talos Principle and DiRT: Showdown, the latter is improved by
 0.5%, which is almost noise, and it originally used layered Z16,
 so at least we know that Z16 promoted to Z32F isn't slower now)
---
 src/gallium/drivers/radeon/r600_pipe_common.h  |  3 ++
 src/gallium/drivers/radeon/r600_texture.c  | 67 ++
 src/gallium/drivers/radeon/radeon_winsys.h |  4 ++
 src/gallium/drivers/radeonsi/si_blit.c | 11 -
 src/gallium/drivers/radeonsi/si_descriptors.c  |  7 ++-
 src/gallium/drivers/radeonsi/si_shader.c   | 18 ++-
 src/gallium/drivers/radeonsi/si_state.c| 39 +--
 src/gallium/drivers/radeonsi/si_state_draw.c   |  3 +-
 src/gallium/winsys/amdgpu/drm/amdgpu_surface.c | 57 --
 9 files changed, 185 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 290b228..5cfcad6 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -238,27 +238,29 @@ struct r600_cmask_info {
unsigned yalign;
unsigned slice_tile_max;
unsigned base_address_reg;
 };
 
 struct r600_htile_info {
unsigned pitch;
unsigned height;
unsigned xalign;
unsigned yalign;
+   unsigned alignment;
 };
 
 struct r600_texture {
struct r600_resourceresource;
 
uint64_tsize;
unsignednum_level0_transfers;
+   enum pipe_formatdb_render_format;
boolis_depth;
booldb_compatible;
boolcan_sample_z;
boolcan_sample_s;
unsigneddirty_level_mask; /* each bit says if 
that mipmap is compressed */
unsignedstencil_dirty_level_mask; /* each bit 
says if that mipmap is compressed */
struct r600_texture *flushed_depth_texture;
struct radeon_surf  surface;
 
/* Colorbuffer compression and fast clear. */
@@ -266,20 +268,21 @@ struct r600_texture {
struct r600_cmask_info  cmask;
struct r600_resource*cmask_buffer;
uint64_tdcc_offset; /* 0 = disabled */
unsignedcb_color_info; /* fast clear enable bit 
*/
unsignedcolor_clear_value[2];
unsignedlast_msaa_resolve_target_micro_mode;
 
/* Depth buffer compression and fast clear. */
struct r600_htile_info  htile;
struct r600_resource*htile_buffer;
+   booltc_compatible_htile;
booldepth_cleared; /* if it was cleared at 
least once */
float   depth_clear_value;
boolstencil_cleared; /* if it was cleared 
at least once */
uint8_t stencil_clear_value;
 
boolnon_disp_tiling; /* R600-Cayman only */
 
/* Whether the texture is a displayable back buffer and needs DCC
 * decompression, which is expensive. Therefore, it's enabled only
 * if statistics suggest that it will pay off and it's allocated
diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 57cdbcf..625d091 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -185,21 +185,22 @@ static unsigned r600_texture_get_offset(struct 
r600_texture *rtex, unsigned leve
return rtex->surface.level[level].offset +
   box->z * rtex->surface.level[level].slice_size +
   box->y / util_format_get_blockheight(format) * 
rtex->surface.level[level].pitch_bytes +
   box->x / util_format_get_blockwidth(format) * 
util_format_get_blocksize(format);
 }
 
 static int r600_init_surface(struct r600_common_screen *rscreen,
 struct radeon_surf *surface,
 const struct pipe_resource *ptex,
 unsigned array_mode,
-bool is_flushed_depth)
+bool is_flush

Re: [Mesa-dev] [PATCH 07/15] glcpp: use the linear allocator for most objects

2016-10-12 Thread Marek Olšák
On Wed, Oct 12, 2016 at 3:11 PM, Nicolai Hähnle  wrote:
> On 08.10.2016 12:58, Marek Olšák wrote:
>>
>> From: Marek Olšák 
>>
>> ---
>>  src/compiler/glsl/glcpp/glcpp-lex.l   |   2 +-
>>  src/compiler/glsl/glcpp/glcpp-parse.y | 203
>> +++---
>>  src/compiler/glsl/glcpp/glcpp.h   |   1 +
>>  3 files changed, 94 insertions(+), 112 deletions(-)
>>
>> diff --git a/src/compiler/glsl/glcpp/glcpp-lex.l
>> b/src/compiler/glsl/glcpp/glcpp-lex.l
>> index d09441a..f4a6876 100644
>> --- a/src/compiler/glsl/glcpp/glcpp-lex.l
>> +++ b/src/compiler/glsl/glcpp/glcpp-lex.l
>
> [snip]
>
>> @@ -1002,51 +994,50 @@ _token_list_append_list(token_list_t *list,
>> token_list_t *tail)
>>list->head = tail->head;
>> } else {
>>list->tail->next = tail->head;
>> }
>>
>> list->tail = tail->tail;
>> list->non_space_tail = tail->non_space_tail;
>>  }
>>
>>  static token_list_t *
>> -_token_list_copy(void *ctx, token_list_t *other)
>> +_token_list_copy(glcpp_parser_t *parser, token_list_t *other)
>>  {
>> token_list_t *copy;
>> token_node_t *node;
>>
>> if (other == NULL)
>>return NULL;
>>
>> -   copy = _token_list_create (ctx);
>> +   copy = _token_list_create (parser);
>> for (node = other->head; node; node = node->next) {
>> -  token_t *new_token = ralloc (copy, token_t);
>> +  token_t *new_token = linear_alloc_child(parser->linalloc,
>> sizeof(token_t));
>>*new_token = *node->token;
>> -  _token_list_append (copy, new_token);
>> +  _token_list_append (parser, copy, new_token);
>> }
>>
>> return copy;
>>  }
>>
>>  static void
>>  _token_list_trim_trailing_space(token_list_t *list)
>>  {
>> token_node_t *tail, *next;
>>
>> if (list->non_space_tail) {
>>tail = list->non_space_tail->next;
>>list->non_space_tail->next = NULL;
>>list->tail = list->non_space_tail;
>>
>>while (tail) {
>>   next = tail->next;
>> - ralloc_free (tail);
>>   tail = next;
>>}
>
>
> This whole loop can be dropped now.
>
>
>> }
>>  }
>>
>>  static int
>>  _token_list_is_empty_ignoring_space(token_list_t *l)
>>  {
>> token_node_t *n;
>>
>> @@ -1177,69 +1168,70 @@ _token_print(char **out, size_t *len, token_t
>> *token)
>> case PLACEHOLDER:
>>/* Nothing to print. */
>>break;
>> default:
>>assert(!"Error: Don't know how to print token.");
>>
>>break;
>> }
>>  }
>>
>> -/* Return a new token (ralloc()ed off of 'token') formed by pasting
>> - * 'token' and 'other'. Note that this function may return 'token' or
>> - * 'other' directly rather than allocating anything new.
>> +/* Return a new token formed by pasting 'token' and 'other'. Note that
>> this
>> + * function may return 'token' or 'other' directly rather than allocating
>> + * anything new.
>>   *
>>   * Caution: Only very cursory error-checking is performed to see if
>>   * the final result is a valid single token. */
>>  static token_t *
>> -_token_paste(glcpp_parser_t *parser, token_t *token, token_t *other)
>> +_token_paste(glcpp_parser_t *parser, token_list_t *list, token_t *token,
>> + token_t *other)
>
>
> I seem to be blind... where is list used?

It's probably a leftover from my initial work where each list was a
linear parent, but then I reworked it to use only one linear parent
per parser instance.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nvc0/ir: fix textureGather with a single offset

2016-10-12 Thread Ilia Mirkin
Recent fix for non-const offsets broke the case of a single offset (vs 4
offsets). The later code relies on the offs array to contain null values
to tell whether they should be added onto the srcs list.

Fixes: 5239bd592 ("nvc0/ir: fix overwriting of value backing non-constant 
gather offset")
Signed-off-by: Ilia Mirkin 
Cc: mesa-sta...@lists.freedesktop.org
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 4c013c4..dab3e2d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -820,11 +820,11 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
  // Either there is 1 offset, which goes into the 2 low bytes of the
  // first source, or there are 4 offsets, which go into 2 sources (8
  // values, 1 byte each).
- Value *offs[2] = {bld.getScratch(), bld.getScratch()};
+ Value *offs[2] = {NULL, NULL};
  for (n = 0; n < i->tex.useOffsets; n++) {
 for (c = 0; c < 2; ++c) {
if ((n % 2) == 0 && c == 0)
-  bld.mkMov(offs[n / 2], i->offset[n][c].get());
+  bld.mkMov(offs[n / 2] = bld.getScratch(), 
i->offset[n][c].get());
else
   bld.mkOp3(OP_INSBF, TYPE_U32,
 offs[n / 2],
-- 
2.7.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 98169] lm_sensors hud option crashes unigine heaven

2016-10-12 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=98169

Steven Toth  changed:

   What|Removed |Added

 Attachment #127217|0   |1
is obsolete||

--- Comment #24 from Steven Toth  ---
Created attachment 127246
  --> https://bugs.freedesktop.org/attachment.cgi?id=127246&action=edit
Betetr version of the sensor_cleanup patch

-- 
You are receiving this mail because:
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 98169] lm_sensors hud option crashes unigine heaven

2016-10-12 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=98169

--- Comment #25 from Steven Toth  ---
Christoph,

Please remove the prior sensor-cleanup.patch, apply the attached
sensor-cleanupv2.patch.

This should prevent freeing of an object while its being used by a second hud
element.

Let me know, thanks.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] draw: initialize shader inputs

2016-10-12 Thread Jose Fonseca

On 11/10/16 23:04, srol...@vmware.com wrote:

From: Roland Scheidegger 

This should make the code more robust if a shader tries to use inputs which
aren't defined by the vertex element layout (which usually shouldn't happen).

No piglit change.
---
 src/gallium/auxiliary/draw/draw_llvm.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/src/gallium/auxiliary/draw/draw_llvm.c 
b/src/gallium/auxiliary/draw/draw_llvm.c
index 87951fa..4270a8f 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -1705,6 +1705,13 @@ draw_llvm_generate(struct draw_llvm *llvm, struct 
draw_llvm_variant *variant,
   lp_build_printf(gallivm, " --- io %d = %p, loop counter %d\n",
   io_itr, io, lp_loop.counter);
 #endif
+
+  for (j = draw->pt.nr_vertex_elements; j < PIPE_MAX_SHADER_INPUTS; j++) {
+ for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
+inputs[j][i] = lp_build_zero(gallivm, vs_type);
+ }
+  }
+
   for (i = 0; i < vector_length; ++i) {
  LLVMValueRef vert_index =
 LLVMBuildAdd(builder,



Reviewed-by: Jose Fonseca 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/8] mesa: rename gl_vertex_attrib_array::VertexBinding

2016-10-12 Thread Brian Paul

On 10/12/2016 01:43 AM, Mathias Fröhlich wrote:

On Tuesday, 11 October 2016 20:10:27 CEST Brian Paul wrote:

Rename to gl_vertex_attrib_array::BufferBindingIndex because this field
is an index into the array of buffer binding points.  This makes some
code a little easier to follow since there's also a "VertexBinding" field
in gl_vertex_array_object.


What about BindingIndex instead?
Either way is ok with me, but may be this helps not to get too long lines
including the benefits you are aiming for?


In this case, I think I prefer the longer, more descriptive name.

I'm also tempted to rename gl_vertex_array_object::VertexBinding[] to 
BufferBinding[].  Maybe later.


Thanks for reviewing.

-Brian



Mathias



---
  src/mesa/main/api_arrayelt.c  | 20 ++--
  src/mesa/main/arrayobj.c  |  8 
  src/mesa/main/mtypes.h|  2 +-
  src/mesa/main/varray.c| 16 
  src/mesa/vbo/vbo_exec_array.c |  6 +++---
  5 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/src/mesa/main/api_arrayelt.c b/src/mesa/main/api_arrayelt.c
index 15fbb8c..cc7dee3 100644
--- a/src/mesa/main/api_arrayelt.c
+++ b/src/mesa/main/api_arrayelt.c
@@ -1566,7 +1566,7 @@ _ae_update_state(struct gl_context *ctx)
 /* conventional vertex arrays */
 if (vao->VertexAttrib[VERT_ATTRIB_COLOR_INDEX].Enabled) {
aa->array = &vao->VertexAttrib[VERT_ATTRIB_COLOR_INDEX];
-  aa->binding = &vao->VertexBinding[aa->array->VertexBinding];
+  aa->binding = &vao->VertexBinding[aa->array->BufferBindingIndex];
aa->offset = IndexFuncs[TYPE_IDX(aa->array->Type)];
check_vbo(actx, aa->binding->BufferObj);
aa++;
@@ -1574,7 +1574,7 @@ _ae_update_state(struct gl_context *ctx)

 if (vao->VertexAttrib[VERT_ATTRIB_EDGEFLAG].Enabled) {
aa->array = &vao->VertexAttrib[VERT_ATTRIB_EDGEFLAG];
-  aa->binding = &vao->VertexBinding[aa->array->VertexBinding];
+  aa->binding = &vao->VertexBinding[aa->array->BufferBindingIndex];
aa->offset = _gloffset_EdgeFlagv;
check_vbo(actx, aa->binding->BufferObj);
aa++;
@@ -1582,7 +1582,7 @@ _ae_update_state(struct gl_context *ctx)

 if (vao->VertexAttrib[VERT_ATTRIB_NORMAL].Enabled) {
aa->array = &vao->VertexAttrib[VERT_ATTRIB_NORMAL];
-  aa->binding = &vao->VertexBinding[aa->array->VertexBinding];
+  aa->binding = &vao->VertexBinding[aa->array->BufferBindingIndex];
aa->offset = NormalFuncs[TYPE_IDX(aa->array->Type)];
check_vbo(actx, aa->binding->BufferObj);
aa++;
@@ -1590,7 +1590,7 @@ _ae_update_state(struct gl_context *ctx)

 if (vao->VertexAttrib[VERT_ATTRIB_COLOR0].Enabled) {
aa->array = &vao->VertexAttrib[VERT_ATTRIB_COLOR0];
-  aa->binding = &vao->VertexBinding[aa->array->VertexBinding];
+  aa->binding = &vao->VertexBinding[aa->array->BufferBindingIndex];
aa->offset = ColorFuncs[aa->array->Size-3][TYPE_IDX(aa->array-
Type)];
check_vbo(actx, aa->binding->BufferObj);
aa++;
@@ -1598,7 +1598,7 @@ _ae_update_state(struct gl_context *ctx)

 if (vao->VertexAttrib[VERT_ATTRIB_COLOR1].Enabled) {
aa->array = &vao->VertexAttrib[VERT_ATTRIB_COLOR1];
-  aa->binding = &vao->VertexBinding[aa->array->VertexBinding];
+  aa->binding = &vao->VertexBinding[aa->array->BufferBindingIndex];
aa->offset = SecondaryColorFuncs[TYPE_IDX(aa->array->Type)];
check_vbo(actx, aa->binding->BufferObj);
aa++;
@@ -1606,7 +1606,7 @@ _ae_update_state(struct gl_context *ctx)

 if (vao->VertexAttrib[VERT_ATTRIB_FOG].Enabled) {
aa->array = &vao->VertexAttrib[VERT_ATTRIB_FOG];
-  aa->binding = &vao->VertexBinding[aa->array->VertexBinding];
+  aa->binding = &vao->VertexBinding[aa->array->BufferBindingIndex];
aa->offset = FogCoordFuncs[TYPE_IDX(aa->array->Type)];
check_vbo(actx, aa->binding->BufferObj);
aa++;
@@ -1620,7 +1620,7 @@ _ae_update_state(struct gl_context *ctx)
* If we ever remove GL_NV_vertex_program this will have to

change.

*/
   at->array = attribArray;
- at->binding = &vao->VertexBinding[attribArray->VertexBinding];
+ at->binding = &vao->VertexBinding[attribArray-
BufferBindingIndex];
   assert(!at->array->Normalized);
   at->func = AttribFuncsNV[at->array->Normalized]
   [at->array->Size-1]
@@ -1638,7 +1638,7 @@ _ae_update_state(struct gl_context *ctx)
if (attribArray->Enabled) {
   GLint intOrNorm;
   at->array = attribArray;
- at->binding = &vao->VertexBinding[attribArray->VertexBinding];
+ at->binding = &vao->VertexBinding[attribArray-
BufferBindingIndex];
   /* Note: we can't grab the _glapi_Dispatch->VertexAttrib1fvNV
* function pointer here (for float arrays) since the pointer may
* change from one execution of _ae_ArrayElement() to
@@ -1669,7 +1669,7 @@ _ae_update_sta

[Mesa-dev] [Bug 97260] R9 290 low performance in Linux 4.7

2016-10-12 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=97260

--- Comment #64 from Alex Deucher  ---
(In reply to alvarex from comment #63)
> If I remove the line "adev->ddev->mode_config.async_page_flip = true;" from
> the files on comment 17, and boot with radeon.dpm=0 radeon.runpm=0. That
> will trigger the bug, on Tombraider for expample it will run at 9FPS when
> previously running at 75FPS, on amdgpu.

When you set radoen.dpm=0 low performance is expected because you've disabled
the power management so the card stays at it's low boot up clocks.  You
shouldn't mess with the radeon.dpm option for this issue.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 98169] lm_sensors hud option crashes unigine heaven

2016-10-12 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=98169

--- Comment #26 from Christoph Haag  ---
Created attachment 127250
  --> https://bugs.freedesktop.org/attachment.cgi?id=127250&action=edit
success

With the patches
https://bugs.freedesktop.org/show_bug.cgi?id=98169#c19
https://bugs.freedesktop.org/show_bug.cgi?id=98169#c25
it works now.

Thanks for the fix.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 16/22] anv/hiz: Perform HiZ resolves for all partial renders

2016-10-12 Thread Nanley Chery
On Tue, Oct 11, 2016 at 06:55:53PM -0700, Jason Ekstrand wrote:
> On Tue, Oct 11, 2016 at 6:16 PM, Nanley Chery  wrote:
> 
> > On Mon, Oct 10, 2016 at 06:00:49PM -0700, Jason Ekstrand wrote:
> > > On Mon, Oct 10, 2016 at 2:23 PM, Nanley Chery 
> > wrote:
> > >
> > > > On Fri, Oct 07, 2016 at 09:41:14PM -0700, Jason Ekstrand wrote:
> > > > > If we don't, we can end up with corruption in the portion of the
> > depth
> > > > > buffer that lies outside the render area when we do a HiZ resolve at
> > the
> > > > > end.  The only reason we weren't seeing this before was that all of
> > the
> > > > > meta-based clears such as VkCmdClearDepthStencilImage were internally
> > > > using
> > > > > HiZ so the HiZ buffer never truly got out-of-sync.  If the CTS ever
> > > > tested
> > > > > a depth upload (which doesn't care about HiZ) and then a partial
> > render
> > > > we
> > > > > would have seen problems.  Soon, we will be using blorp to do depth
> > > > clears
> > > > > and it won't bother with HiZ so we would get CTS regressions without
> > > > this.
> > > > >
> > > >
> > > > I understand the problem, but I think this solution unnecessarily
> > > > penalizes the user's renderpass.
> > > >
> > > > Since depth buffer updates via vkCopy*ToImage and
> > > > vkCmdClearDepthStencilImage cause the HiZ buffer to become stale,
> > > > calling
> > > >
> > > > genX(cmd_buffer_emit_hz_op)(cmd_buffer, BLORP_HIZ_OP_HIZ_RESOLVE);
> > > >
> > > > at the bottom of those commands should fix the issue without the extra
> > > > penalty. I'd imagine that as a prequisite, blorp would have to learn to
> > > > emit enough depth stencil state for this command.
> > > >
> > >
> > > I think that's dangerously mixing HiZ data validity models.  There are 3
> > > basic aux data validity models that we've thrown around:
> > >
> > >  1) AUX is always correct.
> > >  2) AUX is correct within a render pass and invalid outside.
> > >  3) Track whether or not AUX is valid and resolve only as needed.
> > >
> >
> > What is the definition of correct here? I'd assume you mean that the
> > data matches what's in the depth buffer, but that sometimes may not be
> > the case (STORE_OP_DONTCARE) yet the program behavior is correct
> > nonetheless.
> >
> 
> By "correct" I mean "consistent with the depth buffer" or, more precicely,
> "all well-defined pixels of the depth buffer are consistent with the HiZ
> buffer".  We *may* be able to avoid the depth resolve at the end if you
> have STORE_OP_DONT_CARE.  However, we would probably not do anything
> interesting with LOAD_OP_DONT_CARE.
> 


With this definition of correct (accessing either buffer will give you
the correct value due to their being consistent with each other), the
current implementation is arguably a course-grained version of (3) (no
tracking, let's call this 4) than it is (2). The HiZ buffer is only
consistent with the depth buffer when a user performs an operation that
likely requires it to be so. For example:

* LOAD_OP_LOAD -> HiZ Resolve (consistent)
* LOAD_OP_CLEAR -> No resolve, Fast Depth Clear (inconsistent)
* vkCmdDraw* -> No resolve (inconsistent)
* STORE_OP_STORE -> Depth Resolve (consistent)

> 
> > Also, could you please explain where the danger comes into play?
> >
> 
> We need to have a solid mental model of when HiZ and depth are consistent.
> Otherwise, we'll make mistakes, things will get inconsistent, and we'll

Agreed.

> have weird bugs.  This bug is a good example of this.  Our mental model (2)
> works fine except that we were leaking garbage depth from DONT_CARE when we
> have a partial areat.  Just doing a HiZ resolve after a blorp clear "fixes"
> the bug by making things always consistent (mental model 1).  But then it

As mentioned above, I'm not advocating mixing 1 and 2, but covering a
missed case in 4. Whether or not that mental model is solid seems like
a subjective claim.

> means that we have LOAD_OP_LOAD, we're doing two HiZ resolves which we
> don't want either.
> 

I wouldn't expect Vulkan apps to submit image copies as frequently as
render passes, so my thinking is that an extra HiZ resolve at the end of
an Image copy should have less of an impact on FPS than performing the
resolve on every clearing RP that doesn't use a full render area. I
did not write the patch to test my suggestion, but I was able to get a
measurable difference by forcing HiZ resolves on the triangle demo. I
won't post the numbers I obtained from the questionable method of
eye-balling the FPS counter (especially with the amount of variance
involved), so feel free to take a look at it yourself.  

I think the temporarily minor dip in performance introduced here is a
small price to pay for the removal of meta. I also think my suggestion
may be a lot more work than it seemed initially, so it's likely best to
revisit this later.

This patch is:
Reviewed-by: Nanley Chery 

> As I intended to say below, I don't mind moving to (1), but if that's what
> we want to do we should commit to it and ch

Re: [Mesa-dev] [PATCH 2/5] anv/cmd_buffer: Delay variable assignment in HZ function

2016-10-12 Thread Nanley Chery
On Mon, Oct 10, 2016 at 04:38:12PM -0700, Nanley Chery wrote:
> On Mon, Oct 10, 2016 at 04:01:29PM -0700, Jason Ekstrand wrote:
> > My patch to fix partial draws makes us use full_surface_op for HiZ resolves
> > as well so this patch (and maybe the next one?) are probably moot.
> > 
> 
> I realize that this patch conflicts with yours, but in my reply to that
> patch, I stated that I don't think the right fix is within this emit
> function.
> 

I've reviewed the patch in the aforementioned thread, so I'll send out a
v2 that takes this into account.

> -Nanley
> 
> > On Oct 10, 2016 3:33 PM, "Nanley Chery"  wrote:
> > 
> > > Delay the assignment of some variables until we know they will be used.
> > >
> > > Signed-off-by: Nanley Chery 
> > > ---
> > >  src/intel/vulkan/gen8_cmd_buffer.c | 32 +---
> > >  1 file changed, 17 insertions(+), 15 deletions(-)
> > >
> > > diff --git a/src/intel/vulkan/gen8_cmd_buffer.c
> > > b/src/intel/vulkan/gen8_cmd_buffer.c
> > > index 02c2d7c..e88ab02 100644
> > > --- a/src/intel/vulkan/gen8_cmd_buffer.c
> > > +++ b/src/intel/vulkan/gen8_cmd_buffer.c
> > > @@ -410,7 +410,6 @@ void
> > >  genX(cmd_buffer_emit_hz_op)(struct anv_cmd_buffer *cmd_buffer,
> > >enum blorp_hiz_op op)
> > >  {
> > > -   struct anv_cmd_state *cmd_state = &cmd_buffer->state;
> > > const struct anv_image_view *iview =
> > >anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
> > >
> > > @@ -421,21 +420,9 @@ genX(cmd_buffer_emit_hz_op)(struct anv_cmd_buffer
> > > *cmd_buffer,
> > > if (cmd_buffer->state.pass->subpass_count > 1)
> > >return;
> > >
> > > +   struct anv_cmd_state *cmd_state = &cmd_buffer->state;
> > > const uint32_t ds = cmd_state->subpass->depth_stencil_attachment;
> > > -
> > > -   /* Section 7.4. of the Vulkan 1.0.27 spec states:
> > > -*
> > > -*   "The render area must be contained within the framebuffer
> > > dimensions."
> > > -*
> > > -* Therefore, the only way the extent of the render area can match
> > > that of
> > > -* the image view is if the render area offset equals (0, 0).
> > > -*/
> > > -   const bool full_surface_op =
> > > - cmd_state->render_area.extent.width == iview->extent.width
> > > &&
> > > - cmd_state->render_area.extent.height ==
> > > iview->extent.height;
> > > -   if (full_surface_op)
> > > -  assert(cmd_state->render_area.offset.x == 0 &&
> > > - cmd_state->render_area.offset.y == 0);
> > > +   bool full_surface_op;
> > >
> > > /* This variable corresponds to the Pixel Dim column in the table
> > > below */
> > > struct isl_extent2d px_dim;
> > > @@ -447,6 +434,21 @@ genX(cmd_buffer_emit_hz_op)(struct anv_cmd_buffer
> > > *cmd_buffer,
> > >VK_ATTACHMENT_LOAD_OP_CLEAR)
> > >   return;
> > >
> > > +  /* Section 7.4. of the Vulkan 1.0.27 spec states:
> > > +   *
> > > +   *   "The render area must be contained within the framebuffer
> > > dimensions."
> > > +   *
> > > +   * Therefore, the only way the extent of the render area can match
> > > that of
> > > +   * the image view is if the render area offset equals (0, 0).
> > > +   */
> > > +  full_surface_op =
> > > + cmd_state->render_area.extent.width == iview->extent.width
> > > &&
> > > + cmd_state->render_area.extent.height ==
> > > iview->extent.height;
> > > +  if (full_surface_op)
> > > + assert(cmd_state->render_area.offset.x == 0 &&
> > > +cmd_state->render_area.offset.y == 0);
> > > +
> > > +
> > >/* Apply alignment restrictions. Despite the BDW PRM mentioning
> > > this is
> > > * only needed for a depth buffer surface type of D16_UNORM, 
> > > testing
> > > * showed it to be necessary for other depth formats as well
> > > --
> > > 2.10.0
> > >
> > > ___
> > > mesa-dev mailing list
> > > mesa-dev@lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> > >
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/5] gallium: annotate sw_driver_descriptor instance as const data

2016-10-12 Thread Marek Olšák
For the series:

Reviewed-by: Marek Olšák 

Marek

On Tue, Oct 11, 2016 at 8:39 PM, Emil Velikov  wrote:
> From: Emil Velikov 
>
> Already treated and handled as such.
>
> Signed-off-by: Emil Velikov 
> ---
>  src/gallium/include/state_tracker/sw_driver.h | 2 +-
>  src/gallium/targets/pipe-loader/pipe_swrast.c | 2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/include/state_tracker/sw_driver.h 
> b/src/gallium/include/state_tracker/sw_driver.h
> index 0eb2b44..1d59bd5 100644
> --- a/src/gallium/include/state_tracker/sw_driver.h
> +++ b/src/gallium/include/state_tracker/sw_driver.h
> @@ -16,6 +16,6 @@ struct sw_driver_descriptor
> } winsys[];
>  };
>
> -extern struct sw_driver_descriptor swrast_driver_descriptor;
> +extern const struct sw_driver_descriptor swrast_driver_descriptor;
>
>  #endif
> diff --git a/src/gallium/targets/pipe-loader/pipe_swrast.c 
> b/src/gallium/targets/pipe-loader/pipe_swrast.c
> index cf617f3..0a980b3 100644
> --- a/src/gallium/targets/pipe-loader/pipe_swrast.c
> +++ b/src/gallium/targets/pipe-loader/pipe_swrast.c
> @@ -23,7 +23,7 @@ swrast_create_screen(struct sw_winsys *ws)
>  }
>
>  PUBLIC
> -struct sw_driver_descriptor swrast_driver_descriptor = {
> +const struct sw_driver_descriptor swrast_driver_descriptor = {
> .create_screen = swrast_create_screen,
> .winsys = {
>  #ifdef HAVE_PIPE_LOADER_DRI
> --
> 2.10.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] draw: initialize shader inputs

2016-10-12 Thread Marek Olšák
On Wed, Oct 12, 2016 at 12:04 AM,   wrote:
> From: Roland Scheidegger 
>
> This should make the code more robust if a shader tries to use inputs which
> aren't defined by the vertex element layout (which usually shouldn't happen).
>
> No piglit change.
> ---
>  src/gallium/auxiliary/draw/draw_llvm.c | 7 +++
>  1 file changed, 7 insertions(+)
>
> diff --git a/src/gallium/auxiliary/draw/draw_llvm.c 
> b/src/gallium/auxiliary/draw/draw_llvm.c
> index 87951fa..4270a8f 100644
> --- a/src/gallium/auxiliary/draw/draw_llvm.c
> +++ b/src/gallium/auxiliary/draw/draw_llvm.c
> @@ -1705,6 +1705,13 @@ draw_llvm_generate(struct draw_llvm *llvm, struct 
> draw_llvm_variant *variant,
>lp_build_printf(gallivm, " --- io %d = %p, loop counter %d\n",
>io_itr, io, lp_loop.counter);
>  #endif
> +
> +  for (j = draw->pt.nr_vertex_elements; j < PIPE_MAX_SHADER_INPUTS; j++) 
> {
> + for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
> +inputs[j][i] = lp_build_zero(gallivm, vs_type);
> + }
> +  }

It's better to use LLVMGetUndef for uninitialized inputs and temps.
That can eliminate more code than initializing everything to 0.
(radeonsi uses undef)

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: remove 'params' parameter from ctx->Driver.TexParameter()

2016-10-12 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Wed, Oct 12, 2016 at 4:08 AM, Brian Paul  wrote:
> None of the drivers which implement this hook do anything with the
> texture parameter value.  Drivers just look at the pname and set a
> dirty flag if needed.
>
> We were doing some ugly casting and type conversion to setup the
> argument so that all goes away.
> ---
>  src/mesa/drivers/dri/nouveau/nouveau_state.c |  3 +--
>  src/mesa/drivers/dri/r200/r200_tex.c |  6 +++---
>  src/mesa/drivers/dri/radeon/radeon_tex.c |  2 +-
>  src/mesa/main/dd.h   |  5 ++---
>  src/mesa/main/teximage.c |  6 ++
>  src/mesa/main/texobj.c   | 15 ++-
>  src/mesa/main/texparam.c | 17 -
>  src/mesa/state_tracker/st_cb_texture.c   |  3 +--
>  8 files changed, 20 insertions(+), 37 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/nouveau/nouveau_state.c 
> b/src/mesa/drivers/dri/nouveau/nouveau_state.c
> index 6189997..de36fa4 100644
> --- a/src/mesa/drivers/dri/nouveau/nouveau_state.c
> +++ b/src/mesa/drivers/dri/nouveau/nouveau_state.c
> @@ -395,8 +395,7 @@ nouveau_tex_env(struct gl_context *ctx, GLenum target, 
> GLenum pname,
>
>  static void
>  nouveau_tex_parameter(struct gl_context *ctx,
> - struct gl_texture_object *t, GLenum pname,
> - const GLfloat *params)
> + struct gl_texture_object *t, GLenum pname)
>  {
> switch (pname) {
> case GL_TEXTURE_MAG_FILTER:
> diff --git a/src/mesa/drivers/dri/r200/r200_tex.c 
> b/src/mesa/drivers/dri/r200/r200_tex.c
> index ca92110..2a95f2d 100644
> --- a/src/mesa/drivers/dri/r200/r200_tex.c
> +++ b/src/mesa/drivers/dri/r200/r200_tex.c
> @@ -374,9 +374,9 @@ void r200TexUpdateParameters(struct gl_context *ctx, 
> GLuint unit)
>   * Changes variables and flags for a state update, which will happen at the
>   * next UpdateTextureState
>   */
> -static void r200TexParameter( struct gl_context *ctx,
> -   struct gl_texture_object *texObj,
> -   GLenum pname, const GLfloat *params )
> +static void r200TexParameter(struct gl_context *ctx,
> + struct gl_texture_object *texObj,
> + GLenum pname)
>  {
> radeonTexObj* t = radeon_tex_obj(texObj);
>
> diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c 
> b/src/mesa/drivers/dri/radeon/radeon_tex.c
> index d1aa1a1..083a5e1 100644
> --- a/src/mesa/drivers/dri/radeon/radeon_tex.c
> +++ b/src/mesa/drivers/dri/radeon/radeon_tex.c
> @@ -329,7 +329,7 @@ void radeonTexUpdateParameters(struct gl_context *ctx, 
> GLuint unit)
>
>  static void radeonTexParameter( struct gl_context *ctx,
> struct gl_texture_object *texObj,
> -   GLenum pname, const GLfloat *params )
> +   GLenum pname )
>  {
> radeonTexObj* t = radeon_tex_obj(texObj);
>
> diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
> index 7f53271..1d75b9f 100644
> --- a/src/mesa/main/dd.h
> +++ b/src/mesa/main/dd.h
> @@ -610,10 +610,9 @@ struct dd_function_table {
> /** Set texture environment parameters */
> void (*TexEnv)(struct gl_context *ctx, GLenum target, GLenum pname,
>const GLfloat *param);
> -   /** Set texture parameters */
> +   /** Set texture parameter (callee gets param value from the texObj) */
> void (*TexParameter)(struct gl_context *ctx,
> -struct gl_texture_object *texObj,
> -GLenum pname, const GLfloat *params);
> +struct gl_texture_object *texObj, GLenum pname);
> /** Set the viewport */
> void (*Viewport)(struct gl_context *ctx);
> /*@}*/
> diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
> index 411ec49..bc3b76a 100644
> --- a/src/mesa/main/teximage.c
> +++ b/src/mesa/main/teximage.c
> @@ -5082,12 +5082,10 @@ texture_buffer_range(struct gl_context *ctx,
>
> if (ctx->Driver.TexParameter) {
>if (offset != oldOffset) {
> - ctx->Driver.TexParameter(ctx, texObj, GL_TEXTURE_BUFFER_OFFSET,
> -  (const GLfloat *) &offset);
> + ctx->Driver.TexParameter(ctx, texObj, GL_TEXTURE_BUFFER_OFFSET);
>}
>if (size != oldSize) {
> - ctx->Driver.TexParameter(ctx, texObj, GL_TEXTURE_BUFFER_SIZE,
> -  (const GLfloat *) &size);
> + ctx->Driver.TexParameter(ctx, texObj, GL_TEXTURE_BUFFER_SIZE);
>}
> }
>
> diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c
> index 9a051bc..fbd498d 100644
> --- a/src/mesa/main/texobj.c
> +++ b/src/mesa/main/texobj.c
> @@ -365,15 +365,12 @@ finish_texture_init(struct gl_context *ctx, GLenum 
> target,
>   obj->Sampler.MinFilter = filter;
>   obj->Sampler.MagFilter = filter;
>   

Re: [Mesa-dev] [PATCH] nvc0/ir: fix textureGather with a single offset

2016-10-12 Thread Samuel Pitoiset

You fixed the recent regressions, thanks!

Reviewed-by: Samuel Pitoiset 

On 10/12/2016 04:26 PM, Ilia Mirkin wrote:

Recent fix for non-const offsets broke the case of a single offset (vs 4
offsets). The later code relies on the offs array to contain null values
to tell whether they should be added onto the srcs list.

Fixes: 5239bd592 ("nvc0/ir: fix overwriting of value backing non-constant gather 
offset")
Signed-off-by: Ilia Mirkin 
Cc: mesa-sta...@lists.freedesktop.org
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 4c013c4..dab3e2d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -820,11 +820,11 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
  // Either there is 1 offset, which goes into the 2 low bytes of the
  // first source, or there are 4 offsets, which go into 2 sources (8
  // values, 1 byte each).
- Value *offs[2] = {bld.getScratch(), bld.getScratch()};
+ Value *offs[2] = {NULL, NULL};
  for (n = 0; n < i->tex.useOffsets; n++) {
 for (c = 0; c < 2; ++c) {
if ((n % 2) == 0 && c == 0)
-  bld.mkMov(offs[n / 2], i->offset[n][c].get());
+  bld.mkMov(offs[n / 2] = bld.getScratch(), 
i->offset[n][c].get());
else
   bld.mkOp3(OP_INSBF, TYPE_U32,
 offs[n / 2],


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 98169] lm_sensors hud option crashes unigine heaven

2016-10-12 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=98169

--- Comment #27 from Steven Toth  ---
Thanks for reporting these issue. I'll wrap them up and get them on the ML for
review.

Care to give me your Tested-By sign-off?

-- 
You are receiving this mail because:
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 98169] lm_sensors hud option crashes unigine heaven

2016-10-12 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=98169

--- Comment #28 from Christoph Haag  ---
Sure.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 12/16] loader: remove final sysfs codepath in loader_get_device_name_for_fd()

2016-10-12 Thread Emil Velikov
On 11 October 2016 at 19:59, Gary Wong  wrote:
> On Tue, Oct 11, 2016 at 07:31:56PM +0100, Emil Velikov wrote:
>> Gary, I believe that nuking sysfs is perfectly reasonable but if there
>> are any technical reasons/obstacles why it must stay please shout.
>
> Yup, I agree with the removal.  If I find any exceptions in the future
> I may get in touch to reinstate the sysfs code or something like it,
> but it does appear the libdrm handler covers all those cases anyway.
>
It may be that you need a trivial change in drmParsePciDeviceInfo to
get things working, but we can sort that in libdrm.

>> Furthermore, if you and/or a fellow Gnu developers/users can prep some
>> patches for mesa [1], libdrm [2] and/or other graphics stack packages
>> that will be amazing.
>
> I don't maintain those guix configs you cited and I'm not sure who
> does, but I can investigate.
>
Amazing, thanks!

Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/12] isl: make locally used functions static

2016-10-12 Thread Chad Versace
On Tue 11 Oct 2016, Emil Velikov wrote:
> From: Emil Velikov 
> 
> Signed-off-by: Emil Velikov 
> ---
>  src/intel/isl/isl.c |  4 ++--
>  src/intel/isl/isl.h | 10 --
>  2 files changed, 2 insertions(+), 12 deletions(-)

Patches 1-5 are
Reviewed-by: Chad Versace 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 00/13] st/mesa: finally finish ARB_enhanced_layouts

2016-10-12 Thread Nicolai Hähnle

On 10.10.2016 10:32, Nicolai Hähnle wrote:

I've incorporated the minor comments I've received so far and fixed a
regression related to structs that I had missed. Luckily the fix is pretty
simple, because struct members cannot have explicit components. So it's
enough to force their usage mask to XYZW, which means they will be treated
the same as before. Please review!


I went ahead and pushed everything except for the last patch, after 
incorporating Marek's and Dave's suggestion, as well as fixing one last 
regression.


I _haven't_ pushed the GLSL 4.50 patch yet on purpose. My plan is to do 
that after the next release is branched off. There are still more than 
100 GL CTS failures to slog through, and I wouldn't count on managing 
that before the release. We'll get there for 17.0 though, I promise ;-)


Cheers,
Nicolai



Thanks
Nicolai
--
 docs/features.txt|  18 +-
 docs/relnotes/12.1.0.html|   2 +-
 src/compiler/glsl/ir_print_visitor.cpp   |  10 +-
 src/gallium/auxiliary/tgsi/tgsi_scan.c   |   9 +-
 src/gallium/auxiliary/tgsi/tgsi_ureg.c   | 115 +++-
 src/gallium/auxiliary/tgsi/tgsi_ureg.h   |  30 ++
 src/gallium/docs/source/screen.rst   |   8 +
 .../drivers/freedreno/freedreno_screen.c |   1 +
 src/gallium/drivers/i915/i915_screen.c   |   1 +
 src/gallium/drivers/ilo/ilo_screen.c |   1 +
 src/gallium/drivers/llvmpipe/lp_screen.c |   1 +
 .../drivers/nouveau/nv30/nv30_screen.c   |   1 +
 .../drivers/nouveau/nv50/nv50_screen.c   |   1 +
 .../drivers/nouveau/nvc0/nvc0_screen.c   |   1 +
 src/gallium/drivers/r300/r300_screen.c   |   1 +
 src/gallium/drivers/r600/r600_pipe.c |   1 +
 src/gallium/drivers/radeonsi/si_pipe.c   |   3 +-
 src/gallium/drivers/softpipe/sp_screen.c |   1 +
 src/gallium/drivers/svga/svga_screen.c   |   1 +
 src/gallium/drivers/swr/swr_screen.cpp   |   1 +
 src/gallium/drivers/vc4/vc4_screen.c |   1 +
 src/gallium/drivers/virgl/virgl_screen.c |   1 +
 src/gallium/include/pipe/p_defines.h |   1 +
 src/mesa/state_tracker/st_extensions.c   |   7 +
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp   | 465 ++---
 25 files changed, 432 insertions(+), 250 deletions(-)


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] gallium/hud: bugfix: 68169 - Sensor extensions segfaults.

2016-10-12 Thread Steven Toth
https://bugs.freedesktop.org/show_bug.cgi?id=98169

Really a two part bug.

1. The recent extensions to the HUB framework were tested exclusively
with glxgears/demo/head. None of these tools exercise the
free_query_data() code path. When these codepaths were
tested, thanks to Christoph, using heaven-unigine
application, the sensors extension would segfault.

2. In a second case, same use case, extensions would
return zero values for cpufrequency, network stats etc.

On inspection, this was due to the 'unigine' application
created the HUD contexts twice (two screens?). The
extensions had never been tested in that configuration,
and the free_query_data() calls were thus trigger.

This patchset ensures that globally allocated
mechanisms for collecting and exposing various
disk, network, cpu frequency and sensors statistics
are retained correctly across multiple uses, and
free'd when no more callers require them.

Patchset was regression tested again with Unigine,
glxgears and glxdemo.

Tested-By: Christoph Haag 
Signed-off-by: Steven Toth 
---
 src/gallium/auxiliary/hud/hud_cpufreq.c  |  8 ++--
 src/gallium/auxiliary/hud/hud_diskstat.c | 10 +++---
 src/gallium/auxiliary/hud/hud_nic.c  |  8 ++--
 src/gallium/auxiliary/hud/hud_sensors_temp.c | 18 +-
 4 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/src/gallium/auxiliary/hud/hud_cpufreq.c 
b/src/gallium/auxiliary/hud/hud_cpufreq.c
index 4501bbb..39bdcb0 100644
--- a/src/gallium/auxiliary/hud/hud_cpufreq.c
+++ b/src/gallium/auxiliary/hud/hud_cpufreq.c
@@ -57,6 +57,7 @@ struct cpufreq_info
char sysfs_filename[128];
uint64_t KHz;
uint64_t last_time;
+   int users;
 };
 
 static int gcpufreq_count = 0;
@@ -116,8 +117,10 @@ static void
 free_query_data(void *p)
 {
struct cpufreq_info *cfi = (struct cpufreq_info *)p;
-   list_del(&cfi->list);
-   FREE(cfi);
+   if (--cfi->users == 0) {
+  list_del(&cfi->list);
+  FREE(cfi);
+   }
 }
 
 /**
@@ -159,6 +162,7 @@ hud_cpufreq_graph_install(struct hud_pane *pane, int 
cpu_index,
   return;
}
 
+   cfi->users++;
gr->query_data = cfi;
gr->query_new_value = query_cfi_load;
 
diff --git a/src/gallium/auxiliary/hud/hud_diskstat.c 
b/src/gallium/auxiliary/hud/hud_diskstat.c
index b248baf..000d6b3 100644
--- a/src/gallium/auxiliary/hud/hud_diskstat.c
+++ b/src/gallium/auxiliary/hud/hud_diskstat.c
@@ -73,6 +73,7 @@ struct diskstat_info
char sysfs_filename[128];
uint64_t last_time;
struct stat_s last_stat;
+   int users;
 };
 
 /* TODO: We don't handle dynamic block device / partition
@@ -165,9 +166,11 @@ query_dsi_load(struct hud_graph *gr)
 static void
 free_query_data(void *p)
 {
-   struct diskstat_info *nic = (struct diskstat_info *) p;
-   list_del(&nic->list);
-   FREE(nic);
+   struct diskstat_info *dsi = (struct diskstat_info *) p;
+   if (--dsi->users == 0) {
+  list_del(&dsi->list);
+  FREE(dsi);
+   }
 }
 
 /**
@@ -205,6 +208,7 @@ hud_diskstat_graph_install(struct hud_pane *pane, const 
char *dev_name,
else
   return;
 
+   dsi->users++;
gr->query_data = dsi;
gr->query_new_value = query_dsi_load;
 
diff --git a/src/gallium/auxiliary/hud/hud_nic.c 
b/src/gallium/auxiliary/hud/hud_nic.c
index fb6b8c0..e3f5910 100644
--- a/src/gallium/auxiliary/hud/hud_nic.c
+++ b/src/gallium/auxiliary/hud/hud_nic.c
@@ -59,6 +59,7 @@ struct nic_info
char throughput_filename[128];
uint64_t last_time;
uint64_t last_nic_bytes;
+   int users;
 };
 
 /* TODO: We don't handle dynamic NIC arrival or removal.
@@ -238,8 +239,10 @@ static void
 free_query_data(void *p)
 {
struct nic_info *nic = (struct nic_info *) p;
-   list_del(&nic->list);
-   FREE(nic);
+   if (--nic->users == 0) {
+  list_del(&nic->list);
+  FREE(nic);
+   }
 }
 
 /**
@@ -281,6 +284,7 @@ hud_nic_graph_install(struct hud_pane *pane, const char 
*nic_name,
else
   return;
 
+   nic->users++;
gr->query_data = nic;
gr->query_new_value = query_nic_load;
 
diff --git a/src/gallium/auxiliary/hud/hud_sensors_temp.c 
b/src/gallium/auxiliary/hud/hud_sensors_temp.c
index e41b847..c6f7d5b 100644
--- a/src/gallium/auxiliary/hud/hud_sensors_temp.c
+++ b/src/gallium/auxiliary/hud/hud_sensors_temp.c
@@ -68,6 +68,7 @@ struct sensors_temp_info
sensors_chip_name *chip;
const sensors_feature *feature;
double current, min, max, critical;
+   int users;
 };
 
 static double
@@ -193,11 +194,17 @@ static void
 free_query_data(void *p)
 {
struct sensors_temp_info *sti = (struct sensors_temp_info *) p;
-   list_del(&sti->list);
-   if (sti->chip)
-  sensors_free_chip_name(sti->chip);
-   FREE(sti);
-   sensors_cleanup();
+   if (--sti->users == 0) {
+  list_del(&sti->list);
+  gsensors_temp_count--;
+  if (sti->chip)
+ sensors_free_chip_name(sti->chip);
+  FREE(sti);
+   }
+
+   /* Destroy sensor singleton when after all refs released. */
+   if (!gsensors_temp_count)
+  sensors_clean

Re: [Mesa-dev] [PATCH 12/12] aubinator: replace pragma once with ifndef guard

2016-10-12 Thread Chad Versace
On Tue 11 Oct 2016, Emil Velikov wrote:
> From: Emil Velikov 
> 
> Signed-off-by: Emil Velikov 
> ---
>  src/intel/tools/decoder.h | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)

For the record, I prefer `#pragma once` to #ifndef guards. But
I understand that you want consistency.  Do you plan to update all the
headers?

For patches 6-12,
Reviewed-by: Chad Versace 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/16] loader: reimplement loader_get_user_preferred_fd via libdrm

2016-10-12 Thread Emil Velikov
On 11 October 2016 at 19:54, Axel Davy  wrote:
> On 11/10/2016 20:31, Emil Velikov wrote:
>>
>>   -   udev = udev_new();
>> -   if (!udev)
>> -  goto prime_clean;
>> +   if (drmGetDevice(default_fd, &device) != 0)
>> +  goto err;
>>   -   default_device_id_path_tag = get_id_path_tag_from_fd(udev,
>> default_fd);
>> -   if (!default_device_id_path_tag)
>> -  goto udev_clean;
>> -
>> -   is_different_device = 1;
>>  /* two format are supported:
>>   * "1": choose any other card than the card used by default.
>>   * id_path_tag: (for example "pci-_02_00_0") choose the card
>>   * with this id_path_tag.
>>   */
>>  if (!strcmp(prime,"1")) {
>> -  free(prime);
>> -  prime = strdup(default_device_id_path_tag);
>> -  /* request a card with a different card than the default card */
>> -  another_tag = 1;
>> -   } else if (!strcmp(default_device_id_path_tag, prime))
>> -  /* we are to get a new fd (render-node) of the same device */
>> -  is_different_device = 0;
>> +  /* Hmm... detection for 2-7 seems to be broken. Nicely done. */
>
> For DRI2, DRI_PRIME takes a number corresponding to the device number as
> configured in xorg (and you have xrandr commands to configure the
> behaviour). This doesn't work with the DRI3 scheme. However for
> retrocompatibility, after several discussions, we decided to let DRI2's most
> used command 'DRI_PRIME=1' to still sorta work on DRI3, but the meaning
> changed: DRI_PRIME=1 => 'give me any device that is not the compositor/xorg
> device'
No argument/objection against DRI_PRIME with DRI3, yet the current
hard coded DRI_PRIME=1 _only_ check is iffy. As per the spec there can
be 8 devices (0-8) with 0 always being the xorg/compositor one and 1-7
as 'other' devices. Yet here we only consider 1 and ignore everything
else :-\

>>
>> +  is_different_device = 1;
>> +   } else {
>> +  default_tag = drm_construct_id_path_tag(device);
>> +  if (default_tag == NULL)
>> + goto err;
>>   -   device_name = get_render_node_from_id_path_tag(udev,
>> -  prime,
>> -  another_tag);
>> -   if (device_name == NULL) {
>> +  if (strcmp(default_tag, prime) != 0) {
>> + free(default_tag);
>> + goto err;
>> +  }
>> +
>> +  free(default_tag);
>> +  /* we are to get a new fd (render-node) of the same device */
>> is_different_device = 0;
>> -  goto default_device_clean;
>> +  // XXX: WTF ^^ so one uses the new model only to point to the exact
>> same
>> +  // device and not the other more/less powerful GPU ?
>
> This case if when one has in his configuration file that the app should run
> on device XXX, which happens to be the one in use by the compositor.
> This case needed to be handled, and while it may not very seem useful now,
> it could be in the future an interesting case (On wayland, you could have
> the compositor use a card on some screens, and another card on the other,
> etc. In this case it makes sense for the user to specify the card, and it
> may or may not be the card used for the given screen).
>
There's something lost in the translation here:

Currently in case of non DRI_PRIME (i.e. path tag) we dive into
get_render_node_from_id_path_tag(), loop and trigger on !another_tag
&& !strcmp(id_path_tag, id_path_tag_tmp) (another_tag is zero here).

Or in other words the function returns the render node, of the exact
_same_ device. Which makes the whole new method of using pci-...
dubious since one cannot select the 'other' device :-\

That is unless I'm missing something ?

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] st/nine: Remove useless code in nine_shader

2016-10-12 Thread Axel Davy
Since 1604efa6fda9b780e8537a131ad77f3e83e5a67a,
lconsti and lconstb don't need to be initialized.

Remove some leftovers from the previous code (which
has now invalid use of ARRAY_SIZE on a pointer instead
of an array).

Reported by Coverity.

Signed-off-by: Axel Davy 
---
 src/gallium/state_trackers/nine/nine_shader.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/src/gallium/state_trackers/nine/nine_shader.c 
b/src/gallium/state_trackers/nine/nine_shader.c
index 0f8bcdd..ab21daf 100644
--- a/src/gallium/state_trackers/nine/nine_shader.c
+++ b/src/gallium/state_trackers/nine/nine_shader.c
@@ -3361,11 +3361,6 @@ tx_ctor(struct shader_translator *tx, struct 
nine_shader_info *info)
 for (i = 0; i < ARRAY_SIZE(tx->regs.vT); ++i)
 tx->regs.vT[i] = ureg_src_undef();
 
-for (i = 0; i < ARRAY_SIZE(tx->lconsti); ++i)
-tx->lconsti[i].idx = -1;
-for (i = 0; i < ARRAY_SIZE(tx->lconstb); ++i)
-tx->lconstb[i].idx = -1;
-
 sm1_read_version(tx);
 
 info->version = (tx->version.major << 4) | tx->version.minor;
-- 
2.10.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 12/12] aubinator: replace pragma once with ifndef guard

2016-10-12 Thread Emil Velikov
On 12 October 2016 at 18:08, Chad Versace  wrote:
> On Tue 11 Oct 2016, Emil Velikov wrote:
>> From: Emil Velikov 
>>
>> Signed-off-by: Emil Velikov 
>> ---
>>  src/intel/tools/decoder.h | 5 -
>>  1 file changed, 4 insertions(+), 1 deletion(-)
>
> For the record, I prefer `#pragma once` to #ifndef guards. But
> I understand that you want consistency.  Do you plan to update all the
> headers?
>
Yes, where possible*. I have an ancient WIP series which does NIR and
splits/reorders the headers.

Thanks
Emil
* SWR might be fun/contentious.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] st/nine: Fix possible segfault in surface ctor

2016-10-12 Thread Axel Davy
Regression introduced by
ba0274c7d6c3b77a36bbe1b444f427b0c873e2f3

Check the resource exists before assigning it
a flag (and use This->base.resource instead
of pResource, since the former may have a newly
allocate resource, while the latter would be
NULL).

This should reintroduce the behaviour of previous
code.

Signed-off-by: Axel Davy 
---
 src/gallium/state_trackers/nine/surface9.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/state_trackers/nine/surface9.c 
b/src/gallium/state_trackers/nine/surface9.c
index dc31bb9..664b78f 100644
--- a/src/gallium/state_trackers/nine/surface9.c
+++ b/src/gallium/state_trackers/nine/surface9.c
@@ -187,8 +187,8 @@ NineSurface9_ctor( struct NineSurface9 *This,
 
 This->stride = nine_format_get_stride(This->base.info.format, 
pDesc->Width);
 
-if (pDesc->Usage & D3DUSAGE_DYNAMIC)
-pResource->flags |= NINE_RESOURCE_FLAG_LOCKABLE;
+if (This->base.resource && (pDesc->Usage & D3DUSAGE_DYNAMIC))
+This->base.resource->flags |= NINE_RESOURCE_FLAG_LOCKABLE;
 
 /* TODO: investigate what else exactly needs to be cleared */
 if (This->base.resource && (pDesc->Usage & D3DUSAGE_RENDERTARGET)) {
-- 
2.10.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nvc0/ir: be more careful about preserving modifiers in SHLADD creation

2016-10-12 Thread Ilia Mirkin
First off, src2 was being given the wrong modifier, and secondly we were
forgetting to clear src0's modifier. Instead let's use the
ValueRef-based setter, which can also copy modifiers properly.

Signed-off-by: Ilia Mirkin 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 8 +---
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index e1596d8..dea9197 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -2183,7 +2183,6 @@ LateAlgebraicOpt::tryADDToSHLADD(Instruction *add)
Value *src1 = add->getSrc(1);
ImmediateValue imm;
Instruction *shl;
-   Modifier mod[2];
Value *src;
int s;
 
@@ -2208,14 +2207,9 @@ LateAlgebraicOpt::tryADDToSHLADD(Instruction *add)
if (!shl->src(1).getImmediate(imm))
   return false;
 
-   mod[0] = add->src(0).mod;
-   mod[1] = add->src(1).mod;
-
add->op = OP_SHLADD;
add->setSrc(2, add->src(!s));
-   add->src(2).mod = mod[s];
-
-   add->setSrc(0, shl->getSrc(0));
+   add->setSrc(0, shl->src(0));
add->setSrc(1, new_ImmediateValue(shl->bb->getProgram(), imm.reg.data.u32));
add->src(1).mod = Modifier(0);
 
-- 
2.7.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH] glx: Perform check for valid fbconfig against proper X-Screen.

2016-10-12 Thread Emil Velikov
On 11 October 2016 at 19:42, Mario Kleiner  wrote:
> Commit cf804b4455fac9e585b3600a8318caaced9c23de
> ('glx: fix crash with bad fbconfig') introduced a check
> in glXCreateNewContext() if the given config is a valid
> fbconfig.
>
> Unfortunately the check always checks the given config against
> the fbconfigs of the DefaultScreen(dpy), instead of the
> actual X-Screen specified in the config config->screen.
>
> This leads to failure whenever a GL context is created
> on a non-DefaultScreen(dpy), e.g., on X-Screen 1 of
> a multi-x-screen setup, where the default screen is
> typically 0.
>
> Fix this by using config->screen instead of DefaultScreen(dpy).
>
Nicely stopped Mario !

Skimming through mesa's codebase there's a few other cases of the
macro, of which the apple_glx_pbuffer_create() one (at least) needs a
similar fix.

There's also some interesting use of DefaultRootWindow(dpy), over
RootWindow(dpy, screen), plus users of the respective Xfoobar
functions rather than the foobar macros.

Can I interest you at checking/fixing those as well ;-)

Actual testing might be hard, so I'd CC Jeremy H (for the apple) and
Brian P for the xlib-libgl (src/mesa/drivers/x11/ and
src/gallium/state_trackers/glx/xlib/) ones for confirmation.

> Tested to fix context creation failure on a dual-x-screen setup.
>
> Signed-off-by: Mario Kleiner 
> Cc: "11.2 12.0" 
Reviewed-by: Emil Velikov 

> ---
>  src/glx/glxcmds.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/src/glx/glxcmds.c b/src/glx/glxcmds.c
> index b0a1cb0..6abe0b9 100644
> --- a/src/glx/glxcmds.c
> +++ b/src/glx/glxcmds.c
> @@ -1626,7 +1626,6 @@ glXCreateNewContext(Display * dpy, GLXFBConfig fbconfig,
>  int renderType, GLXContext shareList, Bool allowDirect)
>  {
> struct glx_config *config = (struct glx_config *) fbconfig;
> -   int screen = DefaultScreen(dpy);
> struct glx_config **config_list;
> int list_size;
> unsigned i;
> @@ -1637,7 +1636,7 @@ glXCreateNewContext(Display * dpy, GLXFBConfig fbconfig,
> }
>
> config_list = (struct glx_config **)
> -  glXGetFBConfigs(dpy, screen, &list_size);
> +  glXGetFBConfigs(dpy, config->screen, &list_size);
>
Steven, looks like you're getting hit by this in bug 98169#comment 5.
Can you give it a test ?
Alternatively if you can track and/or open a bug report for that issue
it'll be greatly appreciated.

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nvc0/ir: be more careful about preserving modifiers in SHLADD creation

2016-10-12 Thread Samuel Pitoiset

I think we could also use those copy modifiers in some other places.

Reviewed-by: Samuel Pitoiset 

On 10/12/2016 07:32 PM, Ilia Mirkin wrote:

First off, src2 was being given the wrong modifier, and secondly we were
forgetting to clear src0's modifier. Instead let's use the
ValueRef-based setter, which can also copy modifiers properly.

Signed-off-by: Ilia Mirkin 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 8 +---
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index e1596d8..dea9197 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -2183,7 +2183,6 @@ LateAlgebraicOpt::tryADDToSHLADD(Instruction *add)
Value *src1 = add->getSrc(1);
ImmediateValue imm;
Instruction *shl;
-   Modifier mod[2];
Value *src;
int s;

@@ -2208,14 +2207,9 @@ LateAlgebraicOpt::tryADDToSHLADD(Instruction *add)
if (!shl->src(1).getImmediate(imm))
   return false;

-   mod[0] = add->src(0).mod;
-   mod[1] = add->src(1).mod;
-
add->op = OP_SHLADD;
add->setSrc(2, add->src(!s));
-   add->src(2).mod = mod[s];
-
-   add->setSrc(0, shl->getSrc(0));
+   add->setSrc(0, shl->src(0));
add->setSrc(1, new_ImmediateValue(shl->bb->getProgram(), imm.reg.data.u32));
add->src(1).mod = Modifier(0);



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeonsi: fix the coordinate overloading of llvm.amdgcn.image.atomic.cmpswap.*

2016-10-12 Thread Nicolai Hähnle
From: Nicolai Hähnle 

Fixes GL45-CTS.shader_image_load_store.basic-allTargets-atomic*
---
 src/gallium/drivers/radeonsi/si_shader.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 8b77fd1..25146e8 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4188,24 +4188,29 @@ static void atomic_emit(
if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) {
atomic_emit_memory(ctx, emit_data);
return;
}
 
if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
snprintf(intrinsic_name, sizeof(intrinsic_name),
 "llvm.amdgcn.buffer.atomic.%s", action->intr_name);
} else {
+   LLVMValueRef coords;
char coords_type[8];
 
-   build_type_name_for_intr(LLVMTypeOf(emit_data->args[1]),
-   coords_type, sizeof(coords_type));
+   if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
+   coords = emit_data->args[2];
+   else
+   coords = emit_data->args[1];
+
+   build_type_name_for_intr(coords, coords_type, 
sizeof(coords_type));
snprintf(intrinsic_name, sizeof(intrinsic_name),
 "llvm.amdgcn.image.atomic.%s.%s",
 action->intr_name, coords_type);
}
 
tmp = lp_build_intrinsic(
builder, intrinsic_name, bld_base->uint_bld.elem_type,
emit_data->args, emit_data->arg_count, 0);
emit_data->output[emit_data->chan] =
LLVMBuildBitCast(builder, tmp, bld_base->base.elem_type, "");
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Revert "mesa_glinterop: remove inclusion of GLX header"

2016-10-12 Thread Chih-Wei Huang
2016-10-04 7:03 GMT+08:00 Vinson Lee :
> This reverts commit 8472045b16b3e4621553fe451a20a9ba9f0d44b6.
>
> Conflicts:
>
> include/GL/mesa_glinterop.h
>
> This patch fixes this build error with GCC 4.4.
>
>   Compiling src/glx/dri_common_interop.c ...
> In file included from src/glx/dri_common_interop.c:33:
> include/GL/mesa_glinterop.h:62: error: redefinition of typedef ‘GLXContext’
> include/GL/glx.h:165: note: previous declaration of ‘GLXContext’ was here
>
> Fixes: 8472045b16b3 ("mesa_glinterop: remove inclusion of GLX header")
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96770
> Signed-off-by: Vinson Lee 
> ---
>  include/GL/mesa_glinterop.h |5 +
>  1 files changed, 1 insertions(+), 4 deletions(-)
>
> diff --git a/include/GL/mesa_glinterop.h b/include/GL/mesa_glinterop.h
> index 383d7f9..c6a967e 100644
> --- a/include/GL/mesa_glinterop.h
> +++ b/include/GL/mesa_glinterop.h
> @@ -52,15 +52,12 @@
>
>  #include 
>  #include 
> +#include 
>
>  #ifdef __cplusplus
>  extern "C" {
>  #endif
>
> -/* Forward declarations to avoid inclusion of GL/glx.h */
> -typedef struct _XDisplay Display;
> -typedef struct __GLXcontextRec *GLXContext;
> -
>  /* Forward declarations to avoid inclusion of EGL/egl.h */
>  typedef void *EGLDisplay;
>  typedef void *EGLContext;
> --

NACK.
The patch breaks Android build (at least).
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] st/glsl_to_tgsi: remove unnecessary ir_instruction argument from get_opcode

2016-10-12 Thread Nicolai Hähnle
From: Nicolai Hähnle 

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index be0aa2e..fd2485d 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -520,21 +520,21 @@ public:
   st_src_reg src2 = undef_src,
   st_src_reg src3 = undef_src);
 
glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op,
   st_dst_reg dst, st_dst_reg dst1,
   st_src_reg src0 = undef_src,
   st_src_reg src1 = undef_src,
   st_src_reg src2 = undef_src,
   st_src_reg src3 = undef_src);
 
-   unsigned get_opcode(ir_instruction *ir, unsigned op,
+   unsigned get_opcode(unsigned op,
 st_dst_reg dst,
 st_src_reg src0, st_src_reg src1);
 
/**
 * Emit the correct dot-product instruction for the type of arguments
 */
glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir,
  st_dst_reg dst,
  st_src_reg src0,
  st_src_reg src1,
@@ -662,21 +662,21 @@ num_inst_src_regs(const glsl_to_tgsi_instruction *op)
 glsl_to_tgsi_instruction *
 glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
st_dst_reg dst, st_dst_reg dst1,
st_src_reg src0, st_src_reg src1,
st_src_reg src2, st_src_reg src3)
 {
glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
int num_reladdr = 0, i, j;
bool dst_is_64bit[2];
 
-   op = get_opcode(ir, op, dst, src0, src1);
+   op = get_opcode(op, dst, src0, src1);
 
/* If we have to do relative addressing, we want to load the ARL
 * reg directly for one of the regs, and preload the other reladdr
 * sources into temps.
 */
num_reladdr += dst.reladdr != NULL || dst.reladdr2;
num_reladdr += dst1.reladdr != NULL || dst1.reladdr2;
num_reladdr += src0.reladdr != NULL || src0.reladdr2 != NULL;
num_reladdr += src1.reladdr != NULL || src1.reladdr2 != NULL;
num_reladdr += src2.reladdr != NULL || src2.reladdr2 != NULL;
@@ -893,21 +893,21 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, 
unsigned op,
st_src_reg src2, st_src_reg src3)
 {
return emit_asm(ir, op, dst, undef_dst, src0, src1, src2, src3);
 }
 
 /**
  * Determines whether to use an integer, unsigned integer, or float opcode
  * based on the operands and input opcode, then emits the result.
  */
 unsigned
-glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
+glsl_to_tgsi_visitor::get_opcode(unsigned op,
  st_dst_reg dst,
  st_src_reg src0, st_src_reg src1)
 {
enum glsl_base_type type = GLSL_TYPE_FLOAT;
 
if (op == TGSI_OPCODE_MOV)
return op;
 
assert(src0.type != GLSL_TYPE_ARRAY);
assert(src0.type != GLSL_TYPE_STRUCT);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/4] st/glsl_to_tgsi: fix textureGatherOffset with indirectly loaded offsets

2016-10-12 Thread Nicolai Hähnle
From: Nicolai Hähnle 

Cc: mesa-sta...@lists.freedesktop.org
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 18 +-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 33c1f87..be0aa2e 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -553,20 +553,21 @@ public:
   unsigned *base,
   unsigned *index,
   st_src_reg *reladdr);
   void calc_deref_offsets(ir_dereference *head,
   ir_dereference *tail,
   unsigned *array_elements,
   unsigned *base,
   unsigned *index,
   st_src_reg *indirect,
   unsigned *location);
+   st_src_reg canonicalize_gather_offset(st_src_reg offset);
 
bool try_emit_mad(ir_expression *ir,
   int mul_operand);
bool try_emit_mad_for_and_not(ir_expression *ir,
   int mul_operand);
 
void emit_swz(ir_expression *ir);
 
bool process_move_condition(ir_rvalue *ir);
 
@@ -3963,20 +3964,34 @@ glsl_to_tgsi_visitor::get_deref_offsets(ir_dereference 
*ir,
   *base = *index;
   *array_size = 1;
}
 
if (location != 0x) {
   *base += 
this->shader_program->UniformStorage[location].opaque[shader].index;
   *index += 
this->shader_program->UniformStorage[location].opaque[shader].index;
}
 }
 
+st_src_reg
+glsl_to_tgsi_visitor::canonicalize_gather_offset(st_src_reg offset)
+{
+   if (offset.reladdr || offset.reladdr2) {
+  st_src_reg tmp = get_temp(glsl_type::ivec2_type);
+  st_dst_reg tmp_dst = st_dst_reg(tmp);
+  tmp_dst.writemask = WRITEMASK_XY;
+  emit_asm(NULL, TGSI_OPCODE_MOV, tmp_dst, offset);
+  return tmp;
+   }
+
+   return offset;
+}
+
 void
 glsl_to_tgsi_visitor::visit(ir_texture *ir)
 {
st_src_reg result_src, coord, cube_sc, lod_info, projector, dx, dy;
st_src_reg offset[MAX_GLSL_TEXTURE_OFFSET], sample_index, component;
st_src_reg levels_src, reladdr;
st_dst_reg result_dst, coord_dst, cube_sc_dst;
glsl_to_tgsi_instruction *inst = NULL;
unsigned opcode = TGSI_OPCODE_NOP;
const glsl_type *sampler_type = ir->sampler->type;
@@ -4088,23 +4103,24 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
   component = this->result;
   if (ir->offset) {
  ir->offset->accept(this);
  if (ir->offset->type->base_type == GLSL_TYPE_ARRAY) {
 const glsl_type *elt_type = ir->offset->type->fields.array;
 for (i = 0; i < ir->offset->type->length; i++) {
offset[i] = this->result;
offset[i].index += i * type_size(elt_type);
offset[i].type = elt_type->base_type;
offset[i].swizzle = swizzle_for_size(elt_type->vector_elements);
+   offset[i] = canonicalize_gather_offset(offset[i]);
 }
  } else {
-offset[0] = this->result;
+offset[0] = canonicalize_gather_offset(this->result);
  }
   }
   break;
case ir_lod:
   opcode = TGSI_OPCODE_LODQ;
   break;
case ir_texture_samples:
   opcode = TGSI_OPCODE_TXQS;
   break;
case ir_samples_identical:
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/4] st/mesa: fix various corner cases and related cleanups

2016-10-12 Thread Nicolai Hähnle
Hi all,

this is admittedly a pretty random series, but that's what tends to happen
when you go through test suite failures. Please review!

Cheers,
Nicolai

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] st/glsl_to_tgsi: simplify translate_tex_offset

2016-10-12 Thread Nicolai Hähnle
From: Nicolai Hähnle 

This fixes a bug with offsets from uniforms which seems to have only been
noticed as a crash in piglit's
arb_gpu_shader5/compiler/builtin-functions/fs-gatherOffset-uniform-offset.frag
on radeonsi.

Cc: mesa-sta...@lists.freedesktop.org
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 64 +++---
 1 file changed, 14 insertions(+), 50 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index f721506..33c1f87 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -5661,74 +5661,38 @@ translate_src(struct st_translate *t, const st_src_reg 
*src_reg)
if (src_reg->reladdr != NULL) {
   assert(src_reg->file != PROGRAM_TEMPORARY);
   src = ureg_src_indirect(src, ureg_src(t->address[0]));
}
 
return src;
 }
 
 static struct tgsi_texture_offset
 translate_tex_offset(struct st_translate *t,
- const st_src_reg *in_offset, int idx)
+ const st_src_reg *in_offset)
 {
struct tgsi_texture_offset offset;
-   struct ureg_src imm_src;
-   struct ureg_dst dst;
-   int array;
+   struct ureg_src src = translate_src(t, in_offset);
 
-   switch (in_offset->file) {
-   case PROGRAM_IMMEDIATE:
-  assert(in_offset->index >= 0 && in_offset->index < t->num_immediates);
-  imm_src = t->immediates[in_offset->index];
-
-  offset.File = imm_src.File;
-  offset.Index = imm_src.Index;
-  offset.SwizzleX = imm_src.SwizzleX;
-  offset.SwizzleY = imm_src.SwizzleY;
-  offset.SwizzleZ = imm_src.SwizzleZ;
-  offset.Padding = 0;
-  break;
-   case PROGRAM_INPUT:
-  imm_src = t->inputs[t->inputMapping[in_offset->index]];
-  offset.File = imm_src.File;
-  offset.Index = imm_src.Index;
-  offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0);
-  offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1);
-  offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2);
-  offset.Padding = 0;
-  break;
-   case PROGRAM_TEMPORARY:
-  imm_src = ureg_src(t->temps[in_offset->index]);
-  offset.File = imm_src.File;
-  offset.Index = imm_src.Index;
-  offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0);
-  offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1);
-  offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2);
-  offset.Padding = 0;
-  break;
-   case PROGRAM_ARRAY:
-  array = in_offset->index >> 16;
+   offset.File = src.File;
+   offset.Index = src.Index;
+   offset.SwizzleX = src.SwizzleX;
+   offset.SwizzleY = src.SwizzleY;
+   offset.SwizzleZ = src.SwizzleZ;
+   offset.Padding = 0;
 
-  assert(array >= 0);
-  assert(array < (int)t->num_temp_arrays);
+   assert(!src.Indirect);
+   assert(!src.DimIndirect);
+   assert(!src.Dimension);
+   assert(!src.Absolute); /* those shouldn't be used with integers anyway */
+   assert(!src.Negate);
 
-  dst = t->arrays[array];
-  offset.File = dst.File;
-  offset.Index = dst.Index + (in_offset->index & 0x) - 0x8000;
-  offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0);
-  offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1);
-  offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2);
-  offset.Padding = 0;
-  break;
-   default:
-  break;
-   }
return offset;
 }
 
 static void
 compile_tgsi_instruction(struct st_translate *t,
  const glsl_to_tgsi_instruction *inst)
 {
struct ureg_program *ureg = t->ureg;
int i;
struct ureg_dst dst[2];
@@ -5778,21 +5742,21 @@ compile_tgsi_instruction(struct st_translate *t,
case TGSI_OPCODE_TXL2:
case TGSI_OPCODE_TG4:
case TGSI_OPCODE_LODQ:
   src[num_src] = t->samplers[inst->sampler.index];
   assert(src[num_src].File != TGSI_FILE_NULL);
   if (inst->sampler.reladdr)
  src[num_src] =
 ureg_src_indirect(src[num_src], ureg_src(t->address[2]));
   num_src++;
   for (i = 0; i < (int)inst->tex_offset_num_offset; i++) {
- texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i], i);
+ texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]);
   }
   tex_target = st_translate_texture_target(inst->tex_target, 
inst->tex_shadow);
 
   ureg_tex_insn(ureg,
 inst->op,
 dst, num_dst,
 tex_target,
 texoffsets, inst->tex_offset_num_offset,
 src, num_src);
   return;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/4] st/mesa: fix vertex elements setup for doubles

2016-10-12 Thread Nicolai Hähnle
From: Nicolai Hähnle 

Whether one or two slots are taken up by one API array depends on the
vertex shader, not on how the array is configured. When an array is
set up with fewer components than the shader expects, the high components
are undefined.

Fixes GL45-CTS.vertex_attrib_binding.basic-inputL-case1.

Cc: mesa-sta...@lists.freedesktop.org
---
 src/mesa/state_tracker/st_atom_array.c | 98 +-
 1 file changed, 50 insertions(+), 48 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_array.c 
b/src/mesa/state_tracker/st_atom_array.c
index 1c2cfa7..e5b949f 100644
--- a/src/mesa/state_tracker/st_atom_array.c
+++ b/src/mesa/state_tracker/st_atom_array.c
@@ -379,47 +379,58 @@ static void init_velement(struct pipe_vertex_element 
*velement,
   int instance_divisor, int vbo_index)
 {
velement->src_offset = src_offset;
velement->src_format = format;
velement->instance_divisor = instance_divisor;
velement->vertex_buffer_index = vbo_index;
assert(velement->src_format);
 }
 
 static void init_velement_lowered(struct st_context *st,
+  const struct st_vertex_program *vp,
   struct pipe_vertex_element *velements,
   int src_offset, int format,
   int instance_divisor, int vbo_index,
   int nr_components, GLboolean doubles,
   GLuint *attr_idx)
 {
int idx = *attr_idx;
if (doubles) {
   int lower_format;
 
-  if (nr_components == 1)
+  if (nr_components < 2)
  lower_format = PIPE_FORMAT_R32G32_UINT;
-  else if (nr_components >= 2)
+  else
  lower_format = PIPE_FORMAT_R32G32B32A32_UINT;
 
   init_velement(&velements[idx], src_offset,
 lower_format, instance_divisor, vbo_index);
   idx++;
 
-  if (nr_components > 2) {
- if (nr_components == 3)
-lower_format = PIPE_FORMAT_R32G32_UINT;
- else if (nr_components >= 4)
-lower_format = PIPE_FORMAT_R32G32B32A32_UINT;
+  if (idx < vp->num_inputs &&
+  vp->index_to_input[idx] == ST_DOUBLE_ATTRIB_PLACEHOLDER) {
+ if (nr_components >= 3) {
+if (nr_components == 3)
+   lower_format = PIPE_FORMAT_R32G32_UINT;
+else
+   lower_format = PIPE_FORMAT_R32G32B32A32_UINT;
+
+init_velement(&velements[idx], src_offset + 4 * sizeof(float),
+lower_format, instance_divisor, vbo_index);
+ } else {
+/* The values here are undefined. Fill in some conservative
+ * dummy values.
+ */
+init_velement(&velements[idx], src_offset, PIPE_FORMAT_R32G32_UINT,
+  instance_divisor, vbo_index);
+ }
 
- init_velement(&velements[idx], src_offset + 4 * sizeof(float),
-   lower_format, instance_divisor, vbo_index);
  idx++;
   }
} else {
   init_velement(&velements[idx], src_offset,
 format, instance_divisor, vbo_index);
   idx++;
}
*attr_idx = idx;
 }
 
@@ -428,24 +439,23 @@ static void init_velement_lowered(struct st_context *st,
  * or all live in user space.
  * \param vbuffer  returns vertex buffer info
  * \param velements  returns vertex element info
  */
 static boolean
 setup_interleaved_attribs(struct st_context *st,
   const struct st_vertex_program *vp,
   const struct st_vp_variant *vpv,
   const struct gl_client_array **arrays,
   struct pipe_vertex_buffer *vbuffer,
-  struct pipe_vertex_element velements[],
-  unsigned *num_velements)
+  struct pipe_vertex_element velements[])
 {
-   GLuint attr, attr_idx;
+   GLuint attr;
const GLubyte *low_addr = NULL;
GLboolean usingVBO;  /* all arrays in a VBO? */
struct gl_buffer_object *bufobj;
GLsizei stride;
 
/* Find the lowest address of the arrays we're drawing,
 * Init bufobj and stride.
 */
if (vpv->num_inputs) {
   const struct gl_client_array *array;
@@ -474,47 +484,43 @@ setup_interleaved_attribs(struct st_context *st,
else {
   /* not sure we'll ever have zero inputs, but play it safe */
   bufobj = NULL;
   stride = 0;
   low_addr = 0;
}
 
/* are the arrays in user space? */
usingVBO = _mesa_is_bufferobj(bufobj);
 
-   attr_idx = 0;
-   for (attr = 0; attr < vpv->num_inputs; attr++) {
+   for (attr = 0; attr < vpv->num_inputs;) {
   const struct gl_client_array *array;
   unsigned src_offset;
   unsigned src_format;
 
   array = get_client_array(vp, arrays, attr);
-  if (!array)
- continue;
+  assert(array);
 
   src_offset = (unsi

Re: [Mesa-dev] [PATCH] Revert "mesa_glinterop: remove inclusion of GLX header"

2016-10-12 Thread Emil Velikov
On 12 October 2016 at 18:47, Chih-Wei Huang  wrote:
> 2016-10-04 7:03 GMT+08:00 Vinson Lee :
>> This reverts commit 8472045b16b3e4621553fe451a20a9ba9f0d44b6.
>>
>> Conflicts:
>>
>> include/GL/mesa_glinterop.h
>>
>> This patch fixes this build error with GCC 4.4.
>>
>>   Compiling src/glx/dri_common_interop.c ...
>> In file included from src/glx/dri_common_interop.c:33:
>> include/GL/mesa_glinterop.h:62: error: redefinition of typedef ‘GLXContext’
>> include/GL/glx.h:165: note: previous declaration of ‘GLXContext’ was here
>>
>> Fixes: 8472045b16b3 ("mesa_glinterop: remove inclusion of GLX header")
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96770
>> Signed-off-by: Vinson Lee 
>> ---
>>  include/GL/mesa_glinterop.h |5 +
>>  1 files changed, 1 insertions(+), 4 deletions(-)
>>
>> diff --git a/include/GL/mesa_glinterop.h b/include/GL/mesa_glinterop.h
>> index 383d7f9..c6a967e 100644
>> --- a/include/GL/mesa_glinterop.h
>> +++ b/include/GL/mesa_glinterop.h
>> @@ -52,15 +52,12 @@
>>
>>  #include 
>>  #include 
>> +#include 
>>
>>  #ifdef __cplusplus
>>  extern "C" {
>>  #endif
>>
>> -/* Forward declarations to avoid inclusion of GL/glx.h */
>> -typedef struct _XDisplay Display;
>> -typedef struct __GLXcontextRec *GLXContext;
>> -
>>  /* Forward declarations to avoid inclusion of EGL/egl.h */
>>  typedef void *EGLDisplay;
>>  typedef void *EGLContext;
>> --
>
> NACK.
> The patch breaks Android build (at least).
Pretty much what I said before the patch made it to the list. Still ...

Yet again, props to you I found out that I never got to sending out a
better fix. Should be in your inbox in a second, so if you can give it
a try that'll be great.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa_glinterop: allow building without X and related headers

2016-10-12 Thread Emil Velikov
This commit effectively reverts c10dcb2ce837922c6ee4e191e6d6202098a5ee10
and fixes the typedef redefinition which inspired it.

In order to prevent requiring X packages at build time earlier commit
forward declared the required X/GLX typedefs. Since that approach
introduced typedef redefinition (a C11 feature) it was reverted.

To avoid the redefinition while _not_ mandating X and related headers
forward declare the structs and use those through the header.

As anyone uses the mesa interop header they ensure that the X (or others
in terms of EGL) headers are included, which ensures that everything is
resolved within the compilation unit.

Cc: Vinson Lee 
Cc: "12.0" 
Cc: Tapani Pälli 
Cc: Chih-Wei Huang 
Fixes: c10dcb2ce837 ("Revert "mesa_glinterop: remove inclusion of GLX
header"")
Fixes: 8472045b16b3 ("mesa_glinterop: remove inclusion of GLX header")
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96770
Signed-off-by: Emil Velikov 
---
TL;DR; Yay typedefs, because they make things so much better ;-)

Tapani/Chih-Wei, this should fix the breakage that you are seing. Please
let me know if it sorts things on your end.

Vison, this should resolve things on your end as well. A confirmation
would be great.
---
 include/GL/mesa_glinterop.h | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/include/GL/mesa_glinterop.h b/include/GL/mesa_glinterop.h
index c6a967e..173476a 100644
--- a/include/GL/mesa_glinterop.h
+++ b/include/GL/mesa_glinterop.h
@@ -52,12 +52,15 @@
 
 #include 
 #include 
-#include 
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+/* Forward declarations to avoid inclusion of GL/glx.h */
+struct _XDisplay;
+struct __GLXcontextRec;
+
 /* Forward declarations to avoid inclusion of EGL/egl.h */
 typedef void *EGLDisplay;
 typedef void *EGLContext;
@@ -243,7 +246,7 @@ struct mesa_glinterop_export_out {
  * \return MESA_GLINTEROP_SUCCESS or MESA_GLINTEROP_* != 0 on error
  */
 int
-MesaGLInteropGLXQueryDeviceInfo(Display *dpy, GLXContext context,
+MesaGLInteropGLXQueryDeviceInfo(struct _XDisplay *dpy, struct __GLXcontextRec 
*context,
 struct mesa_glinterop_device_info *out);
 
 
@@ -268,7 +271,7 @@ MesaGLInteropEGLQueryDeviceInfo(EGLDisplay dpy, EGLContext 
context,
  * \return MESA_GLINTEROP_SUCCESS or MESA_GLINTEROP_* != 0 on error
  */
 int
-MesaGLInteropGLXExportObject(Display *dpy, GLXContext context,
+MesaGLInteropGLXExportObject(struct _XDisplay *dpy, struct __GLXcontextRec 
*context,
  struct mesa_glinterop_export_in *in,
  struct mesa_glinterop_export_out *out);
 
@@ -283,11 +286,11 @@ MesaGLInteropEGLExportObject(EGLDisplay dpy, EGLContext 
context,
  struct mesa_glinterop_export_out *out);
 
 
-typedef int (PFNMESAGLINTEROPGLXQUERYDEVICEINFOPROC)(Display *dpy, GLXContext 
context,
+typedef int (PFNMESAGLINTEROPGLXQUERYDEVICEINFOPROC)(struct _XDisplay *dpy, 
struct __GLXcontextRec *context,
  struct 
mesa_glinterop_device_info *out);
 typedef int (PFNMESAGLINTEROPEGLQUERYDEVICEINFOPROC)(EGLDisplay dpy, 
EGLContext context,
  struct 
mesa_glinterop_device_info *out);
-typedef int (PFNMESAGLINTEROPGLXEXPORTOBJECTPROC)(Display *dpy, GLXContext 
context,
+typedef int (PFNMESAGLINTEROPGLXEXPORTOBJECTPROC)(struct _XDisplay *dpy, 
struct __GLXcontextRec *context,
   struct 
mesa_glinterop_export_in *in,
   struct 
mesa_glinterop_export_out *out);
 typedef int (PFNMESAGLINTEROPEGLEXPORTOBJECTPROC)(EGLDisplay dpy, EGLContext 
context,
-- 
2.10.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/16] loader: reimplement loader_get_user_preferred_fd via libdrm

2016-10-12 Thread Axel Davy

On 12/10/2016 19:15, Emil Velikov wrote:

On 11 October 2016 at 19:54, Axel Davy  wrote:

On 11/10/2016 20:31, Emil Velikov wrote:

   -   udev = udev_new();
-   if (!udev)
-  goto prime_clean;
+   if (drmGetDevice(default_fd, &device) != 0)
+  goto err;
   -   default_device_id_path_tag = get_id_path_tag_from_fd(udev,
default_fd);
-   if (!default_device_id_path_tag)
-  goto udev_clean;
-
-   is_different_device = 1;
  /* two format are supported:
   * "1": choose any other card than the card used by default.
   * id_path_tag: (for example "pci-_02_00_0") choose the card
   * with this id_path_tag.
   */
  if (!strcmp(prime,"1")) {
-  free(prime);
-  prime = strdup(default_device_id_path_tag);
-  /* request a card with a different card than the default card */
-  another_tag = 1;
-   } else if (!strcmp(default_device_id_path_tag, prime))
-  /* we are to get a new fd (render-node) of the same device */
-  is_different_device = 0;
+  /* Hmm... detection for 2-7 seems to be broken. Nicely done. */

For DRI2, DRI_PRIME takes a number corresponding to the device number as
configured in xorg (and you have xrandr commands to configure the
behaviour). This doesn't work with the DRI3 scheme. However for
retrocompatibility, after several discussions, we decided to let DRI2's most
used command 'DRI_PRIME=1' to still sorta work on DRI3, but the meaning
changed: DRI_PRIME=1 => 'give me any device that is not the compositor/xorg
device'

No argument/objection against DRI_PRIME with DRI3, yet the current
hard coded DRI_PRIME=1 _only_ check is iffy. As per the spec there can
be 8 devices (0-8) with 0 always being the xorg/compositor one and 1-7
as 'other' devices. Yet here we only consider 1 and ignore everything
else :-\

Do you suggest to have DRI_PRIME=2 (up to 8) be the same than DRI_PRIME=1 ?



+  is_different_device = 1;
+   } else {
+  default_tag = drm_construct_id_path_tag(device);
+  if (default_tag == NULL)
+ goto err;
   -   device_name = get_render_node_from_id_path_tag(udev,
-  prime,
-  another_tag);
-   if (device_name == NULL) {
+  if (strcmp(default_tag, prime) != 0) {
+ free(default_tag);
+ goto err;
+  }
+
+  free(default_tag);
+  /* we are to get a new fd (render-node) of the same device */
 is_different_device = 0;
-  goto default_device_clean;
+  // XXX: WTF ^^ so one uses the new model only to point to the exact
same
+  // device and not the other more/less powerful GPU ?

This case if when one has in his configuration file that the app should run
on device XXX, which happens to be the one in use by the compositor.
This case needed to be handled, and while it may not very seem useful now,
it could be in the future an interesting case (On wayland, you could have
the compositor use a card on some screens, and another card on the other,
etc. In this case it makes sense for the user to specify the card, and it
may or may not be the card used for the given screen).


There's something lost in the translation here:

Currently in case of non DRI_PRIME (i.e. path tag) we dive into
get_render_node_from_id_path_tag(), loop and trigger on !another_tag
&& !strcmp(id_path_tag, id_path_tag_tmp) (another_tag is zero here).

Or in other words the function returns the render node, of the exact
_same_ device. Which makes the whole new method of using pci-...
dubious since one cannot select the 'other' device :-\

That is unless I'm missing something ?

-Emil



I'm not sure to understand exactly what you want to say.

Here is a summary of current behaviour.


device_id (env var, or dri conf) can be set to "1" (another device than 
default) or to an id path tag pci-XXX. The legacy env var DRI_PRIME 
overrides the setting if set (but is used the same way exactly).


If never the value doesn't match 1, or a valid id path tag, you get the 
default device.


If you set 1, it picks any device that has a different id path tag than 
the default device. This is obviously limited if you get many devices in 
the computer, and in this case you should rather use id path tags.


Else it tries to pick the device with the specified id path tag.

For the case when the specified id path tag is the id path tag of the 
default device, the default device is taken. It was decided to implement 
by taking the render node of the device in this case. I don't remember 
exactly why it was chosen instead of just taking the default fd. I think 
returning the default fd is fine as well.


Were you suggesting that the call to get_render_node_from_id_path_tag is 
useless when we detect that the id path tag is the same than for the 
default device ? In that case the code is equivalent to just take the 
render-node version of device. If you decide to just return the default 
fd in that situation, you can just avoid 

Re: [Mesa-dev] [PATCH v3 02/25] configure.ac: Add helper function for targets/components

2016-10-12 Thread Tobias Droste
Am Mittwoch, 12. Oktober 2016, 09:52:51 CEST schrieb Emil Velikov:
> On 12 October 2016 at 00:02, Tobias Droste  wrote:
> > Add functions to add and check targets/components.
> > Not used in this patch.
> > 
> > The error message in llvm_add_component is disabled until it doesn't break
> > the build anymore. This is the same functionality as before where the
> > components were added without a check.
> > 
> > Signed-off-by: Tobias Droste 
> > ---
> > 
> >  configure.ac | 35 +++
> >  1 file changed, 35 insertions(+)
> > 
> > diff --git a/configure.ac b/configure.ac
> > index bdd46bc..69421ff 100644
> > --- a/configure.ac
> > +++ b/configure.ac
> > @@ -2196,7 +2196,42 @@ llvm_check_version_for() {
> > 
> >  fi
> >  
> >  }
> > 
> > +llvm_add_default_components() {
> > +driver_name=$1
> > 
> > +# Required default components
> > +llvm_add_component "engine" $driver_name
> > +llvm_add_component "bitwriter" $driver_name
> > +llvm_add_component "mcjit" $driver_name
> > +llvm_add_component "mcdisassembler" $driver_name
> > +
> > +# Optional default components
> > +if $LLVM_CONFIG --components | grep -iqw inteljitevents ; then
> > +LLVM_COMPONENTS="${LLVM_COMPONENTS} inteljitevents"
> > +fi
> > +}
> > +
> > +llvm_add_component() {
> 
> Nit: move this function before its user (llvm_add_default_components)
> Idea: call this ...components (plural) and feed the all components at once.

Ok.

> 
> > +new_llvm_component=$1
> > +driver_name=$2
> > +
> > +if $LLVM_CONFIG --components | grep -iqw $new_llvm_component ; then
> > +LLVM_COMPONENTS="${LLVM_COMPONENTS} ${new_llvm_component}"
> > +#else
> > +#AC_MSG_ERROR([LLVM component '$new_llvm_component' not enabled
> > in your LLVM build. Required by $driver_name.])
> This function adds required components, correct ? Then the above two
> lines should not be commented out.

I can't enable this until later in the series as mentioned in the commit 
message.
This is how it worked before (there were no errors for the components).

At this point llvm_add_default_components will be added even if we don't need 
LLVM. This way someone without LLVM installed will get error messages even 
though he might just want to build the intel or softpipe driver.
It will be enabled in Patch 24.

> 
> -Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 04/25] configure.ac: Move oCL checks out of LLVM version check

2016-10-12 Thread Tobias Droste
Am Mittwoch, 12. Oktober 2016, 09:56:39 CEST schrieb Emil Velikov:
> >  fi
> > 
> > +if test "x$enable_opencl" = xyes; then
> > +llvm_check_version_for "3" "6" "0" "opencl"
> > +
> > +LLVM_COMPONENTS="${LLVM_COMPONENTS} all-targets ipo linker
> > instrumentation" +LLVM_COMPONENTS="${LLVM_COMPONENTS} irreader option
> > objcarcopts profiledata" +fi
> > +
> > +dnl Check for Clang internal headers
> > +if test "x$enable_opencl" = xyes; then
> 
> Nit: drop the second if test, yet preserve the comment ?
> Disclaimer: haven't looked if later patches depend on the split.

This is a "just move" patch, that's why I didn't change anything.
But this whole section will be changed later (Patch 11) so it actually doesn't 
matter.

I leave it as is but keep your reviewed-by, ok? :-)

> 
> With the above
> Reviewed-by: Emil Velikov 
> 
> -Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] st/mesa: only flip stipple pattern for winsys fbo's

2016-10-12 Thread Ilia Mirkin
Gallium is completely oblivious to whether the fbo is flipped or not.
Only flip the stipple pattern when the fbo is flipped as well. Otherwise
the driver has no idea when to unflip the pattern.

Fixes bin/gl-2.1-polygon-stipple-fs -fbo on nv50 and nvc0.

Signed-off-by: Ilia Mirkin 
---

This keeps working on llvmpipe. I assume this is because the emulation
uses fragcoord to determine the effect, which will be fixed up by the
state tracker's wpos logic.

 src/mesa/state_tracker/st_atom_stipple.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_stipple.c 
b/src/mesa/state_tracker/st_atom_stipple.c
index a30215f..5f7bf82 100644
--- a/src/mesa/state_tracker/st_atom_stipple.c
+++ b/src/mesa/state_tracker/st_atom_stipple.c
@@ -61,7 +61,7 @@ invert_stipple(GLuint dest[32], const GLuint src[32], GLuint 
winHeight)
 
 
 
-static void 
+static void
 update_stipple( struct st_context *st )
 {
const struct gl_context *ctx = st->ctx;
@@ -74,8 +74,12 @@ update_stipple( struct st_context *st )
 
   memcpy(st->state.poly_stipple, ctx->PolygonStipple, sz);
 
-  invert_stipple(newStipple.stipple, ctx->PolygonStipple,
- ctx->DrawBuffer->Height);
+  if (_mesa_is_user_fbo(ctx->DrawBuffer)) {
+ memcpy(newStipple.stipple, ctx->PolygonStipple, 
sizeof(newStipple.stipple));
+  } else {
+ invert_stipple(newStipple.stipple, ctx->PolygonStipple,
+ctx->DrawBuffer->Height);
+  }
 
   st->pipe->set_polygon_stipple(st->pipe, &newStipple);
}
-- 
2.7.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 07/25] configure.ac: Move gallium checks out of LLVM version check

2016-10-12 Thread Tobias Droste
Am Mittwoch, 12. Oktober 2016, 10:01:01 CEST schrieb Emil Velikov:
> > 
> > +gallium_llvm_check() {
> > +LLVM_REQUIRED_VERSION_MAJOR="3"
> > +LLVM_REQUIRED_VERSION_MINOR="3"
> > +if test "$LLVM_VERSION_INT" -lt
> > "${LLVM_REQUIRED_VERSION_MAJOR}0${LLVM_REQUIRED_VERSION_MINOR}"; then +  
> >  AC_MSG_ERROR([LLVM
> > $LLVM_REQUIRED_VERSION_MAJOR.$LLVM_REQUIRED_VERSION_MINOR or newer is
> > required]) +fi
> > +}
> > +
> > +if test "x$enable_gallium_llvm" = xyes; then
> > +gallium_llvm_check
> 
> Reuse llvm_check_version_for "3" "3" "0" "Gallium" and update the
> commit message ?

Again a just move patch. This will be done in patch 13.

> 
> -Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 08/25] configure.ac: Move LLVM version check to the top

2016-10-12 Thread Tobias Droste
Am Mittwoch, 12. Oktober 2016, 10:11:50 CEST schrieb Emil Velikov:
> On 12 October 2016 at 00:02, Tobias Droste  wrote:
> > A function with the LLVM version checked is moved to the top.
> > The function is called where the old code was.
> 
> Replace the second line with "... in order to reuse/rework X"
> 
> > No functional change.
> > 
> > Signed-off-by: Tobias Droste 
> > ---
> > 
> >  configure.ac | 91
> >   1 file
> >  changed, 49 insertions(+), 42 deletions(-)
> > 
> > diff --git a/configure.ac b/configure.ac
> > index 933e7b5..0f19a4f 100644
> > --- a/configure.ac
> > +++ b/configure.ac
> > @@ -861,6 +861,54 @@ fi
> > 
> >  AC_SUBST([SELINUX_CFLAGS])
> >  AC_SUBST([SELINUX_LIBS])
> > 
> > +dnl
> > +dnl LLVM
> > +dnl
> > +llvm_get_version() {
> 
> Nit: The code below does a lot more than "get_version" -
> get_environment/set_variables/other

Any ideas? :-D

> 
> With the above
> Reviewed-by: Emil Velikov 
> 
> > +if test -z "${LLVM_CONFIG}"; then
> > +if test -n "$llvm_prefix"; then
> > +AC_PATH_TOOL([LLVM_CONFIG], [llvm-config], [no],
> > ["$llvm_prefix/bin"]) +else
> > +AC_PATH_TOOL([LLVM_CONFIG], [llvm-config], [no])
> > +fi
> > +fi
> > +
> > +if test "x$LLVM_CONFIG" != xno; then
> > +LLVM_VERSION=`$LLVM_CONFIG --version | egrep -o '^[[0-9.]]+'`
> 
> ...
> 
> > +else
> > +MESA_LLVM=0
> > +LLVM_VERSION_INT=0
> 
> Just realised that we should error out in this case. After all one
> requests llvm, so silently ignoring that they're missing llvm-config
> isn't a smart idea. Something like below (be that as a preparatory,
> in-between or at the end of the series) would be great.

At this point in time we don't know if we actually need LLVM.
This will be handled by all the llvm_check_version_for() calls for each 
driver. 
This has also the advantage, that you not only know that you need LLVM but you 
also know which version!

> 
> if test "x$LLVM_CONFIG" = xno; then
>AC_MSG_ERROR([...])
> fi
> 
> LLVM_VERSION=...
> ...
> 
> -Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/4] st/glsl_to_tgsi: simplify translate_tex_offset

2016-10-12 Thread Ilia Mirkin
Patches 1-3 are

Reviewed-by: Ilia Mirkin 

Not comfortable enough with the vertex arrays code to review the last
one. I remember Dave had semi-good reasons for the if (!array) stuff
though.

On Wed, Oct 12, 2016 at 1:50 PM, Nicolai Hähnle  wrote:
> From: Nicolai Hähnle 
>
> This fixes a bug with offsets from uniforms which seems to have only been
> noticed as a crash in piglit's
> arb_gpu_shader5/compiler/builtin-functions/fs-gatherOffset-uniform-offset.frag
> on radeonsi.
>
> Cc: mesa-sta...@lists.freedesktop.org
> ---
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 64 
> +++---
>  1 file changed, 14 insertions(+), 50 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index f721506..33c1f87 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -5661,74 +5661,38 @@ translate_src(struct st_translate *t, const 
> st_src_reg *src_reg)
> if (src_reg->reladdr != NULL) {
>assert(src_reg->file != PROGRAM_TEMPORARY);
>src = ureg_src_indirect(src, ureg_src(t->address[0]));
> }
>
> return src;
>  }
>
>  static struct tgsi_texture_offset
>  translate_tex_offset(struct st_translate *t,
> - const st_src_reg *in_offset, int idx)
> + const st_src_reg *in_offset)
>  {
> struct tgsi_texture_offset offset;
> -   struct ureg_src imm_src;
> -   struct ureg_dst dst;
> -   int array;
> +   struct ureg_src src = translate_src(t, in_offset);
>
> -   switch (in_offset->file) {
> -   case PROGRAM_IMMEDIATE:
> -  assert(in_offset->index >= 0 && in_offset->index < t->num_immediates);
> -  imm_src = t->immediates[in_offset->index];
> -
> -  offset.File = imm_src.File;
> -  offset.Index = imm_src.Index;
> -  offset.SwizzleX = imm_src.SwizzleX;
> -  offset.SwizzleY = imm_src.SwizzleY;
> -  offset.SwizzleZ = imm_src.SwizzleZ;
> -  offset.Padding = 0;
> -  break;
> -   case PROGRAM_INPUT:
> -  imm_src = t->inputs[t->inputMapping[in_offset->index]];
> -  offset.File = imm_src.File;
> -  offset.Index = imm_src.Index;
> -  offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0);
> -  offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1);
> -  offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2);
> -  offset.Padding = 0;
> -  break;
> -   case PROGRAM_TEMPORARY:
> -  imm_src = ureg_src(t->temps[in_offset->index]);
> -  offset.File = imm_src.File;
> -  offset.Index = imm_src.Index;
> -  offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0);
> -  offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1);
> -  offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2);
> -  offset.Padding = 0;
> -  break;
> -   case PROGRAM_ARRAY:
> -  array = in_offset->index >> 16;
> +   offset.File = src.File;
> +   offset.Index = src.Index;
> +   offset.SwizzleX = src.SwizzleX;
> +   offset.SwizzleY = src.SwizzleY;
> +   offset.SwizzleZ = src.SwizzleZ;
> +   offset.Padding = 0;
>
> -  assert(array >= 0);
> -  assert(array < (int)t->num_temp_arrays);
> +   assert(!src.Indirect);
> +   assert(!src.DimIndirect);
> +   assert(!src.Dimension);
> +   assert(!src.Absolute); /* those shouldn't be used with integers anyway */
> +   assert(!src.Negate);
>
> -  dst = t->arrays[array];
> -  offset.File = dst.File;
> -  offset.Index = dst.Index + (in_offset->index & 0x) - 0x8000;
> -  offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0);
> -  offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1);
> -  offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2);
> -  offset.Padding = 0;
> -  break;
> -   default:
> -  break;
> -   }
> return offset;
>  }
>
>  static void
>  compile_tgsi_instruction(struct st_translate *t,
>   const glsl_to_tgsi_instruction *inst)
>  {
> struct ureg_program *ureg = t->ureg;
> int i;
> struct ureg_dst dst[2];
> @@ -5778,21 +5742,21 @@ compile_tgsi_instruction(struct st_translate *t,
> case TGSI_OPCODE_TXL2:
> case TGSI_OPCODE_TG4:
> case TGSI_OPCODE_LODQ:
>src[num_src] = t->samplers[inst->sampler.index];
>assert(src[num_src].File != TGSI_FILE_NULL);
>if (inst->sampler.reladdr)
>   src[num_src] =
>  ureg_src_indirect(src[num_src], ureg_src(t->address[2]));
>num_src++;
>for (i = 0; i < (int)inst->tex_offset_num_offset; i++) {
> - texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i], i);
> + texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]);
>}
>tex_target = st_translate_texture_target(inst->tex_target, 
> inst->tex_shadow);
>
>ureg_tex_insn(ureg,
>  inst->op,
>  dst, num_dst,
>  tex_target,
>  texoffsets, inst->tex_offset_num_offset,
>  src, num_src);
>  

Re: [Mesa-dev] [PATCH v3 12/25] configure.ac: Move gallium LLVM checks

2016-10-12 Thread Tobias Droste
Am Mittwoch, 12. Oktober 2016, 10:42:19 CEST schrieb Emil Velikov:
> On 12 October 2016 at 00:02, Tobias Droste  wrote:
> >  
> >  gallium_require_llvm() {
> > 
> > -if test "x$MESA_LLVM" = x0; then
> > -case "$host" in *gnux32) return;; esac
> > -case "$host_cpu" in
> > -i*86|x86_64|amd64) AC_MSG_ERROR([LLVM is required to build $1 on
> > x86 and x86_64]);; -esac
> > -fi
> > +case "$host" in *gnux32) return;; esac
> > +case "$host_cpu" in
> > +i*86|x86_64|amd64)
> > +LLVM_REQUIRED_VERSION_MAJOR="3"
> > +LLVM_REQUIRED_VERSION_MINOR="3"
> > +if test "$LLVM_VERSION_INT" -lt
> > "${LLVM_REQUIRED_VERSION_MAJOR}0${LLVM_REQUIRED_VERSION_MINOR}"; then +  
> >  AC_MSG_ERROR([LLVM
> > $LLVM_REQUIRED_VERSION_MAJOR.$LLVM_REQUIRED_VERSION_MINOR or newer is
> > required]) +fi
> > +;;
> > +esac
> > 
> >  }
> 
> The function it quite "ugly" as-is and this patch changes things in a fun
> way.
> 
> Namely: before you'll get the minimum required version check
> regardless of host_cpu for everyone, while now you get the opposite -
> everyone is 'constrained' by the host_cpu check. Admittedly I've have
> not idea if llvmpipe is a thing on outside x86 land.
> 
> Checking the gallium_require_llvm users we want:
>  - swr -> bail out if host_cpu !x86/x86-64 or gallium-llvm toggle is
> off || llvm version req. is not met
>  - r300 -> on host_cpu eq. x86/x86-64, check the gallium-llvm toggle
> (this is 'premature' optimisation which we might want to rework/drop
> in the long run)

Right now --enable-gallium-llvm will be set to no if it is "auto" on non x86.
gallivm/llvm use the x86 target so it probably only works there, that's why I 
did it this way. 

> 
> -Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/5] radv: correct variable name VISIBILITY_{, C}FLAGS

2016-10-12 Thread Emil Velikov
From: Emil Velikov 

The letter C was missing, thus in turn all the internal symbols were
exported.

As a result we hide ~150 symbols and cut ~36K from libvulkan_radeon.so.

Signed-off-by: Emil Velikov 
---
My current testing involves static linking LLVM (which exports the
world) so the final binary might have some spurous exports + is quite
large. One step at a time... we'll get to the rest in due time.
---
 src/amd/vulkan/Makefile.am | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/amd/vulkan/Makefile.am b/src/amd/vulkan/Makefile.am
index 6eaecd8..b6f3d00 100644
--- a/src/amd/vulkan/Makefile.am
+++ b/src/amd/vulkan/Makefile.am
@@ -49,7 +49,8 @@ AM_CPPFLAGS = \
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/include
 
-AM_CFLAGS = $(VISIBILITY_FLAGS) \
+AM_CFLAGS = \
+   $(VISIBILITY_CFLAGS) \
$(PTHREAD_CFLAGS) \
$(LLVM_CFLAGS)
 
-- 
2.10.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/5] radv: move AMDGPU_LIBS later in the link chain

2016-10-12 Thread Emil Velikov
From: Emil Velikov 

At the moment (albeit unlikely) one could get link-time issues, since
libdrm_amdgpu.so is before it's users in the link chain.

Signed-off-by: Emil Velikov 
---
As a nice bonus the diff vs the anv Makefile is quite small now ;-)
---
 src/amd/vulkan/Makefile.am | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/amd/vulkan/Makefile.am b/src/amd/vulkan/Makefile.am
index b6f3d00..e2f5c02 100644
--- a/src/amd/vulkan/Makefile.am
+++ b/src/amd/vulkan/Makefile.am
@@ -58,7 +58,7 @@ VULKAN_SOURCES = \
$(VULKAN_GENERATED_FILES) \
$(VULKAN_FILES)
 
-VULKAN_LIB_DEPS = $(AMDGPU_LIBS)
+VULKAN_LIB_DEPS =
 
 
 if HAVE_PLATFORM_X11
@@ -100,6 +100,7 @@ VULKAN_LIB_DEPS += \
$(LLVM_LIBS) \
$(LIBELF_LIBS) \
$(PTHREAD_LIBS) \
+   $(AMDGPU_LIBS) \
$(LIBDRM_LIBS) \
$(PTHREAD_LIBS) \
$(DLOPEN_LIBS) \
-- 
2.10.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/5] amd/addrlib: hide private symbols via VISIBILITY_CXXFLAGS

2016-10-12 Thread Emil Velikov
From: Emil Velikov 

Private/internal symbols should never not be exported. Using the
CXXFLAGS cuts ~300 exported symbols and ~23K from libvulkan_radeon.so.

Signed-off-by: Emil Velikov 
---
 src/amd/Makefile.addrlib.am | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/amd/Makefile.addrlib.am b/src/amd/Makefile.addrlib.am
index 434e692..64823fc 100644
--- a/src/amd/Makefile.addrlib.am
+++ b/src/amd/Makefile.addrlib.am
@@ -30,6 +30,9 @@ addrlib_libamdgpu_addrlib_la_CPPFLAGS = \
-I$(srcdir)/addrlib/r800/chip \
-DBRAHMA_BUILD=1
 
+addrlib_libamdgpu_addrlib_la_CXXFLAGS = \
+   $(VISIBILITY_CXXFLAGS)
+
 noinst_LTLIBRARIES += $(ADDRLIB_LIBS)
 
 addrlib_libamdgpu_addrlib_la_SOURCES = $(ADDRLIB_FILES)
-- 
2.10.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/5] intel: automake: replace direct basename $@ invokation with $(@F)

2016-10-12 Thread Emil Velikov
From: Emil Velikov 

Use the shorthand make variable(s) as elsewhere in the build.

Signed-off-by: Emil Velikov 
---
 src/intel/Makefile.genxml.am | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/Makefile.genxml.am b/src/intel/Makefile.genxml.am
index f8b0363..519f30e 100644
--- a/src/intel/Makefile.genxml.am
+++ b/src/intel/Makefile.genxml.am
@@ -37,7 +37,7 @@ $(GENXML_GENERATED_FILES): genxml/gen_pack_header.py
 %_xml.h:  %.xml Makefile
$(MKDIR_GEN)
$(AM_V_GEN) echo -n "static const uint8_t " > $@; \
-   echo "`basename $@`_xml[] = {" | sed -e 's,_xml.h,,' >> $@; \
+   echo "$(@F)_xml[] = {" | sed -e 's,_xml.h,,' >> $@; \
cat $< | $(XXD) -i >> $@; \
echo "};" >> $@
 
-- 
2.10.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/5] automake: add radv to the `make distcheck' hooks

2016-10-12 Thread Emil Velikov
From: Emil Velikov 

Will allow us to catch issues (as fixed with previous patches) rather
than release a broken tarball.

Signed-off-by: Emil Velikov 
---
 Makefile.am | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile.am b/Makefile.am
index b0fbed6..49b99de 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -44,7 +44,7 @@ AM_DISTCHECK_CONFIGURE_FLAGS = \
--with-egl-platforms=x11,wayland,drm,surfaceless \
--with-dri-drivers=i915,i965,nouveau,radeon,r200,swrast \

--with-gallium-drivers=i915,ilo,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,vc4,virgl,swr
 \
-   --with-vulkan-drivers=intel
+   --with-vulkan-drivers=intel,radeon
 
 ACLOCAL_AMFLAGS = -I m4
 
-- 
2.10.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa_glinterop: allow building without X and related headers

2016-10-12 Thread Chih-Wei Huang
2016-10-13 1:49 GMT+08:00 Emil Velikov :
> This commit effectively reverts c10dcb2ce837922c6ee4e191e6d6202098a5ee10
> and fixes the typedef redefinition which inspired it.
>
> In order to prevent requiring X packages at build time earlier commit
> forward declared the required X/GLX typedefs. Since that approach
> introduced typedef redefinition (a C11 feature) it was reverted.
>
> To avoid the redefinition while _not_ mandating X and related headers
> forward declare the structs and use those through the header.
>
> As anyone uses the mesa interop header they ensure that the X (or others
> in terms of EGL) headers are included, which ensures that everything is
> resolved within the compilation unit.
>
> Cc: Vinson Lee 
> Cc: "12.0" 
> Cc: Tapani Pälli 
> Cc: Chih-Wei Huang 
> Fixes: c10dcb2ce837 ("Revert "mesa_glinterop: remove inclusion of GLX
> header"")
> Fixes: 8472045b16b3 ("mesa_glinterop: remove inclusion of GLX header")
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96770
> Signed-off-by: Emil Velikov 
> ---
> TL;DR; Yay typedefs, because they make things so much better ;-)
>
> Tapani/Chih-Wei, this should fix the breakage that you are seing. Please
> let me know if it sorts things on your end.
>
> Vison, this should resolve things on your end as well. A confirmation
> would be great.
> ---
>  include/GL/mesa_glinterop.h | 13 -
>  1 file changed, 8 insertions(+), 5 deletions(-)
>
> diff --git a/include/GL/mesa_glinterop.h b/include/GL/mesa_glinterop.h
> index c6a967e..173476a 100644
> --- a/include/GL/mesa_glinterop.h
> +++ b/include/GL/mesa_glinterop.h
> @@ -52,12 +52,15 @@
>
>  #include 
>  #include 
> -#include 
>
>  #ifdef __cplusplus
>  extern "C" {
>  #endif
>
> +/* Forward declarations to avoid inclusion of GL/glx.h */
> +struct _XDisplay;
> +struct __GLXcontextRec;
> +
>  /* Forward declarations to avoid inclusion of EGL/egl.h */
>  typedef void *EGLDisplay;
>  typedef void *EGLContext;
> @@ -243,7 +246,7 @@ struct mesa_glinterop_export_out {
>   * \return MESA_GLINTEROP_SUCCESS or MESA_GLINTEROP_* != 0 on error
>   */
>  int
> -MesaGLInteropGLXQueryDeviceInfo(Display *dpy, GLXContext context,
> +MesaGLInteropGLXQueryDeviceInfo(struct _XDisplay *dpy, struct 
> __GLXcontextRec *context,
>  struct mesa_glinterop_device_info *out);
>
>
> @@ -268,7 +271,7 @@ MesaGLInteropEGLQueryDeviceInfo(EGLDisplay dpy, 
> EGLContext context,
>   * \return MESA_GLINTEROP_SUCCESS or MESA_GLINTEROP_* != 0 on error
>   */
>  int
> -MesaGLInteropGLXExportObject(Display *dpy, GLXContext context,
> +MesaGLInteropGLXExportObject(struct _XDisplay *dpy, struct __GLXcontextRec 
> *context,
>   struct mesa_glinterop_export_in *in,
>   struct mesa_glinterop_export_out *out);
>
> @@ -283,11 +286,11 @@ MesaGLInteropEGLExportObject(EGLDisplay dpy, EGLContext 
> context,
>   struct mesa_glinterop_export_out *out);
>
>
> -typedef int (PFNMESAGLINTEROPGLXQUERYDEVICEINFOPROC)(Display *dpy, 
> GLXContext context,
> +typedef int (PFNMESAGLINTEROPGLXQUERYDEVICEINFOPROC)(struct _XDisplay *dpy, 
> struct __GLXcontextRec *context,
>   struct 
> mesa_glinterop_device_info *out);
>  typedef int (PFNMESAGLINTEROPEGLQUERYDEVICEINFOPROC)(EGLDisplay dpy, 
> EGLContext context,
>   struct 
> mesa_glinterop_device_info *out);
> -typedef int (PFNMESAGLINTEROPGLXEXPORTOBJECTPROC)(Display *dpy, GLXContext 
> context,
> +typedef int (PFNMESAGLINTEROPGLXEXPORTOBJECTPROC)(struct _XDisplay *dpy, 
> struct __GLXcontextRec *context,
>struct 
> mesa_glinterop_export_in *in,
>struct 
> mesa_glinterop_export_out *out);
>  typedef int (PFNMESAGLINTEROPEGLEXPORTOBJECTPROC)(EGLDisplay dpy, EGLContext 
> context,
> --
> 2.10.0
>

Build OK on Android 7.0. Thanks!

BTW, the typedef EGLDisplay and EGLContext
has similar redefinition issues.
Should we fix them in the same way?

[ 95% 667/699] target  C: libGLES_mesa <=
external/mesa/src/egl/drivers/dri2/egl_dri2.c
In file included from external/mesa/src/egl/drivers/dri2/egl_dri2.c:58:
In file included from external/mesa/src/egl/drivers/dri2/egl_dri2.h:73:
In file included from external/mesa/src/egl/main/eglconfig.h:39:
In file included from external/mesa/src/egl/main/egltypedefs.h:34:
external/mesa/include/EGL/egl.h:55:15: warning: redefinition of
typedef 'EGLDisplay' is a C11 feature [-Wtypedef-redefinition]
typedef void *EGLDisplay;
  ^
external/mesa/include/GL/mesa_glinterop.h:65:15: note: previous
definition is here
typedef void *EGLDisplay;
  ^
In file included from external/mesa/src/egl/drivers/dri2/egl_dri2.c:58:
In file included from external/mesa/src/egl/drivers/dri2/egl_dri2.h:73:
In file included from external/mesa/src/egl/main/eglconfi

Re: [Mesa-dev] [PATCH 1/4] st/glsl_to_tgsi: simplify translate_tex_offset

2016-10-12 Thread Nicolai Hähnle

On 12.10.2016 20:12, Ilia Mirkin wrote:

Patches 1-3 are

Reviewed-by: Ilia Mirkin 


Thanks.


Not comfortable enough with the vertex arrays code to review the last
one. I remember Dave had semi-good reasons for the if (!array) stuff
though.


As far as I understood the code, the if (!array) was used to jump over 
the placeholders for the secondary slots of dvec3/dvec4. That should be 
covered by the fact that init_velement_lower is used to advance the attr 
index appropriately.


If there's a reason that I missed, that would be good to know :)

Cheers,
Nicolai



On Wed, Oct 12, 2016 at 1:50 PM, Nicolai Hähnle  wrote:

From: Nicolai Hähnle 

This fixes a bug with offsets from uniforms which seems to have only been
noticed as a crash in piglit's
arb_gpu_shader5/compiler/builtin-functions/fs-gatherOffset-uniform-offset.frag
on radeonsi.

Cc: mesa-sta...@lists.freedesktop.org
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 64 +++---
 1 file changed, 14 insertions(+), 50 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index f721506..33c1f87 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -5661,74 +5661,38 @@ translate_src(struct st_translate *t, const st_src_reg 
*src_reg)
if (src_reg->reladdr != NULL) {
   assert(src_reg->file != PROGRAM_TEMPORARY);
   src = ureg_src_indirect(src, ureg_src(t->address[0]));
}

return src;
 }

 static struct tgsi_texture_offset
 translate_tex_offset(struct st_translate *t,
- const st_src_reg *in_offset, int idx)
+ const st_src_reg *in_offset)
 {
struct tgsi_texture_offset offset;
-   struct ureg_src imm_src;
-   struct ureg_dst dst;
-   int array;
+   struct ureg_src src = translate_src(t, in_offset);

-   switch (in_offset->file) {
-   case PROGRAM_IMMEDIATE:
-  assert(in_offset->index >= 0 && in_offset->index < t->num_immediates);
-  imm_src = t->immediates[in_offset->index];
-
-  offset.File = imm_src.File;
-  offset.Index = imm_src.Index;
-  offset.SwizzleX = imm_src.SwizzleX;
-  offset.SwizzleY = imm_src.SwizzleY;
-  offset.SwizzleZ = imm_src.SwizzleZ;
-  offset.Padding = 0;
-  break;
-   case PROGRAM_INPUT:
-  imm_src = t->inputs[t->inputMapping[in_offset->index]];
-  offset.File = imm_src.File;
-  offset.Index = imm_src.Index;
-  offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0);
-  offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1);
-  offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2);
-  offset.Padding = 0;
-  break;
-   case PROGRAM_TEMPORARY:
-  imm_src = ureg_src(t->temps[in_offset->index]);
-  offset.File = imm_src.File;
-  offset.Index = imm_src.Index;
-  offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0);
-  offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1);
-  offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2);
-  offset.Padding = 0;
-  break;
-   case PROGRAM_ARRAY:
-  array = in_offset->index >> 16;
+   offset.File = src.File;
+   offset.Index = src.Index;
+   offset.SwizzleX = src.SwizzleX;
+   offset.SwizzleY = src.SwizzleY;
+   offset.SwizzleZ = src.SwizzleZ;
+   offset.Padding = 0;

-  assert(array >= 0);
-  assert(array < (int)t->num_temp_arrays);
+   assert(!src.Indirect);
+   assert(!src.DimIndirect);
+   assert(!src.Dimension);
+   assert(!src.Absolute); /* those shouldn't be used with integers anyway */
+   assert(!src.Negate);

-  dst = t->arrays[array];
-  offset.File = dst.File;
-  offset.Index = dst.Index + (in_offset->index & 0x) - 0x8000;
-  offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0);
-  offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1);
-  offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2);
-  offset.Padding = 0;
-  break;
-   default:
-  break;
-   }
return offset;
 }

 static void
 compile_tgsi_instruction(struct st_translate *t,
  const glsl_to_tgsi_instruction *inst)
 {
struct ureg_program *ureg = t->ureg;
int i;
struct ureg_dst dst[2];
@@ -5778,21 +5742,21 @@ compile_tgsi_instruction(struct st_translate *t,
case TGSI_OPCODE_TXL2:
case TGSI_OPCODE_TG4:
case TGSI_OPCODE_LODQ:
   src[num_src] = t->samplers[inst->sampler.index];
   assert(src[num_src].File != TGSI_FILE_NULL);
   if (inst->sampler.reladdr)
  src[num_src] =
 ureg_src_indirect(src[num_src], ureg_src(t->address[2]));
   num_src++;
   for (i = 0; i < (int)inst->tex_offset_num_offset; i++) {
- texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i], i);
+ texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]);
   }
   tex_target = st_translate_texture_target(inst->tex_target, 
inst->tex_shadow);

   ureg_tex_insn(ureg,
 inst->op,
 dst, num_dst,
   

Re: [Mesa-dev] [PATCH v3 24/25] configure.ac: Only add default LLVM components if needed

2016-10-12 Thread Tobias Droste
Am Mittwoch, 12. Oktober 2016, 10:57:14 CEST schrieb Emil Velikov:
> On 12 October 2016 at 00:02, Tobias Droste  wrote:
> > Each driver has to expllicitly call llvm_add_default_components to
> > add the shared components.
> > This way we can fail the build if a component is not found and avoid
> > the recursive solution from a previous version of the pach series.
> 
> s/pach/patch/
> 
> Does this mean that the "default" components are required only by the
> gallivm module ? Please rename the function to reflect that.

No, there's probably some of them that are only used by gallivm, but not all 
of them. That's something I wanted to do on a follow up to explicitly add all 
llvm components for each driver. But this needs some forensics, to actually 
find out what's really needed ;-)

> 
> Don't recall if swr driver is/was using any of it, but the nv30 path
> of nouveau does use it, iirc. In the latter you want to call the
> function if --enable-gallium-llvm is set. Alternatively keep
> llvm_add_default_components within the "test enable_gallium_llvm !=
> xno" block.

I'm going to add it to nv30.
swr was calling that function before my changes too. So it probably uses it.

> 
> -Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 24/25] configure.ac: Only add default LLVM components if needed

2016-10-12 Thread Tobias Droste
Am Mittwoch, 12. Oktober 2016, 12:03:38 CEST schrieb Emil Velikov:
> > Does this mean that the "default" components are required only by the
> > gallivm module ? Please rename the function to reflect that.
> > 
> > Don't recall if swr driver is/was using any of it, but the nv30 path
> > of nouveau does use it, iirc. In the latter you want to call the
> > function if --enable-gallium-llvm is set. Alternatively keep
> > llvm_add_default_components within the "test enable_gallium_llvm !=
> > xno" block.
> 
> In case you're wondering how the above might happen:
> 
> Some drivers (i915g, softpipe/llvmpipe, nv30, r300, svga?... ) use the
> aux/draw module. The latter of which has LLVM codepaths which get
> build if --enable-gallium-llvm is set.
> 
> Search "\ Emil

I didn't know that. Then I have to change this a little bit.
I need to think about that.
So don't bother with patch 24 right now until I came up with a better way.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 12/25] configure.ac: Move gallium LLVM checks

2016-10-12 Thread Emil Velikov
On 12 October 2016 at 19:14, Tobias Droste  wrote:
> Am Mittwoch, 12. Oktober 2016, 10:42:19 CEST schrieb Emil Velikov:
>> On 12 October 2016 at 00:02, Tobias Droste  wrote:
>> >
>> >  gallium_require_llvm() {
>> >
>> > -if test "x$MESA_LLVM" = x0; then
>> > -case "$host" in *gnux32) return;; esac
>> > -case "$host_cpu" in
>> > -i*86|x86_64|amd64) AC_MSG_ERROR([LLVM is required to build $1 on
>> > x86 and x86_64]);; -esac
>> > -fi
>> > +case "$host" in *gnux32) return;; esac
>> > +case "$host_cpu" in
>> > +i*86|x86_64|amd64)
>> > +LLVM_REQUIRED_VERSION_MAJOR="3"
>> > +LLVM_REQUIRED_VERSION_MINOR="3"
>> > +if test "$LLVM_VERSION_INT" -lt
>> > "${LLVM_REQUIRED_VERSION_MAJOR}0${LLVM_REQUIRED_VERSION_MINOR}"; then +
>> >  AC_MSG_ERROR([LLVM
>> > $LLVM_REQUIRED_VERSION_MAJOR.$LLVM_REQUIRED_VERSION_MINOR or newer is
>> > required]) +fi
>> > +;;
>> > +esac
>> >
>> >  }
>>
>> The function it quite "ugly" as-is and this patch changes things in a fun
>> way.
>>
>> Namely: before you'll get the minimum required version check
>> regardless of host_cpu for everyone, while now you get the opposite -
>> everyone is 'constrained' by the host_cpu check. Admittedly I've have
>> not idea if llvmpipe is a thing on outside x86 land.
>>
>> Checking the gallium_require_llvm users we want:
>>  - swr -> bail out if host_cpu !x86/x86-64 or gallium-llvm toggle is
>> off || llvm version req. is not met
>>  - r300 -> on host_cpu eq. x86/x86-64, check the gallium-llvm toggle
>> (this is 'premature' optimisation which we might want to rework/drop
>> in the long run)
>
> Right now --enable-gallium-llvm will be set to no if it is "auto" on non x86.

> gallivm/llvm use the x86 target
I haven't look too closely at the llvm code but there's nothing in
scons/automake which supports this, perhaps Jose can confirm.

Jose, does the gallium llvm code require the x86 target or can it work
with other as well ? At least in theory.

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 25/25] configure.ac: Add required LLVM versions to the top

2016-10-12 Thread Tobias Droste
Am Mittwoch, 12. Oktober 2016, 11:53:50 CEST schrieb Emil Velikov:
> > 
> > +dnl LLVM versions
> > +LLVM_VERSION_REQUIRED_GALLIUM=3.3.0
> > +LLVM_VERSION_REQUIRED_LLVMPIPE=3.6.0
> 
> Not exactly sure why/how llvmpipe got 3.6 since the driver reuses the
> "gallium" one.

Yes you're right. I'm going to change this to 3.3.0.

> 
> > +LLVM_VERSION_REQUIRED_OPENCL=3.6.0
> > +LLVM_VERSION_REQUIRED_R600=3.6.0
> > +LLVM_VERSION_REQUIRED_RADEONSI=3.6.0
> 
> There's a small related gotcha: as-is at build time we get the
> different codepaths thus, as people build against shared LLVM (hello
> Archlinux, I'm looking at you) and update their LLVM without
> rebuilding mesa (Arch I'm looking at you again) things go funny.
> 
> Tl;Dr; We really want to enable static linking by default and prod
> distros to use it.

I'm all in favor of statically linking LLVM (that's the way I'm doing this on 
my pc). 
I think the only reason this is not done is because people (also here on the 
list) don't want any static linkg of external libraries because of size or 
whatever.
So changing the default to static is easy, but I doubt it will make everyone 
happy ;-)

> 
> As an alternative (or in parallel really) we could bump to 3.6.2...
> 
> > +LLVM_VERSION_REQUIRED_RADV=3.9.0
> > +LLVM_VERSION_REQUIRED_SWR=3.6.0
> 
> ... and 3.6.1 + drop the older codepaths. Just a follow-up idea.
> 
> > +
> 
> Nit: Drop _VERSION part ?

Ok.

> 
> Follow-up: worth checking with VMWare guys (Jose et al.) it they're
> still using pre-3.6 llvm and doing code/configure cleanups.
> 
> >  dnl Check for progs
> >  AC_PROG_CPP
> >  AC_PROG_CC
> > 
> > @@ -935,29 +944,38 @@ llvm_get_version() {
> > 
> >  [#include "${LLVM_INCLUDEDIR}/llvm/Config/llvm-config.h"])
> >  
> >  AC_COMPUTE_INT([LLVM_VERSION_MINOR], [LLVM_VERSION_MINOR],
> >  
> >  [#include "${LLVM_INCLUDEDIR}/llvm/Config/llvm-config.h"])
> > 
> > +AC_COMPUTE_INT([LLVM_VERSION_PATCH], [LLVM_VERSION_PATCH],
> > +[#include "${LLVM_INCLUDEDIR}/llvm/Config/llvm-config.h"])
> > +
> > +if test -z "$LLVM_VERSION_PATCH"; then
> > +LLVM_VERSION_PATCH=`echo $LLVM_VERSION | cut -d. -f3 | egrep
> > -o '^[[0-9]]+'` +fi
> 
> IIRC the LLVM_VERSION parsing was added by the AMD folks since old
> versions did not have the define. Might be worth checking with them
> (and llvm headers/codebase) if that's no longer the case given their
> min. required version.
> 
> Just an idea, in parallel to the above "require !=0 minor for swr/radeonsi"

3.6.0 has this already, so if only amd needs this, I'm going to drop this.

> .
> > -LLVM_VERSION_PATCH=`echo $LLVM_VERSION | cut -d. -f3 | egrep -o
> > '^[[0-9]]+'`> 
> >  if test -z "$LLVM_VERSION_PATCH"; then
> >  
> >  LLVM_VERSION_PATCH=0
> >  
> >  fi
> >  
> >  if test -n "${LLVM_VERSION_MAJOR}"; then
> > 
> > -   
> > LLVM_VERSION_INT="${LLVM_VERSION_MAJOR}0${LLVM_VERSION_MINOR}"
> > -else
> > -LLVM_VERSION_INT=`echo $LLVM_VERSION | sed -e
> > 's/\([[0-9]]\)\.\([[0-9]]\)/\10\2/g'` +   
> > LLVM_VERSION="${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_
> > PATCH}"> 
> >  fi
> 
> Follow-up idea: just bail out if the LLVM_VERSION_* defines are not
> set, and reuse those to construct HAVE_LLVM amongst others (dropping
> the sed 'magic') or use them all together in favour of our local
> macros ?

The sed magic is needed there to add the leading 0 to the minor version if 
it's < 10:

3.6.1   -> 3061
3.10.2  -> 3102

If I can always use the header provided values this could be done easily with 
some if < 9 then ... else ... fi

> 
> > -DEFINES="${DEFINES} -DHAVE_LLVM=0x0$LLVM_VERSION_INT
> > -DMESA_LLVM_VERSION_PATCH=$LLVM_VERSION_PATCH" +   
> > LLVM_VERSION_INT=`echo $LLVM_VERSION | sed -e
> > 's/\.\([[1-9]][[0-9]]\)\.\([[0-9]]\)/\1\2/' -e
> > 's/\.\([[0-9]]\)\.\([[0-9]]\)/0\1\2/' -e 's/\.\([[1-9]][[0-9]]\)/\10/' -e
> > 's/\.\([[0-9]]\)/0\10/'` +HAVE_LLVM=`echo $LLVM_VERSION | sed -e
> > 's/\.\([[1-9]][[0-9]]\)\.\([[0-9]]\)/\1/' -e
> > 's/\.\([[0-9]]\)\.\([[0-9]]\)/0\1/' -e 's/\.\([[1-9]][[0-9]]\)/\1/' -e
> > 's/\.\([[0-9]]\)/0\1/'` +
> 
> The sed patterns have changed, please elaborate [a bit] why/how in the
> commit msg.

This just makes the conversion for double digit minor numbers work. It didn't 
work with the old script.

> 
> >  llvm_check_version_for() {
> > 
> > -if test "${LLVM_VERSION_INT}${LLVM_VERSION_PATCH}" -lt
> > "${1}0${2}${3}"; then -AC_MSG_ERROR([LLVM $1.$2.$3 or newer is
> > required for $4]) +llvm_target_version=`echo $1 | sed -e
> > 's/\.\([[1-9]][[0-9]]\)\.\([[0-9]]\)/\1\2/' -e
> > 's/\.\([[0-9]]\)\.\([[0-9]]\)/0\1\2/' -e 's/\.\([[1-9]][[0-9]]\)/\10/' -e
> > 's/\.\([[0-9]]\)/0\10/'` +
> 
> A something as simple as the following should do it as well, shouldn't it ?
>   llvm_target_version=`echo

  1   2   >