Re: [Mesa-dev] [PATCH 1/2] main: Remove interface block array index for doing the name comparison

2015-10-23 Thread Samuel Iglesias Gonsálvez


On 22/10/15 13:06, Timothy Arceri wrote:
> On Thu, 2015-10-22 at 11:01 +0200, Samuel Iglesias Gonsalvez wrote:
>> From ARB_program_query_interface spec:
>>
>> "uint GetProgramResourceIndex(uint program, enum programInterface,
>>const char *name);
>>  [...]
>>  If  exactly matches the name string of one of the active
>> resources
>>  for , the index of the matched resource is
>> returned.
>>  Additionally, if  would exactly match the name string of an
>> active
>>  resource if "[0]" were appended to , the index of the matched
>>  resource is returned. [...]"
>>
>> "A string provided to GetProgramResourceLocation or
>>  GetProgramResourceLocationIndex is considered to match an active
>> variable
>>  if:
>> [...]
>>* if the string identifies the base name of an active array, where
>> the
>>  string would exactly match the name of the variable if the
>> suffix
>>  "[0]" were appended to the string;
>> [...]
>> "
>>
>> But this is only happening on those ARB_program_query_interface's
>> queries.
>> For the rest of specs we need to keep old behavior. For that reason,
>> arb_program_interface_query boolean is added to the affected
>> functions.
>>
>> Fixes the following two dEQP-GLES31 tests:
>>
>> dEQP
>> -GLES31.functional.program_interface_query.shader_storage_block.resou
>> rce_list.block_array
>> dEQP
>> -GLES31.functional.program_interface_query.shader_storage_block.resou
>> rce_list.block_array_single_element
>> Signed-off-by: Samuel Iglesias Gonsalvez 
>> Cc: Tapani Pälli 
>> ---
>>  src/mesa/main/program_resource.c |  6 ++---
>>  src/mesa/main/shader_query.cpp   | 58
>> +++-
>>  src/mesa/main/shaderapi.c|  4 +--
>>  src/mesa/main/shaderapi.h|  9 ---
>>  src/mesa/main/uniforms.c |  6 ++---
>>  5 files changed, 60 insertions(+), 23 deletions(-)
>>
>> diff --git a/src/mesa/main/program_resource.c
>> b/src/mesa/main/program_resource.c
>> index eb71fdd..d029926 100644
>> --- a/src/mesa/main/program_resource.c
>> +++ b/src/mesa/main/program_resource.c
>> @@ -251,7 +251,7 @@ _mesa_GetProgramResourceIndex(GLuint program,
>> GLenum programInterface,
>> case GL_UNIFORM_BLOCK:
>> case GL_SHADER_STORAGE_BLOCK:
>>res = _mesa_program_resource_find_name(shProg,
>> programInterface, name,
>> - &array_index);
>> + &array_index, true);
>>if (!res || array_index > 0)
>>   return GL_INVALID_INDEX;
>>  
>> @@ -377,7 +377,7 @@ _mesa_GetProgramResourceLocation(GLuint program,
>> GLenum programInterface,
>>   goto fail;
>> }
>>  
>> -   return _mesa_program_resource_location(shProg, programInterface,
>> name);
>> +   return _mesa_program_resource_location(shProg, programInterface,
>> name, true);
>>  fail:
>> _mesa_error(ctx, GL_INVALID_ENUM,
>> "glGetProgramResourceLocation(%s %s)",
>> _mesa_enum_to_string(programInterface), name);
>> @@ -411,5 +411,5 @@ _mesa_GetProgramResourceLocationIndex(GLuint
>> program, GLenum programInterface,
>> }
>>  
>> return _mesa_program_resource_location_index(shProg,
>> GL_PROGRAM_OUTPUT,
>> -name);
>> +name, true);
>>  }
>> diff --git a/src/mesa/main/shader_query.cpp
>> b/src/mesa/main/shader_query.cpp
>> index 8182d3d..49766ca 100644
>> --- a/src/mesa/main/shader_query.cpp
>> +++ b/src/mesa/main/shader_query.cpp
>> @@ -218,7 +218,7 @@ _mesa_GetAttribLocation(GLhandleARB program,
>> const GLcharARB * name)
>> unsigned array_index = 0;
>> struct gl_program_resource *res =
>>_mesa_program_resource_find_name(shProg, GL_PROGRAM_INPUT,
>> name,
>> -   &array_index);
>> +   &array_index, false);
>>  
>> if (!res)
>>return -1;
>> @@ -367,7 +367,7 @@ _mesa_GetFragDataIndex(GLuint program, const
>> GLchar *name)
>>return -1;
>>  
>> return _mesa_program_resource_location_index(shProg,
>> GL_PROGRAM_OUTPUT,
>> -name);
>> +name, false);
>>  }
>>  
>>  GLint GLAPIENTRY
>> @@ -404,7 +404,7 @@ _mesa_GetFragDataLocation(GLuint program, const
>> GLchar *name)
>> unsigned array_index = 0;
>> struct gl_program_resource *res =
>>_mesa_program_resource_find_name(shProg, GL_PROGRAM_OUTPUT,
>> name,
>> -   &array_index);
>> +   &array_index, false);
>>  
>> if (!res)
>>return -1;
>> @@ -533,9 +533,12 @@ valid_array_index(const GLchar *name, unsigned
>> *array_index)
>>  struct gl_program_resource *
>>  _mesa_program_resource_find_name(struct gl_shader_program *shProg,
>>   GLenum programInterface, const char
>> *name,

Re: [Mesa-dev] [PATCH 1/2] main: Remove interface block array index for doing the name comparison

2015-10-23 Thread Samuel Iglesias Gonsálvez


On 22/10/15 13:19, Samuel Iglesias Gonsálvez wrote:
> 
> 
> On 22/10/15 13:08, Tapani Pälli wrote:
>> On 10/22/2015 12:01 PM, Samuel Iglesias Gonsalvez wrote:
>>>  From ARB_program_query_interface spec:
>>>
>>> "uint GetProgramResourceIndex(uint program, enum programInterface,
>>> const char *name);
>>>   [...]
>>>   If  exactly matches the name string of one of the active
>>> resources
>>>   for , the index of the matched resource is returned.
>>>   Additionally, if  would exactly match the name string of an
>>> active
>>>   resource if "[0]" were appended to , the index of the matched
>>>   resource is returned. [...]"
>>>
>>> "A string provided to GetProgramResourceLocation or
>>>   GetProgramResourceLocationIndex is considered to match an active
>>> variable
>>>   if:
>>> [...]
>>> * if the string identifies the base name of an active array, where
>>> the
>>>   string would exactly match the name of the variable if the suffix
>>>   "[0]" were appended to the string;
>>> [...]
>>> "
>>>
>>> But this is only happening on those ARB_program_query_interface's
>>> queries.
>>> For the rest of specs we need to keep old behavior. For that reason,
>>> arb_program_interface_query boolean is added to the affected functions.
>>
>> This seems strange as the extension spec (and GL core spec) has set of
>> different examples that state the new queries to match behavior of old
>> functions (using words "are equivalent to calling"), this is why our
>> tests also cross-check that new and old functions return same values.
>> I'll need to have a look at tests below.
>>
>> As example extension spec says GetFragDataIndex to be equivalent to
>> calling  GetProgramResourceLocationIndex(program, PROGRAM_OUTPUT, name).
>>
> 
> OK, I am going to review that. Thanks for sharing this information.

OK, you are right, ARB_program_interface_query says that
GetFragDataIndex, GetUniformLocation,
GetActiveUniformName and others are equivalent to calling the respective
functions from ARB_program_interface_query API.

That was my original patch did, but I modified it to have this version
because a regression in piglit:

bin/arb_shader_objects-getuniformlocation-array-of-struct-of-array

It complains because it defines a shader like:

struct S { mat4 m; vec4 v[10]; };
uniform S s[10];
[...]

And the application does the following call:

/* From page 80 of the OpenGL 2.1 spec:
 *
 * "A valid name cannot be a structure, an array of structures, or
 * any portion of a single vector or a matrix."
 */
loc = glGetUniformLocation(prog, "s")
if (loc != -1) {
printf("s location = %d (should be -1)\n", loc);
pass = false;
}

If GetUniformLocation is equivalent to call to
GetProgramResourceLocation(program, UNIFORM, name), we return a location
value different than -1 for "s" case (as we will add "[0]" and compare
it to s[0] internally).

I ran the same test on NVIDIA proprietary OpenGL 4.40 driver and there
is no such regression: it returns -1 which is the expected value.

If we agree that Mesa should apply the name + "[0]" rule for these cases
too, I will update the patch with that change. Maybe we can do it only
if ARB_program_interface_query extension is supported.

What do you think Mesa should behave in this case?

Sam

> 
>>> Fixes the following two dEQP-GLES31 tests:
>>>
>>> dEQP-GLES31.functional.program_interface_query.shader_storage_block.resource_list.block_array
>>>
>>> dEQP-GLES31.functional.program_interface_query.shader_storage_block.resource_list.block_array_single_element
>>>
>>>
>>> Signed-off-by: Samuel Iglesias Gonsalvez 
>>> Cc: Tapani Pälli 
>>> ---
>>>   src/mesa/main/program_resource.c |  6 ++---
>>>   src/mesa/main/shader_query.cpp   | 58
>>> +++-
>>>   src/mesa/main/shaderapi.c|  4 +--
>>>   src/mesa/main/shaderapi.h|  9 ---
>>>   src/mesa/main/uniforms.c |  6 ++---
>>>   5 files changed, 60 insertions(+), 23 deletions(-)
>>>
>>> diff --git a/src/mesa/main/program_resource.c
>>> b/src/mesa/main/program_resource.c
>>> index eb71fdd..d029926 100644
>>> --- a/src/mesa/main/program_resource.c
>>> +++ b/src/mesa/main/program_resource.c
>>> @@ -251,7 +251,7 @@ _mesa_GetProgramResourceIndex(GLuint program,
>>> GLenum programInterface,
>>>  case GL_UNIFORM_BLOCK:
>>>  case GL_SHADER_STORAGE_BLOCK:
>>> res = _mesa_program_resource_find_name(shProg,
>>> programInterface, name,
>>> - &array_index);
>>> + &array_index, true);
>>> if (!res || array_index > 0)
>>>return GL_INVALID_INDEX;
>>>   @@ -377,7 +377,7 @@ _mesa_GetProgramResourceLocation(GLuint program,
>>> GLenum programInterface,
>>>goto fail;
>>>  }
>>>   -   return _mesa_program_resource_location(shProg

Re: [Mesa-dev] [PATCH v2 4/7] st/va: implement VaCreateSurfaces2 and VaQuerySurfaceAttributes

2015-10-23 Thread Julien Isorce
This patch is missing "memset(&templat, 0, sizeof(templat));" so I am going
to submit a v3 for this one.

On 20 October 2015 at 17:34, Julien Isorce  wrote:

> Inspired from http://cgit.freedesktop.org/vaapi/intel-driver/
> especially src/i965_drv_video.c::i965_CreateSurfaces2.
>
> This patch is mainly to support gstreamer-vaapi and tools that uses
> this newer libva API. The first advantage of using VaCreateSurfaces2
> over existing VaCreateSurfaces, is that it is possible to select which
> the pixel format for the surface. Indeed with the simple VaCreateSurfaces
> function it is only possible to create a NV12 surface. It can be useful
> to create a RGBA surface to use with video post processing.
>
> The avaible pixel formats can be query with VaQuerySurfaceAttributes.
>
> Signed-off-by: Julien Isorce 
> ---
>  src/gallium/state_trackers/va/context.c|   5 +-
>  src/gallium/state_trackers/va/surface.c| 294
> -
>  src/gallium/state_trackers/va/va_private.h |   6 +-
>  3 files changed, 253 insertions(+), 52 deletions(-)
>
> diff --git a/src/gallium/state_trackers/va/context.c
> b/src/gallium/state_trackers/va/context.c
> index 8b003ae..9be9085 100644
> --- a/src/gallium/state_trackers/va/context.c
> +++ b/src/gallium/state_trackers/va/context.c
> @@ -81,7 +81,10 @@ static struct VADriverVTable vtable =
> &vlVaSetDisplayAttributes,
> &vlVaBufferInfo,
> &vlVaLockSurface,
> -   &vlVaUnlockSurface
> +   &vlVaUnlockSurface,
> +   NULL, /* DEPRECATED VaGetSurfaceAttributes */
> +   &vlVaCreateSurfaces2,
> +   &vlVaQuerySurfaceAttributes
>  };
>
>  PUBLIC VAStatus
> diff --git a/src/gallium/state_trackers/va/surface.c
> b/src/gallium/state_trackers/va/surface.c
> index 8d4487b..62fdf3c 100644
> --- a/src/gallium/state_trackers/va/surface.c
> +++ b/src/gallium/state_trackers/va/surface.c
> @@ -36,6 +36,7 @@
>  #include "util/u_surface.h"
>
>  #include "vl/vl_compositor.h"
> +#include "vl/vl_video_buffer.h"
>  #include "vl/vl_winsys.h"
>
>  #include "va_private.h"
> @@ -44,56 +45,8 @@ VAStatus
>  vlVaCreateSurfaces(VADriverContextP ctx, int width, int height, int
> format,
> int num_surfaces, VASurfaceID *surfaces)
>  {
> -   struct pipe_video_buffer templat = {};
> -   struct pipe_screen *pscreen;
> -   vlVaDriver *drv;
> -   int i;
> -
> -   if (!ctx)
> -  return VA_STATUS_ERROR_INVALID_CONTEXT;
> -
> -   if (!(width && height))
> -  return VA_STATUS_ERROR_INVALID_IMAGE_FORMAT;
> -
> -   drv = VL_VA_DRIVER(ctx);
> -   pscreen = VL_VA_PSCREEN(ctx);
> -
> -   templat.buffer_format = pscreen->get_video_param
> -   (
> -  pscreen,
> -  PIPE_VIDEO_PROFILE_UNKNOWN,
> -  PIPE_VIDEO_ENTRYPOINT_BITSTREAM,
> -  PIPE_VIDEO_CAP_PREFERED_FORMAT
> -   );
> -   templat.chroma_format = ChromaToPipe(format);
> -   templat.width = width;
> -   templat.height = height;
> -   templat.interlaced = pscreen->get_video_param
> -   (
> -  pscreen,
> -  PIPE_VIDEO_PROFILE_UNKNOWN,
> -  PIPE_VIDEO_ENTRYPOINT_BITSTREAM,
> -  PIPE_VIDEO_CAP_PREFERS_INTERLACED
> -   );
> -
> -   for (i = 0; i < num_surfaces; ++i) {
> -  vlVaSurface *surf = CALLOC(1, sizeof(vlVaSurface));
> -  if (!surf)
> - goto no_res;
> -
> -  surf->templat = templat;
> -  surf->buffer = drv->pipe->create_video_buffer(drv->pipe, &templat);
> -  util_dynarray_init(&surf->subpics);
> -  surfaces[i] = handle_table_add(drv->htab, surf);
> -   }
> -
> -   return VA_STATUS_SUCCESS;
> -
> -no_res:
> -   if (i)
> -  vlVaDestroySurfaces(ctx, surfaces, i);
> -
> -   return VA_STATUS_ERROR_ALLOCATION_FAILED;
> +   return vlVaCreateSurfaces2(ctx, format, width, height, surfaces,
> num_surfaces,
> +  NULL, 0);
>  }
>
>  VAStatus
> @@ -349,3 +302,244 @@ vlVaUnlockSurface(VADriverContextP ctx, VASurfaceID
> surface)
>
> return VA_STATUS_ERROR_UNIMPLEMENTED;
>  }
> +
> +VAStatus
> +vlVaQuerySurfaceAttributes(VADriverContextP ctx, VAConfigID config,
> +   VASurfaceAttrib *attrib_list, unsigned int
> *num_attribs)
> +{
> +vlVaDriver *drv;
> +VASurfaceAttrib *attribs;
> +struct pipe_screen *pscreen;
> +int i;
> +
> +if (config == VA_INVALID_ID)
> +return VA_STATUS_ERROR_INVALID_CONFIG;
> +
> +if (!attrib_list && !num_attribs)
> +return VA_STATUS_ERROR_INVALID_PARAMETER;
> +
> +if (!attrib_list) {
> +*num_attribs = VASurfaceAttribCount;
> +return VA_STATUS_SUCCESS;
> +}
> +
> +if (!ctx)
> +   return VA_STATUS_ERROR_INVALID_CONTEXT;
> +
> +drv = VL_VA_DRIVER(ctx);
> +
> +if (!drv)
> +return VA_STATUS_ERROR_INVALID_CONTEXT;
> +
> +pscreen = VL_VA_PSCREEN(ctx);
> +
> +if (!pscreen)
> +   return VA_STATUS_ERROR_INVALID_CONTEXT;
> +
> +attribs = CALLOC(VASurfaceAttribCount, sizeof(VASurfaceAttrib));
> +
> +if (!attribs)
> +return VA_STATUS_ERROR_ALLOCATION_FAILED;
> +
> +i = 0;
> +
> + 

[Mesa-dev] [PATCH v3 4/7] st/va: implement VaCreateSurfaces2 and VaQuerySurfaceAttributes

2015-10-23 Thread Julien Isorce
Inspired from http://cgit.freedesktop.org/vaapi/intel-driver/
especially src/i965_drv_video.c::i965_CreateSurfaces2.

This patch is mainly to support gstreamer-vaapi and tools that uses
this newer libva API. The first advantage of using VaCreateSurfaces2
over existing VaCreateSurfaces, is that it is possible to select which
the pixel format for the surface. Indeed with the simple VaCreateSurfaces
function it is only possible to create a NV12 surface. It can be useful
to create a RGBA surface to use with video post processing.

The avaible pixel formats can be query with VaQuerySurfaceAttributes.

Signed-off-by: Julien Isorce 
---
 src/gallium/state_trackers/va/context.c|   5 +-
 src/gallium/state_trackers/va/surface.c| 296 -
 src/gallium/state_trackers/va/va_private.h |   6 +-
 3 files changed, 255 insertions(+), 52 deletions(-)

diff --git a/src/gallium/state_trackers/va/context.c 
b/src/gallium/state_trackers/va/context.c
index 6357c28..b586dcb 100644
--- a/src/gallium/state_trackers/va/context.c
+++ b/src/gallium/state_trackers/va/context.c
@@ -81,7 +81,10 @@ static struct VADriverVTable vtable =
&vlVaSetDisplayAttributes,
&vlVaBufferInfo,
&vlVaLockSurface,
-   &vlVaUnlockSurface
+   &vlVaUnlockSurface,
+   NULL, /* DEPRECATED VaGetSurfaceAttributes */
+   &vlVaCreateSurfaces2,
+   &vlVaQuerySurfaceAttributes
 };
 
 PUBLIC VAStatus
diff --git a/src/gallium/state_trackers/va/surface.c 
b/src/gallium/state_trackers/va/surface.c
index 8d4487b..e406f37 100644
--- a/src/gallium/state_trackers/va/surface.c
+++ b/src/gallium/state_trackers/va/surface.c
@@ -36,6 +36,7 @@
 #include "util/u_surface.h"
 
 #include "vl/vl_compositor.h"
+#include "vl/vl_video_buffer.h"
 #include "vl/vl_winsys.h"
 
 #include "va_private.h"
@@ -44,56 +45,8 @@ VAStatus
 vlVaCreateSurfaces(VADriverContextP ctx, int width, int height, int format,
int num_surfaces, VASurfaceID *surfaces)
 {
-   struct pipe_video_buffer templat = {};
-   struct pipe_screen *pscreen;
-   vlVaDriver *drv;
-   int i;
-
-   if (!ctx)
-  return VA_STATUS_ERROR_INVALID_CONTEXT;
-
-   if (!(width && height))
-  return VA_STATUS_ERROR_INVALID_IMAGE_FORMAT;
-
-   drv = VL_VA_DRIVER(ctx);
-   pscreen = VL_VA_PSCREEN(ctx);
-
-   templat.buffer_format = pscreen->get_video_param
-   (
-  pscreen,
-  PIPE_VIDEO_PROFILE_UNKNOWN,
-  PIPE_VIDEO_ENTRYPOINT_BITSTREAM,
-  PIPE_VIDEO_CAP_PREFERED_FORMAT
-   );
-   templat.chroma_format = ChromaToPipe(format);
-   templat.width = width;
-   templat.height = height;
-   templat.interlaced = pscreen->get_video_param
-   (
-  pscreen,
-  PIPE_VIDEO_PROFILE_UNKNOWN,
-  PIPE_VIDEO_ENTRYPOINT_BITSTREAM,
-  PIPE_VIDEO_CAP_PREFERS_INTERLACED
-   );
-
-   for (i = 0; i < num_surfaces; ++i) {
-  vlVaSurface *surf = CALLOC(1, sizeof(vlVaSurface));
-  if (!surf)
- goto no_res;
-
-  surf->templat = templat;
-  surf->buffer = drv->pipe->create_video_buffer(drv->pipe, &templat);
-  util_dynarray_init(&surf->subpics);
-  surfaces[i] = handle_table_add(drv->htab, surf);
-   }
-
-   return VA_STATUS_SUCCESS;
-
-no_res:
-   if (i)
-  vlVaDestroySurfaces(ctx, surfaces, i);
-
-   return VA_STATUS_ERROR_ALLOCATION_FAILED;
+   return vlVaCreateSurfaces2(ctx, format, width, height, surfaces, 
num_surfaces,
+  NULL, 0);
 }
 
 VAStatus
@@ -349,3 +302,246 @@ vlVaUnlockSurface(VADriverContextP ctx, VASurfaceID 
surface)
 
return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
+
+VAStatus
+vlVaQuerySurfaceAttributes(VADriverContextP ctx, VAConfigID config,
+   VASurfaceAttrib *attrib_list, unsigned int 
*num_attribs)
+{
+vlVaDriver *drv;
+VASurfaceAttrib *attribs;
+struct pipe_screen *pscreen;
+int i;
+
+if (config == VA_INVALID_ID)
+return VA_STATUS_ERROR_INVALID_CONFIG;
+
+if (!attrib_list && !num_attribs)
+return VA_STATUS_ERROR_INVALID_PARAMETER;
+
+if (!attrib_list) {
+*num_attribs = VASurfaceAttribCount;
+return VA_STATUS_SUCCESS;
+}
+
+if (!ctx)
+   return VA_STATUS_ERROR_INVALID_CONTEXT;
+
+drv = VL_VA_DRIVER(ctx);
+
+if (!drv)
+return VA_STATUS_ERROR_INVALID_CONTEXT;
+
+pscreen = VL_VA_PSCREEN(ctx);
+
+if (!pscreen)
+   return VA_STATUS_ERROR_INVALID_CONTEXT;
+
+attribs = CALLOC(VASurfaceAttribCount, sizeof(VASurfaceAttrib));
+
+if (!attribs)
+return VA_STATUS_ERROR_ALLOCATION_FAILED;
+
+i = 0;
+
+if (config == PIPE_VIDEO_PROFILE_UNKNOWN) {
+   /* Assume VAEntrypointVideoProc for now. */
+   attribs[i].type = VASurfaceAttribPixelFormat;
+   attribs[i].value.type = VAGenericValueTypeInteger;
+   attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | 
VA_SURFACE_ATTRIB_SETTABLE;
+   attribs[i].value.value.i = VA_FOURCC_BGRA;
+   i++;
+
+   attribs[i].type = VASurfaceAttribPixelFormat;
+   attribs[i].value.type = VAGeneri

[Mesa-dev] [Bug 91118] Skydrift (running in Wine) crashes on start

2015-10-23 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=91118

Béla Gyebrószki  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 CC||gyebr...@gmail.com
 Resolution|--- |FIXED

--- Comment #2 from Béla Gyebrószki  ---
I can't reproduce this bug anymore: both games start properly with current
mesa. Tried with wine 1.7.45 (the version available at the time when this bug
report was opened) and with current wine 1.7.53: the bug doesn't occur in
either version.

Marking fixed.

OpenGL vendor string: nouveau
OpenGL renderer string: Gallium 0.4 on NV92
OpenGL core profile version string: 3.3 (Core Profile) Mesa 11.1.0-devel
(git-3994ef5)
OpenGL core profile shading language version string: 3.30

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: add buffer block number information to atomic counter variables.

2015-10-23 Thread Timothy Arceri
On Fri, 2015-10-23 at 08:57 +0200, Samuel Iglesias Gonsalvez wrote:
> Atomic counter variables can have a 'binding' layout modifier.
> Unfortunately,
> the atomic counter buffers are not sorted by binding value in
> gl_shader_program's AtomicBuffers.
> 
> Save the atomic counter buffer block index into the variable, so we
> can use it inside the drivers.
> 
> Fixes 298 dEQP-GLES31.functional.atomic_counter.* tests and
> 9 dEQP-GLES31.functional.synchronization.inter_call.
>   without_memory_barrier.ssbo_atomic_counter_mixed_dispatch_*
> tests for i965 driver.

Hi Samuel,

I've sent a number of patches trying to fix the problem with the
binding without reintroducing the buffer_index field.

You can see the latest attempt here [1] based on feedback from curro
and Ilia, also see the bug for a history of the problem.

Tim

[1] http://patchwork.freedesktop.org/patch/60989/



> 
> Signed-off-by: Samuel Iglesias Gonsalvez 
> ---
>  src/glsl/ir.h|  1 +
>  src/glsl/link_atomics.cpp| 32 
>  src/glsl/linker.cpp  |  1 +
>  src/glsl/linker.h|  3 +++
>  src/glsl/nir/glsl_to_nir.cpp |  3 +--
>  5 files changed, 38 insertions(+), 2 deletions(-)
> 
> diff --git a/src/glsl/ir.h b/src/glsl/ir.h
> index 9c9f22d..d120203 100644
> --- a/src/glsl/ir.h
> +++ b/src/glsl/ir.h
> @@ -851,6 +851,7 @@ public:
> * Location an atomic counter is stored at.
> */
>struct {
> + unsigned buffer_index;
>   unsigned offset;
>} atomic;
>  
> diff --git a/src/glsl/link_atomics.cpp b/src/glsl/link_atomics.cpp
> index 70ef0e1..d19cd81 100644
> --- a/src/glsl/link_atomics.cpp
> +++ b/src/glsl/link_atomics.cpp
> @@ -193,6 +193,38 @@ namespace {
> }
>  }
>  
> +/**
> + * Scan the program for atomic counters and store buffer index
> + * information into the variable data structure.
> + */
> +void
> +link_assign_atomic_counter_variable_buffer_index(struct
> gl_shader_program *prog)
> +{
> +   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
> +  gl_shader *sh = prog->_LinkedShaders[i];
> +
> +  if (sh == NULL)
> +  continue;
> +
> +  foreach_in_list(ir_instruction, node, sh->ir) {
> +  ir_variable *var = node->as_variable();
> +
> + if (var && var->data.mode == ir_var_uniform &&
> + var->type->contains_atomic()) {
> +for(unsigned i = 0; i < prog->NumAtomicBuffers; i++) {
> +   if (prog->AtomicBuffers[i].Binding ==  (unsigned) var
> ->data.binding) {
> +  /* Save the buffer block index that corresponds to
> variable's
> +   * binding.
> +   */
> +  var->data.atomic.buffer_index = i;
> +  break;
> +   }
> +}
> + }
> +  }
> +   }
> +}
> +
>  void
>  link_assign_atomic_counter_resources(struct gl_context *ctx,
>   struct gl_shader_program *prog)
> diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
> index 424b92a..c8334f6 100644
> --- a/src/glsl/linker.cpp
> +++ b/src/glsl/linker.cpp
> @@ -4361,6 +4361,7 @@ link_shaders(struct gl_context *ctx, struct
> gl_shader_program *prog)
> update_array_sizes(prog);
> link_assign_uniform_locations(prog, ctx
> ->Const.UniformBooleanTrue);
> link_assign_atomic_counter_resources(ctx, prog);
> +   link_assign_atomic_counter_variable_buffer_index(prog);
> store_fragdepth_layout(prog);
>  
> link_calculate_subroutine_compat(prog);
> diff --git a/src/glsl/linker.h b/src/glsl/linker.h
> index c80be1c..f16dd64 100644
> --- a/src/glsl/linker.h
> +++ b/src/glsl/linker.h
> @@ -79,6 +79,9 @@ validate_interstage_uniform_blocks(struct
> gl_shader_program *prog,
> gl_shader **stages, int
> num_stages);
>  
>  extern void
> +link_assign_atomic_counter_variable_buffer_index(struct
> gl_shader_program *prog);
> +
> +extern void
>  link_assign_atomic_counter_resources(struct gl_context *ctx,
>   struct gl_shader_program
> *prog);
>  
> diff --git a/src/glsl/nir/glsl_to_nir.cpp
> b/src/glsl/nir/glsl_to_nir.cpp
> index 9b50a93..aaeb53b 100644
> --- a/src/glsl/nir/glsl_to_nir.cpp
> +++ b/src/glsl/nir/glsl_to_nir.cpp
> @@ -392,8 +392,7 @@ nir_visitor::visit(ir_variable *ir)
>  
> var->data.index = ir->data.index;
> var->data.binding = ir->data.binding;
> -   /* XXX Get rid of buffer_index */
> -   var->data.atomic.buffer_index = ir->data.binding;
> +   var->data.atomic.buffer_index = ir->data.atomic.buffer_index;
> var->data.atomic.offset = ir->data.atomic.offset;
> var->data.image.read_only = ir->data.image_read_only;
> var->data.image.write_only = ir->data.image_write_only;
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 20/21] mesa: Remove gl_extensions::dummy

2015-10-23 Thread Emil Velikov
On 23 October 2015 at 00:34, Nanley Chery  wrote:
> On Thu, Oct 22, 2015 at 3:40 AM, Emil Velikov 
> wrote:
>>
>> On 19 October 2015 at 23:44, Nanley Chery  wrote:
>> > From: Nanley Chery 
>> >
>> > This variable existed to provide an unsigned error value for
>> > name_to_offset(). Since o(extension_sentinel) is also a valid unsigned
>> > error value, save space and replace this mysterious variable with
>> > a less mysterious one (extension_sentinel).
>> >
>> That's the first time I see someone calling 0, a "unsigned error
>> value". Perhaps reword the commit message a bit ? The comment/function
>> description should be updated as well.
>>
>
> How's the following?
>
> This variable existed to enable 0 as a valid error value for
> name_to_offset(). Since o(extension_sentinel) is also a valid
> error value, save space and replace this mysterious variable
> with a less mysterious one (extension_sentinel).
>
One can bikeshed it until death, although it's substantially better
imho. As long as others are happy I'll keep quiet.

-Emil
P.S. Please don't forget to update the function comment/description.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: add buffer block number information to atomic counter variables.

2015-10-23 Thread Samuel Iglesias Gonsálvez


On 23/10/15 12:11, Timothy Arceri wrote:
> On Fri, 2015-10-23 at 08:57 +0200, Samuel Iglesias Gonsalvez wrote:
>> Atomic counter variables can have a 'binding' layout modifier.
>> Unfortunately,
>> the atomic counter buffers are not sorted by binding value in
>> gl_shader_program's AtomicBuffers.
>>
>> Save the atomic counter buffer block index into the variable, so we
>> can use it inside the drivers.
>>
>> Fixes 298 dEQP-GLES31.functional.atomic_counter.* tests and
>> 9 dEQP-GLES31.functional.synchronization.inter_call.
>>   without_memory_barrier.ssbo_atomic_counter_mixed_dispatch_*
>> tests for i965 driver.
> 
> Hi Samuel,
> 
> I've sent a number of patches trying to fix the problem with the
> binding without reintroducing the buffer_index field.
> 

Great!

> You can see the latest attempt here [1] based on feedback from curro
> and Ilia, also see the bug for a history of the problem.
> 

I will take a look at it later.

Thanks!

Sam

> Tim
> 
> [1] http://patchwork.freedesktop.org/patch/60989/
> 
> 
> 
>>
>> Signed-off-by: Samuel Iglesias Gonsalvez 
>> ---
>>  src/glsl/ir.h|  1 +
>>  src/glsl/link_atomics.cpp| 32 
>>  src/glsl/linker.cpp  |  1 +
>>  src/glsl/linker.h|  3 +++
>>  src/glsl/nir/glsl_to_nir.cpp |  3 +--
>>  5 files changed, 38 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/glsl/ir.h b/src/glsl/ir.h
>> index 9c9f22d..d120203 100644
>> --- a/src/glsl/ir.h
>> +++ b/src/glsl/ir.h
>> @@ -851,6 +851,7 @@ public:
>> * Location an atomic counter is stored at.
>> */
>>struct {
>> + unsigned buffer_index;
>>   unsigned offset;
>>} atomic;
>>  
>> diff --git a/src/glsl/link_atomics.cpp b/src/glsl/link_atomics.cpp
>> index 70ef0e1..d19cd81 100644
>> --- a/src/glsl/link_atomics.cpp
>> +++ b/src/glsl/link_atomics.cpp
>> @@ -193,6 +193,38 @@ namespace {
>> }
>>  }
>>  
>> +/**
>> + * Scan the program for atomic counters and store buffer index
>> + * information into the variable data structure.
>> + */
>> +void
>> +link_assign_atomic_counter_variable_buffer_index(struct
>> gl_shader_program *prog)
>> +{
>> +   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
>> +  gl_shader *sh = prog->_LinkedShaders[i];
>> +
>> +  if (sh == NULL)
>> + continue;
>> +
>> +  foreach_in_list(ir_instruction, node, sh->ir) {
>> + ir_variable *var = node->as_variable();
>> +
>> + if (var && var->data.mode == ir_var_uniform &&
>> + var->type->contains_atomic()) {
>> +for(unsigned i = 0; i < prog->NumAtomicBuffers; i++) {
>> +   if (prog->AtomicBuffers[i].Binding ==  (unsigned) var
>> ->data.binding) {
>> +  /* Save the buffer block index that corresponds to
>> variable's
>> +   * binding.
>> +   */
>> +  var->data.atomic.buffer_index = i;
>> +  break;
>> +   }
>> +}
>> + }
>> +  }
>> +   }
>> +}
>> +
>>  void
>>  link_assign_atomic_counter_resources(struct gl_context *ctx,
>>   struct gl_shader_program *prog)
>> diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
>> index 424b92a..c8334f6 100644
>> --- a/src/glsl/linker.cpp
>> +++ b/src/glsl/linker.cpp
>> @@ -4361,6 +4361,7 @@ link_shaders(struct gl_context *ctx, struct
>> gl_shader_program *prog)
>> update_array_sizes(prog);
>> link_assign_uniform_locations(prog, ctx
>> ->Const.UniformBooleanTrue);
>> link_assign_atomic_counter_resources(ctx, prog);
>> +   link_assign_atomic_counter_variable_buffer_index(prog);
>> store_fragdepth_layout(prog);
>>  
>> link_calculate_subroutine_compat(prog);
>> diff --git a/src/glsl/linker.h b/src/glsl/linker.h
>> index c80be1c..f16dd64 100644
>> --- a/src/glsl/linker.h
>> +++ b/src/glsl/linker.h
>> @@ -79,6 +79,9 @@ validate_interstage_uniform_blocks(struct
>> gl_shader_program *prog,
>> gl_shader **stages, int
>> num_stages);
>>  
>>  extern void
>> +link_assign_atomic_counter_variable_buffer_index(struct
>> gl_shader_program *prog);
>> +
>> +extern void
>>  link_assign_atomic_counter_resources(struct gl_context *ctx,
>>   struct gl_shader_program
>> *prog);
>>  
>> diff --git a/src/glsl/nir/glsl_to_nir.cpp
>> b/src/glsl/nir/glsl_to_nir.cpp
>> index 9b50a93..aaeb53b 100644
>> --- a/src/glsl/nir/glsl_to_nir.cpp
>> +++ b/src/glsl/nir/glsl_to_nir.cpp
>> @@ -392,8 +392,7 @@ nir_visitor::visit(ir_variable *ir)
>>  
>> var->data.index = ir->data.index;
>> var->data.binding = ir->data.binding;
>> -   /* XXX Get rid of buffer_index */
>> -   var->data.atomic.buffer_index = ir->data.binding;
>> +   var->data.atomic.buffer_index = ir->data.atomic.buffer_index;
>> var->data.atomic.offset = ir->data.atomic.offset;
>> var->data.image.read_only = ir->data.image_read_only;
>> var->data.image.write_

Re: [Mesa-dev] [PATCH v3 5/7] radeonsi: Implement DCC fast clear.

2015-10-23 Thread Bas Nieuwenhuizen
On Thu, Oct 22, 2015 at 12:12 PM, Marek Olšák  wrote:
>> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
>> b/src/gallium/drivers/radeonsi/si_descriptors.c
>> index 5548cba3..a277fa5 100644
>> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
>> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
>> @@ -234,7 +234,7 @@ static void si_set_sampler_views(struct pipe_context 
>> *ctx,
>> } else {
>> samplers->depth_texture_mask &= ~(1 << slot);
>> }
>> -   if (rtex->cmask.size || rtex->fmask.size) {
>> +   if (rtex->cmask.size || rtex->fmask.size || 
>> rtex->surface.dcc_enabled) {
>> samplers->compressed_colortex_mask |= 1 << 
>> slot;
>
> I'd like this flag to be set only when dirty_level_mask is non-zero.
> Setting this for all textures that have DCC is quite expensive in draw
> calls.

I think this code is incorrect even without considering DCC. If we do
a fast clear on a surface which allocates a cmask and then use that
surface as a texture without calling set_sampler_views in between
(because it was bound before) we get a stale compressed_colortex_mask.

Some testing shows that this can be triggered using OpenGL, although
the GL_ARB_texture_barrier extension may be needed to make the result
not undefined per the specification.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 03/21] mesa/extensions: Wrap array entries in macros

2015-10-23 Thread Emil Velikov
On 22 October 2015 at 23:22, Nanley Chery  wrote:
> On Thu, Oct 22, 2015 at 3:06 AM, Emil Velikov 
> wrote:

>>
>> That aside, I'm in favour of keeping the comments as is. The editing
>> comment does not apply imho.
>>
>
> Since, there isn't a unanimous opinion on this, I'll leave the FIXME
> to save some rebasing time. Are you referring to my git comment?
> Or to my previous reply about why I moved the FIXME?
>
"I moved it aside to make editing the table easier. " is the one I had in mind.

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 92625] [BYT] piglit glx_arb_sync_control@timing subtests fail

2015-10-23 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=92625

Emil Velikov  changed:

   What|Removed |Added

  Component|GLX |Drivers/DRI/i965
   Assignee|mesa-dev@lists.freedesktop. |i...@freedesktop.org
   |org |
 QA Contact|mesa-dev@lists.freedesktop. |intel-3d-bugs@lists.freedes
   |org |ktop.org

--- Comment #1 from Emil Velikov  ---
Not sure if you get the memo from the earlier QA team that most/all of these
tests produce inconsistent results. Or perhaps it's just my (SNB) system that
behaves in such a way ?

Note: that despite being a GLX test, these results are hardware specific.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 92634] gallium's vl_mpeg12_decoder does not work with st/va

2015-10-23 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=92634

Bug ID: 92634
   Summary: gallium's vl_mpeg12_decoder does not work with st/va
   Product: Mesa
   Version: git
  Hardware: Other
OS: Linux (All)
Status: NEW
  Severity: normal
  Priority: medium
 Component: Mesa core
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: julien.iso...@gmail.com
QA Contact: mesa-dev@lists.freedesktop.org

By curiosity I tried gallium vaapi with "ilo" aka i965 driver and it crashed.

Not sure if it is a regression or if the root problem is that vl_mpeg12_decoder
does not support chunk decoding (i.e. be able to call decode_bitstream many
times between begin_frame / end_frame) which is required for st/va to work.

Following patches fixes the crashes but then I can only see blocky green video
:)

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 92634] gallium's vl_mpeg12_decoder does not work with st/va

2015-10-23 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=92634

--- Comment #1 from Julien Isorce  ---
Created attachment 119138
  --> https://bugs.freedesktop.org/attachment.cgi?id=119138&action=edit
build: enable st/va with ilo aka i965 driver

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 92634] gallium's vl_mpeg12_decoder does not work with st/va

2015-10-23 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=92634

--- Comment #2 from Julien Isorce  ---
Created attachment 119139
  --> https://bugs.freedesktop.org/attachment.cgi?id=119139&action=edit
st/va: pass picture desc to begin and decode

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 92634] gallium's vl_mpeg12_decoder does not work with st/va

2015-10-23 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=92634

--- Comment #3 from Julien Isorce  ---
Created attachment 119140
  --> https://bugs.freedesktop.org/attachment.cgi?id=119140&action=edit
vl_mpeg12_decoder: support st/va where intra_matrix can be set after
begin_frame

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] gallivm: Explicitly disable unsupported CPU features.

2015-10-23 Thread Jose Fonseca
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92214
---
 src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 69 ---
 1 file changed, 31 insertions(+), 38 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp 
b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index e70a75f..0781e36 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -498,50 +498,43 @@ 
lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
}
 
llvm::SmallVector MAttrs;
-   if (util_cpu_caps.has_sse) {
-  MAttrs.push_back("+sse");
-   }
-   if (util_cpu_caps.has_sse2) {
-  MAttrs.push_back("+sse2");
-   }
-   if (util_cpu_caps.has_sse3) {
-  MAttrs.push_back("+sse3");
-   }
-   if (util_cpu_caps.has_ssse3) {
-  MAttrs.push_back("+ssse3");
-   }
-   if (util_cpu_caps.has_sse4_1) {
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+   /*
+* We need to unset attributes because sometimes LLVM mistakenly assumes
+* certain features are present given the processor name.
+*
+* https://bugs.freedesktop.org/show_bug.cgi?id=92214
+*/
+   MAttrs.push_back(util_cpu_caps.has_sse? "+sse": "-sse"   );
+   MAttrs.push_back(util_cpu_caps.has_sse2   ? "+sse2"   : "-sse2"  );
+   MAttrs.push_back(util_cpu_caps.has_sse3   ? "+sse3"   : "-sse3"  );
+   MAttrs.push_back(util_cpu_caps.has_ssse3  ? "+ssse3"  : "-ssse3" );
 #if HAVE_LLVM >= 0x0304
-  MAttrs.push_back("+sse4.1");
+   MAttrs.push_back(util_cpu_caps.has_sse4_1 ? "+sse4.1" : "-sse4.1");
 #else
-  MAttrs.push_back("+sse41");
+   MAttrs.push_back(util_cpu_caps.has_sse4_1 ? "+sse41"  : "-sse41" );
 #endif
-   }
-   if (util_cpu_caps.has_sse4_2) {
 #if HAVE_LLVM >= 0x0304
-  MAttrs.push_back("+sse4.2");
+   MAttrs.push_back(util_cpu_caps.has_sse4_2 ? "+sse4.2" : "-sse4.2");
 #else
-  MAttrs.push_back("+sse42");
+   MAttrs.push_back(util_cpu_caps.has_sse4_2 ? "+sse42"  : "-sse42" );
 #endif
-   }
-   if (util_cpu_caps.has_avx) {
-  /*
-   * AVX feature is not automatically detected from CPUID by the X86 target
-   * yet, because the old (yet default) JIT engine is not capable of
-   * emitting the opcodes. On newer llvm versions it is and at least some
-   * versions (tested with 3.3) will emit avx opcodes without this anyway.
-   */
-  MAttrs.push_back("+avx");
-  if (util_cpu_caps.has_f16c) {
- MAttrs.push_back("+f16c");
-  }
-  if (util_cpu_caps.has_avx2) {
- MAttrs.push_back("+avx2");
-  }
-   }
-   if (util_cpu_caps.has_altivec) {
-  MAttrs.push_back("+altivec");
-   }
+   /*
+* AVX feature is not automatically detected from CPUID by the X86 target
+* yet, because the old (yet default) JIT engine is not capable of
+* emitting the opcodes. On newer llvm versions it is and at least some
+* versions (tested with 3.3) will emit avx opcodes without this anyway.
+*/
+   MAttrs.push_back(util_cpu_caps.has_avx  ? "+avx"  : "-avx");
+   MAttrs.push_back(util_cpu_caps.has_f16c ? "+f16c" : "-f16c");
+   MAttrs.push_back(util_cpu_caps.has_avx2 ? "+avx2" : "-avx2");
+#endif
+
+#if defined(PIPE_ARCH_PPC)
+   MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : "-altivec");
+#endif
+
builder.setMAttrs(MAttrs);
 
 #if HAVE_LLVM >= 0x0305
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/7] RadeonSI: Unbind shaders properly

2015-10-23 Thread Marek Olšák
On Fri, Oct 23, 2015 at 5:08 AM, Michel Dänzer  wrote:
> On 23.10.2015 08:12, Marek Olšák wrote:
>> This series unbinds shaders properly in bind_(vs/fs/etc.)_state, because any 
>> context can release them now, therefore we can't rely on unbinding in 
>> delete_shader. This is basically a fix for sharable shader.
>>
>> It also gets rid of the dummy pixel shader. Instead, the driver just skips 
>> draw calls that have rasterizer_discard==0 and ps==NULL set at the same 
>> time. rasterizer_discard==1 and ps==NULL works as expected.
>>
>> Please review.
>
> Patches 1-5 are
>
> Reviewed-by: Michel Dänzer 
>
>
> Patches 6 & 7:
>
> Shouldn't we leave sctx->emitted.named.* intact in si_bind_*_shader and
> only clear those from si_delete_shader_selector/si_shader_destroy?
> Otherwise we might keep re-emitting the same shader PM4 state if the app
> / state tracker keeps unbinding and re-binding the same shader.

The thing is any context can release a shader and that shader can
still be bound in a different context than the one that's releasing it
and that can crash later. Or a user can release a shader and allocate
a new one and the pointer might be the same, causing the driver to
think the shader hasn't been changed.

I guess the real issue is that state trackers don't unbind shaders in
other contexts either, so the 2 patches don't help at all, they just
move the problem to a different place and add inefficiency. This needs
more thought, so let's drop them for now.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 5/7] radeonsi: Implement DCC fast clear.

2015-10-23 Thread Marek Olšák
On Fri, Oct 23, 2015 at 12:17 PM, Bas Nieuwenhuizen
 wrote:
> On Thu, Oct 22, 2015 at 12:12 PM, Marek Olšák  wrote:
>>> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
>>> b/src/gallium/drivers/radeonsi/si_descriptors.c
>>> index 5548cba3..a277fa5 100644
>>> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
>>> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
>>> @@ -234,7 +234,7 @@ static void si_set_sampler_views(struct pipe_context 
>>> *ctx,
>>> } else {
>>> samplers->depth_texture_mask &= ~(1 << 
>>> slot);
>>> }
>>> -   if (rtex->cmask.size || rtex->fmask.size) {
>>> +   if (rtex->cmask.size || rtex->fmask.size || 
>>> rtex->surface.dcc_enabled) {
>>> samplers->compressed_colortex_mask |= 1 << 
>>> slot;
>>
>> I'd like this flag to be set only when dirty_level_mask is non-zero.
>> Setting this for all textures that have DCC is quite expensive in draw
>> calls.
>
> I think this code is incorrect even without considering DCC. If we do
> a fast clear on a surface which allocates a cmask and then use that
> surface as a texture without calling set_sampler_views in between
> (because it was bound before) we get a stale compressed_colortex_mask.
>
> Some testing shows that this can be triggered using OpenGL, although
> the GL_ARB_texture_barrier extension may be needed to make the result
> not undefined per the specification.

In that case, we should decompress in texture_barrier and not in draw calls.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 92634] gallium's vl_mpeg12_decoder does not work with st/va

2015-10-23 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=92634

--- Comment #4 from Christian König  ---
Please submit "st/va: pass picture desc to begin and decode" to the mesa
mailing list. That looks like a bug to me which should be fixed asap.

Regarding "vl_mpeg12_decoder: support st/va where intra_matrix can be set after
begin_frame" I would rather like to see st/va delay calling begin_frame until
all the necessary data is available.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/2] Nir: Allow CSE of SSBO loads

2015-10-23 Thread Francisco Jerez
Connor Abbott  writes:

> On Thu, Oct 22, 2015 at 10:37 AM, Francisco Jerez  
> wrote:
>> Connor Abbott  writes:
>>
>>> On Thu, Oct 22, 2015 at 7:21 AM, Iago Toral Quiroga  
>>> wrote:
 I implemented this first as a separate optimization pass in GLSL IR [1], 
 but
 Curro pointed out that this being pretty much a restricted form of a CSE 
 pass
 it would probably make more sense to do it inside CSE (and we no longer 
 have
 a CSE pass in GLSL IR).

 Unlike other things we CSE in NIR, in the case of SSBO loads we need to 
 make
 sure that we invalidate previous entries in the set in the presence of
 conflicting instructions (i.e. SSBO writes to the same block and offset) or
 in the presence of memory barriers.

 If this is accepted I intend to extend this to also cover image reads, 
 which
 follow similar behavior.

 No regressions observed in piglit or dEQP's SSBO functional tests.

 [1] http://lists.freedesktop.org/archives/mesa-dev/2015-October/097718.html

 Iago Toral Quiroga (2):
   nir/cse: invalidate SSBO loads in presence of ssbo writes or memory
 barriers
   nir/instr_set: allow rewrite of SSBO loads

  src/glsl/nir/nir_instr_set.c |  24 ++--
  src/glsl/nir/nir_opt_cse.c   | 142 
 +++
  2 files changed, 162 insertions(+), 4 deletions(-)

 --
 1.9.1

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>>
>>> NAK, this isn't going to work. NIR CSE is designed for operations
>>> which can be moved around freely as long they're still dominated by
>>> the SSA values they use. It makes heavy advantage of this to avoid
>>> looking at the entire CFG and instead only at the current block and
>>> its parents in the dominance tree. For example, imagine you have
>>> something like:
>>>
>>> A = load_ssbo 0
>>> if  (cond) {
>>>store_ssbo 0
>>> }
>>> B = load_ssbo 0
>>>
>>> Then A and B can't be combined, but CSE will combine them anyways when
>>> it reaches B because it keeps a hash table of values dominating B and
>>> finds A as a match. It doesn't look at the if conditional at all
>>> because it doesn't dominate the load to B. This is great when you want
>>> to CSE pure things that don't depend on other side effects -- after
>>> all, this is the sort of efficiency that SSA is supposed to give us --
>>> but it means that as-is, it can't be used for e.g. SSBO's and images
>>> without completely changing how the pass works and making it less
>>> efficient.
>>>
>>> Now, that being said, I still think that we should definitely be doing
>>> this sort of thing in NIR now that we've finally added support for
>>> SSBO's and images. We've been trying to avoid adding new optimizations
>>> to GLSL, since we've been trying to move away from it. In addition,
>>> with SPIR-V on the way, anything added to GLSL IR now is something
>>> that we won't be able to use with SPIR-V shaders. Only doing it in FS
>>> doesn't sound so great either; we should be doing as much as possible
>>> at the midlevel, and combining SSBO loads is something that isn't
>>> FS-specific at all.
>>>
>>> There are two ways I can see support for this being added to NIR:
>>>
>>> 1. Add an extra fake source/destination to intrinsics with side
>>> effects, and add a pass to do essentially a conversion to SSA that
>>> wires up these "token" sources/destinations, or perhaps extend the
>>> existing to-SSA pass.
>>>
>> This is in fact the approach taken by other SSA-form IRs like LLVM's --
>> Side-effectful instructions take a "chain" input as argument and spit
>> out a new chain token which can then be passed as argument to subsequent
>> non-pure instructions for which a control-flow dependency exists.
>
> IIUC, LLVM actually doesn't take this approach; it has a special
> LoadCombine pass [1]  and various other load/store-specific
> optimizations as well, and doesn't use token passing.

It uses token passing extensively in selection DAGs to represent
control-flow dependencies between things like loads and stores.
E.g. see 'lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp'.

> But yeah, it's not exactly a new idea -- although I'm still not
> convinced that the "obvious" way of doing it can handle arbitrary code
> motion. For example, given something like:
>
> store foo, b;
> if (cond) {
> store foo, a;
> }
> ... = load foo;
>
> then the obvious way to handle the tokens would be:
>
> token1 = store foo, b;
> if (cond) {
> token2 = store foo, a, token1;
> }
> token3 = phi(token2, token1);
> ... = load foo, token3;
>

Yeah, I don't think it would be a good idea to allow arbitrary motion of
stores and atomics under this approach.

> This orders the load and the store, but now theoretically there's
> nothing preventing, say, GCM from moving the store

Re: [Mesa-dev] [PATCH v3 5/7] radeonsi: Implement DCC fast clear.

2015-10-23 Thread Bas Nieuwenhuizen
On Fri, Oct 23, 2015 at 12:50 PM, Marek Olšák  wrote:
> On Fri, Oct 23, 2015 at 12:17 PM, Bas Nieuwenhuizen
>  wrote:
>> On Thu, Oct 22, 2015 at 12:12 PM, Marek Olšák  wrote:
 diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
 b/src/gallium/drivers/radeonsi/si_descriptors.c
 index 5548cba3..a277fa5 100644
 --- a/src/gallium/drivers/radeonsi/si_descriptors.c
 +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
 @@ -234,7 +234,7 @@ static void si_set_sampler_views(struct pipe_context 
 *ctx,
 } else {
 samplers->depth_texture_mask &= ~(1 << 
 slot);
 }
 -   if (rtex->cmask.size || rtex->fmask.size) {
 +   if (rtex->cmask.size || rtex->fmask.size || 
 rtex->surface.dcc_enabled) {
 samplers->compressed_colortex_mask |= 1 << 
 slot;
>>>
>>> I'd like this flag to be set only when dirty_level_mask is non-zero.
>>> Setting this for all textures that have DCC is quite expensive in draw
>>> calls.
>>
>> I think this code is incorrect even without considering DCC. If we do
>> a fast clear on a surface which allocates a cmask and then use that
>> surface as a texture without calling set_sampler_views in between
>> (because it was bound before) we get a stale compressed_colortex_mask.
>>
>> Some testing shows that this can be triggered using OpenGL, although
>> the GL_ARB_texture_barrier extension may be needed to make the result
>> not undefined per the specification.
>
> In that case, we should decompress in texture_barrier and not in draw calls.
>
> Marek


texture_barrier does not need to be called though, the language
changes might be needed.

Basically the test is

fbo1, fbo2 framebuffers with 1 color buffer each:

bind fbo2 as texture
clear fbo1 using shader
bind fbo1 as texture
clear fbo2 using shader
clear fbo1 using clear (which results in cmask being allocated for fbo1)
bind fbo2 as texture
copy fbo2 to fbo1 using copy shader (which wrongly does not decompress fbo1)

If the clear shader has an unused sampler2D, then we bind fbo1 as a
texture before the clear and do not rebind it afterwards.

Note that the normal clear is the only one for which we have a
rendering feedback loop, and it should not fetch from any texture at
all. I don't think we need a texture barrier for that.

I would think this would be most easily fixed by recomputing
compressed_colortex_mask after a fast clear.

- Bas
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 5/8] i965/fs: Avoid scalar destinations in emit_uniformize()

2015-10-23 Thread Francisco Jerez
Kristian Høgsberg  writes:

> On Thu, Oct 22, 2015 at 12:24 PM, Matt Turner  wrote:
>> On Thu, Oct 22, 2015 at 11:37 AM, Kristian Høgsberg Kristensen
>>  wrote:
>>> The scalar destination registers break copy propagation. Instead compute
>>> the results to a regular register and then reference a component when we
>>> later use the result as a source.
>>
>> It might be hairy to get it working, but making copy propagation
>> handle scalar destinations is the "right" way forward.
>>

I'm pretty sure use-def chains will help with that :).

>> If we commit this patch, and later implement support for copy
>> propagating with scalar destinations, how will we remember to change
>> this back?
>
> What's the harm in leaving it in?
>
Scalar instructions take less cycles to execute and consume less
registers than vector ones -- The latter isn't a huge deal right now
because the register allocator will expand it to a 32B register anyway,
so the most you save currently is one register in SIMD16 mode.  Let's
put an XXX comment so we remember to change this back to a scalar when
the remaining compiler infrastructure is fixed to handle them properly.
With that change this patch is:

Reviewed-by: Francisco Jerez 

> Kristian


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 6/8] i965/fs: Drop offset_reg temporary in ssbo load

2015-10-23 Thread Francisco Jerez
Kristian Høgsberg Kristensen  writes:

> Now that we don't read each component one-by-one, we don't need the
> temoprary vgrf for the offset. More importantly, this register was type
> UD while the nir source was type D. This broke copy propagation and left
> a redundant MOV in the generated code.
>
> Signed-off-by: Kristian Høgsberg Kristensen 
> ---
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 8 +++-
>  1 file changed, 3 insertions(+), 5 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 00f200a..a82c616 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -1521,13 +1521,11 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, 
> nir_intrinsic_instr *instr
>}
>  
>/* Get the offset to read from */
> -  fs_reg offset_reg = vgrf(glsl_type::uint_type);
> -  unsigned const_offset_bytes = 0;
> +  fs_reg offset_reg;
>if (has_indirect) {
> - bld.MOV(offset_reg, get_nir_src(instr->src[1]));
> + offset_reg = get_nir_src(instr->src[1]);
>} else {
> - const_offset_bytes = instr->const_index[0];
> - bld.MOV(offset_reg, fs_reg(const_offset_bytes));
> + offset_reg = fs_reg(instr->const_index[0]);
>}
>  
Reviewed-by: Francisco Jerez 

>/* Read the vector */
> -- 
> 2.6.2
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 8/8] i965/fs: Allow copy propagating into new surface access opcodes

2015-10-23 Thread Francisco Jerez
Kristian Høgsberg Kristensen  writes:

> Signed-off-by: Kristian Høgsberg Kristensen 
> ---
>  src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | 15 +++
>  1 file changed, 15 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
> index 5589716..97e206d 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
> @@ -612,6 +612,21 @@ fs_visitor::try_constant_propagate(fs_inst *inst, 
> acp_entry *entry)
>   }
>   break;
>  
> +  case SHADER_OPCODE_UNTYPED_ATOMIC:
> +  case SHADER_OPCODE_UNTYPED_SURFACE_READ:
> +  case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
> +  case SHADER_OPCODE_TYPED_ATOMIC:
> +  case SHADER_OPCODE_TYPED_SURFACE_READ:
> +  case SHADER_OPCODE_TYPED_SURFACE_WRITE:
> + /* We only propagate into the surface argument of the
> +  * instruction. Everything else goes through LOAD_PAYLOAD.
> +  */
> + if (i == 1) {
> +inst->src[i] = val;
> +progress = true;
> + }
> + break;
> +

Reviewed-by: Francisco Jerez 

>case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
>case SHADER_OPCODE_BROADCAST:
>   inst->src[i] = val;
> -- 
> 2.6.2
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 5/7] radeonsi: Implement DCC fast clear.

2015-10-23 Thread Marek Olšák
On Fri, Oct 23, 2015 at 1:30 PM, Bas Nieuwenhuizen
 wrote:
> On Fri, Oct 23, 2015 at 12:50 PM, Marek Olšák  wrote:
>> On Fri, Oct 23, 2015 at 12:17 PM, Bas Nieuwenhuizen
>>  wrote:
>>> On Thu, Oct 22, 2015 at 12:12 PM, Marek Olšák  wrote:
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
> b/src/gallium/drivers/radeonsi/si_descriptors.c
> index 5548cba3..a277fa5 100644
> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> @@ -234,7 +234,7 @@ static void si_set_sampler_views(struct pipe_context 
> *ctx,
> } else {
> samplers->depth_texture_mask &= ~(1 << 
> slot);
> }
> -   if (rtex->cmask.size || rtex->fmask.size) {
> +   if (rtex->cmask.size || rtex->fmask.size || 
> rtex->surface.dcc_enabled) {
> samplers->compressed_colortex_mask |= 1 
> << slot;

 I'd like this flag to be set only when dirty_level_mask is non-zero.
 Setting this for all textures that have DCC is quite expensive in draw
 calls.
>>>
>>> I think this code is incorrect even without considering DCC. If we do
>>> a fast clear on a surface which allocates a cmask and then use that
>>> surface as a texture without calling set_sampler_views in between
>>> (because it was bound before) we get a stale compressed_colortex_mask.
>>>
>>> Some testing shows that this can be triggered using OpenGL, although
>>> the GL_ARB_texture_barrier extension may be needed to make the result
>>> not undefined per the specification.
>>
>> In that case, we should decompress in texture_barrier and not in draw calls.
>>
>> Marek
>
>
> texture_barrier does not need to be called though, the language
> changes might be needed.
>
> Basically the test is
>
> fbo1, fbo2 framebuffers with 1 color buffer each:
>
> bind fbo2 as texture
> clear fbo1 using shader
> bind fbo1 as texture
> clear fbo2 using shader
> clear fbo1 using clear (which results in cmask being allocated for fbo1)
> bind fbo2 as texture
> copy fbo2 to fbo1 using copy shader (which wrongly does not decompress fbo1)
>
> If the clear shader has an unused sampler2D, then we bind fbo1 as a
> texture before the clear and do not rebind it afterwards.
>
> Note that the normal clear is the only one for which we have a
> rendering feedback loop, and it should not fetch from any texture at
> all. I don't think we need a texture barrier for that.
>
> I would think this would be most easily fixed by recomputing
> compressed_colortex_mask after a fast clear.

I'm afraid I don't understand the given example. Copying to fbo1
doesn't need decompression, because fbo1 is a color buffer.

I'm assuming there are no image stores.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 7/8] i965/fs: Optimize ssbo stores

2015-10-23 Thread Francisco Jerez
Kristian Høgsberg Kristensen  writes:

> Write groups of enabled components together.
>
> Signed-off-by: Kristian Høgsberg Kristensen 
> ---
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 56 
> +++-
>  1 file changed, 26 insertions(+), 30 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index a82c616..5fbb32f 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -1,3 +1,4 @@
> +

Stray whitespace.

>  /*
>   * Copyright © 2010 Intel Corporation
>   *
> @@ -1759,45 +1760,40 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, 
> nir_intrinsic_instr *instr
> nir->info.num_ssbos - 1);
>}
>  
> -  /* Offset */
> -  fs_reg offset_reg = vgrf(glsl_type::uint_type);
> -  unsigned const_offset_bytes = 0;
> -  if (has_indirect) {
> - bld.MOV(offset_reg, get_nir_src(instr->src[2]));
> -  } else {
> - const_offset_bytes = instr->const_index[0];
> - bld.MOV(offset_reg, fs_reg(const_offset_bytes));
> -  }
> -
>/* Value */
>fs_reg val_reg = get_nir_src(instr->src[0]);
>  
>/* Writemask */
>unsigned writemask = instr->const_index[1];
>  
> -  /* Write each component present in the writemask */
> -  unsigned skipped_channels = 0;
> -  for (int i = 0; i < instr->num_components; i++) {
> - int component_mask = 1 << i;
> - if (writemask & component_mask) {
> -if (skipped_channels) {
> -   if (!has_indirect) {
> -  const_offset_bytes += 4 * skipped_channels;
> -  bld.MOV(offset_reg, fs_reg(const_offset_bytes));
> -   } else {
> -  bld.ADD(offset_reg, offset_reg,
> -   brw_imm_ud(4 * skipped_channels));
> -   }
> -   skipped_channels = 0;
> -}
> +  /* Combine groups of consecutive enabled channels in one write
> +   * message. We use ffs to find the first enabled channel and then ffs 
> on
> +   * the bit-inverse, down-shifted writemask to determine the length of
> +   * the block of enabled bits.
> +   */
> +  while (writemask) {
> + unsigned first_component = ffs(writemask) - 1;
> + unsigned length = ffs(~(writemask >> first_component)) - 1;
> + fs_reg offset_reg;
>  
> -emit_untyped_write(bld, surf_index, offset_reg,
> -   offset(val_reg, bld, i),
> -   1 /* dims */, 1 /* size */,
> -   BRW_PREDICATE_NONE);
> + if (!has_indirect) {
> +offset_reg = fs_reg(instr->const_index[0] + 4 * first_component);
> + } else {
> +offset_reg = vgrf(glsl_type::uint_type);
> +bld.ADD(offset_reg,
> +retype(get_nir_src(instr->src[2]), BRW_REGISTER_TYPE_UD),
> +fs_reg(4 * first_component));
>   }
>  
> - skipped_channels++;
> + emit_untyped_write(bld, surf_index, offset_reg,
> +offset(val_reg, bld, first_component),
> +1 /* dims */, length,
> +BRW_PREDICATE_NONE);
> +
> + /* Clear the bits in the writemask that we just wrote, then try
> +  * again to see if more channels are left.
> +  */
> + writemask &= (15 << (first_component + length));

Looks good to me,

Reviewed-by: Francisco Jerez 

>}
>break;
> }
> -- 
> 2.6.2
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 5/7] radeonsi: Implement DCC fast clear.

2015-10-23 Thread Bas Nieuwenhuizen
On Fri, Oct 23, 2015 at 1:52 PM, Marek Olšák  wrote:
> On Fri, Oct 23, 2015 at 1:30 PM, Bas Nieuwenhuizen
>  wrote:
>> On Fri, Oct 23, 2015 at 12:50 PM, Marek Olšák  wrote:
>>> On Fri, Oct 23, 2015 at 12:17 PM, Bas Nieuwenhuizen
>>>  wrote:
 On Thu, Oct 22, 2015 at 12:12 PM, Marek Olšák  wrote:
>> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
>> b/src/gallium/drivers/radeonsi/si_descriptors.c
>> index 5548cba3..a277fa5 100644
>> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
>> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
>> @@ -234,7 +234,7 @@ static void si_set_sampler_views(struct pipe_context 
>> *ctx,
>> } else {
>> samplers->depth_texture_mask &= ~(1 << 
>> slot);
>> }
>> -   if (rtex->cmask.size || rtex->fmask.size) {
>> +   if (rtex->cmask.size || rtex->fmask.size || 
>> rtex->surface.dcc_enabled) {
>> samplers->compressed_colortex_mask |= 1 
>> << slot;
>
> I'd like this flag to be set only when dirty_level_mask is non-zero.
> Setting this for all textures that have DCC is quite expensive in draw
> calls.

 I think this code is incorrect even without considering DCC. If we do
 a fast clear on a surface which allocates a cmask and then use that
 surface as a texture without calling set_sampler_views in between
 (because it was bound before) we get a stale compressed_colortex_mask.

 Some testing shows that this can be triggered using OpenGL, although
 the GL_ARB_texture_barrier extension may be needed to make the result
 not undefined per the specification.
>>>
>>> In that case, we should decompress in texture_barrier and not in draw calls.
>>>
>>> Marek
>>
>>
>> texture_barrier does not need to be called though, the language
>> changes might be needed.
>>
>> Basically the test is
>>
>> fbo1, fbo2 framebuffers with 1 color buffer each:
>>
>> bind fbo2 as texture
>> clear fbo1 using shader
>> bind fbo1 as texture
>> clear fbo2 using shader
>> clear fbo1 using clear (which results in cmask being allocated for fbo1)

>> bind fbo2 as texture
>> copy fbo2 to fbo1 using copy shader (which wrongly does not decompress fbo1)

My apologies, these two lines should just be a copy fbo1 to fbo2,
which does need to eleminate the cmask fast clear.

- Bas

>> If the clear shader has an unused sampler2D, then we bind fbo1 as a
>> texture before the clear and do not rebind it afterwards.
>>
>> Note that the normal clear is the only one for which we have a
>> rendering feedback loop, and it should not fetch from any texture at
>> all. I don't think we need a texture barrier for that.
>>
>> I would think this would be most easily fixed by recomputing
>> compressed_colortex_mask after a fast clear.
>
> I'm afraid I don't understand the given example. Copying to fbo1
> doesn't need decompression, because fbo1 is a color buffer.
>
> I'm assuming there are no image stores.
>
> Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] st/va: pass picture desc to begin and decode

2015-10-23 Thread Julien Isorce
At least vl_mpeg12_decoder uses the picture
desc in begin_frame and decode_bitstream.

https://bugs.freedesktop.org/show_bug.cgi?id=92634

Signed-off-by: Julien Isorce 
---
 src/gallium/state_trackers/va/picture.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/state_trackers/va/picture.c 
b/src/gallium/state_trackers/va/picture.c
index 9b94b39..a0d530b 100644
--- a/src/gallium/state_trackers/va/picture.c
+++ b/src/gallium/state_trackers/va/picture.c
@@ -58,7 +58,7 @@ vlVaBeginPicture(VADriverContextP ctx, VAContextID 
context_id, VASurfaceID rende
   return VA_STATUS_ERROR_INVALID_SURFACE;
 
context->target = surf->buffer;
-   context->decoder->begin_frame(context->decoder, context->target, NULL);
+   context->decoder->begin_frame(context->decoder, context->target, 
&context->desc.base);
 
return VA_STATUS_SUCCESS;
 }
@@ -517,7 +517,7 @@ handleVASliceDataBufferType(vlVaContext *context, 
vlVaBuffer *buf)
buffers[num_buffers] = buf->data;
sizes[num_buffers] = buf->size;
++num_buffers;
-   context->decoder->decode_bitstream(context->decoder, context->target, NULL,
+   context->decoder->decode_bitstream(context->decoder, context->target, 
&context->desc.base,
   num_buffers, (const void * const*)buffers, sizes);
 }
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 92634] gallium's vl_mpeg12_decoder does not work with st/va

2015-10-23 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=92634

Julien Isorce  changed:

   What|Removed |Added

 Attachment #119139|0   |1
is obsolete||

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 92634] gallium's vl_mpeg12_decoder does not work with st/va

2015-10-23 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=92634

--- Comment #5 from Julien Isorce  ---
(In reply to Christian König from comment #4)
> Please submit "st/va: pass picture desc to begin and decode" to the mesa
> mailing list. That looks like a bug to me which should be fixed asap.
Thx. Done sent.

> 
> Regarding "vl_mpeg12_decoder: support st/va where intra_matrix can be set
> after begin_frame" I would rather like to see st/va delay calling
> begin_frame until all the necessary data is available.

Just to be sure I understand correctly, do you mean the fix should actually be
done in st/va directly, right ?

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] gbm: Add a flag to enable creation of rotated scanout buffers

2015-10-23 Thread Ville Syrjälä
On Fri, Oct 23, 2015 at 12:18:39PM +0900, Michel Dänzer wrote:
> On 23.10.2015 10:44, Vivek Kasireddy wrote:
> > For certain platforms that support rotated scanout buffers, currently,
> > there is no way to create them with the GBM DRI interface. This flag
> > will instruct the DRI driver to create the buffer by setting
> > additional requirements.
> > 
> > Cc: Kristian Hogsberg 
> > Signed-off-by: Vivek Kasireddy 
> > ---
> >  include/GL/internal/dri_interface.h | 1 +
> >  src/gbm/backends/dri/gbm_dri.c  | 9 +++--
> >  src/gbm/main/gbm.h  | 5 +
> >  3 files changed, 13 insertions(+), 2 deletions(-)
> > 
> > diff --git a/include/GL/internal/dri_interface.h 
> > b/include/GL/internal/dri_interface.h
> > index a0f155a..2271217 100644
> > --- a/include/GL/internal/dri_interface.h
> > +++ b/include/GL/internal/dri_interface.h
> > @@ -1091,6 +1091,7 @@ struct __DRIdri2ExtensionRec {
> >  #define __DRI_IMAGE_USE_SCANOUT0x0002
> >  #define __DRI_IMAGE_USE_CURSOR 0x0004 /* Depricated */
> >  #define __DRI_IMAGE_USE_LINEAR 0x0008
> > +#define __DRI_IMAGE_USE_SCANOUT_ROTATED_90_270 0x0010
> >  
> >  
> >  /**
> 
> Thank you for splitting out the driver change. Sorry I didn't think of
> this before, but it might be worth splitting out the dri_interface.h
> change as well. I'm fine either way, though.
> 
> 
> > diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c
> > index 57cdeac..cde63de 100644
> > --- a/src/gbm/backends/dri/gbm_dri.c
> > +++ b/src/gbm/backends/dri/gbm_dri.c
> > @@ -539,7 +539,7 @@ gbm_dri_is_format_supported(struct gbm_device *gbm,
> >break;
> > case GBM_BO_FORMAT_ARGB:
> > case GBM_FORMAT_ARGB:
> > -  if (usage & GBM_BO_USE_SCANOUT)
> > +  if (usage & (GBM_BO_USE_SCANOUT | GBM_BO_USE_SCANOUT_ROTATED_90_270))
> >   return 0;
> >break;
> > default:
> > @@ -748,6 +748,8 @@ gbm_dri_bo_import(struct gbm_device *gbm,
> >  
> > if (usage & GBM_BO_USE_SCANOUT)
> >dri_use |= __DRI_IMAGE_USE_SCANOUT;
> > +   if (usage & GBM_BO_USE_SCANOUT_ROTATED_90_270)
> > +  dri_use |= __DRI_IMAGE_USE_SCANOUT_ROTATED_90_270;
> > if (usage & GBM_BO_USE_CURSOR)
> >dri_use |= __DRI_IMAGE_USE_CURSOR;
> > if (dri->image->base.version >= 2 &&
> > @@ -786,7 +788,8 @@ create_dumb(struct gbm_device *gbm,
> >  
> > is_cursor = (usage & GBM_BO_USE_CURSOR) != 0 &&
> >format == GBM_FORMAT_ARGB;
> > -   is_scanout = (usage & GBM_BO_USE_SCANOUT) != 0 &&
> > +   is_scanout = (usage & (GBM_BO_USE_SCANOUT |
> > +  GBM_BO_USE_SCANOUT_ROTATED_90_270)) != 0 &&
> >format == GBM_FORMAT_XRGB;
> > if (!is_cursor && !is_scanout) {
> >errno = EINVAL;
> > @@ -880,6 +883,8 @@ gbm_dri_bo_create(struct gbm_device *gbm,
> >  
> > if (usage & GBM_BO_USE_SCANOUT)
> >dri_use |= __DRI_IMAGE_USE_SCANOUT;
> > +   if (usage & GBM_BO_USE_SCANOUT_ROTATED_90_270)
> > +  dri_use |= __DRI_IMAGE_USE_SCANOUT_ROTATED_90_270;
> > if (usage & GBM_BO_USE_CURSOR)
> >dri_use |= __DRI_IMAGE_USE_CURSOR;
> > if (usage & GBM_BO_USE_LINEAR)
> > diff --git a/src/gbm/main/gbm.h b/src/gbm/main/gbm.h
> > index 2708e50..2ef7bd8 100644
> > --- a/src/gbm/main/gbm.h
> > +++ b/src/gbm/main/gbm.h
> > @@ -213,6 +213,11 @@ enum gbm_bo_flags {
> >  * Buffer is linear, i.e. not tiled.
> >  */
> > GBM_BO_USE_LINEAR = (1 << 4),
> > +   /**
> > +* Buffer would be rotated and some platforms have additional tiling
> > +* requirements for 90/270 rotated buffers.
> > +*/
> > +   GBM_BO_USE_SCANOUT_ROTATED_90_270 = (1 << 5),
> >  };
> >  
> >  int
> > 
> 
> I asked internally, and apparently our display hardware requires a
> rotation specific tiling mode for 180 degree rotation as well. In order
> to avoid having to add *_SCANOUT_ROTATED_180 later, would
> *_SCANOUT_ROTATED work for you as well? Or would using Y-tiling for 180
> degree rotation be an issue?

What about a bit per angle? To avoid hardware specifics.

-- 
Ville Syrjälä
Intel OTC
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 0/8] SSBO optimizations

2015-10-23 Thread Francisco Jerez
Kristian Høgsberg Kristensen  writes:

> Here's an updated and expanded ssbo optimization series. I found a bit
> of low-hanging fruit around dynamic ssbo array indexing. I removed the
> IMM shortcut in emit_uniformize() and added the constant propagation
> for the read and write opcodes. The result is the same for constant
> indexing, and it helps dynamic indexing a little bit.
>
> There's still a redundant MOV in the dynamic case that I'm going to
> punt on for now. We want to copy propagate the scalar surface index
> into the send instruction, but that fails as that's considered
> !can_do_source_mods(), but the surface index argument isn't part of
> the payload and can accept misc strides and modifiers.
>
Right... It seems kind of an abuse of can_do_source_mods() to use it in
order to find out whether the instruction supports scalar sources.
Anyway we could probably make it depend on the source index passed as
argument.

> I also took a look at ssbo stores and made it write out contiguous
> channels in the writemask together, in particular, the common case of
> writing a vec4 goes from 4 to 1 write instruction.
>
> Kristian Høgsberg Kristensen (8):
>   i965: Don't use message headers for untyped reads
>   i965/fs: Read all components of a SSBO field with one send
>   i965/fs: Use unsigned immediate 0 when eliminating
> SHADER_OPCODE_FIND_LIVE_CHANNEL
>   i965/fs: Don't uniformize surface index twice
>   i965/fs: Avoid scalar destinations in emit_uniformize()
>   i965/fs: Drop offset_reg temporary in ssbo load
>   i965/fs: Optimize ssbo stores
>   i965/fs: Allow copy propagating into new surface access opcodes
>
>  src/mesa/drivers/dri/i965/brw_eu_emit.c|  3 +-
>  src/mesa/drivers/dri/i965/brw_fs.cpp   |  4 +-
>  src/mesa/drivers/dri/i965/brw_fs_builder.h |  8 +-
>  .../drivers/dri/i965/brw_fs_copy_propagation.cpp   | 15 
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp   | 91 
> +-
>  5 files changed, 58 insertions(+), 63 deletions(-)
>
> -- 
> 2.6.2
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] i965/fs: Make emit_uniformize a no-op for immediates

2015-10-23 Thread Francisco Jerez
Kristian Høgsberg  writes:

> On Tue, Oct 20, 2015 at 11:56 AM, Francisco Jerez  
> wrote:
>> Kristian Høgsberg  writes:
>>
>>> On Tue, Oct 20, 2015 at 3:16 AM, Francisco Jerez  
>>> wrote:
 Kristian Høgsberg  writes:

> On Mon, Oct 19, 2015 at 4:19 AM, Francisco Jerez  
> wrote:
>> Neil Roberts  writes:
>>
>>> Just a thought, would it be better to move this check into the
>>> eliminate_find_live_channel optimisation? That way it could catch
>>> sources that become immediates through later optimisations. One problem
>>> with this that I've seen before is that eliminating the
>>> FIND_LIVE_CHANNEL doesn't cause the subsequent BROADCAST to be
>>> eliminated because the copy propagation doesn't work.
>>
>> I believe in this particular case the BROADCAST instruction should
>> already be eliminated (by opt_algebraic), because its first source is an
>> immediate, so this optimization would in fact be redundant with
>> opt_algebraic+constant propagation if it weren't because the latter is
>> unable to handle scalar copies.
>
> Yes, is happens as you say: first the FIND_LIVE_CHANNEL gets
> eliminated because it's outside control flow
> (fs_visitor::eliminate_find_live_channel). Then the broadcast gets
> reduced to a MOV in opt_algebraic because src0 is uniform (immediate
> constant). The problem then is the dst of the MOV has stride 0 which
> can_propagate_from() then bails on.
>
>> Another possibility that would likely
>> be more general than this (because it would handle cases in which, as
>> Neil said, the argument becomes an immediate only after optimization,
>> and because it would also avoid the issue with liveness analysis
>> extending the live ranges of scalar values incorrectly [1]) would be to
>> extend the destination register of the BROADCAST instructions to a
>> vector [2] and then add a case to the switch statement of
>> try_constant_propagate() so that the immediate MOV resulting from
>> opt_algebraic is propagated into surface instructions (see attachment).
>
> I wrote exactly this code to make constant propagate work, plus adding
> the extra opcodes to the switch statement. It works and I could
> certainly send that out after this, but
>
> 1) This doesn't mean we shouldn't do the if (src.file == IMM)
> shortcut. It saves the compiler a bit of work in the very common case
> of
> non-indirect buffer access.
>
> 2) I'm not even sure it makes sense to extend copy-propagation to do
> this (which is why I went back to just the IMM test). Anything that
> would be an immediate at this point should be an immediate, if not
> we're missing something in nir.
>
 Still this doesn't address the root of the problem, which is that
 emit_uniformize() emits scalar code that the rest of the compiler is not
 able to handle properly.
>>>
>>> This is not a case of papering over the root cause, it's about not
>>> creating the root cause in the first place. The output of
>>> emit_uniformize() always ends up as either a surface or a sampler
>>> index, which we only look at later in the generator. There are no
>>> other cases where the result of emit_uniformize() might be part of an
>>> expression that we can copy propagate or otherwise optimize. If the
>>> input to emit_uniformize() isn't an immediate where it could be, nir
>>> optimization needs fixing. So if we add these two lines to
>>> emit_uniformize() to pass immediates straight through, we avoid
>>> generating code that we have to extend the copy prop pass to handle.
>>>
>>
>> Kristian, there are legitimate uses of emit_uniformize() in which the
>> argument is not an immediate but still can be optimized out later on --
>> E.g. for images it will frequently be a uniform register, or a
>> non-constant expression calculated within uniform control flow (in which
>> case the FIND_LIVE_CHANNEL instruction emitted here will be reduced to a
>> constant MOV by eliminate_find_live_chaso nnel()).  In such cases we still
>> want copy-propagation to kick in, but it wont because of the problem I
>> was talking about with scalar writes.  Even if the instructions emitted
>> by emit_uniformize() cannot be optimized out, liveness analysis will
>> overestimate the live ranges of the temporaries used by
>> emit_uniformize() for the same reason, potentially causing the register
>> allocator to spill or run out of registers.
>
> Yeah, fair point. I went and took a look at non-constant surface index
> and sent out a new series that make that work better as well as ssbo
> write optimizations. I took out the imm shortcut and put in the
> generic optimizations. It makes non-const surface index a little
> better, but as I wrote in the cover letter, it still doesn't work that
> well as we don't propagate the surface index into the send
> instructions.
>
>> Anyway don't take me wrong, I

Re: [Mesa-dev] [PATCH] st/va: pass picture desc to begin and decode

2015-10-23 Thread Christian König

On 23.10.2015 14:25, Julien Isorce wrote:

At least vl_mpeg12_decoder uses the picture
desc in begin_frame and decode_bitstream.

https://bugs.freedesktop.org/show_bug.cgi?id=92634

Signed-off-by: Julien Isorce 


Reviewed-by: Christian König 


---
  src/gallium/state_trackers/va/picture.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/state_trackers/va/picture.c 
b/src/gallium/state_trackers/va/picture.c
index 9b94b39..a0d530b 100644
--- a/src/gallium/state_trackers/va/picture.c
+++ b/src/gallium/state_trackers/va/picture.c
@@ -58,7 +58,7 @@ vlVaBeginPicture(VADriverContextP ctx, VAContextID 
context_id, VASurfaceID rende
return VA_STATUS_ERROR_INVALID_SURFACE;
  
 context->target = surf->buffer;

-   context->decoder->begin_frame(context->decoder, context->target, NULL);
+   context->decoder->begin_frame(context->decoder, context->target, 
&context->desc.base);
  
 return VA_STATUS_SUCCESS;

  }
@@ -517,7 +517,7 @@ handleVASliceDataBufferType(vlVaContext *context, 
vlVaBuffer *buf)
 buffers[num_buffers] = buf->data;
 sizes[num_buffers] = buf->size;
 ++num_buffers;
-   context->decoder->decode_bitstream(context->decoder, context->target, NULL,
+   context->decoder->decode_bitstream(context->decoder, context->target, 
&context->desc.base,
num_buffers, (const void * const*)buffers, sizes);
  }
  


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965/vec4: check opcode on vec4_instruction::reads_flag(channel)

2015-10-23 Thread Alejandro Piñeiro
Commit f17b78 added an alternative reads_flag(channel) that returned
if the instruction was reading a specific channel flag. By mistake it
only took into account the predicate, but when the opcode is
VS_OPCODE_UNPACK_FLAGS_SIMD4X2 there isn't any predicate, but the flag
are used.

That mistake caused some regressions on old hw. More information on
this bug:
https://bugs.freedesktop.org/show_bug.cgi?id=92621
---

The bug mentions G965, ILK and G45, but I only have available ILK to
test, so it would be good if someone confirms that the regressions
get solved on that hw too. Having said so, it would be strange if
it is not the case.

Additionally, the bug mentions 24 regressions. But it only lists 21.
And I only detected 17 regressions on ILK. So it would be good
to confirm how many regressions the series introduced, and if
this new commit solve all of them.

 src/mesa/drivers/dri/i965/brw_ir_vec4.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h 
b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
index 74e9733..cc4104c 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
@@ -188,8 +188,8 @@ public:
 
bool reads_flag(unsigned c)
{
-  if (!reads_flag())
- return false;
+  if (opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2)
+ return true;
 
   switch (predicate) {
   case BRW_PREDICATE_NONE:
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 92366] [BSW SKL] Regression: glx@glx-swap-event_async failed

2015-10-23 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=92366

cprigent  changed:

   What|Removed |Added

Summary|[BSW] Regression:   |[BSW SKL] Regression:
   |glx@glx-swap-event_async|glx@glx-swap-event_async
   |failed  |failed

--- Comment #1 from cprigent  ---
Reproduced on SKL-Y. Test was Pass with Mesa 11.03.

Platform: SKY LAKE Y A0 
CPU : Intel(R) Core(TM) m5-6Y57 CPU @ 1.10GHz (family: 6, model: 78  stepping:
3)
MCP : SKL-Y  D1 2+2 (ou ULX-D1)
QDF : QJK9 
CPU : SKL D0
Chipset PCH: Sunrise Point LP C1   
CRB : SKY LAKE Y LPDDR3 RVP3 CRB FAB2
Reworks : All Mandatories + FBS02,FBS03, F23, O-02 & O-06
Software
BIOS : SKLSE2R1.R00.X097.B02.1509020030
ME FW : 11.0.0.1173
Ksc (EC FW): 1.19
Linux distribution: Ubuntu 14.04 LTS 64 bits
kernel 4.3.0-rc5-drm-intel-nightly+ (f14b17b) from
git://anongit.freedesktop.org/drm-intel
commit f14b17b8639fb680c672d3d2f8d6414fcf0f82c4
Author: Daniel Vetter 
Date:   Fri Oct 16 19:27:49 2015 +0200
drm-intel-nightly: 2015y-10m-16d-17h-27m-10s UTC integration manifest
Mesa master commit 6f3954618b0fe273af76af79ce9ec56566b79b2a from
http://cgit.freedesktop.org/mesa/mesa/
xf86-video-intel - 2.99.917 from
http://cgit.freedesktop.org/xorg/driver/xf86-video-intel/
Libdrm - 2.4.65 from http://cgit.freedesktop.org/mesa/drm/
Libva - 1.6.1 from http://cgit.freedesktop.org/libva/
vaapi intel-driver - 1.6.1 from http://cgit.freedesktop.org/vaapi/intel-driver
Cairo - 1.14.2 from http://cgit.freedesktop.org/cairo
Xorg Xserver - 1.17.2 from http://cgit.freedesktop.org/xorg/xserver

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965/vec4: select predicate based on writemask for sel emissions

2015-10-23 Thread Alejandro Piñeiro
Equivalent to commit 4eebeb but with sel operations. In this case
we select the PredCtrl based on the writemask.

This change allows cmod propagation to optimize out several
instructions.

Shader-db numbers:
total instructions in shared programs: 6235835 -> 6228008 (-0.13%)
instructions in affected programs: 219850 -> 212023 (-3.56%)
total loops in shared programs:1979 -> 1979 (0.00%)
helped:1192
HURT:  0
---
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 18 +-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 0f04f65..bc86be6 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -1437,8 +1437,24 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
case nir_op_bcsel:
   emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
   inst = emit(BRW_OPCODE_SEL, dst, op[1], op[2]);
-  inst->predicate = BRW_PREDICATE_NORMAL;
+  switch (dst.writemask) {
+  case WRITEMASK_X:
+ inst->predicate = BRW_PREDICATE_ALIGN16_REPLICATE_X;
+ break;
+  case WRITEMASK_Y:
+ inst->predicate = BRW_PREDICATE_ALIGN16_REPLICATE_Y;
+ break;
+  case WRITEMASK_Z:
+ inst->predicate = BRW_PREDICATE_ALIGN16_REPLICATE_Z;
+ break;
+  case WRITEMASK_W:
+ inst->predicate = BRW_PREDICATE_ALIGN16_REPLICATE_W;
+ break;
+  default:
+ inst->predicate = BRW_PREDICATE_NORMAL;
   break;
+  }
+   break;
 
case nir_op_fdot_replicated2:
   inst = emit(BRW_OPCODE_DP2, dst, op[0], op[1]);
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 02/20] i965: Enable gather push constants

2015-10-23 Thread Francisco Jerez
Abdiel Janulgue  writes:

> The 3DSTATE_GATHER_POOL_ALLOC is used to enable or disable the gather
> push constants feature within a context. This patch provides the toggle
> functionality of using gather push constants to program constant data
> within a batch.
>
> Using gather push constants require that a gather pool be allocated so
> that the resource streamer can flush the packed constants it gathered.
> The pool is later referenced by the 3DSTATE_CONSTANT_* command to
> program the push constant data.
>
> Also introduce INTEL_UBO_GATHER to selectively enable which shader stage
> uses gather constants for ubo fetches.
>
> v2: GEN8 support.
>
> Signed-off-by: Abdiel Janulgue 
> ---
>  src/mesa/drivers/dri/i965/brw_binding_tables.c | 59 
> ++
>  src/mesa/drivers/dri/i965/brw_context.c| 39 -
>  src/mesa/drivers/dri/i965/brw_context.h| 10 +
>  src/mesa/drivers/dri/i965/brw_state.h  |  1 +
>  4 files changed, 108 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c 
> b/src/mesa/drivers/dri/i965/brw_binding_tables.c
> index 508f1f0..aa16b6d 100644
> --- a/src/mesa/drivers/dri/i965/brw_binding_tables.c
> +++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c
> @@ -276,6 +276,60 @@ gen7_update_binding_table_from_array(struct brw_context 
> *brw,
> ADVANCE_BATCH();
>  }
>  
> +static void
> +gen7_init_gather_pool(struct brw_context *brw)
> +{
> +   if (!brw->use_resource_streamer ||
> +   (!brw->vs_ubo_gather && !brw->gs_ubo_gather && !brw->fs_ubo_gather))
> +  return;
> +
> +   if (!brw->gather_pool.bo) {
> +  brw->gather_pool.bo = drm_intel_bo_alloc(brw->bufmgr, "gather_pool",
> +   brw->gather_pool.size, 4096);
> +  brw->gather_pool.next_offset = 0;
> +   }
> +}
> +
> +void
> +gen7_toggle_gather_constants(struct brw_context *brw, bool enable)
> +{
> +   if (!brw->use_resource_streamer ||
> +   (enable && !(brw->vs_ubo_gather || brw->gs_ubo_gather ||
> +brw->fs_ubo_gather)))
> +  return;
> +

You should probably 'assert(brw->is_haswell || brw->gen == 8)' here.
And instead of having a separate gen7_init_gather_pool() function that
the caller needs to remember to call before this in order to avoid a
segfault below, you could just check the value of brw->gather_pool.bo
here and in case it's zero allocate a new buffer object to back the
pool.

> +   uint32_t dw1 = enable ? BRW_GATHER_CONSTANTS_ENABLE : 0;
> +   if (brw->is_haswell) {
> +  dw1 |= HSW_GATHER_CONSTANTS_RESERVED | (enable ? GEN7_MOCS_L3 : 0);
> +   } else if (brw->gen == 8) {
> +  dw1 |= (enable ? BDW_MOCS_WB : 0);
> +   }

How about:

| const unsigned mocs = (brw->gen == 8 ? BDW_MOCS_WB : GEN7_MOCS_L3);
| const unsigned dw1 = (enable ? BRW_GATHER_CONSTANTS_ENABLE | mocs) |
|  (brw->is_haswell ? HSW_GATHER_CONSTANTS_RESERVED : 0);

> +   int pkt_len = brw->gen >= 8 ? 4 : 3;

Could be const.

> +
> +   BEGIN_BATCH(pkt_len);
> +   OUT_BATCH(_3DSTATE_GATHER_POOL_ALLOC << 16 | (pkt_len - 2));
> +   if (brw->gen >=8 ) {

Missing whitespace before the 8 and extra whitespace after it.

> +  if (enable) {
> + OUT_RELOC64(brw->gather_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
> + OUT_BATCH(brw->gather_pool.bo->size);
> +  } else {
> + OUT_BATCH(dw1);
> + OUT_BATCH(0);
> + OUT_BATCH(0);
> +  }
> +   } else {
> +  if (enable) {
> + OUT_RELOC(brw->gather_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
> + OUT_RELOC(brw->gather_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0,
> +   brw->gather_pool.bo->size);
> +  } else {
> + OUT_BATCH(dw1);
> + OUT_BATCH(0);
> +  }
> +   }
> +   ADVANCE_BATCH();
> +}
> +
>  /**
>   * Disable hardware binding table support, falling back to the
>   * older software-generated binding table mechanism.
> @@ -294,6 +348,7 @@ gen7_disable_hw_binding_tables(struct brw_context *brw)
> brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
>  
> int pkt_len = brw->gen >= 8 ? 4 : 3;
> +   gen7_toggle_gather_constants(brw, false);
>  
> BEGIN_BATCH(pkt_len);
> OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (pkt_len - 2));
> @@ -360,12 +415,16 @@ gen7_enable_hw_binding_tables(struct brw_context *brw)
>   brw->hw_bt_pool.bo->size);
> }
> ADVANCE_BATCH();
> +
> +   gen7_init_gather_pool(brw);
> +   gen7_toggle_gather_constants(brw, true);

Seems rather unexpected for gen7_enable_hw_binding_tables() to enable
gather constants as a side effect.

>  }
>  
>  void
>  gen7_reset_hw_bt_pool_offsets(struct brw_context *brw)
>  {
> brw->hw_bt_pool.next_offset = 0;
> +   brw->gather_pool.next_offset = 0;

IIUC gen7_reset_hw_bt_pool_offsets() will be called whenever we run out
of space in the HW binding table pool.  Why should that cause the gather
constant pool offset to be r

[Mesa-dev] [Bug 92634] gallium's vl_mpeg12_decoder does not work with st/va

2015-10-23 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=92634

--- Comment #6 from Christian König  ---
(In reply to Julien Isorce from comment #5)
> Just to be sure I understand correctly, do you mean the fix should actually
> be done in st/va directly, right ?

Yes, calling begin_picture() should be delayed until we have all the parameters
gathered.

I just hacked it together initially like this because we had time pressure (as
usually) and UVD didn't need those informations in begin_picture().

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 92366] [BSW SKL] Regression: glx@glx-swap-event_async failed

2015-10-23 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=92366

Ben Widawsky  changed:

   What|Removed |Added

 Status|NEW |NEEDINFO
 CC||b...@bwidawsk.net

--- Comment #2 from Ben Widawsky  ---
Please bisect [regressions].

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 5/7] radeonsi: Implement DCC fast clear.

2015-10-23 Thread Marek Olšák
On Fri, Oct 23, 2015 at 1:57 PM, Bas Nieuwenhuizen
 wrote:
> On Fri, Oct 23, 2015 at 1:52 PM, Marek Olšák  wrote:
>> On Fri, Oct 23, 2015 at 1:30 PM, Bas Nieuwenhuizen
>>  wrote:
>>> On Fri, Oct 23, 2015 at 12:50 PM, Marek Olšák  wrote:
 On Fri, Oct 23, 2015 at 12:17 PM, Bas Nieuwenhuizen
  wrote:
> On Thu, Oct 22, 2015 at 12:12 PM, Marek Olšák  wrote:
>>> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
>>> b/src/gallium/drivers/radeonsi/si_descriptors.c
>>> index 5548cba3..a277fa5 100644
>>> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
>>> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
>>> @@ -234,7 +234,7 @@ static void si_set_sampler_views(struct 
>>> pipe_context *ctx,
>>> } else {
>>> samplers->depth_texture_mask &= ~(1 << 
>>> slot);
>>> }
>>> -   if (rtex->cmask.size || rtex->fmask.size) {
>>> +   if (rtex->cmask.size || rtex->fmask.size || 
>>> rtex->surface.dcc_enabled) {
>>> samplers->compressed_colortex_mask |= 1 
>>> << slot;
>>
>> I'd like this flag to be set only when dirty_level_mask is non-zero.
>> Setting this for all textures that have DCC is quite expensive in draw
>> calls.
>
> I think this code is incorrect even without considering DCC. If we do
> a fast clear on a surface which allocates a cmask and then use that
> surface as a texture without calling set_sampler_views in between
> (because it was bound before) we get a stale compressed_colortex_mask.
>
> Some testing shows that this can be triggered using OpenGL, although
> the GL_ARB_texture_barrier extension may be needed to make the result
> not undefined per the specification.

 In that case, we should decompress in texture_barrier and not in draw 
 calls.

 Marek
>>>
>>>
>>> texture_barrier does not need to be called though, the language
>>> changes might be needed.
>>>
>>> Basically the test is
>>>
>>> fbo1, fbo2 framebuffers with 1 color buffer each:
>>>
>>> bind fbo2 as texture
>>> clear fbo1 using shader
>>> bind fbo1 as texture
>>> clear fbo2 using shader
>>> clear fbo1 using clear (which results in cmask being allocated for fbo1)
>
>>> bind fbo2 as texture
>>> copy fbo2 to fbo1 using copy shader (which wrongly does not decompress fbo1)
>
> My apologies, these two lines should just be a copy fbo1 to fbo2,
> which does need to eleminate the cmask fast clear.

That sounds like a texture barrier is required.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 92626] [BYT] piglit glx@glx-query-drawable subtests failing

2015-10-23 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=92626

Jairo Miramontes  changed:

   What|Removed |Added

  Component|GLX |Drivers/DRI/i965
   Assignee|mesa-dev@lists.freedesktop. |i...@freedesktop.org
   |org |
 QA Contact|mesa-dev@lists.freedesktop. |intel-3d-bugs@lists.freedes
   |org |ktop.org

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] radeonsi: add support for Stoney asics (v2)

2015-10-23 Thread Alex Deucher
On Thu, Oct 22, 2015 at 10:44 PM, Michel Dänzer  wrote:
> On 23.10.2015 01:09, Alex Deucher wrote:
>> From: Samuel Li 
>>
>> v2 (agd): rebase on mesa master, split pci ids to
>> separate commit
>>
>> Signed-off-by: Samuel Li 
>
> [...]
>
>> @@ -540,6 +541,7 @@ const char *r600_get_llvm_processor_name(enum 
>> radeon_family family)
>>   case CHIP_ICELAND: return "iceland";
>>   case CHIP_CARRIZO: return "carrizo";
>>   case CHIP_FIJI: return "fiji";
>> + case CHIP_STONEY: return "stoney";
>>   default: return "";
>>   }
>>  }
>
> Do we need a fallback here for existing versions of LLVM which don't
> recognize "stoney" yet?

Is there a way to check if a particular llvm version supports a
specific processor name?  Or should we just do something like return
carrizo for llvm 3.7 and stoney for newer versions?

Alex

>
> Other than that, looks good to me.
>
>
> Patch 2 is
>
> Reviewed-by: Michel Dänzer 
>
>
> --
> Earthling Michel Dänzer   |   http://www.amd.com
> Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/20] i965: Allocate space on the gather pool for plain uniforms

2015-10-23 Thread Francisco Jerez
Abdiel Janulgue  writes:

> Reserve space in the gather pool where the gathered uniforms are flushed.
>
> Signed-off-by: Abdiel Janulgue 
> ---
>  src/mesa/drivers/dri/i965/gen6_vs_state.c | 8 
>  1 file changed, 8 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c 
> b/src/mesa/drivers/dri/i965/gen6_vs_state.c
> index 6653a6d..b78166e 100644
> --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
> +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
> @@ -120,6 +120,14 @@ gen6_upload_push_constants(struct brw_context *brw,
> */
>assert(stage_state->push_const_size <= 32);
> }
> +   /* Allocate gather pool space for uniform and UBO entries in 512-bit 
> chunks*/

Missing punctuation and space at the end of the comment.

> +   if (brw->gather_pool.bo != NULL) {
> +  if (prog_data->nr_params > 0) {
> + int num_consts = ALIGN(prog_data->nr_params, 4) / 4;
> + stage_state->push_const_offset = brw->gather_pool.next_offset;
> + brw->gather_pool.next_offset += (ALIGN(num_consts, 4) / 4) * 64;
> +  }

This whole if-statement could be simplified to:

| if (brw->gather_pool.bo != NULL) {
|stage_state->push_const_offset = brw->gather_pool.next_offset;
|brw->gather_pool.next_offset += DIV_ROUND_UP(prog_data->nr_params, 16);
| }

And shouldn't this be doing something to make sure that there's enough
room left in the pool to hold all push constants of the shader in order
to avoid overflow?

> +   }
>  }
>  
>  static void
> -- 
> 1.9.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] radeonsi: add support for Stoney asics (v2)

2015-10-23 Thread Marek Olšák
On Fri, Oct 23, 2015 at 5:11 PM, Alex Deucher  wrote:
> On Thu, Oct 22, 2015 at 10:44 PM, Michel Dänzer  wrote:
>> On 23.10.2015 01:09, Alex Deucher wrote:
>>> From: Samuel Li 
>>>
>>> v2 (agd): rebase on mesa master, split pci ids to
>>> separate commit
>>>
>>> Signed-off-by: Samuel Li 
>>
>> [...]
>>
>>> @@ -540,6 +541,7 @@ const char *r600_get_llvm_processor_name(enum 
>>> radeon_family family)
>>>   case CHIP_ICELAND: return "iceland";
>>>   case CHIP_CARRIZO: return "carrizo";
>>>   case CHIP_FIJI: return "fiji";
>>> + case CHIP_STONEY: return "stoney";
>>>   default: return "";
>>>   }
>>>  }
>>
>> Do we need a fallback here for existing versions of LLVM which don't
>> recognize "stoney" yet?
>
> Is there a way to check if a particular llvm version supports a
> specific processor name?  Or should we just do something like return
> carrizo for llvm 3.7 and stoney for newer versions?

We should just return "carrizo" for LLVM <= 3.7.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/20] i965: Allocate space on the gather pool for plain uniforms

2015-10-23 Thread Francisco Jerez
Francisco Jerez  writes:

> Abdiel Janulgue  writes:
>
>> Reserve space in the gather pool where the gathered uniforms are flushed.
>>
>> Signed-off-by: Abdiel Janulgue 
>> ---
>>  src/mesa/drivers/dri/i965/gen6_vs_state.c | 8 
>>  1 file changed, 8 insertions(+)
>>
>> diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c 
>> b/src/mesa/drivers/dri/i965/gen6_vs_state.c
>> index 6653a6d..b78166e 100644
>> --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
>> +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
>> @@ -120,6 +120,14 @@ gen6_upload_push_constants(struct brw_context *brw,
>> */
>>assert(stage_state->push_const_size <= 32);
>> }
>> +   /* Allocate gather pool space for uniform and UBO entries in 512-bit 
>> chunks*/
>
> Missing punctuation and space at the end of the comment.
>
>> +   if (brw->gather_pool.bo != NULL) {
>> +  if (prog_data->nr_params > 0) {
>> + int num_consts = ALIGN(prog_data->nr_params, 4) / 4;
>> + stage_state->push_const_offset = brw->gather_pool.next_offset;
>> + brw->gather_pool.next_offset += (ALIGN(num_consts, 4) / 4) * 64;
>> +  }
>
> This whole if-statement could be simplified to:
>
> | if (brw->gather_pool.bo != NULL) {
> |stage_state->push_const_offset = brw->gather_pool.next_offset;
> |brw->gather_pool.next_offset += DIV_ROUND_UP(prog_data->nr_params, 16);

* 64 of course.

> | }
>
> And shouldn't this be doing something to make sure that there's enough
> room left in the pool to hold all push constants of the shader in order
> to avoid overflow?
>
>> +   }
>>  }
>>  
>>  static void
>> -- 
>> 1.9.1
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] gallivm: fix tex offsets with mirror repeat linear

2015-10-23 Thread Jose Fonseca

On 22/10/15 23:42, srol...@vmware.com wrote:

From: Roland Scheidegger 

Can't see why anyone would ever want to use this, but it was clearly broken.
This fixes the piglit texwrap offset test using this combination.
---
  src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 9 +
  1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 125505e..26bfa0d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -405,16 +405,17 @@ lp_build_sample_wrap_linear(struct 
lp_build_sample_context *bld,
break;

 case PIPE_TEX_WRAP_MIRROR_REPEAT:
+  if (offset) {
+ offset = lp_build_int_to_float(coord_bld, offset);
+ offset = lp_build_div(coord_bld, offset, length_f);
+ coord = lp_build_add(coord_bld, coord, offset);
+  }
/* compute mirror function */
coord = lp_build_coord_mirror(bld, coord);

/* scale coord to length */
coord = lp_build_mul(coord_bld, coord, length_f);
coord = lp_build_sub(coord_bld, coord, half);
-  if (offset) {
- offset = lp_build_int_to_float(coord_bld, offset);
- coord = lp_build_add(coord_bld, coord, offset);
-  }

/* convert to int, compute lerp weight */
lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);



Nice finds.  Series is

Reviewed-by: Jose Fonseca 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 04/20] i965: Allocate space on the gather pool for UBO entries

2015-10-23 Thread Francisco Jerez
Abdiel Janulgue  writes:

> If there are UBO constant entries, append them to 
> stage_state->push_const_size.
> The gather pool contains the combined entries of both ordinary uniforms
> and UBO constants.
>
> Signed-off-by: Abdiel Janulgue 
> ---
>  src/mesa/drivers/dri/i965/gen6_vs_state.c | 20 ++--
>  1 file changed, 18 insertions(+), 2 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c 
> b/src/mesa/drivers/dri/i965/gen6_vs_state.c
> index b78166e..843df94 100644
> --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
> +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
> @@ -59,7 +59,9 @@ gen6_upload_push_constants(struct brw_context *brw,
> struct gl_context *ctx = &brw->ctx;
>  
> if (prog_data->nr_params == 0) {
> -  stage_state->push_const_size = 0;
> +  if (prog_data->nr_ubo_params == 0) {
> + stage_state->push_const_size = 0;
> +  }

The 'if (prog_data->nr_ubo_params == 0)' conditional here only seems to
be required because you don't assign stage_state->push_const_size
consistently below depending on whether 'prog_data->nr_params == 0' or
not.  Why don't you drop this assignment, the
'stage_state->push_const_size = ALIGN(prog_data->nr_params, 8) / 8;'
assignment in the other branch of this if, and the
'stage_state->push_const_size = ALIGN(prog_data->nr_params +
prog_data->nr_ubo_params, 8) / 8;' assignment introduced in this patch
and instead assign it once, right before the ' if (brw->gather_pool.bo
!= NULL)' conditional, like:

|stage_state->push_const_size = DIV_ROUND_UP(prog_data->nr_params +
|prog_data->nr_ubo_params, 8);

> } else {
>/* Updates the ParamaterValues[i] pointers for all parameters of the
> * basic type of PROGRAM_STATE_VAR.
> @@ -122,10 +124,24 @@ gen6_upload_push_constants(struct brw_context *brw,
> }
> /* Allocate gather pool space for uniform and UBO entries in 512-bit 
> chunks*/
> if (brw->gather_pool.bo != NULL) {
> +  unsigned gather_pool_next_offset = brw->gather_pool.next_offset;
> +

Set stage_state->push_const_offset to brw->gather_pool.next_offset up
front to avoid the need for this temporary.

>if (prog_data->nr_params > 0) {
>   int num_consts = ALIGN(prog_data->nr_params, 4) / 4;
> + gather_pool_next_offset += (ALIGN(num_consts, 4) / 4) * 64;
> +  }
> +
> +  if (prog_data->nr_ubo_params > 0) {
> + stage_state->push_const_size = ALIGN(prog_data->nr_params + 
> prog_data->nr_ubo_params, 8) / 8;
> + uint32_t num_constants = ALIGN(prog_data->nr_ubo_params, 4) / 4;
> + gather_pool_next_offset += (ALIGN(num_constants, 4) / 4) * 64;
> +  }

You set push_const_size to the 32B-aligned value of the sum of nr_params
and nr_ubo_params, but then increment gather_pool.next_offset by the sum
of the separately aligned values of both terms, which AFAICT may
overestimate the amount of space actually used from the pool.  I suggest
you drop the two conditionals above and instead do:

|brw->gather_pool.next_offset += DIV_ROUND_UP(prog_data->nr + 
prog_data->nr_ubo_params, 16) * 64;

> +
> +  if (gather_pool_next_offset > brw->gather_pool.next_offset) {
>   stage_state->push_const_offset = brw->gather_pool.next_offset;
> - brw->gather_pool.next_offset += (ALIGN(num_consts, 4) / 4) * 64;
> + brw->gather_pool.next_offset = gather_pool_next_offset;
> + assert(brw->gather_pool.next_offset < brw->gather_pool.bo->size);
> + assert(stage_state->push_const_offset < 
> brw->gather_pool.next_offset);
>}

This block should also become unnecessary if you make the changes I
suggested earlier.

> }
>  }
> -- 
> 1.9.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/vec4: check opcode on vec4_instruction::reads_flag(channel)

2015-10-23 Thread Kenneth Graunke
On Friday, October 23, 2015 04:04:35 PM Alejandro Piñeiro wrote:
> Commit f17b78 added an alternative reads_flag(channel) that returned
> if the instruction was reading a specific channel flag. By mistake it
> only took into account the predicate, but when the opcode is
> VS_OPCODE_UNPACK_FLAGS_SIMD4X2 there isn't any predicate, but the flag
> are used.
> 
> That mistake caused some regressions on old hw. More information on
> this bug:
> https://bugs.freedesktop.org/show_bug.cgi?id=92621
> ---
> 
> The bug mentions G965, ILK and G45, but I only have available ILK to
> test, so it would be good if someone confirms that the regressions
> get solved on that hw too. Having said so, it would be strange if
> it is not the case.
> 
> Additionally, the bug mentions 24 regressions. But it only lists 21.
> And I only detected 17 regressions on ILK. So it would be good
> to confirm how many regressions the series introduced, and if
> this new commit solve all of them.
> 
>  src/mesa/drivers/dri/i965/brw_ir_vec4.h | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h 
> b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
> index 74e9733..cc4104c 100644
> --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
> +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
> @@ -188,8 +188,8 @@ public:
>  
> bool reads_flag(unsigned c)
> {
> -  if (!reads_flag())
> - return false;
> +  if (opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2)
> + return true;
>  
>switch (predicate) {
>case BRW_PREDICATE_NONE:
> 

Thanks, Alejandro!

Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 04/20] i965: Allocate space on the gather pool for UBO entries

2015-10-23 Thread Francisco Jerez
Francisco Jerez  writes:

> Abdiel Janulgue  writes:
>
>> If there are UBO constant entries, append them to 
>> stage_state->push_const_size.
>> The gather pool contains the combined entries of both ordinary uniforms
>> and UBO constants.
>>
>> Signed-off-by: Abdiel Janulgue 
>> ---
>>  src/mesa/drivers/dri/i965/gen6_vs_state.c | 20 ++--
>>  1 file changed, 18 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c 
>> b/src/mesa/drivers/dri/i965/gen6_vs_state.c
>> index b78166e..843df94 100644
>> --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
>> +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
>> @@ -59,7 +59,9 @@ gen6_upload_push_constants(struct brw_context *brw,
>> struct gl_context *ctx = &brw->ctx;
>>  
>> if (prog_data->nr_params == 0) {
>> -  stage_state->push_const_size = 0;
>> +  if (prog_data->nr_ubo_params == 0) {
>> + stage_state->push_const_size = 0;
>> +  }
>
> The 'if (prog_data->nr_ubo_params == 0)' conditional here only seems to
> be required because you don't assign stage_state->push_const_size
> consistently below depending on whether 'prog_data->nr_params == 0' or
> not.  Why don't you drop this assignment, the
> 'stage_state->push_const_size = ALIGN(prog_data->nr_params, 8) / 8;'
> assignment in the other branch of this if, and the
> 'stage_state->push_const_size = ALIGN(prog_data->nr_params +
> prog_data->nr_ubo_params, 8) / 8;' assignment introduced in this patch
> and instead assign it once, right before the ' if (brw->gather_pool.bo
> != NULL)' conditional, like:
>
> |stage_state->push_const_size = DIV_ROUND_UP(prog_data->nr_params +
> |prog_data->nr_ubo_params, 8);
>
>> } else {
>>/* Updates the ParamaterValues[i] pointers for all parameters of the
>> * basic type of PROGRAM_STATE_VAR.
>> @@ -122,10 +124,24 @@ gen6_upload_push_constants(struct brw_context *brw,
>> }
>> /* Allocate gather pool space for uniform and UBO entries in 512-bit 
>> chunks*/
>> if (brw->gather_pool.bo != NULL) {
>> +  unsigned gather_pool_next_offset = brw->gather_pool.next_offset;
>> +
>
> Set stage_state->push_const_offset to brw->gather_pool.next_offset up
> front to avoid the need for this temporary.
>
>>if (prog_data->nr_params > 0) {
>>   int num_consts = ALIGN(prog_data->nr_params, 4) / 4;
>> + gather_pool_next_offset += (ALIGN(num_consts, 4) / 4) * 64;
>> +  }
>> +
>> +  if (prog_data->nr_ubo_params > 0) {
>> + stage_state->push_const_size = ALIGN(prog_data->nr_params + 
>> prog_data->nr_ubo_params, 8) / 8;
>> + uint32_t num_constants = ALIGN(prog_data->nr_ubo_params, 4) / 4;
>> + gather_pool_next_offset += (ALIGN(num_constants, 4) / 4) * 64;
>> +  }
>
> You set push_const_size to the 32B-aligned value of the sum of nr_params
> and nr_ubo_params, but then increment gather_pool.next_offset by the sum
> of the separately aligned values of both terms, which AFAICT may
> overestimate the amount of space actually used from the pool.  I suggest
> you drop the two conditionals above and instead do:
>
> |brw->gather_pool.next_offset += DIV_ROUND_UP(prog_data->nr + 
> prog_data->nr_ubo_params, 16) * 64;

Or even better so you can be sure that the value the gather pool offset
is incremented by matches the number of push constants actually used:

|brw->gather_pool.next_offset += ALIGN(stage_state->push_const_size *
|  REG_SIZE, 64);

>
>> +
>> +  if (gather_pool_next_offset > brw->gather_pool.next_offset) {
>>   stage_state->push_const_offset = brw->gather_pool.next_offset;
>> - brw->gather_pool.next_offset += (ALIGN(num_consts, 4) / 4) * 64;
>> + brw->gather_pool.next_offset = gather_pool_next_offset;
>> + assert(brw->gather_pool.next_offset < brw->gather_pool.bo->size);
>> + assert(stage_state->push_const_offset < 
>> brw->gather_pool.next_offset);
>>}
>
> This block should also become unnecessary if you make the changes I
> suggested earlier.
>
>> }
>>  }
>> -- 
>> 1.9.1
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] util: Add list_is_single() helper function

2015-10-23 Thread Eduardo Lima Mitev
Returns true is the list has exactly one element, otherwise returns false.
---
 src/util/list.h | 8 
 1 file changed, 8 insertions(+)

diff --git a/src/util/list.h b/src/util/list.h
index b98ce59..2b03461 100644
--- a/src/util/list.h
+++ b/src/util/list.h
@@ -99,6 +99,14 @@ static inline bool list_empty(struct list_head *list)
return list->next == list;
 }
 
+/**
+ * Return true is the list has exactly one element, otherwise return false.
+ */
+static inline bool list_is_single(struct list_head *list)
+{
+   return list->next != NULL && list->next->next == list;
+}
+
 static inline unsigned list_length(struct list_head *list)
 {
struct list_head *node;
-- 
2.5.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] i965/nir/opt_peephole_ffma: Bypass fusion if any operand of fadd and fmul is a const

2015-10-23 Thread Eduardo Lima Mitev
When both fadd and fmul instructions have at least one operand that is a
constant and it is only used once, the total number of instructions can
be reduced from 3 (1 ffma + 2 load_const) to 2 (1 fmul + 1 fadd); because
the constants will be progagated as immediate operands of fmul and fadd.

This patch detects these situations and prevents fusing fmul+fadd into ffma.

Shader-db results on i965 Haswell:

total instructions in shared programs: 6235835 -> 6225895 (-0.16%)
instructions in affected programs: 1124094 -> 1114154 (-0.88%)
total loops in shared programs:1979 -> 1979 (0.00%)
helped:7612
HURT:  843
GAINED:4
LOST:  0
---
 .../drivers/dri/i965/brw_nir_opt_peephole_ffma.c   | 31 ++
 1 file changed, 31 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c 
b/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c
index a8448e7..c7fc15a 100644
--- a/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c
+++ b/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c
@@ -133,6 +133,28 @@ get_mul_for_src(nir_alu_src *src, int num_components,
return alu;
 }
 
+/**
+ * Given a list of (at least two) nir_alu_src's, tells if any of them is a
+ * constant value and is used only once.
+ */
+static bool
+any_alu_src_is_a_constant(nir_alu_src srcs[])
+{
+   for (unsigned i = 0; i < 2; i++) {
+  if (srcs[i].src.ssa->parent_instr->type == nir_instr_type_load_const) {
+ nir_load_const_instr *load_const =
+nir_instr_as_load_const (srcs[i].src.ssa->parent_instr);
+
+ if (list_is_single(&load_const->def.uses) &&
+ list_empty(&load_const->def.if_uses)) {
+return true;
+ }
+  }
+   }
+
+   return false;
+}
+
 static bool
 brw_nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
 {
@@ -183,6 +205,15 @@ brw_nir_opt_peephole_ffma_block(nir_block *block, void 
*void_state)
   mul_src[0] = mul->src[0].src.ssa;
   mul_src[1] = mul->src[1].src.ssa;
 
+  /* If any of the operands of the fmul and any of the fadd is a constant,
+   * we bypass because it will be more efficient as the constants will be
+   * propagated as operands, potentially saving two load_const 
instructions.
+   */
+  if (any_alu_src_is_a_constant(mul->src) &&
+  any_alu_src_is_a_constant(add->src)) {
+ continue;
+  }
+
   if (abs) {
  for (unsigned i = 0; i < 2; i++) {
 nir_alu_instr *abs = nir_alu_instr_create(state->mem_ctx,
-- 
2.5.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/vec4: check opcode on vec4_instruction::reads_flag(channel)

2015-10-23 Thread Alejandro Piñeiro
On 23/10/15 17:46, Kenneth Graunke wrote:
> On Friday, October 23, 2015 04:04:35 PM Alejandro Piñeiro wrote:
>> Commit f17b78 added an alternative reads_flag(channel) that returned
>> if the instruction was reading a specific channel flag. By mistake it
>> only took into account the predicate, but when the opcode is
>> VS_OPCODE_UNPACK_FLAGS_SIMD4X2 there isn't any predicate, but the flag
>> are used.
>>
>> That mistake caused some regressions on old hw. More information on
>> this bug:
>> https://bugs.freedesktop.org/show_bug.cgi?id=92621
>> ---
>>
>> The bug mentions G965, ILK and G45, but I only have available ILK to
>> test, so it would be good if someone confirms that the regressions
>> get solved on that hw too. Having said so, it would be strange if
>> it is not the case.
>>
>> Additionally, the bug mentions 24 regressions. But it only lists 21.
>> And I only detected 17 regressions on ILK. So it would be good
>> to confirm how many regressions the series introduced, and if
>> this new commit solve all of them.
>>
>>  src/mesa/drivers/dri/i965/brw_ir_vec4.h | 4 ++--
>>  1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h 
>> b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
>> index 74e9733..cc4104c 100644
>> --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
>> +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
>> @@ -188,8 +188,8 @@ public:
>>  
>> bool reads_flag(unsigned c)
>> {
>> -  if (!reads_flag())
>> - return false;
>> +  if (opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2)
>> + return true;
>>  
>>switch (predicate) {
>>case BRW_PREDICATE_NONE:
>>
> Thanks, Alejandro!
>
> Reviewed-by: Kenneth Graunke 

Thanks to you for the quick review!

I have just pushed the commit. But as I mentioned before, I only have an
ILK to test the fix. So although I assume that this will solve the
problem on G45 and G965, it is still pending to confirm it.

BR

-- 
Alejandro Piñeiro (apinhe...@igalia.com)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] i965/nir/opt_peephole_ffma: Bypass fusion if any operand of fadd and fmul is a const

2015-10-23 Thread Patrick Baggett
On Fri, Oct 23, 2015 at 10:55 AM, Eduardo Lima Mitev 
wrote:

> When both fadd and fmul instructions have at least one operand that is a
> constant and it is only used once, the total number of instructions can
> be reduced from 3 (1 ffma + 2 load_const) to 2 (1 fmul + 1 fadd); because
> the constants will be progagated as immediate operands of fmul and fadd.
>
> This patch detects these situations and prevents fusing fmul+fadd into
> ffma.
>
> Shader-db results on i965 Haswell:
>
> total instructions in shared programs: 6235835 -> 6225895 (-0.16%)
> instructions in affected programs: 1124094 -> 1114154 (-0.88%)
> total loops in shared programs:1979 -> 1979 (0.00%)
> helped:7612
> HURT:  843
> GAINED:4
> LOST:  0
> ---
>  .../drivers/dri/i965/brw_nir_opt_peephole_ffma.c   | 31
> ++
>  1 file changed, 31 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c
> b/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c
> index a8448e7..c7fc15a 100644
> --- a/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c
> +++ b/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c
> @@ -133,6 +133,28 @@ get_mul_for_src(nir_alu_src *src, int num_components,
> return alu;
>  }
>
> +/**
> + * Given a list of (at least two) nir_alu_src's, tells if any of them is a
> + * constant value and is used only once.
> + */
> +static bool
> +any_alu_src_is_a_constant(nir_alu_src srcs[])
> +{
> +   for (unsigned i = 0; i < 2; i++) {
> +  if (srcs[i].src.ssa->parent_instr->type ==
> nir_instr_type_load_const) {
> + nir_load_const_instr *load_const =
> +nir_instr_as_load_const (srcs[i].src.ssa->parent_instr);
> +
> + if (list_is_single(&load_const->def.uses) &&
> + list_empty(&load_const->def.if_uses)) {
> +return true;
> + }
> +  }
> +   }
> +
> +   return false;
> +}
> +
>

The comment above this functions reads "Given a list of (at least two)
nir_alu_src's...", but the function checks exactly two. Was it your
intention to support lists with size > 2?


>  static bool
>  brw_nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
>  {
> @@ -183,6 +205,15 @@ brw_nir_opt_peephole_ffma_block(nir_block *block,
> void *void_state)
>mul_src[0] = mul->src[0].src.ssa;
>mul_src[1] = mul->src[1].src.ssa;
>
> +  /* If any of the operands of the fmul and any of the fadd is a
> constant,
> +   * we bypass because it will be more efficient as the constants
> will be
> +   * propagated as operands, potentially saving two load_const
> instructions.
> +   */
> +  if (any_alu_src_is_a_constant(mul->src) &&
> +  any_alu_src_is_a_constant(add->src)) {
> + continue;
> +  }
> +
>if (abs) {
>   for (unsigned i = 0; i < 2; i++) {
>  nir_alu_instr *abs = nir_alu_instr_create(state->mem_ctx,
> --
> 2.5.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/2] Nir: Allow CSE of SSBO loads

2015-10-23 Thread Jason Ekstrand
On Thu, Oct 22, 2015 at 11:13 PM, Iago Toral  wrote:
> On Thu, 2015-10-22 at 09:09 -0700, Jason Ekstrand wrote:
>> On Thu, Oct 22, 2015 at 4:21 AM, Iago Toral Quiroga  
>> wrote:
>> > I implemented this first as a separate optimization pass in GLSL IR [1], 
>> > but
>> > Curro pointed out that this being pretty much a restricted form of a CSE 
>> > pass
>> > it would probably make more sense to do it inside CSE (and we no longer 
>> > have
>> > a CSE pass in GLSL IR).
>> >
>> > Unlike other things we CSE in NIR, in the case of SSBO loads we need to 
>> > make
>> > sure that we invalidate previous entries in the set in the presence of
>> > conflicting instructions (i.e. SSBO writes to the same block and offset) or
>> > in the presence of memory barriers.
>> >
>> > If this is accepted I intend to extend this to also cover image reads, 
>> > which
>> > follow similar behavior.
>> >
>> > No regressions observed in piglit or dEQP's SSBO functional tests.
>> >
>> > [1] http://lists.freedesktop.org/archives/mesa-dev/2015-October/097718.html
>>
>> I think you've gotten enough NAK's that I don't need to chime in
>> there.  Unfortunately, solving this in general is something of a
>> research project that both Connor and I have been thinking about for
>> quite some time.  I've been thinking off-and-on about how to add a
>> proper memory model to lower_vars_to_ssa for almost a year now and
>> still haven't come up with a good way to do it.  I don't know whether
>> SSBO's would be simpler or not.  We need a proper memory model for
>> both lower_vars_to_ssa and SSBO load/stores (and shared local
>> variables) but it's a substantial research project.
>>
>> This isn't to say that you couldn't do it.  Just know what you're taking on. 
>> ;-)
>
> Yeah, it does not make sense that I try to do this, you guys have
> clearly given this much more thought than me and know much better how a
> solution for this would fit in NIR than me.
>
>> That said, here's a suggestion for something that we *could* write
>> today, wouldn't be very hard, and wold solve a decent number of cases.
>>
>> For each block:
>>
>> 1) Create a new instruction set (don't use anything from any previous blocks)
>> 2) call add_or_rewrite on all ssbo load operations
>> 3) If you ever see a barrier or ssbo store, destroy the entire
>> instruction set and start again.
>
> Yep, this is what I was thinking for the load-combine pass that Connor
> suggested. However, I think that in this case we do not need to destroy
> the entire set when we find a store, only for memory barriers, right? I
> mean, there should be nothing preventing us from checking the
> offset/block of the store and compare it with the offset/block of the
> loads in the set to decide which ones we need to remove (like I was
> doing in my last patch)

That's where you get into the "special casing" I mentioned below.  If
you have an direct store, you would have to throw away any indirect
loads and then insert a fake direct load for the given offset.  If you
have an indirect store, you would have to throw away everything and
then insert a fake indirect load for the given offset.  So, yes, you
can do it, but it'll take a little more work.  You'll also probably
want to use a different hashing function than we use for CSE because
you need to be able to handle both real and fake loads and have them
compare/hash the same.

Does that make sense?
--Jason

>> This is something you could put together fairly quickly and would
>> handle a fair number of cases.  With a little special casing, you may
>> also be able to handle store and then an immediate load of the same
>> value or duplicate stores.  Anything much more complex than that is
>> going to take a lot more thought.
>
> Yes, I'll give this a try next. Thanks for all the comments and
> suggestions!
>
> Iago
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] i965/nir/opt_peephole_ffma: Bypass fusion if any operand of fadd and fmul is a const

2015-10-23 Thread Eduardo Lima Mitev
On 10/23/2015 06:22 PM, Patrick Baggett wrote:
> 
> 
> On Fri, Oct 23, 2015 at 10:55 AM, Eduardo Lima Mitev  > wrote:
> 
> When both fadd and fmul instructions have at least one operand that is a
> constant and it is only used once, the total number of instructions can
> be reduced from 3 (1 ffma + 2 load_const) to 2 (1 fmul + 1 fadd);
> because
> the constants will be progagated as immediate operands of fmul and fadd.
> 
> This patch detects these situations and prevents fusing fmul+fadd
> into ffma.
> 
> Shader-db results on i965 Haswell:
> 
> total instructions in shared programs: 6235835 -> 6225895 (-0.16%)
> instructions in affected programs: 1124094 -> 1114154 (-0.88%)
> total loops in shared programs:1979 -> 1979 (0.00%)
> helped:7612
> HURT:  843
> GAINED:4
> LOST:  0
> ---
>  .../drivers/dri/i965/brw_nir_opt_peephole_ffma.c   | 31
> ++
>  1 file changed, 31 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c
> b/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c
> index a8448e7..c7fc15a 100644
> --- a/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c
> +++ b/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c
> @@ -133,6 +133,28 @@ get_mul_for_src(nir_alu_src *src, int
> num_components,
> return alu;
>  }
> 
> +/**
> + * Given a list of (at least two) nir_alu_src's, tells if any of
> them is a
> + * constant value and is used only once.
> + */
> +static bool
> +any_alu_src_is_a_constant(nir_alu_src srcs[])
> +{
> +   for (unsigned i = 0; i < 2; i++) {
> +  if (srcs[i].src.ssa->parent_instr->type ==
> nir_instr_type_load_const) {
> + nir_load_const_instr *load_const =
> +nir_instr_as_load_const (srcs[i].src.ssa->parent_instr);
> +
> + if (list_is_single(&load_const->def.uses) &&
> + list_empty(&load_const->def.if_uses)) {
> +return true;
> + }
> +  }
> +   }
> +
> +   return false;
> +}
> +
> 
> 
> The comment above this functions reads "Given a list of (at least two)
> nir_alu_src's...", but the function checks exactly two. Was it your
> intention to support lists with size > 2?
>  

Not really. It actually expects the sources of a mul or an add
instruction, which are exactly two. I will clarify the comment, before
pushing, to something like:

"Given a list of two nir_alu_src's, tells if any of them is a constant
value and is used only once. If so, returns true, otherwise returns false."

Thanks!

Eduardo

> 
>  static bool
>  brw_nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
>  {
> @@ -183,6 +205,15 @@ brw_nir_opt_peephole_ffma_block(nir_block
> *block, void *void_state)
>mul_src[0] = mul->src[0].src.ssa;
>mul_src[1] = mul->src[1].src.ssa;
> 
> +  /* If any of the operands of the fmul and any of the fadd is
> a constant,
> +   * we bypass because it will be more efficient as the
> constants will be
> +   * propagated as operands, potentially saving two load_const
> instructions.
> +   */
> +  if (any_alu_src_is_a_constant(mul->src) &&
> +  any_alu_src_is_a_constant(add->src)) {
> + continue;
> +  }
> +
>if (abs) {
>   for (unsigned i = 0; i < 2; i++) {
>  nir_alu_instr *abs = nir_alu_instr_create(state->mem_ctx,
> --
> 2.5.3
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org 
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] i965/fs: Make emit_uniformize a no-op for immediates

2015-10-23 Thread Kristian Høgsberg
On Fri, Oct 23, 2015 at 5:38 AM, Francisco Jerez  wrote:
> Kristian Høgsberg  writes:
>
>> On Tue, Oct 20, 2015 at 11:56 AM, Francisco Jerez  
>> wrote:
>>> Kristian Høgsberg  writes:
>>>
 On Tue, Oct 20, 2015 at 3:16 AM, Francisco Jerez  
 wrote:
> Kristian Høgsberg  writes:
>
>> On Mon, Oct 19, 2015 at 4:19 AM, Francisco Jerez  
>> wrote:
>>> Neil Roberts  writes:
>>>
 Just a thought, would it be better to move this check into the
 eliminate_find_live_channel optimisation? That way it could catch
 sources that become immediates through later optimisations. One problem
 with this that I've seen before is that eliminating the
 FIND_LIVE_CHANNEL doesn't cause the subsequent BROADCAST to be
 eliminated because the copy propagation doesn't work.
>>>
>>> I believe in this particular case the BROADCAST instruction should
>>> already be eliminated (by opt_algebraic), because its first source is an
>>> immediate, so this optimization would in fact be redundant with
>>> opt_algebraic+constant propagation if it weren't because the latter is
>>> unable to handle scalar copies.
>>
>> Yes, is happens as you say: first the FIND_LIVE_CHANNEL gets
>> eliminated because it's outside control flow
>> (fs_visitor::eliminate_find_live_channel). Then the broadcast gets
>> reduced to a MOV in opt_algebraic because src0 is uniform (immediate
>> constant). The problem then is the dst of the MOV has stride 0 which
>> can_propagate_from() then bails on.
>>
>>> Another possibility that would likely
>>> be more general than this (because it would handle cases in which, as
>>> Neil said, the argument becomes an immediate only after optimization,
>>> and because it would also avoid the issue with liveness analysis
>>> extending the live ranges of scalar values incorrectly [1]) would be to
>>> extend the destination register of the BROADCAST instructions to a
>>> vector [2] and then add a case to the switch statement of
>>> try_constant_propagate() so that the immediate MOV resulting from
>>> opt_algebraic is propagated into surface instructions (see attachment).
>>
>> I wrote exactly this code to make constant propagate work, plus adding
>> the extra opcodes to the switch statement. It works and I could
>> certainly send that out after this, but
>>
>> 1) This doesn't mean we shouldn't do the if (src.file == IMM)
>> shortcut. It saves the compiler a bit of work in the very common case
>> of
>> non-indirect buffer access.
>>
>> 2) I'm not even sure it makes sense to extend copy-propagation to do
>> this (which is why I went back to just the IMM test). Anything that
>> would be an immediate at this point should be an immediate, if not
>> we're missing something in nir.
>>
> Still this doesn't address the root of the problem, which is that
> emit_uniformize() emits scalar code that the rest of the compiler is not
> able to handle properly.

 This is not a case of papering over the root cause, it's about not
 creating the root cause in the first place. The output of
 emit_uniformize() always ends up as either a surface or a sampler
 index, which we only look at later in the generator. There are no
 other cases where the result of emit_uniformize() might be part of an
 expression that we can copy propagate or otherwise optimize. If the
 input to emit_uniformize() isn't an immediate where it could be, nir
 optimization needs fixing. So if we add these two lines to
 emit_uniformize() to pass immediates straight through, we avoid
 generating code that we have to extend the copy prop pass to handle.

>>>
>>> Kristian, there are legitimate uses of emit_uniformize() in which the
>>> argument is not an immediate but still can be optimized out later on --
>>> E.g. for images it will frequently be a uniform register, or a
>>> non-constant expression calculated within uniform control flow (in which
>>> case the FIND_LIVE_CHANNEL instruction emitted here will be reduced to a
>>> constant MOV by eliminate_find_live_chaso nnel()).  In such cases we still
>>> want copy-propagation to kick in, but it wont because of the problem I
>>> was talking about with scalar writes.  Even if the instructions emitted
>>> by emit_uniformize() cannot be optimized out, liveness analysis will
>>> overestimate the live ranges of the temporaries used by
>>> emit_uniformize() for the same reason, potentially causing the register
>>> allocator to spill or run out of registers.
>>
>> Yeah, fair point. I went and took a look at non-constant surface index
>> and sent out a new series that make that work better as well as ssbo
>> write optimizations. I took out the imm shortcut and put in the
>> generic optimizations. It makes non-const surface index a little
>> better, but as I wrote in the co

[Mesa-dev] [PATCH 1/2] radeonsi: add support for Stoney asics (v3)

2015-10-23 Thread Alex Deucher
From: Samuel Li 

v2 (agd): rebase on mesa master, split pci ids to
separate commit
v3 (agd): use carrizo for llvm processor name for
llvm 3.7 and older

Signed-off-by: Samuel Li 
---
 src/gallium/drivers/radeon/r600_pipe_common.c | 6 ++
 src/gallium/drivers/radeon/radeon_winsys.h| 1 +
 src/gallium/drivers/radeonsi/si_state.c   | 1 +
 src/gallium/winsys/amdgpu/drm/amdgpu_id.h | 8 ++--
 src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 6 +-
 5 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 7ac94ca..4ce0c6a 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -416,6 +416,7 @@ static const char* r600_get_chip_name(struct 
r600_common_screen *rscreen)
case CHIP_ICELAND: return "AMD ICELAND";
case CHIP_CARRIZO: return "AMD CARRIZO";
case CHIP_FIJI: return "AMD FIJI";
+   case CHIP_STONEY: return "AMD STONEY";
default: return "AMD unknown";
}
 }
@@ -540,6 +541,11 @@ const char *r600_get_llvm_processor_name(enum 
radeon_family family)
case CHIP_ICELAND: return "iceland";
case CHIP_CARRIZO: return "carrizo";
case CHIP_FIJI: return "fiji";
+#if HAVE_LLVM <= 0x0307
+   case CHIP_STONEY: return "carrizo";
+#else
+   case CHIP_STONEY: return "stoney";
+#endif
default: return "";
}
 }
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h 
b/src/gallium/drivers/radeon/radeon_winsys.h
index b91e1ad..5f13c1e 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -137,6 +137,7 @@ enum radeon_family {
 CHIP_ICELAND,
 CHIP_CARRIZO,
 CHIP_FIJI,
+CHIP_STONEY,
 CHIP_LAST,
 };
 
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 243bdc6..a71ff49 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3336,6 +3336,7 @@ static void si_init_config(struct si_context *sctx)
break;
case CHIP_KABINI:
case CHIP_MULLINS:
+   case CHIP_STONEY:
raster_config = 0x;
raster_config_1 = 0x;
break;
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_id.h 
b/src/gallium/winsys/amdgpu/drm/amdgpu_id.h
index 8882c41..90fe0cd 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_id.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_id.h
@@ -151,11 +151,15 @@ enum {
 
 /* CZ specific rev IDs */
 enum {
-   CZ_CARRIZO_A0  = 0x01,
+   CARRIZO_A0   = 0x01,
+STONEY_A0= 0x61,
CZ_UNKNOWN  = 0xFF
 };
 
 #define ASICREV_IS_CARRIZO(eChipRev) \
-   (eChipRev >= CARRIZO_A0)
+   ((eChipRev >= CARRIZO_A0) && (eChipRev < STONEY_A0))
+
+#define ASICREV_IS_STONEY(eChipRev) \
+   ((eChipRev >= STONEY_A0) && (eChipRev < CZ_UNKNOWN))
 
 #endif /* AMDGPU_ID_H */
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index c877249..32cd9d9 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -226,7 +226,11 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws)
   break;
case CHIP_CARRIZO:
   ws->family = FAMILY_CZ;
-  ws->rev_id = CZ_CARRIZO_A0;
+  ws->rev_id = CARRIZO_A0;
+  break;
+   case CHIP_STONEY:
+  ws->family = FAMILY_CZ;
+  ws->rev_id = STONEY_A0;
   break;
case CHIP_FIJI:
   ws->family = FAMILY_VI;
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/20] i965: Store gather table information in the program data

2015-10-23 Thread Francisco Jerez
Abdiel Janulgue  writes:

> The resource streamer is able to gather and pack sparsely-located
> constant data from any buffer object by a referring to a gather table
> This patch adds support for keeping track of these constant data
> fetches into a gather table.
>
> The gather table is generated from two sources. Ordinary uniform fetches
> are stored first. These are then combined with a separate table containing
> UBO entries. The separate entry for UBOs is needed to make it easier to
> generate the gather mask when combining and packing the constant data.
>
> Signed-off-by: Abdiel Janulgue 
> ---
>  src/mesa/drivers/dri/i965/brw_context.h  | 11 +++
>  src/mesa/drivers/dri/i965/brw_gs.c   |  5 +
>  src/mesa/drivers/dri/i965/brw_program.c  |  5 +
>  src/mesa/drivers/dri/i965/brw_shader.cpp |  5 -
>  src/mesa/drivers/dri/i965/brw_shader.h   | 11 +++
>  src/mesa/drivers/dri/i965/brw_vs.c   |  6 ++
>  src/mesa/drivers/dri/i965/brw_wm.c   |  5 +
>  7 files changed, 47 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> b/src/mesa/drivers/dri/i965/brw_context.h
> index 33c49b7..de0db5a 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -364,9 +364,12 @@ struct brw_stage_prog_data {
> GLuint nr_params;   /**< number of float params/constants */
> GLuint nr_pull_params;
> unsigned nr_image_params;
> +   unsigned nr_ubo_params;

You already used this field in an earlier patch of this series, please
include the declaration in the other patch to avoid breaking the build
and make bisecting easier.

> +   unsigned nr_gather_table;

'nr_gather_table' sounds like you may have more than one gather table ;).
How about 'nr_gather_constants'? (And maybe rename 'gather_table' to
'gather_constants' if you want to keep the names of both fields
consistent.)

>  
> unsigned curb_read_length;
> unsigned total_scratch;
> +   unsigned max_ubo_const_block;

Because gen7_submit_gather_table() is already O(n) on the number of
gather table entries and you could probably calculate it easily there, I
doubt you gain much from having this value precomputed in
brw_stage_prog_data, but if you insist I guess some other name like
"max_gather_buffer_index" would be more descriptive.

>  
> /**
>  * Register where the thread expects to find input data from the URB
> @@ -385,6 +388,14 @@ struct brw_stage_prog_data {
> const gl_constant_value **param;
> const gl_constant_value **pull_param;
>  
> +   /** Combined gather table containing uniform and UBO entries */
> +   struct {
> +  int reg;

What's reg supposed to do?

> +  unsigned channel_mask;
> +  unsigned const_block;
> +  unsigned const_offset;
> +   } *gather_table;
> +
> /**
>  * Image metadata passed to the shader as uniforms.  This is deliberately
>  * ignored by brw_stage_prog_data_compare() because its contents don't 
> have
> diff --git a/src/mesa/drivers/dri/i965/brw_gs.c 
> b/src/mesa/drivers/dri/i965/brw_gs.c
> index 16ea684..17e87b8 100644
> --- a/src/mesa/drivers/dri/i965/brw_gs.c
> +++ b/src/mesa/drivers/dri/i965/brw_gs.c
> @@ -72,6 +72,11 @@ brw_codegen_gs_prog(struct brw_context *brw,
>rzalloc_array(NULL, struct brw_image_param, gs->NumImages);
> c.prog_data.base.base.nr_params = param_count;
> c.prog_data.base.base.nr_image_params = gs->NumImages;
> +   c.prog_data.base.base.nr_gather_table = 0;
> +   c.prog_data.base.base.gather_table =
> +  rzalloc_size(NULL, sizeof(*c.prog_data.base.base.gather_table) *
> +   (c.prog_data.base.base.nr_params +
> +c.prog_data.base.base.nr_ubo_params));
>  
If you named the struct that represents a gather table entry you could
use rzalloc_array here.

> if (brw->gen >= 7) {
>if (gp->program.OutputType == GL_POINTS) {
> diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
> b/src/mesa/drivers/dri/i965/brw_program.c
> index 1ac0ed2..aa805be 100644
> --- a/src/mesa/drivers/dri/i965/brw_program.c
> +++ b/src/mesa/drivers/dri/i965/brw_program.c
> @@ -558,6 +558,10 @@ brw_stage_prog_data_compare(const struct 
> brw_stage_prog_data *a,
> if (memcmp(a->pull_param, b->pull_param, a->nr_pull_params * sizeof(void 
> *)))
>return false;
>  
> +   if (memcmp(a->gather_table, b->gather_table,
> +  a->nr_gather_table * sizeof(*a->gather_table)))
> +  return false;
> +

Because the gather table is a function of the program code and not the
other way around this memcmp() shouldn't be necessary.

> return true;
>  }
>  
> @@ -568,6 +572,7 @@ brw_stage_prog_data_free(const void *p)
>  
> ralloc_free(prog_data->param);
> ralloc_free(prog_data->pull_param);
> +   ralloc_free(prog_data->gather_table);
>  }
>  
>  void
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
> b/src/mesa/drivers/dri/i965/brw_shader.cpp
> index de1a7fe..9d45cfe 10064

Re: [Mesa-dev] [PATCH v3 5/7] radeonsi: Implement DCC fast clear.

2015-10-23 Thread Bas Nieuwenhuizen
On Fri, Oct 23, 2015 at 4:57 PM, Marek Olšák  wrote:
> On Fri, Oct 23, 2015 at 1:57 PM, Bas Nieuwenhuizen
>  wrote:
>> On Fri, Oct 23, 2015 at 1:52 PM, Marek Olšák  wrote:
>>> On Fri, Oct 23, 2015 at 1:30 PM, Bas Nieuwenhuizen
>>>  wrote:
 On Fri, Oct 23, 2015 at 12:50 PM, Marek Olšák  wrote:
> On Fri, Oct 23, 2015 at 12:17 PM, Bas Nieuwenhuizen
>  wrote:
>> On Thu, Oct 22, 2015 at 12:12 PM, Marek Olšák  wrote:
 diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
 b/src/gallium/drivers/radeonsi/si_descriptors.c
 index 5548cba3..a277fa5 100644
 --- a/src/gallium/drivers/radeonsi/si_descriptors.c
 +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
 @@ -234,7 +234,7 @@ static void si_set_sampler_views(struct 
 pipe_context *ctx,
 } else {
 samplers->depth_texture_mask &= ~(1 << 
 slot);
 }
 -   if (rtex->cmask.size || rtex->fmask.size) {
 +   if (rtex->cmask.size || rtex->fmask.size || 
 rtex->surface.dcc_enabled) {
 samplers->compressed_colortex_mask |= 
 1 << slot;
>>>
>>> I'd like this flag to be set only when dirty_level_mask is non-zero.
>>> Setting this for all textures that have DCC is quite expensive in draw
>>> calls.
>>
>> I think this code is incorrect even without considering DCC. If we do
>> a fast clear on a surface which allocates a cmask and then use that
>> surface as a texture without calling set_sampler_views in between
>> (because it was bound before) we get a stale compressed_colortex_mask.
>>
>> Some testing shows that this can be triggered using OpenGL, although
>> the GL_ARB_texture_barrier extension may be needed to make the result
>> not undefined per the specification.
>
> In that case, we should decompress in texture_barrier and not in draw 
> calls.
>
> Marek


 texture_barrier does not need to be called though, the language
 changes might be needed.

 Basically the test is

 fbo1, fbo2 framebuffers with 1 color buffer each:

 bind fbo2 as texture
 clear fbo1 using shader
 bind fbo1 as texture
 clear fbo2 using shader
 clear fbo1 using clear (which results in cmask being allocated for fbo1)
>>
 bind fbo2 as texture
 copy fbo2 to fbo1 using copy shader (which wrongly does not decompress 
 fbo1)
>>
>> My apologies, these two lines should just be a copy fbo1 to fbo2,
>> which does need to eleminate the cmask fast clear.
>
> That sounds like a texture barrier is required.
>
> Marek

I think it valid if even without ARB_texture_barrier as the only place
where we could have a rendering feedback loop is the clear. The shader
clears and the copy do not have the same fbo as texture and therefore
no render feedback loop.

I am not sure if a clear classifies as a GL rendering operation. If it
is not, we have no render feedback loop. If it is, it is still not a
render feedback loop as the active fragment and vertex shaders (the
clear shader) do not contain instructions that sample from that
texture.

- Bas
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/20] i965: Assign hw-binding table index for each UBO constant buffer.

2015-10-23 Thread Francisco Jerez
Abdiel Janulgue  writes:

> To be able to refer to a constant buffer, the resource streamer needs
> to index it with a hardware binding table entry. This blankets the ubo
> buffers with hardware binding table indices.
>
> Gather constants hardware fetches in 16-entry binding table blocks.
> So we need to use a block that is unused.
>
> Signed-off-by: Abdiel Janulgue 
> ---
>  src/mesa/drivers/dri/i965/brw_context.h  | 11 +++
>  src/mesa/drivers/dri/i965/brw_wm_surface_state.c |  6 ++
>  2 files changed, 17 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> b/src/mesa/drivers/dri/i965/brw_context.h
> index de0db5a..58edaf4 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -762,6 +762,17 @@ struct brw_vs_prog_data {
>  
>  #define SURF_INDEX_GEN6_SOL_BINDING(t) (t)
>  
> +/** Start of hardware binding table index for uniform gather constant 
> entries.
> + *  This must be aligned to the start of a hardware binding table block (a 
> block
> + *  is a group 16 binding table entries).
> + */
> +#define BRW_UNIFORM_GATHER_INDEX_START 32
> +
Does this mean that you use a fixed range of binding table indices (32
to 47) for gather?  Isn't that going to clash with entries allocated for
other surfaces like textures and render buffers if there are enough of
them?  Shouldn't you allocate this block dynamically as is done for
other surfaces in backend_shader::assign_common_binding_table_offsets?

I suggest you allocate the binding table block used for gather before
all other surfaces (i.e. at index 0) so you don't need to waste entries
as padding in order to guarantee that the block has the expected
alignment.

> +/** Appended to the end of the binding table index for uniform constant 
> buffers
> + *  to indicate start of the UBO gather constant binding table.
> + */
> +#define BRW_UBO_GATHER_INDEX_APPEND 2

What does this do?  Two entries of the gather block of surfaces are
reserved for something?  What for?

> +
>  /* Note: brw_gs_prog_data_compare() must be updated when adding fields to
>   * this struct!
>   */
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
> b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> index 8213f4e..fab553b 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> @@ -917,6 +917,12 @@ brw_upload_ubo_surfaces(struct brw_context *brw,
>bo->size - binding->Offset,
>&surf_offsets[i],
>dword_pitch);
> +  if (brw->gather_pool.bo) {
> + int bt_idx = BRW_UNIFORM_GATHER_INDEX_START +
> +BRW_UBO_GATHER_INDEX_APPEND + i;
> + gen7_edit_hw_binding_table_entry(brw, stage_state->stage,
> +  bt_idx, surf_offsets[i]);

Is there any reason you need to edit the binding table at this point?
Couldn't you just assign the surface state offsets to the entries
allocated for gather in the brw_stage_state::surf_offset array, so that
they're uploaded in one block together with the other surface state
entries in the binding table by brw_upload_binding_table()?

> +  }
> }
>  
> if (shader->NumUniformBlocks)
> -- 
> 1.9.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 07/20] i965: Assign hw-binding table index for uniform constant buffer block

2015-10-23 Thread Francisco Jerez
Abdiel Janulgue  writes:

> Assign the uploaded uniform block with hardware binding table indices.
> This is indexed by the resource streamer to fetch the constant buffers
> referred to by our gather table entries.
>
> Signed-off-by: Abdiel Janulgue 
> ---
>  src/mesa/drivers/dri/i965/gen6_vs_state.c | 13 +++--
>  1 file changed, 11 insertions(+), 2 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c 
> b/src/mesa/drivers/dri/i965/gen6_vs_state.c
> index 843df94..d43af5b 100644
> --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
> +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
> @@ -72,9 +72,18 @@ gen6_upload_push_constants(struct brw_context *brw,
>gl_constant_value *param;
>unsigned i;
>  
> -  param = brw_state_batch(brw, type,
> -   prog_data->nr_params * sizeof(gl_constant_value),
> +  const uint32_t size = prog_data->nr_params * sizeof(gl_constant_value);
> +  param = brw_state_batch(brw, type, size,
> 32, &stage_state->push_const_offset);
> +  if (brw->gather_pool.bo != NULL) {
> + uint32_t surf_offset = 0;
> + brw_create_constant_surface(brw, brw->batch.bo,
> + stage_state->push_const_offset,
> + size, &surf_offset, false);
> + gen7_edit_hw_binding_table_entry(brw, stage_state->stage,
> +  BRW_UNIFORM_GATHER_INDEX_START,
> +  surf_offset);
> +  }

Same question as in the last patch: Why don't you use the normal
infrastructure for uploading binding tables?

>  
>STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
>  
> -- 
> 1.9.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] i965/skl: Add GT4 PCI IDs

2015-10-23 Thread Ben Widawsky
Like other gen8+ hardware, the hardware automatically scales up thread counts
and URB sizes, so there is no need to do anything but add the PCI IDs.

FINISHME: This patch still needs testing before merge.

Cc: mesa-sta...@lists.freedesktop.org
Signed-off-by: Ben Widawsky 
---
 include/pci_ids/i965_pci_ids.h  | 5 -
 src/mesa/drivers/dri/i965/brw_device_info.c | 4 
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h
index 8a42599..7d23547 100644
--- a/include/pci_ids/i965_pci_ids.h
+++ b/include/pci_ids/i965_pci_ids.h
@@ -122,8 +122,11 @@ CHIPSET(0x191D, skl_gt2, "Intel(R) Skylake WKS GT2")
 CHIPSET(0x191E, skl_gt2, "Intel(R) Skylake ULX GT2")
 CHIPSET(0x1921, skl_gt2, "Intel(R) Skylake ULT GT2F")
 CHIPSET(0x1926, skl_gt3, "Intel(R) Skylake ULT GT3")
-CHIPSET(0x192A, skl_gt3, "Intel(R) Skylake SRV GT3")
 CHIPSET(0x192B, skl_gt3, "Intel(R) Skylake Halo GT3")
+CHIPSET(0x1932, skl_gt4, "Intel(R) Skylake GT4")
+CHIPSET(0x193A, skl_gt4, "Intel(R) Skylake GT4")
+CHIPSET(0x193B, skl_gt4, "Intel(R) Skylake GT4")
+CHIPSET(0x193D, skl_gt4, "Intel(R) Skylake GT4")
 CHIPSET(0x22B0, chv, "Intel(R) HD Graphics (Cherryview)")
 CHIPSET(0x22B1, chv, "Intel(R) HD Graphics (Cherryview)")
 CHIPSET(0x22B2, chv, "Intel(R) HD Graphics (Cherryview)")
diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c 
b/src/mesa/drivers/dri/i965/brw_device_info.c
index a6a3bb6..e7a016c 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.c
+++ b/src/mesa/drivers/dri/i965/brw_device_info.c
@@ -335,6 +335,10 @@ static const struct brw_device_info 
brw_device_info_skl_gt3 = {
GEN9_FEATURES, .gt = 3,
 };
 
+static const struct brw_device_info brw_device_info_skl_gt4 = {
+   GEN9_FEATURES, .gt = 4,
+};
+
 static const struct brw_device_info brw_device_info_bxt = {
GEN9_FEATURES,
.is_broxton = 1,
-- 
2.6.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] i965/skl: PCI ID cleanup and brand strings

2015-10-23 Thread Ben Widawsky
A few new PCI ids are added here, and one is removed (0x190B) because it no
longer seems to exist anywhere.

Signed-off-by: Ben Widawsky 
---
 include/pci_ids/i965_pci_ids.h | 40 ++--
 1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h
index 7d23547..a561f70 100644
--- a/include/pci_ids/i965_pci_ids.h
+++ b/include/pci_ids/i965_pci_ids.h
@@ -109,24 +109,28 @@ CHIPSET(0x162A, bdw_gt3, "Intel(R) Iris Pro P6300 
(Broadwell GT3e)")
 CHIPSET(0x162B, bdw_gt3, "Intel(R) Iris 6100 (Broadwell GT3)")
 CHIPSET(0x162D, bdw_gt3, "Intel(R) Broadwell GT3")
 CHIPSET(0x162E, bdw_gt3, "Intel(R) Broadwell GT3")
-CHIPSET(0x1902, skl_gt1, "Intel(R) Skylake DT  GT1")
-CHIPSET(0x1906, skl_gt1, "Intel(R) Skylake ULT GT1")
-CHIPSET(0x190A, skl_gt1, "Intel(R) Skylake SRV GT1")
-CHIPSET(0x190B, skl_gt1, "Intel(R) Skylake Halo GT1")
-CHIPSET(0x190E, skl_gt1, "Intel(R) Skylake ULX GT1")
-CHIPSET(0x1912, skl_gt2, "Intel(R) Skylake DT  GT2")
-CHIPSET(0x1916, skl_gt2, "Intel(R) Skylake ULT GT2")
-CHIPSET(0x191A, skl_gt2, "Intel(R) Skylake SRV GT2")
-CHIPSET(0x191B, skl_gt2, "Intel(R) Skylake Halo GT2")
-CHIPSET(0x191D, skl_gt2, "Intel(R) Skylake WKS GT2")
-CHIPSET(0x191E, skl_gt2, "Intel(R) Skylake ULX GT2")
-CHIPSET(0x1921, skl_gt2, "Intel(R) Skylake ULT GT2F")
-CHIPSET(0x1926, skl_gt3, "Intel(R) Skylake ULT GT3")
-CHIPSET(0x192B, skl_gt3, "Intel(R) Skylake Halo GT3")
-CHIPSET(0x1932, skl_gt4, "Intel(R) Skylake GT4")
-CHIPSET(0x193A, skl_gt4, "Intel(R) Skylake GT4")
-CHIPSET(0x193B, skl_gt4, "Intel(R) Skylake GT4")
-CHIPSET(0x193D, skl_gt4, "Intel(R) Skylake GT4")
+CHIPSET(0x1902, skl_gt1, "Intel® HD Graphics 510 (Skylake GT1)")
+CHIPSET(0x1906, skl_gt1, "Intel® HD Graphics 510 (Skylake GT1)")
+CHIPSET(0x190A, skl_gt1, "Intel® Skylake GT1")
+CHIPSET(0x190E, skl_gt1, "Intel® Skylake GT1")
+CHIPSET(0x1912, skl_gt2, "Intel® HD Graphics 530 (Skylake GT2)")
+CHIPSET(0x1913, skl_gt2, "Intel® Skylake GT2f")
+CHIPSET(0x1915, skl_gt2, "Intel® Skylake GT2f")
+CHIPSET(0x1916, skl_gt2, "Intel® HD Graphics 520 (Skylake GT2)")
+CHIPSET(0x1917, skl_gt2, "Intel® Skylake GT2f")
+CHIPSET(0x191A, skl_gt2, "Intel® Skylake GT2")
+CHIPSET(0x191B, skl_gt2, "Intel® HD Graphics 530 (Skylake GT2)")
+CHIPSET(0x191D, skl_gt2, "Intel® HD Graphics P530 (Skylake GT2)")
+CHIPSET(0x191E, skl_gt2, "Intel® HD Graphics 515 (Skylake GT2)")
+CHIPSET(0x1921, skl_gt2, "Intel® Skylake GT2f")
+CHIPSET(0x1923, skl_gt3, "Intel® Iris™ Graphics 540 (Skylake GT3e)")
+CHIPSET(0x1926, skl_gt3, "Intel® HD Graphics 535 (Skylake GT3)")
+CHIPSET(0x1927, skl_gt3, "Intel® Iris™ Graphics 550 (Skylake GT3e)")
+CHIPSET(0x192B, skl_gt3, "Iris™ Graphics Iris™ Graphics (Skylake GT3fe)")
+CHIPSET(0x1932, skl_gt4, "Intel® Iris™ Pro Graphics 570/580 (Skylake GT4)")
+CHIPSET(0x193A, skl_gt4, "Intel® Iris™ Pro Graphics P580 (Skylake GT4)")
+CHIPSET(0x193B, skl_gt4, "Intel® Iris™ Pro Graphics 580 (Skylake GT4)")
+CHIPSET(0x193D, skl_gt4, "Intel® Iris™ Pro Graphics P580 (Skylake GT4)")
 CHIPSET(0x22B0, chv, "Intel(R) HD Graphics (Cherryview)")
 CHIPSET(0x22B1, chv, "Intel(R) HD Graphics (Cherryview)")
 CHIPSET(0x22B2, chv, "Intel(R) HD Graphics (Cherryview)")
-- 
2.6.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965/skl: PCI ID cleanup and brand strings

2015-10-23 Thread Ilia Mirkin
On Fri, Oct 23, 2015 at 1:37 PM, Ben Widawsky
 wrote:
> A few new PCI ids are added here, and one is removed (0x190B) because it no
> longer seems to exist anywhere.
>
> Signed-off-by: Ben Widawsky 
> ---
>  include/pci_ids/i965_pci_ids.h | 40 ++--
>  1 file changed, 22 insertions(+), 18 deletions(-)
>
> diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h
> index 7d23547..a561f70 100644
> --- a/include/pci_ids/i965_pci_ids.h
> +++ b/include/pci_ids/i965_pci_ids.h
> @@ -109,24 +109,28 @@ CHIPSET(0x162A, bdw_gt3, "Intel(R) Iris Pro P6300 
> (Broadwell GT3e)")
>  CHIPSET(0x162B, bdw_gt3, "Intel(R) Iris 6100 (Broadwell GT3)")
>  CHIPSET(0x162D, bdw_gt3, "Intel(R) Broadwell GT3")
>  CHIPSET(0x162E, bdw_gt3, "Intel(R) Broadwell GT3")
> -CHIPSET(0x1902, skl_gt1, "Intel(R) Skylake DT  GT1")
> -CHIPSET(0x1906, skl_gt1, "Intel(R) Skylake ULT GT1")
> -CHIPSET(0x190A, skl_gt1, "Intel(R) Skylake SRV GT1")
> -CHIPSET(0x190B, skl_gt1, "Intel(R) Skylake Halo GT1")
> -CHIPSET(0x190E, skl_gt1, "Intel(R) Skylake ULX GT1")
> -CHIPSET(0x1912, skl_gt2, "Intel(R) Skylake DT  GT2")
> -CHIPSET(0x1916, skl_gt2, "Intel(R) Skylake ULT GT2")
> -CHIPSET(0x191A, skl_gt2, "Intel(R) Skylake SRV GT2")
> -CHIPSET(0x191B, skl_gt2, "Intel(R) Skylake Halo GT2")
> -CHIPSET(0x191D, skl_gt2, "Intel(R) Skylake WKS GT2")
> -CHIPSET(0x191E, skl_gt2, "Intel(R) Skylake ULX GT2")
> -CHIPSET(0x1921, skl_gt2, "Intel(R) Skylake ULT GT2F")
> -CHIPSET(0x1926, skl_gt3, "Intel(R) Skylake ULT GT3")
> -CHIPSET(0x192B, skl_gt3, "Intel(R) Skylake Halo GT3")
> -CHIPSET(0x1932, skl_gt4, "Intel(R) Skylake GT4")
> -CHIPSET(0x193A, skl_gt4, "Intel(R) Skylake GT4")
> -CHIPSET(0x193B, skl_gt4, "Intel(R) Skylake GT4")
> -CHIPSET(0x193D, skl_gt4, "Intel(R) Skylake GT4")
> +CHIPSET(0x1902, skl_gt1, "Intel® HD Graphics 510 (Skylake GT1)")

Are you sure you want to include non-ascii characters here? For
example your patch encodes this as UTF-8 and displays like this on my
term:

+CHIPSET(0x1902, skl_gt1, "Intel® HD Graphics 510 (Skylake GT1)")

I think (R) and (tm) are the accepted strings to use in these
scenarios... [you could also have some huge complex thing to localize
to the current LC setting, but... seems like overkill.]

  -ilia

> +CHIPSET(0x1906, skl_gt1, "Intel® HD Graphics 510 (Skylake GT1)")
> +CHIPSET(0x190A, skl_gt1, "Intel® Skylake GT1")
> +CHIPSET(0x190E, skl_gt1, "Intel® Skylake GT1")
> +CHIPSET(0x1912, skl_gt2, "Intel® HD Graphics 530 (Skylake GT2)")
> +CHIPSET(0x1913, skl_gt2, "Intel® Skylake GT2f")
> +CHIPSET(0x1915, skl_gt2, "Intel® Skylake GT2f")
> +CHIPSET(0x1916, skl_gt2, "Intel® HD Graphics 520 (Skylake GT2)")
> +CHIPSET(0x1917, skl_gt2, "Intel® Skylake GT2f")
> +CHIPSET(0x191A, skl_gt2, "Intel® Skylake GT2")
> +CHIPSET(0x191B, skl_gt2, "Intel® HD Graphics 530 (Skylake GT2)")
> +CHIPSET(0x191D, skl_gt2, "Intel® HD Graphics P530 (Skylake GT2)")
> +CHIPSET(0x191E, skl_gt2, "Intel® HD Graphics 515 (Skylake GT2)")
> +CHIPSET(0x1921, skl_gt2, "Intel® Skylake GT2f")
> +CHIPSET(0x1923, skl_gt3, "Intel® Iris™ Graphics 540 (Skylake GT3e)")
> +CHIPSET(0x1926, skl_gt3, "Intel® HD Graphics 535 (Skylake GT3)")
> +CHIPSET(0x1927, skl_gt3, "Intel® Iris™ Graphics 550 (Skylake GT3e)")
> +CHIPSET(0x192B, skl_gt3, "Iris™ Graphics Iris™ Graphics (Skylake GT3fe)")
> +CHIPSET(0x1932, skl_gt4, "Intel® Iris™ Pro Graphics 570/580 (Skylake GT4)")
> +CHIPSET(0x193A, skl_gt4, "Intel® Iris™ Pro Graphics P580 (Skylake GT4)")
> +CHIPSET(0x193B, skl_gt4, "Intel® Iris™ Pro Graphics 580 (Skylake GT4)")
> +CHIPSET(0x193D, skl_gt4, "Intel® Iris™ Pro Graphics P580 (Skylake GT4)")
>  CHIPSET(0x22B0, chv, "Intel(R) HD Graphics (Cherryview)")
>  CHIPSET(0x22B1, chv, "Intel(R) HD Graphics (Cherryview)")
>  CHIPSET(0x22B2, chv, "Intel(R) HD Graphics (Cherryview)")
> --
> 2.6.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 10/21] mesa: Remove equality check in helper functions

2015-10-23 Thread Nanley Chery
On Wed, Oct 21, 2015 at 11:43 PM, Erik Faye-Lund 
wrote:

> On Tue, Oct 20, 2015 at 12:44 AM, Nanley Chery 
> wrote:
> > From: Nanley Chery 
> >
> > Since the version numbers being compared are integral and we don't ever
> > expect gl_context::Version to be equal to 0, subtract 1 from the rhs of
> > the equation and perform the optimization of removing the equality check.
>
> Is this really an improvement? Changing '>=' to '>' and then making
> all versions off-by-one seems like trading less readable and debugable
> code for no real upside...
>

After looking at the assembly generated, I've determined that this is
actually not an improvement. I'll drop this patch. I was using .text size
as my metric earlier. I now realize that it isn't completely reliable.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] util: Add list_is_single() helper function

2015-10-23 Thread Matt Turner
On Fri, Oct 23, 2015 at 8:55 AM, Eduardo Lima Mitev  wrote:
> Returns true is the list has exactly one element, otherwise returns false.
> ---
>  src/util/list.h | 8 
>  1 file changed, 8 insertions(+)
>
> diff --git a/src/util/list.h b/src/util/list.h
> index b98ce59..2b03461 100644
> --- a/src/util/list.h
> +++ b/src/util/list.h
> @@ -99,6 +99,14 @@ static inline bool list_empty(struct list_head *list)
> return list->next == list;
>  }
>
> +/**
> + * Return true is the list has exactly one element, otherwise return false.

How about "Returns whether the list has exactly one element."

> + */
> +static inline bool list_is_single(struct list_head *list)

Doesn't look like anywhere else uses const arguments, but this could
be. No matter...

> +{
> +   return list->next != NULL && list->next->next == list;
> +}
> +

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 05/21] mesa/extensions: Add extension::version

2015-10-23 Thread Nanley Chery
On Thu, Oct 22, 2015 at 3:17 AM, Emil Velikov 
wrote:

> On 19 October 2015 at 23:36, Nanley Chery  wrote:
> > From: Nanley Chery 
> >
> > Enable limiting advertised extension support by context version with
> > finer granularity. GLuint is chosen over smaller datatypes because,
> > when this field is eventually used, usage of this datatype provides
> > the smallest .text size of the compiled library.
> >
> > Signed-off-by: Nanley Chery 
> > ---
> >  src/mesa/main/extensions.c   |  15 +-
> >  src/mesa/main/extensions_table.h | 626
> +++
> >  2 files changed, 326 insertions(+), 315 deletions(-)
> >
> > diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
> > index 30f5b98..390e026 100644
> > --- a/src/mesa/main/extensions.c
> > +++ b/src/mesa/main/extensions.c
> > @@ -66,6 +66,11 @@ struct extension {
> > /** Set of API's in which the extension exists, as a bitset. */
> > uint8_t api_set;
> >
> > +   /** Minimum version the extension requires for the given API
> > +* (see gl_api defined in mtypes.h)
> > +*/
> > +   GLuint version[API_OPENGL_LAST + 1];
> > +
> Please use uint*t type, like the surrounding code and add a note that
> the version is 10*major+minor.
>
>
Will do.

> -Emil
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] i965/skl: Add GT4 PCI IDs

2015-10-23 Thread Ben Widawsky
On Fri, Oct 23, 2015 at 10:37:29AM -0700, Ben Widawsky wrote:
> Like other gen8+ hardware, the hardware automatically scales up thread counts
> and URB sizes, so there is no need to do anything but add the PCI IDs.
> 
> FINISHME: This patch still needs testing before merge.
> 
> Cc: mesa-sta...@lists.freedesktop.org
> Signed-off-by: Ben Widawsky 
> ---
>  include/pci_ids/i965_pci_ids.h  | 5 -
>  src/mesa/drivers/dri/i965/brw_device_info.c | 4 
>  2 files changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h
> index 8a42599..7d23547 100644
> --- a/include/pci_ids/i965_pci_ids.h
> +++ b/include/pci_ids/i965_pci_ids.h
> @@ -122,8 +122,11 @@ CHIPSET(0x191D, skl_gt2, "Intel(R) Skylake WKS GT2")
>  CHIPSET(0x191E, skl_gt2, "Intel(R) Skylake ULX GT2")
>  CHIPSET(0x1921, skl_gt2, "Intel(R) Skylake ULT GT2F")
>  CHIPSET(0x1926, skl_gt3, "Intel(R) Skylake ULT GT3")
> -CHIPSET(0x192A, skl_gt3, "Intel(R) Skylake SRV GT3")
>  CHIPSET(0x192B, skl_gt3, "Intel(R) Skylake Halo GT3")

This should be removed in the next patch actually. I screwed up the rebase.

> +CHIPSET(0x1932, skl_gt4, "Intel(R) Skylake GT4")
> +CHIPSET(0x193A, skl_gt4, "Intel(R) Skylake GT4")
> +CHIPSET(0x193B, skl_gt4, "Intel(R) Skylake GT4")
> +CHIPSET(0x193D, skl_gt4, "Intel(R) Skylake GT4")
>  CHIPSET(0x22B0, chv, "Intel(R) HD Graphics (Cherryview)")
>  CHIPSET(0x22B1, chv, "Intel(R) HD Graphics (Cherryview)")
>  CHIPSET(0x22B2, chv, "Intel(R) HD Graphics (Cherryview)")
> diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c 
> b/src/mesa/drivers/dri/i965/brw_device_info.c
> index a6a3bb6..e7a016c 100644
> --- a/src/mesa/drivers/dri/i965/brw_device_info.c
> +++ b/src/mesa/drivers/dri/i965/brw_device_info.c
> @@ -335,6 +335,10 @@ static const struct brw_device_info 
> brw_device_info_skl_gt3 = {
> GEN9_FEATURES, .gt = 3,
>  };
>  
> +static const struct brw_device_info brw_device_info_skl_gt4 = {
> +   GEN9_FEATURES, .gt = 4,
> +};
> +
>  static const struct brw_device_info brw_device_info_bxt = {
> GEN9_FEATURES,
> .is_broxton = 1,
> -- 
> 2.6.1
> 

-- 
Ben Widawsky, Intel Open Source Technology Center
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 05/21] mesa/extensions: Add extension::version

2015-10-23 Thread Nanley Chery
On Thu, Oct 22, 2015 at 11:02 AM, Chad Versace 
wrote:

> On Mon 19 Oct 2015, Nanley Chery wrote:
> > From: Nanley Chery 
> >
> > Enable limiting advertised extension support by context version with
> > finer granularity. GLuint is chosen over smaller datatypes because,
> > when this field is eventually used, usage of this datatype provides
> > the smallest .text size of the compiled library.
> >
> > Signed-off-by: Nanley Chery 
> > ---
> >  src/mesa/main/extensions.c   |  15 +-
> >  src/mesa/main/extensions_table.h | 626
> +++
> >  2 files changed, 326 insertions(+), 315 deletions(-)
>
>
> > @@ -83,8 +88,14 @@ struct extension {
> >   * \brief Table of supported OpenGL extensions for all API's.
> >   */
> >  static const struct extension extension_table[] = {
> > -#define EXT(name_str, driver_cap, api_flags, ) \
> > -{ .name = "GL_" #name_str, .offset = o(driver_cap), .api_set =
> api_flags, .year = },
> > +#define EXT(name_str, driver_cap, api_flags, gll_ver, glc_ver,
> gles_ver, gles2_ver, ) \
> > +{ .name = "GL_" #name_str, .offset = o(driver_cap), .api_set =
> api_flags, \
> > +  .version = { \
> > +[API_OPENGL_COMPAT] = gll_ver, \
> > +[API_OPENGL_CORE]   = glc_ver, \
> > +[API_OPENGLES]  = gles_ver, \
> > +[API_OPENGLES2] = gles2_ver, \
> > +   }, .year = },
>
> Please place '.year' and the final '}' each on its own line. That makes
> the macro easier to read.
>
>
Will do.


> >  #include "extensions_table.h"
> >  #undef EXT
> >  };
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/20] i965: Include UBO parameter sizes in push constant parameters

2015-10-23 Thread Francisco Jerez
Abdiel Janulgue  writes:

> Now that we consider UBO constants as push constants, we need to include
> the sizes of the UBO's constant slots in the visitor's uniform slot sizes.
> This information is needed to properly pack vector constants tightly next to
> each other.
>
> Signed-off-by: Abdiel Janulgue 
> ---
>  src/mesa/drivers/dri/i965/brw_gs.c  |  1 +
>  src/mesa/drivers/dri/i965/brw_program.h |  3 +++
>  src/mesa/drivers/dri/i965/brw_vs.c  |  3 +++
>  src/mesa/drivers/dri/i965/brw_wm.c  | 22 ++
>  4 files changed, 29 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_gs.c 
> b/src/mesa/drivers/dri/i965/brw_gs.c
> index 17e87b8..7641cc5 100644
> --- a/src/mesa/drivers/dri/i965/brw_gs.c
> +++ b/src/mesa/drivers/dri/i965/brw_gs.c
> @@ -72,6 +72,7 @@ brw_codegen_gs_prog(struct brw_context *brw,
>rzalloc_array(NULL, struct brw_image_param, gs->NumImages);
> c.prog_data.base.base.nr_params = param_count;
> c.prog_data.base.base.nr_image_params = gs->NumImages;
> +   c.prog_data.base.base.nr_ubo_params = brw_count_ubo_params(gs);
> c.prog_data.base.base.nr_gather_table = 0;
> c.prog_data.base.base.gather_table =
>rzalloc_size(NULL, sizeof(*c.prog_data.base.base.gather_table) *
> diff --git a/src/mesa/drivers/dri/i965/brw_program.h 
> b/src/mesa/drivers/dri/i965/brw_program.h
> index 00e8f3f..20f5371 100644
> --- a/src/mesa/drivers/dri/i965/brw_program.h
> +++ b/src/mesa/drivers/dri/i965/brw_program.h
> @@ -182,6 +182,9 @@ void
>  brw_dump_ir(const char *stage, struct gl_shader_program *shader_prog,
>  struct gl_shader *shader, struct gl_program *prog);
>  
> +int
> +brw_count_ubo_params(struct gl_shader *fs);
> +
>  #ifdef __cplusplus
>  } /* extern "C" */
>  #endif
> diff --git a/src/mesa/drivers/dri/i965/brw_vs.c 
> b/src/mesa/drivers/dri/i965/brw_vs.c
> index 8501796..1ec2bc9 100644
> --- a/src/mesa/drivers/dri/i965/brw_vs.c
> +++ b/src/mesa/drivers/dri/i965/brw_vs.c
> @@ -141,6 +141,9 @@ brw_codegen_vs_prog(struct brw_context *brw,
>  stage_prog_data->nr_image_params);
> stage_prog_data->nr_params = param_count;
>  
> +   stage_prog_data->nr_ubo_params = 0;

This assignment seems redundant, the prog data struct is already memset
to zero at the top of the function.

> +   if (vs)
> +  stage_prog_data->nr_ubo_params = brw_count_ubo_params(vs);

You could move this assignment into the other 'if (vs)' conditional
above.

> stage_prog_data->nr_gather_table = 0;
> stage_prog_data->gather_table =
>rzalloc_size(NULL, sizeof(*stage_prog_data->gather_table) *
> diff --git a/src/mesa/drivers/dri/i965/brw_wm.c 
> b/src/mesa/drivers/dri/i965/brw_wm.c
> index 204baa6..44efba0 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm.c
> @@ -33,6 +33,7 @@
>  #include "main/framebuffer.h"
>  #include "program/prog_parameter.h"
>  #include "program/program.h"
> +#include "glsl/nir/nir_types.h"
>  #include "intel_mipmap_tree.h"
>  
>  #include "util/ralloc.h"
> @@ -149,6 +150,23 @@ brw_wm_prog_data_compare(const void *in_a, const void 
> *in_b)
> return true;
>  }
>  
> +int

The result of this function is always non-negative so I guess its return
type should be unsigned.

> +brw_count_ubo_params(struct gl_shader *shader)
> +{
> +   int nr_ubo = 0;

Same here.

> +   for (int i = 0; i < shader->NumUniformBlocks; i++) {
> +  for (int p = 0; p < shader->UniformBlocks[i].NumUniforms; p++) {
> + const struct glsl_type *type = 
> shader->UniformBlocks[i].Uniforms[p].Type;
> + int array_sz = glsl_get_array_size(type);
> + array_sz = MAX2(array_sz, 1);
> + int components = 
> glsl_get_components(glsl_get_type_without_array(type));
> + nr_ubo += components * array_sz;

Why don't you just do 'nr_ubo += 
shader->UniformBlocks[i].Uniforms[p].Type->component_slots();'?

> +  }
> +   }
> +
> +   return nr_ubo;
> +}
> +

I guess brw_shader.cpp would be a more appropriate place for this
function rather than brw_wm.c since it doesn't do anything specific to
fragment shaders.  If anything you'd be able to use the C++ methods of
glsl_type and the last two patches would become unnecessary.

>  /**
>   * All Mesa program -> GPU code generation goes through this function.
>   * Depending on the instructions used (i.e. flow control instructions)
> @@ -208,6 +226,10 @@ brw_codegen_wm_prog(struct brw_context *brw,
>  prog_data.base.nr_image_params);
> prog_data.base.nr_params = param_count;
>  
> +   prog_data.base.nr_ubo_params = 0;
> +   if (fs)
> +  prog_data.base.nr_ubo_params = brw_count_ubo_params(fs);
> +
Same comments as for brw_codegen_vs_prog().  And I guess you're missing
an analogous change for compute shaders?

> prog_data.base.nr_gather_table = 0;
> prog_data.base.gather_table =
>rzalloc_size(NULL, sizeof(*prog_data.base.gather_table) *
> -- 
> 1.9.1
>
> __

Re: [Mesa-dev] [PATCH 2/2] i965/skl: PCI ID cleanup and brand strings

2015-10-23 Thread Ben Widawsky
On Fri, Oct 23, 2015 at 01:44:38PM -0400, Ilia Mirkin wrote:
> On Fri, Oct 23, 2015 at 1:37 PM, Ben Widawsky
>  wrote:
> > A few new PCI ids are added here, and one is removed (0x190B) because it no
> > longer seems to exist anywhere.
> >
> > Signed-off-by: Ben Widawsky 
> > ---
> >  include/pci_ids/i965_pci_ids.h | 40 
> > ++--
> >  1 file changed, 22 insertions(+), 18 deletions(-)
> >
> > diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h
> > index 7d23547..a561f70 100644
> > --- a/include/pci_ids/i965_pci_ids.h
> > +++ b/include/pci_ids/i965_pci_ids.h
> > @@ -109,24 +109,28 @@ CHIPSET(0x162A, bdw_gt3, "Intel(R) Iris Pro P6300 
> > (Broadwell GT3e)")
> >  CHIPSET(0x162B, bdw_gt3, "Intel(R) Iris 6100 (Broadwell GT3)")
> >  CHIPSET(0x162D, bdw_gt3, "Intel(R) Broadwell GT3")
> >  CHIPSET(0x162E, bdw_gt3, "Intel(R) Broadwell GT3")
> > -CHIPSET(0x1902, skl_gt1, "Intel(R) Skylake DT  GT1")
> > -CHIPSET(0x1906, skl_gt1, "Intel(R) Skylake ULT GT1")
> > -CHIPSET(0x190A, skl_gt1, "Intel(R) Skylake SRV GT1")
> > -CHIPSET(0x190B, skl_gt1, "Intel(R) Skylake Halo GT1")
> > -CHIPSET(0x190E, skl_gt1, "Intel(R) Skylake ULX GT1")
> > -CHIPSET(0x1912, skl_gt2, "Intel(R) Skylake DT  GT2")
> > -CHIPSET(0x1916, skl_gt2, "Intel(R) Skylake ULT GT2")
> > -CHIPSET(0x191A, skl_gt2, "Intel(R) Skylake SRV GT2")
> > -CHIPSET(0x191B, skl_gt2, "Intel(R) Skylake Halo GT2")
> > -CHIPSET(0x191D, skl_gt2, "Intel(R) Skylake WKS GT2")
> > -CHIPSET(0x191E, skl_gt2, "Intel(R) Skylake ULX GT2")
> > -CHIPSET(0x1921, skl_gt2, "Intel(R) Skylake ULT GT2F")
> > -CHIPSET(0x1926, skl_gt3, "Intel(R) Skylake ULT GT3")
> > -CHIPSET(0x192B, skl_gt3, "Intel(R) Skylake Halo GT3")
> > -CHIPSET(0x1932, skl_gt4, "Intel(R) Skylake GT4")
> > -CHIPSET(0x193A, skl_gt4, "Intel(R) Skylake GT4")
> > -CHIPSET(0x193B, skl_gt4, "Intel(R) Skylake GT4")
> > -CHIPSET(0x193D, skl_gt4, "Intel(R) Skylake GT4")
> > +CHIPSET(0x1902, skl_gt1, "Intel® HD Graphics 510 (Skylake GT1)")
> 
> Are you sure you want to include non-ascii characters here? For
> example your patch encodes this as UTF-8 and displays like this on my
> term:
> 
> +CHIPSET(0x1902, skl_gt1, "Intel® HD Graphics 510 (Skylake GT1)")
> 
> I think (R) and (tm) are the accepted strings to use in these
> scenarios... [you could also have some huge complex thing to localize
> to the current LC setting, but... seems like overkill.]
> 
>   -ilia

I thought I would give it a shot, but I figured someone would complain (and I'm
not surprised it was you)... I copied all of these directly from libreoffice
opening a .xlsx, so it could very well just be wrong as opposed to a
misinterpretation.

I would prefer to switch away from ascii, but the fact that anyone said anything
means I will just switch it over.

> 
> > +CHIPSET(0x1906, skl_gt1, "Intel® HD Graphics 510 (Skylake GT1)")
> > +CHIPSET(0x190A, skl_gt1, "Intel® Skylake GT1")
> > +CHIPSET(0x190E, skl_gt1, "Intel® Skylake GT1")
> > +CHIPSET(0x1912, skl_gt2, "Intel® HD Graphics 530 (Skylake GT2)")
> > +CHIPSET(0x1913, skl_gt2, "Intel® Skylake GT2f")
> > +CHIPSET(0x1915, skl_gt2, "Intel® Skylake GT2f")
> > +CHIPSET(0x1916, skl_gt2, "Intel® HD Graphics 520 (Skylake GT2)")
> > +CHIPSET(0x1917, skl_gt2, "Intel® Skylake GT2f")
> > +CHIPSET(0x191A, skl_gt2, "Intel® Skylake GT2")
> > +CHIPSET(0x191B, skl_gt2, "Intel® HD Graphics 530 (Skylake GT2)")
> > +CHIPSET(0x191D, skl_gt2, "Intel® HD Graphics P530 (Skylake GT2)")
> > +CHIPSET(0x191E, skl_gt2, "Intel® HD Graphics 515 (Skylake GT2)")
> > +CHIPSET(0x1921, skl_gt2, "Intel® Skylake GT2f")
> > +CHIPSET(0x1923, skl_gt3, "Intel® Iris™ Graphics 540 (Skylake GT3e)")
> > +CHIPSET(0x1926, skl_gt3, "Intel® HD Graphics 535 (Skylake GT3)")
> > +CHIPSET(0x1927, skl_gt3, "Intel® Iris™ Graphics 550 (Skylake GT3e)")
> > +CHIPSET(0x192B, skl_gt3, "Iris™ Graphics Iris™ Graphics (Skylake GT3fe)")

I just noticed this bug which I copied blindly. I'll fix this too

> > +CHIPSET(0x1932, skl_gt4, "Intel® Iris™ Pro Graphics 570/580 (Skylake GT4)")
> > +CHIPSET(0x193A, skl_gt4, "Intel® Iris™ Pro Graphics P580 (Skylake GT4)")
> > +CHIPSET(0x193B, skl_gt4, "Intel® Iris™ Pro Graphics 580 (Skylake GT4)")
> > +CHIPSET(0x193D, skl_gt4, "Intel® Iris™ Pro Graphics P580 (Skylake GT4)")
> >  CHIPSET(0x22B0, chv, "Intel(R) HD Graphics (Cherryview)")
> >  CHIPSET(0x22B1, chv, "Intel(R) HD Graphics (Cherryview)")
> >  CHIPSET(0x22B2, chv, "Intel(R) HD Graphics (Cherryview)")
> > --
> > 2.6.1
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev

-- 
Ben Widawsky, Intel Open Source Technology Center
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] intel: Cleanup SKL PCI ID definitions.

2015-10-23 Thread Ben Widawsky
This removes ones which aren't used 0x190b, 192a), and adds some new ones. I
kept the original names where possible.

Cc: Kristian Høgsberg 
Cc: Damien Lespiau 
Signed-off-by: Ben Widawsky 
---
 intel/intel_chipset.h | 46 ++
 1 file changed, 26 insertions(+), 20 deletions(-)

diff --git a/intel/intel_chipset.h b/intel/intel_chipset.h
index 6c8dc73..a0f17c6 100644
--- a/intel/intel_chipset.h
+++ b/intel/intel_chipset.h
@@ -165,21 +165,24 @@
 #define PCI_CHIP_CHERRYVIEW_2  0x22b2
 #define PCI_CHIP_CHERRYVIEW_3  0x22b3
 
-#define PCI_CHIP_SKYLAKE_ULT_GT2   0x1916
+#define PCI_CHIP_SKYLAKE_DT_GT10x1902
 #define PCI_CHIP_SKYLAKE_ULT_GT1   0x1906
-#define PCI_CHIP_SKYLAKE_ULT_GT3   0x1926
-#define PCI_CHIP_SKYLAKE_ULT_GT2F  0x1921
-#define PCI_CHIP_SKYLAKE_ULX_GT1   0x190E
-#define PCI_CHIP_SKYLAKE_ULX_GT2   0x191E
+#define PCI_CHIP_SKYLAKE_SRV_GT1   0x190A /* Reserved */
+#define PCI_CHIP_SKYLAKE_ULX_GT1   0x190E /* Reserved */
 #define PCI_CHIP_SKYLAKE_DT_GT20x1912
-#define PCI_CHIP_SKYLAKE_DT_GT10x1902
+#define PCI_CHIP_SKYLAKE_FUSED0_GT20x1913 /* Reserved */
+#define PCI_CHIP_SKYLAKE_FUSED1_GT20x1915 /* Reserved */
+#define PCI_CHIP_SKYLAKE_ULT_GT2   0x1916
+#define PCI_CHIP_SKYLAKE_FUSED2_GT20x1917 /* Reserved */
+#define PCI_CHIP_SKYLAKE_SRV_GT2   0x191A /* Reserved */
 #define PCI_CHIP_SKYLAKE_HALO_GT2  0x191B
-#define PCI_CHIP_SKYLAKE_HALO_GT3  0x192B
-#define PCI_CHIP_SKYLAKE_HALO_GT1  0x190B
-#define PCI_CHIP_SKYLAKE_SRV_GT2   0x191A
-#define PCI_CHIP_SKYLAKE_SRV_GT3   0x192A
-#define PCI_CHIP_SKYLAKE_SRV_GT1   0x190A
 #define PCI_CHIP_SKYLAKE_WKS_GT2   0x191D
+#define PCI_CHIP_SKYLAKE_ULX_GT2   0x191E
+#define PCI_CHIP_SKYLAKE_MOBILE_GT20x1921 /* Reserved */
+#define PCI_CHIP_SKYLAKE_GT3E_540  0x1923
+#define PCI_CHIP_SKYLAKE_GT3   0x1926
+#define PCI_CHIP_SKYLAKE_GT3E_550  0x1927
+#define PCI_CHIP_SKYLAKE_HALO_GT3  0x192B /* Reserved */
 #define PCI_CHIP_SKYLAKE_DT_GT40x1932
 #define PCI_CHIP_SKYLAKE_SRV_GT4   0x193A
 #define PCI_CHIP_SKYLAKE_H_GT4 0x193B
@@ -351,20 +354,23 @@
 #define IS_SKL_GT1(devid)  ((devid) == PCI_CHIP_SKYLAKE_ULT_GT1|| \
 (devid) == PCI_CHIP_SKYLAKE_ULX_GT1|| \
 (devid) == PCI_CHIP_SKYLAKE_DT_GT1 || \
-(devid) == PCI_CHIP_SKYLAKE_HALO_GT1   || \
 (devid) == PCI_CHIP_SKYLAKE_SRV_GT1)
 
-#define IS_SKL_GT2(devid)  ((devid) == PCI_CHIP_SKYLAKE_ULT_GT2|| \
-(devid) == PCI_CHIP_SKYLAKE_ULT_GT2F   || \
-(devid) == PCI_CHIP_SKYLAKE_ULX_GT2|| \
-(devid) == PCI_CHIP_SKYLAKE_DT_GT2 || \
-(devid) == PCI_CHIP_SKYLAKE_HALO_GT2   || \
+#define IS_SKL_GT2(devid)  ((devid) == PCI_CHIP_SKYLAKE_DT_GT2 || \
+(devid) == PCI_CHIP_SKYLAKE_FUSED0_GT2 || \
+(devid) == PCI_CHIP_SKYLAKE_FUSED1_GT2 || \
+(devid) == PCI_CHIP_SKYLAKE_ULT_GT2|| \
+(devid) == PCI_CHIP_SKYLAKE_FUSED2_GT2 || \
 (devid) == PCI_CHIP_SKYLAKE_SRV_GT2|| \
-(devid) == PCI_CHIP_SKYLAKE_WKS_GT2)
+(devid) == PCI_CHIP_SKYLAKE_HALO_GT2   || \
+(devid) == PCI_CHIP_SKYLAKE_WKS_GT2|| \
+(devid) == PCI_CHIP_SKYLAKE_ULX_GT2|| \
+(devid) == PCI_CHIP_SKYLAKE_MOBILE_GT2)
 
-#define IS_SKL_GT3(devid)  ((devid) == PCI_CHIP_SKYLAKE_ULT_GT3|| \
+#define IS_SKL_GT3(devid)  ((devid) == PCI_CHIP_SKYLAKE_GT3|| \
 (devid) == PCI_CHIP_SKYLAKE_HALO_GT3   || \
-(devid) == PCI_CHIP_SKYLAKE_SRV_GT3)
+(devid) == PCI_CHIP_SKYLAKE_GT3E_540   || \
+(devid) == PCI_CHIP_SKYLAKE_GT3E_550)
 
 #define IS_SKL_GT4(devid)  ((devid) == PCI_CHIP_SKYLAKE_DT_GT4 || \
 (devid) == PCI_CHIP_SKYLAKE_SRV_GT4|| \
-- 
2.6.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] intel: Add SKL GT4 PCI IDs

2015-10-23 Thread Ben Widawsky
Cc: Kristian Høgsberg 
Cc: Damien Lespiau 
Signed-off-by: Ben Widawsky 
---
 intel/intel_chipset.h | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/intel/intel_chipset.h b/intel/intel_chipset.h
index 253ea71..6c8dc73 100644
--- a/intel/intel_chipset.h
+++ b/intel/intel_chipset.h
@@ -180,6 +180,10 @@
 #define PCI_CHIP_SKYLAKE_SRV_GT3   0x192A
 #define PCI_CHIP_SKYLAKE_SRV_GT1   0x190A
 #define PCI_CHIP_SKYLAKE_WKS_GT2   0x191D
+#define PCI_CHIP_SKYLAKE_DT_GT40x1932
+#define PCI_CHIP_SKYLAKE_SRV_GT4   0x193A
+#define PCI_CHIP_SKYLAKE_H_GT4 0x193B
+#define PCI_CHIP_SKYLAKE_WKS_GT4   0x193D
 
 #define PCI_CHIP_BROXTON_0 0x0A84
 #define PCI_CHIP_BROXTON_1 0x1A84
@@ -362,9 +366,15 @@
 (devid) == PCI_CHIP_SKYLAKE_HALO_GT3   || \
 (devid) == PCI_CHIP_SKYLAKE_SRV_GT3)
 
+#define IS_SKL_GT4(devid)  ((devid) == PCI_CHIP_SKYLAKE_DT_GT4 || \
+(devid) == PCI_CHIP_SKYLAKE_SRV_GT4|| \
+(devid) == PCI_CHIP_SKYLAKE_H_GT4  || \
+(devid) == PCI_CHIP_SKYLAKE_WKS_GT4)
+
 #define IS_SKYLAKE(devid)  (IS_SKL_GT1(devid) || \
 IS_SKL_GT2(devid) || \
-IS_SKL_GT3(devid))
+IS_SKL_GT3(devid) || \
+IS_SKL_GT4(devid))
 
 #define IS_BROXTON(devid)  ((devid) == PCI_CHIP_BROXTON_0  || \
 (devid) == PCI_CHIP_BROXTON_1  || \
-- 
2.6.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] util: Add list_is_single() helper function

2015-10-23 Thread Jason Ekstrand
On Fri, Oct 23, 2015 at 10:51 AM, Matt Turner  wrote:
> On Fri, Oct 23, 2015 at 8:55 AM, Eduardo Lima Mitev  wrote:
>> Returns true is the list has exactly one element, otherwise returns false.
>> ---
>>  src/util/list.h | 8 
>>  1 file changed, 8 insertions(+)
>>
>> diff --git a/src/util/list.h b/src/util/list.h
>> index b98ce59..2b03461 100644
>> --- a/src/util/list.h
>> +++ b/src/util/list.h
>> @@ -99,6 +99,14 @@ static inline bool list_empty(struct list_head *list)
>> return list->next == list;
>>  }
>>
>> +/**
>> + * Return true is the list has exactly one element, otherwise return false.
>
> How about "Returns whether the list has exactly one element."
>
>> + */
>> +static inline bool list_is_single(struct list_head *list)

The kernel list calls this list_is_singular.  We've tried to keep
things similar, so it would be good to use the kernel list name.

>
> Doesn't look like anywhere else uses const arguments, but this could
> be. No matter...

Yeah, const would be good.

>> +{
>> +   return list->next != NULL && list->next->next == list;
>> +}
>> +
>
> Reviewed-by: Matt Turner 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] [v2] i965/skl: Add GT4 PCI IDs

2015-10-23 Thread Ben Widawsky
Like other gen8+ hardware, the hardware automatically scales up thread counts
and URB sizes, so there is no need to do anything but add the PCI IDs.

FINISHME: This patch still needs testing before merge.

v2: Remove the PCI ID removal. That should be done as part of the next patch.

Cc: mesa-sta...@lists.freedesktop.org
Signed-off-by: Ben Widawsky 
---
 include/pci_ids/i965_pci_ids.h  | 4 
 src/mesa/drivers/dri/i965/brw_device_info.c | 4 
 2 files changed, 8 insertions(+)

diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h
index 8a42599..626064a 100644
--- a/include/pci_ids/i965_pci_ids.h
+++ b/include/pci_ids/i965_pci_ids.h
@@ -124,6 +124,10 @@ CHIPSET(0x1921, skl_gt2, "Intel(R) Skylake ULT GT2F")
 CHIPSET(0x1926, skl_gt3, "Intel(R) Skylake ULT GT3")
 CHIPSET(0x192A, skl_gt3, "Intel(R) Skylake SRV GT3")
 CHIPSET(0x192B, skl_gt3, "Intel(R) Skylake Halo GT3")
+CHIPSET(0x1932, skl_gt4, "Intel(R) Skylake GT4")
+CHIPSET(0x193A, skl_gt4, "Intel(R) Skylake GT4")
+CHIPSET(0x193B, skl_gt4, "Intel(R) Skylake GT4")
+CHIPSET(0x193D, skl_gt4, "Intel(R) Skylake GT4")
 CHIPSET(0x22B0, chv, "Intel(R) HD Graphics (Cherryview)")
 CHIPSET(0x22B1, chv, "Intel(R) HD Graphics (Cherryview)")
 CHIPSET(0x22B2, chv, "Intel(R) HD Graphics (Cherryview)")
diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c 
b/src/mesa/drivers/dri/i965/brw_device_info.c
index a6a3bb6..e7a016c 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.c
+++ b/src/mesa/drivers/dri/i965/brw_device_info.c
@@ -335,6 +335,10 @@ static const struct brw_device_info 
brw_device_info_skl_gt3 = {
GEN9_FEATURES, .gt = 3,
 };
 
+static const struct brw_device_info brw_device_info_skl_gt4 = {
+   GEN9_FEATURES, .gt = 4,
+};
+
 static const struct brw_device_info brw_device_info_bxt = {
GEN9_FEATURES,
.is_broxton = 1,
-- 
2.6.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] [v2] i965/skl: PCI ID cleanup and brand strings

2015-10-23 Thread Ben Widawsky
A few new PCI ids are added here, and two are removed (0x190B, 0x192A) because
it no longer seems to exist anywhere.

v2: Update commit message to reflect the removal of 0x192a as well.
Only use ascii characters (Ilia)

Signed-off-by: Ben Widawsky 
---
 include/pci_ids/i965_pci_ids.h | 41 ++---
 1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h
index 626064a..bb51e5c 100644
--- a/include/pci_ids/i965_pci_ids.h
+++ b/include/pci_ids/i965_pci_ids.h
@@ -109,25 +109,28 @@ CHIPSET(0x162A, bdw_gt3, "Intel(R) Iris Pro P6300 
(Broadwell GT3e)")
 CHIPSET(0x162B, bdw_gt3, "Intel(R) Iris 6100 (Broadwell GT3)")
 CHIPSET(0x162D, bdw_gt3, "Intel(R) Broadwell GT3")
 CHIPSET(0x162E, bdw_gt3, "Intel(R) Broadwell GT3")
-CHIPSET(0x1902, skl_gt1, "Intel(R) Skylake DT  GT1")
-CHIPSET(0x1906, skl_gt1, "Intel(R) Skylake ULT GT1")
-CHIPSET(0x190A, skl_gt1, "Intel(R) Skylake SRV GT1")
-CHIPSET(0x190B, skl_gt1, "Intel(R) Skylake Halo GT1")
-CHIPSET(0x190E, skl_gt1, "Intel(R) Skylake ULX GT1")
-CHIPSET(0x1912, skl_gt2, "Intel(R) Skylake DT  GT2")
-CHIPSET(0x1916, skl_gt2, "Intel(R) Skylake ULT GT2")
-CHIPSET(0x191A, skl_gt2, "Intel(R) Skylake SRV GT2")
-CHIPSET(0x191B, skl_gt2, "Intel(R) Skylake Halo GT2")
-CHIPSET(0x191D, skl_gt2, "Intel(R) Skylake WKS GT2")
-CHIPSET(0x191E, skl_gt2, "Intel(R) Skylake ULX GT2")
-CHIPSET(0x1921, skl_gt2, "Intel(R) Skylake ULT GT2F")
-CHIPSET(0x1926, skl_gt3, "Intel(R) Skylake ULT GT3")
-CHIPSET(0x192A, skl_gt3, "Intel(R) Skylake SRV GT3")
-CHIPSET(0x192B, skl_gt3, "Intel(R) Skylake Halo GT3")
-CHIPSET(0x1932, skl_gt4, "Intel(R) Skylake GT4")
-CHIPSET(0x193A, skl_gt4, "Intel(R) Skylake GT4")
-CHIPSET(0x193B, skl_gt4, "Intel(R) Skylake GT4")
-CHIPSET(0x193D, skl_gt4, "Intel(R) Skylake GT4")
+CHIPSET(0x1902, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
+CHIPSET(0x1906, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
+CHIPSET(0x190A, skl_gt1, "Intel(R) Skylake GT1")
+CHIPSET(0x190E, skl_gt1, "Intel(R) Skylake GT1")
+CHIPSET(0x1912, skl_gt2, "Intel(R) HD Graphics 530 (Skylake GT2)")
+CHIPSET(0x1913, skl_gt2, "Intel(R) Skylake GT2f")
+CHIPSET(0x1915, skl_gt2, "Intel(R) Skylake GT2f")
+CHIPSET(0x1916, skl_gt2, "Intel(R) HD Graphics 520 (Skylake GT2)")
+CHIPSET(0x1917, skl_gt2, "Intel(R) Skylake GT2f")
+CHIPSET(0x191A, skl_gt2, "Intel(R) Skylake GT2")
+CHIPSET(0x191B, skl_gt2, "Intel(R) HD Graphics 530 (Skylake GT2)")
+CHIPSET(0x191D, skl_gt2, "Intel(R) HD Graphics P530 (Skylake GT2)")
+CHIPSET(0x191E, skl_gt2, "Intel(R) HD Graphics 515 (Skylake GT2)")
+CHIPSET(0x1921, skl_gt2, "Intel(R) Skylake GT2f")
+CHIPSET(0x1923, skl_gt3, "Intel(R) Iris Graphics 540 (Skylake GT3e)")
+CHIPSET(0x1926, skl_gt3, "Intel(R) HD Graphics 535 (Skylake GT3)")
+CHIPSET(0x1927, skl_gt3, "Intel(R) Iris Graphics 550 (Skylake GT3e)")
+CHIPSET(0x192B, skl_gt3, "Intel(R) Iris Graphics (Skylake GT3fe)")
+CHIPSET(0x1932, skl_gt4, "Intel(R) Iris Pro Graphics 570/580 (Skylake GT4)")
+CHIPSET(0x193A, skl_gt4, "Intel(R) Iris Pro Graphics P580 (Skylake GT4)")
+CHIPSET(0x193B, skl_gt4, "Intel(R) Iris Pro Graphics 580 (Skylake GT4)")
+CHIPSET(0x193D, skl_gt4, "Intel(R) Iris Pro Graphics P580 (Skylake GT4)")
 CHIPSET(0x22B0, chv, "Intel(R) HD Graphics (Cherryview)")
 CHIPSET(0x22B1, chv, "Intel(R) HD Graphics (Cherryview)")
 CHIPSET(0x22B2, chv, "Intel(R) HD Graphics (Cherryview)")
-- 
2.6.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] util: Add list_is_single() helper function

2015-10-23 Thread Eduardo Lima Mitev
On 10/23/2015 07:51 PM, Matt Turner wrote:
> On Fri, Oct 23, 2015 at 8:55 AM, Eduardo Lima Mitev  wrote:
>> Returns true is the list has exactly one element, otherwise returns false.
>> ---
>>  src/util/list.h | 8 
>>  1 file changed, 8 insertions(+)
>>
>> diff --git a/src/util/list.h b/src/util/list.h
>> index b98ce59..2b03461 100644
>> --- a/src/util/list.h
>> +++ b/src/util/list.h
>> @@ -99,6 +99,14 @@ static inline bool list_empty(struct list_head *list)
>> return list->next == list;
>>  }
>>
>> +/**
>> + * Return true is the list has exactly one element, otherwise return false.
> 
> How about "Returns whether the list has exactly one element."
> 

Ok.

>> + */
>> +static inline bool list_is_single(struct list_head *list)
> 
> Doesn't look like anywhere else uses const arguments, but this could
> be. No matter...
>

Yes, it seemed obvious to add const, but consistency with existing code
triumphed. I will add the 'const' to set a precedent for next time.

>> +{
>> +   return list->next != NULL && list->next->next == list;
>> +}
>> +
> 
> Reviewed-by: Matt Turner 
> 

Thanks!

Eduardo
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] util: Add list_is_single() helper function

2015-10-23 Thread Eduardo Lima Mitev
On 10/23/2015 08:00 PM, Jason Ekstrand wrote:
> On Fri, Oct 23, 2015 at 10:51 AM, Matt Turner  wrote:
>> On Fri, Oct 23, 2015 at 8:55 AM, Eduardo Lima Mitev  wrote:
>>> Returns true is the list has exactly one element, otherwise returns false.
>>> ---
>>>  src/util/list.h | 8 
>>>  1 file changed, 8 insertions(+)
>>>
>>> diff --git a/src/util/list.h b/src/util/list.h
>>> index b98ce59..2b03461 100644
>>> --- a/src/util/list.h
>>> +++ b/src/util/list.h
>>> @@ -99,6 +99,14 @@ static inline bool list_empty(struct list_head *list)
>>> return list->next == list;
>>>  }
>>>
>>> +/**
>>> + * Return true is the list has exactly one element, otherwise return false.
>>
>> How about "Returns whether the list has exactly one element."
>>
>>> + */
>>> +static inline bool list_is_single(struct list_head *list)
> 
> The kernel list calls this list_is_singular.  We've tried to keep
> things similar, so it would be good to use the kernel list name.
> 

Ok, will rename it locally to 'list_is_singular'.

>>
>> Doesn't look like anywhere else uses const arguments, but this could
>> be. No matter...
> 
> Yeah, const would be good.
> 

Added it locally already.

Eduardo

>>> +{
>>> +   return list->next != NULL && list->next->next == list;
>>> +}
>>> +
>>
>> Reviewed-by: Matt Turner 
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/20] i965/fs: Append uniform entries to the gather table

2015-10-23 Thread Francisco Jerez
Abdiel Janulgue  writes:

> This patch generates the gather table entries for ordinary uniforms
> if they are present. The uniform constants here will later be packed
> together with UBO constants.
>
> Signed-off-by: Abdiel Janulgue 
> ---
>  src/mesa/drivers/dri/i965/brw_fs.cpp | 18 +-
>  1 file changed, 17 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index d240371..e39d821 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -1863,8 +1863,11 @@ fs_visitor::assign_constant_locations()
>}
> }
>  
> -   stage_prog_data->nr_params = num_push_constants;
> stage_prog_data->nr_pull_params = num_pull_constants;
> +   stage_prog_data->nr_params = 0;

I doubt it's useful to set this to zero here only to calculate it again
below.

> +
> +   unsigned const_reg_access[uniforms];

VLAs are a non-standard extension to C++.  Usually I'd recommend to use
the C++ standard library instead, but because it's banned from the i965
back-end I guess the best you can get is new[]/delete[].

> +   memset(const_reg_access, 0, sizeof(const_reg_access));
>
This memset would be unnecessary if you used new[] to value-initialize
the array.

> /* Up until now, the param[] array has been indexed by reg + reg_offset
>  * of UNIFORM registers.  Move pull constants into pull_param[] and
> @@ -1881,8 +1884,21 @@ fs_visitor::assign_constant_locations()
>   stage_prog_data->pull_param[pull_constant_loc[i]] = value;
>} else if (push_constant_loc[i] != -1) {
>   stage_prog_data->param[push_constant_loc[i]] = value;
> + int p = stage_prog_data->nr_params++;
> +
> + /* access table for uniform registers*/
> + const_reg_access[(ALIGN(prog_data->nr_params, 4) / 4) - 1] |=
> +(1 << (p % 4));

Actually why do you need the const_reg_access array?  You could assign
the channel mask straight away like:

|  const unsigned p = push_constant_loc[i];
|  stage_prog_data->gather_table[p / 4].channel_mask |= 1 << (p % 4);

Then drop the loop below.

Is there any reason why you leave gather table entries half-initialized
at this point?  It would make sense to initialize const_offset to
ALIGN(4 * p, 16) already so you can handle UBO and non-UBO entries
consistently later on in gen7_submit_gather_table().

>}
> }
> +
> +   int num_consts = ALIGN(prog_data->nr_params, 4) / 4;
> +   for (int i = 0; i < num_consts; i++) {
> +  int p = stage_prog_data->nr_gather_table++;
> +  stage_prog_data->gather_table[p].reg = -1;
> +  stage_prog_data->gather_table[p].channel_mask =
> + const_reg_access[i];
> +   }
>  }
>  
>  /**
> -- 
> 1.9.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] [v3] i965/skl: PCI ID cleanup and brand strings

2015-10-23 Thread Ben Widawsky
A few new PCI ids are added here, and two are removed (0x190B, 0x192A) because
it no longer seems to exist anywhere.

v2: Update commit message to reflect the removal of 0x192a as well.
Only use ascii characters (Ilia)

v3: I misinterpreted the table. I don't actually know the brand strings for GT4
parts.

Signed-off-by: Ben Widawsky 
---
 include/pci_ids/i965_pci_ids.h | 33 ++---
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h
index 626064a..2747dc5 100644
--- a/include/pci_ids/i965_pci_ids.h
+++ b/include/pci_ids/i965_pci_ids.h
@@ -109,21 +109,24 @@ CHIPSET(0x162A, bdw_gt3, "Intel(R) Iris Pro P6300 
(Broadwell GT3e)")
 CHIPSET(0x162B, bdw_gt3, "Intel(R) Iris 6100 (Broadwell GT3)")
 CHIPSET(0x162D, bdw_gt3, "Intel(R) Broadwell GT3")
 CHIPSET(0x162E, bdw_gt3, "Intel(R) Broadwell GT3")
-CHIPSET(0x1902, skl_gt1, "Intel(R) Skylake DT  GT1")
-CHIPSET(0x1906, skl_gt1, "Intel(R) Skylake ULT GT1")
-CHIPSET(0x190A, skl_gt1, "Intel(R) Skylake SRV GT1")
-CHIPSET(0x190B, skl_gt1, "Intel(R) Skylake Halo GT1")
-CHIPSET(0x190E, skl_gt1, "Intel(R) Skylake ULX GT1")
-CHIPSET(0x1912, skl_gt2, "Intel(R) Skylake DT  GT2")
-CHIPSET(0x1916, skl_gt2, "Intel(R) Skylake ULT GT2")
-CHIPSET(0x191A, skl_gt2, "Intel(R) Skylake SRV GT2")
-CHIPSET(0x191B, skl_gt2, "Intel(R) Skylake Halo GT2")
-CHIPSET(0x191D, skl_gt2, "Intel(R) Skylake WKS GT2")
-CHIPSET(0x191E, skl_gt2, "Intel(R) Skylake ULX GT2")
-CHIPSET(0x1921, skl_gt2, "Intel(R) Skylake ULT GT2F")
-CHIPSET(0x1926, skl_gt3, "Intel(R) Skylake ULT GT3")
-CHIPSET(0x192A, skl_gt3, "Intel(R) Skylake SRV GT3")
-CHIPSET(0x192B, skl_gt3, "Intel(R) Skylake Halo GT3")
+CHIPSET(0x1902, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
+CHIPSET(0x1906, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
+CHIPSET(0x190A, skl_gt1, "Intel(R) Skylake GT1")
+CHIPSET(0x190E, skl_gt1, "Intel(R) Skylake GT1")
+CHIPSET(0x1912, skl_gt2, "Intel(R) HD Graphics 530 (Skylake GT2)")
+CHIPSET(0x1913, skl_gt2, "Intel(R) Skylake GT2f")
+CHIPSET(0x1915, skl_gt2, "Intel(R) Skylake GT2f")
+CHIPSET(0x1916, skl_gt2, "Intel(R) HD Graphics 520 (Skylake GT2)")
+CHIPSET(0x1917, skl_gt2, "Intel(R) Skylake GT2f")
+CHIPSET(0x191A, skl_gt2, "Intel(R) Skylake GT2")
+CHIPSET(0x191B, skl_gt2, "Intel(R) HD Graphics 530 (Skylake GT2)")
+CHIPSET(0x191D, skl_gt2, "Intel(R) HD Graphics P530 (Skylake GT2)")
+CHIPSET(0x191E, skl_gt2, "Intel(R) HD Graphics 515 (Skylake GT2)")
+CHIPSET(0x1921, skl_gt2, "Intel(R) Skylake GT2f")
+CHIPSET(0x1923, skl_gt3, "Intel(R) Iris Graphics 540 (Skylake GT3e)")
+CHIPSET(0x1926, skl_gt3, "Intel(R) HD Graphics 535 (Skylake GT3)")
+CHIPSET(0x1927, skl_gt3, "Intel(R) Iris Graphics 550 (Skylake GT3e)")
+CHIPSET(0x192B, skl_gt3, "Intel(R) Iris Graphics (Skylake GT3fe)")
 CHIPSET(0x1932, skl_gt4, "Intel(R) Skylake GT4")
 CHIPSET(0x193A, skl_gt4, "Intel(R) Skylake GT4")
 CHIPSET(0x193B, skl_gt4, "Intel(R) Skylake GT4")
-- 
2.6.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/20] i965/fs: Append uniform entries to the gather table

2015-10-23 Thread Matt Turner
On Fri, Oct 23, 2015 at 11:23 AM, Francisco Jerez  wrote:
> Abdiel Janulgue  writes:
>
>> This patch generates the gather table entries for ordinary uniforms
>> if they are present. The uniform constants here will later be packed
>> together with UBO constants.
>>
>> Signed-off-by: Abdiel Janulgue 
>> ---
>>  src/mesa/drivers/dri/i965/brw_fs.cpp | 18 +-
>>  1 file changed, 17 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
>> b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> index d240371..e39d821 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> @@ -1863,8 +1863,11 @@ fs_visitor::assign_constant_locations()
>>}
>> }
>>
>> -   stage_prog_data->nr_params = num_push_constants;
>> stage_prog_data->nr_pull_params = num_pull_constants;
>> +   stage_prog_data->nr_params = 0;
>
> I doubt it's useful to set this to zero here only to calculate it again
> below.
>
>> +
>> +   unsigned const_reg_access[uniforms];
>
> VLAs are a non-standard extension to C++.  Usually I'd recommend to use
> the C++ standard library instead, but because it's banned from the i965
> back-end I guess the best you can get is new[]/delete[].

So... why not just use the extension to C++? Does some compiler we
care about not support them?

GCC does: https://gcc.gnu.org/onlinedocs/gcc/Variable-Length.html

Looks like this use would be allowed by clang as well:
http://clang.llvm.org/compatibility.html#vla
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 18/21] mesa/extensions: Remove extra memsets on gl_extensions

2015-10-23 Thread Nanley Chery
On Thu, Oct 22, 2015 at 3:32 AM, Emil Velikov 
wrote:

> On 19 October 2015 at 23:44, Nanley Chery  wrote:
> > From: Nanley Chery 
> >
> > Aside from those modified in this commit, all gl_extensions structs are
> > zero-initialized by default. There is therefore no need to memset the
> > structs to 0. Also, remove the open-coded memset in
> > _mesa_init_extensions().
> >
> > Signed-off-by: Nanley Chery 
> > ---
> >  src/mesa/main/extensions.c | 18 --
> >  1 file changed, 4 insertions(+), 14 deletions(-)
> >
> > diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
> > index 365e7ed..6cd2b27 100644
> > --- a/src/mesa/main/extensions.c
> > +++ b/src/mesa/main/extensions.c
> > @@ -37,8 +37,8 @@
> >  #include "macros.h"
> >  #include "mtypes.h"
> >
> > -struct gl_extensions _mesa_extension_override_enables;
> > -struct gl_extensions _mesa_extension_override_disables;
> > +struct gl_extensions _mesa_extension_override_enables = {0};
> > +struct gl_extensions _mesa_extension_override_disables = {0};
> Side note: once ARB_compute_shader lands in for i965 we can even make
> these local/static.
>
>
That's interesting. How so?

-Emil
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 18/21] mesa/extensions: Remove extra memsets on gl_extensions

2015-10-23 Thread Jordan Justen
On 2015-10-22 03:32:58, Emil Velikov wrote:
> On 19 October 2015 at 23:44, Nanley Chery  wrote:
> > From: Nanley Chery 
> >
> > Aside from those modified in this commit, all gl_extensions structs are
> > zero-initialized by default. There is therefore no need to memset the
> > structs to 0. Also, remove the open-coded memset in
> > _mesa_init_extensions().
> >
> > Signed-off-by: Nanley Chery 
> > ---
> >  src/mesa/main/extensions.c | 18 --
> >  1 file changed, 4 insertions(+), 14 deletions(-)
> >
> > diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
> > index 365e7ed..6cd2b27 100644
> > --- a/src/mesa/main/extensions.c
> > +++ b/src/mesa/main/extensions.c
> > @@ -37,8 +37,8 @@
> >  #include "macros.h"
> >  #include "mtypes.h"
> >
> > -struct gl_extensions _mesa_extension_override_enables;
> > -struct gl_extensions _mesa_extension_override_disables;
> > +struct gl_extensions _mesa_extension_override_enables = {0};
> > +struct gl_extensions _mesa_extension_override_disables = {0};
> Side note: once ARB_compute_shader lands in for i965 we can even make
> these local/static.

I added these variables specifically so a driver could check to see
what extensions were overridden early in context creation. Making them
static would undo that feature...

The alternative before was to add a custom environment variable. (See
10e03b44)

-Jordan
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/20] i965/fs: Append uniform entries to the gather table

2015-10-23 Thread Francisco Jerez
Matt Turner  writes:

> On Fri, Oct 23, 2015 at 11:23 AM, Francisco Jerez  
> wrote:
>> Abdiel Janulgue  writes:
>>
>>> This patch generates the gather table entries for ordinary uniforms
>>> if they are present. The uniform constants here will later be packed
>>> together with UBO constants.
>>>
>>> Signed-off-by: Abdiel Janulgue 
>>> ---
>>>  src/mesa/drivers/dri/i965/brw_fs.cpp | 18 +-
>>>  1 file changed, 17 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
>>> b/src/mesa/drivers/dri/i965/brw_fs.cpp
>>> index d240371..e39d821 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
>>> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
>>> @@ -1863,8 +1863,11 @@ fs_visitor::assign_constant_locations()
>>>}
>>> }
>>>
>>> -   stage_prog_data->nr_params = num_push_constants;
>>> stage_prog_data->nr_pull_params = num_pull_constants;
>>> +   stage_prog_data->nr_params = 0;
>>
>> I doubt it's useful to set this to zero here only to calculate it again
>> below.
>>
>>> +
>>> +   unsigned const_reg_access[uniforms];
>>
>> VLAs are a non-standard extension to C++.  Usually I'd recommend to use
>> the C++ standard library instead, but because it's banned from the i965
>> back-end I guess the best you can get is new[]/delete[].
>
> So... why not just use the extension to C++? Does some compiler we
> care about not support them?
>
> GCC does: https://gcc.gnu.org/onlinedocs/gcc/Variable-Length.html
>
> Looks like this use would be allowed by clang as well:
> http://clang.llvm.org/compatibility.html#vla

I believe only GCC has a more or less reasonable implementation, Clang's
implementation doesn't allow much of what GCC's allows, like objects
with user-defined constructors or anything non-POD (seems mainly
intended for compatibility with C99 code).  MS VC++ doesn't support it
at all.

The C99 VLA feature was BTW made optional again in C11, and the similar
feature proposed for C++14 (runtime-sized arrays) was rejected by the
standards committee.  Anyway it doesn't seem like a big loss for C++,
there are already standards-compliant ways to get a type-safe array of
automatic duration and dynamically determined size, which support
non-POD C++ objects as element type and behave as first-class C++
objects themselves unlike in the various VLA-like language extensions.


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/20] i965/fs: Append uniform entries to the gather table

2015-10-23 Thread Matt Turner
On Fri, Oct 23, 2015 at 12:38 PM, Francisco Jerez  wrote:
> Matt Turner  writes:
>
>> On Fri, Oct 23, 2015 at 11:23 AM, Francisco Jerez  
>> wrote:
>>> Abdiel Janulgue  writes:
>>>
 This patch generates the gather table entries for ordinary uniforms
 if they are present. The uniform constants here will later be packed
 together with UBO constants.

 Signed-off-by: Abdiel Janulgue 
 ---
  src/mesa/drivers/dri/i965/brw_fs.cpp | 18 +-
  1 file changed, 17 insertions(+), 1 deletion(-)

 diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
 b/src/mesa/drivers/dri/i965/brw_fs.cpp
 index d240371..e39d821 100644
 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
 @@ -1863,8 +1863,11 @@ fs_visitor::assign_constant_locations()
}
 }

 -   stage_prog_data->nr_params = num_push_constants;
 stage_prog_data->nr_pull_params = num_pull_constants;
 +   stage_prog_data->nr_params = 0;
>>>
>>> I doubt it's useful to set this to zero here only to calculate it again
>>> below.
>>>
 +
 +   unsigned const_reg_access[uniforms];
>>>
>>> VLAs are a non-standard extension to C++.  Usually I'd recommend to use
>>> the C++ standard library instead, but because it's banned from the i965
>>> back-end I guess the best you can get is new[]/delete[].
>>
>> So... why not just use the extension to C++? Does some compiler we
>> care about not support them?
>>
>> GCC does: https://gcc.gnu.org/onlinedocs/gcc/Variable-Length.html
>>
>> Looks like this use would be allowed by clang as well:
>> http://clang.llvm.org/compatibility.html#vla
>
> I believe only GCC has a more or less reasonable implementation, Clang's
> implementation doesn't allow much of what GCC's allows, like objects
> with user-defined constructors or anything non-POD (seems mainly
> intended for compatibility with C99 code).  MS VC++ doesn't support it
> at all.

Okay, so the usage in this patch seems fine since we don't care about
MSVC and Clang's support should be sufficient to handle an array of
unsigned.

> The C99 VLA feature was BTW made optional again in C11, and the similar
> feature proposed for C++14 (runtime-sized arrays) was rejected by the
> standards committee.  Anyway it doesn't seem like a big loss for C++,
> there are already standards-compliant ways to get a type-safe array of
> automatic duration and dynamically determined size, which support
> non-POD C++ objects as element type and behave as first-class C++
> objects themselves unlike in the various VLA-like language extensions.

Interesting. Thanks for the information.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/4] HEVC support for VA-API

2015-10-23 Thread Leo Liu
The series support HEVC codec for VA-API and fix for VDPAU
they are tested with the latest ffmpeg and gstreamer-vaapi 
plugins.

Boyuan Zhang (4):
  vl: add RefPicList defines for VAAPI HEVC decode
  radeon/uvd: implement and add flag for VAAPI HEVC decode
  st/va: add VAAPI HEVC decode support
  st/vdpau: disable RefPicList for Vdpau HEVC

 configure.ac   |   2 +-
 src/gallium/drivers/radeon/radeon_uvd.c|   7 ++
 src/gallium/drivers/radeon/radeon_uvd.h|   9 ++
 src/gallium/include/pipe/p_video_state.h   |   2 +
 src/gallium/state_trackers/va/config.c |   2 +-
 src/gallium/state_trackers/va/context.c|  20 
 src/gallium/state_trackers/va/picture.c| 182 +
 src/gallium/state_trackers/va/va_private.h |   5 +
 src/gallium/state_trackers/vdpau/decode.c  |   1 +
 9 files changed, 228 insertions(+), 2 deletions(-)

-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] vl: add RefPicList defines for VAAPI HEVC decode

2015-10-23 Thread Leo Liu
From: Boyuan Zhang 

Signed-off-by: Boyuan Zhang 
Reviewed-by: Christian König 
Reviewed-by: Leo Liu 
---
 src/gallium/include/pipe/p_video_state.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/gallium/include/pipe/p_video_state.h 
b/src/gallium/include/pipe/p_video_state.h
index 7d13151..d353be6 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -479,6 +479,8 @@ struct pipe_h265_picture_desc
uint8_t RefPicSetStCurrBefore[8];
uint8_t RefPicSetStCurrAfter[8];
uint8_t RefPicSetLtCurr[8];
+   uint8_t RefPicList[2][15];
+   bool UseRefPicList;
 };
 
 #ifdef __cplusplus
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] st/va: add VAAPI HEVC decode support

2015-10-23 Thread Leo Liu
From: Boyuan Zhang 

Signed-off-by: Boyuan Zhang 
Reviewed-by: Christian König 
Reviewed-by: Leo Liu 
---
 configure.ac   |   2 +-
 src/gallium/state_trackers/va/config.c |   2 +-
 src/gallium/state_trackers/va/context.c|  20 
 src/gallium/state_trackers/va/picture.c| 182 +
 src/gallium/state_trackers/va/va_private.h |   5 +
 5 files changed, 209 insertions(+), 2 deletions(-)

diff --git a/configure.ac b/configure.ac
index 5aa19a5..3f15881 100644
--- a/configure.ac
+++ b/configure.ac
@@ -81,7 +81,7 @@ PRESENTPROTO_REQUIRED=1.0
 LIBUDEV_REQUIRED=151
 GLPROTO_REQUIRED=1.4.14
 LIBOMXIL_BELLAGIO_REQUIRED=0.0
-LIBVA_REQUIRED=0.35.0
+LIBVA_REQUIRED=0.38.0
 VDPAU_REQUIRED=1.1
 WAYLAND_REQUIRED=1.2.0
 XCB_REQUIRED=1.9.3
diff --git a/src/gallium/state_trackers/va/config.c 
b/src/gallium/state_trackers/va/config.c
index cfb0b25..5030f9e 100644
--- a/src/gallium/state_trackers/va/config.c
+++ b/src/gallium/state_trackers/va/config.c
@@ -45,7 +45,7 @@ vlVaQueryConfigProfiles(VADriverContextP ctx, VAProfile 
*profile_list, int *num_
*num_profiles = 0;
 
pscreen = VL_VA_PSCREEN(ctx);
-   for (p = PIPE_VIDEO_PROFILE_MPEG2_SIMPLE; p <= 
PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH; ++p)
+   for (p = PIPE_VIDEO_PROFILE_MPEG2_SIMPLE; p <= 
PIPE_VIDEO_PROFILE_HEVC_MAIN_444; ++p)
   if (pscreen->get_video_param(pscreen, p, 
PIPE_VIDEO_ENTRYPOINT_BITSTREAM, PIPE_VIDEO_CAP_SUPPORTED)) {
  vap = PipeToProfile(p);
  if (vap != VAProfileNone)
diff --git a/src/gallium/state_trackers/va/context.c 
b/src/gallium/state_trackers/va/context.c
index 8b003ae..5adbe76 100644
--- a/src/gallium/state_trackers/va/context.c
+++ b/src/gallium/state_trackers/va/context.c
@@ -194,6 +194,21 @@ vlVaCreateContext(VADriverContextP ctx, VAConfigID 
config_id, int picture_width,
   }
}
 
+   if (u_reduce_video_profile(context->decoder->profile) ==
+ PIPE_VIDEO_FORMAT_HEVC) {
+  context->desc.h265.pps = CALLOC_STRUCT(pipe_h265_pps);
+  if (!context->desc.h265.pps) {
+ FREE(context);
+ return VA_STATUS_ERROR_ALLOCATION_FAILED;
+  }
+  context->desc.h265.pps->sps = CALLOC_STRUCT(pipe_h265_sps);
+  if (!context->desc.h265.pps->sps) {
+ FREE(context->desc.h265.pps);
+ FREE(context);
+ return VA_STATUS_ERROR_ALLOCATION_FAILED;
+  }
+   }
+
context->desc.base.profile = config_id;
*context_id = handle_table_add(drv->htab, context);
 
@@ -216,6 +231,11 @@ vlVaDestroyContext(VADriverContextP ctx, VAContextID 
context_id)
   FREE(context->desc.h264.pps->sps);
   FREE(context->desc.h264.pps);
}
+   if (u_reduce_video_profile(context->decoder->profile) ==
+ PIPE_VIDEO_FORMAT_HEVC) {
+  FREE(context->desc.h265.pps->sps);
+  FREE(context->desc.h265.pps);
+   }
context->decoder->destroy(context->decoder);
FREE(context);
handle_table_remove(drv->htab, context_id);
diff --git a/src/gallium/state_trackers/va/picture.c 
b/src/gallium/state_trackers/va/picture.c
index 9b94b39..93ec00b 100644
--- a/src/gallium/state_trackers/va/picture.c
+++ b/src/gallium/state_trackers/va/picture.c
@@ -81,6 +81,7 @@ handlePictureParameterBuffer(vlVaDriver *drv, vlVaContext 
*context, vlVaBuffer *
VAPictureParameterBufferH264 *h264;
VAPictureParameterBufferVC1 * vc1;
VAPictureParameterBufferMPEG4 *mpeg4;
+   VAPictureParameterBufferHEVC *hevc;
vlVaSurface *surf_forward;
vlVaSurface *surf_backward;
unsigned int i;
@@ -286,6 +287,157 @@ handlePictureParameterBuffer(vlVaDriver *drv, vlVaContext 
*context, vlVaBuffer *
 
   break;
 
+  case PIPE_VIDEO_FORMAT_HEVC:
+  assert(buf->size >= sizeof(VAPictureParameterBufferHEVC) && 
buf->num_elements == 1);
+  hevc = buf->data;
+  context->desc.h265.pps->sps->chroma_format_idc = 
hevc->pic_fields.bits.chroma_format_idc;
+  context->desc.h265.pps->sps->separate_colour_plane_flag =
+ hevc->pic_fields.bits.separate_colour_plane_flag;
+  context->desc.h265.pps->sps->pic_width_in_luma_samples = 
hevc->pic_width_in_luma_samples;
+  context->desc.h265.pps->sps->pic_height_in_luma_samples = 
hevc->pic_height_in_luma_samples;
+  context->desc.h265.pps->sps->bit_depth_luma_minus8 = 
hevc->bit_depth_luma_minus8;
+  context->desc.h265.pps->sps->bit_depth_chroma_minus8 = 
hevc->bit_depth_chroma_minus8;
+  context->desc.h265.pps->sps->log2_max_pic_order_cnt_lsb_minus4 =
+ hevc->log2_max_pic_order_cnt_lsb_minus4;
+  context->desc.h265.pps->sps->sps_max_dec_pic_buffering_minus1 =
+ hevc->sps_max_dec_pic_buffering_minus1;
+  context->desc.h265.pps->sps->log2_min_luma_coding_block_size_minus3 =
+ hevc->log2_min_luma_coding_block_size_minus3;
+  context->desc.h265.pps->sps->log2_diff_max_min_luma_coding_block_size =
+ hevc->log2_diff_max_min_luma_coding_block_size;
+  context->desc.h265.pps->sps->log2_min_transform_block_size_minus2 =
+  

[Mesa-dev] [PATCH 4/4] st/vdpau: disable RefPicList for Vdpau HEVC

2015-10-23 Thread Leo Liu
From: Boyuan Zhang 

Signed-off-by: Boyuan Zhang 
Reviewed-by: Christian König 
Reviewed-by: Leo Liu 
---
 src/gallium/state_trackers/vdpau/decode.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/state_trackers/vdpau/decode.c 
b/src/gallium/state_trackers/vdpau/decode.c
index 3233799..f85bce8 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -518,6 +518,7 @@ vlVdpDecoderRenderH265(struct pipe_h265_picture_desc 
*picture,
memcpy(picture->RefPicSetStCurrBefore, picture_info->RefPicSetStCurrBefore, 
8);
memcpy(picture->RefPicSetStCurrAfter, picture_info->RefPicSetStCurrAfter, 
8);
memcpy(picture->RefPicSetLtCurr, picture_info->RefPicSetLtCurr, 8);
+   picture->UseRefPicList = false;
 
return VDP_STATUS_OK;
 }
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/4] radeon/uvd: implement and add flag for VAAPI HEVC decode

2015-10-23 Thread Leo Liu
From: Boyuan Zhang 

Signed-off-by: Boyuan Zhang 
Reviewed-by: Christian König 
Reviewed-by: Leo Liu 
---
 src/gallium/drivers/radeon/radeon_uvd.c | 7 +++
 src/gallium/drivers/radeon/radeon_uvd.h | 9 +
 2 files changed, 16 insertions(+)

diff --git a/src/gallium/drivers/radeon/radeon_uvd.c 
b/src/gallium/drivers/radeon/radeon_uvd.c
index c3ac7e7..33b0136 100644
--- a/src/gallium/drivers/radeon/radeon_uvd.c
+++ b/src/gallium/drivers/radeon/radeon_uvd.c
@@ -478,6 +478,8 @@ static struct ruvd_h265 get_h265_msg(struct ruvd_decoder 
*dec, struct pipe_video
result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8;
if (((struct r600_common_screen*)dec->screen)->family == CHIP_CARRIZO)
result.sps_info_flags |= 1 << 9;
+   if (pic->UseRefPicList == true)
+   result.sps_info_flags |= 1 << 10;
 
result.chroma_format = pic->pps->sps->chroma_format_idc;
result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
@@ -586,6 +588,11 @@ static struct ruvd_h265 get_h265_msg(struct ruvd_decoder 
*dec, struct pipe_video
memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64);
memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64);
 
+   for (i = 0 ; i < 2 ; i++) {
+   for (int j = 0 ; j < 15 ; j++)
+   result.direct_reflist[i][j] = pic->RefPicList[i][j];
+   }
+
/* TODO
result.highestTid;
result.isNonRef;
diff --git a/src/gallium/drivers/radeon/radeon_uvd.h 
b/src/gallium/drivers/radeon/radeon_uvd.h
index 452fbd6..9cc0a69 100644
--- a/src/gallium/drivers/radeon/radeon_uvd.h
+++ b/src/gallium/drivers/radeon/radeon_uvd.h
@@ -233,6 +233,15 @@ struct ruvd_h265 {
 
uint8_t highestTid;
uint8_t isNonRef;
+
+   uint8_t p010_mode;
+   uint8_t msb_mode;
+   uint8_t luma_10to8;
+   uint8_t chroma_10to8;
+   uint8_t sclr_luma10to8;
+   uint8_t sclr_chroma10to8;
+
+   uint8_t direct_reflist[2][15];
 };
 
 struct ruvd_vc1 {
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] gallivm: disable f16c when not using AVX

2015-10-23 Thread sroland
From: Roland Scheidegger 

f16c intrinsic can only be emitted when AVX is used. So when we disable AVX
due to forcing 128bit vectors we must not use this intrinsic (depending on
llvm version, this worked previously because llvm used AVX even when we didn't
tell it to, however I've seen this fail with llvm 3.3 since
718249843b915decf8fccec92e466ac1a6219934 which seems to have the side effect
of disabling avx in llvm albeit it only touches sse flags really).
Possibly one day should actually try to use avx even with 128bit vectors...
---
 src/gallium/auxiliary/gallivm/lp_bld_init.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c 
b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index 017d075..e6eede8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -427,6 +427,7 @@ lp_build_init(void)
*/
   util_cpu_caps.has_avx = 0;
   util_cpu_caps.has_avx2 = 0;
+  util_cpu_caps.has_f16c = 0;
}
 
 #ifdef PIPE_ARCH_PPC_64
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 5/7] radeonsi: Implement DCC fast clear.

2015-10-23 Thread Marek Olšák
On Fri, Oct 23, 2015 at 7:06 PM, Bas Nieuwenhuizen
 wrote:
> On Fri, Oct 23, 2015 at 4:57 PM, Marek Olšák  wrote:
>> On Fri, Oct 23, 2015 at 1:57 PM, Bas Nieuwenhuizen
>>  wrote:
>>> On Fri, Oct 23, 2015 at 1:52 PM, Marek Olšák  wrote:
 On Fri, Oct 23, 2015 at 1:30 PM, Bas Nieuwenhuizen
  wrote:
> On Fri, Oct 23, 2015 at 12:50 PM, Marek Olšák  wrote:
>> On Fri, Oct 23, 2015 at 12:17 PM, Bas Nieuwenhuizen
>>  wrote:
>>> On Thu, Oct 22, 2015 at 12:12 PM, Marek Olšák  wrote:
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
> b/src/gallium/drivers/radeonsi/si_descriptors.c
> index 5548cba3..a277fa5 100644
> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> @@ -234,7 +234,7 @@ static void si_set_sampler_views(struct 
> pipe_context *ctx,
> } else {
> samplers->depth_texture_mask &= ~(1 
> << slot);
> }
> -   if (rtex->cmask.size || rtex->fmask.size) {
> +   if (rtex->cmask.size || rtex->fmask.size || 
> rtex->surface.dcc_enabled) {
> samplers->compressed_colortex_mask |= 
> 1 << slot;

 I'd like this flag to be set only when dirty_level_mask is non-zero.
 Setting this for all textures that have DCC is quite expensive in draw
 calls.
>>>
>>> I think this code is incorrect even without considering DCC. If we do
>>> a fast clear on a surface which allocates a cmask and then use that
>>> surface as a texture without calling set_sampler_views in between
>>> (because it was bound before) we get a stale compressed_colortex_mask.
>>>
>>> Some testing shows that this can be triggered using OpenGL, although
>>> the GL_ARB_texture_barrier extension may be needed to make the result
>>> not undefined per the specification.
>>
>> In that case, we should decompress in texture_barrier and not in draw 
>> calls.
>>
>> Marek
>
>
> texture_barrier does not need to be called though, the language
> changes might be needed.
>
> Basically the test is
>
> fbo1, fbo2 framebuffers with 1 color buffer each:
>
> bind fbo2 as texture
> clear fbo1 using shader
> bind fbo1 as texture
> clear fbo2 using shader
> clear fbo1 using clear (which results in cmask being allocated for fbo1)
>>>
> bind fbo2 as texture
> copy fbo2 to fbo1 using copy shader (which wrongly does not decompress 
> fbo1)
>>>
>>> My apologies, these two lines should just be a copy fbo1 to fbo2,
>>> which does need to eleminate the cmask fast clear.
>>
>> That sounds like a texture barrier is required.
>>
>> Marek
>
> I think it valid if even without ARB_texture_barrier as the only place
> where we could have a rendering feedback loop is the clear. The shader
> clears and the copy do not have the same fbo as texture and therefore
> no render feedback loop.
>
> I am not sure if a clear classifies as a GL rendering operation. If it
> is not, we have no render feedback loop. If it is, it is still not a
> render feedback loop as the active fragment and vertex shaders (the
> clear shader) do not contain instructions that sample from that
> texture.

The texture barrier ensures that the previous writes are visible to
the next read of the texture. The previous reads are irrelevant.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 17/21] mesa: Fix ARB_ES3_compatibility functionality leaks

2015-10-23 Thread Nanley Chery
On Tue, Oct 20, 2015 at 4:32 PM, Nanley Chery  wrote:

>
>
> On Tue, Oct 20, 2015 at 8:45 AM, Marek Olšák  wrote:
>
>> NAK. I'd like this extension in compatibility contexts. The fact the
>> spec requires OpenGL 3.3 was just authors' laziness.
>>
>>
> If it's the right thing to do, I can allow all versions of GL
> compatibility by
> simply replacing '33' with 'GLL' in a v2. I'll try to nail down what the
> actual minimum version required is.
>
>

It seems that GLSL 3.3 and ARB_occlusion_query2 are required instead of
OpenGL 3.3.
I'd like to spend more time confirming this and seeing if the spec authors
would agree.
Until then, I'll drop this patch from this series.

Generally, when a spec requires 3.3, we should allow 3.0 at least,
>> because Mesa doesn't support 3.3 compatibility contexts.
>>
>>
>
> I will keep this in mind.
>
>
>> Unless you have a very good _technical_ reason not to support this
>> extension, it should be supported by all versions. This applies to
>> other such patches as well.
>>
>>
> GL  3.3 may have introduced a new feature on which this extension is
> dependent.
> If this feature is not contained within any other extension and our
> context version
> is less than 3.3, we may not be able to actually implement the extension
> spec as
> described. I will try to see if this is the case. I agree that technical
> roadblocks should be the
> only things which prevent us from advertising certain extensions.
>
> Thanks,
> Nanley
>
> Marek
>>
>> On Tue, Oct 20, 2015 at 12:44 AM, Nanley Chery 
>> wrote:
>> > From: Nanley Chery 
>> >
>> > Stop leaks into GLES1/2 and pre-3.3 GL contexts in all uses.
>> >
>> > The extension spec lists OpenGL 3.3 as one of the requirements, so
>> > update the extension table accordingly.
>> >
>> > v2. Require 3.3 for GL legacy contexts as well (Chad).
>> >
>> > Signed-off-by: Nanley Chery 
>> > ---
>> >  src/mesa/main/enable.c   | 4 ++--
>> >  src/mesa/main/extensions_table.h | 2 +-
>> >  src/mesa/main/glformats.c| 2 +-
>> >  src/mesa/main/queryobj.c | 2 +-
>> >  4 files changed, 5 insertions(+), 5 deletions(-)
>> >
>> > diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c
>> > index 42f6799..ee95334 100644
>> > --- a/src/mesa/main/enable.c
>> > +++ b/src/mesa/main/enable.c
>> > @@ -972,7 +972,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum
>> cap, GLboolean state)
>> >   break;
>> >
>> >case GL_PRIMITIVE_RESTART_FIXED_INDEX:
>> > -if (!_mesa_is_gles3(ctx) &&
>> !ctx->Extensions.ARB_ES3_compatibility)
>> > +if (!_mesa_is_gles3(ctx) &&
>> !_mesa_has_ARB_ES3_compatibility(ctx))
>> >  goto invalid_enum_error;
>> >   if (ctx->Array.PrimitiveRestartFixedIndex != state) {
>> >  FLUSH_VERTICES(ctx, _NEW_TRANSFORM);
>> > @@ -1582,7 +1582,7 @@ _mesa_IsEnabled( GLenum cap )
>> >   return ctx->Array.PrimitiveRestart;
>> >
>> >case GL_PRIMITIVE_RESTART_FIXED_INDEX:
>> > -if (!_mesa_is_gles3(ctx) &&
>> !ctx->Extensions.ARB_ES3_compatibility) {
>> > +if (!_mesa_is_gles3(ctx) &&
>> !_mesa_has_ARB_ES3_compatibility(ctx)) {
>> >  goto invalid_enum_error;
>> >   }
>> >   return ctx->Array.PrimitiveRestartFixedIndex;
>> > diff --git a/src/mesa/main/extensions_table.h
>> b/src/mesa/main/extensions_table.h
>> > index 62e0804..760fbe9 100644
>> > --- a/src/mesa/main/extensions_table.h
>> > +++ b/src/mesa/main/extensions_table.h
>> > @@ -4,7 +4,7 @@
>> >  #define ES2 1u
>> >  #define  x ~0u
>> >  EXT(ARB_ES2_compatibility   , ARB_ES2_compatibility
>>   , GLL, GLC,  x ,  x , 2009)
>> > -EXT(ARB_ES3_compatibility   , ARB_ES3_compatibility
>>   , GLL, GLC,  x ,  x , 2012)
>> > +EXT(ARB_ES3_compatibility   , ARB_ES3_compatibility
>>   ,  33,  33,  x ,  x , 2012)
>> >  EXT(ARB_arrays_of_arrays, ARB_arrays_of_arrays
>>, GLL, GLC,  x ,  x , 2012)
>> >  EXT(ARB_base_instance   , ARB_base_instance
>>   , GLL, GLC,  x ,  x , 2011)
>> >  EXT(ARB_blend_func_extended , ARB_blend_func_extended
>>   , GLL, GLC,  x ,  x , 2009)
>> > diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c
>> > index 542dcfc..6b602d8 100644
>> > --- a/src/mesa/main/glformats.c
>> > +++ b/src/mesa/main/glformats.c
>> > @@ -1325,7 +1325,7 @@ _mesa_is_compressed_format(const struct
>> gl_context *ctx, GLenum format)
>> >return _mesa_is_gles(ctx)
>> >   && ctx->Extensions.OES_compressed_ETC1_RGB8_texture;
>> > case MESA_FORMAT_LAYOUT_ETC2:
>> > -  return _mesa_is_gles3(ctx) ||
>> ctx->Extensions.ARB_ES3_compatibility;
>> > +  return _mesa_is_gles3(ctx) ||
>> _mesa_has_ARB_ES3_compatibility(ctx);
>> > case MESA_FORMAT_LAYOUT_BPTC:
>> >return _mesa_has_ARB_texture_compression_bptc(ctx);
>> > case MESA_FORMAT_LAYOUT_ASTC:
>> > diff --git a/

[Mesa-dev] [PATCH] i965: Remove unused devinfo revision

2015-10-23 Thread Ben Widawsky
I left the function to obtain the revision because it is, and will continue to
be useful in the future. I'd rather not have to dig it up every time we need it.
Comments left at the implementation to say as much.

Signed-off-by: Ben Widawsky 
---
 src/mesa/drivers/dri/i965/brw_device_info.c |  2 +-
 src/mesa/drivers/dri/i965/brw_device_info.h |  2 +-
 src/mesa/drivers/dri/i965/intel_screen.c| 14 +++---
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c 
b/src/mesa/drivers/dri/i965/brw_device_info.c
index e7a016c..789b93f 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.c
+++ b/src/mesa/drivers/dri/i965/brw_device_info.c
@@ -363,7 +363,7 @@ static const struct brw_device_info brw_device_info_bxt = {
 };
 
 const struct brw_device_info *
-brw_get_device_info(int devid, int revision)
+brw_get_device_info(int devid)
 {
const struct brw_device_info *devinfo;
switch (devid) {
diff --git a/src/mesa/drivers/dri/i965/brw_device_info.h 
b/src/mesa/drivers/dri/i965/brw_device_info.h
index 2a73e93..4911c23 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.h
+++ b/src/mesa/drivers/dri/i965/brw_device_info.h
@@ -86,4 +86,4 @@ struct brw_device_info
/** @} */
 };
 
-const struct brw_device_info *brw_get_device_info(int devid, int revision);
+const struct brw_device_info *brw_get_device_info(int devid);
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index 590c45d..fb95fb6 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1357,7 +1357,16 @@ set_max_gl_versions(struct intel_screen *screen)
}
 }
 
-static int
+/**
+ * Return the revision (generally the revid field of the PCI header) of the
+ * graphics device.
+ *
+ * XXX: This function is useful to keep around even if it is not currently in
+ * use. It is necessary for new platforms and revision specific workarounds or
+ * features. Please don't remove it so that we know it at least continues to
+ * build.
+ */
+static __attribute__((__unused__)) int
 brw_get_revision(int fd)
 {
struct drm_i915_getparam gp;
@@ -1416,8 +1425,7 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
return false;
 
intelScreen->deviceID = drm_intel_bufmgr_gem_get_devid(intelScreen->bufmgr);
-   intelScreen->devinfo = brw_get_device_info(intelScreen->deviceID,
-  brw_get_revision(psp->fd));
+   intelScreen->devinfo = brw_get_device_info(intelScreen->deviceID);
if (!intelScreen->devinfo)
   return false;
 
-- 
2.6.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] radeonsi: add support for Stoney asics (v3)

2015-10-23 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Fri, Oct 23, 2015 at 6:49 PM, Alex Deucher  wrote:
> From: Samuel Li 
>
> v2 (agd): rebase on mesa master, split pci ids to
> separate commit
> v3 (agd): use carrizo for llvm processor name for
> llvm 3.7 and older
>
> Signed-off-by: Samuel Li 
> ---
>  src/gallium/drivers/radeon/r600_pipe_common.c | 6 ++
>  src/gallium/drivers/radeon/radeon_winsys.h| 1 +
>  src/gallium/drivers/radeonsi/si_state.c   | 1 +
>  src/gallium/winsys/amdgpu/drm/amdgpu_id.h | 8 ++--
>  src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 6 +-
>  5 files changed, 19 insertions(+), 3 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
> b/src/gallium/drivers/radeon/r600_pipe_common.c
> index 7ac94ca..4ce0c6a 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.c
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.c
> @@ -416,6 +416,7 @@ static const char* r600_get_chip_name(struct 
> r600_common_screen *rscreen)
> case CHIP_ICELAND: return "AMD ICELAND";
> case CHIP_CARRIZO: return "AMD CARRIZO";
> case CHIP_FIJI: return "AMD FIJI";
> +   case CHIP_STONEY: return "AMD STONEY";
> default: return "AMD unknown";
> }
>  }
> @@ -540,6 +541,11 @@ const char *r600_get_llvm_processor_name(enum 
> radeon_family family)
> case CHIP_ICELAND: return "iceland";
> case CHIP_CARRIZO: return "carrizo";
> case CHIP_FIJI: return "fiji";
> +#if HAVE_LLVM <= 0x0307
> +   case CHIP_STONEY: return "carrizo";
> +#else
> +   case CHIP_STONEY: return "stoney";
> +#endif
> default: return "";
> }
>  }
> diff --git a/src/gallium/drivers/radeon/radeon_winsys.h 
> b/src/gallium/drivers/radeon/radeon_winsys.h
> index b91e1ad..5f13c1e 100644
> --- a/src/gallium/drivers/radeon/radeon_winsys.h
> +++ b/src/gallium/drivers/radeon/radeon_winsys.h
> @@ -137,6 +137,7 @@ enum radeon_family {
>  CHIP_ICELAND,
>  CHIP_CARRIZO,
>  CHIP_FIJI,
> +CHIP_STONEY,
>  CHIP_LAST,
>  };
>
> diff --git a/src/gallium/drivers/radeonsi/si_state.c 
> b/src/gallium/drivers/radeonsi/si_state.c
> index 243bdc6..a71ff49 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -3336,6 +3336,7 @@ static void si_init_config(struct si_context *sctx)
> break;
> case CHIP_KABINI:
> case CHIP_MULLINS:
> +   case CHIP_STONEY:
> raster_config = 0x;
> raster_config_1 = 0x;
> break;
> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_id.h 
> b/src/gallium/winsys/amdgpu/drm/amdgpu_id.h
> index 8882c41..90fe0cd 100644
> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_id.h
> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_id.h
> @@ -151,11 +151,15 @@ enum {
>
>  /* CZ specific rev IDs */
>  enum {
> -   CZ_CARRIZO_A0  = 0x01,
> +   CARRIZO_A0   = 0x01,
> +STONEY_A0= 0x61,
> CZ_UNKNOWN  = 0xFF
>  };
>
>  #define ASICREV_IS_CARRIZO(eChipRev) \
> -   (eChipRev >= CARRIZO_A0)
> +   ((eChipRev >= CARRIZO_A0) && (eChipRev < STONEY_A0))
> +
> +#define ASICREV_IS_STONEY(eChipRev) \
> +   ((eChipRev >= STONEY_A0) && (eChipRev < CZ_UNKNOWN))
>
>  #endif /* AMDGPU_ID_H */
> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c 
> b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
> index c877249..32cd9d9 100644
> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
> @@ -226,7 +226,11 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws)
>break;
> case CHIP_CARRIZO:
>ws->family = FAMILY_CZ;
> -  ws->rev_id = CZ_CARRIZO_A0;
> +  ws->rev_id = CARRIZO_A0;
> +  break;
> +   case CHIP_STONEY:
> +  ws->family = FAMILY_CZ;
> +  ws->rev_id = STONEY_A0;
>break;
> case CHIP_FIJI:
>ws->family = FAMILY_VI;
> --
> 1.8.3.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] radeonsi: add Stoney pci ids

2015-10-23 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Thu, Oct 22, 2015 at 6:09 PM, Alex Deucher  wrote:
> From: Samuel Li 
>
> Signed-off-by: Samuel Li 
> ---
>  include/pci_ids/radeonsi_pci_ids.h | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/include/pci_ids/radeonsi_pci_ids.h 
> b/include/pci_ids/radeonsi_pci_ids.h
> index 52eada1..bcf15a1 100644
> --- a/include/pci_ids/radeonsi_pci_ids.h
> +++ b/include/pci_ids/radeonsi_pci_ids.h
> @@ -181,3 +181,5 @@ CHIPSET(0x9876, CARRIZO_, CARRIZO)
>  CHIPSET(0x9877, CARRIZO_, CARRIZO)
>
>  CHIPSET(0x7300, FIJI_, FIJI)
> +
> +CHIPSET(0x98E4, STONEY_, STONEY)
> --
> 1.8.3.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 15/21] mesa: Fix EXT_texture_sRGB functionality leaks

2015-10-23 Thread Nanley Chery
On Thu, Oct 22, 2015 at 12:15 PM, Chad Versace 
wrote:

> On Mon 19 Oct 2015, Nanley Chery wrote:
> > From: Nanley Chery 
> >
> > Stop leaks into the following contexts:
> >* GLES in _mesa_base_tex_format() and lookup_view_class().
> >* Pre-1.1 GL legacy contexts in all uses.
> >
> > Stop allowing compressed sRGB formats as valid formats in GLES3
> > contexts. I realized this was happening when CTS failures occured after
> > fixing the extension functionality leak with the helper function.
>
> Do you mean that this patch *fixes* CTS failures? If so, which CTS for
> which GLES version?  There are so many CTS's in the world :(
>

I meant that when I first modified _mesa_base_tex_format() to use the helper
function, I had failures in piglit, dEQP, and the Khronos CTS. "failures in
every
test-suite" probably would've been better than "CTS failures" here. It was
because of those failures, that I realized that we were incorrectly allowing
formats like GL_COMPRESSED_SRGB_EXT in GLES contexts. This motivated
the split of the switch statement you see in the diff of
_mesa_base_tex_format().
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Remove unused devinfo revision

2015-10-23 Thread Ben Widawsky
On Fri, Oct 23, 2015 at 02:38:39PM -0700, Ben Widawsky wrote:
> I left the function to obtain the revision because it is, and will continue to
> be useful in the future. I'd rather not have to dig it up every time we need 
> it.
> Comments left at the implementation to say as much.
> 

Added locally I've also referenced the SHA where I originally missed this:
commit 28ed1e08e8ba98ebd4ff0b56326372f0df9c73ad
Author: Ben Widawsky 
Date:   Fri Aug 7 13:58:37 2015 -0700

i965/skl: Remove early platform support


> Signed-off-by: Ben Widawsky 
> ---
>  src/mesa/drivers/dri/i965/brw_device_info.c |  2 +-
>  src/mesa/drivers/dri/i965/brw_device_info.h |  2 +-
>  src/mesa/drivers/dri/i965/intel_screen.c| 14 +++---
>  3 files changed, 13 insertions(+), 5 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c 
> b/src/mesa/drivers/dri/i965/brw_device_info.c
> index e7a016c..789b93f 100644
> --- a/src/mesa/drivers/dri/i965/brw_device_info.c
> +++ b/src/mesa/drivers/dri/i965/brw_device_info.c
> @@ -363,7 +363,7 @@ static const struct brw_device_info brw_device_info_bxt = 
> {
>  };
>  
>  const struct brw_device_info *
> -brw_get_device_info(int devid, int revision)
> +brw_get_device_info(int devid)
>  {
> const struct brw_device_info *devinfo;
> switch (devid) {
> diff --git a/src/mesa/drivers/dri/i965/brw_device_info.h 
> b/src/mesa/drivers/dri/i965/brw_device_info.h
> index 2a73e93..4911c23 100644
> --- a/src/mesa/drivers/dri/i965/brw_device_info.h
> +++ b/src/mesa/drivers/dri/i965/brw_device_info.h
> @@ -86,4 +86,4 @@ struct brw_device_info
> /** @} */
>  };
>  
> -const struct brw_device_info *brw_get_device_info(int devid, int revision);
> +const struct brw_device_info *brw_get_device_info(int devid);
> diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
> b/src/mesa/drivers/dri/i965/intel_screen.c
> index 590c45d..fb95fb6 100644
> --- a/src/mesa/drivers/dri/i965/intel_screen.c
> +++ b/src/mesa/drivers/dri/i965/intel_screen.c
> @@ -1357,7 +1357,16 @@ set_max_gl_versions(struct intel_screen *screen)
> }
>  }
>  
> -static int
> +/**
> + * Return the revision (generally the revid field of the PCI header) of the
> + * graphics device.
> + *
> + * XXX: This function is useful to keep around even if it is not currently in
> + * use. It is necessary for new platforms and revision specific workarounds 
> or
> + * features. Please don't remove it so that we know it at least continues to
> + * build.
> + */
> +static __attribute__((__unused__)) int
>  brw_get_revision(int fd)
>  {
> struct drm_i915_getparam gp;
> @@ -1416,8 +1425,7 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
> return false;
>  
> intelScreen->deviceID = 
> drm_intel_bufmgr_gem_get_devid(intelScreen->bufmgr);
> -   intelScreen->devinfo = brw_get_device_info(intelScreen->deviceID,
> -  brw_get_revision(psp->fd));
> +   intelScreen->devinfo = brw_get_device_info(intelScreen->deviceID);
> if (!intelScreen->devinfo)
>return false;
>  
> -- 
> 2.6.1
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeonsi: add Stoney to si_init_gs_info()

2015-10-23 Thread Alex Deucher
This patch was originally written before stoney support
was merged.  Add stoney.

Signed-off-by: Alex Deucher 
---
 src/gallium/drivers/radeonsi/si_pipe.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index e653799..047bbf4 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -586,6 +586,7 @@ static bool si_init_gs_info(struct si_screen *sscreen)
case CHIP_MULLINS:
case CHIP_ICELAND:
case CHIP_CARRIZO:
+   case CHIP_STONEY:
sscreen->gs_table_depth = 16;
return true;
case CHIP_TAHITI:
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >