[Mesa-dev] [PATCH] android: Remove builtin_compiler

2013-09-13 Thread Adrian Negreanu
From: Adrian Negreanu 

the first part was done in:

   commit c845140a20efa6a30a5465301d1f9b4acea79155
   Author: Kenneth Graunke 
   Date:   Tue Sep 3 21:22:17 2013 -0700

Signed-off-by: Adrian Negreanu 
---
 src/glsl/Android.gen.mk | 19 ---
 1 file changed, 19 deletions(-)

diff --git a/src/glsl/Android.gen.mk b/src/glsl/Android.gen.mk
index 2ed9fa6..7ec56d4 100644
--- a/src/glsl/Android.gen.mk
+++ b/src/glsl/Android.gen.mk
@@ -35,10 +35,6 @@ sources := \
glcpp/glcpp-lex.c \
glcpp/glcpp-parse.c
 
-ifneq ($(LOCAL_IS_HOST_MODULE),true)
-sources += builtin_function.cpp
-endif
-
 LOCAL_SRC_FILES := $(filter-out $(sources), $(LOCAL_SRC_FILES))
 
 LOCAL_C_INCLUDES += $(intermediates) $(intermediates)/glcpp 
$(MESA_TOP)/src/glsl/glcpp
@@ -81,18 +77,3 @@ $(intermediates)/glcpp/glcpp-lex.c: 
$(LOCAL_PATH)/glcpp/glcpp-lex.l
 
 $(intermediates)/glcpp/glcpp-parse.c: $(LOCAL_PATH)/glcpp/glcpp-parse.y
$(call glsl_local-y-to-c-and-h)
-
-BUILTIN_COMPILER := 
$(BUILD_OUT_EXECUTABLES)/mesa_builtin_compiler$(BUILD_EXECUTABLE_SUFFIX)
-
-builtin_function_deps := \
-   $(LOCAL_PATH)/builtins/tools/generate_builtins.py \
-   $(LOCAL_PATH)/builtins/tools/texture_builtins.py \
-   $(BUILTIN_COMPILER) \
-   $(wildcard $(LOCAL_PATH)/builtins/profiles/*) \
-   $(wildcard $(LOCAL_PATH)/builtins/ir/*)
-
-$(intermediates)/builtin_function.cpp: PRIVATE_SCRIPT := $(MESA_PYTHON2) 
$(LOCAL_PATH)/builtins/tools/generate_builtins.py
-$(intermediates)/builtin_function.cpp: $(builtin_function_deps)
-   @mkdir -p $(dir $@)
-   @echo "Gen GLSL: $(PRIVATE_MODULE) <= $(notdir $@)"
-   $(hide) $(PRIVATE_SCRIPT) $(BUILTIN_COMPILER) > $@ || rm -f $@
-- 
1.8.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] android: Remove builtin_compiler

2013-09-13 Thread Ian Romanick
Getting rid of the builtin_compiler from the Android build is a good
idea... but I'll let Chad or someone who knows /anything/ about the
Android build system actually review the patch.

Acked-by: Ian Romanick 

I don't think I want to remove any of the other bits of the builtin
compiler or standalone compiler.  I've been working on resurrecting the
standalone compiler for use as a developer tool.

http://cgit.freedesktop.org/~idr/mesa/log/?h=standalone-compiler

On 09/13/2013 03:58 AM, Adrian Negreanu wrote:
> From: Adrian Negreanu 
> 
> the first part was done in:
> 
>commit c845140a20efa6a30a5465301d1f9b4acea79155
>Author: Kenneth Graunke 
>Date:   Tue Sep 3 21:22:17 2013 -0700
> 
> Signed-off-by: Adrian Negreanu 
> ---
>  src/glsl/Android.gen.mk | 19 ---
>  1 file changed, 19 deletions(-)
> 
> diff --git a/src/glsl/Android.gen.mk b/src/glsl/Android.gen.mk
> index 2ed9fa6..7ec56d4 100644
> --- a/src/glsl/Android.gen.mk
> +++ b/src/glsl/Android.gen.mk
> @@ -35,10 +35,6 @@ sources := \
>   glcpp/glcpp-lex.c \
>   glcpp/glcpp-parse.c
>  
> -ifneq ($(LOCAL_IS_HOST_MODULE),true)
> -sources += builtin_function.cpp
> -endif
> -
>  LOCAL_SRC_FILES := $(filter-out $(sources), $(LOCAL_SRC_FILES))
>  
>  LOCAL_C_INCLUDES += $(intermediates) $(intermediates)/glcpp 
> $(MESA_TOP)/src/glsl/glcpp
> @@ -81,18 +77,3 @@ $(intermediates)/glcpp/glcpp-lex.c: 
> $(LOCAL_PATH)/glcpp/glcpp-lex.l
>  
>  $(intermediates)/glcpp/glcpp-parse.c: $(LOCAL_PATH)/glcpp/glcpp-parse.y
>   $(call glsl_local-y-to-c-and-h)
> -
> -BUILTIN_COMPILER := 
> $(BUILD_OUT_EXECUTABLES)/mesa_builtin_compiler$(BUILD_EXECUTABLE_SUFFIX)
> -
> -builtin_function_deps := \
> - $(LOCAL_PATH)/builtins/tools/generate_builtins.py \
> - $(LOCAL_PATH)/builtins/tools/texture_builtins.py \
> - $(BUILTIN_COMPILER) \
> - $(wildcard $(LOCAL_PATH)/builtins/profiles/*) \
> - $(wildcard $(LOCAL_PATH)/builtins/ir/*)
> -
> -$(intermediates)/builtin_function.cpp: PRIVATE_SCRIPT := $(MESA_PYTHON2) 
> $(LOCAL_PATH)/builtins/tools/generate_builtins.py
> -$(intermediates)/builtin_function.cpp: $(builtin_function_deps)
> - @mkdir -p $(dir $@)
> - @echo "Gen GLSL: $(PRIVATE_MODULE) <= $(notdir $@)"
> - $(hide) $(PRIVATE_SCRIPT) $(BUILTIN_COMPILER) > $@ || rm -f $@
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] android: Remove builtin_compiler

2013-09-13 Thread Adrian M Negreanu
On Fri, Sep 13, 2013 at 5:01 PM, Ian Romanick  wrote:
> Getting rid of the builtin_compiler from the Android build is a good
> idea... but I'll let Chad or someone who knows /anything/ about the
> Android build system actually review the patch.
>
> Acked-by: Ian Romanick 
>
> I don't think I want to remove any of the other bits of the builtin
> compiler or standalone compiler.  I've been working on resurrecting the
> standalone compiler for use as a developer tool.
>
> http://cgit.freedesktop.org/~idr/mesa/log/?h=standalone-compiler
>

(add Chad to cc:)
The parts removed from Android.gen.mk were using generate_builtins.py,
which was also removed; that will trigger a compile failure.



> On 09/13/2013 03:58 AM, Adrian Negreanu wrote:
>> From: Adrian Negreanu 
>>
>> the first part was done in:
>>
>>commit c845140a20efa6a30a5465301d1f9b4acea79155
>>Author: Kenneth Graunke 
>>Date:   Tue Sep 3 21:22:17 2013 -0700
>>
>> Signed-off-by: Adrian Negreanu 
>> ---
>>  src/glsl/Android.gen.mk | 19 ---
>>  1 file changed, 19 deletions(-)
>>
>> diff --git a/src/glsl/Android.gen.mk b/src/glsl/Android.gen.mk
>> index 2ed9fa6..7ec56d4 100644
>> --- a/src/glsl/Android.gen.mk
>> +++ b/src/glsl/Android.gen.mk
>> @@ -35,10 +35,6 @@ sources := \
>>   glcpp/glcpp-lex.c \
>>   glcpp/glcpp-parse.c
>>
>> -ifneq ($(LOCAL_IS_HOST_MODULE),true)
>> -sources += builtin_function.cpp
>> -endif
>> -
>>  LOCAL_SRC_FILES := $(filter-out $(sources), $(LOCAL_SRC_FILES))
>>
>>  LOCAL_C_INCLUDES += $(intermediates) $(intermediates)/glcpp 
>> $(MESA_TOP)/src/glsl/glcpp
>> @@ -81,18 +77,3 @@ $(intermediates)/glcpp/glcpp-lex.c: 
>> $(LOCAL_PATH)/glcpp/glcpp-lex.l
>>
>>  $(intermediates)/glcpp/glcpp-parse.c: $(LOCAL_PATH)/glcpp/glcpp-parse.y
>>   $(call glsl_local-y-to-c-and-h)
>> -
>> -BUILTIN_COMPILER := 
>> $(BUILD_OUT_EXECUTABLES)/mesa_builtin_compiler$(BUILD_EXECUTABLE_SUFFIX)
>> -
>> -builtin_function_deps := \
>> - $(LOCAL_PATH)/builtins/tools/generate_builtins.py \
>> - $(LOCAL_PATH)/builtins/tools/texture_builtins.py \
>> - $(BUILTIN_COMPILER) \
>> - $(wildcard $(LOCAL_PATH)/builtins/profiles/*) \
>> - $(wildcard $(LOCAL_PATH)/builtins/ir/*)
>> -
>> -$(intermediates)/builtin_function.cpp: PRIVATE_SCRIPT := $(MESA_PYTHON2) 
>> $(LOCAL_PATH)/builtins/tools/generate_builtins.py
>> -$(intermediates)/builtin_function.cpp: $(builtin_function_deps)
>> - @mkdir -p $(dir $@)
>> - @echo "Gen GLSL: $(PRIVATE_MODULE) <= $(notdir $@)"
>> - $(hide) $(PRIVATE_SCRIPT) $(BUILTIN_COMPILER) > $@ || rm -f $@
>>
>



-- 
Regards!
http://groleo.wordpress.com
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 69321] New: starting openCL crashes/boots system

2013-09-13 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=69321

  Priority: medium
Bug ID: 69321
  Assignee: mesa-dev@lists.freedesktop.org
   Summary: starting openCL crashes/boots system
  Severity: normal
Classification: Unclassified
OS: All
  Reporter: udo...@xs4all.nl
  Hardware: Other
Status: NEW
   Version: git
 Component: Other
   Product: Mesa

git mesa, git llvm, git libclc on Fedora 19 w/ kernel.org 3.10.x
Starting bfgminer with OpenCL enabled crashes/reboots the PC.
A10-5800K w/ ARUBA graphics (Cayman)

This worked OK say 10 days ago.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 69321] starting openCL crashes/boots system

2013-09-13 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=69321

--- Comment #1 from udo  ---
Got no logging yet (dmesg, messages, xorg.0.log)

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 69321] starting openCL crashes/boots system

2013-09-13 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=69321

Alex Deucher  changed:

   What|Removed |Added

   Assignee|mesa-dev@lists.freedesktop. |dri-devel@lists.freedesktop
   |org |.org
  Component|Other   |Drivers/Gallium/r600

--- Comment #2 from Alex Deucher  ---
Can you bisect?

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] gallium-bind-sampler-states branch

2013-09-13 Thread Roland Scheidegger
Am 13.09.2013 02:46, schrieb Brian Paul:
> 
> I just pushed a gallium-bind-sampler-states branch to my git repo at
> git://people.freedesktop.org/~brianp/mesa
> 
> It replaces the four
> pipe_context::bind_fragment/vertex/geometry/compute_sampler_states()
> functions with a single bind_sampler_states() function:
> 
>  void (*bind_sampler_states)(struct pipe_context *,
>  unsigned shader, unsigned start_slot,
>  unsigned num_samplers, void **samplers);
> 
> At this point start_slot is always zero (at least for non-compute
> shaders).  And as the updated gallium docs explain, at some point calls
> to bind_sampler_states() will be used to update sub-ranges, but that
> never happens currently.
> 
> I've updated all the drivers, state trackers, utils, etc.
> 
> I've tested the svga, llvmpipe and softpipe drivers.  'make check' and a
> texture subset of piglit pass w/out regressions.  I'd appreciate it if
> other driver developers would test their favorite driver.
> 
> 
> Next, I'd like to consolidate the
> set_vertex/geometry/fragment/compute_sampler_views() functions with a
> single function.  But I have no idea when I'll get around to that.
> 

This looks good to me. And I'm very much in favor of doing the same for
sampler_views() (even more so because for d3d10 we need to have 128 of
them not just 16 and I suspect they get changed more often too).

Roland
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3] wayland: Add support for eglSwapInterval

2013-09-13 Thread Neil Roberts
Oh no, I meant to send this to the wayland-devel list. git send-email is
hard. Sorry about that!

- Neil
-
Intel Corporation (UK) Limited
Registered No. 1134945 (England)
Registered Office: Pipers Way, Swindon SN3 1RJ
VAT No: 860 2173 47

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glsl: Enable extensions implied by GLSL 3.30

2013-09-13 Thread Ian Romanick
From: Ian Romanick 

Signed-off-by: Ian Romanick 
Reported-by: Dan Ginsburg 
---
 src/glsl/glsl_parser_extras.cpp | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 3f65ca5..e92f36e 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -306,6 +306,11 @@ _mesa_glsl_parse_state::process_version_directive(YYLTYPE 
*locp, int version,
if (this->language_version == 300 && this->es_shader) {
   this->ARB_explicit_attrib_location_enable = true;
}
+
+   if (this->language_version >= 330) {
+  this->ARB_shader_bit_encoding_enable = true;
+  this->ARB_explicit_attrib_location_enable = true;
+   }
 }
 
 extern "C" {
-- 
1.8.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa: Remove all traces of GL_OES_matrix_get

2013-09-13 Thread Ian Romanick
From: Ian Romanick 

I believe this extension was enabled by accident.  As far as I can tell,
there has never been any code in Mesa to actually support it.  Not only
that, this extension is only useful in the common-lite profile, and Mesa
does the common profile.

Signed-off-by: Ian Romanick 
---
I'd like to at least apply this patch to 9.1 and 9.2.  If we actually
care about this OpenGL ES 1.1 extension, it should be easy enough to
implement for master.  I'd rather just remove it.

 src/mesa/main/extensions.c   | 1 -
 src/mesa/main/get_hash_params.py | 5 -
 src/mesa/main/glheader.h | 6 --
 3 files changed, 12 deletions(-)

diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index 4e46cf0..b206c5b 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -262,7 +262,6 @@ static const struct extension extension_table[] = {
{ "GL_OES_framebuffer_object",  o(dummy_true),  
 ES1,   2005 },
{ "GL_OES_get_program_binary",  o(dummy_true),  
   ES2, 2008 },
{ "GL_OES_mapbuffer",   o(dummy_true),  
 ES1 | ES2, 2005 },
-   { "GL_OES_matrix_get",  o(dummy_true),  
 ES1,   2004 },
{ "GL_OES_packed_depth_stencil",
o(EXT_packed_depth_stencil), ES1 | ES2, 2007 },
{ "GL_OES_point_size_array",o(dummy_true),  
 ES1,   2004 },
{ "GL_OES_point_sprite",o(ARB_point_sprite),
 ES1,   2004 },
diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
index fde4537..f51862e 100644
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -229,11 +229,6 @@ descriptor=[
 
 
 { "apis": ["GLES"], "params": [
-# XXX: OES_matrix_get
-  [ "MODELVIEW_MATRIX_FLOAT_AS_INT_BITS_OES", "" ],
-  [ "PROJECTION_MATRIX_FLOAT_AS_INT_BITS_OES", "" ],
-  [ "TEXTURE_MATRIX_FLOAT_AS_INT_BITS_OES", "" ],
-
 # OES_point_size_array
   [ "POINT_SIZE_ARRAY_OES", 
"ARRAY_FIELD(VertexAttrib[VERT_ATTRIB_POINT_SIZE].Enabled, TYPE_BOOLEAN)" ],
   [ "POINT_SIZE_ARRAY_TYPE_OES", 
"ARRAY_FIELD(VertexAttrib[VERT_ATTRIB_POINT_SIZE].Type, TYPE_ENUM)" ],
diff --git a/src/mesa/main/glheader.h b/src/mesa/main/glheader.h
index b3b0c52..4ed51b3 100644
--- a/src/mesa/main/glheader.h
+++ b/src/mesa/main/glheader.h
@@ -115,12 +115,6 @@ typedef void *GLeglImageOES;
 #define GL_PALETTE8_RGB5_A1_OES 0x8B99
 #endif
 
-#ifndef GL_OES_matrix_get
-#define GL_MODELVIEW_MATRIX_FLOAT_AS_INT_BITS_OES   0x898D
-#define GL_PROJECTION_MATRIX_FLOAT_AS_INT_BITS_OES  0x898E
-#define GL_TEXTURE_MATRIX_FLOAT_AS_INT_BITS_OES 0x898F
-#endif
-
 #ifndef GL_ES_VERSION_2_0
 #define GL_SHADER_BINARY_FORMATS0x8DF8
 #define GL_NUM_SHADER_BINARY_FORMATS0x8DF9
-- 
1.8.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3] wayland: Add support for eglSwapInterval

2013-09-13 Thread Neil Roberts
Here is another version of the patch which brings back the blocking
when there are no buffers available to cope with the situation where
the compositor isn't immediately releasing buffers. Maybe we could
leave the decision about whether to increase the buffer count to 4 as
a separate patch.

Sorry for spamming the list a bit much with this patch.

Regards,
- Neil

-- >8 --

The Wayland EGL platform now respects the eglSwapInterval value. The value is
clamped to either 0 or 1 because it is difficult (and probably not useful) to
sync to more than 1 redraw.

The main change is that if the swap interval is 0 then instead of installing a
frame callback it will just call the display sync method and throttle itself
to that. When the application is not running fullscreen the compositor is
likely to release the previous buffer immediately so this gives the
application the best chance of reusing the buffer.

If there are no buffers available then instead of returning with an error,
get_back_bo will now block until a buffer becomes available. This is necessary
if the compositor is not releasing buffers immediately. As there are only
three buffers, this could actually mean that the client ends up throttled to
the vblank anyway because Weston can hold on to three buffers when the client
is fullscreen. We could fix this by increasing the buffer count to 4 or
changing Weston and KMS to allow cancelling a pending buffer swap, but for now
this patch ignores that problem.

This also moves the vblank configuration defines from platform_x11.c to the
common egl_dri2.h header so they can be shared by both platforms.
---
 src/egl/drivers/dri2/egl_dri2.h |   7 ++
 src/egl/drivers/dri2/platform_wayland.c | 159 
 src/egl/drivers/dri2/platform_x11.c |   6 --
 3 files changed, 147 insertions(+), 25 deletions(-)

diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h
index fba5f81..cc657ba 100644
--- a/src/egl/drivers/dri2/egl_dri2.h
+++ b/src/egl/drivers/dri2/egl_dri2.h
@@ -175,6 +175,7 @@ struct dri2_egl_surface
intdx;
intdy;
struct wl_callback*frame_callback;
+   struct wl_callback*throttle_callback;
int   format;
 #endif
 
@@ -221,6 +222,12 @@ struct dri2_egl_image
__DRIimage *dri_image;
 };
 
+/* From xmlpool/options.h, user exposed so should be stable */
+#define DRI_CONF_VBLANK_NEVER 0
+#define DRI_CONF_VBLANK_DEF_INTERVAL_0 1
+#define DRI_CONF_VBLANK_DEF_INTERVAL_1 2
+#define DRI_CONF_VBLANK_ALWAYS_SYNC 3
+
 /* standard typecasts */
 _EGL_DRIVER_STANDARD_TYPECASTS(dri2_egl)
 _EGL_DRIVER_TYPECAST(dri2_egl_image, _EGLImage, obj)
diff --git a/src/egl/drivers/dri2/platform_wayland.c 
b/src/egl/drivers/dri2/platform_wayland.c
index ffc5959..6ee6ffb 100644
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -180,8 +180,16 @@ dri2_create_window_surface(_EGLDriver *drv, _EGLDisplay 
*disp,
   _EGLConfig *conf, EGLNativeWindowType window,
   const EGLint *attrib_list)
 {
-   return dri2_create_surface(drv, disp, EGL_WINDOW_BIT, conf,
+   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
+   _EGLSurface *surf;
+
+   surf = dri2_create_surface(drv, disp, EGL_WINDOW_BIT, conf,
  window, attrib_list);
+
+   if (surf != NULL)
+  drv->API.SwapInterval(drv, disp, surf, dri2_dpy->default_swap_interval);
+
+   return surf;
 }
 
 /**
@@ -216,6 +224,8 @@ dri2_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, 
_EGLSurface *surf)
 
if (dri2_surf->frame_callback)
   wl_callback_destroy(dri2_surf->frame_callback);
+   if (dri2_surf->throttle_callback)
+  wl_callback_destroy(dri2_surf->throttle_callback);
 
if (dri2_surf->base.Type == EGL_WINDOW_BIT) {
   dri2_surf->wl_win->private = NULL;
@@ -261,24 +271,46 @@ get_back_bo(struct dri2_egl_surface *dri2_surf, 
__DRIbuffer *buffer)
__DRIimage *image;
int i, name, pitch;
 
-   /* There might be a buffer release already queued that wasn't processed */
-   wl_display_dispatch_queue_pending(dri2_dpy->wl_dpy, dri2_dpy->wl_queue);
+   if (dri2_surf->throttle_callback == NULL) {
+  /* There might be a buffer release already queued that wasn't processed 
*/
+  wl_display_dispatch_queue_pending(dri2_dpy->wl_dpy, dri2_dpy->wl_queue);
+   } else {
+  /* If we aren't throttling to the frame callbacks then the compositor
+   * may have sent a release event after the last attach so we'll wait
+   * until the sync for the commit request completes in order to have the
+   * best chance of reusing a buffer */
+  do {
+ if (wl_display_dispatch_queue(dri2_dpy->wl_dpy,
+   dri2_dpy->wl_queue) == -1)
+return EGL_FALSE;
+  } while (dri2_surf->throttle_callback != NULL);
+   }
 
if (dri2_surf->back == NULL) {
-  for (i = 0

[Mesa-dev] [PATCH] gallivm: some bits of seamless cube filtering implementation

2013-09-13 Thread sroland
From: Roland Scheidegger 

Simply adjust wrap mode to clamp_to_edge. This is all that's needed for a
correct implementation for nearest filtering, and it's way better than
using repeat wrap for instance for linear filtering (though obviously this
doesn't actually do seamless filtering).
---
 src/gallium/auxiliary/gallivm/lp_bld_sample.c |1 +
 src/gallium/auxiliary/gallivm/lp_bld_sample.h |1 +
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c |   41 ++---
 3 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c 
b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index 9b0a92c..c775382 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -155,6 +155,7 @@ lp_sampler_static_sampler_state(struct 
lp_static_sampler_state *state,
state->wrap_r= sampler->wrap_r;
state->min_img_filter= sampler->min_img_filter;
state->mag_img_filter= sampler->mag_img_filter;
+   state->seamless_cube_map = sampler->seamless_cube_map;
 
if (sampler->max_lod > 0.0f) {
   state->min_mip_filter = sampler->min_mip_filter;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h 
b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index e6b9f30..803a99e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -114,6 +114,7 @@ struct lp_static_sampler_state
unsigned lod_bias_non_zero:1;
unsigned apply_min_lod:1;  /**< min_lod > 0 ? */
unsigned apply_max_lod:1;  /**< max_lod < last_level ? */
+   unsigned seamless_cube_map:1;
 
/* Hacks */
unsigned force_nearest_s:1;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 7e98919..1b564d9 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -2123,8 +2123,21 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
   debug_printf("  .min_mip_filter = %u\n", 
derived_sampler_state.min_mip_filter);
}
 
-   min_img_filter = static_sampler_state->min_img_filter;
-   mag_img_filter = static_sampler_state->mag_img_filter;
+   if ((static_texture_state->target == PIPE_TEXTURE_CUBE ||
+static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
+   static_sampler_state->seamless_cube_map)
+   {
+  /*
+   * Seamless filtering ignores wrap modes.
+   * Setting to CLAMP_TO_EDGE is correct for nearest filtering, for
+   * bilinear it's not correct but way better than using for instance 
repeat.
+   */
+  derived_sampler_state.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+  derived_sampler_state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   }
+
+   min_img_filter = derived_sampler_state.min_img_filter;
+   mag_img_filter = derived_sampler_state.mag_img_filter;
 
 
/*
@@ -2260,16 +2273,16 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
   LLVMValueRef ilevel0 = NULL, ilevel1 = NULL;
   boolean use_aos = util_format_fits_8unorm(bld.format_desc) &&
 /* not sure this is strictly needed or simply 
impossible */
-static_sampler_state->compare_mode == 
PIPE_TEX_COMPARE_NONE &&
-lp_is_simple_wrap_mode(static_sampler_state->wrap_s);
+derived_sampler_state.compare_mode == 
PIPE_TEX_COMPARE_NONE &&
+lp_is_simple_wrap_mode(derived_sampler_state.wrap_s);
 
   use_aos &= bld.num_lods <= num_quads ||
- static_sampler_state->min_img_filter ==
-static_sampler_state->mag_img_filter;
+ derived_sampler_state.min_img_filter ==
+derived_sampler_state.mag_img_filter;
   if (dims > 1) {
- use_aos &= lp_is_simple_wrap_mode(static_sampler_state->wrap_t);
+ use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_t);
  if (dims > 2) {
-use_aos &= lp_is_simple_wrap_mode(static_sampler_state->wrap_r);
+use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_r);
  }
   }
 
@@ -2278,12 +2291,12 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
  debug_printf("%s: using floating point linear filtering for %s\n",
   __FUNCTION__, bld.format_desc->short_name);
  debug_printf("  min_img %d  mag_img %d  mip %d  wraps %d  wrapt %d  
wrapr %d\n",
-  static_sampler_state->min_img_filter,
-  static_sampler_state->mag_img_filter,
-  static_sampler_state->min_mip_filter,
-  static_sampler_state->wrap_s,
-  static_sampler_state->wrap_t,
-  static_sampler_state->wrap_r);
+  derived_sampler_state.min_img_filter,
+  derived_sampler_state.mag_img_filte

[Mesa-dev] [PATCH] gallivm: some bits of seamless cube filtering implementation

2013-09-13 Thread sroland
From: Roland Scheidegger 

Simply adjust wrap mode to clamp_to_edge. This is all that's needed for a
correct implementation for nearest filtering, and it's way better than
using repeat wrap for instance for linear filtering (though obviously this
doesn't actually do seamless filtering).

v2: fix s/t wrap not r/s...
---
 src/gallium/auxiliary/gallivm/lp_bld_sample.c |1 +
 src/gallium/auxiliary/gallivm/lp_bld_sample.h |1 +
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c |   41 ++---
 3 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c 
b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index 9b0a92c..c775382 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -155,6 +155,7 @@ lp_sampler_static_sampler_state(struct 
lp_static_sampler_state *state,
state->wrap_r= sampler->wrap_r;
state->min_img_filter= sampler->min_img_filter;
state->mag_img_filter= sampler->mag_img_filter;
+   state->seamless_cube_map = sampler->seamless_cube_map;
 
if (sampler->max_lod > 0.0f) {
   state->min_mip_filter = sampler->min_mip_filter;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h 
b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index e6b9f30..803a99e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -114,6 +114,7 @@ struct lp_static_sampler_state
unsigned lod_bias_non_zero:1;
unsigned apply_min_lod:1;  /**< min_lod > 0 ? */
unsigned apply_max_lod:1;  /**< max_lod < last_level ? */
+   unsigned seamless_cube_map:1;
 
/* Hacks */
unsigned force_nearest_s:1;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 7e98919..355e97d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -2123,8 +2123,21 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
   debug_printf("  .min_mip_filter = %u\n", 
derived_sampler_state.min_mip_filter);
}
 
-   min_img_filter = static_sampler_state->min_img_filter;
-   mag_img_filter = static_sampler_state->mag_img_filter;
+   if ((static_texture_state->target == PIPE_TEXTURE_CUBE ||
+static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
+   static_sampler_state->seamless_cube_map)
+   {
+  /*
+   * Seamless filtering ignores wrap modes.
+   * Setting to CLAMP_TO_EDGE is correct for nearest filtering, for
+   * bilinear it's not correct but way better than using for instance 
repeat.
+   */
+  derived_sampler_state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+  derived_sampler_state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   }
+
+   min_img_filter = derived_sampler_state.min_img_filter;
+   mag_img_filter = derived_sampler_state.mag_img_filter;
 
 
/*
@@ -2260,16 +2273,16 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
   LLVMValueRef ilevel0 = NULL, ilevel1 = NULL;
   boolean use_aos = util_format_fits_8unorm(bld.format_desc) &&
 /* not sure this is strictly needed or simply 
impossible */
-static_sampler_state->compare_mode == 
PIPE_TEX_COMPARE_NONE &&
-lp_is_simple_wrap_mode(static_sampler_state->wrap_s);
+derived_sampler_state.compare_mode == 
PIPE_TEX_COMPARE_NONE &&
+lp_is_simple_wrap_mode(derived_sampler_state.wrap_s);
 
   use_aos &= bld.num_lods <= num_quads ||
- static_sampler_state->min_img_filter ==
-static_sampler_state->mag_img_filter;
+ derived_sampler_state.min_img_filter ==
+derived_sampler_state.mag_img_filter;
   if (dims > 1) {
- use_aos &= lp_is_simple_wrap_mode(static_sampler_state->wrap_t);
+ use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_t);
  if (dims > 2) {
-use_aos &= lp_is_simple_wrap_mode(static_sampler_state->wrap_r);
+use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_r);
  }
   }
 
@@ -2278,12 +2291,12 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
  debug_printf("%s: using floating point linear filtering for %s\n",
   __FUNCTION__, bld.format_desc->short_name);
  debug_printf("  min_img %d  mag_img %d  mip %d  wraps %d  wrapt %d  
wrapr %d\n",
-  static_sampler_state->min_img_filter,
-  static_sampler_state->mag_img_filter,
-  static_sampler_state->min_mip_filter,
-  static_sampler_state->wrap_s,
-  static_sampler_state->wrap_t,
-  static_sampler_state->wrap_r);
+  derived_sampler_state.min_img_filter,
+  derive

Re: [Mesa-dev] [PATCH] mesa: Remove all traces of GL_OES_matrix_get

2013-09-13 Thread Kenneth Graunke
On 09/13/2013 09:37 AM, Ian Romanick wrote:
> From: Ian Romanick 
> 
> I believe this extension was enabled by accident.  As far as I can tell,
> there has never been any code in Mesa to actually support it.  Not only
> that, this extension is only useful in the common-lite profile, and Mesa
> does the common profile.
> 
> Signed-off-by: Ian Romanick 
> ---
> I'd like to at least apply this patch to 9.1 and 9.2.  If we actually
> care about this OpenGL ES 1.1 extension, it should be easy enough to
> implement for master.  I'd rather just remove it.

ES 1.1 extensions that we don't know of any applications using?  Doesn't
sound worth implementing properly.  I'm in favor of removing it.

Reviewed-by: Kenneth Graunke 

Ack on 9.2 and 9.1 as well.  Sounds reasonable.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] glsl: Enable ARB_explicit_attrib_location in GLSL 3.30.

2013-09-13 Thread Kenneth Graunke
GLSL 3.30 wholly includes the ARB_explicit_attrib_location
functionality.  Failing to set this flag meant we wouldn't handle the
layout qualifiers appropriately.

Signed-off-by: Kenneth Graunke 
Reported-by: Dan Ginsburg 
Cc: Ian Romanick 
---
 src/glsl/glsl_parser_extras.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index cac5a18..aa3ad8d 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -304,7 +304,7 @@ _mesa_glsl_parse_state::process_version_directive(YYLTYPE 
*locp, int version,
   this->ARB_uniform_buffer_object_enable = true;
}
 
-   if (this->language_version == 300 && this->es_shader) {
+   if (this->is_version(330, 300)) {
   this->ARB_explicit_attrib_location_enable = true;
}
 }
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] glsl: Set ARB_shader_bit_encoding_enable = true for GLSL 3.30 / ES 3.00.

2013-09-13 Thread Kenneth Graunke
This should have no effect, since the only user of this flag is
builtin_functions.cpp, which already explicitly checks for the core
versions.  Still, it's probably not a bad idea.

Signed-off-by: Kenneth Graunke 
Cc: Ian Romanick 
---
 src/glsl/glsl_parser_extras.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index aa3ad8d..a2432a3 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -306,6 +306,7 @@ _mesa_glsl_parse_state::process_version_directive(YYLTYPE 
*locp, int version,
 
if (this->is_version(330, 300)) {
   this->ARB_explicit_attrib_location_enable = true;
+  this->ARB_shader_bit_encoding_enable = true;
}
 }
 
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: Enable extensions implied by GLSL 3.30

2013-09-13 Thread Kenneth Graunke
On 09/13/2013 09:13 AM, Ian Romanick wrote:
> From: Ian Romanick 
> 
> Signed-off-by: Ian Romanick 
> Reported-by: Dan Ginsburg 
> ---
>  src/glsl/glsl_parser_extras.cpp | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
> index 3f65ca5..e92f36e 100644
> --- a/src/glsl/glsl_parser_extras.cpp
> +++ b/src/glsl/glsl_parser_extras.cpp
> @@ -306,6 +306,11 @@ 
> _mesa_glsl_parse_state::process_version_directive(YYLTYPE *locp, int version,
> if (this->language_version == 300 && this->es_shader) {
>this->ARB_explicit_attrib_location_enable = true;
> }
> +
> +   if (this->language_version >= 330) {
> +  this->ARB_shader_bit_encoding_enable = true;
> +  this->ARB_explicit_attrib_location_enable = true;
> +   }
>  }
>  
>  extern "C" {
> 

The real purpose of this patch appears to be enabling
ARB_explicit_attrib_location functionality in GLSL 3.30; AFAIK, we don't
have any Piglit tests for this yet.  This is a good change, but it could
be simplified.

The other part, setting ARB_shader_bit_encoding_enable = true, should
actually have no effect.  The only usage of that flag is in
builtin_functions.cpp:

static bool
shader_bit_encoding(const _mesa_glsl_parse_state *state)
{
   return state->is_version(330, 300) ||
  state->ARB_shader_bit_encoding_enable ||
  state->ARB_gpu_shader5_enable;
}

...which already enables support in 3.30 and ES 3.00.

I'm sending out three patches which replace this.  The first enables
ARB_explicit_attrib_location, and the next two frob around with the
ARB_shader_bit_encoding enables.  I'm pretty ambivalent about the last
two, so I'd welcome opinions.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/17] mesa: Get GL_MAX_VARYING_FLOATS_ARB from VertexProgram.MaxOutputComponents

2013-09-13 Thread Paul Berry
On 11 September 2013 16:28, Ian Romanick  wrote:

> On 09/11/2013 04:05 PM, Paul Berry wrote:
> > On 10 September 2013 12:10, Ian Romanick  > > wrote:
> >
> > From: Ian Romanick  > >
> >
> > Signed-off-by: Ian Romanick  > >
> > ---
> >  src/mesa/main/get.c  | 4 
> >  src/mesa/main/get_hash_params.py | 2 +-
> >  2 files changed, 1 insertion(+), 5 deletions(-)
> >
> > diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
> > index 34eb6be..ae45bf8 100644
> > --- a/src/mesa/main/get.c
> > +++ b/src/mesa/main/get.c
> > @@ -718,10 +718,6 @@ find_custom_value(struct gl_context *ctx, const
> > struct value_desc *d, union valu
> >ASSERT(v->value_int_n.n <= ARRAY_SIZE(v->value_int_n.ints));
> >break;
> >
> > -   case GL_MAX_VARYING_FLOATS_ARB:
> > -  v->value_int = ctx->Const.MaxVarying * 4;
> > -  break;
> > -
> > /* Various object names */
> >
> > case GL_TEXTURE_BINDING_1D:
> > diff --git a/src/mesa/main/get_hash_params.py
> > b/src/mesa/main/get_hash_params.py
> > index c0dbf45..3d47443 100644
> > --- a/src/mesa/main/get_hash_params.py
> > +++ b/src/mesa/main/get_hash_params.py
> > @@ -365,7 +365,7 @@ descriptor=[
> >
> >  # GL_ARB_vertex_shader
> >[ "MAX_VERTEX_UNIFORM_COMPONENTS_ARB",
> > "CONTEXT_INT(Const.VertexProgram.MaxUniformComponents),
> > extra_ARB_vertex_shader" ],
> > -  [ "MAX_VARYING_FLOATS_ARB", "LOC_CUSTOM, TYPE_INT, 0,
> > extra_ARB_vertex_shader" ],
> > +  [ "MAX_VARYING_FLOATS_ARB",
> > "CONTEXT_INT(Const.VertexProgram.MaxOutputComponents),
> > extra_ARB_vertex_shader" ],
> >
> >  # GL_EXT_framebuffer_blit
> >  # NOTE: GL_DRAW_FRAMEBUFFER_BINDING_EXT ==
> GL_FRAMEBUFFER_BINDING_EXT
> > --
> > 1.8.1.4
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org  mesa-dev@lists.freedesktop.org>
> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >
> > Doesn't MAX_VARYING_FLOATS need to be MIN2(MAX_VERTEX_OUTPUT_COMPONENTS,
> > MAX_FRAGMENT_INPUT_COMPONENTS)?  I can imagine an implementation where
> > MAX_FRAGMENT_INPUT_COMPONENTS is the smaller constraint (in fact, ES3's
> > minimum maximums constitute just such a case).
>
> It's all so much more horrible than you think. :)
>
> OpenGL 3.2 sets:
>
> MAX_VARYING_COMPONENTS: 60
> MAX_VERTEX_OUTPUT_COMPONENTS: 64
> MAX_FRAGMENT_INPUT_COMPONENTS: 128
>
> OpenGL ES 3.0 sets:
>
> MAX_VARYING_COMPONENTS: 60
> MAX_VARYING_VECTORS: 15
> MAX_VERTEX_OUTPUT_VECTORS: 16
> MAX_VERTEX_OUTPUT_COMPONENTS: 64
> MAX_FRAGMENT_INPUT_VECTORS: 15
> MAX_FRAGMENT_INPUT_COMPONENTS: 60
>

Oddly, I don't see MAX_VERTEX_OUTPUT_VECTORS or MAX_FRAGMENT_INPUT_VECTORS
in the GLES 3 spec; only MAX_VARYING_VECTORS.


>
> BUT the description of MAX_VARYING_COMPONENTS in GLES3 says:
>
> "Number of components for output variables."
>
> So... OpenGL 3.2 makes no sense, and OpenGL ES 3.0 seems self
> contradictory.  The core problem seems to be the way that each API (and
> each driver) counts gl_Position and gl_FragCoord.  Some say it's
> counted, some say it's not counted, and some say it might be counted.
> Our existing infrastructure may not be sufficient to handle all
> combinations of those cases.
>

I've spent a while digging through the specs, and I actually don't think
it's quite as complicated or counterintuitive as it sounded at first.

First of all, we don't need to worry about the distinction between
COMPONENTS and VECTORS, because MAX_foo_COMPONENTS always equals
MAX_foo_VECTORS * 4.  Similarly, there's no need to distinguish between
MAX_VARYING_COMPONENTS and MAX_VARYING_FLOATS.

As for the question of whether gl_Position and gl_FragCoord count against
these limits, all GL specs since 2.0, and all GLES specs since 2.0 seem to
agree that gl_Position is *not* counted for MAX_VARYING_COMPONENTS, however
MAX_*_OUTPUT_COMPONENTS and MAX_*_INPUT_COMPONENTS count *all* inputs and
outputs.  I think it's reasonable to infer that the spec writers probably
intended for the same to apply to gl_FragCoord and gl_FrontFacing (namely,
they don't count against MAX_VARYING_COMPONENTS, but they do count against
MAX_FRAGMENT_INPUT_COMPONENTS).

Interestingly, ARB_vertex_shader doesn't specifically mention that
gl_Position is excluded from MAX_VARYING_FLOATS, but it defines
MAX_VARYING_FLOATS as the number of "interpolators" that are available, so
I think it's a reasonable interpretation that gl_Position is not intended
to be counted (especially given that this is the interpretation that the GL
spec authors chose when promoting the extension into GL 2.0).

Note that all the specs allow for the possibility that device-dependent
optimizations may allow for more than the advertised number of
inputs/outputs/vary

[Mesa-dev] [PATCH 3/3] glsl: Drop shader_bit_encoding version checks.

2013-09-13 Thread Kenneth Graunke
We now set the ARB_shader_bit_encoding flag for versions that support
this functionality, so we don't need to double check it here.

Signed-off-by: Kenneth Graunke 
Cc: Ian Romanick 
---
 src/glsl/builtin_functions.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index c468bd5..b020a7c 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -182,8 +182,7 @@ shader_texture_lod_and_rect(const _mesa_glsl_parse_state 
*state)
 static bool
 shader_bit_encoding(const _mesa_glsl_parse_state *state)
 {
-   return state->is_version(330, 300) ||
-  state->ARB_shader_bit_encoding_enable ||
+   return state->ARB_shader_bit_encoding_enable ||
   state->ARB_gpu_shader5_enable;
 }
 
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/hsw: compute DDX in a subspan based only on top row

2013-09-13 Thread Paul Berry
On 12 September 2013 22:06, Chia-I Wu  wrote:

> From: Chia-I Wu 
>
> Consider only the top-left and top-right pixels to approximate DDX in a 2x2
> subspan, unless the application or the user requests a more accurate
> approximation.  This results in a less accurate approximation.  However, it
> improves the performance of Xonotic with Ultra settings by 24.3879% +/-
> 0.832202% (at 95.0% confidence) on Haswell.  No noticeable image quality
> difference observed.
>
> No piglit gpu.tests regressions (tested with v1)
>
> I failed to come up with an explanation for the performance difference, as
> the
> change does not affect Ivy Bridge.  If anyone has the insight, please
> kindly
> enlighten me.  Performance differences may also be observed on other games
> that call textureGrad and dFdx.
>
> v2: Honor GL_FRAGMENT_SHADER_DERIVATIVE_HINT and add a drirc option.
>  Update
> comments.
>

I'm not entirely comfortable making a change that has a known negative
impact on computational accuracy (even one that leads to such an impressive
performance improvement) when we don't have any theories as to why the
performance improvement happens, or why the improvement doesn't apply to
Ivy Bridge.  In my experience, making changes to the codebase without
understanding why they improve things almost always leads to improvements
that are brittle, since it's likely that the true source of the improvement
is a coincidence that will be wiped out by some future change (or won't be
relevant to client programs other than this particular benchmark).  Having
a theory as to why the performance improvement happens would help us be
confident that we're applying the right fix under the right circumstances.

For example, here's one theory as to why we might be seeing an improvement:
perhaps Haswell's sample_d processing is smart enough to realize that when
all the gradient values within a sub-span are the same, that means that all
of the sampling for the sub-span will come from the same LOD, and that
allows it to short-cut some expensive step in the LOD calculation.  Perhaps
the same improvement isn't seen on Ivy Bridge because Ivy Bridge's sample_d
processing logic is less sophisticated, so it's unable to perform the
optimization.  If this is the case, then conditioning the optimization on
brw->is_haswell (as you've done) makes sense.

Another possible explanation for the Haswell vs Ivy Bridge difference is
that perhaps Ivy Bridge, being a lower-performing chip, has other
bottlenecks that make the optimization irrelevant for this particular
benchmark, but potentially still useful for other benchmarks.  For
instance, maybe when this benchmark executes on Ivy Bridge, the texture
that's being sampled from is located in sufficiently distant memory that
optimizing the sample_d's memory accesses makes no difference, since the
bottleneck is the speed with which the texture can be read into cache,
rather than the speed of operation of sample_d.  If this explanation is
correct, then it might be worth applying the optimization to both Ivy
Bridge and Haswell (and perhaps Sandy Bridge as well), since it might
conceivably benefit those other chips when running applications that place
less cache pressure on the chip.

Another possible explanation is that Haswell has a bug in its sample_d
logic which causes it to be slow under some conditions, and this
lower-accuracy DDX computation happens to work around it.  If that's the
case, we might want to consider not using sample_d at all on Haswell, and
instead calculating the LOD in the shader and using sample_l instead.  If
this is the correct explanation, then that might let us have faster
performance without sacrificing DDX accuracy.

A final possible explanation for the performance improvement is that
perhaps for some reason sample_d performs more optimally when the DDX and
DDY computations have similar accuracies to each other.  Before your patch,
our computation of DDX was more accurate than DDY; your patch decreases the
accuracy of DDX to match DDY.  If this explanation is correct, then a
better solution would probably be to improve the accuracy of DDY to make it
comparable to DDX, rather than the other way around.

Before we land this patch, can we do some experiments to try to figure out
which of these explanations (if any) is correct?


>
> Signed-off-by: Chia-I Wu 
> ---
>  src/mesa/drivers/dri/i965/brw_context.c   |  1 +
>  src/mesa/drivers/dri/i965/brw_context.h   |  1 +
>  src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 40
> ---
>  src/mesa/drivers/dri/i965/intel_screen.c  |  4 
>  4 files changed, 38 insertions(+), 8 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_context.c
> b/src/mesa/drivers/dri/i965/brw_context.c
> index 4fcc9fb..1cdfb9d 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.c
> +++ b/src/mesa/drivers/dri/i965/brw_context.c
> @@ -470,6 +470,7 @@ brwCreateContext(int api,
> brw_draw_init( brw );
>
> brw->precompile = driQuer

[Mesa-dev] [PATCH 1/3] i965: Introduce a prog_data temporary in gen6_upload_wm_push_constants.

2013-09-13 Thread Kenneth Graunke
This saves a bit of typing and shortens a few lines.

Signed-off-by: Kenneth Graunke 
Cc: Paul Berry 
---
 src/mesa/drivers/dri/i965/gen6_wm_state.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c 
b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index 2b9f19c..5a6943f 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -41,6 +41,8 @@ gen6_upload_wm_push_constants(struct brw_context *brw)
/* BRW_NEW_FRAGMENT_PROGRAM */
const struct brw_fragment_program *fp =
   brw_fragment_program_const(brw->fragment_program);
+   /* CACHE_NEW_WM_PROG */
+   const struct brw_wm_prog_data *prog_data = brw->wm.prog_data;
 
/* Updates the ParameterValues[i] pointers for all parameters of the
 * basic type of PROGRAM_STATE_VAR.
@@ -48,25 +50,23 @@ gen6_upload_wm_push_constants(struct brw_context *brw)
/* XXX: Should this happen somewhere before to get our state flag set? */
_mesa_load_state_parameters(ctx, fp->program.Base.Parameters);
 
-   /* CACHE_NEW_WM_PROG */
-   if (brw->wm.prog_data->nr_params != 0) {
+   if (prog_data->nr_params != 0) {
   float *constants;
   unsigned int i;
 
   constants = brw_state_batch(brw, AUB_TRACE_WM_CONSTANTS,
- brw->wm.prog_data->nr_params *
- sizeof(float),
+ prog_data->nr_params * sizeof(float),
  32, &brw->wm.base.push_const_offset);
 
-  for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
-constants[i] = *brw->wm.prog_data->param[i];
+  for (i = 0; i < prog_data->nr_params; i++) {
+constants[i] = *prog_data->param[i];
   }
 
   if (0) {
 printf("WM constants:\n");
-for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
+for (i = 0; i < prog_data->nr_params; i++) {
if ((i & 7) == 0)
-  printf("g%d: ", brw->wm.prog_data->first_curbe_grf + i / 8);
+  printf("g%d: ", prog_data->first_curbe_grf + i / 8);
printf("%8f ", constants[i]);
if ((i & 7) == 7)
   printf("\n");
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] i965: Set brw_stage_state::push_const_size for PS constants.

2013-09-13 Thread Kenneth Graunke
This paves the way for using gen7_upload_constant_state for PS data.

The formula is copied from gen7_wm_state.c.

Signed-off-by: Kenneth Graunke 
Cc: Paul Berry 
---
 src/mesa/drivers/dri/i965/gen6_wm_state.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c 
b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index 5a6943f..d21de13 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -50,7 +50,9 @@ gen6_upload_wm_push_constants(struct brw_context *brw)
/* XXX: Should this happen somewhere before to get our state flag set? */
_mesa_load_state_parameters(ctx, fp->program.Base.Parameters);
 
-   if (prog_data->nr_params != 0) {
+   if (prog_data->nr_params == 0) {
+  brw->wm.base.push_const_size = 0;
+   } else {
   float *constants;
   unsigned int i;
 
@@ -75,6 +77,9 @@ gen6_upload_wm_push_constants(struct brw_context *brw)
printf("\n");
 printf("\n");
   }
+
+  brw->wm.base.push_const_size =
+ ALIGN(prog_data->nr_params, prog_data->dispatch_width) / 8;
}
 }
 
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] i965: Use gen7_upload_constant_state for 3DSTATE_CONSTANT_PS as well.

2013-09-13 Thread Kenneth Graunke
Now we use gen7_upload_constant_state() for all three shader stages.

Signed-off-by: Kenneth Graunke 
Cc: Paul Berry 
---
 src/mesa/drivers/dri/i965/gen7_wm_state.c | 28 +---
 1 file changed, 1 insertion(+), 27 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c 
b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index e7bb861..0d2ac09 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -129,33 +129,7 @@ upload_ps_state(struct brw_context *brw)
ADVANCE_BATCH();
 
/* CACHE_NEW_WM_PROG */
-   if (brw->wm.prog_data->nr_params == 0) {
-  /* Disable the push constant buffers. */
-  BEGIN_BATCH(7);
-  OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (7 - 2));
-  OUT_BATCH(0);
-  OUT_BATCH(0);
-  OUT_BATCH(0);
-  OUT_BATCH(0);
-  OUT_BATCH(0);
-  OUT_BATCH(0);
-  ADVANCE_BATCH();
-   } else {
-  BEGIN_BATCH(7);
-  OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (7 - 2));
-
-  OUT_BATCH(ALIGN(brw->wm.prog_data->nr_params,
- brw->wm.prog_data->dispatch_width) / 8);
-  OUT_BATCH(0);
-  /* Pointer to the WM constant buffer.  Covered by the set of
-   * state flags from gen6_upload_wm_push_constants.
-   */
-  OUT_BATCH(brw->wm.base.push_const_offset | GEN7_MOCS_L3);
-  OUT_BATCH(0);
-  OUT_BATCH(0);
-  OUT_BATCH(0);
-  ADVANCE_BATCH();
-   }
+   gen7_upload_constant_state(brw, &brw->wm.base, true, _3DSTATE_CONSTANT_PS);
 
dw2 = dw4 = dw5 = 0;
 
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa: Check for valid debug label before memcpy.

2013-09-13 Thread Vinson Lee
Fixes "Dereference after null check" reported by Coverity.

Signed-off-by: Vinson Lee 
---
 src/mesa/main/objectlabel.c | 18 +++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/mesa/main/objectlabel.c b/src/mesa/main/objectlabel.c
index 90d9e09..79fd8c7 100644
--- a/src/mesa/main/objectlabel.c
+++ b/src/mesa/main/objectlabel.c
@@ -90,16 +90,28 @@ set_label(struct gl_context *ctx, char **labelPtr, const 
char *label,
 static void
 copy_label(char **labelPtr, char *label, int *length, int bufSize)
 {
+   /* From http://www.opengl.org/registry/specs/KHR/debug.txt:
+* "If <length> is NULL, no length is returned. The maximum number of
+* characters that may be written into <label>, including the null
+* terminator, is specified by <bufSize>. If no debug label was specified
+* for the object then <label> will contain a null-terminated empty string,
+* and zero will be returned in <length>. If <label> is NULL and <length>
+* is non-NULL then no string will be returned and the length of the label
+* will be returned in <length>."
+*/
+
int labelLen = 0;
 
if (*labelPtr)
   labelLen = strlen(*labelPtr);
 
if (label) {
-  if (bufSize <= labelLen)
- labelLen =  bufSize-1;
+  if (*labelPtr) {
+ if (bufSize <= labelLen)
+labelLen =  bufSize-1;
 
-  memcpy(label, *labelPtr, labelLen);
+ memcpy(label, *labelPtr, labelLen);
+  }
   label[labelLen] = '\0';
}
 
-- 
1.8.1.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] regression on nvc0 since floating point compare instructions

2013-09-13 Thread Dave Airlie
On Fri, Sep 13, 2013 at 12:24 AM, Christoph Bumiller
 wrote:
> On 12.09.2013 16:14, Roland Scheidegger wrote:
>> Am 12.09.2013 03:40, schrieb Dave Airlie:
 Maybe the type isn't set correctly? Looks to me like these instructions
 end up in mkCmp, which will set both src and dst type but ignore src
 type and set both according to the same type (which was the dst type).

 Roland
>>> Okay I've attached my next attempt at fixing it, fixes the two testcases I 
>>> had.
>>
>> No idea what setting type there really does but I guess that looks right
>> :-). Though I'm wondering if U32 vs. S32 would make a difference for dst
>> type since some of the (unsigned) comparisons still would use U32.
>
> It doesn't make a difference, making it signed is unnecessary.
> If it helped before that was just because it made negative floats be
> interpreted as negative ints (instead of large ints) which has a
> slightly better chance of "succeeding".

Cool I've dropped that bit then, and it still works fine! so I'll push
the remains.

Dave.


0001-nouveau-fix-regression-since-float-comparison-instru.patch
Description: Binary data
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] st/mesa: don't dereference stObj->pt if NULL

2013-09-13 Thread Dave Airlie
It seems a user app can get us into this state, I trigger the fail
running fbo-maxsize inside virgl, it fails to create the backing
storage for the texture object, but then segfaults here when it
should fail the completeness test.

Signed-off-by: Dave Airlie 
---
 src/mesa/state_tracker/st_cb_fbo.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/mesa/state_tracker/st_cb_fbo.c 
b/src/mesa/state_tracker/st_cb_fbo.c
index 77aaccd..09daded 100644
--- a/src/mesa/state_tracker/st_cb_fbo.c
+++ b/src/mesa/state_tracker/st_cb_fbo.c
@@ -505,6 +505,9 @@ st_validate_attachment(struct gl_context *ctx,
if (!stObj)
   return GL_FALSE;
 
+   if (!stObj->pt)
+  return GL_FALSE;
+
format = stObj->pt->format;
texFormat = att->Renderbuffer->TexImage->TexFormat;
 
-- 
1.8.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] i965 surface state tidying, part 1

2013-09-13 Thread Kenneth Graunke
Here's a bit of tidying I did to the i965 SURFACE_STATE code:
- It unifies the code for constant buffers/UBOs, texture buffers,
  and shader time buffers.
- It also unifies the texture/renderbuffer code on Gen7+.

Both of these delete a /lot/ of complicated cut and pasted code.
No performance regressions observed in OpenArena on Ivybridge.

There are a few more things I'd like to do:
- Port BLORP to use emit_image_surface_state().
- Unify texture/renderbuffers on Gen4-6.
- Make emit_{buffer,image}_surface_state() vtable hooks.
  Then unify the Gen4-6 and Gen7+ variants of
  update_buffer_texture_surface, update_texture_surface,
  create_constant_surface, and update_renderbuffer_surface.

I stopped before doing this because a bunch of these use tile offsets
instead of LOD fields, and I thought that we were moving away from
tile offsets on Gen4-6 and in BLORP.  I can't remember where Chad and
Eric were at with those endeavors.

No regressions on Ivybridge.  Patches 3-4 still need testing.
Haswell also needs testing.

These patches are available on the 'sstidy' branch of ~kwg/mesa.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] i965: Refactor Gen7+ SURFACE_STATE setup for buffer surfaces.

2013-09-13 Thread Kenneth Graunke
This was an embarrassingly large amount of copy and pasted code,
and it wasn't particularly simple code either.  By factoring it out
into a helper function, we consolidate the complexity.

Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 144 +-
 1 file changed, 58 insertions(+), 86 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
index 37e3174..8413308 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
@@ -224,6 +224,37 @@ gen7_check_surface_setup(uint32_t *surf, bool 
is_render_target)
}
 }
 
+static void
+gen7_emit_buffer_surface_state(struct brw_context *brw,
+   uint32_t *out_offset,
+   drm_intel_bo *bo,
+   unsigned buffer_offset,
+   unsigned surface_format,
+   unsigned buffer_size,
+   unsigned pitch,
+   unsigned mocs)
+{
+   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
+8 * 4, 32, out_offset);
+   memset(surf, 0, 8 * 4);
+
+   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
+ surface_format << BRW_SURFACE_FORMAT_SHIFT |
+ BRW_SURFACE_RC_READ_WRITE;
+   surf[1] = bo->offset + buffer_offset; /* reloc */
+   surf[2] = SET_FIELD(buffer_size & 0x7f, GEN7_SURFACE_WIDTH) |
+ SET_FIELD((buffer_size >> 7) & 0x3fff, GEN7_SURFACE_HEIGHT);
+   surf[3] = SET_FIELD((buffer_size >> 21) & 0x3f, BRW_SURFACE_DEPTH) |
+ (pitch - 1);
+   surf[4] = 0;
+   surf[5] = SET_FIELD(mocs, GEN7_SURFACE_MOCS);
+
+   /* Emit relocation to surface contents */
+   drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
+   bo, buffer_offset, I915_GEM_DOMAIN_SAMPLER, 0);
+
+   gen7_check_surface_setup(surf, false /* is_render_target */);
+}
 
 static void
 gen7_update_buffer_texture_surface(struct gl_context *ctx,
@@ -237,39 +268,23 @@ gen7_update_buffer_texture_surface(struct gl_context *ctx,
drm_intel_bo *bo = intel_obj ? intel_obj->buffer : NULL;
gl_format format = tObj->_BufferObjectFormat;
 
-   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
-8 * 4, 32, surf_offset);
-   memset(surf, 0, 8 * 4);
-
uint32_t surface_format = brw_format_for_mesa_format(format);
if (surface_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
   _mesa_problem(NULL, "bad format %s for texture buffer\n",
 _mesa_get_format_name(format));
}
 
-   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
- surface_format << BRW_SURFACE_FORMAT_SHIFT |
- BRW_SURFACE_RC_READ_WRITE;
-
-   if (bo) {
-  surf[1] = bo->offset; /* reloc */
-
-  drm_intel_bo_emit_reloc(brw->batch.bo,
- *surf_offset + 4,
- bo, 0,
- I915_GEM_DOMAIN_SAMPLER, 0);
-
-  int texel_size = _mesa_get_format_bytes(format);
-  int w = intel_obj->Base.Size / texel_size;
-
-  /* note that these differ from GEN6 */
-  surf[2] = SET_FIELD(w & 0x7f, GEN7_SURFACE_WIDTH) | /* bits 6:0 of size 
*/
-SET_FIELD((w >> 7) & 0x3fff, GEN7_SURFACE_HEIGHT); /* 20:7 */
-  surf[3] = SET_FIELD((w >> 21) & 0x3f, BRW_SURFACE_DEPTH) | /* bits 26:21 
*/
-(texel_size - 1);
-   }
-
-   gen7_check_surface_setup(surf, false /* is_render_target */);
+   int texel_size = _mesa_get_format_bytes(format);
+   int w = intel_obj ? intel_obj->Base.Size / texel_size : 0;
+
+   gen7_emit_buffer_surface_state(brw,
+  surf_offset,
+  bo,
+  0,
+  surface_format,
+  w,
+  texel_size,
+  0 /* mocs */);
 }
 
 static void
@@ -371,38 +386,15 @@ gen7_create_constant_surface(struct brw_context *brw,
 {
uint32_t stride = dword_pitch ? 4 : 16;
uint32_t elements = ALIGN(size, stride) / stride;
-   const GLint w = elements - 1;
 
-   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
-8 * 4, 32, out_offset);
-   memset(surf, 0, 8 * 4);
-
-   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
- BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_SURFACE_FORMAT_SHIFT |
- BRW_SURFACE_RC_READ_WRITE;
-
-   assert(bo);
-   surf[1] = bo->offset + offset; /* reloc */
-
-   /* note that these differ from GEN6 */
-   surf[2] = SET_FIELD(w & 0x7f, GEN7_SURFACE_WIDTH) |
- SET_FIELD((w >> 7) & 0x3fff, GEN7_SURFACE_HEIGHT);
-   surf[3] = SET_FIELD((w >> 2

[Mesa-dev] [PATCH 2/4] i965: Share Gen7+ SURFACE_STATE setup for textures and renderbuffers.

2013-09-13 Thread Kenneth Graunke
The SURFACE_STATE entries for textures and renderbuffers share almost
all of the same fields.  Only a couple are specific to one or the other.

Thus, it makes sense to have a single shared function that takes care of
all the bit-shifting required to assemble the SURFACE_STATE structure.

This removes a lot of complicated cut and pasted code.

One change is that we now specify cube face enables for render targets,
but as far as I can tell this is harmless.

Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 210 ++
 1 file changed, 99 insertions(+), 111 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
index 8413308..8f95abe 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
@@ -257,6 +257,70 @@ gen7_emit_buffer_surface_state(struct brw_context *brw,
 }
 
 static void
+gen7_emit_image_surface_state(struct brw_context *brw,
+  uint32_t *out_offset,
+  const struct intel_mipmap_tree *mt,
+  unsigned bo_offset,
+  unsigned surface_type,
+  unsigned surface_format,
+  bool is_array,
+  unsigned depth,
+  unsigned min_array_element,
+  unsigned rt_view_extent,
+  unsigned mocs,
+  unsigned mip_count,
+  int swizzle,
+  bool is_render_target)
+{
+   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
+8 * 4, 32, out_offset);
+   surf[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
+ surface_format << BRW_SURFACE_FORMAT_SHIFT |
+ gen7_surface_tiling_mode(mt->region->tiling) |
+ BRW_SURFACE_CUBEFACE_ENABLES |
+ (mt->align_h == 4 ? GEN7_SURFACE_VALIGN_4 : 
GEN7_SURFACE_VALIGN_2) |
+ (mt->align_w == 8 ? GEN7_SURFACE_HALIGN_8 : 
GEN7_SURFACE_HALIGN_4) |
+ (is_array ? GEN7_SURFACE_IS_ARRAY : 0) |
+ (mt->array_spacing_lod0 ? GEN7_SURFACE_ARYSPC_LOD0
+ : GEN7_SURFACE_ARYSPC_FULL);
+   surf[1] = mt->region->bo->offset + bo_offset; /* reloc */
+   surf[2] = SET_FIELD(mt->logical_width0 - 1, GEN7_SURFACE_WIDTH) |
+ SET_FIELD(mt->logical_height0 - 1, GEN7_SURFACE_HEIGHT);
+   surf[3] = SET_FIELD(depth - 1, BRW_SURFACE_DEPTH) |
+ (mt->region->pitch - 1);
+   surf[4] = min_array_element << GEN7_SURFACE_MIN_ARRAY_ELEMENT_SHIFT |
+ rt_view_extent << GEN7_SURFACE_RENDER_TARGET_VIEW_EXTENT_SHIFT |
+ gen7_surface_msaa_bits(mt->num_samples, mt->msaa_layout);
+   surf[5] = SET_FIELD(mocs, GEN7_SURFACE_MOCS) | mip_count;
+
+   if (mt->mcs_mt) {
+  gen7_set_surface_mcs_info(brw, surf, *out_offset, mt->mcs_mt, true);
+   } else {
+  surf[6] = 0;
+   }
+
+   surf[7] = mt->fast_clear_color_value;
+
+   if (brw->is_haswell) {
+  surf[7] |=
+ SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 0)), GEN7_SURFACE_SCS_R) |
+ SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 1)), GEN7_SURFACE_SCS_G) |
+ SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 2)), GEN7_SURFACE_SCS_B) |
+ SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 3)), GEN7_SURFACE_SCS_A);
+   }
+
+   uint32_t read_domain =
+  is_render_target ? I915_GEM_DOMAIN_RENDER : I915_GEM_DOMAIN_SAMPLER;
+
+   /* Emit relocation to surface contents */
+   drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
+  mt->region->bo, bo_offset,
+   read_domain, 0);
+
+   gen7_check_surface_setup(surf, is_render_target);
+}
+
+static void
 gen7_update_buffer_texture_surface(struct gl_context *ctx,
unsigned unit,
uint32_t *surf_offset)
@@ -305,43 +369,14 @@ gen7_update_texture_surface(struct gl_context *ctx,
   return;
}
 
-   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
-8 * 4, 32, surf_offset);
-   memset(surf, 0, 8 * 4);
-
-   uint32_t tex_format = translate_tex_format(brw,
+   bool is_array = mt->logical_depth0 > 1 && tObj->Target != GL_TEXTURE_3D;
+   unsigned mip_count = intelObj->_MaxLevel - intel_image->mt->first_level;
+   uint32_t brw_format = translate_tex_format(brw,
   mt->format,
   tObj->DepthMode,
   sampler->sRGBDecode);
 
-   surf[0] = translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
- tex_format << BRW_SURFACE_FORMAT_SHIFT |
- gen7_surface_tiling_mode

[Mesa-dev] [PATCH 3/4] i965: Remove MIPLAYOUT_BELOW from Gen4-6 constant buffer surface state.

2013-09-13 Thread Kenneth Graunke
Specifying a miptree layout makes no sense for constant buffers.

Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 25db2e0..8d87786 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -318,7 +318,6 @@ brw_create_constant_surface(struct brw_context *brw,
  6 * 4, 32, out_offset);
 
surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
- BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
  BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_SURFACE_FORMAT_SHIFT);
 
if (brw->gen >= 6)
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/4] i965: Refactor Gen4-6 SURFACE_STATE setup for buffer surfaces.

2013-09-13 Thread Kenneth Graunke
This was an embarrassingly large amount of copy and pasted code,
and it wasn't particularly simple code either.  By factoring it out
into a helper function, we consolidate the complexity.

Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 98 +---
 1 file changed, 37 insertions(+), 61 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 8d87786..bbe7803 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -190,6 +190,36 @@ brw_get_texture_swizzle(const struct gl_context *ctx,
 swizzles[GET_SWZ(t->_Swizzle, 3)]);
 }
 
+/**
+ * Emit a Gen4-6 SURFACE_STATE entry describing a buffer surface.
+ *
+ * Allocates six dwords of surface state in the batch (their offset is
+ * returned through \p out_offset), zeroes them, and fills in the surface
+ * type, format, base address, size and pitch fields.
+ *
+ * \param brw             context whose batch receives the surface state.
+ * \param out_offset      receives the batch offset of the new surface state.
+ * \param bo              buffer backing the surface.  May be NULL (e.g. a
+ *                        buffer texture with no buffer object attached); in
+ *                        that case the base address is just
+ *                        \p buffer_offset and no relocation is emitted.
+ * \param buffer_offset   offset of the surface data within \p bo.
+ * \param surface_format  value for the surface format field.
+ * \param buffer_size     size value split across the width (bits 6:0),
+ *                        height (bits 19:7) and depth (bits 26:20) fields.
+ * \param pitch           surface pitch; the hardware field stores pitch - 1.
+ */
+static void
+gen4_emit_buffer_surface_state(struct brw_context *brw,
+                               uint32_t *out_offset,
+                               drm_intel_bo *bo,
+                               unsigned buffer_offset,
+                               unsigned surface_format,
+                               unsigned buffer_size,
+                               unsigned pitch)
+{
+   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
+                                    6 * 4, 32, out_offset);
+   memset(surf, 0, 6 * 4);
+
+   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
+             surface_format << BRW_SURFACE_FORMAT_SHIFT |
+             (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
+   /* Callers may pass a NULL bo, so never dereference it unconditionally. */
+   surf[1] = (bo ? bo->offset : 0) + buffer_offset; /* reloc */
+   surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
+             ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
+   surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
+             (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
+
+   /* Emit relocation to surface contents.  The 965 PRM, Volume 4, section
+    * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
+    * physical cache.  It is mapped in hardware to the sampler cache."
+    *
+    * Without a buffer there is nothing to relocate against.
+    */
+   if (bo) {
+      drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
+                              bo, buffer_offset,
+                              I915_GEM_DOMAIN_SAMPLER, 0);
+   }
+}
 
 static void
 brw_update_buffer_texture_surface(struct gl_context *ctx,
@@ -198,49 +228,22 @@ brw_update_buffer_texture_surface(struct gl_context *ctx,
 {
struct brw_context *brw = brw_context(ctx);
struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
-   uint32_t *surf;
struct intel_buffer_object *intel_obj =
   intel_buffer_object(tObj->BufferObject);
drm_intel_bo *bo = intel_obj ? intel_obj->buffer : NULL;
gl_format format = tObj->_BufferObjectFormat;
uint32_t brw_format = brw_format_for_mesa_format(format);
int texel_size = _mesa_get_format_bytes(format);
+   int w = intel_obj ? intel_obj->Base.Size / texel_size : 0;
 
if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
   _mesa_problem(NULL, "bad format %s for texture buffer\n",
_mesa_get_format_name(format));
}
 
-   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
- 6 * 4, 32, surf_offset);
-
-   surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
- (brw_format_for_mesa_format(format) << BRW_SURFACE_FORMAT_SHIFT));
-
-   if (brw->gen >= 6)
-  surf[0] |= BRW_SURFACE_RC_READ_WRITE;
-
-   if (bo) {
-  surf[1] = bo->offset; /* reloc */
-
-  /* Emit relocation to surface contents. */
-  drm_intel_bo_emit_reloc(brw->batch.bo,
- *surf_offset + 4,
- bo, 0, I915_GEM_DOMAIN_SAMPLER, 0);
-
-  int w = intel_obj->Base.Size / texel_size;
-  surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
-((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);
-  surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
-(texel_size - 1) << BRW_SURFACE_PITCH_SHIFT);
-   } else {
-  surf[1] = 0;
-  surf[2] = 0;
-  surf[3] = 0;
-   }
-
-   surf[4] = 0;
-   surf[5] = 0;
+   gen4_emit_buffer_surface_state(brw, surf_offset, bo, 0,
+  brw_format,
+  w, texel_size);
 }
 
 static void
@@ -311,37 +314,10 @@ brw_create_constant_surface(struct brw_context *brw,
 {
uint32_t stride = dword_pitch ? 4 : 16;
uint32_t elements = ALIGN(size, stride) / stride;
-   const GLint w = elements - 1;
-   uint32_t *surf;
-
-   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
- 6 * 4, 32, out_offset);
-
-   surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
- BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_SURFACE_FORMAT_SHIFT);
-
-   if (brw->gen >= 6)
-  surf[0] |= BRW_SURFACE_RC_READ_WRITE;
-
-   surf[1] = bo->offset + offset; /* reloc */
-
-   surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
- ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);
-
-   surf[3]