Re: [Mesa-dev] [PATCH] mesa: Ignore size and offset parameters for BindBufferRange when buffer is 0

2012-12-06 Thread Eric Anholt
Matt Turner matts...@gmail.com writes:

 The ES 3 conformance suite unbinds buffers (by binding buffer 0) and
 passes zero for the size and offset, which the spec explicitly
 disallows. Otherwise, this seems like a reasonable thing to do.

 Khronos will be changing the spec to allow this (bug 9765). Fixes
 es3conform's transform_feedback_init_defaults test.

I think oglconform may have had some similar issues at one point.  The
spec is just really awfully done with respect to the error handling for
these two functions, so I'm in favor of this change to let people do
reasonable things.

Reviewed-by: Eric Anholt e...@anholt.net


pgpKRQneKEv70.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] mesa: add set support (stores a set of pointers)

2012-12-06 Thread Ian Romanick

On 12/05/2012 07:32 AM, Brian Paul wrote:

On 12/04/2012 04:55 PM, Jordan Justen wrote:

From: Eric Anholte...@anholt.net

From: git://people.freedesktop.org/~anholt/hash_table

Reviewed-by: Jordan Justenjordan.l.jus...@intel.com
[jordan.l.jus...@intel.com: minor rework for mesa]
Signed-off-by: Jordan Justenjordan.l.jus...@intel.com
---
  src/mesa/SConscript  |1 +
  src/mesa/main/set.c  |  349
++
  src/mesa/main/set.h  |   94 ++
  src/mesa/sources.mak |1 +
  4 files changed, 445 insertions(+)
  create mode 100644 src/mesa/main/set.c
  create mode 100644 src/mesa/main/set.h

diff --git a/src/mesa/SConscript b/src/mesa/SConscript
index 1afa412..a2492f7 100644
--- a/src/mesa/SConscript
+++ b/src/mesa/SConscript
@@ -107,6 +107,7 @@ main_sources = [
  'main/renderbuffer.c',
  'main/samplerobj.c',
  'main/scissor.c',
+'main/set.c',
  'main/shaderapi.c',
  'main/shaderobj.c',
  'main/shader_query.cpp',
diff --git a/src/mesa/main/set.c b/src/mesa/main/set.c
new file mode 100644
index 000..c530c40
--- /dev/null
+++ b/src/mesa/main/set.c
@@ -0,0 +1,349 @@
+/*
+ * Copyright © 2009-2012 Intel Corporation
+ * Copyright © 1988-2004 Keith Packard and Bart Massey.
+ *
+ * Permission is hereby granted, free of charge, to any person
obtaining a
+ * copy of this software and associated documentation files (the
Software),
+ * to deal in the Software without restriction, including without
limitation
+ * the rights to use, copy, modify, merge, publish, distribute,
sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
the next
+ * paragraph) shall be included in all copies or substantial portions
of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Except as contained in this notice, the names of the authors
+ * or their institutions shall not be used in advertising or
+ * otherwise to promote the sale, use or other dealings in this
+ * Software without prior written authorization from the
+ * authors.
+ *
+ * Authors:
+ *Eric Anholte...@anholt.net
+ *Keith Packardkei...@keithp.com
+ */
+
+#includestdlib.h
+
+#include set.h
+#include ralloc.h
+
+#define ARRAY_SIZE(array) (sizeof(array) / sizeof(array[0]))
+
+/*
+ * From Knuth -- a good choice for hash/rehash values is p, p-2 where
+ * p and p-2 are both prime.  These tables are sized to have an extra
10%
+ * free to avoid exponential performance degradation as the hash
table fills
+ */
+
+uint32_t deleted_key_value;
+const void *deleted_key =deleted_key_value;
+
+static const struct {
+   uint32_t max_entries, size, rehash;
+} hash_sizes[] = {
+   { 2,5,3},
+   { 4,7,5},
+   { 8,13,   11   },
+   { 16,   19,   17   },
+   { 32,   43,   41   },
+   { 64,   73,   71   },
+   { 128,  151,  149  },
+   { 256,  283,  281  },
+   { 512,  571,  569  },
+   { 1024, 1153, 1151 },
+   { 2048, 2269, 2267 },
+   { 4096, 4519, 4517 },
+   { 8192, 9013, 9011 },
+   { 16384,18043,18041},
+   { 32768,36109,36107},
+   { 65536,72091,72089},
+   { 131072,   144409,   144407   },
+   { 262144,   288361,   288359   },
+   { 524288,   576883,   576881   },
+   { 1048576,  1153459,  1153457  },
+   { 2097152,  2307163,  2307161  },
+   { 4194304,  4613893,  4613891  },
+   { 8388608,  9227641,  9227639  },
+   { 16777216, 18455029, 18455027 },
+   { 33554432, 36911011, 36911009 },
+   { 67108864, 73819861, 73819859 },
+   { 134217728,147639589,147639587},
+   { 268435456,295279081,295279079},
+   { 536870912,590559793,590559791},
+   { 1073741824,   1181116273,   1181116271   },
+   { 2147483648ul, 2362232233ul, 2362232231ul }
+};
+
+static int
+entry_is_free(struct set_entry *entry)
+{
+   return entry-key == NULL;
+}
+
+static int
+entry_is_deleted(struct set_entry *entry)
+{
+   return entry-key == 

Re: [Mesa-dev] [PATCH 1/3] mesa: Disallow deprecated SNORM formats for renderbuffers

2012-12-06 Thread Eric Anholt
Marek Olšák mar...@gmail.com writes:

 On Tue, Dec 4, 2012 at 7:56 PM, Eric Anholt e...@anholt.net wrote:
 Ian Romanick i...@freedesktop.org writes:

 From: Ian Romanick ian.d.roman...@intel.com

 The OpenGL 3.2 core profile spec says:

 The following base internal formats from table 3.11 are
 color-renderable: RED, RG, RGB, and RGBA. The sized internal formats
 from table 3.12 that have a color-renderable base internal format
 are also color-renderable. No other formats, including compressed
 internal formats, are color-renderable.

 The OpenGL 3.2 compatibility profile spec says (only ALPHA is added):

 The following base internal formats from table 3.16 are
 color-renderable: ALPHA, RED, RG, RGB, and RGBA. The sized internal 
 formats
 from table 3.17 that have a color-renderable base internal format
 are also color-renderable. No other formats, including compressed
 internal formats, are color-renderable.

 Table 3.12 in the core profile spec and table 3.17 in the compatibility
 profile spec list SNORM formats as having a base internal format of RED,
 RG, RGB, or RGBA.  From this we infer that they should also be color
 renderable.

 This sounds sort of like the description of the bottom half of the
 change, to remove L/LA/I snorm formats.  (ALPHA being left in place,
 which I missed at first).

 I wonder, do they also not support FBOs with unorm L/LA/I, despite
 ARB_framebuffer_object?  fbo-clear-formats should tell.  I'd be happy to
 see those go if nobody else supports them either, and it would make this
 change make more sense to me.

 What do you mean by that? R600 has full renderbuffer support for all
 R, RG, RGBX, RGBA, A, L, LA, and I formats, all can be UNORM, SNORM,
 UINT, SINT, and FLOAT, and blending and MSAA are a sure thing. I'm
 only okay with disallowing the formats if all specifications (GL and
 extensions) agree they *shouldn't* be supported.

ARB_fbo is the only thing that allows L/LA/I to be color-renderable.
EXT_fbo and all the GL core/compat specs say only R/RG/RGB/RGBA/A are.
The ARB_fbo spec makes it pretty clear that they expect drivers to say
no for things despite that, just that they don't want to explicitly
ban it in that spec.  In the core/compat specs, they decided to do the
reasonable thing instead.

For intel, we need either mesa core to filter out the stupid formats, or
we can in the driver.  (unfortunately, we can't quite just look at the
MESA_FORMAT, because those are sometimes promoted from an internalformat
of LA to a mesa format of RGBA, for example).


pgpiHmidSv18o.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] mesa: Use the new hash table for the variable refcount visitor.

2012-12-06 Thread Jordan Justen
On Mon, Dec 3, 2012 at 10:01 PM, Ian Romanick i...@freedesktop.org wrote:
 On 12/03/2012 02:52 PM, Jordan Justen wrote:
 --- a/src/glsl/Makefile.am
 +++ b/src/glsl/Makefile.am
 @@ -42,6 +42,7 @@ noinst_PROGRAMS = glsl_compiler glsl_test
   libglsl_la_SOURCES = \
 glsl_lexer.ll \
 glsl_parser.cc \
 +   $(top_srcdir)/src/mesa/main/hash_table.c \

 Does this need to be here?  If so, what about scons, Android.mk, etc.

Would it be more appropriate to add this to glsl_compiler_SOURCES and
glsl_test_SOURCES rather than libglsl_la_SOURCES? I see this is how
program/hash_table.c was linked. (I currently plan to make this change
in v2.)

Regarding scons/android... yes, I need to fix this.

-Jordan
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] r200: Initialize swrast before setting limits

2012-12-06 Thread Stefan Dösinger
Otherwise the driver announces 4096 vertex shader constants and other
limits that are far too high.

NOTE: This is a candidate for stable release branches.
---
 src/mesa/drivers/dri/r200/r200_context.c |   19 +--
 1 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/src/mesa/drivers/dri/r200/r200_context.c 
b/src/mesa/drivers/dri/r200/r200_context.c
index 360ba72..54cf33e 100644
--- a/src/mesa/drivers/dri/r200/r200_context.c
+++ b/src/mesa/drivers/dri/r200/r200_context.c
@@ -289,13 +289,20 @@ GLboolean r200CreateContext( gl_api api,
rmesa-radeon.swtcl.RenderIndex = ~0;
rmesa-radeon.hw.all_dirty = 1;
 
+   ctx = rmesa-radeon.glCtx;
+   /* Initialize the software rasterizer and helper modules.
+*/
+   _swrast_CreateContext( ctx );
+   _vbo_CreateContext( ctx );
+   _tnl_CreateContext( ctx );
+   _swsetup_CreateContext( ctx );
+   _ae_create_context( ctx );
+
/* Set the maximum texture size small enough that we can guarentee that
 * all texture units can bind a maximal texture and have all of them in
 * texturable memory at once. Depending on the allow_large_textures driconf
 * setting allow larger textures.
 */
-
-   ctx = rmesa-radeon.glCtx;
ctx-Const.MaxTextureUnits = driQueryOptioni (rmesa-radeon.optionCache,
 texture_units);
ctx-Const.MaxTextureImageUnits = ctx-Const.MaxTextureUnits;
@@ -345,14 +352,6 @@ GLboolean r200CreateContext( gl_api api,
 
_mesa_set_mvp_with_dp4( ctx, GL_TRUE );
 
-   /* Initialize the software rasterizer and helper modules.
-*/
-   _swrast_CreateContext( ctx );
-   _vbo_CreateContext( ctx );
-   _tnl_CreateContext( ctx );
-   _swsetup_CreateContext( ctx );
-   _ae_create_context( ctx );
-
/* Install the customized pipeline:
 */
_tnl_destroy_pipeline( ctx );
-- 
1.7.8.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/4] radeon: Initialize swrast before setting limits

2012-12-06 Thread Stefan Dösinger
NOTE: This is a candidate for stable release branches.
---
 src/mesa/drivers/dri/radeon/radeon_context.c |   18 +-
 1 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c 
b/src/mesa/drivers/dri/radeon/radeon_context.c
index 1e0da0b..d29e146 100644
--- a/src/mesa/drivers/dri/radeon/radeon_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_context.c
@@ -251,13 +251,21 @@ r100CreateContext( gl_api api,
rmesa-radeon.swtcl.RenderIndex = ~0;
rmesa-radeon.hw.all_dirty = GL_TRUE;
 
+   ctx = rmesa-radeon.glCtx;
+   /* Initialize the software rasterizer and helper modules.
+*/
+   _swrast_CreateContext( ctx );
+   _vbo_CreateContext( ctx );
+   _tnl_CreateContext( ctx );
+   _swsetup_CreateContext( ctx );
+   _ae_create_context( ctx );
+
/* Set the maximum texture size small enough that we can guarentee that
 * all texture units can bind a maximal texture and have all of them in
 * texturable memory at once. Depending on the allow_large_textures driconf
 * setting allow larger textures.
 */
 
-   ctx = rmesa-radeon.glCtx;
ctx-Const.MaxTextureUnits = driQueryOptioni (rmesa-radeon.optionCache,
 texture_units);
ctx-Const.MaxTextureImageUnits = ctx-Const.MaxTextureUnits;
@@ -307,14 +315,6 @@ r100CreateContext( gl_api api,
 
_mesa_set_mvp_with_dp4( ctx, GL_TRUE );
 
-   /* Initialize the software rasterizer and helper modules.
-*/
-   _swrast_CreateContext( ctx );
-   _vbo_CreateContext( ctx );
-   _tnl_CreateContext( ctx );
-   _swsetup_CreateContext( ctx );
-   _ae_create_context( ctx );
-
/* Install the customized pipeline:
 */
_tnl_destroy_pipeline( ctx );
-- 
1.7.8.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] meta: Disable GL_FRAGMENT_SHADER_ATI in MESA_META_SHADER

2012-12-06 Thread Stefan Dösinger
Fixes clears in Wine on r200.

NOTE: This is a candidate for stable release branches.
---
 src/mesa/drivers/common/meta.c |   11 +++
 1 files changed, 11 insertions(+), 0 deletions(-)

diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index d5e8af3..ad21fa8 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -133,6 +133,7 @@ struct save_state
struct gl_vertex_program *VertexProgram;
GLboolean FragmentProgramEnabled;
struct gl_fragment_program *FragmentProgram;
+   GLboolean ATIFragmentShaderEnabled;
struct gl_shader_program *VertexShader;
struct gl_shader_program *GeometryShader;
struct gl_shader_program *FragmentShader;
@@ -594,6 +595,11 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
  _mesa_set_enable(ctx, GL_FRAGMENT_PROGRAM_ARB, GL_FALSE);
   }
 
+  if (ctx-API == API_OPENGL_COMPAT  
ctx-Extensions.ATI_fragment_shader) {
+ save-ATIFragmentShaderEnabled = ctx-ATIFragmentShader.Enabled;
+ _mesa_set_enable(ctx, GL_FRAGMENT_SHADER_ATI, GL_FALSE);
+  }
+
   if (ctx-Extensions.ARB_shader_objects) {
 _mesa_reference_shader_program(ctx, save-VertexShader,
ctx-Shader.CurrentVertexProgram);
@@ -914,6 +920,11 @@ _mesa_meta_end(struct gl_context *ctx)
 _mesa_reference_fragprog(ctx, save-FragmentProgram, NULL);
   }
 
+  if (ctx-API == API_OPENGL_COMPAT  
ctx-Extensions.ATI_fragment_shader) {
+ _mesa_set_enable(ctx, GL_FRAGMENT_SHADER_ATI,
+  save-ATIFragmentShaderEnabled);
+  }
+
   if (ctx-Extensions.ARB_vertex_shader)
 _mesa_use_shader_program(ctx, GL_VERTEX_SHADER, save-VertexShader);
 
-- 
1.7.8.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/4] r300g: Remove an incorrect comment

2012-12-06 Thread Stefan Dösinger
This occurred because I started this patch by reverting another patch and
forgot to remove it.
---
 src/gallium/drivers/r300/r300_screen.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_screen.c 
b/src/gallium/drivers/r300/r300_screen.c
index de5e4a1..8cb1406 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -105,7 +105,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
 case PIPE_CAP_VERTEX_COLOR_CLAMPED:
 case PIPE_CAP_USER_INDEX_BUFFERS:
 case PIPE_CAP_USER_CONSTANT_BUFFERS:
-case PIPE_CAP_DEPTH_CLIP_DISABLE: /* XXX implemented, but breaks 
Regnum Online */
+case PIPE_CAP_DEPTH_CLIP_DISABLE:
 return 1;
 
 case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
-- 
1.7.8.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] mesa: add set support (stores a set of pointers)

2012-12-06 Thread Justen, Jordan L
On Wed, 2012-12-05 at 11:26 -0800, Ian Romanick wrote:
 On 12/05/2012 07:32 AM, Brian Paul wrote:
  On 12/04/2012 04:55 PM, Jordan Justen wrote:
  +/**
  + * This function is an iterator over the hash table.
  + *
  + * Pass in NULL for the first entry, as in the start of a for loop.
  Note that
  + * an iteration over the table is O(table_size) not O(entries).
  + */
  +struct set_entry *
  +_mesa_set_next_entry(struct set *ht, struct set_entry *entry)
 
  Should ht be const-qualified here too?
 
 Since the returned set_entry isn't const-qualified, probably not.  I 
 think the compiler will generate an error.

I added const to ht, and it is still building for me. (GCC 4.7)

I plan to push this series with Brian's feedback. Or, do
you suspect different compilers might take issue with it?

-Jordan


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] draw: remove some dead constant buffer code

2012-12-06 Thread Brian Paul
Remove the draw_vs_set_constants() and draw_gs_set_constants()
functions and the draw-vs.aligned_constants,
draw-vs.aligned_constant_storage and draw-vs.const_storage_size
fields.  None of it was used.
---
 src/gallium/auxiliary/draw/draw_context.c |2 -
 src/gallium/auxiliary/draw/draw_gs.c  |   13 
 src/gallium/auxiliary/draw/draw_private.h |   18 --
 src/gallium/auxiliary/draw/draw_vs.c  |   49 -
 4 files changed, 0 insertions(+), 82 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_context.c 
b/src/gallium/auxiliary/draw/draw_context.c
index 6980631..c231aba 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -372,12 +372,10 @@ draw_set_mapped_constant_buffer(struct draw_context *draw,
case PIPE_SHADER_VERTEX:
   draw-pt.user.vs_constants[slot] = buffer;
   draw-pt.user.vs_constants_size[slot] = size;
-  draw_vs_set_constants(draw, slot, buffer, size);
   break;
case PIPE_SHADER_GEOMETRY:
   draw-pt.user.gs_constants[slot] = buffer;
   draw-pt.user.gs_constants_size[slot] = size;
-  draw_gs_set_constants(draw, slot, buffer, size);
   break;
default:
   assert(0  invalid shader type in draw_set_mapped_constant_buffer);
diff --git a/src/gallium/auxiliary/draw/draw_gs.c 
b/src/gallium/auxiliary/draw/draw_gs.c
index 3b3ff21..5c55523 100644
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -69,19 +69,6 @@ void draw_gs_destroy( struct draw_context *draw )
tgsi_exec_machine_destroy(draw-gs.tgsi.machine);
 }
 
-void
-draw_gs_set_constants(struct draw_context *draw,
-  unsigned slot,
-  const void *constants,
-  unsigned size)
-{
-   /* noop. added here for symmetry with the VS
-* code and in case we'll ever want to allign
-* the constants, e.g. when we'll change to a
-* different interpreter */
-}
-
-
 struct draw_geometry_shader *
 draw_create_geometry_shader(struct draw_context *draw,
 const struct pipe_shader_state *state)
diff --git a/src/gallium/auxiliary/draw/draw_private.h 
b/src/gallium/auxiliary/draw/draw_private.h
index 5c497c6..86ce397 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -250,12 +250,6 @@ struct draw_context
  uint num_samplers;
   } tgsi;
 
-  const void *aligned_constants[PIPE_MAX_CONSTANT_BUFFERS];
-
-  const void *aligned_constant_storage[PIPE_MAX_CONSTANT_BUFFERS];
-  unsigned const_storage_size[PIPE_MAX_CONSTANT_BUFFERS];
-
-
   struct translate *fetch;
   struct translate_cache *fetch_cache;
   struct translate *emit;
@@ -369,24 +363,12 @@ void draw_vs_destroy( struct draw_context *draw );
 void draw_vs_set_viewport( struct draw_context *, 
const struct pipe_viewport_state * );
 
-void
-draw_vs_set_constants(struct draw_context *,
-  unsigned slot,
-  const void *constants,
-  unsigned size);
-
-
 
 
/***
  * Geometry shading code:
  */
 boolean draw_gs_init( struct draw_context *draw );
 
-void
-draw_gs_set_constants(struct draw_context *,
-  unsigned slot,
-  const void *constants,
-  unsigned size);
 
 void draw_gs_destroy( struct draw_context *draw );
 
diff --git a/src/gallium/auxiliary/draw/draw_vs.c 
b/src/gallium/auxiliary/draw/draw_vs.c
index 0aea2f2..785a903 100644
--- a/src/gallium/auxiliary/draw/draw_vs.c
+++ b/src/gallium/auxiliary/draw/draw_vs.c
@@ -49,47 +49,6 @@
 DEBUG_GET_ONCE_BOOL_OPTION(gallium_dump_vs, GALLIUM_DUMP_VS, FALSE)
 
 
-/**
- * Set a vertex shader constant buffer.
- * \param slot  which constant buffer in [0, PIPE_MAX_CONSTANT_BUFFERS-1]
- * \param constants  the mapped buffer
- * \param size  size of buffer in bytes
- */
-void
-draw_vs_set_constants(struct draw_context *draw,
-  unsigned slot,
-  const void *constants,
-  unsigned size)
-{
-   const int alignment = 16;
-
-   /* check if buffer is 16-byte aligned */
-   if (((uintptr_t)constants)  (alignment - 1)) {
-  /* if not, copy the constants into a new, 16-byte aligned buffer */
-  if (size  draw-vs.const_storage_size[slot]) {
- if (draw-vs.aligned_constant_storage[slot]) {
-align_free((void *)draw-vs.aligned_constant_storage[slot]);
-draw-vs.const_storage_size[slot] = 0;
- }
- draw-vs.aligned_constant_storage[slot] =
-align_malloc(size, alignment);
- if (draw-vs.aligned_constant_storage[slot]) {
-draw-vs.const_storage_size[slot] = size;
- }
-  }
-  assert(constants);
-  if 

[Mesa-dev] [PATCH 5/8] glcpp: Allow test-specific arguments for standalone glcpp tests

2012-12-06 Thread Carl Worth
This will allow the test exercising disabled line continuations to arrange
for the --disable-line-continuations argument to be passed to the standalone
glcpp.
---
 src/glsl/glcpp/tests/glcpp-test |   11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/glsl/glcpp/tests/glcpp-test b/src/glsl/glcpp/tests/glcpp-test
index ac89191..d12db6f 100755
--- a/src/glsl/glcpp/tests/glcpp-test
+++ b/src/glsl/glcpp/tests/glcpp-test
@@ -23,6 +23,13 @@ Valid options include:
 EOF
 }
 
+test_specific_args ()
+{
+test=$1
+
+grep 'glcpp-args:' $test | sed -e 's,^.*glcpp-args: *,,'
+}
+
 # Parse command-line options
 for option; do
 if [ ${option} = '--help' ] ; then
@@ -45,7 +52,7 @@ clean=0
 echo == Testing for correctness ==
 for test in $testdir/*.c; do
 echo -n Testing $test...
-$glcpp  $test  $test.out 21
+$glcpp $(test_specific_args $test)  $test  $test.out 21
 total=$((total+1))
 if cmp $test.expected $test.out /dev/null 21; then
echo PASS
@@ -64,7 +71,7 @@ if [ $do_valgrind = yes ]; then
 echo == Testing for valgrind cleanliness ==
 for test in $testdir/*.c; do
echo -n Testing $test with valgrind...
-   valgrind --error-exitcode=31 --log-file=$test.valgrind-errors $glcpp  
$test /dev/null 21
+   valgrind --error-exitcode=31 --log-file=$test.valgrind-errors $glcpp 
$(test_specific_args $test)  $test /dev/null 21
if [ $? = 31 ]; then
echo ERRORS
cat $test.valgrind-errors
-- 
1.7.10

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/8] drirc: Add quirk to disable GLSL line continuations for Savage2

2012-12-06 Thread Carl Worth
This application is known to contain shaders that:

1. Have a stray backslash as the last character of comment lines
2. Have a declaration immediately following that line

Hence, interpreting that backslash as a line continuation causes the
declaration to be hidden and the shader fails to compile.  Fortunately, the
shaders also:

3. Do not have any other intentional line-continuation characters

So disabling line continuations entirely for the application fixes this
problem without causing any other breakage.
---
 src/mesa/drivers/dri/common/drirc |3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/mesa/drivers/dri/common/drirc 
b/src/mesa/drivers/dri/common/drirc
index cf13cdb..a13941f 100644
--- a/src/mesa/drivers/dri/common/drirc
+++ b/src/mesa/drivers/dri/common/drirc
@@ -22,5 +22,8 @@
 application name=Unigine OilRush (64-bit) executable=OilRush_x64
 option name=disable_blend_func_extended value=true /
/application
+application name=Savage 2 executable=savage2.bin
+option name=disable_glsl_line_continuations value=true /
+/application
 /device
 /driconf
-- 
1.7.10

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/8] glcpp: Honor the GL context's DisableGLSLLineContinuations option

2012-12-06 Thread Carl Worth
And simply don't call into the function that removes line continuations.
---
 src/glsl/glcpp/pp.c |4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/glsl/glcpp/pp.c b/src/glsl/glcpp/pp.c
index 5f86a49..3f33648 100644
--- a/src/glsl/glcpp/pp.c
+++ b/src/glsl/glcpp/pp.c
@@ -155,7 +155,9 @@ glcpp_preprocess(void *ralloc_ctx, const char **shader, 
char **info_log,
 {
int errors;
glcpp_parser_t *parser = glcpp_parser_create (extensions, gl_ctx-API);
-   *shader = remove_line_continuations(parser, *shader);
+
+   if (! gl_ctx-Const.DisableGLSLLineContinuations)
+   *shader = remove_line_continuations(parser, *shader);
 
glcpp_lex_set_source_string (parser, *shader);
 
-- 
1.7.10

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/8] glcpp: Accept pointer to GL context rather than just the API version

2012-12-06 Thread Carl Worth
As the preprocessor becomes more sophisticated and gains more optional
behavior, it's easiest to just pass the GL context pointer to it so that
it can examine any fields there that it needs to (such as API version,
or the state of any driconf options, etc.).
---
 src/glsl/glcpp/glcpp.c  |   13 -
 src/glsl/glcpp/glcpp.h  |4 +++-
 src/glsl/glcpp/pp.c |4 ++--
 src/glsl/glsl_parser_extras.h   |2 +-
 src/glsl/main.cpp   |2 +-
 src/glsl/test_optpass.cpp   |2 +-
 src/mesa/program/ir_to_mesa.cpp |2 +-
 7 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/src/glsl/glcpp/glcpp.c b/src/glsl/glcpp/glcpp.c
index 7c2ded8..79fbdac 100644
--- a/src/glsl/glcpp/glcpp.c
+++ b/src/glsl/glcpp/glcpp.c
@@ -94,6 +94,14 @@ load_text_file(void *ctx, const char *filename)
return text;
 }
 
+/* Initialize only those things that glcpp cares about.
+ */
+static void
+init_fake_gl_context (struct gl_context *gl_ctx)
+{
+   gl_ctx-API = API_OPENGL_COMPAT;
+}
+
 int
 main (int argc, char *argv[])
 {
@@ -102,6 +110,9 @@ main (int argc, char *argv[])
char *info_log = ralloc_strdup(ctx, );
const char *shader;
int ret;
+   struct gl_context gl_ctx;
+
+   init_fake_gl_context (gl_ctx);
 
if (argc) {
filename = argv[1];
@@ -111,7 +122,7 @@ main (int argc, char *argv[])
if (shader == NULL)
   return 1;
 
-   ret = glcpp_preprocess(ctx, shader, info_log, NULL, 
API_OPENGL_COMPAT);
+   ret = glcpp_preprocess(ctx, shader, info_log, NULL, gl_ctx);
 
printf(%s, shader);
fprintf(stderr, %s, info_log);
diff --git a/src/glsl/glcpp/glcpp.h b/src/glsl/glcpp/glcpp.h
index a459289..cdcbdfe 100644
--- a/src/glsl/glcpp/glcpp.h
+++ b/src/glsl/glcpp/glcpp.h
@@ -27,6 +27,8 @@
 #include stdint.h
 #include stdbool.h
 
+#include main/mtypes.h
+
 #include ../ralloc.h
 
 #include program/hash_table.h
@@ -197,7 +199,7 @@ glcpp_parser_destroy (glcpp_parser_t *parser);
 
 int
 glcpp_preprocess(void *ralloc_ctx, const char **shader, char **info_log,
-  const struct gl_extensions *extensions, int api);
+  const struct gl_extensions *extensions, struct gl_context *g_ctx);
 
 /* Functions for writing to the info log */
 
diff --git a/src/glsl/glcpp/pp.c b/src/glsl/glcpp/pp.c
index 11b2941..5f86a49 100644
--- a/src/glsl/glcpp/pp.c
+++ b/src/glsl/glcpp/pp.c
@@ -151,10 +151,10 @@ remove_line_continuations(glcpp_parser_t *ctx, const char 
*shader)
 
 int
 glcpp_preprocess(void *ralloc_ctx, const char **shader, char **info_log,
-  const struct gl_extensions *extensions, int api)
+  const struct gl_extensions *extensions, struct gl_context *gl_ctx)
 {
int errors;
-   glcpp_parser_t *parser = glcpp_parser_create (extensions, api);
+   glcpp_parser_t *parser = glcpp_parser_create (extensions, gl_ctx-API);
*shader = remove_line_continuations(parser, *shader);
 
glcpp_lex_set_source_string (parser, *shader);
diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h
index 0b208f6..7473eee 100644
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -299,7 +299,7 @@ extern C {
 #endif
 
 extern int glcpp_preprocess(void *ctx, const char **shader, char **info_log,
-  const struct gl_extensions *extensions, int api);
+  const struct gl_extensions *extensions, struct 
gl_context *gl_ctx);
 
 extern void _mesa_destroy_shader_compiler(void);
 extern void _mesa_destroy_shader_compiler_caches(void);
diff --git a/src/glsl/main.cpp b/src/glsl/main.cpp
index 33cd79c..4af440a 100644
--- a/src/glsl/main.cpp
+++ b/src/glsl/main.cpp
@@ -145,7 +145,7 @@ compile_shader(struct gl_context *ctx, struct gl_shader 
*shader)
 
const char *source = shader-Source;
state-error = glcpp_preprocess(state, source, state-info_log,
-state-extensions, ctx-API) != 0;
+state-extensions, ctx) != 0;
 
if (!state-error) {
   _mesa_glsl_lexer_ctor(state, source);
diff --git a/src/glsl/test_optpass.cpp b/src/glsl/test_optpass.cpp
index ce5df24..117b0b0 100644
--- a/src/glsl/test_optpass.cpp
+++ b/src/glsl/test_optpass.cpp
@@ -217,7 +217,7 @@ int test_optpass(int argc, char **argv)
   shader-Source = input.c_str();
   const char *source = shader-Source;
   state-error = glcpp_preprocess(state, source, state-info_log,
-state-extensions, ctx-API) != 0;
+state-extensions, ctx) != 0;
 
   if (!state-error) {
  _mesa_glsl_lexer_ctor(state, source);
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index d5b9683..0f7439b 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -3061,7 +3061,7 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct 
gl_shader *shader)
}
 
   

[Mesa-dev] [PATCH 1/8] driconf: Add a new option: disable_glsl_line_continuations

2012-12-06 Thread Carl Worth
This is to enable a quirk for Savage2 which includes a shader with a stray '\'
at the end of a comment line. Interpreting that backslash as a line
continuation will break the compilation of the shader, so we need a way to
disable this.
---

It is bogus that the new option is set up as something i965-specific. This is
really just following the example of the existing force_glsl_extensions_warn
option.

I believe the correct answer is to create a new glsl option cache outside of
the driver so that options like these can easily be applied to all code using
the GLSL compiler. But I also think that can reasonably be done separately
after this patch series.

I've looked into what it would take to do this. It shouldn't be hard, but I
could use some specific assistance with some of the details from someone with
more Mesa-internals experience than I have.

 src/mesa/drivers/dri/common/xmlpool/t_options.h |5 +
 src/mesa/drivers/dri/i965/brw_context.c |2 ++
 src/mesa/drivers/dri/intel/intel_screen.c   |3 ++-
 src/mesa/main/mtypes.h  |6 ++
 4 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/common/xmlpool/t_options.h 
b/src/mesa/drivers/dri/common/xmlpool/t_options.h
index 78f1765..683a473 100644
--- a/src/mesa/drivers/dri/common/xmlpool/t_options.h
+++ b/src/mesa/drivers/dri/common/xmlpool/t_options.h
@@ -282,3 +282,8 @@ DRI_CONF_OPT_END
 DRI_CONF_OPT_BEGIN(disable_blend_func_extended,bool,def) \
 DRI_CONF_DESC(en,gettext(Disable dual source blending)) \
 DRI_CONF_OPT_END
+
+#define DRI_CONF_DISABLE_GLSL_LINE_CONTINUATIONS(def) \
+DRI_CONF_OPT_BEGIN(disable_glsl_line_continuations,bool,def) \
+DRI_CONF_DESC(en,gettext(Disable backslash-based line continuations 
in GLSL source)) \
+DRI_CONF_OPT_END
diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 5665a3a..4628d82 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -374,6 +374,8 @@ brwCreateContext(int api,
 
ctx-Const.ForceGLSLExtensionsWarn = driQueryOptionb(intel-optionCache, 
force_glsl_extensions_warn);
 
+   ctx-Const.DisableGLSLLineContinuations = 
driQueryOptionb(intel-optionCache, disable_glsl_line_continuations);
+
ctx-Const.ContextFlags = 0;
if ((flags  __DRI_CTX_FLAG_FORWARD_COMPATIBLE) != 0)
   ctx-Const.ContextFlags |= GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT;
diff --git a/src/mesa/drivers/dri/intel/intel_screen.c 
b/src/mesa/drivers/dri/intel/intel_screen.c
index e0fe8c1..557f1af 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -80,6 +80,7 @@ PUBLIC const char __driConfigOptions[] =
  DRI_CONF_ALWAYS_FLUSH_BATCH(false)
  DRI_CONF_ALWAYS_FLUSH_CACHE(false)
  DRI_CONF_FORCE_GLSL_EXTENSIONS_WARN(false)
+ DRI_CONF_DISABLE_GLSL_LINE_CONTINUATIONS(false)
  DRI_CONF_DISABLE_BLEND_FUNC_EXTENDED(false)
 
   DRI_CONF_OPT_BEGIN(stub_occlusion_query, bool, false)
@@ -92,7 +93,7 @@ PUBLIC const char __driConfigOptions[] =
DRI_CONF_SECTION_END
 DRI_CONF_END;
 
-const GLuint __driNConfigOptions = 15;
+const GLuint __driNConfigOptions = 16;
 
 #include intel_batchbuffer.h
 #include intel_buffers.h
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 5bfae69..f8fb113 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2925,6 +2925,12 @@ struct gl_constants
 
/** GL_ARB_map_buffer_alignment */
GLuint MinMapBufferAlignment;
+
+   /**
+* Disable interpretation of line continuations (lines ending with a
+* backslash character ('\')) in GLSL source.
+*/
+   GLboolean DisableGLSLLineContinuations;
 };
 
 
-- 
1.7.10

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 7/8] glcpp: Rewrite line-continuation support to act globally.

2012-12-06 Thread Carl Worth
Previously, we were only supporting line-continuation backslash characters
within lines of pre-processor directives, (as per the specification). With
OpenGL 4.2 and GLES3, line continuations are now supported anywhere within a
shader.

While changing this, also fix a bug where the preprocessor was ignoring
line continuation characters when a line ended in multiple backslash
characters.

The new code is also more efficient than the old. Previously, we would
perform a ralloc copy at each newline. We now perform copies only at each
occurrence of a line-continuation.

This commit fixes the line-continuation.vert test in piglit.
---

This commit also causes the comment-continuation test to fail. This is
expected. My recommendation is to remove that test since the handling of
line-continuation characters is changing here. The original purpose of the
test was to ensure that the shaders from Savage 2, (with a stray
line-continuation character in a comment), could be compiled. This case is now
supported with a quirk, (and tested with glcpp tests as part of make
check). See elsewhere in this patch series for more details.

 src/glsl/glcpp/pp.c |  115 ++-
 1 file changed, 49 insertions(+), 66 deletions(-)

diff --git a/src/glsl/glcpp/pp.c b/src/glsl/glcpp/pp.c
index 3f33648..789f7f9 100644
--- a/src/glsl/glcpp/pp.c
+++ b/src/glsl/glcpp/pp.c
@@ -70,82 +70,65 @@ glcpp_warning (YYLTYPE *locp, glcpp_parser_t *parser, const 
char *fmt, ...)
 parser-info_log_length, \n);
 }
 
-/* Searches backwards for '^ *#' from a given starting point. */
-static int
-in_directive(const char *shader, const char *ptr)
-{
-   assert(ptr = shader);
-
-   /* Search backwards for '#'. If we find a \n first, it doesn't count */
-   for (; ptr = shader  *ptr != '#'; ptr--) {
-   if (*ptr == '\n')
-   return 0;
-   }
-   if (ptr = shader) {
-   /* Found '#'...look for spaces preceded by a newline */
-   for (ptr--; ptr = shader  isblank(*ptr); ptr--);
-   // FIXME: I don't think the '\n' case can happen
-   if (ptr  shader || *ptr == '\n')
-   return 1;
-   }
-   return 0;
-}
-
-/* Remove any line continuation characters in preprocessing directives.
- * However, ignore any in GLSL code, as There is no line continuation
- * character (1.30 page 9) in GLSL.
+/* Remove any line continuation characters in the shader, (whether in
+ * preprocessing directives or in GLSL code).
  */
 static char *
 remove_line_continuations(glcpp_parser_t *ctx, const char *shader)
 {
-   int in_continued_line = 0;
-   int extra_newlines = 0;
char *clean = ralloc_strdup(ctx, );
-   const char *search_start = shader;
-   const char *newline;
-   while ((newline = strchr(search_start, '\n')) != NULL) {
-   const char *backslash = NULL;
-
-   /* # of characters preceding the newline. */
-   int n = newline - shader;
-
-   /* Find the preceding '\', if it exists */
-   if (n = 1  newline[-1] == '\\')
-   backslash = newline - 1;
-   else if (n = 2  newline[-1] == '\r'  newline[-2] == '\\')
-   backslash = newline - 2;
-
-   /* Double backslashes don't count (the backslash is escaped) */
-   if (backslash != NULL  backslash[-1] == '\\') {
-   backslash = NULL;
-   }
-
-   if (backslash != NULL) {
-   /* We found a line continuation, but do we care? */
-   if (!in_continued_line) {
-   if (in_directive(shader, backslash)) {
-   in_continued_line = 1;
-   extra_newlines = 0;
-   }
-   }
-   if (in_continued_line) {
-   /* Copy everything before the \ */
-   ralloc_strncat(clean, shader, backslash - 
shader);
+   const char *backslash, *newline, *search_start;
+   int collapsed_newlines = 0;
+
+   search_start = shader;
+
+   while (true) {
+   backslash = strchr(search_start, '\\');
+
+   /* If we have previously collapsed any line-continuations,
+* then we want to insert additional newlines at the next
+* occurrence of a newline character to avoid changing any
+* line numbers.
+*/
+   if (collapsed_newlines) {
+   newline = strchr(search_start, '\n');
+   if (newline 
+   (backslash == NULL || newline  backslash))
+   {
+   ralloc_strncat(clean, shader,
+   

[Mesa-dev] [PATCH 8/8] glcpp: Add tests for line continuation

2012-12-06 Thread Carl Worth
First we test that line continuations are honored within a comment, (as
recently changed in glcpp), then we test that line continuations can be
disabled via an option within the context. This is tested via the new support
for a test-specific command-line option passed to glcpp.
---
 src/glsl/glcpp/tests/115-line-continuations.c|9 +
 src/glsl/glcpp/tests/115-line-continuations.c.expected   |   10 ++
 src/glsl/glcpp/tests/116-disable-line-continuations.c|   13 +
 .../tests/116-disable-line-continuations.c.expected  |   14 ++
 4 files changed, 46 insertions(+)
 create mode 100644 src/glsl/glcpp/tests/115-line-continuations.c
 create mode 100644 src/glsl/glcpp/tests/115-line-continuations.c.expected
 create mode 100644 src/glsl/glcpp/tests/116-disable-line-continuations.c
 create mode 100644 
src/glsl/glcpp/tests/116-disable-line-continuations.c.expected

diff --git a/src/glsl/glcpp/tests/115-line-continuations.c 
b/src/glsl/glcpp/tests/115-line-continuations.c
new file mode 100644
index 000..105590d
--- /dev/null
+++ b/src/glsl/glcpp/tests/115-line-continuations.c
@@ -0,0 +1,9 @@
+// This comment continues to the next line, hiding the define \
+#define CONTINUATION_UNSUPPORTED
+
+#ifdef CONTINUATION_UNSUPPORTED
+failure
+#else
+success
+#endif
+
diff --git a/src/glsl/glcpp/tests/115-line-continuations.c.expected 
b/src/glsl/glcpp/tests/115-line-continuations.c.expected
new file mode 100644
index 000..f67ba1c
--- /dev/null
+++ b/src/glsl/glcpp/tests/115-line-continuations.c.expected
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+success
+
+
+
diff --git a/src/glsl/glcpp/tests/116-disable-line-continuations.c 
b/src/glsl/glcpp/tests/116-disable-line-continuations.c
new file mode 100644
index 000..83d5ddf
--- /dev/null
+++ b/src/glsl/glcpp/tests/116-disable-line-continuations.c
@@ -0,0 +1,13 @@
+// glcpp-args: --disable-line-continuations
+
+// This comments ends with a backslash \\
+#define NO_CONTINUATION
+
+#ifdef NO_CONTINUATION
+success
+#else
+failure
+#endif
+
+
+
diff --git a/src/glsl/glcpp/tests/116-disable-line-continuations.c.expected 
b/src/glsl/glcpp/tests/116-disable-line-continuations.c.expected
new file mode 100644
index 000..9b9a8c5
--- /dev/null
+++ b/src/glsl/glcpp/tests/116-disable-line-continuations.c.expected
@@ -0,0 +1,14 @@
+
+
+
+
+
+
+success
+
+
+
+
+
+
+
-- 
1.7.10

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/8] glcpp: Add --disable-line-continuations argument to standalone glcpp

2012-12-06 Thread Carl Worth
This will allow testing of disabled line-continuation on a case-by-case basis,
(with the option communicated to the preprocessor via the GL context).
---
 src/glsl/glcpp/glcpp.c |   43 +--
 1 file changed, 41 insertions(+), 2 deletions(-)

diff --git a/src/glsl/glcpp/glcpp.c b/src/glsl/glcpp/glcpp.c
index 79fbdac..5352703 100644
--- a/src/glsl/glcpp/glcpp.c
+++ b/src/glsl/glcpp/glcpp.c
@@ -100,6 +100,18 @@ static void
 init_fake_gl_context (struct gl_context *gl_ctx)
 {
gl_ctx-API = API_OPENGL_COMPAT;
+   gl_ctx-Const.DisableGLSLLineContinuations = false;
+}
+
+static void
+usage (void)
+{
+   fprintf (stderr, Usage: glcpp [OPTIONS] [--] [filename]\n);
+   fprintf (stderr, \n);
+   fprintf (stderr, Pre-process the given filename (stdin if no filename 
given).\n);
+   fprintf (stderr, The following options are supported:\n);
+   fprintf (stderr, --disable-line-continuations  Do not 
interpret lines ending with a\n);
+   fprintf (stderr,   backslash 
('\\') as a line continuation.\n);
 }
 
 int
@@ -111,11 +123,38 @@ main (int argc, char *argv[])
const char *shader;
int ret;
struct gl_context gl_ctx;
+   int i;
 
init_fake_gl_context (gl_ctx);
 
-   if (argc) {
-   filename = argv[1];
+   for (i = 1; i  argc; i++) {
+   /* An argument of -- means that no subsequent
+* arguments are options.
+*/
+   if (strcmp(argv[i], --) == 0) {
+   i++;
+   break;
+   }
+
+   /* An argument not starting with '-' is not an
+* option.
+*/
+   if (argv[i][0] != '-')
+   break;
+
+   /* Interpret known options.
+*/
+   if (strcmp(argv[i], --disable-line-continuations) == 0) {
+   gl_ctx.Const.DisableGLSLLineContinuations = true;
+   } else {
+   fprintf (stderr, Unknown option: %s\n, argv[i]);
+   usage ();
+   exit (1);
+   }
+   }
+
+   if (i  argc) {
+   filename = argv[i];
}
 
shader = load_text_file (ctx, filename);
-- 
1.7.10

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] mesa: Disallow deprecated SNORM formats for renderbuffers

2012-12-06 Thread Marek Olšák
On Wed, Dec 5, 2012 at 8:39 PM, Eric Anholt e...@anholt.net wrote:
 Marek Olšák mar...@gmail.com writes:

 On Tue, Dec 4, 2012 at 7:56 PM, Eric Anholt e...@anholt.net wrote:
 Ian Romanick i...@freedesktop.org writes:

 From: Ian Romanick ian.d.roman...@intel.com

 The OpenGL 3.2 core profile spec says:

 The following base internal formats from table 3.11 are
 color-renderable: RED, RG, RGB, and RGBA. The sized internal formats
 from table 3.12 that have a color-renderable base internal format
 are also color-renderable. No other formats, including compressed
 internal formats, are color-renderable.

 The OpenGL 3.2 compatibility profile spec says (only ALPHA is added):

 The following base internal formats from table 3.16 are
 color-renderable: ALPHA, RED, RG, RGB, and RGBA. The sized internal 
 formats
 from table 3.17 that have a color-renderable base internal format
 are also color-renderable. No other formats, including compressed
 internal formats, are color-renderable.

 Table 3.12 in the core profile spec and table 3.17 in the compatibility
 profile spec list SNORM formats as having a base internal format of RED,
 RG, RGB, or RGBA.  From this we infer that they should also be color
 renderable.

 This sounds sort of like the description of the bottom half of the
 change, to remove L/LA/I snorm formats.  (ALPHA being left in place,
 which I missed at first).

 I wonder, do they also not support FBOs with unorm L/LA/I, despite
 ARB_framebuffer_object?  fbo-clear-formats should tell.  I'd be happy to
 see those go if nobody else supports them either, and it would make this
 change make more sense to me.

 What do you mean by that? R600 has full renderbuffer support for all
 R, RG, RGBX, RGBA, A, L, LA, and I formats, all can be UNORM, SNORM,
 UINT, SINT, and FLOAT, and blending and MSAA are a sure thing. I'm
 only okay with disallowing the formats if all specifications (GL and
 extensions) agree they *shouldn't* be supported.

 ARB_fbo is the only thing that allows L/LA/I to be color-renderable.
 EXT_fbo and all the GL core/compat specs say only R/RG/RGB/RGBA/A are.
 The ARB_fbo spec makes it pretty clear that they expect drivers to say
 no for things despite that, just that they don't want to explicitly
 ban it in that spec.  In the core/compat specs, they decided to do the
 reasonable thing instead.

 For intel, we need either mesa core to filter out the stupid formats, or
 we can in the driver.  (unfortunately, we can't quite just look at the
 MESA_FORMAT, because those are sometimes promoted from an internalformat
 of LA to a mesa format of RGBA, for example).

I think you can always look at the original internalformat, which we
need to keep around for glGet.

Anyway, what you say about L/LA/I sounds reasonable. It's not worth
maintaining those formats just because of ARB_fbo. You have my ack for
removal of GL renderbuffer support of L/LA/I.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 57802] including GL/gl.h and another gl header causes redefinition warnings

2012-12-06 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=57802

--- Comment #3 from Matthew Waters ystree...@gmail.com ---
(In reply to comment #2)
 Just curious: why are you including both OpenGL's gl.h and other Khronos
 (ES?) headers in the same file?

I am experimenting with having both opengl and ES support in an application.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 47607] [advocacy] Make Anomaly Warzone Earth work with Mesa

2012-12-06 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=47607

--- Comment #5 from imamdxl8...@gmail.com ---
This does not work with Intel HD either, even with GLSL extension behavior set to warn.

Tested On:

Ubuntu 12.10
Linux 3.5 (both 32 and 64 bit)
X.Org X Server 1.13.0
xf86-video-intel 2.20.15
libdrm 2.4.40
Mesa 8, 9 and git (both 32 and 64 bit)

Hardware: Intel GMA 4500 MHD (device id 2a42)
Hardware: Intel HD (device id 0106)

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [rfc] gallium ubo/tbo support (softpipe only)

2012-12-06 Thread Dave Airlie
Hi,

So I got UBOs working with softpipe today (well one demo and most piglit
tests pass), so I've cleaned up the tbo/ubo enablement code for gallium.

This only includes the softpipe TBO and gallium TBO/UBO work, (softpipe
didn't need changes for UBOs). I've got an initial hack at r600 TBO,
but I haven't looked at r600 UBOs yet.

Dave.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/6] gallium: add new texture buffer object capability

2012-12-06 Thread Dave Airlie
From: Dave Airlie airl...@redhat.com

This just adds the define to the header.

Signed-off-by: Dave Airlie airl...@redhat.com
---
 src/gallium/include/pipe/p_defines.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index fa2fb07..a751b8a 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -491,7 +491,8 @@ enum pipe_cap {
PIPE_CAP_QUERY_TIMESTAMP = 73,
PIPE_CAP_TEXTURE_MULTISAMPLE = 74,
PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT = 75,
-   PIPE_CAP_CUBE_MAP_ARRAY = 76
+   PIPE_CAP_CUBE_MAP_ARRAY = 76,
+   PIPE_CAP_TEXTURE_BUFFER_OBJECTS = 77
 };
 
 /**
-- 
1.8.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/6] softpipe: add texture buffer object support

2012-12-06 Thread Dave Airlie
From: Dave Airlie airl...@redhat.com

This adds TBO support to softpipe.

Signed-off-by: Dave Airlie airl...@redhat.com
---
 src/gallium/drivers/softpipe/sp_screen.c | 2 ++
 src/gallium/drivers/softpipe/sp_tex_sample.c | 8 +++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/softpipe/sp_screen.c 
b/src/gallium/drivers/softpipe/sp_screen.c
index 3a38182..1e752fb 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -171,6 +171,8 @@ softpipe_get_param(struct pipe_screen *screen, enum 
pipe_cap param)
case PIPE_CAP_QUERY_TIMESTAMP:
case PIPE_CAP_CUBE_MAP_ARRAY:
   return 1;
+   case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+  return 1;
}
/* should only get here on unhandled cases */
debug_printf(Unexpected PIPE_CAP %d query\n, param);
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 31eab39..7558ef1 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -38,6 +38,7 @@
 #include pipe/p_defines.h
 #include pipe/p_shader_tokens.h
 #include util/u_math.h
+#include util/u_format.h
 #include util/u_memory.h
 #include sp_quad.h   /* only for #define QUAD_* tokens */
 #include sp_tex_sample.h
@@ -2502,6 +2503,7 @@ get_lambda_func(const union sp_sampler_key key)
   return compute_lambda_vert;

switch (key.bits.target) {
+   case PIPE_BUFFER:
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_1D_ARRAY:
   return compute_lambda_1d;
@@ -2526,6 +2528,7 @@ get_img_filter(const union sp_sampler_key key,
const struct pipe_sampler_state *sampler)
 {
switch (key.bits.target) {
+   case PIPE_BUFFER:
case PIPE_TEXTURE_1D:
   if (filter == PIPE_TEX_FILTER_NEAREST) 
  return img_filter_1d_nearest;
@@ -2648,7 +2651,6 @@ sample_get_dims(struct tgsi_sampler *tgsi_sampler, int 
level,
dims[1] = texture-array_size;
/* fallthrough */
 case PIPE_TEXTURE_1D:
-case PIPE_BUFFER:
return;
 case PIPE_TEXTURE_2D_ARRAY:
dims[2] = texture-array_size;
@@ -2665,6 +2667,9 @@ sample_get_dims(struct tgsi_sampler *tgsi_sampler, int 
level,
 case PIPE_TEXTURE_CUBE_ARRAY:
dims[1] = u_minify(texture-height0, level);
dims[2] = texture-array_size / 6;
+   break;
+case PIPE_BUFFER:
+   dims[0] /= util_format_get_blocksize(view-format);
return;
 default:
assert(!unexpected texture target in sample_get_dims());
@@ -2705,6 +2710,7 @@ sample_get_texels(struct tgsi_sampler *tgsi_sampler,
layers = texture-array_size;
 
switch(texture-target) {
+   case PIPE_BUFFER:
case PIPE_TEXTURE_1D:
   for (j = 0; j  TGSI_QUAD_SIZE; j++) {
  int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
-- 
1.8.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/6] st/mesa: add texture buffer object support to state tracker

2012-12-06 Thread Dave Airlie
From: Dave Airlie airl...@redhat.com

This adds the necessary changes to the st to allow texture buffer object
support if the driver advertises it.

Signed-off-by: Dave Airlie airl...@redhat.com
---
 src/mesa/state_tracker/st_atom_texture.c   | 18 --
 src/mesa/state_tracker/st_cb_texture.c | 16 
 src/mesa/state_tracker/st_extensions.c |  2 ++
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |  2 +-
 src/mesa/state_tracker/st_mesa_to_tgsi.c   |  1 +
 5 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_texture.c 
b/src/mesa/state_tracker/st_atom_texture.c
index df05e83..dba1d82 100644
--- a/src/mesa/state_tracker/st_atom_texture.c
+++ b/src/mesa/state_tracker/st_atom_texture.c
@@ -215,13 +215,19 @@ update_single_texture(struct st_context *st,
 
/* Determine the format of the texture sampler view */
st_view_format = stObj-pt-format;
-   {
-  const struct st_texture_image *firstImage =
-st_texture_image(stObj-base.Image[0][stObj-base.BaseLevel]);
-  const gl_format texFormat = firstImage-base.TexFormat;
-  enum pipe_format firstImageFormat =
-st_mesa_format_to_pipe_format(texFormat);
 
+   {
+  gl_format texFormat;
+  enum pipe_format firstImageFormat;
+
+  if (texObj-Target == GL_TEXTURE_BUFFER) {
+ texFormat = stObj-base._BufferObjectFormat;
+  } else {
+ const struct st_texture_image *firstImage =
+st_texture_image(stObj-base.Image[0][stObj-base.BaseLevel]);
+ texFormat = firstImage-base.TexFormat;
+  }
+  firstImageFormat = st_mesa_format_to_pipe_format(texFormat);
   if ((samp-sRGBDecode == GL_SKIP_DECODE_EXT) 
  (_mesa_get_format_color_encoding(texFormat) == GL_SRGB)) {
  /* Don't do sRGB-RGB conversion.  Interpret the texture data as
diff --git a/src/mesa/state_tracker/st_cb_texture.c 
b/src/mesa/state_tracker/st_cb_texture.c
index f06814f..48a18d2 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -48,6 +48,7 @@
 #include state_tracker/st_cb_fbo.h
 #include state_tracker/st_cb_flush.h
 #include state_tracker/st_cb_texture.h
+#include state_tracker/st_cb_bufferobjects.h
 #include state_tracker/st_format.h
 #include state_tracker/st_texture.h
 #include state_tracker/st_gen_mipmap.h
@@ -1182,6 +1183,21 @@ st_finalize_texture(struct gl_context *ctx,
  stObj-lastLevel = stObj-base._MaxLevel;
}
 
+   if (tObj-Target == GL_TEXTURE_BUFFER) {
+  struct st_buffer_object *st_obj = st_buffer_object(tObj-BufferObject);
+  
+  if (st_obj-buffer != stObj-pt) {
+ pipe_resource_reference(stObj-pt, st_obj-buffer);
+ pipe_sampler_view_release(st-pipe, stObj-sampler_view);
+ stObj-width0 = stObj-pt-width0 / 
_mesa_get_format_bytes(tObj-_BufferObjectFormat);
+ stObj-height0 = 1;
+ stObj-depth0 = 1;
+
+  }
+  return GL_TRUE;
+
+   }
+
firstImage = st_texture_image(stObj-base.Image[0][stObj-base.BaseLevel]);
assert(firstImage);
 
diff --git a/src/mesa/state_tracker/st_extensions.c 
b/src/mesa/state_tracker/st_extensions.c
index 62a736b..0c61795 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -643,4 +643,6 @@ void st_init_extensions(struct st_context *st)
if (ctx-Const.MinMapBufferAlignment = 64) {
   ctx-Extensions.ARB_map_buffer_alignment = GL_TRUE;
}
+   if (screen-get_param(screen, PIPE_CAP_TEXTURE_BUFFER_OBJECTS))
+  ctx-Extensions.ARB_texture_buffer_object = GL_TRUE;
 }
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index c030a6b..ae3b8b2 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -2776,7 +2776,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
   inst-tex_target = TEXTURE_RECT_INDEX;
   break;
case GLSL_SAMPLER_DIM_BUF:
-  assert(!FINISHME: Implement ARB_texture_buffer_object);
+  inst-tex_target = TEXTURE_BUFFER_INDEX;
   break;
case GLSL_SAMPLER_DIM_EXTERNAL:
   inst-tex_target = TEXTURE_EXTERNAL_INDEX;
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c 
b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index 81a870f..e326bcc 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -269,6 +269,7 @@ st_translate_texture_target( GLuint textarget,
}
 
switch( textarget ) {
+   case TEXTURE_BUFFER_INDEX: return TGSI_TEXTURE_BUFFER;
case TEXTURE_1D_INDEX:   return TGSI_TEXTURE_1D;
case TEXTURE_2D_INDEX:   return TGSI_TEXTURE_2D;
case TEXTURE_3D_INDEX:   return TGSI_TEXTURE_3D;
-- 
1.8.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/6] st/mesa: add option to enable GLSL 1.40

2012-12-06 Thread Dave Airlie
From: Dave Airlie airl...@redhat.com

Allow GLSL 1.40 to be enabled if the driver advertises it.

Signed-off-by: Dave Airlie airl...@redhat.com
---
 src/mesa/state_tracker/st_extensions.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_extensions.c 
b/src/mesa/state_tracker/st_extensions.c
index 0c61795..ccb1f36 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -553,7 +553,9 @@ void st_init_extensions(struct st_context *st)
/* Figure out GLSL support. */
glsl_feature_level = screen-get_param(screen, PIPE_CAP_GLSL_FEATURE_LEVEL);
 
-   if (glsl_feature_level = 130) {
+   if (glsl_feature_level = 140) {
+  ctx-Const.GLSLVersion = 140;
+   } else if (glsl_feature_level = 130) {
   ctx-Const.GLSLVersion = 130;
} else {
   ctx-Const.GLSLVersion = 120;
-- 
1.8.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/6] softpipe: enable GLSL 1.40

2012-12-06 Thread Dave Airlie
From: Dave Airlie airl...@redhat.com

This enables GLSL 1.40 advertising by softpipe.

Signed-off-by: Dave Airlie airl...@redhat.com
---
 src/gallium/drivers/softpipe/sp_screen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/softpipe/sp_screen.c 
b/src/gallium/drivers/softpipe/sp_screen.c
index 1e752fb..909fa1c 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -147,7 +147,7 @@ softpipe_get_param(struct pipe_screen *screen, enum 
pipe_cap param)
case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
   return 0;
case PIPE_CAP_GLSL_FEATURE_LEVEL:
-  return 130;
+  return 140;
case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
   return 0;
case PIPE_CAP_COMPUTE:
-- 
1.8.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/6] mesa/st: add ARB_uniform_buffer_object support

2012-12-06 Thread Dave Airlie
From: Dave Airlie airl...@redhat.com

This adds UBO support to the state tracker; it works with softpipe
as-is.

It uses UARL + CONST[x][ADDR[0].x] type constructs.

Signed-off-by: Dave Airlie airl...@redhat.com
---
 src/mesa/state_tracker/st_atom.c |  2 +
 src/mesa/state_tracker/st_atom.h |  2 +
 src/mesa/state_tracker/st_atom_constbuf.c| 70 +++-
 src/mesa/state_tracker/st_cb_bufferobjects.c |  3 ++
 src/mesa/state_tracker/st_extensions.c   | 13 ++
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp   | 63 ++---
 6 files changed, 146 insertions(+), 7 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom.c b/src/mesa/state_tracker/st_atom.c
index 102fee9..091bd55 100644
--- a/src/mesa/state_tracker/st_atom.c
+++ b/src/mesa/state_tracker/st_atom.c
@@ -64,6 +64,8 @@ static const struct st_tracked_state *atoms[] =
st_update_vs_constants,
st_update_gs_constants,
st_update_fs_constants,
+   st_upload_vs_ubos,
+   st_upload_fs_ubos,
st_update_pixel_transfer,
 
/* this must be done after the vertex program update */
diff --git a/src/mesa/state_tracker/st_atom.h b/src/mesa/state_tracker/st_atom.h
index 6c7d09f..ca79b44 100644
--- a/src/mesa/state_tracker/st_atom.h
+++ b/src/mesa/state_tracker/st_atom.h
@@ -67,6 +67,8 @@ extern const struct st_tracked_state st_finalize_textures;
 extern const struct st_tracked_state st_update_fs_constants;
 extern const struct st_tracked_state st_update_gs_constants;
 extern const struct st_tracked_state st_update_vs_constants;
+extern const struct st_tracked_state st_upload_fs_ubos;
+extern const struct st_tracked_state st_upload_vs_ubos;
 extern const struct st_tracked_state st_update_pixel_transfer;
 
 
diff --git a/src/mesa/state_tracker/st_atom_constbuf.c 
b/src/mesa/state_tracker/st_atom_constbuf.c
index 580393e..ea0cf0e 100644
--- a/src/mesa/state_tracker/st_atom_constbuf.c
+++ b/src/mesa/state_tracker/st_atom_constbuf.c
@@ -45,7 +45,7 @@
 #include st_atom.h
 #include st_atom_constbuf.h
 #include st_program.h
-
+#include st_cb_bufferobjects.h
 
 /**
  * Pass the given program parameters to the graphics pipe as a
@@ -175,3 +175,71 @@ const struct st_tracked_state st_update_gs_constants = {
},
update_gs_constants /* update */
 };
+
+static void st_upload_ubos(struct st_context *st,
+   struct gl_shader *shader,
+   unsigned shader_type)
+{
+   int i;
+   struct pipe_constant_buffer cb = { 0 };
+   assert(shader_type == PIPE_SHADER_VERTEX ||
+  shader_type == PIPE_SHADER_FRAGMENT);
+
+   if (!shader)
+  return;
+
+   for (i = 0; i  shader-NumUniformBlocks; i++) {
+  struct gl_uniform_buffer_binding *binding;
+  struct st_buffer_object *st_obj;
+
+  binding = 
st-ctx-UniformBufferBindings[shader-UniformBlocks[i].Binding];
+  st_obj = st_buffer_object(binding-BufferObject);
+  pipe_resource_reference(cb.buffer, st_obj-buffer);
+
+  cb.buffer_size = st_obj-buffer-width0 - binding-Offset;
+
+  st-pipe-set_constant_buffer(st-pipe, shader_type, 1 + i, cb);
+  pipe_resource_reference(cb.buffer, NULL);
+   }
+}
+
+static void upload_vs_ubos(struct st_context *st)
+{
+   struct gl_shader_program *prog = st-ctx-Shader.CurrentVertexProgram;
+
+   if (!prog)
+  return;
+
+   st_upload_ubos(st, prog-_LinkedShaders[MESA_SHADER_VERTEX], 
PIPE_SHADER_VERTEX);
+}
+
+const struct st_tracked_state st_upload_vs_ubos = {
+   st_upload_vs_ubos,
+   {
+  (_NEW_PROGRAM | _NEW_BUFFER_OBJECT),
+  ST_NEW_VERTEX_PROGRAM,
+   },
+   upload_vs_ubos
+};
+
+static void upload_fs_ubos(struct st_context *st)
+{
+   struct gl_shader_program *prog = st-ctx-Shader.CurrentFragmentProgram;
+
+   if (!prog)
+  return;
+
+   st_upload_ubos(st, prog-_LinkedShaders[MESA_SHADER_FRAGMENT], 
PIPE_SHADER_FRAGMENT);
+
+}
+
+const struct st_tracked_state st_upload_fs_ubos = {
+   st_upload_fs_ubos,
+   {
+  (_NEW_PROGRAM | _NEW_BUFFER_OBJECT),
+  ST_NEW_FRAGMENT_PROGRAM,
+   },
+   upload_fs_ubos
+};
+
+
diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c 
b/src/mesa/state_tracker/st_cb_bufferobjects.c
index ac38128..7d1c05a 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -198,6 +198,9 @@ st_bufferobj_data(struct gl_context *ctx,
case GL_TRANSFORM_FEEDBACK_BUFFER:
   bind = PIPE_BIND_STREAM_OUTPUT;
   break;
+   case GL_UNIFORM_BUFFER:
+  bind = PIPE_BIND_CONSTANT_BUFFER;
+  break;
default:
   bind = 0;
}
diff --git a/src/mesa/state_tracker/st_extensions.c 
b/src/mesa/state_tracker/st_extensions.c
index ccb1f36..d76cfdb 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -70,6 +70,8 @@ void st_init_limits(struct st_context *st)
struct pipe_screen *screen = st-pipe-screen;
struct gl_constants *c = st-ctx-Const;

[Mesa-dev] [PATCH] AMDGPU: Match fdiv for SI.

2012-12-06 Thread Michel Dänzer
From: Michel Dänzer michel.daen...@amd.com

Fixes hundreds of radeonsi piglit regressions from commit
62c8e1ec4a93b28c55c1c7accb6cbd64952e59c2 ('AMDGPU: replace int_AMDGPU_rcp by
fdiv (1.0, x) in RECIP pattern').

Signed-off-by: Michel Dänzer michel.daen...@amd.com
---
 lib/Target/AMDGPU/SIInstructions.td |5 +
 1 file changed, 5 insertions(+)

diff --git a/lib/Target/AMDGPU/SIInstructions.td 
b/lib/Target/AMDGPU/SIInstructions.td
index e9bbe23..3564ec3 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -1264,6 +1264,11 @@ def : Pat 
   (V_MUL_LEGACY_F32_e32 AllReg_32:$src0, (V_RCP_LEGACY_F32_e32 
AllReg_32:$src1))
 ;
 
+def : Pat
+  (fdiv AllReg_32:$src0, AllReg_32:$src1),
+  (V_MUL_F32_e32 AllReg_32:$src0, (V_RCP_F32_e32 AllReg_32:$src1))
+;
+
 def : Pat 
   (int_AMDGPU_kilp),
   (SI_KIL (V_MOV_IMM_I32 0xbf80))
-- 
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] AMDGPU/SI: Only allow selecting VGPRs with V_CNDMASK_B32.

2012-12-06 Thread Michel Dänzer
From: Michel Dänzer michel.daen...@amd.com

VALU instructions can only read from one SGPR, and that's the condition code
mask in this case.

Fixes a number of radeonsi piglit regressions from Vincent's max/min changes
(which means the matching to AMDGPUfmax/min doesn't work for some reason).

Signed-off-by: Michel Dänzer michel.daen...@amd.com
---
 lib/Target/AMDGPU/SIInstructions.td |8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/Target/AMDGPU/SIInstructions.td 
b/lib/Target/AMDGPU/SIInstructions.td
index 3564ec3..ea8de91 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -764,15 +764,15 @@ def V_CNDMASK_B32_e32 : VOP2 0x, (outs 
VReg_32:$dst),
 }
 
 def V_CNDMASK_B32_e64 : VOP3 0x0100, (outs VReg_32:$dst),
-  (ins AllReg_32:$src0, AllReg_32:$src1, SReg_1:$src2, InstFlag:$abs, 
InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
+  (ins VReg_32:$src0, VReg_32:$src1, SReg_1:$src2, InstFlag:$abs, 
InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
   V_CNDMASK_B32_e64,
-  [(set (i32 VReg_32:$dst), (select SReg_1:$src2, AllReg_32:$src1, 
AllReg_32:$src0))]
+  [(set (i32 VReg_32:$dst), (select SReg_1:$src2, VReg_32:$src1, 
VReg_32:$src0))]
 ;
 
 //f32 pattern for V_CNDMASK_B32_e64
 def : Pat 
-  (f32 (select SReg_1:$src2, AllReg_32:$src1, AllReg_32:$src0)),
-  (V_CNDMASK_B32_e64 AllReg_32:$src0, AllReg_32:$src1, SReg_1:$src2)
+  (f32 (select SReg_1:$src2, VReg_32:$src1, VReg_32:$src0)),
+  (V_CNDMASK_B32_e64 VReg_32:$src0, VReg_32:$src1, SReg_1:$src2)
 ;
 
 defm V_READLANE_B32 : VOP2_32 0x0001, V_READLANE_B32, [];
-- 
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] gallium/os: Fix os_time_sleep() on Windows for small durations.

2012-12-06 Thread jfonseca
From: José Fonseca jfons...@vmware.com

Prevents undetermined sleeps.
---
 src/gallium/auxiliary/os/os_time.c |6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/os/os_time.c 
b/src/gallium/auxiliary/os/os_time.c
index 4055125..f943e0f 100644
--- a/src/gallium/auxiliary/os/os_time.c
+++ b/src/gallium/auxiliary/os/os_time.c
@@ -88,7 +88,11 @@ os_time_get_nano(void)
 void
 os_time_sleep(int64_t usecs)
 {
-   Sleep((usecs + 999) / 1000);
+   DWORD dwMilliseconds = (usecs + 999) / 1000;
+   /* Avoid Sleep(O) as that would cause to sleep for an undetermined duration 
*/
+   if (dwMilliseconds) {
+  Sleep(dwMilliseconds);
+   }
 }
 
 #endif
-- 
1.7.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/4] gallium/os: Cleanup up os_time_get/os_time_get_nano.

2012-12-06 Thread jfonseca
From: José Fonseca jfons...@vmware.com

- Re-implement os_time_get in terms of os_time_get_nano() for consistency
- Use CLOCK_MONOTONIC as recommended
- Only use clock_gettime on Linux for now.
---
 scons/gallium.py   |2 +-
 src/gallium/auxiliary/os/os_time.c |   28 
 src/gallium/auxiliary/os/os_time.h |   12 +++-
 3 files changed, 20 insertions(+), 22 deletions(-)

diff --git a/scons/gallium.py b/scons/gallium.py
index 66ccaea..98671f7 100755
--- a/scons/gallium.py
+++ b/scons/gallium.py
@@ -500,7 +500,7 @@ def generate(env):
 libs = []
 if env['platform'] in ('darwin', 'freebsd', 'linux', 'posix', 'sunos'):
 libs += ['m', 'pthread', 'dl']
-if env['platform'] in 'linux':
+if env['platform'] in ('linux',):
 libs += ['rt']
 env.Append(LIBS = libs)
 
diff --git a/src/gallium/auxiliary/os/os_time.c 
b/src/gallium/auxiliary/os/os_time.c
index f943e0f..3612eba 100644
--- a/src/gallium/auxiliary/os/os_time.c
+++ b/src/gallium/auxiliary/os/os_time.c
@@ -48,13 +48,19 @@
 
 
 int64_t
-os_time_get(void)
+os_time_get_nano(void)
 {
-#if defined(PIPE_OS_UNIX)
+#if defined(PIPE_OS_LINUX)
+
+   struct timespec tv;
+   clock_gettime(CLOCK_MONOTONIC, tv);
+   return tv.tv_nsec + tv.tv_sec*INT64_C(10);
+
+#elif defined(PIPE_OS_UNIX)
 
struct timeval tv;
gettimeofday(tv, NULL);
-   return tv.tv_usec + tv.tv_sec*100LL;
+   return tv.tv_usec*INT64_C(1000) + tv.tv_sec*INT64_C(10);
 
 #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
 
@@ -63,22 +69,12 @@ os_time_get(void)
if(!frequency.QuadPart)
   QueryPerformanceFrequency(frequency);
QueryPerformanceCounter(counter);
-   return counter.QuadPart*INT64_C(100)/frequency.QuadPart;
-
-#endif
-}
+   return counter.QuadPart*INT64_C(10)/frequency.QuadPart;
 
+#else
 
-uint64_t
-os_time_get_nano(void)
-{
-#if defined(PIPE_OS_UNIX)
-   struct timespec tv;
-   clock_gettime(CLOCK_REALTIME, tv);
-   return tv.tv_nsec + tv.tv_sec * 10LL;
+#error Unsupported OS
 
-#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
-   return os_time_get() * 1000;
 #endif
 }
 
diff --git a/src/gallium/auxiliary/os/os_time.h 
b/src/gallium/auxiliary/os/os_time.h
index 54101a1..517de9b 100644
--- a/src/gallium/auxiliary/os/os_time.h
+++ b/src/gallium/auxiliary/os/os_time.h
@@ -51,17 +51,19 @@ extern C {
 
 
 /*
- * Get the current time in microseconds from an unknown base.
+ * Get the current time in nanoseconds from an unknown base.
  */
 int64_t
-os_time_get(void);
+os_time_get_nano(void);
 
 
 /*
- * Get the current time in nanoseconds from an unknown base.
+ * Get the current time in microseconds from an unknown base.
  */
-uint64_t
-os_time_get_nano(void);
+static INLINE int64_t
+os_time_get(void) {
+return os_time_get_nano() * 1000;
+}
 
 
 /*
-- 
1.7.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] gallium/util: Move the util_copy/fill_rect into u_surface.

2012-12-06 Thread jfonseca
From: José Fonseca jfons...@vmware.com

u_rect.h said these should move to a different file, and u_surface seems
a better home.

Leave #include util/u_surface.h to avoid having to touch thousands of
files.
---
 src/gallium/auxiliary/Makefile.sources |1 -
 src/gallium/auxiliary/util/u_rect.c|  158 
 src/gallium/auxiliary/util/u_rect.h|   30 +-
 src/gallium/auxiliary/util/u_surface.c |  124 +
 src/gallium/auxiliary/util/u_surface.h |   14 +++
 5 files changed, 140 insertions(+), 187 deletions(-)
 delete mode 100644 src/gallium/auxiliary/util/u_rect.c

diff --git a/src/gallium/auxiliary/Makefile.sources 
b/src/gallium/auxiliary/Makefile.sources
index 6258861..7241785 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -124,7 +124,6 @@ C_SOURCES := \
util/u_math.c \
util/u_mm.c \
util/u_pstipple.c \
-   util/u_rect.c \
util/u_ringbuffer.c \
util/u_sampler.c \
util/u_simple_shaders.c \
diff --git a/src/gallium/auxiliary/util/u_rect.c 
b/src/gallium/auxiliary/util/u_rect.c
deleted file mode 100644
index d00568f..000
--- a/src/gallium/auxiliary/util/u_rect.c
+++ /dev/null
@@ -1,158 +0,0 @@
-/**
- * 
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * Software), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **/
-
-/**
- * Rectangle-related helper functions.
- */
-
-
-#include util/u_format.h
-#include util/u_rect.h
-#include util/u_pack_color.h
-
-
-/**
- * Copy 2D rect from one place to another.
- * Position and sizes are in pixels.
- * src_stride may be negative to do vertical flip of pixels from source.
- */
-void
-util_copy_rect(ubyte * dst,
-   enum pipe_format format,
-   unsigned dst_stride,
-   unsigned dst_x,
-   unsigned dst_y,
-   unsigned width,
-   unsigned height,
-   const ubyte * src,
-   int src_stride,
-   unsigned src_x, 
-   unsigned src_y)
-{
-   unsigned i;
-   int src_stride_pos = src_stride  0 ? -src_stride : src_stride;
-   int blocksize = util_format_get_blocksize(format);
-   int blockwidth = util_format_get_blockwidth(format);
-   int blockheight = util_format_get_blockheight(format);
-
-   assert(blocksize  0);
-   assert(blockwidth  0);
-   assert(blockheight  0);
-
-   dst_x /= blockwidth;
-   dst_y /= blockheight;
-   width = (width + blockwidth - 1)/blockwidth;
-   height = (height + blockheight - 1)/blockheight;
-   src_x /= blockwidth;
-   src_y /= blockheight;
-   
-   dst += dst_x * blocksize;
-   src += src_x * blocksize;
-   dst += dst_y * dst_stride;
-   src += src_y * src_stride_pos;
-   width *= blocksize;
-
-   if (width == dst_stride  width == src_stride)
-  memcpy(dst, src, height * width);
-   else {
-  for (i = 0; i  height; i++) {
- memcpy(dst, src, width);
- dst += dst_stride;
- src += src_stride;
-  }
-   }
-}
-
-void
-util_fill_rect(ubyte * dst,
-   enum pipe_format format,
-   unsigned dst_stride,
-   unsigned dst_x,
-   unsigned dst_y,
-   unsigned width,
-   unsigned height,
-   union util_color *uc)
-{
-   const struct util_format_description *desc = 
util_format_description(format);
-   unsigned i, j;
-   unsigned width_size;
-   int blocksize = desc-block.bits / 8;
-   int blockwidth = desc-block.width;
-   int blockheight = desc-block.height;
-
-   assert(blocksize  0);
-   assert(blockwidth  0);
-   assert(blockheight  0);
-
-   dst_x /= blockwidth;
-   dst_y /= blockheight;
-   width = (width + 

[Mesa-dev] [PATCH 4/4] util: Add util_copy_box helper.

2012-12-06 Thread jfonseca
From: José Fonseca jfons...@vmware.com

Most users of util_copy_rect() need to, or should, deal with volumes.
---
 src/gallium/auxiliary/util/u_surface.c  |   55 +++
 src/gallium/auxiliary/util/u_surface.h  |   10 ++
 src/gallium/auxiliary/util/u_transfer.c |   28 
 3 files changed, 64 insertions(+), 29 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_surface.c 
b/src/gallium/auxiliary/util/u_surface.c
index b17dd74..2c197c3 100644
--- a/src/gallium/auxiliary/util/u_surface.c
+++ b/src/gallium/auxiliary/util/u_surface.c
@@ -168,6 +168,39 @@ util_copy_rect(ubyte * dst,
 }
 
 
+/**
+ * Copy 3D box from one place to another.
+ * Position and sizes are in pixels.
+ */
+void
+util_copy_box(ubyte * dst,
+  enum pipe_format format,
+  unsigned dst_stride, unsigned dst_slice_stride,
+  unsigned dst_x, unsigned dst_y, unsigned dst_z,
+  unsigned width, unsigned height, unsigned depth,
+  const ubyte * src,
+  int src_stride, unsigned src_slice_stride,
+  unsigned src_x, unsigned src_y, unsigned src_z)
+{
+   unsigned z;
+   dst += dst_z * dst_slice_stride;
+   src += src_z * src_slice_stride;
+   for (z = 0; z  depth; ++z) {
+  util_copy_rect(dst,
+ format,
+ dst_stride,
+ dst_x, dst_y,
+ width, height,
+ src,
+ src_stride,
+ src_x, src_y);
+
+  dst += dst_slice_stride;
+  src += src_slice_stride;
+   }
+}
+
+
 void
 util_fill_rect(ubyte * dst,
enum pipe_format format,
@@ -257,7 +290,6 @@ util_resource_copy_region(struct pipe_context *pipe,
const uint8_t *src_map;
enum pipe_format src_format, dst_format;
struct pipe_box dst_box;
-   int z;
 
assert(src  dst);
if (!src || !dst)
@@ -305,19 +337,14 @@ util_resource_copy_region(struct pipe_context *pipe,
   assert(src_box-depth == 1);
   memcpy(dst_map, src_map, src_box-width);
} else {
-  for (z = 0; z  src_box-depth; ++z) {
- util_copy_rect(dst_map,
-dst_format,
-dst_trans-stride,
-0, 0,
-src_box-width, src_box-height,
-src_map,
-src_trans-stride,
-0, 0);
-
- dst_map += dst_trans-layer_stride;
- src_map += src_trans-layer_stride;
-  }
+  util_copy_box(dst_map,
+dst_format,
+dst_trans-stride, dst_trans-layer_stride,
+0, 0, 0,
+src_box-width, src_box-height, src_box-depth,
+src_map,
+src_trans-stride, src_trans-layer_stride,
+0, 0, 0);
}
 
pipe-transfer_unmap(pipe, dst_trans);
diff --git a/src/gallium/auxiliary/util/u_surface.h 
b/src/gallium/auxiliary/util/u_surface.h
index db3fd8b..dd4d578 100644
--- a/src/gallium/auxiliary/util/u_surface.h
+++ b/src/gallium/auxiliary/util/u_surface.h
@@ -58,6 +58,16 @@ util_copy_rect(ubyte * dst, enum pipe_format format,
int src_stride, unsigned src_x, unsigned src_y);
 
 extern void
+util_copy_box(ubyte * dst,
+  enum pipe_format format,
+  unsigned dst_stride, unsigned dst_slice_stride,
+  unsigned dst_x, unsigned dst_y, unsigned dst_z,
+  unsigned width, unsigned height, unsigned depth,
+  const ubyte * src,
+  int src_stride, unsigned src_slice_stride,
+  unsigned src_x, unsigned src_y, unsigned src_z);
+
+extern void
 util_fill_rect(ubyte * dst, enum pipe_format format,
unsigned dst_stride, unsigned dst_x, unsigned dst_y,
unsigned width, unsigned height, union util_color *uc);
diff --git a/src/gallium/auxiliary/util/u_transfer.c 
b/src/gallium/auxiliary/util/u_transfer.c
index 8b4c365..8616825 100644
--- a/src/gallium/auxiliary/util/u_transfer.c
+++ b/src/gallium/auxiliary/util/u_transfer.c
@@ -1,5 +1,5 @@
 #include pipe/p_context.h
-#include util/u_rect.h
+#include util/u_surface.h
 #include util/u_inlines.h
 #include util/u_transfer.h
 #include util/u_memory.h
@@ -47,21 +47,19 @@ void u_default_transfer_inline_write( struct pipe_context 
*pipe,
}
else {
   const uint8_t *src_data = data;
-  unsigned i;
 
-  for (i = 0; i  box-depth; i++) {
- util_copy_rect(map,
-resource-format,
-transfer-stride, /* bytes */
-0, 0,
-box-width,
-box-height,
-src_data,
-stride,   /* bytes */
-0, 0);
- map += transfer-layer_stride;
- src_data += layer_stride;
-  }
+  util_copy_box(map,
+ 

[Mesa-dev] [PATCH 1/2] gallivm: Fix lp_build_print_value of small integers

2012-12-06 Thread jfonseca
From: José Fonseca jfons...@vmware.com

They need to be converted to native integers to prevent garbage in
higher order bits from being printed.
---
 src/gallium/auxiliary/gallivm/lp_bld_printf.c |   13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_printf.c 
b/src/gallium/auxiliary/gallivm/lp_bld_printf.c
index 0de6a08..7a6bbd9 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_printf.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_printf.c
@@ -125,8 +125,19 @@ lp_build_print_value(struct gallivm_state *gallivm,
   params[2] = value;
} else {
   for (i = 0; i  length; ++i) {
+ LLVMValueRef param;
  util_strncat(format, type_fmt, sizeof(format) - strlen(format) - 1);
- params[2 + i] = LLVMBuildExtractElement(builder, value, 
lp_build_const_int32(gallivm, i), );
+ param = LLVMBuildExtractElement(builder, value, 
lp_build_const_int32(gallivm, i), );
+ if (type_kind == LLVMIntegerTypeKind 
+ LLVMGetIntTypeWidth(type_ref)  sizeof(int) * 8) {
+LLVMTypeRef int_type = LLVMIntTypeInContext(gallivm-context, 
sizeof(int) * 8);
+if (LLVMGetIntTypeWidth(type_ref) == 8) {
+   param = LLVMBuildZExt(builder, param, int_type, );
+} else {
+   param = LLVMBuildSExt(builder, param, int_type, );
+}
+ }
+ params[2 + i] = param;
   }
}
 
-- 
1.7.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] gallivm: Fix lerping of (un)signed normalized numbers.

2012-12-06 Thread jfonseca
From: José Fonseca jfons...@vmware.com

Several issues actually:

- Fix a regression in unsigned normalized in the rescaling
  [0, 255] to [0, 256]

- Ensure we use signed shifts where appropriate (instead of
  unsigned shifts)

- Refactor the code slightly -- move all the logic inside
  lp_build_lerp_simple().

This change, plus an adjustment in the tolerance of signed normalized
results in piglit fbo-blending-formats fixes bug 57903
---
 src/gallium/auxiliary/gallivm/lp_bld_arit.c |   92 +--
 1 file changed, 43 insertions(+), 49 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c 
b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 8b19ebd..d930f09 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -685,7 +685,7 @@ lp_build_sub(struct lp_build_context *bld,
 /**
  * Normalized multiplication.
  *
- * There are several approaches here (using 8-bit normalized multiplication as
+ * There are several approaches for (using 8-bit normalized multiplication as
  * an example):
  *
  * - alpha plus one
@@ -694,7 +694,7 @@ lp_build_sub(struct lp_build_context *bld,
  *
  *   a*b/255 ~= (a*(b + 1))  256
  *
- * which is the fastest method that satisfies the following OpenGL criteria
+ * which is the fastest method that satisfies the following OpenGL 
criteria of
  *
  *   0*0 = 0 and 255*255 = 255
  *
@@ -710,7 +710,7 @@ lp_build_sub(struct lp_build_context *bld,
  *
  * note that just by itself it doesn't satisfies the OpenGL criteria, as
  * 255*255 = 254, so the special case b = 255 must be accounted or roundoff
- * must be used
+ * must be used.
  *
  * - geometric series plus rounding
  *
@@ -719,7 +719,9 @@ lp_build_sub(struct lp_build_context *bld,
  *
  *   t/255 ~= (t + (t  8) + 0x80)  8
  *
- * achieving the exact results
+ * achieving the exact results.
+ *
+ *
  *
  * @sa Alvy Ray Smith, Image Compositing Fundamentals, Tech Memo 4, Aug 15, 
1995, 
  * ftp://ftp.alvyray.com/Acrobat/4_Comp.pdf
@@ -733,8 +735,7 @@ lp_build_mul_norm(struct gallivm_state *gallivm,
 {
LLVMBuilderRef builder = gallivm-builder;
struct lp_build_context bld;
-   unsigned bits;
-   LLVMValueRef shift;
+   unsigned n;
LLVMValueRef half;
LLVMValueRef ab;
 
@@ -744,29 +745,28 @@ lp_build_mul_norm(struct gallivm_state *gallivm,
 
lp_build_context_init(bld, gallivm, wide_type);
 
-   bits = wide_type.width / 2;
+   n = wide_type.width / 2;
if (wide_type.sign) {
-  --bits;
+  --n;
}
 
-   shift = lp_build_const_int_vec(gallivm, wide_type, bits);
+   /*
+* TODO: for 16bits normalized SSE2 vectors we could consider using PMULHUW
+* 
http://ssp.impulsetrain.com/2011/07/03/multiplying-normalized-16-bit-numbers-with-sse2/
+*/
 
-#if 0
-   
-   /* a*b/255 ~= (a*(b + 1))  256 */
-   /* XXX: This would not work for signed types */
-   assert(!wide_type.sign);
-   b = LLVMBuildAdd(builder, b, lp_build_const_int_vec(gallium, wide_type, 1), 
);
-   ab = LLVMBuildMul(builder, a, b, );
+   /*
+* a*b / (2**n - 1) ~= (a*b + (a*b  n) + half)  n
+*/
 
-#else
-   
-   /* ab/255 ~= (ab + (ab  8) + 0x80)  8 */
ab = LLVMBuildMul(builder, a, b, );
-   ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, shift, ), );
+   ab = LLVMBuildAdd(builder, ab, lp_build_shr_imm(bld, ab, n), );
 
-   /* Add rounding term */
-   half = lp_build_const_int_vec(gallivm, wide_type, 1  (bits - 1));
+   /*
+* half = sgn(ab) * 0.5 * (2 ** n) = sgn(ab) * (1  (n - 1))
+*/
+
+   half = lp_build_const_int_vec(gallivm, wide_type, 1  (n - 1));
if (wide_type.sign) {
   LLVMValueRef minus_half = LLVMBuildNeg(builder, half, );
   LLVMValueRef sign = lp_build_shr_imm(bld, half, wide_type.width - 1);
@@ -774,9 +774,8 @@ lp_build_mul_norm(struct gallivm_state *gallivm,
}
ab = LLVMBuildAdd(builder, ab, half, );
 
-#endif
-   
-   ab = LLVMBuildLShr(builder, ab, shift, );
+   /* Final division */
+   ab = lp_build_shr_imm(bld, ab, n);
 
return ab;
 }
@@ -988,14 +987,28 @@ lp_build_lerp_simple(struct lp_build_context *bld,
 
delta = lp_build_sub(bld, v1, v0);
 
-   res = lp_build_mul(bld, x, delta);
-
if (normalized) {
-  if (bld-type.sign) {
- res = lp_build_shr_imm(bld, res, half_width - 1);
-  } else {
+  if (!bld-type.sign) {
+ /*
+  * Scale x from [0, 2**n - 1] to [0, 2**n] by adding the
+  * most-significant-bit to the lowest-significant-bit, so that
+  * later we can just divide by 2**n instead of 2**n - 1.
+  */
+ x = lp_build_add(bld, x, lp_build_shr_imm(bld, x, half_width - 1));
+
+ /* (x * delta)  n */
+ res = lp_build_mul(bld, x, delta);
  res = lp_build_shr_imm(bld, res, half_width);
+  } else {
+ /*
+  * The rescaling trick above doesn't work for signed numbers, so
+  * use the 2**n - 1 divison 

Re: [Mesa-dev] [PATCH] draw: remove some dead constant buffer code

2012-12-06 Thread Jose Fonseca
LGTM!

- Original Message -
 Remove the draw_vs_set_constants() and draw_gs_set_constants()
 functions and the draw-vs.aligned_constants,
 draw-vs.aligned_constant_storage and draw-vs.const_storage_size
 fields.  None of it was used.
 ---
  src/gallium/auxiliary/draw/draw_context.c |2 -
  src/gallium/auxiliary/draw/draw_gs.c  |   13 
  src/gallium/auxiliary/draw/draw_private.h |   18 --
  src/gallium/auxiliary/draw/draw_vs.c  |   49
  -
  4 files changed, 0 insertions(+), 82 deletions(-)
 
 diff --git a/src/gallium/auxiliary/draw/draw_context.c
 b/src/gallium/auxiliary/draw/draw_context.c
 index 6980631..c231aba 100644
 --- a/src/gallium/auxiliary/draw/draw_context.c
 +++ b/src/gallium/auxiliary/draw/draw_context.c
 @@ -372,12 +372,10 @@ draw_set_mapped_constant_buffer(struct
 draw_context *draw,
 case PIPE_SHADER_VERTEX:
draw-pt.user.vs_constants[slot] = buffer;
draw-pt.user.vs_constants_size[slot] = size;
 -  draw_vs_set_constants(draw, slot, buffer, size);
break;
 case PIPE_SHADER_GEOMETRY:
draw-pt.user.gs_constants[slot] = buffer;
draw-pt.user.gs_constants_size[slot] = size;
 -  draw_gs_set_constants(draw, slot, buffer, size);
break;
 default:
assert(0  invalid shader type in
draw_set_mapped_constant_buffer);
 diff --git a/src/gallium/auxiliary/draw/draw_gs.c
 b/src/gallium/auxiliary/draw/draw_gs.c
 index 3b3ff21..5c55523 100644
 --- a/src/gallium/auxiliary/draw/draw_gs.c
 +++ b/src/gallium/auxiliary/draw/draw_gs.c
 @@ -69,19 +69,6 @@ void draw_gs_destroy( struct draw_context *draw )
 tgsi_exec_machine_destroy(draw-gs.tgsi.machine);
  }
  
 -void
 -draw_gs_set_constants(struct draw_context *draw,
 -  unsigned slot,
 -  const void *constants,
 -  unsigned size)
 -{
 -   /* noop. added here for symmetry with the VS
 -* code and in case we'll ever want to allign
 -* the constants, e.g. when we'll change to a
 -* different interpreter */
 -}
 -
 -
  struct draw_geometry_shader *
  draw_create_geometry_shader(struct draw_context *draw,
  const struct pipe_shader_state *state)
 diff --git a/src/gallium/auxiliary/draw/draw_private.h
 b/src/gallium/auxiliary/draw/draw_private.h
 index 5c497c6..86ce397 100644
 --- a/src/gallium/auxiliary/draw/draw_private.h
 +++ b/src/gallium/auxiliary/draw/draw_private.h
 @@ -250,12 +250,6 @@ struct draw_context
   uint num_samplers;
} tgsi;
  
 -  const void *aligned_constants[PIPE_MAX_CONSTANT_BUFFERS];
 -
 -  const void
 *aligned_constant_storage[PIPE_MAX_CONSTANT_BUFFERS];
 -  unsigned const_storage_size[PIPE_MAX_CONSTANT_BUFFERS];
 -
 -
struct translate *fetch;
struct translate_cache *fetch_cache;
struct translate *emit;
 @@ -369,24 +363,12 @@ void draw_vs_destroy( struct draw_context *draw
 );
  void draw_vs_set_viewport( struct draw_context *,
 const struct pipe_viewport_state * );
  
 -void
 -draw_vs_set_constants(struct draw_context *,
 -  unsigned slot,
 -  const void *constants,
 -  unsigned size);
 -
 -
  
  
 /***
   * Geometry shading code:
   */
  boolean draw_gs_init( struct draw_context *draw );
  
 -void
 -draw_gs_set_constants(struct draw_context *,
 -  unsigned slot,
 -  const void *constants,
 -  unsigned size);
  
  void draw_gs_destroy( struct draw_context *draw );
  
 diff --git a/src/gallium/auxiliary/draw/draw_vs.c
 b/src/gallium/auxiliary/draw/draw_vs.c
 index 0aea2f2..785a903 100644
 --- a/src/gallium/auxiliary/draw/draw_vs.c
 +++ b/src/gallium/auxiliary/draw/draw_vs.c
 @@ -49,47 +49,6 @@
  DEBUG_GET_ONCE_BOOL_OPTION(gallium_dump_vs, GALLIUM_DUMP_VS,
  FALSE)
  
  
 -/**
 - * Set a vertex shader constant buffer.
 - * \param slot  which constant buffer in [0,
 PIPE_MAX_CONSTANT_BUFFERS-1]
 - * \param constants  the mapped buffer
 - * \param size  size of buffer in bytes
 - */
 -void
 -draw_vs_set_constants(struct draw_context *draw,
 -  unsigned slot,
 -  const void *constants,
 -  unsigned size)
 -{
 -   const int alignment = 16;
 -
 -   /* check if buffer is 16-byte aligned */
 -   if (((uintptr_t)constants)  (alignment - 1)) {
 -  /* if not, copy the constants into a new, 16-byte aligned
 buffer */
 -  if (size  draw-vs.const_storage_size[slot]) {
 - if (draw-vs.aligned_constant_storage[slot]) {
 -align_free((void
 *)draw-vs.aligned_constant_storage[slot]);
 -draw-vs.const_storage_size[slot] = 0;
 - }
 - draw-vs.aligned_constant_storage[slot] =
 -align_malloc(size, alignment);
 - if 

Re: [Mesa-dev] [PATCH 6/6] mesa/st: add ARB_uniform_buffer_object support

2012-12-06 Thread Jose Fonseca
Series looks good AFAICT.

Jose

- Original Message -
 From: Dave Airlie airl...@redhat.com
 
 this adds UBO support to the state tracker, it works with softpipe
 as-is.
 
 It uses UARL + CONST[x][ADDR[0].x] type constructs.
 
 Signed-off-by: Dave Airlie airl...@redhat.com
 ---
  src/mesa/state_tracker/st_atom.c |  2 +
  src/mesa/state_tracker/st_atom.h |  2 +
  src/mesa/state_tracker/st_atom_constbuf.c| 70
  +++-
  src/mesa/state_tracker/st_cb_bufferobjects.c |  3 ++
  src/mesa/state_tracker/st_extensions.c   | 13 ++
  src/mesa/state_tracker/st_glsl_to_tgsi.cpp   | 63
  ++---
  6 files changed, 146 insertions(+), 7 deletions(-)
 
 diff --git a/src/mesa/state_tracker/st_atom.c
 b/src/mesa/state_tracker/st_atom.c
 index 102fee9..091bd55 100644
 --- a/src/mesa/state_tracker/st_atom.c
 +++ b/src/mesa/state_tracker/st_atom.c
 @@ -64,6 +64,8 @@ static const struct st_tracked_state *atoms[] =
 st_update_vs_constants,
 st_update_gs_constants,
 st_update_fs_constants,
 +   st_upload_vs_ubos,
 +   st_upload_fs_ubos,
 st_update_pixel_transfer,
  
 /* this must be done after the vertex program update */
 diff --git a/src/mesa/state_tracker/st_atom.h
 b/src/mesa/state_tracker/st_atom.h
 index 6c7d09f..ca79b44 100644
 --- a/src/mesa/state_tracker/st_atom.h
 +++ b/src/mesa/state_tracker/st_atom.h
 @@ -67,6 +67,8 @@ extern const struct st_tracked_state
 st_finalize_textures;
  extern const struct st_tracked_state st_update_fs_constants;
  extern const struct st_tracked_state st_update_gs_constants;
  extern const struct st_tracked_state st_update_vs_constants;
 +extern const struct st_tracked_state st_upload_fs_ubos;
 +extern const struct st_tracked_state st_upload_vs_ubos;
  extern const struct st_tracked_state st_update_pixel_transfer;
  
  
 diff --git a/src/mesa/state_tracker/st_atom_constbuf.c
 b/src/mesa/state_tracker/st_atom_constbuf.c
 index 580393e..ea0cf0e 100644
 --- a/src/mesa/state_tracker/st_atom_constbuf.c
 +++ b/src/mesa/state_tracker/st_atom_constbuf.c
 @@ -45,7 +45,7 @@
  #include st_atom.h
  #include st_atom_constbuf.h
  #include st_program.h
 -
 +#include st_cb_bufferobjects.h
  
  /**
   * Pass the given program parameters to the graphics pipe as a
 @@ -175,3 +175,71 @@ const struct st_tracked_state
 st_update_gs_constants = {
 },
 update_gs_constants   /* update */
  };
 +
 +static void st_upload_ubos(struct st_context *st,
 +   struct gl_shader *shader,
 +   unsigned shader_type)
 +{
 +   int i;
 +   struct pipe_constant_buffer cb = { 0 };
 +   assert(shader_type == PIPE_SHADER_VERTEX ||
 +  shader_type == PIPE_SHADER_FRAGMENT);
 +
 +   if (!shader)
 +  return;
 +
 +   for (i = 0; i  shader-NumUniformBlocks; i++) {
 +  struct gl_uniform_buffer_binding *binding;
 +  struct st_buffer_object *st_obj;
 +
 +  binding =
 st-ctx-UniformBufferBindings[shader-UniformBlocks[i].Binding];
 +  st_obj = st_buffer_object(binding-BufferObject);
 +  pipe_resource_reference(cb.buffer, st_obj-buffer);
 +
 +  cb.buffer_size = st_obj-buffer-width0 - binding-Offset;
 +
 +  st-pipe-set_constant_buffer(st-pipe, shader_type, 1 + i,
 cb);
 +  pipe_resource_reference(cb.buffer, NULL);
 +   }
 +}
 +
 +static void upload_vs_ubos(struct st_context *st)
 +{
 +   struct gl_shader_program *prog =
 st-ctx-Shader.CurrentVertexProgram;
 +
 +   if (!prog)
 +  return;
 +
 +   st_upload_ubos(st, prog-_LinkedShaders[MESA_SHADER_VERTEX],
 PIPE_SHADER_VERTEX);
 +}
 +
 +const struct st_tracked_state st_upload_vs_ubos = {
 +   st_upload_vs_ubos,
 +   {
 +  (_NEW_PROGRAM | _NEW_BUFFER_OBJECT),
 +  ST_NEW_VERTEX_PROGRAM,
 +   },
 +   upload_vs_ubos
 +};
 +
 +static void upload_fs_ubos(struct st_context *st)
 +{
 +   struct gl_shader_program *prog =
 st-ctx-Shader.CurrentFragmentProgram;
 +
 +   if (!prog)
 +  return;
 +
 +   st_upload_ubos(st, prog-_LinkedShaders[MESA_SHADER_FRAGMENT],
 PIPE_SHADER_FRAGMENT);
 +
 +}
 +
 +const struct st_tracked_state st_upload_fs_ubos = {
 +   st_upload_fs_ubos,
 +   {
 +  (_NEW_PROGRAM | _NEW_BUFFER_OBJECT),
 +  ST_NEW_FRAGMENT_PROGRAM,
 +   },
 +   upload_fs_ubos
 +};
 +
 +
 diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c
 b/src/mesa/state_tracker/st_cb_bufferobjects.c
 index ac38128..7d1c05a 100644
 --- a/src/mesa/state_tracker/st_cb_bufferobjects.c
 +++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
 @@ -198,6 +198,9 @@ st_bufferobj_data(struct gl_context *ctx,
 case GL_TRANSFORM_FEEDBACK_BUFFER:
bind = PIPE_BIND_STREAM_OUTPUT;
break;
 +   case GL_UNIFORM_BUFFER:
 +  bind = PIPE_BIND_CONSTANT_BUFFER;
 +  break;
 default:
bind = 0;
 }
 diff --git a/src/mesa/state_tracker/st_extensions.c
 b/src/mesa/state_tracker/st_extensions.c
 index ccb1f36..d76cfdb 100644
 --- 

Re: [Mesa-dev] [PATCH] AMDGPU: Match fdiv for SI.

2012-12-06 Thread Tom Stellard
On Thu, Dec 06, 2012 at 11:02:35AM +0100, Michel Dänzer wrote:
 From: Michel Dänzer michel.daen...@amd.com
 
 Fixes hundreds of radeonsi piglit regressions from commit
 62c8e1ec4a93b28c55c1c7accb6cbd64952e59c2 ('AMDGPU: replace int_AMDGPU_rcp by
 fdiv (1.0, x) in RECIP pattern').


Sorry, I missed this.

Reviewed-by: Tom Stellard thomas.stell...@ams.com
 
 Signed-off-by: Michel Dänzer michel.daen...@amd.com
 ---
  lib/Target/AMDGPU/SIInstructions.td |5 +
  1 file changed, 5 insertions(+)
 
 diff --git a/lib/Target/AMDGPU/SIInstructions.td 
 b/lib/Target/AMDGPU/SIInstructions.td
 index e9bbe23..3564ec3 100644
 --- a/lib/Target/AMDGPU/SIInstructions.td
 +++ b/lib/Target/AMDGPU/SIInstructions.td
 @@ -1264,6 +1264,11 @@ def : Pat 
(V_MUL_LEGACY_F32_e32 AllReg_32:$src0, (V_RCP_LEGACY_F32_e32 
 AllReg_32:$src1))
  ;
  
 +def : Pat
 +  (fdiv AllReg_32:$src0, AllReg_32:$src1),
 +  (V_MUL_F32_e32 AllReg_32:$src0, (V_RCP_F32_e32 AllReg_32:$src1))
 +;
 +
  def : Pat 
(int_AMDGPU_kilp),
(SI_KIL (V_MOV_IMM_I32 0xbf80))
 -- 
 1.7.10.4
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] gallivm: Fix lp_build_print_value of small integers

2012-12-06 Thread Brian Paul
OK, by small integers you mean 8 and 16-bit ints.  At first I 
thought you meant 1, 2, 3, etc.



On 12/06/2012 06:20 AM, jfons...@vmware.com wrote:

From: José Fonsecajfons...@vmware.com

They need to be converted to native integers to prevent garbagge in


garbage



higher order bits from being printed.
---
  src/gallium/auxiliary/gallivm/lp_bld_printf.c |   13 -
  1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_printf.c 
b/src/gallium/auxiliary/gallivm/lp_bld_printf.c
index 0de6a08..7a6bbd9 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_printf.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_printf.c
@@ -125,8 +125,19 @@ lp_build_print_value(struct gallivm_state *gallivm,
params[2] = value;
 } else {
for (i = 0; i  length; ++i) {
+ LLVMValueRef param;
   util_strncat(format, type_fmt, sizeof(format) - strlen(format) - 1);
- params[2 + i] = LLVMBuildExtractElement(builder, value, 
lp_build_const_int32(gallivm, i), );
+ param = LLVMBuildExtractElement(builder, value, lp_build_const_int32(gallivm, 
i), );
+ if (type_kind == LLVMIntegerTypeKind
+ LLVMGetIntTypeWidth(type_ref)  sizeof(int) * 8) {
+LLVMTypeRef int_type = LLVMIntTypeInContext(gallivm-context, 
sizeof(int) * 8);
+if (LLVMGetIntTypeWidth(type_ref) == 8) {


  /* print 8-bit ints as unsigned */


+   param = LLVMBuildZExt(builder, param, int_type, );
+} else {


  /* print 16-bit ints as signed */


+   param = LLVMBuildSExt(builder, param, int_type, );
+}
+ }
+ params[2 + i] = param;
}
 }



Looks good otherwise.  Reviewed-by: Brian Paul bri...@vmware.com
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] gallivm: Fix lerping of (un)signed normalized numbers.

2012-12-06 Thread Brian Paul

On 12/06/2012 06:20 AM, jfons...@vmware.com wrote:

From: José Fonsecajfons...@vmware.com

Several issues acutally:


actually




- Fix a regression in unsigned normalized in the rescaling
   [0, 255] to [0, 256]

- Ensure we use signed shifts where appropriate (instead of
   unsigned shifts)

- Refactor the code slightly -- move all the logic inside
   lp_build_lerp_simple().

This change, plus an adjustment in the tolerance of signed normalized
results in piglit fbo-blending-formats fixes bug 57903
---
  src/gallium/auxiliary/gallivm/lp_bld_arit.c |   92 +--
  1 file changed, 43 insertions(+), 49 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c 
b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 8b19ebd..d930f09 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -685,7 +685,7 @@ lp_build_sub(struct lp_build_context *bld,
  /**
   * Normalized multiplication.
   *
- * There are several approaches here (using 8-bit normalized multiplication as
+ * There are several approaches for (using 8-bit normalized multiplication as
   * an example):
   *
   * - alpha plus one
@@ -694,7 +694,7 @@ lp_build_sub(struct lp_build_context *bld,
   *
   *   a*b/255 ~= (a*(b + 1))  256
   *
- * which is the fastest method that satisfies the following OpenGL criteria
+ * which is the fastest method that satisfies the following OpenGL 
criteria of
   *
   *   0*0 = 0 and 255*255 = 255
   *
@@ -710,7 +710,7 @@ lp_build_sub(struct lp_build_context *bld,
   *
   * note that just by itself it doesn't satisfies the OpenGL criteria, as
   * 255*255 = 254, so the special case b = 255 must be accounted or 
roundoff
- * must be used
+ * must be used.
   *
   * - geometric series plus rounding
   *
@@ -719,7 +719,9 @@ lp_build_sub(struct lp_build_context *bld,
   *
   *   t/255 ~= (t + (t  8) + 0x80)  8
   *
- * achieving the exact results
+ * achieving the exact results.
+ *
+ *
   *
   * @sa Alvy Ray Smith, Image Compositing Fundamentals, Tech Memo 4, Aug 15, 
1995,
   * ftp://ftp.alvyray.com/Acrobat/4_Comp.pdf
@@ -733,8 +735,7 @@ lp_build_mul_norm(struct gallivm_state *gallivm,
  {
 LLVMBuilderRef builder = gallivm-builder;
 struct lp_build_context bld;
-   unsigned bits;
-   LLVMValueRef shift;
+   unsigned n;
 LLVMValueRef half;
 LLVMValueRef ab;

@@ -744,29 +745,28 @@ lp_build_mul_norm(struct gallivm_state *gallivm,

 lp_build_context_init(bld, gallivm, wide_type);

-   bits = wide_type.width / 2;
+   n = wide_type.width / 2;
 if (wide_type.sign) {
-  --bits;
+  --n;
 }

-   shift = lp_build_const_int_vec(gallivm, wide_type, bits);
+   /*
+* TODO: for 16bits normalized SSE2 vectors we could consider using PMULHUW
+* 
http://ssp.impulsetrain.com/2011/07/03/multiplying-normalized-16-bit-numbers-with-sse2/
+*/

-#if 0
-
-   /* a*b/255 ~= (a*(b + 1))  256 */
-   /* XXX: This would not work for signed types */
-   assert(!wide_type.sign);
-   b = LLVMBuildAdd(builder, b, lp_build_const_int_vec(gallium, wide_type, 1), 
);
-   ab = LLVMBuildMul(builder, a, b, );
+   /*
+* a*b / (2**n - 1) ~= (a*b + (a*b  n) + half)  n
+*/

-#else
-
-   /* ab/255 ~= (ab + (ab  8) + 0x80)  8 */
 ab = LLVMBuildMul(builder, a, b, );
-   ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, shift, ), );
+   ab = LLVMBuildAdd(builder, ab, lp_build_shr_imm(bld, ab, n), );

-   /* Add rounding term */
-   half = lp_build_const_int_vec(gallivm, wide_type, 1  (bits - 1));
+   /*
+* half = sgn(ab) * 0.5 * (2 ** n) = sgn(ab) * (1  (n - 1))
+*/
+
+   half = lp_build_const_int_vec(gallivm, wide_type, 1  (n - 1));
 if (wide_type.sign) {
LLVMValueRef minus_half = LLVMBuildNeg(builder, half, );
LLVMValueRef sign = lp_build_shr_imm(bld, half, wide_type.width - 1);
@@ -774,9 +774,8 @@ lp_build_mul_norm(struct gallivm_state *gallivm,
 }
 ab = LLVMBuildAdd(builder, ab, half, );

-#endif
-
-   ab = LLVMBuildLShr(builder, ab, shift, );
+   /* Final division */
+   ab = lp_build_shr_imm(bld, ab, n);

 return ab;
  }
@@ -988,14 +987,28 @@ lp_build_lerp_simple(struct lp_build_context *bld,

 delta = lp_build_sub(bld, v1, v0);

-   res = lp_build_mul(bld, x, delta);
-
 if (normalized) {
-  if (bld-type.sign) {
- res = lp_build_shr_imm(bld, res, half_width - 1);
-  } else {
+  if (!bld-type.sign) {
+ /*
+  * Scale x from [0, 2**n - 1] to [0, 2**n] by adding the
+  * most-significant-bit to the lowest-significant-bit, so that
+  * later we can just divide by 2**n instead of 2**n - 1.
+  */
+ x = lp_build_add(bld, x, lp_build_shr_imm(bld, x, half_width - 1));
+
+ /* (x * delta)  n */
+ res = lp_build_mul(bld, x, delta);
   res = lp_build_shr_imm(bld, res, half_width);
+  } else {
+ /*
+  * The rescaling trick 

Re: [Mesa-dev] [PATCH 1/4] gallium/os: Fix os_time_sleep() on Windows for small durations.

2012-12-06 Thread Brian Paul

On 12/06/2012 06:17 AM, jfons...@vmware.com wrote:

From: José Fonsecajfons...@vmware.com

Prevents undetermined sleeps.
---
  src/gallium/auxiliary/os/os_time.c |6 +-
  1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/os/os_time.c 
b/src/gallium/auxiliary/os/os_time.c
index 4055125..f943e0f 100644
--- a/src/gallium/auxiliary/os/os_time.c
+++ b/src/gallium/auxiliary/os/os_time.c
@@ -88,7 +88,11 @@ os_time_get_nano(void)
  void
  os_time_sleep(int64_t usecs)
  {
-   Sleep((usecs + 999) / 1000);
+   DWORD dwMilliseconds = (usecs + 999) / 1000;
+   /* Avoid Sleep(O) as that would cause to sleep for an undetermined duration 
*/
+   if (dwMilliseconds) {
+  Sleep(dwMilliseconds);
+   }
  }

  #endif


For the series:
Reviewed-by: Brian Paul bri...@vmware.com
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/6] st/mesa: add texture buffer object support to state tracker

2012-12-06 Thread Brian Paul

On 12/05/2012 11:24 PM, Dave Airlie wrote:

From: Dave Airlieairl...@redhat.com

This adds the necessary changes to the st to allow texture buffer object
support if the driver advertises it.

Signed-off-by: Dave Airlieairl...@redhat.com
---
  src/mesa/state_tracker/st_atom_texture.c   | 18 --
  src/mesa/state_tracker/st_cb_texture.c | 16 
  src/mesa/state_tracker/st_extensions.c |  2 ++
  src/mesa/state_tracker/st_glsl_to_tgsi.cpp |  2 +-
  src/mesa/state_tracker/st_mesa_to_tgsi.c   |  1 +
  5 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_texture.c 
b/src/mesa/state_tracker/st_atom_texture.c
index df05e83..dba1d82 100644
--- a/src/mesa/state_tracker/st_atom_texture.c
+++ b/src/mesa/state_tracker/st_atom_texture.c
@@ -215,13 +215,19 @@ update_single_texture(struct st_context *st,

 /* Determine the format of the texture sampler view */
 st_view_format = stObj-pt-format;
-   {
-  const struct st_texture_image *firstImage =
-st_texture_image(stObj-base.Image[0][stObj-base.BaseLevel]);
-  const gl_format texFormat = firstImage-base.TexFormat;
-  enum pipe_format firstImageFormat =
-st_mesa_format_to_pipe_format(texFormat);

+   {
+  gl_format texFormat;
+  enum pipe_format firstImageFormat;
+
+  if (texObj-Target == GL_TEXTURE_BUFFER) {
+ texFormat = stObj-base._BufferObjectFormat;
+  } else {
+ const struct st_texture_image *firstImage =
+st_texture_image(stObj-base.Image[0][stObj-base.BaseLevel]);
+ texFormat = firstImage-base.TexFormat;
+  }
+  firstImageFormat = st_mesa_format_to_pipe_format(texFormat);


I haven't double-checked, but I thought we had a utility function for 
getting a texture's format using code like that.




if ((samp-sRGBDecode == GL_SKIP_DECODE_EXT)
(_mesa_get_format_color_encoding(texFormat) == GL_SRGB)) {
   /* Don't do sRGB-RGB conversion.  Interpret the texture data as
diff --git a/src/mesa/state_tracker/st_cb_texture.c 
b/src/mesa/state_tracker/st_cb_texture.c
index f06814f..48a18d2 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -48,6 +48,7 @@
  #include state_tracker/st_cb_fbo.h
  #include state_tracker/st_cb_flush.h
  #include state_tracker/st_cb_texture.h
+#include state_tracker/st_cb_bufferobjects.h
  #include state_tracker/st_format.h
  #include state_tracker/st_texture.h
  #include state_tracker/st_gen_mipmap.h
@@ -1182,6 +1183,21 @@ st_finalize_texture(struct gl_context *ctx,
   stObj-lastLevel = stObj-base._MaxLevel;
 }

+   if (tObj-Target == GL_TEXTURE_BUFFER) {
+  struct st_buffer_object *st_obj = st_buffer_object(tObj-BufferObject);
+
+  if (st_obj-buffer != stObj-pt) {
+ pipe_resource_reference(stObj-pt, st_obj-buffer);
+ pipe_sampler_view_release(st-pipe,stObj-sampler_view);
+ stObj-width0 = stObj-pt-width0 / 
_mesa_get_format_bytes(tObj-_BufferObjectFormat);
+ stObj-height0 = 1;
+ stObj-depth0 = 1;
+


You can remove the extra blank line there.


+  }
+  return GL_TRUE;
+
+   }
+
 firstImage = st_texture_image(stObj-base.Image[0][stObj-base.BaseLevel]);
 assert(firstImage);

diff --git a/src/mesa/state_tracker/st_extensions.c 
b/src/mesa/state_tracker/st_extensions.c
index 62a736b..0c61795 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -643,4 +643,6 @@ void st_init_extensions(struct st_context *st)
 if (ctx-Const.MinMapBufferAlignment= 64) {
ctx-Extensions.ARB_map_buffer_alignment = GL_TRUE;
 }
+   if (screen-get_param(screen, PIPE_CAP_TEXTURE_BUFFER_OBJECTS))
+  ctx-Extensions.ARB_texture_buffer_object = GL_TRUE;
  }
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index c030a6b..ae3b8b2 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -2776,7 +2776,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
inst-tex_target = TEXTURE_RECT_INDEX;
break;
 case GLSL_SAMPLER_DIM_BUF:
-  assert(!FINISHME: Implement ARB_texture_buffer_object);
+  inst-tex_target = TEXTURE_BUFFER_INDEX;
break;
 case GLSL_SAMPLER_DIM_EXTERNAL:
inst-tex_target = TEXTURE_EXTERNAL_INDEX;
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c 
b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index 81a870f..e326bcc 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -269,6 +269,7 @@ st_translate_texture_target( GLuint textarget,
 }

 switch( textarget ) {
+   case TEXTURE_BUFFER_INDEX: return TGSI_TEXTURE_BUFFER;
 case TEXTURE_1D_INDEX:   return TGSI_TEXTURE_1D;
 case TEXTURE_2D_INDEX:   return TGSI_TEXTURE_2D;
 case TEXTURE_3D_INDEX:   return TGSI_TEXTURE_3D;



Reviewed-by: Brian 

Re: [Mesa-dev] [PATCH 6/6] mesa/st: add ARB_uniform_buffer_object support

2012-12-06 Thread Brian Paul

On 12/05/2012 11:24 PM, Dave Airlie wrote:

From: Dave Airlieairl...@redhat.com

This adds UBO support to the state tracker; it works with softpipe
as-is.

It uses UARL + CONST[x][ADDR[0].x] type constructs.

Signed-off-by: Dave Airlieairl...@redhat.com
---
  src/mesa/state_tracker/st_atom.c |  2 +
  src/mesa/state_tracker/st_atom.h |  2 +
  src/mesa/state_tracker/st_atom_constbuf.c| 70 +++-
  src/mesa/state_tracker/st_cb_bufferobjects.c |  3 ++
  src/mesa/state_tracker/st_extensions.c   | 13 ++
  src/mesa/state_tracker/st_glsl_to_tgsi.cpp   | 63 ++---
  6 files changed, 146 insertions(+), 7 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom.c b/src/mesa/state_tracker/st_atom.c
index 102fee9..091bd55 100644
--- a/src/mesa/state_tracker/st_atom.c
+++ b/src/mesa/state_tracker/st_atom.c
@@ -64,6 +64,8 @@ static const struct st_tracked_state *atoms[] =
 st_update_vs_constants,
 st_update_gs_constants,
 st_update_fs_constants,
+st_upload_vs_ubos,
+st_upload_fs_ubos,
 st_update_pixel_transfer,

 /* this must be done after the vertex program update */
diff --git a/src/mesa/state_tracker/st_atom.h b/src/mesa/state_tracker/st_atom.h
index 6c7d09f..ca79b44 100644
--- a/src/mesa/state_tracker/st_atom.h
+++ b/src/mesa/state_tracker/st_atom.h
@@ -67,6 +67,8 @@ extern const struct st_tracked_state st_finalize_textures;
  extern const struct st_tracked_state st_update_fs_constants;
  extern const struct st_tracked_state st_update_gs_constants;
  extern const struct st_tracked_state st_update_vs_constants;
+extern const struct st_tracked_state st_upload_fs_ubos;
+extern const struct st_tracked_state st_upload_vs_ubos;
  extern const struct st_tracked_state st_update_pixel_transfer;


diff --git a/src/mesa/state_tracker/st_atom_constbuf.c 
b/src/mesa/state_tracker/st_atom_constbuf.c
index 580393e..ea0cf0e 100644
--- a/src/mesa/state_tracker/st_atom_constbuf.c
+++ b/src/mesa/state_tracker/st_atom_constbuf.c
@@ -45,7 +45,7 @@
  #include st_atom.h
  #include st_atom_constbuf.h
  #include st_program.h
-
+#include st_cb_bufferobjects.h

  /**
   * Pass the given program parameters to the graphics pipe as a
@@ -175,3 +175,71 @@ const struct st_tracked_state st_update_gs_constants = {
 },
 update_gs_constants/* update */
  };
+
+static void st_upload_ubos(struct st_context *st,
+   struct gl_shader *shader,
+   unsigned shader_type)
+{
+   int i;
+   struct pipe_constant_buffer cb = { 0 };
+   assert(shader_type == PIPE_SHADER_VERTEX ||
+  shader_type == PIPE_SHADER_FRAGMENT);


Is that assertion really needed?  Will this function need changes 
for GS?




+
+   if (!shader)
+  return;
+
+   for (i = 0; i  shader-NumUniformBlocks; i++) {


'i' should be declared unsigned above to match the type of 
NumUniformBlocks, otherwise we'll get a signed/unsigned comparison 
warning with MSVC.




+  struct gl_uniform_buffer_binding *binding;
+  struct st_buffer_object *st_obj;
+
+  binding 
=st-ctx-UniformBufferBindings[shader-UniformBlocks[i].Binding];
+  st_obj = st_buffer_object(binding-BufferObject);
+  pipe_resource_reference(cb.buffer, st_obj-buffer);
+
+  cb.buffer_size = st_obj-buffer-width0 - binding-Offset;
+
+  st-pipe-set_constant_buffer(st-pipe, shader_type, 1 + i,cb);
+  pipe_resource_reference(cb.buffer, NULL);
+   }
+}
+
+static void upload_vs_ubos(struct st_context *st)
+{
+   struct gl_shader_program *prog = st-ctx-Shader.CurrentVertexProgram;
+
+   if (!prog)
+  return;
+
+   st_upload_ubos(st, prog-_LinkedShaders[MESA_SHADER_VERTEX], 
PIPE_SHADER_VERTEX);
+}
+
+const struct st_tracked_state st_upload_vs_ubos = {
+   st_upload_vs_ubos,
+   {
+  (_NEW_PROGRAM | _NEW_BUFFER_OBJECT),
+  ST_NEW_VERTEX_PROGRAM,
+   },
+   upload_vs_ubos
+};
+
+static void upload_fs_ubos(struct st_context *st)
+{
+   struct gl_shader_program *prog = st-ctx-Shader.CurrentFragmentProgram;
+
+   if (!prog)
+  return;
+
+   st_upload_ubos(st, prog-_LinkedShaders[MESA_SHADER_FRAGMENT], 
PIPE_SHADER_FRAGMENT);
+
+}
+
+const struct st_tracked_state st_upload_fs_ubos = {
+   st_upload_fs_ubos,
+   {
+  (_NEW_PROGRAM | _NEW_BUFFER_OBJECT),
+  ST_NEW_FRAGMENT_PROGRAM,
+   },
+   upload_fs_ubos
+};
+
+
diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c 
b/src/mesa/state_tracker/st_cb_bufferobjects.c
index ac38128..7d1c05a 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -198,6 +198,9 @@ st_bufferobj_data(struct gl_context *ctx,
 case GL_TRANSFORM_FEEDBACK_BUFFER:
bind = PIPE_BIND_STREAM_OUTPUT;
break;
+   case GL_UNIFORM_BUFFER:
+  bind = PIPE_BIND_CONSTANT_BUFFER;
+  break;
 default:
bind = 0;
 }
diff --git a/src/mesa/state_tracker/st_extensions.c 

Re: [Mesa-dev] [PATCH 4/4] r300g: Remove an incorrect comment

2012-12-06 Thread Michel Dänzer
On Mit, 2012-12-05 at 23:08 +0100, Stefan Dösinger wrote: 
 This occurred because I started this patch by reverting another patch and
 forgot to remove it.
 ---
  src/gallium/drivers/r300/r300_screen.c |2 +-
  1 files changed, 1 insertions(+), 1 deletions(-)
 
 diff --git a/src/gallium/drivers/r300/r300_screen.c 
 b/src/gallium/drivers/r300/r300_screen.c
 index de5e4a1..8cb1406 100644
 --- a/src/gallium/drivers/r300/r300_screen.c
 +++ b/src/gallium/drivers/r300/r300_screen.c
 @@ -105,7 +105,7 @@ static int r300_get_param(struct pipe_screen* pscreen, 
 enum pipe_cap param)
  case PIPE_CAP_VERTEX_COLOR_CLAMPED:
  case PIPE_CAP_USER_INDEX_BUFFERS:
  case PIPE_CAP_USER_CONSTANT_BUFFERS:
 -case PIPE_CAP_DEPTH_CLIP_DISABLE: /* XXX implemented, but breaks 
 Regnum Online */
 +case PIPE_CAP_DEPTH_CLIP_DISABLE:
  return 1;
  
  case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:

It still seems to break other things though, see e.g.
https://bugs.freedesktop.org/show_bug.cgi?id=57875 .


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast |  Debian, X and DRI developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Mesa (master): r600g: Use default mul/mad function for tgsi-to-llvm

2012-12-06 Thread Michel Dänzer
On Mit, 2012-12-05 at 09:32 -0800, Vincent Lejeune wrote: 
 Module: Mesa
 Branch: master
 Commit: 0ad1fefd6951aa47ab58a41dc9ee73083cbcf85c
 URL:
 http://cgit.freedesktop.org/mesa/mesa/commit/?id=0ad1fefd6951aa47ab58a41dc9ee73083cbcf85c
 
 Author: Vincent Lejeune v...@ovi.com
 Date:   Wed Nov 28 00:35:55 2012 +0100
 
 r600g: Use default mul/mad function for tgsi-to-llvm

This change breaks the piglit tests glsl-{f,v}s-vec4-indexing-temp-src
on radeonsi. I suspect the same would be true with r600g as well if it
didn't fall back to the non-LLVM backend for this test.

Comparing the generated code, I've noticed two main differences so far:

  * LLVM now optimizes away some TGSI MUL operations with constant
1.0, which previously resulted in V_MUL_LEGACY_F32 (non-IEEE
semantics) instructions. 
  * V_MUL_F32 and V_ADD_F32 (IEEE semantics) are used instead of
V_MAD_LEGACY_F32 (non-IEEE semantics) in some places.

I suspect the problem is a (non-)IEEE semantics mismatch between TGSI
and LLVM introduced by this change.


BTW, some general issues with this commit:

The prefix 'r600g:' is misleading, as this change affects radeonsi as
well. I think we've usually used 'radeon/llvm:' for changes to
src/gallium/drivers/radeon.

There is no rationale for this change in the commit log: What is the
intended effect? Why is it a good idea? ...


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast |  Debian, X and DRI developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 57802] including GL/gl.h and another gl header causes redefinition warnings

2012-12-06 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=57802

Brian Paul brian.e.p...@gmail.com changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

--- Comment #4 from Brian Paul brian.e.p...@gmail.com ---
OK, committed.  ac24d172582cdb7a79098ca43ca8870712a98068

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Mesa (master): r600g: Use default mul/mad function for tgsi-to-llvm

2012-12-06 Thread Tom Stellard
On Thu, Dec 06, 2012 at 05:08:07PM +0100, Michel Dänzer wrote:
 On Mit, 2012-12-05 at 09:32 -0800, Vincent Lejeune wrote: 
  Module: Mesa
  Branch: master
  Commit: 0ad1fefd6951aa47ab58a41dc9ee73083cbcf85c
  URL:
  http://cgit.freedesktop.org/mesa/mesa/commit/?id=0ad1fefd6951aa47ab58a41dc9ee73083cbcf85c
  
  Author: Vincent Lejeune v...@ovi.com
  Date:   Wed Nov 28 00:35:55 2012 +0100
  
  r600g: Use default mul/mad function for tgsi-to-llvm
 
 This change breaks the piglit tests glsl-{f,v}s-vec4-indexing-temp-src
 on radeonsi. I suspect the same would be true with r600g as well if it
 didn't fall back to the non-LLVM backend for this test.
 
 Comparing the generated code, I've noticed two main differences so far:
 
   * LLVM now optimizes away some TGSI MUL operations with constant
 1.0, which previously resulted in V_MUL_LEGACY_F32 (non-IEEE
 semantics) instructions. 
   * V_MUL_F32 and V_ADD_F32 (IEEE semantics) are used instead of
 V_MAD_LEGACY_F32 (non-IEEE semantics) in some places.


We really need to fix the SI AsmPrinter and start using the FileCheck tests
in LLVM for things like this.

 
 I suspect the problem is a (non-)IEEE semantics mismatch between TGSI
 and LLVM introduced by this change.
 


What are the semantics of TGSI opcodes?  For MUL and MAD, tgsi_exec uses IEEE
operations, but it seems like the glsl frontend thinks they are non-IEEE. 
 
 BTW, some general issues with this commit:
 
 The prefix 'r600g:' is misleading, as this change affects radeonsi as
 well. I think we've usually used 'radeon/llvm:' for changes to
 src/gallium/drivers/radeon.
 
 There is no rationale for this change in the commit log: What is the
 intended effect? Why is it a good idea? ...
 
 

I agree with you here.  These are things I should have caught during the review.
Should we revert this and the associated LLVM changes?

-Tom

 -- 
 Earthling Michel Dänzer   |   http://www.amd.com
 Libre software enthusiast |  Debian, X and DRI developer
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] AMDGPU/SI: Only allow selecting VGPRs with V_CNDMASK_B32.

2012-12-06 Thread Tom Stellard
On Thu, Dec 06, 2012 at 11:55:15AM +0100, Michel Dänzer wrote:
 From: Michel Dänzer michel.daen...@amd.com
 
 VALU instructions can only read from one SGPR, and that's the condition code
 mask in this case.
 
 Fixes a number of radeonsi piglit regressions from Vincent's max/min changes
 (which means the matching to AMDGPUfmax/min doesn't work for some reason).

This looks good to me, though we should investigate why the AMDGPUfmax/min nodes
aren't being matched.

Reviewed-by: Tom Stellard thomas.stell...@amd.com
 
 Signed-off-by: Michel Dänzer michel.daen...@amd.com
 ---
  lib/Target/AMDGPU/SIInstructions.td |8 
  1 file changed, 4 insertions(+), 4 deletions(-)
 
 diff --git a/lib/Target/AMDGPU/SIInstructions.td 
 b/lib/Target/AMDGPU/SIInstructions.td
 index 3564ec3..ea8de91 100644
 --- a/lib/Target/AMDGPU/SIInstructions.td
 +++ b/lib/Target/AMDGPU/SIInstructions.td
 @@ -764,15 +764,15 @@ def V_CNDMASK_B32_e32 : VOP2 0x, (outs 
 VReg_32:$dst),
  }
  
  def V_CNDMASK_B32_e64 : VOP3 0x0100, (outs VReg_32:$dst),
 -  (ins AllReg_32:$src0, AllReg_32:$src1, SReg_1:$src2, InstFlag:$abs, 
 InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
 +  (ins VReg_32:$src0, VReg_32:$src1, SReg_1:$src2, InstFlag:$abs, 
 InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
V_CNDMASK_B32_e64,
 -  [(set (i32 VReg_32:$dst), (select SReg_1:$src2, AllReg_32:$src1, 
 AllReg_32:$src0))]
 +  [(set (i32 VReg_32:$dst), (select SReg_1:$src2, VReg_32:$src1, 
 VReg_32:$src0))]
  ;
  
  //f32 pattern for V_CNDMASK_B32_e64
  def : Pat 
 -  (f32 (select SReg_1:$src2, AllReg_32:$src1, AllReg_32:$src0)),
 -  (V_CNDMASK_B32_e64 AllReg_32:$src0, AllReg_32:$src1, SReg_1:$src2)
 +  (f32 (select SReg_1:$src2, VReg_32:$src1, VReg_32:$src0)),
 +  (V_CNDMASK_B32_e64 VReg_32:$src0, VReg_32:$src1, SReg_1:$src2)
  ;
  
  defm V_READLANE_B32 : VOP2_32 0x0001, V_READLANE_B32, [];
 -- 
 1.7.10.4
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] gallivm, llvmpipe, draw: Support multiple constant buffers.

2012-12-06 Thread jfonseca
From: José Fonseca jfons...@vmware.com

Support 4 constant buffers (as defined by LP_MAX_TGSI_CONST_BUFFERS) rather
than 32 (as defined by PIPE_MAX_CONSTANT_BUFFERS), because supporting all 32
would make the jit context unnecessarily large.
---
 src/gallium/auxiliary/draw/draw_llvm.c |5 +-
 src/gallium/auxiliary/draw/draw_llvm.h |9 +-
 .../draw/draw_pt_fetch_shade_pipeline_llvm.c   |   16 ++--
 src/gallium/auxiliary/gallivm/lp_bld_limits.h  |3 +
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c|   19 -
 src/gallium/drivers/llvmpipe/lp_context.h  |2 +-
 src/gallium/drivers/llvmpipe/lp_jit.c  |3 +-
 src/gallium/drivers/llvmpipe/lp_jit.h  |5 +-
 src/gallium/drivers/llvmpipe/lp_setup.c|   90 
 src/gallium/drivers/llvmpipe/lp_setup.h|3 +-
 src/gallium/drivers/llvmpipe/lp_setup_context.h|2 +-
 src/gallium/drivers/llvmpipe/lp_state_derived.c|5 +-
 src/gallium/drivers/llvmpipe/lp_state_fs.c |2 +-
 13 files changed, 103 insertions(+), 61 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_llvm.c 
b/src/gallium/auxiliary/draw/draw_llvm.c
index 6f0e1de..039db8f 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -161,8 +161,9 @@ create_jit_context_type(struct gallivm_state *gallivm,
LLVMTypeRef elem_types[5];
LLVMTypeRef context_type;
 
-   elem_types[0] = LLVMPointerType(float_type, 0); /* vs_constants */
-   elem_types[1] = LLVMPointerType(float_type, 0); /* gs_constants */
+   elem_types[0] = LLVMArrayType(LLVMPointerType(float_type, 0), /* 
vs_constants */
+ LP_MAX_TGSI_CONST_BUFFERS);
+   elem_types[1] = elem_types[0]; /* gs_constants */
elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4),
  DRAW_TOTAL_CLIP_PLANES), 0);
elem_types[3] = LLVMPointerType(float_type, 0); /* viewport */
diff --git a/src/gallium/auxiliary/draw/draw_llvm.h 
b/src/gallium/auxiliary/draw/draw_llvm.h
index d7a630e..bd11886 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.h
+++ b/src/gallium/auxiliary/draw/draw_llvm.h
@@ -32,6 +32,7 @@
 
 #include draw/draw_vs.h
 #include gallivm/lp_bld_sample.h
+#include gallivm/lp_bld_limits.h
 
 #include pipe/p_context.h
 #include util/u_simple_list.h
@@ -94,8 +95,8 @@ enum {
  */
 struct draw_jit_context
 {
-   const float *vs_constants;
-   const float *gs_constants;
+   const float *vs_constants[LP_MAX_TGSI_CONST_BUFFERS];
+   const float *gs_constants[LP_MAX_TGSI_CONST_BUFFERS];
float (*planes) [DRAW_TOTAL_CLIP_PLANES][4];
float *viewport;
 
@@ -104,10 +105,10 @@ struct draw_jit_context
 
 
 #define draw_jit_context_vs_constants(_gallivm, _ptr) \
-   lp_build_struct_get(_gallivm, _ptr, 0, vs_constants)
+   lp_build_struct_get_ptr(_gallivm, _ptr, 0, vs_constants)
 
 #define draw_jit_context_gs_constants(_gallivm, _ptr) \
-   lp_build_struct_get(_gallivm, _ptr, 1, gs_constants)
+   lp_build_struct_get_ptr(_gallivm, _ptr, 1, gs_constants)
 
 #define draw_jit_context_planes(_gallivm, _ptr) \
lp_build_struct_get(_gallivm, _ptr, 2, planes)
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c 
b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
index 04b286f..507c158 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -81,6 +81,8 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
const unsigned nr = MAX2( shader-base.info.num_inputs,
  shader-base.info.num_outputs + 1 );
 
+   unsigned i;
+
fpme-input_prim = in_prim;
fpme-opt = opt;
 
@@ -138,7 +140,6 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
}
else {
   /* Need to create new variant */
-  unsigned i;
 
   /* First check if we've created too many variants.  If so, free
* 25% of the LRU to avoid using too much memory.
@@ -171,11 +172,14 @@ llvm_middle_end_prepare( struct draw_pt_middle_end 
*middle,
 
fpme-current_variant = variant;
 
-   /*XXX we only support one constant buffer */
-   fpme-llvm-jit_context.vs_constants =
-  draw-pt.user.vs_constants[0];
-   fpme-llvm-jit_context.gs_constants =
-  draw-pt.user.gs_constants[0];
+   for (i = 0; i  Elements(fpme-llvm-jit_context.vs_constants); ++i) {
+  fpme-llvm-jit_context.vs_constants[i] =
+ draw-pt.user.vs_constants[i];
+   }
+   for (i = 0; i  Elements(fpme-llvm-jit_context.gs_constants); ++i) {
+  fpme-llvm-jit_context.gs_constants[i] =
+ draw-pt.user.gs_constants[i];
+   }
fpme-llvm-jit_context.planes =
   (float (*) [DRAW_TOTAL_CLIP_PLANES][4]) draw-pt.user.planes[0];
fpme-llvm-jit_context.viewport =
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h 

[Mesa-dev] [PATCH 2/4] gallivm: Allow indirection from TEMP registers too.

2012-12-06 Thread jfonseca
From: José Fonseca jfons...@vmware.com

The ADDR file is cumbersome for native integer capable drivers.  We
should consider deprecating it eventually, but this just adds support
for indirection from TEMP registers.
---
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c |   20 +---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 9caac21..bd783b5 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -533,9 +533,23 @@ get_indirect_index(struct lp_build_tgsi_soa_context *bld,
base = lp_build_const_int_vec(bld-bld_base.base.gallivm, uint_bld-type, 
reg_index);
 
assert(swizzle  4);
-   rel = LLVMBuildLoad(builder,
-bld-addr[indirect_reg-Index][swizzle],
-load addr reg);
+   switch (indirect_reg-File) {
+   case TGSI_FILE_ADDRESS:
+  rel = LLVMBuildLoad(builder,
+  bld-addr[indirect_reg-Index][swizzle],
+  load addr reg);
+  /* ADDR registers are uint */
+  break;
+   case TGSI_FILE_TEMPORARY:
+  rel = lp_get_temp_ptr_soa(bld, indirect_reg-Index, swizzle);
+  rel = LLVMBuildLoad(builder, rel, load temp reg);
+  /* TEMP registers are float type */
+  rel = LLVMBuildBitCast(builder, rel, uint_bld-vec_type, );
+  break;
+   default:
+  assert(0);
+  rel = uint_bld-zero;
+   }
 
index = lp_build_add(uint_bld, base, rel);
 
-- 
1.7.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] draw: Dump LLVM shader key.

2012-12-06 Thread jfonseca
From: José Fonseca jfons...@vmware.com

Just like we do in llvmpipe for the fragment shader compilation key.
---
 src/gallium/auxiliary/draw/draw_llvm.c |   31 ++-
 src/gallium/auxiliary/draw/draw_llvm.h |3 +++
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/draw/draw_llvm.c 
b/src/gallium/auxiliary/draw/draw_llvm.c
index 039db8f..a3a3bbf 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -443,8 +443,9 @@ generate_vs(struct draw_llvm_variant *variant,
LLVMValueRef consts_ptr = draw_jit_context_vs_constants(variant-gallivm, 
context_ptr);
struct lp_build_sampler_soa *sampler = 0;
 
-   if (gallivm_debug  GALLIVM_DEBUG_IR) {
+   if (gallivm_debug  (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
   tgsi_dump(tokens, 0);
+  draw_llvm_dump_variant_key(variant-key);
}
 
if (llvm-draw-num_sampler_views  llvm-draw-num_samplers)
@@ -1364,6 +1365,34 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, char 
*store)
 
 
 void
+draw_llvm_dump_variant_key(struct draw_llvm_variant_key *key)
+{
+   unsigned i;
+   struct lp_sampler_static_state *sampler = 
draw_llvm_variant_key_samplers(key);
+
+   debug_printf(clamp_vertex_color = %u\n, key-clamp_vertex_color);
+   debug_printf(clip_xy = %u\n, key-clip_xy);
+   debug_printf(clip_z = %u\n, key-clip_z);
+   debug_printf(clip_user = %u\n, key-clip_user);
+   debug_printf(bypass_viewport = %u\n, key-bypass_viewport);
+   debug_printf(clip_halfz = %u\n, key-clip_halfz);
+   debug_printf(need_edgeflags = %u\n, key-need_edgeflags);
+   debug_printf(ucp_enable = %u\n, key-ucp_enable);
+
+   for (i = 0 ; i  key-nr_vertex_elements; i++) {
+  debug_printf(vertex_element[%i].src_offset = %u\n, i, 
key-vertex_element[i].src_offset);
+  debug_printf(vertex_element[%i].instance_divisor = %u\n, i, 
key-vertex_element[i].instance_divisor);
+  debug_printf(vertex_element[%i].vertex_buffer_index = %u\n, i, 
key-vertex_element[i].vertex_buffer_index);
+  debug_printf(vertex_element[%i].src_format = %s\n, i, 
util_format_name(key-vertex_element[i].src_format));
+   }
+
+   for (i = 0 ; i  key-nr_samplers; i++) {
+  debug_printf(sampler[%i].src_format = %s\n, i, 
util_format_name(sampler[i].format));
+   }
+}
+
+
+void
 draw_llvm_set_mapped_texture(struct draw_context *draw,
  unsigned sampler_idx,
  uint32_t width, uint32_t height, uint32_t depth,
diff --git a/src/gallium/auxiliary/draw/draw_llvm.h 
b/src/gallium/auxiliary/draw/draw_llvm.h
index bd11886..892973c 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.h
+++ b/src/gallium/auxiliary/draw/draw_llvm.h
@@ -284,6 +284,9 @@ draw_llvm_destroy_variant(struct draw_llvm_variant 
*variant);
 struct draw_llvm_variant_key *
 draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store);
 
+void
+draw_llvm_dump_variant_key(struct draw_llvm_variant_key *key);
+
 struct lp_build_sampler_soa *
 draw_llvm_sampler_soa_create(const struct lp_sampler_static_state 
*static_state,
  LLVMValueRef context_ptr);
-- 
1.7.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/4] gallivm: Rudimentary native integer support.

2012-12-06 Thread jfonseca
From: José Fonseca jfons...@vmware.com

Just enough for draw module to work ok.

This improves piglit attribs GL3, though something fishy is still
happening with certain unsigned integer values.
---
 src/gallium/auxiliary/gallivm/lp_bld_format_aos.c  |4 ++--
 .../auxiliary/gallivm/lp_bld_format_aos_array.c|   21 +---
 2 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c 
b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
index c7fdb6e..e5c93f0 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
@@ -234,7 +234,6 @@ lp_build_unpack_arith_rgba_aos(struct gallivm_state 
*gallivm,
shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), );
masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), );
 
-
if (!needs_uitofp) {
   /* UIToFP can't be expressed in SSE2 */
   casted = LLVMBuildSIToFP(builder, masked, 
LLVMVectorType(LLVMFloatTypeInContext(gallivm-context), 4), );
@@ -438,7 +437,8 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
format_desc-is_bitmask 
!format_desc-is_mixed 
(format_desc-channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
-format_desc-channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) {
+format_desc-channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED) 
+   !format_desc-channel[0].pure_integer) {
 
   LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
   LLVMValueRef res;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos_array.c 
b/src/gallium/auxiliary/gallivm/lp_bld_format_aos_array.c
index cf29617..1eb9446 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos_array.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos_array.c
@@ -113,6 +113,8 @@ lp_build_fetch_rgba_aos_array(struct gallivm_state *gallivm,
LLVMTypeRef src_vec_type;
LLVMValueRef ptr, res = NULL;
struct lp_type src_type;
+   boolean pure_integer = format_desc-channel[0].pure_integer;
+   struct lp_type tmp_type;
 
lp_type_from_format_desc(src_type, format_desc);
 
@@ -144,10 +146,23 @@ lp_build_fetch_rgba_aos_array(struct gallivm_state 
*gallivm,
   src_type.length = dst_type.length;
}
 
+   tmp_type = dst_type;
+   if (pure_integer) {
+  assert(dst_type.floating);
+  tmp_type.floating = 0;
+   }
+
/* Convert to correct format */
-   lp_build_conv(gallivm, src_type, dst_type, res, 1, res, 1);
+   lp_build_conv(gallivm, src_type, tmp_type, res, 1, res, 1);
 
/* Swizzle it */
-   lp_build_context_init(bld, gallivm, dst_type);
-   return lp_build_format_swizzle_aos(format_desc, bld, res);
+   lp_build_context_init(bld, gallivm, tmp_type);
+   res = lp_build_format_swizzle_aos(format_desc, bld, res);
+
+   /* Bitcast to floats (for pure integers) */
+   if (pure_integer) {
+  res = LLVMBuildBitCast(builder, res, lp_build_vec_type(gallivm, 
dst_type), );
+   }
+
+   return res;
 }
-- 
1.7.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/4] gallivm, llvmpipe, draw: Support multiple constant buffers.

2012-12-06 Thread Brian Paul

On 12/06/2012 10:35 AM, jfons...@vmware.com wrote:

From: José Fonsecajfons...@vmware.com

Support 4 (defined in LP_MAX_TGSI_CONST_BUFFERS) as opposed to 32 (as
defined by PIPE_MAX_CONSTANT_BUFFERS) because that would make the jit
context become unnecessarily large.
---
  src/gallium/auxiliary/draw/draw_llvm.c |5 +-
  src/gallium/auxiliary/draw/draw_llvm.h |9 +-
  .../draw/draw_pt_fetch_shade_pipeline_llvm.c   |   16 ++--
  src/gallium/auxiliary/gallivm/lp_bld_limits.h  |3 +
  src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c|   19 -
  src/gallium/drivers/llvmpipe/lp_context.h  |2 +-
  src/gallium/drivers/llvmpipe/lp_jit.c  |3 +-
  src/gallium/drivers/llvmpipe/lp_jit.h  |5 +-
  src/gallium/drivers/llvmpipe/lp_setup.c|   90 
  src/gallium/drivers/llvmpipe/lp_setup.h|3 +-
  src/gallium/drivers/llvmpipe/lp_setup_context.h|2 +-
  src/gallium/drivers/llvmpipe/lp_state_derived.c|5 +-
  src/gallium/drivers/llvmpipe/lp_state_fs.c |2 +-
  13 files changed, 103 insertions(+), 61 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_llvm.c 
b/src/gallium/auxiliary/draw/draw_llvm.c
index 6f0e1de..039db8f 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -161,8 +161,9 @@ create_jit_context_type(struct gallivm_state *gallivm,
 LLVMTypeRef elem_types[5];
 LLVMTypeRef context_type;

-   elem_types[0] = LLVMPointerType(float_type, 0); /* vs_constants */
-   elem_types[1] = LLVMPointerType(float_type, 0); /* gs_constants */
+   elem_types[0] = LLVMArrayType(LLVMPointerType(float_type, 0), /* 
vs_constants */
+ LP_MAX_TGSI_CONST_BUFFERS);
+   elem_types[1] = elem_types[0]; /* gs_constants */
 elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4),
   DRAW_TOTAL_CLIP_PLANES), 0);
 elem_types[3] = LLVMPointerType(float_type, 0); /* viewport */
diff --git a/src/gallium/auxiliary/draw/draw_llvm.h 
b/src/gallium/auxiliary/draw/draw_llvm.h
index d7a630e..bd11886 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.h
+++ b/src/gallium/auxiliary/draw/draw_llvm.h
@@ -32,6 +32,7 @@

  #include draw/draw_vs.h
  #include gallivm/lp_bld_sample.h
+#include gallivm/lp_bld_limits.h

  #include pipe/p_context.h
  #include util/u_simple_list.h
@@ -94,8 +95,8 @@ enum {
   */
  struct draw_jit_context
  {
-   const float *vs_constants;
-   const float *gs_constants;
+   const float *vs_constants[LP_MAX_TGSI_CONST_BUFFERS];
+   const float *gs_constants[LP_MAX_TGSI_CONST_BUFFERS];
 float (*planes) [DRAW_TOTAL_CLIP_PLANES][4];
 float *viewport;

@@ -104,10 +105,10 @@ struct draw_jit_context


  #define draw_jit_context_vs_constants(_gallivm, _ptr) \
-   lp_build_struct_get(_gallivm, _ptr, 0, vs_constants)
+   lp_build_struct_get_ptr(_gallivm, _ptr, 0, vs_constants)

  #define draw_jit_context_gs_constants(_gallivm, _ptr) \
-   lp_build_struct_get(_gallivm, _ptr, 1, gs_constants)
+   lp_build_struct_get_ptr(_gallivm, _ptr, 1, gs_constants)

  #define draw_jit_context_planes(_gallivm, _ptr) \
 lp_build_struct_get(_gallivm, _ptr, 2, planes)
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c 
b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
index 04b286f..507c158 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -81,6 +81,8 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
 const unsigned nr = MAX2( shader-base.info.num_inputs,
   shader-base.info.num_outputs + 1 );

+   unsigned i;
+
 fpme-input_prim = in_prim;
 fpme-opt = opt;

@@ -138,7 +140,6 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
 }
 else {
/* Need to create new variant */
-  unsigned i;

/* First check if we've created too many variants.  If so, free
 * 25% of the LRU to avoid using too much memory.
@@ -171,11 +172,14 @@ llvm_middle_end_prepare( struct draw_pt_middle_end 
*middle,

 fpme-current_variant = variant;

-   /*XXX we only support one constant buffer */
-   fpme-llvm-jit_context.vs_constants =
-  draw-pt.user.vs_constants[0];
-   fpme-llvm-jit_context.gs_constants =
-  draw-pt.user.gs_constants[0];
+   for (i = 0; i  Elements(fpme-llvm-jit_context.vs_constants); ++i) {
+  fpme-llvm-jit_context.vs_constants[i] =
+ draw-pt.user.vs_constants[i];
+   }
+   for (i = 0; i  Elements(fpme-llvm-jit_context.gs_constants); ++i) {
+  fpme-llvm-jit_context.gs_constants[i] =
+ draw-pt.user.gs_constants[i];
+   }
 fpme-llvm-jit_context.planes =
(float (*) [DRAW_TOTAL_CLIP_PLANES][4]) draw-pt.user.planes[0];
 fpme-llvm-jit_context.viewport =
diff --git 

Re: [Mesa-dev] [PATCH 2/4] gallivm: Allow indirection from TEMP registers too.

2012-12-06 Thread Brian Paul

On 12/06/2012 10:35 AM, jfons...@vmware.com wrote:

From: José Fonsecajfons...@vmware.com

The ADDR file is cumbersome for native integer capable drivers.  We
should consider deprecating it eventually, but this just adds support
for indirection from TEMP registers.
---
  src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c |   20 +---
  1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 9caac21..bd783b5 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -533,9 +533,23 @@ get_indirect_index(struct lp_build_tgsi_soa_context *bld,
 base = lp_build_const_int_vec(bld-bld_base.base.gallivm, uint_bld-type, 
reg_index);

 assert(swizzle  4);
-   rel = LLVMBuildLoad(builder,
-bld-addr[indirect_reg-Index][swizzle],
-load addr reg);
+   switch (indirect_reg-File) {
+   case TGSI_FILE_ADDRESS:
+  rel = LLVMBuildLoad(builder,
+  bld-addr[indirect_reg-Index][swizzle],
+  load addr reg);
+  /* ADDR registers are uint */
+  break;
+   case TGSI_FILE_TEMPORARY:
+  rel = lp_get_temp_ptr_soa(bld, indirect_reg-Index, swizzle);
+  rel = LLVMBuildLoad(builder, rel, load temp reg);
+  /* TEMP registers are float type */
+  rel = LLVMBuildBitCast(builder, rel, uint_bld-vec_type, );


Is bitcasting right?  I'd expect that if the temp register has float 
values that we need to do a float-int conversion here.




+  break;
+   default:
+  assert(0);
+  rel = uint_bld-zero;
+   }

 index = lp_build_add(uint_bld, base, rel);



Looks OK otherwise.

Reviewed-by: Brian Paul bri...@vmware.com
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/4] draw: Dump LLVM shader key.

2012-12-06 Thread Brian Paul

On 12/06/2012 10:35 AM, jfons...@vmware.com wrote:

From: José Fonsecajfons...@vmware.com

Just like we do in llvmpipe for the fragment shader compilation key.
---
  src/gallium/auxiliary/draw/draw_llvm.c |   31 ++-
  src/gallium/auxiliary/draw/draw_llvm.h |3 +++
  2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/draw/draw_llvm.c 
b/src/gallium/auxiliary/draw/draw_llvm.c
index 039db8f..a3a3bbf 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -443,8 +443,9 @@ generate_vs(struct draw_llvm_variant *variant,
 LLVMValueRef consts_ptr = draw_jit_context_vs_constants(variant-gallivm, 
context_ptr);
 struct lp_build_sampler_soa *sampler = 0;

-   if (gallivm_debug  GALLIVM_DEBUG_IR) {
+   if (gallivm_debug  (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
tgsi_dump(tokens, 0);
+  draw_llvm_dump_variant_key(variant-key);
 }

 if (llvm-draw-num_sampler_views  llvm-draw-num_samplers)
@@ -1364,6 +1365,34 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, char 
*store)


  void
+draw_llvm_dump_variant_key(struct draw_llvm_variant_key *key)
+{
+   unsigned i;
+   struct lp_sampler_static_state *sampler = 
draw_llvm_variant_key_samplers(key);
+
+   debug_printf(clamp_vertex_color = %u\n, key-clamp_vertex_color);
+   debug_printf(clip_xy = %u\n, key-clip_xy);
+   debug_printf(clip_z = %u\n, key-clip_z);
+   debug_printf(clip_user = %u\n, key-clip_user);
+   debug_printf(bypass_viewport = %u\n, key-bypass_viewport);
+   debug_printf(clip_halfz = %u\n, key-clip_halfz);
+   debug_printf(need_edgeflags = %u\n, key-need_edgeflags);
+   debug_printf(ucp_enable = %u\n, key-ucp_enable);
+
+   for (i = 0 ; i  key-nr_vertex_elements; i++) {
+  debug_printf(vertex_element[%i].src_offset = %u\n, i, 
key-vertex_element[i].src_offset);
+  debug_printf(vertex_element[%i].instance_divisor = %u\n, i, 
key-vertex_element[i].instance_divisor);
+  debug_printf(vertex_element[%i].vertex_buffer_index = %u\n, i, 
key-vertex_element[i].vertex_buffer_index);
+  debug_printf(vertex_element[%i].src_format = %s\n, i, 
util_format_name(key-vertex_element[i].src_format));
+   }
+
+   for (i = 0 ; i  key-nr_samplers; i++) {
+  debug_printf(sampler[%i].src_format = %s\n, i, 
util_format_name(sampler[i].format));
+   }
+}
+
+
+void
  draw_llvm_set_mapped_texture(struct draw_context *draw,
   unsigned sampler_idx,
   uint32_t width, uint32_t height, uint32_t depth,
diff --git a/src/gallium/auxiliary/draw/draw_llvm.h 
b/src/gallium/auxiliary/draw/draw_llvm.h
index bd11886..892973c 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.h
+++ b/src/gallium/auxiliary/draw/draw_llvm.h
@@ -284,6 +284,9 @@ draw_llvm_destroy_variant(struct draw_llvm_variant 
*variant);
  struct draw_llvm_variant_key *
  draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store);

+void
+draw_llvm_dump_variant_key(struct draw_llvm_variant_key *key);
+
  struct lp_build_sampler_soa *
  draw_llvm_sampler_soa_create(const struct lp_sampler_static_state 
*static_state,
   LLVMValueRef context_ptr);


Reviewed-by: Brian Paul bri...@vmware.com
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] gallivm: Rudimentary native integer support.

2012-12-06 Thread Brian Paul

On 12/06/2012 10:35 AM, jfons...@vmware.com wrote:

From: José Fonsecajfons...@vmware.com

Just enough for draw module to work ok.

This improves piglit attribs GL3, though something fishy is still
happening with certain unsigned integer values.
---
  src/gallium/auxiliary/gallivm/lp_bld_format_aos.c  |4 ++--
  .../auxiliary/gallivm/lp_bld_format_aos_array.c|   21 +---
  2 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c 
b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
index c7fdb6e..e5c93f0 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
@@ -234,7 +234,6 @@ lp_build_unpack_arith_rgba_aos(struct gallivm_state 
*gallivm,
 shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), );
 masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), );

-
 if (!needs_uitofp) {
/* UIToFP can't be expressed in SSE2 */
casted = LLVMBuildSIToFP(builder, masked, 
LLVMVectorType(LLVMFloatTypeInContext(gallivm-context), 4), );
@@ -438,7 +437,8 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
 format_desc-is_bitmask
 !format_desc-is_mixed
 (format_desc-channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
-format_desc-channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) {
+format_desc-channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)
+   !format_desc-channel[0].pure_integer) {

LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
LLVMValueRef res;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos_array.c 
b/src/gallium/auxiliary/gallivm/lp_bld_format_aos_array.c
index cf29617..1eb9446 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos_array.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos_array.c
@@ -113,6 +113,8 @@ lp_build_fetch_rgba_aos_array(struct gallivm_state *gallivm,
 LLVMTypeRef src_vec_type;
 LLVMValueRef ptr, res = NULL;
 struct lp_type src_type;
+   boolean pure_integer = format_desc-channel[0].pure_integer;
+   struct lp_type tmp_type;

 lp_type_from_format_desc(src_type, format_desc);

@@ -144,10 +146,23 @@ lp_build_fetch_rgba_aos_array(struct gallivm_state 
*gallivm,
src_type.length = dst_type.length;
 }

+   tmp_type = dst_type;
+   if (pure_integer) {
+  assert(dst_type.floating);
+  tmp_type.floating = 0;
+   }
+
 /* Convert to correct format */
-   lp_build_conv(gallivm, src_type, dst_type,res, 1,res, 1);
+   lp_build_conv(gallivm, src_type, tmp_type,res, 1,res, 1);

 /* Swizzle it */
-   lp_build_context_init(bld, gallivm, dst_type);
-   return lp_build_format_swizzle_aos(format_desc,bld, res);
+   lp_build_context_init(bld, gallivm, tmp_type);
+   res = lp_build_format_swizzle_aos(format_desc,bld, res);
+
+   /* Bitcast to floats (for pure integers) */
+   if (pure_integer) {
+  res = LLVMBuildBitCast(builder, res, lp_build_vec_type(gallivm, dst_type), 
);
+   }
+
+   return res;
  }


Reviewed-by: Brian Paul bri...@vmware.com
but Roland might want to look at this too.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] linker: Allow aliased location assignments for shader inputs / outputs

2012-12-06 Thread Ian Romanick
From: Ian Romanick ian.d.roman...@intel.com

The linker quotes two bits of OpenGL 4.0 spec text and claims that
either two paragraphs are contradictory or misunderstood.  It appears
that the latter was the case.

Page 61 of the OpenGL 4.0 spec:

 LinkProgram will fail if the attribute bindings assigned by
 BindAttribLocation do not leave not enough space to assign a
 location for an active matrix attribute or an active attribute
 array, both of which require multiple contiguous generic
 attributes.

Later on page 61 of the OpenGL 4.0 spec:

It is possible for an application to bind more than one attribute
name to the same location. This is referred to as aliasing. This
will only work if only one of the aliased attributes is active in
the executable program, or if no path through the shader consumes
more than one attribute of a set of attributes aliased to the same
location. A link error can occur if the linker determines that every
path through the shader consumes multiple aliased attributes, but
implementations are not required to generate an error in this case.

These two paragraphs seem a bit contradictory, but they're talking about
two different things.

The first paragraph is talking about assignments near the end of the
slot limit.  Some types (e.g., matrix types) use multiple slots.  A mat4
uses four slots.  It is illegal to assign a multi-slot variable to a
location that would cause part of the variable to be assigned
out-of-bounds slots.

The second paragraph is talking about cases where multiple variables are
assigned the same slot.  Variables can be assigned the same slot, but
unpredictable things might happen.

At least one test in the OpenGL ES 3.0 conformance suite tries to assign
multiple variables to the same location without a link error.  It does
something like:

layout(location = 3) in vec4 a;
layout(location = 3) in vec4 b;

The change in this patch is to just generate an error if the assigned
location would cause a variable to spill over the end (the situation
described by the first quoted spec paragraph).

Fixes es3conform's explicit_attrib_location_vertex_input_aliased test.

Signed-off-by: Ian Romanick ian.d.roman...@intel.com
Reviewed-and-Tested-by: Matt Turner matts...@gmail.com
---
 src/glsl/linker.cpp | 28 +---
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 3b2ab96..01ebcb6 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -1393,11 +1393,18 @@ assign_attribute_or_color_locations(gl_shader_program 
*prog,
 * but implementations are not required to generate an error
 * in this case.
 *
-* These two paragraphs are either somewhat contradictory, or I
-* don't fully understand one or both of them.
-*/
-   /* FINISHME: The code as currently written does not support
-* FINISHME: attribute location aliasing (see comment above).
+* These two paragraphs seem a bit contradictory, but they're
+* talking about two different things.
+*
+* The first paragraph is talking about assignments near the end
+* of the slot limit.  Some types (e.g., matrix types) use
+* multiple slots.  A mat4 uses four slots.  It is illegal to
+* assign a multi-slot variable to a location that would cause
+* part of the variable to be assign out of bounds slots.
+*
+* The second paragraph is talking about cases where multiple
+* varaibles are assigned the same slot.  Variables can be
+* assigned the same slot, but unpredictable things might happen.
 */
/* Mask representing the contiguous slots that will be used by
 * this attribute.
@@ -1405,16 +1412,15 @@ assign_attribute_or_color_locations(gl_shader_program 
*prog,
const unsigned attr = var-location - generic_base;
const unsigned use_mask = (1  slots) - 1;
 
-   /* Generate a link error if the set of bits requested for this
-* attribute overlaps any previously allocated bits.
+   /* Generate a link error if the location assigned to the variable
+* causes it to spill over the limit.
 */
-   if ((~(use_mask  attr)  used_locations) != used_locations) {
+   if (attr + slots  max_index) {
   const char *const string = (target_index == MESA_SHADER_VERTEX)
  ? vertex shader input : fragment shader output;
   linker_error(prog,
-   insufficient contiguous locations 
-   available for %s `%s' %d %d %d, string,
-   var-name, used_locations, use_mask, attr);
+   insufficient locations available for %s `%s',
+ 

Re: [Mesa-dev] [PATCH 2/4] gallivm: Allow indirection from TEMP registers too.

2012-12-06 Thread Jose Fonseca


- Original Message -
 On 12/06/2012 10:35 AM, jfons...@vmware.com wrote:
  From: José Fonsecajfons...@vmware.com
 
  The ADDR file is cumbersome for native integer capable drivers.  We
  should consider deprecating it eventually, but this just adds
  support
  for indirection from TEMP registers.
  ---
src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c |   20
+---
1 file changed, 17 insertions(+), 3 deletions(-)
 
  diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
  b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
  index 9caac21..bd783b5 100644
  --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
  +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
  @@ -533,9 +533,23 @@ get_indirect_index(struct
  lp_build_tgsi_soa_context *bld,
   base = lp_build_const_int_vec(bld-bld_base.base.gallivm,
   uint_bld-type, reg_index);
 
   assert(swizzle  4);
  -   rel = LLVMBuildLoad(builder,
  -bld-addr[indirect_reg-Index][swizzle],
  -load addr reg);
  +   switch (indirect_reg-File) {
  +   case TGSI_FILE_ADDRESS:
  +  rel = LLVMBuildLoad(builder,
  +  bld-addr[indirect_reg-Index][swizzle],
  +  load addr reg);
  +  /* ADDR registers are uint */
  +  break;
  +   case TGSI_FILE_TEMPORARY:
  +  rel = lp_get_temp_ptr_soa(bld, indirect_reg-Index,
  swizzle);
  +  rel = LLVMBuildLoad(builder, rel, load temp reg);
  +  /* TEMP registers are float type */
  +  rel = LLVMBuildBitCast(builder, rel, uint_bld-vec_type,
  );
 
 Is bitcasting right?  I'd expect that if the temp register has float
 values that we need to do a float-int conversion here.

The comment is not precise enough: LLVM is strongly typed, and TEMPs always 
have float LLVM type, but their actual value can be FLOAT/INT/UINT depending on 
the opcode, and for indirection, the index is expected to be an integer.

I'll update the comment with this.

 
  +  break;
  +   default:
  +  assert(0);
  +  rel = uint_bld-zero;
  +   }
 
   index = lp_build_add(uint_bld, base, rel);
 
 
 Looks OK otherwise.
 
 Reviewed-by: Brian Paul bri...@vmware.com
 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Mesa (master): r600g: Use default mul/mad function for tgsi-to-llvm

2012-12-06 Thread Vincent Lejeune
Sorry for the inconvenience.
I think the r600g backend works because of this patch, which switches the MUL and 
MUL_IEEE definitions:
http://lists.freedesktop.org/archives/mesa-dev/2012-November/030748.html

The rationale behind the patch is to use fmul instead of a custom intrinsic 
for the OpenGL path.
Glsl-to-llvm generates as vanilla instructions as possible and uses fmul, 
this consolidates behaviour 
between tgsi-to-llvm and glsl-to-llvm. 
IMHO emitting MUL and MUL_IEEE should be decided with the presence of some 
fast-math arg to the backend,
or some context variable telling if we have a glsl or an opencl shader.

Vincent

- Mail original -
 De : Tom Stellard t...@stellard.net
 À : Michel Dänzer mic...@daenzer.net
 Cc : Vincent Lejeune v...@ovi.com; mesa-dev@lists.freedesktop.org
 Envoyé le : Jeudi 6 décembre 2012 18h05
 Objet : Re: [Mesa-dev] Mesa (master): r600g: Use default mul/mad function for 
 tgsi-to-llvm
 
 On Thu, Dec 06, 2012 at 05:08:07PM +0100, Michel Dänzer wrote:
  On Mit, 2012-12-05 at 09:32 -0800, Vincent Lejeune wrote: 
   Module: Mesa
   Branch: master
   Commit: 0ad1fefd6951aa47ab58a41dc9ee73083cbcf85c
   URL:    
 http://cgit.freedesktop.org/mesa/mesa/commit/?id=0ad1fefd6951aa47ab58a41dc9ee73083cbcf85c
   
   Author: Vincent Lejeune v...@ovi.com
   Date:   Wed Nov 28 00:35:55 2012 +0100
   
   r600g: Use default mul/mad function for tgsi-to-llvm
 
  This change breaks the piglit tests glsl-{f,v}s-vec4-indexing-temp-src
  on radeonsi. I suspect the same would be true with r600g as well if it
  didn't fall back to the non-LLVM backend for this test.
 
  Comparing the generated code, I've noticed two main differences so far:
 
        * LLVM now optimizes away some TGSI MUL operations with constant
          1.0, which previously resulted in V_MUL_LEGACY_F32 (non-IEEE
          semantics) instructions. 
        * V_MUL_F32 and V_ADD_F32 (IEEE semantics) are used instead of
          V_MAD_LEGACY_F32 (non-IEEE semantics) in some places.
 
 
 We really need to fix the SI AsmPrinter and start using the FileCheck tests
 in LLVM for things like this.
 
 
  I suspect the problem is a (non-)IEEE semantics mismatch between TGSI
  and LLVM introduced by this change.
 
 
 
 What are the semantics of TGSI opcodes?  For MUL and MAD, tgsi_exec uses IEEE
 operations, but it seems like the glsl frontend thinks they are non-IEEE. 
 
  BTW, some general issues with this commit:
 
  The prefix 'r600g:' is misleading, as this change affects radeonsi 
 as
  well. I think we've usually used 'radeon/llvm:' for changes
  src/gallium/drivers/radeon.
 
  There is no rationale for this change in the commit log: What is the
  intended effect? Why is it a good idea? ...
 
 
 
 I agree with you here.  These are things I should have caught during the 
 review.
 Should we revert this and the associated LLVM changes?
 
 -Tom
 
  -- 
  Earthling Michel Dänzer           |                  http://www.amd.com
  Libre software enthusiast         |          Debian, X and DRI developer
  ___
  mesa-dev mailing list
  mesa-dev@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/mesa-dev
  
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Mesa (master): r600g: Use default mul/mad function for tgsi-to-llvm

2012-12-06 Thread Michel Dänzer
On Don, 2012-12-06 at 09:05 -0800, Tom Stellard wrote: 
 On Thu, Dec 06, 2012 at 05:08:07PM +0100, Michel Dänzer wrote:
  On Mit, 2012-12-05 at 09:32 -0800, Vincent Lejeune wrote: 
   Module: Mesa
   Branch: master
   Commit: 0ad1fefd6951aa47ab58a41dc9ee73083cbcf85c
   URL:
   http://cgit.freedesktop.org/mesa/mesa/commit/?id=0ad1fefd6951aa47ab58a41dc9ee73083cbcf85c
   
   Author: Vincent Lejeune v...@ovi.com
   Date:   Wed Nov 28 00:35:55 2012 +0100
   
   r600g: Use default mul/mad function for tgsi-to-llvm
  
  This change breaks the piglit tests glsl-{f,v}s-vec4-indexing-temp-src
  on radeonsi. I suspect the same would be true with r600g as well if it
  didn't fall back to the non-LLVM backend for this test.
  
  Comparing the generated code, I've noticed two main differences so far:
  
* LLVM now optimizes away some TGSI MUL operations with constant
  1.0, which previously resulted in V_MUL_LEGACY_F32 (non-IEEE
  semantics) instructions. 
* V_MUL_F32 and V_ADD_F32 (IEEE semantics) are used instead of
  V_MAD_LEGACY_F32 (non-IEEE semantics) in some places.

BTW, I have a fix so the second bullet at least becomes V_MAD_F32.


  I suspect the problem is a (non-)IEEE semantics mismatch between TGSI
  and LLVM introduced by this change.
 
 What are the semantics of TGSI opcodes?  For MUL and MAD, tgsi_exec uses IEEE
 operations, but it seems like the glsl frontend thinks they are non-IEEE. 

Not sure... FWIW, the tests pass on llvmpipe.

Reverting only the MAD related changes doesn't fix the problem.
Reverting only the MUL related change triggers a shader compile failure
in LLVM.


 Should we revert this and the associated LLVM changes?

I guess it depends on the answer to the above question about the
expected semantics.

Given that these tests seem to use indirect addressing, could it be that
they just happened to pass by accident before?


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast |  Debian, X and DRI developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Mesa (master): r600g: Use default mul/mad function for tgsi-to-llvm

2012-12-06 Thread Alex Deucher
On Thu, Dec 6, 2012 at 1:21 PM, Vincent Lejeune v...@ovi.com wrote:
 Sorry for the inconvenience.
 I think the r600g backend work because of this patch, which switch MUL and 
 MUL_IEEE definition :
 http://lists.freedesktop.org/archives/mesa-dev/2012-November/030748.html

 The rationale behind the patch is use the fmul instead of a custom intrinsic 
 for OpenGL path.
 Glsl-to-llvm generates as vanilla instructions as possible and uses fmul, 
 this consolidates behaviour
 between tgsi-to-llvm and glsl-to-llvm.
 IMHO emitting MUL and MUL_IEEE should be decided with the presence of some 
 fast-math arg to the backend,
 or some context variable telling if we have a glsl or an opencl shader.

MUL and MUL_IEEE are not related to performance, rather to behavior:

MUL       Floating-point multiply. 0*anything = 0.
MUL_IEEE  IEEE Floating-point multiply. Uses IEEE rules for 0*anything.

Alex


 Vincent

 - Mail original -
 De : Tom Stellard t...@stellard.net
 À : Michel Dänzer mic...@daenzer.net
 Cc : Vincent Lejeune v...@ovi.com; mesa-dev@lists.freedesktop.org
 Envoyé le : Jeudi 6 décembre 2012 18h05
 Objet : Re: [Mesa-dev] Mesa (master): r600g: Use default mul/mad function 
 for tgsi-to-llvm

 On Thu, Dec 06, 2012 at 05:08:07PM +0100, Michel Dänzer wrote:
  On Mit, 2012-12-05 at 09:32 -0800, Vincent Lejeune wrote:
   Module: Mesa
   Branch: master
   Commit: 0ad1fefd6951aa47ab58a41dc9ee73083cbcf85c
   URL:
 http://cgit.freedesktop.org/mesa/mesa/commit/?id=0ad1fefd6951aa47ab58a41dc9ee73083cbcf85c
  
   Author: Vincent Lejeune v...@ovi.com
   Date:   Wed Nov 28 00:35:55 2012 +0100
  
   r600g: Use default mul/mad function for tgsi-to-llvm

  This change breaks the piglit tests glsl-{f,v}s-vec4-indexing-temp-src
  on radeonsi. I suspect the same would be true with r600g as well if it
  didn't fall back to the non-LLVM backend for this test.

  Comparing the generated code, I've noticed two main differences so far:

* LLVM now optimizes away some TGSI MUL operations with constant
  1.0, which previously resulted in V_MUL_LEGACY_F32 (non-IEEE
  semantics) instructions.
* V_MUL_F32 and V_ADD_F32 (IEEE semantics) are used instead of
  V_MAD_LEGACY_F32 (non-IEEE semantics) in some places.


 We really need to fix the SI AsmPrinter and start using the FileCheck tests
 in LLVM for things like this.


  I suspect the problem is a (non-)IEEE semantics mismatch between TGSI
  and LLVM introduced by this change.



 What are the semantics of TGSI opcodes?  For MUL and MAD, tgsi_exec uses IEEE
 operations, but it seems like the glsl frontend thinks they are non-IEEE.

  BTW, some general issues with this commit:

  The prefix 'r600g:' is misleading, as this change affects radeonsi
 as
  well. I think we've usually used 'radeon/llvm:' for changes
  src/gallium/drivers/radeon.

  There is no rationale for this change in the commit log: What is the
  intended effect? Why is it a good idea? ...



 I agree with you here.  These are things I should have caught during the 
 review.
 Should we revert this and the associated LLVM changes?

 -Tom

  --
  Earthling Michel Dänzer   |  http://www.amd.com
  Libre software enthusiast |  Debian, X and DRI developer
  ___
  mesa-dev mailing list
  mesa-dev@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/mesa-dev

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] mesa: add set support (stores a set of pointers)

2012-12-06 Thread Ian Romanick

On 12/05/2012 03:28 PM, Justen, Jordan L wrote:

On Wed, 2012-12-05 at 11:26 -0800, Ian Romanick wrote:

On 12/05/2012 07:32 AM, Brian Paul wrote:

On 12/04/2012 04:55 PM, Jordan Justen wrote:

+/**
+ * This function is an iterator over the hash table.
+ *
+ * Pass in NULL for the first entry, as in the start of a for loop.
Note that
+ * an iteration over the table is O(table_size) not O(entries).
+ */
+struct set_entry *
+_mesa_set_next_entry(struct set *ht, struct set_entry *entry)


Should ht be const-qualified here too?


Since the returned set_entry isn't const-qualified, probably not.  I
think the compiler will generate an error.


I added const to ht, and it is still building for me. (GCC 4.7)

I plan to push this series with Brian's feedback. Or, do
you suspect different compilers might take issue with it?


It doesn't generate any warnings or anything?  Eventually you're 
generating a non-const pointer from something const, so it sure seems 
like it should complain.  If it doesn't, I'm always fine with adding 
more const.


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Mesa (master): r600g: Use default mul/mad function for tgsi-to-llvm

2012-12-06 Thread Alex Deucher
On Thu, Dec 6, 2012 at 1:27 PM, Alex Deucher alexdeuc...@gmail.com wrote:
 On Thu, Dec 6, 2012 at 1:21 PM, Vincent Lejeune v...@ovi.com wrote:
 Sorry for the inconvenience.
 I think the r600g backend work because of this patch, which switch MUL and 
 MUL_IEEE definition :
 http://lists.freedesktop.org/archives/mesa-dev/2012-November/030748.html

 The rationale behind the patch is use the fmul instead of a custom intrinsic 
 for OpenGL path.
 Glsl-to-llvm generates as vanilla instructions as possible and uses fmul, 
 this consolidates behaviour
 between tgsi-to-llvm and glsl-to-llvm.
 IMHO emitting MUL and MUL_IEEE should be decided with the presence of some 
 fast-math arg to the backend,
 or some context variable telling if we have a glsl or an opencl shader.

 MUL and MUL_IEEE are not related to performance, rather to behavior:

 MUL       Floating-point multiply. 0*anything = 0.
 MUL_IEEE  IEEE Floating-point multiply. Uses IEEE rules for 0*anything.

IIRC, we want the MUL behavior in GL.  In the pre-LLVM days we used
MUL_IEEE and switching to MUL fixed a bunch of piglit tests.

Alex
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] linker: Allow aliased location assignments for shader inputs / outputs

2012-12-06 Thread Matt Turner
On Thu, Dec 6, 2012 at 10:08 AM, Ian Romanick i...@freedesktop.org wrote:
 Reviewed-and-Tested-by: Matt Turner matts...@gmail.com

Looks like this fixes
explicit_attrib_location_vertex_input_aliased.test but breaks
explicit_attrib_location_overlapping_ranges.test. Crap.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Mesa (master): r600g: Use default mul/mad function for tgsi-to-llvm

2012-12-06 Thread Tom Stellard
On Thu, Dec 06, 2012 at 07:25:55PM +0100, Michel Dänzer wrote:
 On Don, 2012-12-06 at 09:05 -0800, Tom Stellard wrote: 
  On Thu, Dec 06, 2012 at 05:08:07PM +0100, Michel Dänzer wrote:
   On Mit, 2012-12-05 at 09:32 -0800, Vincent Lejeune wrote: 
Module: Mesa
Branch: master
Commit: 0ad1fefd6951aa47ab58a41dc9ee73083cbcf85c
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=0ad1fefd6951aa47ab58a41dc9ee73083cbcf85c

Author: Vincent Lejeune v...@ovi.com
Date:   Wed Nov 28 00:35:55 2012 +0100

r600g: Use default mul/mad function for tgsi-to-llvm
   
   This change breaks the piglit tests glsl-{f,v}s-vec4-indexing-temp-src
   on radeonsi. I suspect the same would be true with r600g as well if it
   didn't fall back to the non-LLVM backend for this test.
   
   Comparing the generated code, I've noticed two main differences so far:
   
 * LLVM now optimizes away some TGSI MUL operations with constant
   1.0, which previously resulted in V_MUL_LEGACY_F32 (non-IEEE
   semantics) instructions. 
 * V_MUL_F32 and V_ADD_F32 (IEEE semantics) are used instead of
   V_MAD_LEGACY_F32 (non-IEEE semantics) in some places.
 
 BTW, I have a fix so the second bullet at least becomes V_MAD_F32.
 
 
   I suspect the problem is a (non-)IEEE semantics mismatch between TGSI
   and LLVM introduced by this change.
  
  What are the semantics of TGSI opcodes?  For MUL and MAD, tgsi_exec uses 
  IEEE
  operations, but it seems like the glsl frontend thinks they are non-IEEE. 
 
 Not sure... FWIW, the tests pass on llvmpipe.


I just looked at the TGSI-LLVM code, which is shared by r600g,
radeonsi, and llvmpipe.  For TGSI_OPCODE_MUL, it actually uses both
the IEEE and non-IEEE definitions depending on what the arguments are.
If one of the arguments to MUL is the compile time constant zero then it
uses the non-IEEE definition (0 * anything) = 0, otherwise it produces
a fmul instruction which has IEEE semantics.  This probably deserves
its own thread, because I think newer versions of GLSL require IEEE,
but older versions and also ARB have different semantics.

 
 Reverting only the MAD related changes doesn't fix the problem.
 Reverting only the MUL related change triggers a shader compile failure
 in LLVM.
 
 
  Should we revert this and the associated LLVM changes?
 
 I guess it depends on the answer to the above question about the
 expected semantics.
 
 Given that these tests seem to use indirect addressing, could it be that
 they just happened to pass by accident before?

Yeah, those tests must have been passing by accident.  Are there any other
failing tests?

-Tom
 
 
 -- 
 Earthling Michel Dänzer   |   http://www.amd.com
 Libre software enthusiast |  Debian, X and DRI developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 16/26] glsl: Clean up shading language mixing check for GLSL 3.00 ES.

2012-12-06 Thread Ian Romanick

On 12/01/2012 04:52 AM, Kenneth Graunke wrote:

On 11/30/2012 10:07 AM, Ian Romanick wrote:

From: Paul Berry stereotype...@gmail.com

Previously, we prohibited mixing of shading language versions if
min_version == 100 or max_version >= 130.  This was technically
correct (since desktop GLSL 1.30 and beyond prohibit mixing of shading
language versions, as does GLSL 1.00 ES), but it was confusing.  Also,
we asserted that all shading language versions were between 1.00 and
1.40, which was unnecessary (since the parser already checks shading
language versions) and doesn't work for GLSL 3.00 ES.

This patch changes the code to explicitly check that (a) ES shaders
aren't mixed with desktop shaders, (b) shaders aren't mixed between ES
versions, and (c) shaders aren't mixed between desktop GLSL versions
when at least one shader is GLSL 1.30 or greater.  Also, it removes
the unnecessary assertion.

Reviewed-by: Ian Romanick ian.d.roman...@intel.com
---
  src/glsl/linker.cpp | 16 +---
  1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 3b2ab96..1bae043 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -2421,9 +2421,19 @@ link_shaders(struct gl_context *ctx, struct
gl_shader_program *prog)

 unsigned min_version = UINT_MAX;
 unsigned max_version = 0;
+   bool is_es_prog = false;
 for (unsigned i = 0; i  prog-NumShaders; i++) {
min_version = MIN2(min_version, prog-Shaders[i]-Version);
max_version = MAX2(max_version, prog-Shaders[i]-Version);
+  if (i == 0) {
+ is_es_prog = prog-Shaders[i]-IsEsShader;
+  } else {


I really dislike loops that contain conditional code for particular
indices.  It almost seems like you want:

bool is_es_prog = prog-Shaders[0]-IsEsShader;

then you can just omit this code within the loop.  Nice and clean.  The
only trouble is that doing so assumes there is at least one shader.

I *believe* that calling glLinkProgram with no attached shaders is an
error and should be rejected (oglconform claims it's invalid).  However,
I can't find any spec text about it and I'm fairly sure we don't verify
that NumShaders >= 1 when linking.


When you link, one shader for each stage must define main.  If there are 
no shaders, no shader defines main.  We should catch this case with our 
check for main.



But it's not a big deal...just a pet peeve I picked up after reading
code in another project which was essentially:
for (int i = 0; i  POUND_DEFINE_THAT_HAPPENS_TO_BE_THREE; i++) {
if (i == 0) {...} else if (i == 1) {...} else if (i == 2) {...}
/* seriously, why the hell did you make this a loop? */
}

Your code here is sensible and gets a R-b either way.


+ if (prog-Shaders[i]-IsEsShader != is_es_prog) {
+linker_error(prog, all shaders must use same shading 
+ language version\n);
+goto done;
+ }
+  }

switch (prog-Shaders[i]-Type) {
case GL_VERTEX_SHADER:
@@ -2444,10 +2454,10 @@ link_shaders(struct gl_context *ctx, struct
gl_shader_program *prog)
 /* Previous to GLSL version 1.30, different compilation units
could mix and
  * match shading language versions.  With GLSL 1.30 and later,
the versions
  * of all shaders must match.
+*
+* GLSL ES has never allowed mixing of shading language versions.
  */
-   assert(min_version = 100);
-   assert(max_version = 140);
-   if ((max_version = 130 || min_version == 100)
+   if ((is_es_prog || max_version = 130)
  min_version != max_version) {
linker_error(prog, all shaders must use same shading 
 language version\n);





___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] build: Fix GLES linkage without libglapi

2012-12-06 Thread Andreas Boll
fixes a regression introduced with
fc9ea7c74dc5cb996c3d9fe6663fd6da080e8360

Cc: Brian Paul bri...@vmware.com
Cc: Matt Turner matts...@gmail.com
---

Steps to reproduce the regression:

$ ./autogen.sh --enable-xlib-glx --disable-driglx-direct --disable-dri 
--enable-debug --enable-gles1 --enable-gles2 --enable-openvg 
--enable-gallium-egl --enable-xa --enable-xorg
$ make

 src/mapi/es1api/Makefile.am |6 +-
 src/mapi/es2api/Makefile.am |6 +-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/mapi/es1api/Makefile.am b/src/mapi/es1api/Makefile.am
index c49c86e..c163a39 100644
--- a/src/mapi/es1api/Makefile.am
+++ b/src/mapi/es1api/Makefile.am
@@ -41,9 +41,13 @@ GLES_include_HEADERS = \
 lib_LTLIBRARIES = libGLESv1_CM.la
 
 libGLESv1_CM_la_SOURCES = ../mapi/entry.c glapi_mapi_tmp.h
-libGLESv1_CM_la_LIBADD = $(GLESv1_CM_LIB_DEPS) ../shared-glapi/libglapi.la
+libGLESv1_CM_la_LIBADD = $(GLESv1_CM_LIB_DEPS)
 libGLESv1_CM_la_LDFLAGS = -version-number 1:1 -no-undefined
 
+if HAVE_SHARED_GLAPI
+libGLESv1_CM_la_LIBADD += ../shared-glapi/libglapi.la
+endif
+
 include ../glapi/gen/glapi_gen.mk
 glapi_mapi_tmp.h: ../glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
$(call glapi_gen_mapi,$,es1api)
diff --git a/src/mapi/es2api/Makefile.am b/src/mapi/es2api/Makefile.am
index 5e02e1c..0538f73 100644
--- a/src/mapi/es2api/Makefile.am
+++ b/src/mapi/es2api/Makefile.am
@@ -45,9 +45,13 @@ GLES3_include_HEADERS = \
 lib_LTLIBRARIES = libGLESv2.la
 
 libGLESv2_la_SOURCES = ../mapi/entry.c glapi_mapi_tmp.h
-libGLESv2_la_LIBADD = $(GLESv2_LIB_DEPS) ../shared-glapi/libglapi.la
+libGLESv2_la_LIBADD = $(GLESv2_LIB_DEPS)
 libGLESv2_la_LDFLAGS = -version-number 2 -no-undefined
 
+if HAVE_SHARED_GLAPI
+libGLESv2_la_LIBADD += ../shared-glapi/libglapi.la
+endif
+
 include ../glapi/gen/glapi_gen.mk
 glapi_mapi_tmp.h: ../glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
$(call glapi_gen_mapi,$,es2api)
-- 
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] linker: Allow aliased location assignments for shader inputs / outputs

2012-12-06 Thread Matt Turner
On Thu, Dec 6, 2012 at 10:41 AM, Matt Turner matts...@gmail.com wrote:
 On Thu, Dec 6, 2012 at 10:08 AM, Ian Romanick i...@freedesktop.org wrote:
 Reviewed-and-Tested-by: Matt Turner matts...@gmail.com

 Looks like this fixes
 explicit_attrib_location_vertex_input_aliased.test but breaks
 explicit_attrib_location_overlapping_ranges.test. Crap.

Eh, the overlapping_ranges test might have been broken. See Khronos bug 9726.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] linker: Allow aliased location assignments for shader inputs / outputs

2012-12-06 Thread Matt Turner
On Thu, Dec 6, 2012 at 11:00 AM, Matt Turner matts...@gmail.com wrote:
 On Thu, Dec 6, 2012 at 10:41 AM, Matt Turner matts...@gmail.com wrote:
 On Thu, Dec 6, 2012 at 10:08 AM, Ian Romanick i...@freedesktop.org wrote:
 Reviewed-and-Tested-by: Matt Turner matts...@gmail.com

 Looks like this fixes
 explicit_attrib_location_vertex_input_aliased.test but breaks
 explicit_attrib_location_overlapping_ranges.test. Crap.

 Eh, the overlapping_ranges test might have been broken. See Khronos bug 9726.

And actually it appears that we're now supposed to disallow aliasing
in ES 3.0. Bug 9609.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] build: Fix GLES linkage without libglapi

2012-12-06 Thread Matt Turner
On Thu, Dec 6, 2012 at 10:59 AM, Andreas Boll
andreas.boll@gmail.com wrote:
 fixes a regression introduced with
 fc9ea7c74dc5cb996c3d9fe6663fd6da080e8360

 Cc: Brian Paul bri...@vmware.com
 Cc: Matt Turner matts...@gmail.com
 ---

 Steps to reproduce the regression:

 $ ./autogen.sh --enable-xlib-glx --disable-driglx-direct --disable-dri 
 --enable-debug --enable-gles1 --enable-gles2 --enable-openvg 
 --enable-gallium-egl --enable-xa --enable-xorg
 $ make

Ugh, I guess. Although that seems like a really ridiculous configuration.

Acked-by: Matt Turner matts...@gmail.com
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] build: Fix GLES linkage without libglapi

2012-12-06 Thread Andreas Boll
2012/12/6 Andreas Boll andreas.boll@gmail.com:
 fixes a regression introduced with
 fc9ea7c74dc5cb996c3d9fe6663fd6da080e8360

NOTE: This is a candidate for the 9.0 branch.


 Cc: Brian Paul bri...@vmware.com
 Cc: Matt Turner matts...@gmail.com
 ---

 Steps to reproduce the regression:

 $ ./autogen.sh --enable-xlib-glx --disable-driglx-direct --disable-dri 
 --enable-debug --enable-gles1 --enable-gles2 --enable-openvg 
 --enable-gallium-egl --enable-xa --enable-xorg
 $ make

  src/mapi/es1api/Makefile.am |6 +-
  src/mapi/es2api/Makefile.am |6 +-
  2 files changed, 10 insertions(+), 2 deletions(-)

 diff --git a/src/mapi/es1api/Makefile.am b/src/mapi/es1api/Makefile.am
 index c49c86e..c163a39 100644
 --- a/src/mapi/es1api/Makefile.am
 +++ b/src/mapi/es1api/Makefile.am
 @@ -41,9 +41,13 @@ GLES_include_HEADERS = \
  lib_LTLIBRARIES = libGLESv1_CM.la

  libGLESv1_CM_la_SOURCES = ../mapi/entry.c glapi_mapi_tmp.h
 -libGLESv1_CM_la_LIBADD = $(GLESv1_CM_LIB_DEPS) ../shared-glapi/libglapi.la
 +libGLESv1_CM_la_LIBADD = $(GLESv1_CM_LIB_DEPS)
  libGLESv1_CM_la_LDFLAGS = -version-number 1:1 -no-undefined

 +if HAVE_SHARED_GLAPI
 +libGLESv1_CM_la_LIBADD += ../shared-glapi/libglapi.la
 +endif
 +
  include ../glapi/gen/glapi_gen.mk
  glapi_mapi_tmp.h: ../glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
 $(call glapi_gen_mapi,$,es1api)
 diff --git a/src/mapi/es2api/Makefile.am b/src/mapi/es2api/Makefile.am
 index 5e02e1c..0538f73 100644
 --- a/src/mapi/es2api/Makefile.am
 +++ b/src/mapi/es2api/Makefile.am
 @@ -45,9 +45,13 @@ GLES3_include_HEADERS = \
  lib_LTLIBRARIES = libGLESv2.la

  libGLESv2_la_SOURCES = ../mapi/entry.c glapi_mapi_tmp.h
 -libGLESv2_la_LIBADD = $(GLESv2_LIB_DEPS) ../shared-glapi/libglapi.la
 +libGLESv2_la_LIBADD = $(GLESv2_LIB_DEPS)
  libGLESv2_la_LDFLAGS = -version-number 2 -no-undefined

 +if HAVE_SHARED_GLAPI
 +libGLESv2_la_LIBADD += ../shared-glapi/libglapi.la
 +endif
 +
  include ../glapi/gen/glapi_gen.mk
  glapi_mapi_tmp.h: ../glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
 $(call glapi_gen_mapi,$,es2api)
 --
 1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Mesa (master): r600g: Use default mul/mad function for tgsi-to-llvm

2012-12-06 Thread Michel Dänzer
On Don, 2012-12-06 at 10:52 -0800, Tom Stellard wrote: 
 On Thu, Dec 06, 2012 at 07:25:55PM +0100, Michel Dänzer wrote:
  On Don, 2012-12-06 at 09:05 -0800, Tom Stellard wrote: 
   On Thu, Dec 06, 2012 at 05:08:07PM +0100, Michel Dänzer wrote:
On Mit, 2012-12-05 at 09:32 -0800, Vincent Lejeune wrote: 

I suspect the problem is a (non-)IEEE semantics mismatch between TGSI
and LLVM introduced by this change.
   
   What are the semantics of TGSI opcodes?  For MUL and MAD, tgsi_exec uses 
   IEEE
   operations, but it seems like the glsl frontend thinks they are non-IEEE. 
  
  Not sure... FWIW, the tests pass on llvmpipe.
 
 I just looked at the TGSI-LLVM code, which is shared by r600g,
 radeonsi, and llvmpipe.  For TGSI_OPCODE_MUL, it actually uses both
 the IEEE and non-IEEE definitions depending on what the arguments are.
 If one of the arguments to MUL is the compile time constant zero then it
 uses the non-IEEE definition (0 * anything) = 0, otherwise it produces
 a fmul instruction which has IEEE semantics.  This probably deserves
 it's own thread, because I think newer versions of GLSL require IEEE,
 but older versions and also ARB have different semantics.

Right, also, the code you described doesn't cover cases where a
non-constant value is 0.


   Should we revert this and the associated LLVM changes?
  
  I guess it depends on the answer to the above question about the
  expected semantics.
  
  Given that these tests seem to use indirect addressing, could it be that
  they just happened to pass by accident before?
 
 Yeah, those tests must have been passing by accident.  Are there any other
 failing tests?

No, those are the only two tests that regressed for me. So I guess this
can be ignored for now. It would be interesting to know which tests Alex
was referring to though, which were fixed by switching from MUL_IEEE to
MUL. Maybe those are covered by the code you described above though.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast |  Debian, X and DRI developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Mesa (master): r600g: Use default mul/mad function for tgsi-to-llvm

2012-12-06 Thread Alex Deucher
On Thu, Dec 6, 2012 at 2:37 PM, Michel Dänzer mic...@daenzer.net wrote:
 On Don, 2012-12-06 at 10:52 -0800, Tom Stellard wrote:
 On Thu, Dec 06, 2012 at 07:25:55PM +0100, Michel Dänzer wrote:
  On Don, 2012-12-06 at 09:05 -0800, Tom Stellard wrote:
   On Thu, Dec 06, 2012 at 05:08:07PM +0100, Michel Dänzer wrote:
On Mit, 2012-12-05 at 09:32 -0800, Vincent Lejeune wrote:
   
I suspect the problem is a (non-)IEEE semantics mismatch between TGSI
and LLVM introduced by this change.
  
   What are the semantics of TGSI opcodes?  For MUL and MAD, tgsi_exec uses 
   IEEE
   operations, but it seems like the glsl frontend thinks they are non-IEEE.
 
  Not sure... FWIW, the tests pass on llvmpipe.

 I just looked at the TGSI-LLVM code, which is shared by r600g,
 radeonsi, and llvmpipe.  For TGSI_OPCODE_MUL, it actually uses both
 the IEEE and non-IEEE definitions depending on what the arguments are.
 If one of the arguments to MUL is the compile time constant zero then it
 uses the non-IEEE definition (0 * anything) = 0, otherwise it produces
 a fmul instruction which has IEEE semantics.  This probably deserves
 it's own thread, because I think newer versions of GLSL require IEEE,
 but older versions and also ARB have different semantics.

 Right, also, the code you described doesn't cover cases where a
 non-constant value is 0.


   Should we revert this and the associated LLVM changes?
 
  I guess it depends on the answer to the above question about the
  expected semantics.
 
  Given that these tests seem to use indirect addressing, could it be that
  they just happened to pass by accident before?

 Yeah, those tests must have been passing by accident.  Are there any other
 failing tests?

 No, those are the only two tests that regressed for me. So I guess this
 can be ignored for now. It would be interesting to know which tests Alex
 was referring to though, which were fixed by switching from MUL_IEEE to
 MUL. Maybe those are covered by the code you described above though.

I wasn't talking about recent changes. This was back in the early days
of r600g.  Probably the same thing Tom was talking about.

Alex
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] gallivm: Lower TGSI_OPCODE_MUL to fmul by default

2012-12-06 Thread Tom Stellard
From: Tom Stellard thomas.stell...@amd.com

This fixes a number of crashes on r600g due to the fact that
lp_build_mul assumes vector types when optimizing mul to bit shifts.

This bug was uncovered by 0ad1fefd6951aa47ab58a41dc9ee73083cbcf85c
---
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c |5 +++--
 1 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
index cc4bd2e..8159beb 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -580,8 +580,9 @@ mul_emit(
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
 {
-   emit_data-output[emit_data-chan] = lp_build_mul(bld_base-base,
-   emit_data-args[0], emit_data-args[1]);
+   emit_data-output[emit_data-chan] = LLVMBuildFMul(
+   bld_base-base.gallivm-builder,
+   emit_data-args[0], emit_data-args[1], );
 }
 
 /*.TGSI_OPCODE_DIV.*/
-- 
1.7.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] llvmpipe perf regression

2012-12-06 Thread Brian Paul

Hi Dave,

llvmpipe is running gears at about half speed for me since your commit:

77b26564c3f0395bf3e744abbf6d0e7aa9d2c8da is the first bad commit
commit 77b26564c3f0395bf3e744abbf6d0e7aa9d2c8da
Author: Dave Airlie airl...@gmail.com
Date:   Wed Dec 5 06:50:07 2012 +1000

llvmpipe: EXT_transform_feedback support (v1.1)


While gears shows a big slowdown, other demos like cubemap and engine 
seem unchanged.


I haven't investigated to see exactly what the issue is.

Can you take a look?

-Brian
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Mesa (master): r600g: Use default mul/mad function for tgsi-to-llvm

2012-12-06 Thread Tom Stellard
On Thu, Dec 06, 2012 at 01:35:22PM -0500, Alex Deucher wrote:
 On Thu, Dec 6, 2012 at 1:27 PM, Alex Deucher alexdeuc...@gmail.com wrote:
  On Thu, Dec 6, 2012 at 1:21 PM, Vincent Lejeune v...@ovi.com wrote:
  Sorry for the inconvenience.
  I think the r600g backend work because of this patch, which switch MUL and 
  MUL_IEEE definition :
  http://lists.freedesktop.org/archives/mesa-dev/2012-November/030748.html
 
  The rationale behind the patch is use the fmul instead of a custom 
  intrinsic for OpenGL path.
  Glsl-to-llvm generates as vanilla instructions as possible and uses 
  fmul, this consolidates behaviour
  between tgsi-to-llvm and glsl-to-llvm.
  IMHO emitting MUL and MUL_IEEE should be decided with the presence of some 
  fast-math arg to the backend,
  or some context variable telling if we have a glsl or an opencl shader.
 
  MUL and MUL_IEEE are not related to performance, rather to behavior:
 
  MUL       Floating-point multiply. 0*anything = 0.
  MUL_IEEE  IEEE Floating-point multiply. Uses IEEE rules for 0*anything.


I asked idr about this on IRC and he said that IEEE rules are required for
GLSL >= 1.30 and they are compliant, but not required for GLSL < 1.30.
stringfellow added that the d3d9 spec required 0*anything = 0, which is
probably why the hardware has those instructions.

 
 IIRC, we want the MUL behavior in GL.  In the pre-LLVM days we used
 MUL_IEEE and switching to MUL fixed a bunch of piglit tests.
 

I just ran tests on r600g with the normal compiler and using MUL_IEEE results
in +6 piglit passes (All of these tests are glsl-1.30 isnan tests).  I'm
guessing that the tests that used to fail with MUL_IEEE were either fixed or
removed during the glsl 1.30 implementation.  So, it seems like MUL_IEEE is safe
to use.

-Tom
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] llvmpipe perf regression

2012-12-06 Thread Dave Airlie
 I haven't investigated to see exactly what the issue is.

 Can you take a look?

I'd suspect it's the discard handling,

lp_setup.c:lp_setup_set_rasterizer_discard
set_scene_state( setup, SETUP_FLUSHED, __FUNCTION__ );

try removing that line and see if the speed comes back.

I'll see if I can do some testing on it later,

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] R600: Convert global store address to dword offset during isel

2012-12-06 Thread Tom Stellard
From: Tom Stellard thomas.stell...@amd.com

---
 lib/Target/AMDGPU/AMDGPUISelLowering.cpp |  1 +
 lib/Target/AMDGPU/AMDGPUISelLowering.h   |  1 +
 lib/Target/AMDGPU/AMDGPUInstrInfo.td |  3 +++
 lib/Target/AMDGPU/AMDGPUInstructions.td  |  5 
 lib/Target/AMDGPU/R600ISelLowering.cpp   | 45 +++-
 lib/Target/AMDGPU/R600ISelLowering.h |  1 +
 lib/Target/AMDGPU/R600Instructions.td|  4 ++-
 7 files changed, 46 insertions(+), 14 deletions(-)

diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp 
b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 63b937f..1f31c2a 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -387,6 +387,7 @@ const char* 
AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(BRANCH_COND);
 
   // AMDGPU DAG nodes
+  NODE_NAME_CASE(DWORDADDR)
   NODE_NAME_CASE(FRACT)
   NODE_NAME_CASE(FMAX)
   NODE_NAME_CASE(SMAX)
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h 
b/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 992dab7..c7abaf6 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -111,6 +111,7 @@ enum {
   BRANCH_COND,
   // End AMDIL ISD Opcodes
   BITALIGN,
+  DWORDADDR,
   FRACT,
   FMAX,
   SMAX,
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td 
b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 23ca35a..96368e8 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -29,6 +29,9 @@ def AMDGPUDTIntTernaryOp : SDTypeProfile1, 3, [
 // rotl(a, b) = bitalign(a, a, 32 - b)
 def AMDGPUbitalign : SDNodeAMDGPUISD::BITALIGN, AMDGPUDTIntTernaryOp;
 
+// This argument to this node is a dword address.
+def AMDGPUdwordaddr : SDNodeAMDGPUISD::DWORDADDR, SDTIntUnaryOp;
+
 // out = a - floor(a)
 def AMDGPUfract : SDNodeAMDGPUISD::FRACT, SDTFPUnaryOp;
 
diff --git a/lib/Target/AMDGPU/AMDGPUInstructions.td 
b/lib/Target/AMDGPU/AMDGPUInstructions.td
index 443c6d4..e634d20 100644
--- a/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -179,6 +179,11 @@ class BitConvert ValueType dt, ValueType st, 
RegisterClass rc : Pat 
   (dt rc:$src0)
 ;
 
+class DwordAddrPatValueType vt, RegisterClass rc : Pat 
+  (vt (AMDGPUdwordaddr (vt rc:$addr))),
+  (vt rc:$addr)
+;
+
 include R600Instructions.td
 
 include SIInstrInfo.td
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp 
b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 5da52af..f7ae3c9 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -66,6 +66,11 @@ R600TargetLowering::R600TargetLowering(TargetMachine TM) :
   setOperationAction(ISD::SELECT, MVT::i32, Custom);
   setOperationAction(ISD::SELECT, MVT::f32, Custom);
 
+  setOperationAction(ISD::STORE, MVT::i32, Custom);
+  setOperationAction(ISD::STORE, MVT::f32, Custom);
+  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+  setOperationAction(ISD::STORE, MVT::v4f32, Custom);
+
   setTargetDAGCombine(ISD::FP_ROUND);
 
   setSchedulingPreference(Sched::VLIW);
@@ -137,23 +142,11 @@ MachineBasicBlock * 
R600TargetLowering::EmitInstrWithCustomInserter(
 
   case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
   case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
-// Convert to DWORD address
-unsigned NewAddr = MRI.createVirtualRegister(
- AMDGPU::R600_TReg32_XRegClass);
-unsigned ShiftValue = MRI.createVirtualRegister(
-  AMDGPU::R600_TReg32RegClass);
 unsigned EOP = (llvm::next(I)-getOpcode() == AMDGPU::RETURN) ? 1 : 0;
 
-// XXX In theory, we should be able to pass ShiftValue directly to
-// the LSHR_eg instruction as an inline literal, but I tried doing it
-// this way and it didn't produce the correct results.
-TII-buildMovImm(*BB, I, ShiftValue, 2);
-TII-buildDefaultInstruction(*BB, I, AMDGPU::LSHR_eg, NewAddr,
- MI-getOperand(1).getReg(),
- ShiftValue);
 BuildMI(*BB, I, BB-findDebugLoc(I), TII-get(MI-getOpcode()))
 .addOperand(MI-getOperand(0))
-.addReg(NewAddr)
+.addOperand(MI-getOperand(1))
 .addImm(EOP); // Set End of program bit
 break;
   }
@@ -316,6 +309,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, 
SelectionDAG DAG) const
   case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
   case ISD::SELECT: return LowerSELECT(Op, DAG);
   case ISD::SETCC: return LowerSETCC(Op, DAG);
+  case ISD::STORE: return LowerSTORE(Op, DAG);
   case ISD::FPOW: return LowerFPOW(Op, DAG);
   case ISD::INTRINSIC_VOID: {
 SDValue Chain = Op.getOperand(0);
@@ -763,6 +757,31 @@ SDValue R600TargetLowering::LowerSETCC(SDValue Op, 
SelectionDAG DAG) const {
   return Cond;
 }
 
+SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  StoreSDNode 

[Mesa-dev] [PATCH 1/2] R600: Fix use iterator in custom select of ISD::Constant

2012-12-06 Thread Tom Stellard
From: Tom Stellard thomas.stell...@amd.com

The use list may change during the execution of the loop, so we
need to manually keep track of the next item in the list.
---
 lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp 
b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
index a765438..5a98463 100644
--- a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
@@ -208,8 +208,9 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
   }
 }
 
-for (SDNode::use_iterator Use = N-use_begin(), E = SDNode::use_end();
-Use != E; ++Use) {
+for (SDNode::use_iterator Use = N-use_begin(), Next = llvm::next(Use);
+  Use != SDNode::use_end(); Use = Next) {
+  Next = llvm::next(Use);
   std::vectorSDValue Ops;
   for (unsigned i = 0; i  Use-getNumOperands(); ++i) {
 Ops.push_back(Use-getOperand(i));
-- 
1.7.11.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] llvmpipe perf regression

2012-12-06 Thread Dave Airlie
On Thu, Dec 6, 2012 at 9:57 PM, Dave Airlie airl...@gmail.com wrote:
 I haven't investigated to see exactly what the issue is.

 Can you take a look?

 I'd suspect it's the discard handling,

 lp_setup.c:lp_setup_set_rasterizer_discard
 set_scene_state( setup, SETUP_FLUSHED, __FUNCTION__ );

 try removing that line and see if the speed comes back.

 I'll see if I can do some testing on it later,


Okay I've found it, will push a fix that should make it better.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] llvmpipe perf regression

2012-12-06 Thread Brian Paul

On 12/06/2012 03:33 PM, Dave Airlie wrote:

On Thu, Dec 6, 2012 at 9:57 PM, Dave Airlieairl...@gmail.com  wrote:

I haven't investigated to see exactly what the issue is.

Can you take a look?


I'd suspect it's the discard handling,

lp_setup.c:lp_setup_set_rasterizer_discard
set_scene_state( setup, SETUP_FLUSHED, __FUNCTION__ );

try removing that line and see if the speed comes back.

I'll see if I can do some testing on it later,



Okay I've found it, will push a fix that should make it better.


Yeah, that seems to fix it.  Thanks!

-Brian

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glsl: Don't add structure fields to the symbol table

2012-12-06 Thread Ian Romanick
From: Ian Romanick ian.d.roman...@intel.com

I erroneously added this back in January 2011 in commit 88421589.
Looking at the commit message, I have no idea why I added it.  It only
added non-array structure fields to the symbol table, so array structure
fields are treated correctly.

Fixes piglit tests structure-and-field-have-same-name.vert and
structure-and-field-have-same-name-nested.vert.  It should also fix
WebGL conformance tests shader-with-non-reserved-words.

NOTE: This is a candidate for the stable release branches.

Signed-off-by: Ian Romanick ian.d.roman...@intel.com
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=57622
Cc: Pavel Ondračka pavel.ondra...@email.cz
---
 src/glsl/glsl_parser.yy | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index ee6a672..6dd3d76 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -1581,7 +1581,6 @@ struct_declarator:
   void *ctx = state;
   $$ = new(ctx) ast_declaration($1, false, NULL, NULL);
   $$-set_location(yylloc);
-  state-symbols-add_variable(new(state) ir_variable(NULL, $1, 
ir_var_auto));
}
| any_identifier '[' constant_expression ']'
{
-- 
1.7.11.7

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC] r600g hyperz support

2012-12-06 Thread j . glisse
So I finally have something that doesn't seem to lock up (I ran in a loop several
things that used to lock up on various GPUs for over 24 hours without a single
lockup) or regress anything. It's a bundle deal: the first patch is needed for
lockup avoidance. Tested on:
rv610, rv635, rv670, rv710, rv730, rv740, rv770, cedar, redwood, barts
A slightly different version of patch 2 has also been tested on:
rs780, rs880, caicos, turks, barts le

It's not yet fully cooked, as I need to check again the htile buffer size
computation, which seems kind of wrong (I use a modified kernel so that it does
not choke on it and overallocates things). Otherwise the mesa bits are fully
cooked; I am sending this to get feedback on them.

I will go back to the htile computation, but this will require quite a lot of
GPU swapping.

For the curious, this is the result of looking at more than 12GB of fglrx command
stream ...

Cheers,
Jerome
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] r600g: rework flushing and synchronization pattern v4

2012-12-06 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

This brings r600g almost in line with the closed source driver when
it comes to flushing and synchronization patterns.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/evergreen_compute.c   |   8 +-
 .../drivers/r600/evergreen_compute_internal.c  |   4 +-
 src/gallium/drivers/r600/evergreen_state.c |   4 +-
 src/gallium/drivers/r600/r600.h|  16 +--
 src/gallium/drivers/r600/r600_hw_context.c | 154 -
 src/gallium/drivers/r600/r600_state.c  |  18 ++-
 src/gallium/drivers/r600/r600_state_common.c   |  19 ++-
 7 files changed, 61 insertions(+), 162 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
b/src/gallium/drivers/r600/evergreen_compute.c
index 44831a7..33a5910 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -98,7 +98,7 @@ static void evergreen_cs_set_vertex_buffer(
 
/* The vertex instructions in the compute shaders use the texture cache,
 * so we need to invalidate it. */
-   rctx-flags |= R600_CONTEXT_TEX_FLUSH;
+   rctx-flags |= R600_CONTEXT_FLUSH;
state-enabled_mask |= 1  vb_index;
state-dirty_mask |= 1  vb_index;
state-atom.dirty = true;
@@ -329,7 +329,7 @@ static void compute_emit_cs(struct r600_context *ctx, const 
uint *block_layout,
 */
r600_emit_command_buffer(ctx-cs, ctx-start_compute_cs_cmd);
 
-   ctx-flags |= R600_CONTEXT_CB_FLUSH;
+   ctx-flags |= R600_CONTEXT_FLUSH;
r600_flush_emit(ctx);
 
/* Emit colorbuffers. */
@@ -409,7 +409,7 @@ static void compute_emit_cs(struct r600_context *ctx, const 
uint *block_layout,
 
/* XXX evergreen_flush_emit() hardcodes the CP_COHER_SIZE to 0x
 */
-   ctx-flags |= R600_CONTEXT_CB_FLUSH;
+   ctx-flags |= R600_CONTEXT_FLUSH;
r600_flush_emit(ctx);
 
 #if 0
@@ -468,7 +468,7 @@ void evergreen_emit_cs_shader(
r600_write_value(cs, r600_context_bo_reloc(rctx, kernel-code_bo,
RADEON_USAGE_READ));
 
-   rctx-flags |= R600_CONTEXT_SHADERCONST_FLUSH;
+   rctx-flags |= R600_CONTEXT_FLUSH;
 }
 
 static void evergreen_launch_grid(
diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.c 
b/src/gallium/drivers/r600/evergreen_compute_internal.c
index 7bc7fb4..187bcf1 100644
--- a/src/gallium/drivers/r600/evergreen_compute_internal.c
+++ b/src/gallium/drivers/r600/evergreen_compute_internal.c
@@ -538,7 +538,7 @@ void evergreen_set_tex_resource(
 
util_format_get_blockwidth(tmp-resource.b.b.format) *
 view-base.texture-width0*height*depth;
 
-   pipe-ctx-flags |= R600_CONTEXT_TEX_FLUSH;
+   pipe-ctx-flags |= R600_CONTEXT_FLUSH;
 
evergreen_emit_force_reloc(res);
evergreen_emit_force_reloc(res);
@@ -597,7 +597,7 @@ void evergreen_set_const_cache(
res-usage = RADEON_USAGE_READ;
res-coher_bo_size = size;
 
-   pipe-ctx-flags |= R600_CONTEXT_SHADERCONST_FLUSH;
+   pipe-ctx-flags |= R600_CONTEXT_FLUSH;
 }
 
 struct r600_resource* r600_compute_buffer_alloc_vram(
diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 9b898cb..7bc4772 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1557,14 +1557,14 @@ static void evergreen_set_framebuffer_state(struct 
pipe_context *ctx,
uint32_t i, log_samples;
 
if (rctx-framebuffer.state.nr_cbufs) {
-   rctx-flags |= R600_CONTEXT_CB_FLUSH;
+   rctx-flags |= R600_CONTEXT_WAIT_IDLE | 
R600_CONTEXT_FLUSH_AND_INV;
 
if (rctx-framebuffer.state.cbufs[0]-texture-nr_samples  1) {
rctx-flags |= R600_CONTEXT_FLUSH_AND_INV_CB_META;
}
}
if (rctx-framebuffer.state.zsbuf) {
-   rctx-flags |= R600_CONTEXT_DB_FLUSH;
+   rctx-flags |= R600_CONTEXT_WAIT_IDLE | 
R600_CONTEXT_FLUSH_AND_INV;
}
 
util_copy_framebuffer_state(rctx-framebuffer.state, state);
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 7d43416..4060672 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -180,17 +180,11 @@ struct r600_so_target {
unsignedso_index;
 };
 
-#define R600_CONTEXT_PS_PARTIAL_FLUSH  (1  0)
-#define R600_CONTEXT_CB_FLUSH  (1  1)
-#define R600_CONTEXT_DB_FLUSH  (1  2)
-#define R600_CONTEXT_SHADERCONST_FLUSH (1  3)
-#define R600_CONTEXT_TEX_FLUSH (1  4)
-#define R600_CONTEXT_VTX_FLUSH (1  5)
-#define R600_CONTEXT_STREAMOUT_FLUSH   (1  6)
-#define R600_CONTEXT_WAIT_IDLE (1  7)
-#define 

[Mesa-dev] [PATCH 2/2] r600g: add htile support v13

2012-12-06 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

htile is used for HiZ and HiS support and fast Z/S clears.
This commit just adds the htile setup and Fast Z clear.
We don't take full advantage of HiS with that patch.

v2 really use fast clear, still random issue with some tiles
   need to try more flush combination, fix depth/stencil
   texture decompression
v3 fix random issue on r6xx/r7xx
v4 rebase on top of latest mesa, disable CB export when clearing
   htile surface to avoid wasting bandwidth
v5 resummarize htile surface when uploading z value. Fix z/stencil
   decompression, the custom blitter with custom dsa is no longer
   needed.
v6 Reorganize render control/override update mechanism, fixing more
   issues in the process.
v7 Add nop after depth surface base update to work around some htile
   flushing issues. Force htile to 8x8 on r6xx/r7xx as other combinations
   have issues. Do not enable hyperz when flushing/uncompressing
   depth buffer.
v8 Fix htile surface, preload and prefetch setup. Only set preload
   and prefetch on htile surface clear like fglrx. Record depth
   clear value per level. Support several level for the htile
   surface. First depth clear can't be a fast clear.
v9 Fix comments, properly account new register in emit function,
   disable fast zclear if clearing different layer of texture
   array to different value
v10 Disable hyperz for texture array making test simpler. Force
db_misc_state update when no depth buffer is bound. Remove
unused variable, rename depth_clearstencil to depth_clear.
Don't allocate htile surface for flushed depth. Something
broken the cliprect change, this need to be investigated.
v11 Rebase on top of newer mesa
v12 Rebase on top of newer mesa
v13 Rebase on top of newer mesa. The htile surface needs to be initialized
to zero; somehow, special-casing the first clear to not use fast clear
(and thus initialize the htile surface with proper values) does not
work in all cases.

Signed-off-by: Pierre-Eric Pelloux-Prayer pell...@gmail.com
Signed-off-by: Alex Deucher alexander.deuc...@amd.com
Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/evergreen_state.c | 83 +++---
 src/gallium/drivers/r600/evergreend.h  |  2 +
 src/gallium/drivers/r600/r600_blit.c   | 28 ++
 src/gallium/drivers/r600/r600_hw_context.c |  1 +
 src/gallium/drivers/r600/r600_pipe.c   |  9 
 src/gallium/drivers/r600/r600_pipe.h   | 26 ++
 src/gallium/drivers/r600/r600_resource.h   |  9 
 src/gallium/drivers/r600/r600_state.c  | 59 +++--
 src/gallium/drivers/r600/r600_texture.c| 50 ++
 src/gallium/drivers/r600/r600d.h   |  1 +
 10 files changed, 247 insertions(+), 21 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 7bc4772..e7f5c44 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1545,6 +1545,36 @@ static void evergreen_init_depth_surface(struct 
r600_context *rctx,
S_028044_FORMAT(V_028044_STENCIL_8);
}
 
+   surf-htile_enabled = 0;
+   /* use htile only for first level */
+   if (rtex-htile  !level) {
+   unsigned preload_x, preload_y;
+
+   surf-htile_enabled = 1;
+   surf-db_htile_data_base = 
rtex-htile-surface.level[level].offset;
+   surf-db_htile_surface = S_028ABC_HTILE_WIDTH(1) |
+   S_028ABC_HTILE_HEIGHT(1) |
+   S_028ABC_LINEAR(1) |
+   S_028ABC_FULL_CACHE(1);
+   if (rtex-surface.level[level].nblk_x = 512) {
+   surf-db_htile_surface |= 
S_028ABC_HTILE_USES_PRELOAD_WIN(1);
+   surf-db_htile_surface |= S_028ABC_PREFETCH_WIDTH(16);
+   surf-db_htile_surface |= S_028ABC_PREFETCH_HEIGHT(4);
+   } else if (rtex-surface.level[level].nblk_x = 1024) {
+   surf-db_htile_surface |= 
S_028ABC_HTILE_USES_PRELOAD_WIN(1);
+   surf-db_htile_surface |= S_028ABC_PREFETCH_WIDTH(16);
+   surf-db_htile_surface |= S_028ABC_PREFETCH_HEIGHT(2);
+   } else {
+   surf-db_htile_surface |= 
S_028ABC_HTILE_USES_PRELOAD_WIN(1);
+   surf-db_htile_surface |= S_028ABC_PREFETCH_WIDTH(16);
+   surf-db_htile_surface |= S_028ABC_PREFETCH_HEIGHT(0);
+   }
+   surf-db_depth_info |= S_028040_TILE_SURFACE_ENABLE(1);
+   preload_x = align(rtex-surface.level[level].nblk_x, 32)  5;
+   preload_y = align(rtex-surface.level[level].nblk_y, 32)  5;
+   surf-db_preload_control = S_028AC8_MAX_X(preload_x) | 
S_028AC8_MAX_Y(preload_y);
+   }
+

Re: [Mesa-dev] [PATCH v3 0/7] Delay exec table initialization until version is computed

2012-12-06 Thread Jordan Justen
I tested this series on i965 and llvmpipe. Does anyone want to test
this branch on other hardware since it modifies the initialization
path for all drivers?

git://people.freedesktop.org/~jljusten/mesa version-gles3-dispatch

(This branch is the series rebased on the current master.)

Thanks,

-Jordan

On Mon, Nov 19, 2012 at 5:01 PM, Jordan Justen
jordan.l.jus...@intel.com wrote:
 GLES3 support requires that the version be computed before the
 exec table is initialized.

 For main exec table initialization (api_exec.c):
 * Rename _mesa_create_exec_table to _mesa_initialize_exec_table
 * _mesa_initialize_context allocates the exec table, and sets
   entries to nop, but leaves functions otherwise uninitialized
 * Drivers must now call _mesa_compute_version and then
   _mesa_initialize_exec_table

 Similarly for VBO:
 * _vbo_CreateContext no longer initializes VBO vtxfmt functions
 * _mesa_initialize_vbo_vtxfmt was added, and drivers must now
   call this function after computing the version

 v3:
  * VBO vtxfmt function programming must also be delayed until
the version is computed.
  * Combined all driver changes into a single patch

 v2:
  * Rather than move version computation earlier into
_mesa_initialize_context, now _mesa_initialize_context will
no longer initialize the exec dispatch table.
  * All drivers should now be modified, whereas in v1, only
i965 and swrast were modified.

 v1:
  * Implement support for swrast  i965 drivers

 Jordan Justen (7):
   mesa: separate exec allocation from initialization
   vbo: add _mesa_initialize_vbo_vtxfmt
   drivers: compute version and then initialize exec table
   mesa dispatch_sanity: call new functions to initialize exec table
   mesa: don't initialize exec dispatch tables in
 _mesa_initialize_context
   mesa: don't initialize VBO vtxfmt in _vbo_CreateContext
   mesa: assert if driver did not compute the version

  src/mapi/glapi/gen/gl_genexec.py   |   21 +
  src/mesa/drivers/dri/i915/i830_context.c   |8 
  src/mesa/drivers/dri/i915/i915_context.c   |8 
  src/mesa/drivers/dri/i965/brw_context.c|8 
  src/mesa/drivers/dri/nouveau/nouveau_context.c |6 ++
  src/mesa/drivers/dri/r200/r200_context.c   |6 ++
  src/mesa/drivers/dri/radeon/radeon_context.c   |6 ++
  src/mesa/drivers/dri/swrast/swrast.c   |8 
  src/mesa/drivers/osmesa/osmesa.c   |9 +
  src/mesa/drivers/windows/gdi/wmesa.c   |7 +++
  src/mesa/drivers/x11/xm_api.c  |7 +++
  src/mesa/main/api_exec.h   |4 ++--
  src/mesa/main/context.c|7 +++
  src/mesa/main/tests/dispatch_sanity.cpp|7 ++-
  src/mesa/main/vtxfmt.c |   20 
  src/mesa/main/vtxfmt.h |1 +
  src/mesa/state_tracker/st_context.c|8 
  src/mesa/state_tracker/st_manager.c|2 --
  src/mesa/vbo/vbo_exec.c|4 
  src/mesa/vbo/vbo_exec_api.c|4 
  src/mesa/vbo/vbo_save_api.c|1 -
  21 files changed, 122 insertions(+), 30 deletions(-)

 --
 1.7.10.4

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/6] st/mesa: add texture buffer object support to state tracker

2012-12-06 Thread Dave Airlie
 +  } else {
 + const struct st_texture_image *firstImage =
 +
 st_texture_image(stObj-base.Image[0][stObj-base.BaseLevel]);
 + texFormat = firstImage-base.TexFormat;
 +  }
 +  firstImageFormat = st_mesa_format_to_pipe_format(texFormat);


 I haven't double-checked, but I thought we had a utility function for
 getting a texture's format using code like that.

I can't find one, not sure I've seen this pattern anywhere else, at
least a grep for BufferObjectFormat doesn't hit it.

removed the blank line as well.

Thanks,
Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/4] gallivm, llvmpipe, draw: Support multiple constant buffers.

2012-12-06 Thread Dave Airlie
On Fri, Dec 7, 2012 at 3:35 AM,  jfons...@vmware.com wrote:
 From: José Fonseca jfons...@vmware.com

 Support 4 (defined in LP_MAX_TGSI_CONST_BUFFERS) as opposed to 32 (as
 defined by PIPE_MAX_CONSTANT_BUFFERS) because that would make the jit
 context become unnecessarily large.

For GL ARB_uniform_buffer_object, we need at least 13 from what I can see.

So maybe we should consider aiming for that.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 00/13] ETC2 support resend

2012-12-06 Thread Ian Romanick
These patches have already circulated through the list once.  Since that
time, Paul modified a couple of the patches so that every patch will
build (for git-bisect happiness), and Anuj has fixed a few bugs (and
squashed the bug fixes in to the original patches).

If there are no objections, I'd like to commit these to master tomorrow
afternoon.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/13] mesa: Add new MESA_FORMATs for ETC2 compressed textures

2012-12-06 Thread Ian Romanick
From: Anuj Phogat anuj.pho...@gmail.com

OpenGL ES 3.0 requires support for ETC2 textures.
This patch adds new MESA_FORMATs for the following ETC2 texture
formats:
 GL_COMPRESSED_RGB8_ETC2
 GL_COMPRESSED_SRGB8_ETC2
 GL_COMPRESSED_RGBA8_ETC2_EAC
 GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC
 GL_COMPRESSED_R11_EAC
 GL_COMPRESSED_RG11_EAC
 GL_COMPRESSED_SIGNED_R11_EAC
 GL_COMPRESSED_SIGNED_RG11_EAC
 MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1
 MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1

The above formats are currently available only in GLES 3.0.

v2: Add entries in texfetch_funcs[] array.

Signed-off-by: Anuj Phogat anuj.pho...@gmail.com
Reviewed-by: Brian Paul bri...@vmware.com

v3 (Paul Berry stereotype...@gmail.com): comment out symbols that
are not implemented yet, so that this commit compiles on its own;
future commits will uncomment the symbols as they become available.
---
 src/mesa/main/format_unpack.c |  75 +++-
 src/mesa/main/formats.c   | 133 ++
 src/mesa/main/formats.h   |  10 
 src/mesa/main/glformats.c |  21 +++
 src/mesa/main/texcompress.c   | 100 +++
 src/mesa/main/texformat.c |  37 
 src/mesa/main/teximage.c  |  21 +++
 src/mesa/main/texstore.c  |  12 
 src/mesa/swrast/s_texfetch.c  |  60 +++
 9 files changed, 468 insertions(+), 1 deletion(-)

diff --git a/src/mesa/main/format_unpack.c b/src/mesa/main/format_unpack.c
index 04fd1d6..7bbdebb 100644
--- a/src/mesa/main/format_unpack.c
+++ b/src/mesa/main/format_unpack.c
@@ -1337,6 +1337,68 @@ unpack_ETC1_RGB8(const void *src, GLfloat dst[][4], 
GLuint n)
 }
 
 static void
+unpack_ETC2_RGB8(const void *src, GLfloat dst[][4], GLuint n)
+{
+   /* XXX to do */
+}
+
+static void
+unpack_ETC2_SRGB8(const void *src, GLfloat dst[][4], GLuint n)
+{
+   /* XXX to do */
+}
+
+static void
+unpack_ETC2_RGBA8_EAC(const void *src, GLfloat dst[][4], GLuint n)
+{
+   /* XXX to do */
+}
+
+static void
+unpack_ETC2_SRGB8_ALPHA8_EAC(const void *src, GLfloat dst[][4], GLuint n)
+{
+   /* XXX to do */
+}
+
+static void
+unpack_ETC2_R11_EAC(const void *src, GLfloat dst[][4], GLuint n)
+{
+   /* XXX to do */
+}
+
+static void
+unpack_ETC2_RG11_EAC(const void *src, GLfloat dst[][4], GLuint n)
+{
+   /* XXX to do */
+}
+
+static void
+unpack_ETC2_SIGNED_R11_EAC(const void *src, GLfloat dst[][4], GLuint n)
+{
+   /* XXX to do */
+}
+
+static void
+unpack_ETC2_SIGNED_RG11_EAC(const void *src, GLfloat dst[][4], GLuint n)
+{
+   /* XXX to do */
+}
+
+static void
+unpack_ETC2_RGB8_PUNCHTHROUGH_ALPHA1(const void *src, GLfloat dst[][4],
+  GLuint n)
+{
+   /* XXX to do */
+}
+
+static void
+unpack_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1(const void *src, GLfloat dst[][4],
+  GLuint n)
+{
+   /* XXX to do */
+}
+
+static void
 unpack_SIGNED_A8(const void *src, GLfloat dst[][4], GLuint n)
 {
const GLbyte *s = ((const GLbyte *) src);
@@ -1585,7 +1647,18 @@ get_unpack_rgba_function(gl_format format)
   table[MESA_FORMAT_SIGNED_LA_LATC2] = unpack_SIGNED_LA_LATC2;
 
   table[MESA_FORMAT_ETC1_RGB8] = unpack_ETC1_RGB8;
-
+  table[MESA_FORMAT_ETC2_RGB8] = unpack_ETC2_RGB8;
+  table[MESA_FORMAT_ETC2_SRGB8] = unpack_ETC2_SRGB8;
+  table[MESA_FORMAT_ETC2_RGBA8_EAC] = unpack_ETC2_RGBA8_EAC;
+  table[MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC] = unpack_ETC2_SRGB8_ALPHA8_EAC;
+  table[MESA_FORMAT_ETC2_R11_EAC] = unpack_ETC2_R11_EAC;
+  table[MESA_FORMAT_ETC2_RG11_EAC] = unpack_ETC2_RG11_EAC;
+  table[MESA_FORMAT_ETC2_SIGNED_R11_EAC] = unpack_ETC2_SIGNED_R11_EAC;
+  table[MESA_FORMAT_ETC2_SIGNED_RG11_EAC] = unpack_ETC2_SIGNED_RG11_EAC;
+  table[MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1] =
+ unpack_ETC2_RGB8_PUNCHTHROUGH_ALPHA1;
+  table[MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1] =
+ unpack_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1;
   table[MESA_FORMAT_SIGNED_A8] = unpack_SIGNED_A8;
   table[MESA_FORMAT_SIGNED_L8] = unpack_SIGNED_L8;
   table[MESA_FORMAT_SIGNED_AL88] = unpack_SIGNED_AL88;
diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c
index df23af1..47a1d68 100644
--- a/src/mesa/main/formats.c
+++ b/src/mesa/main/formats.c
@@ -1398,6 +1398,106 @@ static struct gl_format_info 
format_info[MESA_FORMAT_COUNT] =
   4, 4, 8 /* 8 bytes per 4x4 block */
},
 
+   {
+  MESA_FORMAT_ETC2_RGB8,
+  MESA_FORMAT_ETC2_RGB8,
+  GL_RGB,
+  GL_UNSIGNED_NORMALIZED,
+  8, 8, 8, 0,
+  0, 0, 0, 0, 0,
+  4, 4, 8 /* 8 bytes per 4x4 block */
+   },
+
+   {
+  MESA_FORMAT_ETC2_SRGB8,
+  MESA_FORMAT_ETC2_SRGB8,
+  GL_RGB,
+  GL_UNSIGNED_NORMALIZED,
+  8, 8, 8, 0,
+  0, 0, 0, 0, 0,
+  4, 4, 8 /* 8 bytes per 4x4 block */
+   },
+
+   {
+  MESA_FORMAT_ETC2_RGBA8_EAC,
+  MESA_FORMAT_ETC2_RGBA8_EAC,
+  GL_RGBA,
+  

[Mesa-dev] [PATCH 02/13] mesa: Make nonlinear_to_linear() function available outside file

2012-12-06 Thread Ian Romanick
From: Anuj Phogat anuj.pho...@gmail.com

This patch makes the nonlinear_to_linear() function non-static and non-inline
(renaming it to _mesa_nonlinear_to_linear()) so that it is available outside
format_unpack.c. It also removes the duplicate copies in other files.

Signed-off-by: Anuj Phogat anuj.pho...@gmail.com
Reviewed-by: Brian Paul bri...@vmware.com
---
 src/mesa/main/format_unpack.c| 26 ++--
 src/mesa/main/format_unpack.h|  3 +++
 src/mesa/main/texcompress_s3tc.c | 52 ++--
 3 files changed, 29 insertions(+), 52 deletions(-)

diff --git a/src/mesa/main/format_unpack.c b/src/mesa/main/format_unpack.c
index 7bbdebb..d34a27b 100644
--- a/src/mesa/main/format_unpack.c
+++ b/src/mesa/main/format_unpack.c
@@ -57,8 +57,8 @@ struct z32f_x24s8
  * linear RGB value in [0, 1].
  * Implemented with a 256-entry lookup table.
  */
-static inline GLfloat
-nonlinear_to_linear(GLubyte cs8)
+GLfloat
+_mesa_nonlinear_to_linear(GLubyte cs8)
 {
static GLfloat table[256];
static GLboolean tableReady = GL_FALSE;
@@ -742,9 +742,9 @@ unpack_SRGB8(const void *src, GLfloat dst[][4], GLuint n)
const GLubyte *s = (const GLubyte *) src;
GLuint i;
for (i = 0; i  n; i++) {
-  dst[i][RCOMP] = nonlinear_to_linear(s[i*3+2]);
-  dst[i][GCOMP] = nonlinear_to_linear(s[i*3+1]);
-  dst[i][BCOMP] = nonlinear_to_linear(s[i*3+0]);
+  dst[i][RCOMP] = _mesa_nonlinear_to_linear(s[i*3+2]);
+  dst[i][GCOMP] = _mesa_nonlinear_to_linear(s[i*3+1]);
+  dst[i][BCOMP] = _mesa_nonlinear_to_linear(s[i*3+0]);
   dst[i][ACOMP] = 1.0F;
}
 }
@@ -755,9 +755,9 @@ unpack_SRGBA8(const void *src, GLfloat dst[][4], GLuint n)
const GLuint *s = ((const GLuint *) src);
GLuint i;
for (i = 0; i  n; i++) {
-  dst[i][RCOMP] = nonlinear_to_linear( (s[i]  24) );
-  dst[i][GCOMP] = nonlinear_to_linear( (s[i]  16)  0xff );
-  dst[i][BCOMP] = nonlinear_to_linear( (s[i]   8)  0xff );
+  dst[i][RCOMP] = _mesa_nonlinear_to_linear( (s[i]  24) );
+  dst[i][GCOMP] = _mesa_nonlinear_to_linear( (s[i]  16)  0xff );
+  dst[i][BCOMP] = _mesa_nonlinear_to_linear( (s[i]   8)  0xff );
   dst[i][ACOMP] = UBYTE_TO_FLOAT( s[i]  0xff ); /* linear! */
}
 }
@@ -768,9 +768,9 @@ unpack_SARGB8(const void *src, GLfloat dst[][4], GLuint n)
const GLuint *s = ((const GLuint *) src);
GLuint i;
for (i = 0; i  n; i++) {
-  dst[i][RCOMP] = nonlinear_to_linear( (s[i]  16)  0xff );
-  dst[i][GCOMP] = nonlinear_to_linear( (s[i]   8)  0xff );
-  dst[i][BCOMP] = nonlinear_to_linear( (s[i]  )  0xff );
+  dst[i][RCOMP] = _mesa_nonlinear_to_linear( (s[i]  16)  0xff );
+  dst[i][GCOMP] = _mesa_nonlinear_to_linear( (s[i]   8)  0xff );
+  dst[i][BCOMP] = _mesa_nonlinear_to_linear( (s[i]  )  0xff );
   dst[i][ACOMP] = UBYTE_TO_FLOAT( s[i]  24 ); /* linear! */
}
 }
@@ -783,7 +783,7 @@ unpack_SL8(const void *src, GLfloat dst[][4], GLuint n)
for (i = 0; i  n; i++) {
   dst[i][RCOMP] = 
   dst[i][GCOMP] = 
-  dst[i][BCOMP] = nonlinear_to_linear(s[i]);
+  dst[i][BCOMP] = _mesa_nonlinear_to_linear(s[i]);
   dst[i][ACOMP] = 1.0F;
}
 }
@@ -796,7 +796,7 @@ unpack_SLA8(const void *src, GLfloat dst[][4], GLuint n)
for (i = 0; i  n; i++) {
   dst[i][RCOMP] =
   dst[i][GCOMP] =
-  dst[i][BCOMP] = nonlinear_to_linear(s[i]  0xff);
+  dst[i][BCOMP] = _mesa_nonlinear_to_linear(s[i]  0xff);
   dst[i][ACOMP] = UBYTE_TO_FLOAT(s[i]  8); /* linear! */
}
 }
diff --git a/src/mesa/main/format_unpack.h b/src/mesa/main/format_unpack.h
index aad800d..29c5263 100644
--- a/src/mesa/main/format_unpack.h
+++ b/src/mesa/main/format_unpack.h
@@ -24,6 +24,9 @@
 #ifndef FORMAT_UNPACK_H
 #define FORMAT_UNPACK_H
 
+extern GLfloat
+_mesa_nonlinear_to_linear(GLubyte cs8);
+
 extern void
 _mesa_unpack_rgba_row(gl_format format, GLuint n,
   const void *src, GLfloat dst[][4]);
diff --git a/src/mesa/main/texcompress_s3tc.c b/src/mesa/main/texcompress_s3tc.c
index da77259..476b998 100644
--- a/src/mesa/main/texcompress_s3tc.c
+++ b/src/mesa/main/texcompress_s3tc.c
@@ -45,6 +45,7 @@
 #include texcompress_s3tc.h
 #include texstore.h
 #include swrast/s_context.h
+#include format_unpack.h
 
 
 #if defined(_WIN32) || defined(WIN32)
@@ -57,33 +58,6 @@
 #define DXTN_LIBNAME libtxc_dxtn.so
 #endif
 
-/**
- * Convert an 8-bit sRGB value from non-linear space to a
- * linear RGB value in [0, 1].
- * Implemented with a 256-entry lookup table.
- */
-static inline GLfloat
-nonlinear_to_linear(GLubyte cs8)
-{
-   static GLfloat table[256];
-   static GLboolean tableReady = GL_FALSE;
-   if (!tableReady) {
-  /* compute lookup table now */
-  GLuint i;
-  for (i = 0; i  256; i++) {
- const GLfloat cs = UBYTE_TO_FLOAT(i);
- if (cs = 0.04045) {
-table[i] = cs / 12.92f;
- }
- else {
-table[i] = (GLfloat) pow((cs + 0.055) / 1.055, 2.4);
- }
-  }
-  tableReady = GL_TRUE;
-  

[Mesa-dev] [PATCH 03/13] mesa: Add decoding functions for GL_COMPRESSED_RGB8_ETC2

2012-12-06 Thread Ian Romanick
From: Anuj Phogat anuj.pho...@gmail.com

Data in GL_COMPRESSED_RGB8_ETC2 format is decoded and stored in
MESA_FORMAT_RGBX_REV.

v2: Use CLAMP macro and stdbool.h
Signed-off-by: Anuj Phogat anuj.pho...@gmail.com
Reviewed-by: Brian Paul bri...@vmware.com
---
 src/mesa/main/texcompress.c |   2 +-
 src/mesa/main/texcompress_etc.c | 460 +++-
 src/mesa/main/texcompress_etc.h |  16 +-
 src/mesa/main/texstore.c|   2 +-
 src/mesa/swrast/s_texfetch.c|   2 +-
 5 files changed, 469 insertions(+), 13 deletions(-)

diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c
index 58b346b..3394be9 100644
--- a/src/mesa/main/texcompress.c
+++ b/src/mesa/main/texcompress.c
@@ -608,7 +608,7 @@ _mesa_decompress_image(gl_format format, GLuint width, 
GLuint height,
 
/* ETC2 formats */
case MESA_FORMAT_ETC2_RGB8:
-  /* fetch = _mesa_fetch_texel_2d_f_etc2_rgb8; -- not implemented yet */
+  fetch = _mesa_fetch_texel_2d_f_etc2_rgb8;
   break;
case MESA_FORMAT_ETC2_SRGB8:
   /* fetch = _mesa_fetch_texel_2d_f_etc2_srgb8; -- not implemented yet */
diff --git a/src/mesa/main/texcompress_etc.c b/src/mesa/main/texcompress_etc.c
index c645f52..42ef74a 100644
--- a/src/mesa/main/texcompress_etc.c
+++ b/src/mesa/main/texcompress_etc.c
@@ -24,9 +24,11 @@
 /**
  * \file texcompress_etc.c
  * GL_OES_compressed_ETC1_RGB8_texture support.
+ * Supported ETC2 texture formats are:
+ * GL_COMPRESSED_RGB8_ETC2
  */
 
-
+#include stdbool.h
 #include mfeatures.h
 #include texcompress.h
 #include texcompress_etc.h
@@ -34,14 +36,22 @@
 #include macros.h
 #include swrast/s_context.h
 
-GLboolean
-_mesa_texstore_etc1_rgb8(TEXSTORE_PARAMS)
-{
-   /* GL_ETC1_RGB8_OES is only valid in glCompressedTexImage2D */
-   ASSERT(0);
+struct etc2_block {
+   int distance;
+   uint32_t pixel_indices;
+   const int *modifier_tables[2];
+   bool flipped;
+   bool is_ind_mode;
+   bool is_diff_mode;
+   bool is_t_mode;
+   bool is_h_mode;
+   bool is_planar_mode;
+   uint8_t base_colors[3][3];
+   uint8_t paint_colors[4][3];
+};
 
-   return GL_FALSE;
-}
+static const int etc2_distance_table[8] = {
+   3, 6, 11, 16, 23, 32, 41, 64 };
 
 /* define etc1_parse_block and etc. */
 #define UINT8_TYPE GLubyte
@@ -50,6 +60,15 @@ _mesa_texstore_etc1_rgb8(TEXSTORE_PARAMS)
 #undef TAG
 #undef UINT8_TYPE
 
+GLboolean
+_mesa_texstore_etc1_rgb8(TEXSTORE_PARAMS)
+{
+   /* GL_ETC1_RGB8_OES is only valid in glCompressedTexImage2D */
+   ASSERT(0);
+
+   return GL_FALSE;
+}
+
 void
 _mesa_fetch_texel_2d_f_etc1_rgb8(const struct swrast_texture_image *texImage,
  GLint i, GLint j, GLint k, GLfloat *texel)
@@ -101,3 +120,428 @@ _mesa_etc1_unpack_rgba(uint8_t *dst_row,
 src_row, src_stride,
 src_width, src_height);
 }
+
+static uint8_t
+etc2_base_color1_t_mode(const uint8_t *in, GLuint index)
+{
+   uint8_t R1a = 0, x = 0;
+   /* base col 1 = extend_4to8bits( (R1a << 2) | R1b, G1, B1) */
+   switch(index) {
+   case 0:
+      R1a = (in[0] >> 3) & 0x3;
+      x = ((R1a << 2) | (in[0] & 0x3));
+      break;
+   case 1:
+      x = ((in[1] >> 4) & 0xf);
+      break;
+   case 2:
+      x = (in[1] & 0xf);
+      break;
+   default:
+      /* invalid index */
+      break;
+   }
+   return ((x << 4) | (x & 0xf));
+}
+
+static uint8_t
+etc2_base_color2_t_mode(const uint8_t *in, GLuint index)
+{
+   uint8_t x = 0;
+   /*extend 4to8bits(R2, G2, B2)*/
+   switch(index) {
+   case 0:
+      x = ((in[2] >> 4) & 0xf );
+      break;
+   case 1:
+      x = (in[2] & 0xf);
+      break;
+   case 2:
+      x = ((in[3] >> 4) & 0xf);
+      break;
+   default:
+      /* invalid index */
+      break;
+   }
+   return ((x << 4) | (x & 0xf));
+}
+
+static uint8_t
+etc2_base_color1_h_mode(const uint8_t *in, GLuint index)
+{
+   uint8_t x = 0;
+   /* base col 1 = extend 4to8bits(R1, (G1a << 1) | G1b, (B1a << 3) | B1b) */
+   switch(index) {
+   case 0:
+      x = ((in[0] >> 3) & 0xf);
+      break;
+   case 1:
+      x = (((in[0] & 0x7) << 1) | ((in[1] >> 4) & 0x1));
+      break;
+   case 2:
+      x = ((in[1] & 0x8) |
+           (((in[1] & 0x3) << 1) | ((in[2] >> 7) & 0x1)));
+      break;
+   default:
+      /* invalid index */
+      break;
+   }
+   return ((x << 4) | (x & 0xf));
+ }
+
+static uint8_t
+etc2_base_color2_h_mode(const uint8_t *in, GLuint index)
+{
+   uint8_t x = 0;
+   /* base col 2 = extend 4to8bits(R2, G2, B2) */
+   switch(index) {
+   case 0:
+      x = ((in[2] >> 3) & 0xf );
+      break;
+   case 1:
+      x = (((in[2] & 0x7) << 1) | ((in[3] >> 7) & 0x1));
+      break;
+   case 2:
+      x = ((in[3] >> 3) & 0xf);
+      break;
+   default:
+      /* invalid index */
+      break;
+   }
+   return ((x << 4) | (x & 0xf));
+ }
+
+static uint8_t
+etc2_base_color_o_planar(const uint8_t *in, GLuint index)
+{
+   GLuint tmp;
+   switch(index) {
+   case 0:
+  tmp = ((in[0]  1)  0x3f); /* RO */
+  return ((tmp  2) | (tmp  4));
+   case 1:
+  tmp = (((in[0]  0x1)  6) 

[Mesa-dev] [PATCH 04/13] mesa: Add decoding functions for GL_COMPRESSED_SRGB8_ETC2

2012-12-06 Thread Ian Romanick
From: Anuj Phogat anuj.pho...@gmail.com

Data in GL_COMPRESSED_SRGB8_ETC2 format is decoded and stored
in MESA_FORMAT_SARGB8.

Signed-off-by: Anuj Phogat anuj.pho...@gmail.com
Reviewed-by: Brian Paul bri...@vmware.com
---
 src/mesa/main/texcompress.c |  2 +-
 src/mesa/main/texcompress_etc.c | 83 +++--
 src/mesa/main/texcompress_etc.h |  7 
 src/mesa/main/texstore.c|  2 +-
 src/mesa/swrast/s_texfetch.c|  2 +-
 5 files changed, 89 insertions(+), 7 deletions(-)

diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c
index 3394be9..3ed0e85 100644
--- a/src/mesa/main/texcompress.c
+++ b/src/mesa/main/texcompress.c
@@ -611,7 +611,7 @@ _mesa_decompress_image(gl_format format, GLuint width, 
GLuint height,
   fetch = _mesa_fetch_texel_2d_f_etc2_rgb8;
   break;
case MESA_FORMAT_ETC2_SRGB8:
-  /* fetch = _mesa_fetch_texel_2d_f_etc2_srgb8; -- not implemented yet */
+  fetch = _mesa_fetch_texel_2d_f_etc2_srgb8;
   break;
case MESA_FORMAT_ETC2_RGBA8_EAC:
   /* fetch = _mesa_fetch_texel_2d_f_etc2_rgba8_eac; -- not implemented yet 
*/
diff --git a/src/mesa/main/texcompress_etc.c b/src/mesa/main/texcompress_etc.c
index 42ef74a..2aa8b6a 100644
--- a/src/mesa/main/texcompress_etc.c
+++ b/src/mesa/main/texcompress_etc.c
@@ -26,6 +26,7 @@
  * GL_OES_compressed_ETC1_RGB8_texture support.
  * Supported ETC2 texture formats are:
  * GL_COMPRESSED_RGB8_ETC2
+ * GL_COMPRESSED_SRGB8_ETC2
  */
 
 #include stdbool.h
@@ -35,6 +36,7 @@
 #include texstore.h
 #include macros.h
 #include swrast/s_context.h
+#include format_unpack.h
 
 struct etc2_block {
int distance;
@@ -490,6 +492,45 @@ etc2_unpack_rgb8(uint8_t *dst_row,
}
 }
 
+static void
+etc2_unpack_srgb8(uint8_t *dst_row,
+                  unsigned dst_stride,
+                  const uint8_t *src_row,
+                  unsigned src_stride,
+                  unsigned width,
+                  unsigned height)
+{
+   const unsigned bw = 4, bh = 4, bs = 8, comps = 4;
+   struct etc2_block block;
+   unsigned x, y, i, j;
+   uint8_t tmp;
+
+   for (y = 0; y < height; y += bh) {
+      const uint8_t *src = src_row;
+
+      for (x = 0; x < width; x+= bw) {
+         etc2_rgb8_parse_block(&block, src);
+
+         for (j = 0; j < bh; j++) {
+            uint8_t *dst = dst_row + (y + j) * dst_stride + x * comps;
+            for (i = 0; i < bw; i++) {
+               etc2_rgb8_fetch_texel(&block, i, j, dst);
+               /* Convert to MESA_FORMAT_SARGB8 */
+               tmp = dst[0];
+               dst[0] = dst[2];
+               dst[2] = tmp;
+               dst[3] = 255;
+
+               dst += comps;
+            }
+         }
+         src += bs;
+       }
+
+      src_row += src_stride;
+    }
+}
+
 /* ETC2 texture formats are valid in glCompressedTexImage2D and
  * glCompressedTexSubImage2D functions */
 GLboolean
@@ -500,6 +541,14 @@ _mesa_texstore_etc2_rgb8(TEXSTORE_PARAMS)
return GL_FALSE;
 }
 
+GLboolean
+_mesa_texstore_etc2_srgb8(TEXSTORE_PARAMS)
+{
+   ASSERT(0);
+
+   return GL_FALSE;
+}
+
 void
 _mesa_fetch_texel_2d_f_etc2_rgb8(const struct swrast_texture_image *texImage,
  GLint i, GLint j, GLint k, GLfloat *texel)
@@ -520,9 +569,30 @@ _mesa_fetch_texel_2d_f_etc2_rgb8(const struct 
swrast_texture_image *texImage,
texel[ACOMP] = 1.0f;
 }
 
+void
+_mesa_fetch_texel_2d_f_etc2_srgb8(const struct swrast_texture_image *texImage,
+                                  GLint i, GLint j, GLint k, GLfloat *texel)
+{
+   struct etc2_block block;
+   uint8_t dst[3];
+   const uint8_t *src;
+
+   src = texImage->Map +
+      (((texImage->RowStride + 3) / 4) * (j / 4) + (i / 4)) * 8;
+
+   etc2_rgb8_parse_block(&block, src);
+   etc2_rgb8_fetch_texel(&block, i % 4, j % 4, dst);
+
+   texel[RCOMP] = _mesa_nonlinear_to_linear(dst[0]);
+   texel[GCOMP] = _mesa_nonlinear_to_linear(dst[1]);
+   texel[BCOMP] = _mesa_nonlinear_to_linear(dst[2]);
+   texel[ACOMP] = 1.0f;
+}
 
 /**
- * Decode texture data in format `MESA_FORMAT_ETC2_RGB8`
+ * Decode texture data in any one of following formats:
+ * `MESA_FORMAT_ETC2_RGB8`
+ * `MESA_FORMAT_ETC2_SRGB8`
  *
  * The size of the source data must be a multiple of the ETC2 block size
  * even if the texture image's dimensions are not aligned to 4.
@@ -541,7 +611,12 @@ _mesa_unpack_etc2_format(uint8_t *dst_row,
  unsigned src_height,
  gl_format format)
 {
-   etc2_unpack_rgb8(dst_row, dst_stride,
-src_row, src_stride,
-src_width, src_height);
+   if (format == MESA_FORMAT_ETC2_RGB8)
+  etc2_unpack_rgb8(dst_row, dst_stride,
+   src_row, src_stride,
+   src_width, src_height);
+   else if (format == MESA_FORMAT_ETC2_SRGB8)
+  etc2_unpack_srgb8(dst_row, dst_stride,
+src_row, src_stride,
+src_width, src_height);
 }
diff --git 

[Mesa-dev] [PATCH 05/13] mesa: Add decoding functions for GL_COMPRESSED_RGBA8_ETC2_EAC

2012-12-06 Thread Ian Romanick
From: Anuj Phogat anuj.pho...@gmail.com

Data in GL_COMPRESSED_RGBA8_ETC2_EAC format is decoded and stored
in MESA_FORMAT_RGBA_REV.

Signed-off-by: Anuj Phogat anuj.pho...@gmail.com
Reviewed-by: Brian Paul bri...@vmware.com
---
 src/mesa/main/texcompress.c |   2 +-
 src/mesa/main/texcompress_etc.c | 145 ++--
 src/mesa/main/texcompress_etc.h |   7 ++
 src/mesa/main/texstore.c|   2 +-
 src/mesa/swrast/s_texfetch.c|   2 +-
 5 files changed, 151 insertions(+), 7 deletions(-)

diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c
index 3ed0e85..7dc337c 100644
--- a/src/mesa/main/texcompress.c
+++ b/src/mesa/main/texcompress.c
@@ -614,7 +614,7 @@ _mesa_decompress_image(gl_format format, GLuint width, 
GLuint height,
   fetch = _mesa_fetch_texel_2d_f_etc2_srgb8;
   break;
case MESA_FORMAT_ETC2_RGBA8_EAC:
-  /* fetch = _mesa_fetch_texel_2d_f_etc2_rgba8_eac; -- not implemented yet 
*/
+  fetch = _mesa_fetch_texel_2d_f_etc2_rgba8_eac;
   break;
case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC:
   /* fetch = _mesa_fetch_texel_2d_f_etc2_srgb8_alpha8_eac; -- not 
implemented yet */
diff --git a/src/mesa/main/texcompress_etc.c b/src/mesa/main/texcompress_etc.c
index 2aa8b6a..26375d8 100644
--- a/src/mesa/main/texcompress_etc.c
+++ b/src/mesa/main/texcompress_etc.c
@@ -27,6 +27,7 @@
  * Supported ETC2 texture formats are:
  * GL_COMPRESSED_RGB8_ETC2
  * GL_COMPRESSED_SRGB8_ETC2
+ * GL_COMPRESSED_RGBA8_ETC2_EAC
  */
 
 #include stdbool.h
@@ -40,7 +41,7 @@
 
 struct etc2_block {
int distance;
-   uint32_t pixel_indices;
+   uint64_t pixel_indices[2];
const int *modifier_tables[2];
bool flipped;
bool is_ind_mode;
@@ -50,11 +51,33 @@ struct etc2_block {
bool is_planar_mode;
uint8_t base_colors[3][3];
uint8_t paint_colors[4][3];
+   uint8_t base_codeword;
+   uint8_t multiplier;
+   uint8_t table_index;
 };
 
 static const int etc2_distance_table[8] = {
3, 6, 11, 16, 23, 32, 41, 64 };
 
+static const int etc2_modifier_tables[16][8] = {
+   {  -3,   -6,   -9,  -15,   2,   5,   8,   14},
+   {  -3,   -7,  -10,  -13,   2,   6,   9,   12},
+   {  -2,   -5,   -8,  -13,   1,   4,   7,   12},
+   {  -2,   -4,   -6,  -13,   1,   3,   5,   12},
+   {  -3,   -6,   -8,  -12,   2,   5,   7,   11},
+   {  -3,   -7,   -9,  -11,   2,   6,   8,   10},
+   {  -4,   -7,   -8,  -11,   3,   6,   7,   10},
+   {  -3,   -5,   -8,  -11,   2,   4,   7,   10},
+   {  -2,   -6,   -8,  -10,   1,   5,   7,9},
+   {  -2,   -5,   -8,  -10,   1,   4,   7,9},
+   {  -2,   -4,   -8,  -10,   1,   3,   7,9},
+   {  -2,   -5,   -7,  -10,   1,   4,   6,9},
+   {  -3,   -4,   -7,  -10,   2,   3,   6,9},
+   {  -1,   -2,   -3,  -10,   0,   1,   2,9},
+   {  -4,   -6,   -8,   -9,   3,   5,   7,8},
+   {  -3,   -5,   -7,   -9,   2,   4,   6,8},
+};
+
 /* define etc1_parse_block and etc. */
 #define UINT8_TYPE GLubyte
 #define TAG(x) x
@@ -402,7 +425,7 @@ etc2_rgb8_parse_block(struct etc2_block *block, const 
uint8_t *src)
   block-flipped = (src[3]  0x1);
}
 
-   block-pixel_indices =
+   block-pixel_indices[0] =
   (src[4]  24) | (src[5]  16) | (src[6]  8) | src[7];
 }
 
@@ -415,8 +438,8 @@ etc2_rgb8_fetch_texel(const struct etc2_block *block,
 
/* get pixel index */
bit = y + x * 4;
-   idx = ((block-pixel_indices  (15 + bit))  0x2) |
- ((block-pixel_indices   (bit))  0x1);
+   idx = ((block-pixel_indices[0]  (15 + bit))  0x2) |
+ ((block-pixel_indices[0]   (bit))  0x1);
 
if (block-is_ind_mode || block-is_diff_mode) {
   /* Use pixel index and subblock to get the modifier */
@@ -459,6 +482,51 @@ etc2_rgb8_fetch_texel(const struct etc2_block *block,
 }
 
 static void
+etc2_alpha8_fetch_texel(const struct etc2_block *block,
+      int x, int y, uint8_t *dst)
+{
+   int modifier, alpha, bit, idx;
+   /* get pixel index */
+   bit = ((3 - y) + (3 - x) * 4) * 3;
+   idx = (block->pixel_indices[1] >> bit) & 0x7;
+   modifier = etc2_modifier_tables[block->table_index][idx];
+   alpha = block->base_codeword + modifier * block->multiplier;
+   dst[3] = etc2_clamp(alpha);
+}
+
+static void
+etc2_alpha8_parse_block(struct etc2_block *block, const uint8_t *src)
+{
+   block->base_codeword = src[0];
+   block->multiplier = (src[1] >> 4) & 0xf;
+   block->table_index = src[1] & 0xf;
+   block->pixel_indices[1] = (((uint64_t)src[2] << 40) |
+                              ((uint64_t)src[3] << 32) |
+                              ((uint64_t)src[4] << 24) |
+                              ((uint64_t)src[5] << 16) |
+                              ((uint64_t)src[6] << 8)  |
+                              ((uint64_t)src[7]));
+}
+
+static void
+etc2_rgba8_parse_block(struct etc2_block *block, const uint8_t *src)
+{
+   /* RGB component is parsed the same way as for MESA_FORMAT_ETC2_RGB8 */
+   etc2_rgb8_parse_block(block, src + 8);
+
+   /* Parse Alpha component */
+   etc2_alpha8_parse_block(block, 

[Mesa-dev] [PATCH 06/13] mesa: Add decoding functions for GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC

2012-12-06 Thread Ian Romanick
From: Anuj Phogat anuj.pho...@gmail.com

Data in GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC format is decoded and stored
in MESA_FORMAT_SARGB8.

Signed-off-by: Anuj Phogat anuj.pho...@gmail.com
Reviewed-by: Brian Paul bri...@vmware.com
---
 src/mesa/main/texcompress.c |  2 +-
 src/mesa/main/texcompress_etc.c | 80 +
 src/mesa/main/texcompress_etc.h |  8 +
 src/mesa/main/texstore.c|  2 +-
 src/mesa/swrast/s_texfetch.c|  2 +-
 5 files changed, 91 insertions(+), 3 deletions(-)

diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c
index 7dc337c..2d4c6df 100644
--- a/src/mesa/main/texcompress.c
+++ b/src/mesa/main/texcompress.c
@@ -617,7 +617,7 @@ _mesa_decompress_image(gl_format format, GLuint width, 
GLuint height,
   fetch = _mesa_fetch_texel_2d_f_etc2_rgba8_eac;
   break;
case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC:
-  /* fetch = _mesa_fetch_texel_2d_f_etc2_srgb8_alpha8_eac; -- not 
implemented yet */
+  fetch = _mesa_fetch_texel_2d_f_etc2_srgb8_alpha8_eac;
   break;
case MESA_FORMAT_ETC2_R11_EAC:
   /* fetch = _mesa_fetch_texel_2d_f_etc2_r11_eac; -- not implemented yet */
diff --git a/src/mesa/main/texcompress_etc.c b/src/mesa/main/texcompress_etc.c
index 26375d8..d704388 100644
--- a/src/mesa/main/texcompress_etc.c
+++ b/src/mesa/main/texcompress_etc.c
@@ -28,6 +28,7 @@
  * GL_COMPRESSED_RGB8_ETC2
  * GL_COMPRESSED_SRGB8_ETC2
  * GL_COMPRESSED_RGBA8_ETC2_EAC
+ * GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC
  */
 
 #include stdbool.h
@@ -635,6 +636,50 @@ etc2_unpack_rgba8(uint8_t *dst_row,
 }
 }
 
+static void
+etc2_unpack_srgb8_alpha8(uint8_t *dst_row,
+                         unsigned dst_stride,
+                         const uint8_t *src_row,
+                         unsigned src_stride,
+                         unsigned width,
+                         unsigned height)
+{
+   /* If internalformat is COMPRESSED_SRGB8_ALPHA8_ETC2_EAC, each 4 × 4 block
+    * of RGBA information is compressed to 128 bits. To decode a block, the
+    * two 64-bit integers int64bitAlpha and int64bitColor are calculated.
+    */
+   const unsigned bw = 4, bh = 4, bs = 16, comps = 4;
+   struct etc2_block block;
+   unsigned x, y, i, j;
+   uint8_t tmp;
+
+   for (y = 0; y < height; y += bh) {
+      const uint8_t *src = src_row;
+
+      for (x = 0; x < width; x+= bw) {
+         etc2_rgba8_parse_block(&block, src);
+
+         for (j = 0; j < bh; j++) {
+            uint8_t *dst = dst_row + (y + j) * dst_stride + x * comps;
+            for (i = 0; i < bw; i++) {
+               etc2_rgba8_fetch_texel(&block, i, j, dst);
+
+               /* Convert to MESA_FORMAT_SARGB8 */
+               tmp = dst[0];
+               dst[0] = dst[2];
+               dst[2] = tmp;
+               dst[3] = dst[3];
+
+               dst += comps;
+            }
+         }
+         src += bs;
+       }
+
+      src_row += src_stride;
+    }
+}
+
 /* ETC2 texture formats are valid in glCompressedTexImage2D and
  * glCompressedTexSubImage2D functions */
 GLboolean
@@ -661,6 +706,14 @@ _mesa_texstore_etc2_rgba8_eac(TEXSTORE_PARAMS)
return GL_FALSE;
 }
 
+GLboolean
+_mesa_texstore_etc2_srgb8_alpha8_eac(TEXSTORE_PARAMS)
+{
+   ASSERT(0);
+
+   return GL_FALSE;
+}
+
 void
 _mesa_fetch_texel_2d_f_etc2_rgb8(const struct swrast_texture_image *texImage,
  GLint i, GLint j, GLint k, GLfloat *texel)
@@ -721,11 +774,34 @@ _mesa_fetch_texel_2d_f_etc2_rgba8_eac(const struct 
swrast_texture_image *texImag
texel[ACOMP] = UBYTE_TO_FLOAT(dst[3]);
 }
 
+void
+_mesa_fetch_texel_2d_f_etc2_srgb8_alpha8_eac(const struct
+                                             swrast_texture_image *texImage,
+                                             GLint i, GLint j,
+                                             GLint k, GLfloat *texel)
+{
+   struct etc2_block block;
+   uint8_t dst[4];
+   const uint8_t *src;
+
+   src = texImage->Map +
+      (((texImage->RowStride + 3) / 4) * (j / 4) + (i / 4)) * 16;
+
+   etc2_rgba8_parse_block(&block, src);
+   etc2_rgba8_fetch_texel(&block, i % 4, j % 4, dst);
+
+   texel[RCOMP] = _mesa_nonlinear_to_linear(dst[0]);
+   texel[GCOMP] = _mesa_nonlinear_to_linear(dst[1]);
+   texel[BCOMP] = _mesa_nonlinear_to_linear(dst[2]);
+   texel[ACOMP] = UBYTE_TO_FLOAT(dst[3]);
+}
+
 /**
  * Decode texture data in any one of following formats:
  * `MESA_FORMAT_ETC2_RGB8`
  * `MESA_FORMAT_ETC2_SRGB8`
  * `MESA_FORMAT_ETC2_RGBA8_EAC`
+ * `MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC`
  *
  * The size of the source data must be a multiple of the ETC2 block size
  * even if the texture image's dimensions are not aligned to 4.
@@ -756,4 +832,8 @@ _mesa_unpack_etc2_format(uint8_t *dst_row,
   etc2_unpack_rgba8(dst_row, dst_stride,
 src_row, src_stride,
 src_width, src_height);
+   else if (format == MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC)
+  etc2_unpack_srgb8_alpha8(dst_row, 

[Mesa-dev] [PATCH 08/13] mesa: Add decoding functions for GL_COMPRESSED_RG11_EAC

2012-12-06 Thread Ian Romanick
From: Anuj Phogat anuj.pho...@gmail.com

Data in GL_COMPRESSED_RG11_EAC format is decoded and stored in
MESA_FORMAT_RG1616.

Signed-off-by: Anuj Phogat anuj.pho...@gmail.com
Reviewed-by: Brian Paul bri...@vmware.com
---
 src/mesa/main/texcompress.c |  2 +-
 src/mesa/main/texcompress_etc.c | 89 +
 src/mesa/main/texcompress_etc.h |  7 +++-
 src/mesa/main/texstore.c|  2 +-
 src/mesa/swrast/s_texfetch.c|  2 +-
 5 files changed, 98 insertions(+), 4 deletions(-)

diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c
index 3d34d9b..f9148d4 100644
--- a/src/mesa/main/texcompress.c
+++ b/src/mesa/main/texcompress.c
@@ -623,7 +623,7 @@ _mesa_decompress_image(gl_format format, GLuint width, 
GLuint height,
   fetch = _mesa_fetch_texel_2d_f_etc2_r11_eac;
   break;
case MESA_FORMAT_ETC2_RG11_EAC:
-  /* fetch = _mesa_fetch_texel_2d_f_etc2_rg11_eac; -- not implemented yet 
*/
+  fetch = _mesa_fetch_texel_2d_f_etc2_rg11_eac;
   break;
case MESA_FORMAT_ETC2_SIGNED_R11_EAC:
   /* fetch = _mesa_fetch_texel_2d_f_etc2_signed_r11_eac; -- not 
implemented yet */
diff --git a/src/mesa/main/texcompress_etc.c b/src/mesa/main/texcompress_etc.c
index 7137eca..78af13a 100644
--- a/src/mesa/main/texcompress_etc.c
+++ b/src/mesa/main/texcompress_etc.c
@@ -30,6 +30,7 @@
  * GL_COMPRESSED_RGBA8_ETC2_EAC
  * GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC
  * GL_COMPRESSED_R11_EAC
+ * GL_COMPRESSED_RG11_EAC
  */
 
 #include stdbool.h
@@ -763,6 +764,54 @@ etc2_unpack_r11(uint8_t *dst_row,
 }
 }
 
+static void
+etc2_unpack_rg11(uint8_t *dst_row,
+ unsigned dst_stride,
+ const uint8_t *src_row,
+ unsigned src_stride,
+ unsigned width,
+ unsigned height)
+{
+   /* If internalformat is COMPRESSED_RG11_EAC, each 4 × 4 block of
+  RG color information is compressed to 128 bits.
+   */
+   const unsigned bw = 4, bh = 4, bs = 16, comps = 2, comp_size = 2;
+   struct etc2_block block;
+   unsigned x, y, i, j;
+
+   for (y = 0; y  height; y += bh) {
+  const uint8_t *src = src_row;
+
+  for (x = 0; x  width; x+= bw) {
+ /* red component */
+ etc2_r11_parse_block(block, src);
+
+ for (j = 0; j  bh; j++) {
+uint8_t *dst = dst_row + (y + j) * dst_stride +
+   x * comps * comp_size;
+for (i = 0; i  bw; i++) {
+   etc2_r11_fetch_texel(block, i, j, dst);
+   dst += comps * comp_size;
+}
+ }
+ /* green component */
+ etc2_r11_parse_block(block, src + 8);
+
+ for (j = 0; j  bh; j++) {
+uint8_t *dst = dst_row + (y + j) * dst_stride +
+   x * comps * comp_size;
+for (i = 0; i  bw; i++) {
+   etc2_r11_fetch_texel(block, i, j, dst + comp_size);
+   dst += comps * comp_size;
+}
+ }
+ src += bs;
+   }
+
+  src_row += src_stride;
+}
+}
+
 /* ETC2 texture formats are valid in glCompressedTexImage2D and
  * glCompressedTexSubImage2D functions */
 GLboolean
@@ -805,6 +854,14 @@ _mesa_texstore_etc2_r11_eac(TEXSTORE_PARAMS)
return GL_FALSE;
 }
 
+GLboolean
+_mesa_texstore_etc2_rg11_eac(TEXSTORE_PARAMS)
+{
+   ASSERT(0);
+
+   return GL_FALSE;
+}
+
 void
 _mesa_fetch_texel_2d_f_etc2_rgb8(const struct swrast_texture_image *texImage,
  GLint i, GLint j, GLint k, GLfloat *texel)
@@ -907,6 +964,33 @@ _mesa_fetch_texel_2d_f_etc2_r11_eac(const struct 
swrast_texture_image *texImage,
texel[ACOMP] = 1.0f;
 }
 
+void
+_mesa_fetch_texel_2d_f_etc2_rg11_eac(const struct
+                                     swrast_texture_image *texImage,
+                                     GLint i, GLint j,
+                                     GLint k, GLfloat *texel)
+{
+   struct etc2_block block;
+   GLushort dst[2];
+   const uint8_t *src;
+
+   src = texImage->Map +
+      (((texImage->RowStride + 3) / 4) * (j / 4) + (i / 4)) * 16;
+
+   /* red component */
+   etc2_r11_parse_block(&block, src);
+   etc2_r11_fetch_texel(&block, i % 4, j % 4, (uint8_t *)dst);
+
+   /* green component */
+   etc2_r11_parse_block(&block, src + 8);
+   etc2_r11_fetch_texel(&block, i % 4, j % 4, (uint8_t *)(dst + 1));
+
+   texel[RCOMP] = USHORT_TO_FLOAT(dst[0]);
+   texel[GCOMP] = USHORT_TO_FLOAT(dst[1]);
+   texel[BCOMP] = 0.0f;
+   texel[ACOMP] = 1.0f;
+}
+
 /**
  * Decode texture data in any one of following formats:
  * `MESA_FORMAT_ETC2_RGB8`
@@ -914,6 +998,7 @@ _mesa_fetch_texel_2d_f_etc2_r11_eac(const struct 
swrast_texture_image *texImage,
  * `MESA_FORMAT_ETC2_RGBA8_EAC`
  * `MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC`
  * `MESA_FORMAT_ETC2_R11_EAC`
+ * `MESA_FORMAT_ETC2_RG11_EAC`
  *
  * The size of the source data must be a multiple of the ETC2 block size
  * even if the texture image's dimensions are not aligned to 4.
@@ -952,4 +1037,8 @@ 

[Mesa-dev] [PATCH 07/13] mesa: Add decoding functions for GL_COMPRESSED_R11_EAC

2012-12-06 Thread Ian Romanick
From: Anuj Phogat anuj.pho...@gmail.com

Data in GL_COMPRESSED_R11_EAC format is decoded and stored in
MESA_FORMAT_R16.

Signed-off-by: Anuj Phogat anuj.pho...@gmail.com
Reviewed-by: Brian Paul bri...@vmware.com
---
 src/mesa/main/texcompress.c |   2 +-
 src/mesa/main/texcompress_etc.c | 122 +++-
 src/mesa/main/texcompress_etc.h |   6 ++
 src/mesa/main/texstore.c|   2 +-
 src/mesa/swrast/s_texfetch.c|   2 +-
 5 files changed, 128 insertions(+), 6 deletions(-)

diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c
index 2d4c6df..3d34d9b 100644
--- a/src/mesa/main/texcompress.c
+++ b/src/mesa/main/texcompress.c
@@ -620,7 +620,7 @@ _mesa_decompress_image(gl_format format, GLuint width, 
GLuint height,
   fetch = _mesa_fetch_texel_2d_f_etc2_srgb8_alpha8_eac;
   break;
case MESA_FORMAT_ETC2_R11_EAC:
-  /* fetch = _mesa_fetch_texel_2d_f_etc2_r11_eac; -- not implemented yet */
+  fetch = _mesa_fetch_texel_2d_f_etc2_r11_eac;
   break;
case MESA_FORMAT_ETC2_RG11_EAC:
   /* fetch = _mesa_fetch_texel_2d_f_etc2_rg11_eac; -- not implemented yet 
*/
diff --git a/src/mesa/main/texcompress_etc.c b/src/mesa/main/texcompress_etc.c
index d704388..7137eca 100644
--- a/src/mesa/main/texcompress_etc.c
+++ b/src/mesa/main/texcompress_etc.c
@@ -29,6 +29,7 @@
  * GL_COMPRESSED_SRGB8_ETC2
  * GL_COMPRESSED_RGBA8_ETC2_EAC
  * GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC
+ * GL_COMPRESSED_R11_EAC
  */
 
 #include stdbool.h
@@ -304,6 +305,14 @@ etc2_base_color_v_planar(const uint8_t *in, GLuint index)
}
 }
 
+static GLint
+etc2_get_pixel_index(const struct etc2_block *block, int x, int y)
+{
+   int bit = ((3 - y) + (3 - x) * 4) * 3;
+   int idx = (block->pixel_indices[1] >> bit) & 0x7;
+   return idx;
+}
+
 static uint8_t
 etc2_clamp(int color)
 {
@@ -311,6 +320,13 @@ etc2_clamp(int color)
return (uint8_t) CLAMP(color, 0, 255);
 }
 
+static GLushort
+etc2_clamp2(int color)
+{
+   /* CLAMP(color, 0, 2047) */
+   return (GLushort) CLAMP(color, 0, 2047);
+}
+
 static void
 etc2_rgb8_parse_block(struct etc2_block *block, const uint8_t *src)
 {
@@ -486,16 +502,41 @@ static void
 etc2_alpha8_fetch_texel(const struct etc2_block *block,
   int x, int y, uint8_t *dst)
 {
-   int modifier, alpha, bit, idx;
+   int modifier, alpha, idx;
/* get pixel index */
-   bit = ((3 - y) + (3 - x) * 4) * 3;
-   idx = (block-pixel_indices[1]  bit)  0x7;
+   idx = etc2_get_pixel_index(block, x, y);
modifier = etc2_modifier_tables[block-table_index][idx];
alpha = block-base_codeword + modifier * block-multiplier;
dst[3] = etc2_clamp(alpha);
 }
 
 static void
+etc2_r11_fetch_texel(const struct etc2_block *block,
+                     int x, int y, uint8_t *dst)
+{
+   GLint modifier, idx;
+   GLshort color;
+   /* Get pixel index */
+   idx = etc2_get_pixel_index(block, x, y);
+   modifier = etc2_modifier_tables[block->table_index][idx];
+
+   if (block->multiplier != 0)
+      /* clamp2(base codeword × 8 + 4 + modifier × multiplier × 8) */
+      color = etc2_clamp2(((block->base_codeword << 3) | 0x4)  +
+                          ((modifier * block->multiplier) << 3));
+   else
+      color = etc2_clamp2(((block->base_codeword << 3) | 0x4)  + modifier);
+
+   /* Extend 11 bits color value to 16 bits. OpenGL ES 3.0 specification
+    * allows extending the color value to any number of bits. But, an
+    * implementation is not allowed to truncate the 11-bit value to less than
+    * 11 bits.
+    */
+   color = (color << 5) | (color >> 6);
+   ((GLushort *)dst)[0] = color;
+}
+
+static void
 etc2_alpha8_parse_block(struct etc2_block *block, const uint8_t *src)
 {
block-base_codeword = src[0];
@@ -510,6 +551,13 @@ etc2_alpha8_parse_block(struct etc2_block *block, const 
uint8_t *src)
 }
 
 static void
+etc2_r11_parse_block(struct etc2_block *block, const uint8_t *src)
+{
+   /* Parsing logic remains same as for etc2_alpha8_parse_block */
+etc2_alpha8_parse_block(block, src);
+}
+
+static void
 etc2_rgba8_parse_block(struct etc2_block *block, const uint8_t *src)
 {
/* RGB component is parsed the same way as for MESA_FORMAT_ETC2_RGB8 */
@@ -680,6 +728,41 @@ etc2_unpack_srgb8_alpha8(uint8_t *dst_row,
 }
 }
 
+static void
+etc2_unpack_r11(uint8_t *dst_row,
+unsigned dst_stride,
+const uint8_t *src_row,
+unsigned src_stride,
+unsigned width,
+unsigned height)
+{
+   /* If internalformat is COMPRESSED_R11_EAC, each 4 × 4 block of
+  color information is compressed to 64 bits.
+   */
+   const unsigned bw = 4, bh = 4, bs = 8, comps = 1, comp_size = 2;
+   struct etc2_block block;
+   unsigned x, y, i, j;
+
+   for (y = 0; y  height; y += bh) {
+  const uint8_t *src = src_row;
+
+  for (x = 0; x  width; x+= bw) {
+ etc2_r11_parse_block(block, src);
+
+ for (j = 0; j  bh; j++) {
+uint8_t *dst = dst_row + (y + j) * dst_stride + x * 

[Mesa-dev] [PATCH 10/13] mesa: Add decoding functions for GL_COMPRESSED_SIGNED_RG11_EAC

2012-12-06 Thread Ian Romanick
From: Anuj Phogat anuj.pho...@gmail.com

Data in GL_COMPRESSED_SIGNED_RG11_EAC format is decoded and stored in
MESA_FORMAT_SIGNED_GR1616.

Signed-off-by: Anuj Phogat anuj.pho...@gmail.com
Reviewed-by: Brian Paul bri...@vmware.com
---
 src/mesa/main/texcompress.c |  2 +-
 src/mesa/main/texcompress_etc.c | 87 +
 src/mesa/main/texcompress_etc.h |  8 
 src/mesa/main/texstore.c|  2 +-
 src/mesa/swrast/s_texfetch.c|  2 +-
 5 files changed, 98 insertions(+), 3 deletions(-)

diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c
index 38d3180..b3fdfd7 100644
--- a/src/mesa/main/texcompress.c
+++ b/src/mesa/main/texcompress.c
@@ -629,7 +629,7 @@ _mesa_decompress_image(gl_format format, GLuint width, 
GLuint height,
   fetch = _mesa_fetch_texel_2d_f_etc2_signed_r11_eac;
   break;
case MESA_FORMAT_ETC2_SIGNED_RG11_EAC:
-  /* fetch = _mesa_fetch_texel_2d_f_etc2_signed_rg11_eac; -- not 
implemented yet */
+  fetch = _mesa_fetch_texel_2d_f_etc2_signed_rg11_eac;
   break;
case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
   /* fetch = _mesa_fetch_texel_2d_f_etc2_rgb8_punchthrough_alpha1; -- not 
implemented yet */
diff --git a/src/mesa/main/texcompress_etc.c b/src/mesa/main/texcompress_etc.c
index 2ac140e..d5a623a 100644
--- a/src/mesa/main/texcompress_etc.c
+++ b/src/mesa/main/texcompress_etc.c
@@ -32,6 +32,7 @@
  * GL_COMPRESSED_R11_EAC
  * GL_COMPRESSED_RG11_EAC
  * GL_COMPRESSED_SIGNED_R11_EAC
+ * GL_COMPRESSED_SIGNED_RG11_EAC
  */
 
 #include stdbool.h
@@ -894,6 +895,54 @@ etc2_unpack_signed_r11(uint8_t *dst_row,
 }
 }
 
+static void
+etc2_unpack_signed_rg11(uint8_t *dst_row,
+unsigned dst_stride,
+const uint8_t *src_row,
+unsigned src_stride,
+unsigned width,
+unsigned height)
+{
+   /* If internalformat is COMPRESSED_SIGNED_RG11_EAC, each 4 × 4 block of
+  RG color information is compressed to 128 bits.
+   */
+   const unsigned bw = 4, bh = 4, bs = 16, comps = 2, comp_size = 2;
+   struct etc2_block block;
+   unsigned x, y, i, j;
+
+   for (y = 0; y  height; y += bh) {
+  const uint8_t *src = src_row;
+
+  for (x = 0; x  width; x+= bw) {
+ /* red component */
+ etc2_r11_parse_block(block, src);
+
+ for (j = 0; j  bh; j++) {
+uint8_t *dst = dst_row + (y + j) * dst_stride +
+  x * comps * comp_size;
+for (i = 0; i  bw; i++) {
+   etc2_signed_r11_fetch_texel(block, i, j, dst);
+   dst += comps * comp_size;
+}
+ }
+ /* green component */
+ etc2_r11_parse_block(block, src + 8);
+
+ for (j = 0; j  bh; j++) {
+uint8_t *dst = dst_row + (y + j) * dst_stride +
+   x * comps * comp_size;
+for (i = 0; i  bw; i++) {
+   etc2_signed_r11_fetch_texel(block, i, j, dst + comp_size);
+   dst += comps * comp_size;
+}
+ }
+ src += bs;
+   }
+
+  src_row += src_stride;
+}
+}
+
 /* ETC2 texture formats are valid in glCompressedTexImage2D and
  * glCompressedTexSubImage2D functions */
 GLboolean
@@ -952,6 +1001,14 @@ _mesa_texstore_etc2_rg11_eac(TEXSTORE_PARAMS)
return GL_FALSE;
 }
 
+GLboolean
+_mesa_texstore_etc2_signed_rg11_eac(TEXSTORE_PARAMS)
+{
+   ASSERT(0);
+
+   return GL_FALSE;
+}
+
 void
 _mesa_fetch_texel_2d_f_etc2_rgb8(const struct swrast_texture_image *texImage,
  GLint i, GLint j, GLint k, GLfloat *texel)
@@ -1101,6 +1158,31 @@ _mesa_fetch_texel_2d_f_etc2_signed_r11_eac(const struct 
swrast_texture_image *te
texel[ACOMP] = 1.0f;
 }
 
+void
+_mesa_fetch_texel_2d_f_etc2_signed_rg11_eac(const struct swrast_texture_image 
*texImage,
+GLint i, GLint j, GLint k, GLfloat 
*texel)
+{
+   struct etc2_block block;
+   GLushort dst[2];
+   const uint8_t *src;
+
+   src = texImage-Map +
+  (((texImage-RowStride + 3) / 4) * (j / 4) + (i / 4)) * 16;
+
+   /* red component */
+   etc2_r11_parse_block(block, src);
+   etc2_signed_r11_fetch_texel(block, i % 4, j % 4, (uint8_t *)dst);
+
+   /* green component */
+   etc2_r11_parse_block(block, src + 8);
+   etc2_signed_r11_fetch_texel(block, i % 4, j % 4, (uint8_t *)(dst + 1));
+
+   texel[RCOMP] = SHORT_TO_FLOAT(dst[0]);
+   texel[GCOMP] = SHORT_TO_FLOAT(dst[1]);
+   texel[BCOMP] = 0.0f;
+   texel[ACOMP] = 1.0f;
+}
+
 /**
  * Decode texture data in any one of following formats:
  * `MESA_FORMAT_ETC2_RGB8`
@@ -1110,6 +1192,7 @@ _mesa_fetch_texel_2d_f_etc2_signed_r11_eac(const struct 
swrast_texture_image *te
  * `MESA_FORMAT_ETC2_R11_EAC`
  * `MESA_FORMAT_ETC2_RG11_EAC`
  * `MESA_FORMAT_ETC2_SIGNED_R11_EAC`
+ * `MESA_FORMAT_ETC2_SIGNED_RG11_EAC`
  *
  * The size of the source data must be a multiple of the ETC2 block 

[Mesa-dev] [PATCH 09/13] mesa: Add decoding functions for GL_COMPRESSED_SIGNED_R11_EAC

2012-12-06 Thread Ian Romanick
From: Anuj Phogat anuj.pho...@gmail.com

Data in GL_COMPRESSED_SIGNED_R11_EAC format is decoded and stored in
MESA_FORMAT_SIGNED_R16.

v2:
16 bit signed data is converted to 16 bit unsigned data by
adding 2 ^ 15 and stored in an unsigned texture format.

v3:
1. Handle a corner case when the base codeword value is -128. As per
the OpenGL ES 3.0 specification, -128 is not an allowed value and
should be truncated to -127.
2. Converting decoded 16-bit signed data to 16-bit unsigned data by
adding 2^15 gives output that matches the decompressed image
(.ppm) generated by Ericsson's etcpack tool. Ericsson does this
conversion in their tool because .ppm image files don't support signed
data. But the GLES 3.0 specification doesn't suggest this conversion, so
we need to keep the decoded data in signed format. Both signed-format
tests in the GLES3 conformance suite pass with these changes.

Signed-off-by: Anuj Phogat anuj.pho...@gmail.com
Tested-by: Matt Turner matts...@gmail.com
Reviewed-by: Brian Paul bri...@vmware.com
---
 src/mesa/main/texcompress.c |   2 +-
 src/mesa/main/texcompress_etc.c | 115 
 src/mesa/main/texcompress_etc.h |   8 +++
 src/mesa/main/texstore.c|   2 +-
 src/mesa/swrast/s_texfetch.c|   2 +-
 5 files changed, 126 insertions(+), 3 deletions(-)

diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c
index f9148d4..38d3180 100644
--- a/src/mesa/main/texcompress.c
+++ b/src/mesa/main/texcompress.c
@@ -626,7 +626,7 @@ _mesa_decompress_image(gl_format format, GLuint width, 
GLuint height,
   fetch = _mesa_fetch_texel_2d_f_etc2_rg11_eac;
   break;
case MESA_FORMAT_ETC2_SIGNED_R11_EAC:
-  /* fetch = _mesa_fetch_texel_2d_f_etc2_signed_r11_eac; -- not 
implemented yet */
+  fetch = _mesa_fetch_texel_2d_f_etc2_signed_r11_eac;
   break;
case MESA_FORMAT_ETC2_SIGNED_RG11_EAC:
   /* fetch = _mesa_fetch_texel_2d_f_etc2_signed_rg11_eac; -- not 
implemented yet */
diff --git a/src/mesa/main/texcompress_etc.c b/src/mesa/main/texcompress_etc.c
index 78af13a..2ac140e 100644
--- a/src/mesa/main/texcompress_etc.c
+++ b/src/mesa/main/texcompress_etc.c
@@ -31,6 +31,7 @@
  * GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC
  * GL_COMPRESSED_R11_EAC
  * GL_COMPRESSED_RG11_EAC
+ * GL_COMPRESSED_SIGNED_R11_EAC
  */
 
 #include <stdbool.h>
@@ -328,6 +329,13 @@ etc2_clamp2(int color)
return (GLushort) CLAMP(color, 0, 2047);
 }
 
+static GLshort
+etc2_clamp3(int color)
+{
+   /* CLAMP(color, -1023, 1023) */
+   return (GLshort) CLAMP(color, -1023, 1023);
+}
+
 static void
 etc2_rgb8_parse_block(struct etc2_block *block, const uint8_t *src)
 {
@@ -538,6 +546,44 @@ etc2_r11_fetch_texel(const struct etc2_block *block,
 }
 
 static void
+etc2_signed_r11_fetch_texel(const struct etc2_block *block,
+int x, int y, uint8_t *dst)
+{
+   GLint modifier, idx;
+   GLshort color;
+   GLbyte base_codeword = (GLbyte) block->base_codeword;
+
+   if (base_codeword == -128)
+  base_codeword = -127;
+
+   /* Get pixel index */
+   idx = etc2_get_pixel_index(block, x, y);
+   modifier = etc2_modifier_tables[block->table_index][idx];
+
+   if (block->multiplier != 0)
+      /* clamp3(base codeword × 8 + modifier × multiplier × 8) */
+      color = etc2_clamp3((base_codeword << 3)  +
+ ((modifier * block->multiplier) << 3));
+   else
+      color = etc2_clamp3((base_codeword << 3)  + modifier);
+
+   /* Extend 11 bits color value to 16 bits. OpenGL ES 3.0 specification
+* allows extending the color value to any number of bits. But, an
+* implementation is not allowed to truncate the 11-bit value to less than
+* 11 bits. A negative 11-bit value must first be made positive before bit
+* replication, and then made negative again
+*/
+   if (color >= 0)
+      color = (color << 5) | (color >> 5);
+   else {
+      color = -color;
+      color = (color << 5) | (color >> 5);
+  color = -color;
+   }
+   ((GLshort *)dst)[0] = color;
+}
+
+static void
 etc2_alpha8_parse_block(struct etc2_block *block, const uint8_t *src)
 {
 block->base_codeword = src[0];
@@ -812,6 +858,42 @@ etc2_unpack_rg11(uint8_t *dst_row,
 }
 }
 
+static void
+etc2_unpack_signed_r11(uint8_t *dst_row,
+   unsigned dst_stride,
+   const uint8_t *src_row,
+   unsigned src_stride,
+   unsigned width,
+   unsigned height)
+{
+   /* If internalformat is COMPRESSED_SIGNED_R11_EAC, each 4 × 4 block of
+  red color information is compressed to 64 bits.
+   */
+   const unsigned bw = 4, bh = 4, bs = 8, comps = 1, comp_size = 2;
+   struct etc2_block block;
+   unsigned x, y, i, j;
+
+   for (y = 0; y < height; y += bh) {
+  const uint8_t *src = src_row;
+
+  for (x = 0; x < width; x+= bw) {
+ etc2_r11_parse_block(&block, src);
+
+ for (j = 0; j < bh; j++) {
+uint8_t *dst = dst_row + (y + j) * dst_stride +

  1   2   >