Re: [Mesa-dev] [PATCH 5/5] i965 gen7: add support for layered color renderbuffers

2013-05-17 Thread Chia-I Wu
On Sat, May 18, 2013 at 10:11 AM, Jordan Justen
 wrote:
> Rather than pointing the surface_state directly at a single
> sub-image of the texture for rendering, we now point the
> surface_state at the top level of the texture, and configure
> the surface_state as needed based on this.
>
> We now also need to stop setting the FORCE_ZERO_RTAINDEX bit
> in the clip state so render target array values other than zero
> will be used.
>
> Signed-off-by: Jordan Justen 
> ---
>  src/mesa/drivers/dri/i965/brw_defines.h   |2 +
>  src/mesa/drivers/dri/i965/gen7_clip_state.c   |3 +-
>  src/mesa/drivers/dri/i965/gen7_wm_surface_state.c |   63 
> +++--
>  3 files changed, 48 insertions(+), 20 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
> b/src/mesa/drivers/dri/i965/brw_defines.h
> index fedd78c..d61151f 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -539,6 +539,8 @@
>  #define GEN7_SURFACE_MULTISAMPLECOUNT_8 (3 << 3)
>  #define GEN7_SURFACE_MSFMT_MSS  (0 << 6)
>  #define GEN7_SURFACE_MSFMT_DEPTH_STENCIL(1 << 6)
> +#define GEN7_SURFACE_MIN_ARRAY_ELEMENT_SHIFT   18
> +#define GEN7_SURFACE_RENDER_TARGET_VIEW_EXTENT_SHIFT   7
>
>  /* Surface state DW5 */
>  #define BRW_SURFACE_X_OFFSET_SHIFT 25
> diff --git a/src/mesa/drivers/dri/i965/gen7_clip_state.c 
> b/src/mesa/drivers/dri/i965/gen7_clip_state.c
> index 29a5ed5..1256f32 100644
> --- a/src/mesa/drivers/dri/i965/gen7_clip_state.c
> +++ b/src/mesa/drivers/dri/i965/gen7_clip_state.c
> @@ -107,8 +107,7 @@ upload_clip_state(struct brw_context *brw)
>  GEN6_CLIP_XY_TEST |
>   dw2);
> OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
> - U_FIXED(255.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT |
> - GEN6_CLIP_FORCE_ZERO_RTAINDEX);
> + U_FIXED(255.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT);
> ADVANCE_BATCH();
>  }
>
> diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c 
> b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
> index 6c01545..5f15eff 100644
> --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
> @@ -23,6 +23,7 @@
>  #include "main/mtypes.h"
>  #include "main/blend.h"
>  #include "main/samplerobj.h"
> +#include "main/texformat.h"
>  #include "program/prog_parameter.h"
>
>  #include "intel_mipmap_tree.h"
> @@ -529,12 +530,13 @@ gen7_update_renderbuffer_surface(struct brw_context 
> *brw,
> struct gl_context *ctx = &intel->ctx;
> struct intel_renderbuffer *irb = intel_renderbuffer(rb);
> struct intel_region *region = irb->mt->region;
> -   uint32_t tile_x, tile_y;
> uint32_t format;
> /* _NEW_BUFFERS */
> gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
> -
> -   assert(!layered);
> +   uint32_t surftype;
> +   bool is_array = false;
> +   int depth = rb->Depth > 0 ? rb->Depth - 1 : 0;
> +   int min_array_element = 0;
>
> uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
>  8 * 4, 32, &brw->wm.surf_offset[unit]);
> @@ -550,7 +552,23 @@ gen7_update_renderbuffer_surface(struct brw_context *brw,
>  __FUNCTION__, _mesa_get_format_name(rb_format));
> }
>
> -   surf[0] = BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
> +   if (rb->TexImage) {
> +  surftype = translate_tex_target(rb->TexImage->TexObject->Target);
> +  is_array = _mesa_tex_target_is_array(rb->TexImage->TexObject->Target);
> +  if (rb->TexImage->TexObject->Target == GL_TEXTURE_CUBE_MAP_ARRAY) {
> + assert(rb->Depth > 0);
> + surftype = BRW_SURFACE_2D;
> + depth = (6 * (depth + 1)) - 1;
> +  } else if (rb->TexImage->TexObject->Target == GL_TEXTURE_CUBE_MAP) {
> + surftype = BRW_SURFACE_2D;
> + depth = 5;
> + is_array = true;
> +  }
> +   } else {
> +  surftype = BRW_SURFACE_2D;
> +   }
> +
> +   surf[0] = surftype << BRW_SURFACE_TYPE_SHIFT |
>   format << BRW_SURFACE_FORMAT_SHIFT |
>   (irb->mt->array_spacing_lod0 ? GEN7_SURFACE_ARYSPC_LOD0
>: GEN7_SURFACE_ARYSPC_FULL) |
> @@ -561,24 +579,33 @@ gen7_update_renderbuffer_surface(struct brw_context 
> *brw,
> if (irb->mt->align_w == 8)
>surf[0] |= GEN7_SURFACE_HALIGN_8;
>
> -   /* reloc */
> -   surf[1] = intel_renderbuffer_tile_offsets(irb, &tile_x, &tile_y) +
> - region->bo->offset; /* reloc */
> +   if (is_array) {
> +  surf[0] |= GEN7_SURFACE_IS_ARRAY;
> +   }
> +
> +   if (!layered) {
> +  if (irb->mt->num_samples > 1) {
> + min_array_element = irb->mt_layer / irb->mt->num_samples;
> +  } else {
> + min_array_element = irb->mt_layer;
> +  }
> +   }
> +
> +   surf[1] = region->bo->offset;
>
> assert(brw->has_surface_tile_offset)

Re: [Mesa-dev] [PATCH 2/2] r600g/compute: Use common transfer_{map, unmap} functions for global resources

2013-05-17 Thread Marek Olšák
On Sat, May 18, 2013 at 2:17 AM, Tom Stellard  wrote:
> From: Tom Stellard 
>
> ---
>  src/gallium/drivers/r600/evergreen_compute.c | 68 
> ++--
>  1 file changed, 24 insertions(+), 44 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
> b/src/gallium/drivers/r600/evergreen_compute.c
> index 4d490c4..6d26b8e 100644
> --- a/src/gallium/drivers/r600/evergreen_compute.c
> +++ b/src/gallium/drivers/r600/evergreen_compute.c
> @@ -903,67 +903,47 @@ void *r600_compute_global_transfer_map(
>  {
> struct r600_context *rctx = (struct r600_context*)ctx_;
> struct compute_memory_pool *pool = rctx->screen->global_pool;
> -   struct pipe_transfer *transfer = 
> util_slab_alloc(&rctx->pool_transfers);
> struct r600_resource_global* buffer =
> (struct r600_resource_global*)resource;
> -   uint32_t* map;
>
> -   compute_memory_finalize_pending(pool, ctx_);
> -
> -   assert(resource->target == PIPE_BUFFER);
> -
> -   COMPUTE_DBG(rctx->screen, "* r600_compute_global_get_transfer()\n"
> +   COMPUTE_DBG(rctx->screen, "* r600_compute_global_transfer_map()\n"
> "level = %u, usage = %u, box(x = %u, y = %u, z = %u "
> "width = %u, height = %u, depth = %u)\n", level, 
> usage,
> box->x, box->y, box->z, box->width, box->height,
> box->depth);
> +   COMPUTE_DBG(rctx->screen, "Buffer: %u (buffer offset in global 
> memory) "
> +   "+ %u (box.x)\n", buffer->chunk->start_in_dw, box->x);
>
> -   transfer->resource = resource;
> -   transfer->level = level;
> -   transfer->usage = usage;
> -   transfer->box = *box;
> -   transfer->stride = 0;
> -   transfer->layer_stride = 0;
> -
> -   assert(transfer->resource->target == PIPE_BUFFER);
> -   assert(transfer->resource->bind & PIPE_BIND_GLOBAL);
> -   assert(transfer->box.x >= 0);
> -   assert(transfer->box.y == 0);
> -   assert(transfer->box.z == 0);
>
> -   ///TODO: do it better, mapping is not possible if the pool is too big
> -
> -   COMPUTE_DBG(rctx->screen, "* r600_compute_global_transfer_map()\n");
> -
> -   if (!(map = r600_buffer_mmap_sync_with_rings(rctx, 
> buffer->chunk->pool->bo, transfer->usage))) {
> -   util_slab_free(&rctx->pool_transfers, transfer);
> -   return NULL;
> -   }
> +   compute_memory_finalize_pending(pool, ctx_);
>
> -   *ptransfer = transfer;
> +   assert(resource->target == PIPE_BUFFER);
> +   assert(resource->bind & PIPE_BIND_GLOBAL);
> +   assert(box->x >= 0);
> +   assert(box->y == 0);
> +   assert(box->z == 0);
>
> -   COMPUTE_DBG(rctx->screen, "Buffer: %p + %u (buffer offset in global 
> memory) "
> -   "+ %u (box.x)\n", map, buffer->chunk->start_in_dw, 
> transfer->box.x);
> -   return ((char*)(map + buffer->chunk->start_in_dw)) + transfer->box.x;
> +   ///TODO: do it better, mapping is not possible if the pool is too big
> +   return pipe_buffer_map_range(ctx_, (struct 
> pipe_resource*)buffer->chunk->pool->bo,
> +   box->x + (buffer->chunk->start_in_dw * 4),
> +   box->width, usage, ptransfer);
>  }
>
>  void r600_compute_global_transfer_unmap(
> struct pipe_context *ctx_,
> struct pipe_transfer* transfer)
>  {
> -   struct r600_context *ctx = NULL;
> -   struct r600_resource_global* buffer = NULL;
> -
> -   assert(transfer->resource->target == PIPE_BUFFER);
> -   assert(transfer->resource->bind & PIPE_BIND_GLOBAL);
> -
> -   ctx = (struct r600_context *)ctx_;
> -   buffer = (struct r600_resource_global*)transfer->resource;
> -
> -   COMPUTE_DBG(ctx->screen, "* r600_compute_global_transfer_unmap()\n");
> -
> -   ctx->ws->buffer_unmap(buffer->chunk->pool->bo->cs_buf);
> -   util_slab_free(&ctx->pool_transfers, transfer);
> +   /* struct r600_resource_global are not real resources, they just map
> +* to an offset within the compute memory pool.  The function
> +* r600_compute_global_transfer_map() maps the memory pool
> +* resource rather than the struct r600_resource_global passed to
> +* it as an argument and then initializes ptransfer->resource with
> +* the memory pool resource (via pipe_buffer_map_range).
> +* When transfer_unamp is called it uses the memory pool's

*unmap

For the series:

Reviewed-by: Marek Olšák 

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 64730] [llvmpipe] piglit array-texture regression

2013-05-17 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=64730

Roland Scheidegger  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

--- Comment #1 from Roland Scheidegger  ---
The workaround was bogus since it didn't only prevent the wrong first_layer
rebase (which wasn't an issue for opengl) but had the effect of not assigning
depth correctly (so was always one, effectively reducing all array textures to
non-array ones).
This is however already fixed by 87978518e996d02e055174d7152fff150fe3cd13.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 64730] New: [llvmpipe] piglit array-texture regression

2013-05-17 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=64730

  Priority: medium
Bug ID: 64730
  Keywords: regression
CC: jfons...@vmware.com
  Assignee: mesa-dev@lists.freedesktop.org
   Summary: [llvmpipe] piglit array-texture regression
  Severity: normal
Classification: Unclassified
OS: Linux (All)
  Reporter: v...@freedesktop.org
  Hardware: x86-64 (AMD64)
Status: NEW
   Version: git
 Component: Other
   Product: Mesa

mesa: 46ea8041074df79561f9771e2ecf198f2cbd088f (master)

$ ./bin/array-texture -auto
Probe at (150,50)
  Expected: 0.00 1.00 0.00
  Observed: 1.00 0.00 0.00
array-texture: failed for 2D image/slice 1
Probe at (250,50)
  Expected: 0.00 0.00 1.00
  Observed: 1.00 0.00 0.00
array-texture: failed for 2D image/slice 2
Probe at (350,50)
  Expected: 0.00 0.00 1.00
  Observed: 1.00 0.00 0.00
array-texture: failed for 2D image/slice 3
Probe at (450,50)
  Expected: 0.00 1.00 0.00
  Observed: 1.00 0.00 0.00
array-texture: failed for 2D image/slice 4
Probe at (550,50)
  Expected: 1.00 1.00 0.00
  Observed: 1.00 0.00 0.00
array-texture: failed for 2D image/slice 5
Probe at (650,50)
  Expected: 1.00 1.00 1.00
  Observed: 1.00 0.00 0.00
array-texture: failed for 2D image/slice 6
Probe at (150,150)
  Expected: 0.00 1.00 0.00
  Observed: 1.00 0.00 0.00
array-texture: failed for 2D image/slice 1
Probe at (250,150)
  Expected: 0.00 0.00 1.00
  Observed: 1.00 0.00 0.00
array-texture: failed for 2D image/slice 2
Probe at (350,150)
  Expected: 0.00 0.00 1.00
  Observed: 1.00 0.00 0.00
array-texture: failed for 2D image/slice 3
Probe at (450,150)
  Expected: 0.00 1.00 0.00
  Observed: 1.00 0.00 0.00
array-texture: failed for 2D image/slice 4
Probe at (550,150)
  Expected: 1.00 1.00 0.00
  Observed: 1.00 0.00 0.00
array-texture: failed for 2D image/slice 5
Probe at (650,150)
  Expected: 1.00 1.00 1.00
  Observed: 1.00 0.00 0.00
array-texture: failed for 2D image/slice 6
Probe at (150,250)
  Expected: 0.00 1.00 0.00
  Observed: 1.00 0.00 0.00
array-texture: failed for 1D image/slice 1
Probe at (250,250)
  Expected: 0.00 0.00 1.00
  Observed: 1.00 0.00 0.00
array-texture: failed for 1D image/slice 2
Probe at (350,250)
  Expected: 0.00 0.00 1.00
  Observed: 1.00 0.00 0.00
array-texture: failed for 1D image/slice 3
Probe at (450,250)
  Expected: 0.00 1.00 0.00
  Observed: 1.00 0.00 0.00
array-texture: failed for 1D image/slice 4
Probe at (550,250)
  Expected: 1.00 1.00 0.00
  Observed: 1.00 0.00 0.00
array-texture: failed for 1D image/slice 5
Probe at (650,250)
  Expected: 1.00 1.00 1.00
  Observed: 1.00 0.00 0.00
array-texture: failed for 1D image/slice 6
Probe at (150,350)
  Expected: 0.00 1.00 0.00
  Observed: 1.00 0.00 0.00
array-texture: failed for 1D image/slice 1
Probe at (250,350)
  Expected: 0.00 0.00 1.00
  Observed: 1.00 0.00 0.00
array-texture: failed for 1D image/slice 2
Probe at (350,350)
  Expected: 0.00 0.00 1.00
  Observed: 1.00 0.00 0.00
array-texture: failed for 1D image/slice 3
Probe at (450,350)
  Expected: 0.00 1.00 0.00
  Observed: 1.00 0.00 0.00
array-texture: failed for 1D image/slice 4
Probe at (550,350)
  Expected: 1.00 1.00 0.00
  Observed: 1.00 0.00 0.00
array-texture: failed for 1D image/slice 5
Probe at (650,350)
  Expected: 1.00 1.00 1.00
  Observed: 1.00 0.00 0.00
array-texture: failed for 1D image/slice 6
PIGLIT: {'result': 'fail' }


4f518e173847e8538bb4f0f9216e3f6417853d7a is the first bad commit
commit 4f518e173847e8538bb4f0f9216e3f6417853d7a
Author: José Fonseca 
Date:   Thu May 16 15:13:51 2013 +0100

llvmpipe: Temporary workaround to prevent segfault on array textures.

:04 04 98a1ae9d47b556629ed257353cd5c93f6d660517
3a99f61bbde526d04c60d10629f5801e05e8d235 Msrc
bisect run success

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 64649] Anomaly 2 (Steam) exits with GLX_EXT_swap_control not supported, unable to set vertical sync

2013-05-17 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=64649

--- Comment #7 from romula...@gmail.com ---
(In reply to comment #4)
>Does it look like a graceful exit or a segfault?
No segfault so a graceful exit.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 64649] Anomaly 2 (Steam) exits with GLX_EXT_swap_control not supported, unable to set vertical sync

2013-05-17 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=64649

--- Comment #6 from romula...@gmail.com ---
Created attachment 79488
  --> https://bugs.freedesktop.org/attachment.cgi?id=79488&action=edit
full steam logfile

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 64649] Anomaly 2 (Steam) exits with GLX_EXT_swap_control not supported, unable to set vertical sync

2013-05-17 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=64649

--- Comment #5 from romula...@gmail.com ---
01:00.0 VGA compatible controller: Advanced Micro Devices [AMD] nee ATI RV770
[Radeon HD 4870]

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/5] mesa/texformat: add _mesa_tex_target_is_array function

2013-05-17 Thread Jordan Justen
Signed-off-by: Jordan Justen 
---
 src/mesa/main/texformat.c |   13 +
 src/mesa/main/texformat.h |2 ++
 2 files changed, 15 insertions(+)

diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c
index ed40b7e..a7df868 100644
--- a/src/mesa/main/texformat.c
+++ b/src/mesa/main/texformat.c
@@ -929,3 +929,16 @@ _mesa_choose_tex_format(struct gl_context *ctx, GLenum 
target,
return MESA_FORMAT_NONE;
 }
 
+GLboolean
+_mesa_tex_target_is_array(GLenum target)
+{
+   switch (target) {
+   case GL_TEXTURE_1D_ARRAY_EXT:
+   case GL_TEXTURE_2D_ARRAY_EXT:
+   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
+   case GL_TEXTURE_CUBE_MAP_ARRAY:
+  return GL_TRUE;
+   default: 
+  return GL_FALSE;
+   }
+}
diff --git a/src/mesa/main/texformat.h b/src/mesa/main/texformat.h
index efe2699..d6ff541 100644
--- a/src/mesa/main/texformat.h
+++ b/src/mesa/main/texformat.h
@@ -36,5 +36,7 @@ extern gl_format
 _mesa_choose_tex_format(struct gl_context *ctx, GLenum target,
 GLint internalFormat, GLenum format, GLenum type);
 
+extern GLboolean
+_mesa_tex_target_is_array(GLenum target);
 
 #endif
-- 
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/5] i965 gen7: add support for layered color renderbuffers

2013-05-17 Thread Jordan Justen
Rather than pointing the surface_state directly at a single
sub-image of the texture for rendering, we now point the
surface_state at the top level of the texture, and configure
the surface_state as needed based on this.

We now also need to stop setting the FORCE_ZERO_RTAINDEX bit
in the clip state so render target array values other than zero
will be used.

Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_defines.h   |2 +
 src/mesa/drivers/dri/i965/gen7_clip_state.c   |3 +-
 src/mesa/drivers/dri/i965/gen7_wm_surface_state.c |   63 +++--
 3 files changed, 48 insertions(+), 20 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index fedd78c..d61151f 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -539,6 +539,8 @@
 #define GEN7_SURFACE_MULTISAMPLECOUNT_8 (3 << 3)
 #define GEN7_SURFACE_MSFMT_MSS  (0 << 6)
 #define GEN7_SURFACE_MSFMT_DEPTH_STENCIL(1 << 6)
+#define GEN7_SURFACE_MIN_ARRAY_ELEMENT_SHIFT   18
+#define GEN7_SURFACE_RENDER_TARGET_VIEW_EXTENT_SHIFT   7
 
 /* Surface state DW5 */
 #define BRW_SURFACE_X_OFFSET_SHIFT 25
diff --git a/src/mesa/drivers/dri/i965/gen7_clip_state.c 
b/src/mesa/drivers/dri/i965/gen7_clip_state.c
index 29a5ed5..1256f32 100644
--- a/src/mesa/drivers/dri/i965/gen7_clip_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_clip_state.c
@@ -107,8 +107,7 @@ upload_clip_state(struct brw_context *brw)
 GEN6_CLIP_XY_TEST |
  dw2);
OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
- U_FIXED(255.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT |
- GEN6_CLIP_FORCE_ZERO_RTAINDEX);
+ U_FIXED(255.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT);
ADVANCE_BATCH();
 }
 
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
index 6c01545..5f15eff 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
@@ -23,6 +23,7 @@
 #include "main/mtypes.h"
 #include "main/blend.h"
 #include "main/samplerobj.h"
+#include "main/texformat.h"
 #include "program/prog_parameter.h"
 
 #include "intel_mipmap_tree.h"
@@ -529,12 +530,13 @@ gen7_update_renderbuffer_surface(struct brw_context *brw,
struct gl_context *ctx = &intel->ctx;
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
struct intel_region *region = irb->mt->region;
-   uint32_t tile_x, tile_y;
uint32_t format;
/* _NEW_BUFFERS */
gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
-
-   assert(!layered);
+   uint32_t surftype;
+   bool is_array = false;
+   int depth = rb->Depth > 0 ? rb->Depth - 1 : 0;
+   int min_array_element = 0;
 
uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 8 * 4, 32, &brw->wm.surf_offset[unit]);
@@ -550,7 +552,23 @@ gen7_update_renderbuffer_surface(struct brw_context *brw,
 __FUNCTION__, _mesa_get_format_name(rb_format));
}
 
-   surf[0] = BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
+   if (rb->TexImage) {
+  surftype = translate_tex_target(rb->TexImage->TexObject->Target);
+  is_array = _mesa_tex_target_is_array(rb->TexImage->TexObject->Target);
+  if (rb->TexImage->TexObject->Target == GL_TEXTURE_CUBE_MAP_ARRAY) {
+ assert(rb->Depth > 0);
+ surftype = BRW_SURFACE_2D;
+ depth = (6 * (depth + 1)) - 1;
+  } else if (rb->TexImage->TexObject->Target == GL_TEXTURE_CUBE_MAP) {
+ surftype = BRW_SURFACE_2D;
+ depth = 5;
+ is_array = true;
+  }
+   } else {
+  surftype = BRW_SURFACE_2D;
+   }
+
+   surf[0] = surftype << BRW_SURFACE_TYPE_SHIFT |
  format << BRW_SURFACE_FORMAT_SHIFT |
  (irb->mt->array_spacing_lod0 ? GEN7_SURFACE_ARYSPC_LOD0
   : GEN7_SURFACE_ARYSPC_FULL) |
@@ -561,24 +579,33 @@ gen7_update_renderbuffer_surface(struct brw_context *brw,
if (irb->mt->align_w == 8)
   surf[0] |= GEN7_SURFACE_HALIGN_8;
 
-   /* reloc */
-   surf[1] = intel_renderbuffer_tile_offsets(irb, &tile_x, &tile_y) +
- region->bo->offset; /* reloc */
+   if (is_array) {
+  surf[0] |= GEN7_SURFACE_IS_ARRAY;
+   }
+
+   if (!layered) {
+  if (irb->mt->num_samples > 1) {
+ min_array_element = irb->mt_layer / irb->mt->num_samples;
+  } else {
+ min_array_element = irb->mt_layer;
+  }
+   }
+
+   surf[1] = region->bo->offset;
 
assert(brw->has_surface_tile_offset);
-   /* Note that the low bits of these fields are missing, so
-* there's the possibility of getting in trouble.
-*/
-   assert(tile_x % 4 == 0);
-   assert(tile_y % 2 == 0);
-   surf[5] = SET_FIELD(tile_x / 4, BRW_SURFACE_X_OFFSET) |
- SET_FIELD(tile_y / 2, BRW_SURFACE_

[Mesa-dev] [PATCH 2/5] intel_fbo: set gl_renderbuffer Depth field

2013-05-17 Thread Jordan Justen
Set the renderbuffer's Depth field to match the texture's
Depth when rendering to a texture.

Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/intel/intel_fbo.c |9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c 
b/src/mesa/drivers/dri/intel/intel_fbo.c
index a8a7ab3..243c00a 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -482,14 +482,17 @@ intel_framebuffer_renderbuffer(struct gl_context * ctx,
 static bool
 intel_renderbuffer_update_wrapper(struct intel_context *intel,
   struct intel_renderbuffer *irb,
- struct gl_texture_image *image,
-  uint32_t layer)
+  struct gl_texture_image *image,
+  uint32_t layer,
+  bool layered)
 {
struct gl_renderbuffer *rb = &irb->Base.Base;
struct intel_texture_image *intel_image = intel_texture_image(image);
struct intel_mipmap_tree *mt = intel_image->mt;
int level = image->Level;
 
+   rb->Depth = image->Depth;
+
rb->AllocStorage = intel_nop_alloc_storage;
 
intel_miptree_check_level_layer(mt, level, layer);
@@ -598,7 +601,7 @@ intel_render_texture(struct gl_context * ctx,
 
intel_miptree_check_level_layer(mt, att->TextureLevel, layer);
 
-   if (!intel_renderbuffer_update_wrapper(intel, irb, image, layer)) {
+   if (!intel_renderbuffer_update_wrapper(intel, irb, image, layer, 
att->Layered)) {
_swrast_render_texture(ctx, fb, att);
return;
}
-- 
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/5] intel: add layered parameter to update_renderbuffer_surface

2013-05-17 Thread Jordan Justen
Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c  |6 +-
 src/mesa/drivers/dri/i965/gen7_wm_surface_state.c |3 +++
 src/mesa/drivers/dri/intel/intel_context.h|1 +
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index bbe8579..efc15e9 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -1315,6 +1315,7 @@ brw_update_null_renderbuffer_surface(struct brw_context 
*brw, unsigned int unit)
 static void
 brw_update_renderbuffer_surface(struct brw_context *brw,
struct gl_renderbuffer *rb,
+   bool layered,
unsigned int unit)
 {
struct intel_context *intel = &brw->intel;
@@ -1328,6 +1329,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
/* _NEW_BUFFERS */
gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
 
+   assert(!layered);
+
if (rb->TexImage && !brw->has_surface_tile_offset) {
   intel_renderbuffer_tile_offsets(irb, &tile_x, &tile_y);
 
@@ -1424,7 +1427,8 @@ brw_update_renderbuffer_surfaces(struct brw_context *brw)
if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
   for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
 if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
-   intel->vtbl.update_renderbuffer_surface(brw, 
ctx->DrawBuffer->_ColorDrawBuffers[i], i);
+   intel->vtbl.update_renderbuffer_surface(brw, 
ctx->DrawBuffer->_ColorDrawBuffers[i],
+   ctx->DrawBuffer->Layered, 
i);
 } else {
intel->vtbl.update_null_renderbuffer_surface(brw, i);
 }
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
index 435f9dc..6c01545 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
@@ -522,6 +522,7 @@ gen7_update_null_renderbuffer_surface(struct brw_context 
*brw, unsigned unit)
 static void
 gen7_update_renderbuffer_surface(struct brw_context *brw,
 struct gl_renderbuffer *rb,
+bool layered,
 unsigned int unit)
 {
struct intel_context *intel = &brw->intel;
@@ -533,6 +534,8 @@ gen7_update_renderbuffer_surface(struct brw_context *brw,
/* _NEW_BUFFERS */
gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
 
+   assert(!layered);
+
uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 8 * 4, 32, &brw->wm.surf_offset[unit]);
memset(surf, 0, 8 * 4);
diff --git a/src/mesa/drivers/dri/intel/intel_context.h 
b/src/mesa/drivers/dri/intel/intel_context.h
index c0f07ff..5420e76 100644
--- a/src/mesa/drivers/dri/intel/intel_context.h
+++ b/src/mesa/drivers/dri/intel/intel_context.h
@@ -196,6 +196,7 @@ struct intel_context
  unsigned surf_index);
   void (*update_renderbuffer_surface)(struct brw_context *brw,
  struct gl_renderbuffer *rb,
+ bool layered,
  unsigned unit);
   void (*update_null_renderbuffer_surface)(struct brw_context *brw,
   unsigned unit);
-- 
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/5] intel: print image depth in debug message

2013-05-17 Thread Jordan Justen
Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/intel/intel_fbo.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c 
b/src/mesa/drivers/dri/intel/intel_fbo.c
index 69f8629..a8a7ab3 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -603,9 +603,9 @@ intel_render_texture(struct gl_context * ctx,
return;
}
 
-   DBG("Begin render %s texture tex=%u w=%d h=%d refcount=%d\n",
+   DBG("Begin render %s texture tex=%u w=%d h=%d d=%d refcount=%d\n",
_mesa_get_format_name(image->TexFormat),
-   att->Texture->Name, image->Width, image->Height,
+   att->Texture->Name, image->Width, image->Height, image->Depth,
rb->RefCount);
 
/* update drawing region, etc */
-- 
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/5] Gen7 Layered Color Renderbuffer support

2013-05-17 Thread Jordan Justen
git://people.freedesktop.org/~jljusten/mesa
 ivb-layered-color-renderbuffer-v1

This series updates gen7 to allow layered color
render buffers. With these changes we can support
the AMD_vertex_shader_layer extension for color
renderbuffers.

Once depth is also supported, then we can
actually enable the AMD_vertex_shader_layer extension.

Layered rendering is also required for geometry
shader support.

Jordan Justen (5):
  intel: print image depth in debug message
  intel_fbo: set gl_renderbuffer Depth field
  intel: add layered parameter to update_renderbuffer_surface
  mesa/texformat: add _mesa_tex_target_is_array function
  i965 gen7: add support for layered color renderbuffers

 src/mesa/drivers/dri/i965/brw_defines.h   |2 +
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c  |6 +-
 src/mesa/drivers/dri/i965/gen7_clip_state.c   |3 +-
 src/mesa/drivers/dri/i965/gen7_wm_surface_state.c |   62 +++--
 src/mesa/drivers/dri/intel/intel_context.h|1 +
 src/mesa/drivers/dri/intel/intel_fbo.c|   13 +++--
 src/mesa/main/texformat.c |   13 +
 src/mesa/main/texformat.h |2 +
 8 files changed, 78 insertions(+), 24 deletions(-)

-- 
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] llvmpipe: fix stencil issues

2013-05-17 Thread sroland
From: Roland Scheidegger 

Two (somewhat related) issues:
1) We did mask checks between depth/stencil testing and depth/stencil write.
This meant that if the depth/stencil test killed off all fragments we never
actually wrote the new stencil value. This issue affected all early/late
test/write combinations.
2) We actually did early depth/stencil test and late depth/stencil write even
when the shader could kill the fragment (alpha test or discard). Since it
matters for the new stencil value if the fragment is killed by depth/stencil
test or by the shader (in which case it will not reach the depth/stencil test)
this simply cannot work.
So fix these issues by moving the mask check after depth/stencil write (only
for early write; it would work for late write too, but it is probably not worth
the mask check there) and by disabling early depth test when it can't work
correctly.
This addresses https://bugs.freedesktop.org/show_bug.cgi?id=41787 though
replaying the trace it still looks somewhat wrong to me, so maybe more bugs...
Verified this fixes affected piglit tests (glean stencil2 and some from hiz
group) if the simple_shader optimization in generate_fs_loop() is forced to
false (otherwise we skip mask checks hence don't hit issue 1 - I don't think
there's anything in piglit which would exhibit issue 2).
---
 src/gallium/drivers/llvmpipe/lp_bld_depth.c |   31 ++-
 src/gallium/drivers/llvmpipe/lp_state_fs.c  |   20 -
 2 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c 
b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index 2376ca7..afc2d9d 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -1096,23 +1096,18 @@ lp_build_depth_stencil_test(struct gallivm_state 
*gallivm,
   stencil_shift, "");
 
/* Finally, merge the z/stencil values */
-   if ((depth->enabled && depth->writemask) ||
-   (stencil[0].enabled && (stencil[0].writemask ||
-   (stencil[1].enabled && stencil[1].writemask 
{
-
-  if (format_desc->block.bits <= 32) {
- if (have_z && have_s)
-*z_value = LLVMBuildOr(builder, z_dst, stencil_vals, "");
- else if (have_z)
-*z_value = z_dst;
- else
-*z_value = stencil_vals;
- *s_value = *z_value;
-  }
-  else {
+   if (format_desc->block.bits <= 32) {
+  if (have_z && have_s)
+ *z_value = LLVMBuildOr(builder, z_dst, stencil_vals, "");
+  else if (have_z)
  *z_value = z_dst;
- *s_value = stencil_vals;
-  }
+  else
+ *z_value = stencil_vals;
+  *s_value = *z_value;
+   }
+   else {
+  *z_value = z_dst;
+  *s_value = stencil_vals;
}
 
if (s_pass_mask)
@@ -1120,9 +1115,5 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
 
if (depth->enabled && stencil[0].enabled)
   lp_build_mask_update(mask, z_pass);
-
-   if (do_branch)
-  lp_build_mask_check(mask);
-
 }
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c 
b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 1dfc75a..ae63615 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -266,13 +266,20 @@ generate_fs_loop(struct gallivm_state *gallivm,
   assert(zs_format_desc);
 
   if (!shader->info.base.writes_z) {
- if (key->alpha.enabled || shader->info.base.uses_kill)
+ if (key->alpha.enabled || shader->info.base.uses_kill) {
 /* With alpha test and kill, can do the depth test early
  * and hopefully eliminate some quads.  But need to do a
  * special deferred depth write once the final mask value
- * is known.
+ * is known. This only works though if there's either no
+ * stencil test or the stencil value isn't written.
  */
-depth_mode = EARLY_DEPTH_TEST | LATE_DEPTH_WRITE;
+if (key->stencil[0].enabled && (key->stencil[0].writemask ||
+(key->stencil[1].enabled &&
+ key->stencil[1].writemask)))
+   depth_mode = LATE_DEPTH_TEST | LATE_DEPTH_WRITE;
+else
+   depth_mode = EARLY_DEPTH_TEST | LATE_DEPTH_WRITE;
+ }
  else
 depth_mode = EARLY_DEPTH_TEST | EARLY_DEPTH_WRITE;
   }
@@ -281,9 +288,9 @@ generate_fs_loop(struct gallivm_state *gallivm,
   }
 
   if (!(key->depth.enabled && key->depth.writemask) &&
-  !((key->stencil[0].enabled && (key->stencil[0].writemask ||
+  !(key->stencil[0].enabled && (key->stencil[0].writemask ||
 (key->stencil[1].enabled &&
- key->stencil[1].writemask)
+ key->stencil[1].writ

[Mesa-dev] [PATCH 2/2] r600g/compute: Use common transfer_{map, unmap} functions for global resources

2013-05-17 Thread Tom Stellard
From: Tom Stellard 

---
 src/gallium/drivers/r600/evergreen_compute.c | 68 ++--
 1 file changed, 24 insertions(+), 44 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
b/src/gallium/drivers/r600/evergreen_compute.c
index 4d490c4..6d26b8e 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -903,67 +903,47 @@ void *r600_compute_global_transfer_map(
 {
struct r600_context *rctx = (struct r600_context*)ctx_;
struct compute_memory_pool *pool = rctx->screen->global_pool;
-   struct pipe_transfer *transfer = util_slab_alloc(&rctx->pool_transfers);
struct r600_resource_global* buffer =
(struct r600_resource_global*)resource;
-   uint32_t* map;
 
-   compute_memory_finalize_pending(pool, ctx_);
-
-   assert(resource->target == PIPE_BUFFER);
-
-   COMPUTE_DBG(rctx->screen, "* r600_compute_global_get_transfer()\n"
+   COMPUTE_DBG(rctx->screen, "* r600_compute_global_transfer_map()\n"
"level = %u, usage = %u, box(x = %u, y = %u, z = %u "
"width = %u, height = %u, depth = %u)\n", level, usage,
box->x, box->y, box->z, box->width, box->height,
box->depth);
+   COMPUTE_DBG(rctx->screen, "Buffer: %u (buffer offset in global memory) "
+   "+ %u (box.x)\n", buffer->chunk->start_in_dw, box->x);
 
-   transfer->resource = resource;
-   transfer->level = level;
-   transfer->usage = usage;
-   transfer->box = *box;
-   transfer->stride = 0;
-   transfer->layer_stride = 0;
-
-   assert(transfer->resource->target == PIPE_BUFFER);
-   assert(transfer->resource->bind & PIPE_BIND_GLOBAL);
-   assert(transfer->box.x >= 0);
-   assert(transfer->box.y == 0);
-   assert(transfer->box.z == 0);
 
-   ///TODO: do it better, mapping is not possible if the pool is too big
-
-   COMPUTE_DBG(rctx->screen, "* r600_compute_global_transfer_map()\n");
-
-   if (!(map = r600_buffer_mmap_sync_with_rings(rctx, 
buffer->chunk->pool->bo, transfer->usage))) {
-   util_slab_free(&rctx->pool_transfers, transfer);
-   return NULL;
-   }
+   compute_memory_finalize_pending(pool, ctx_);
 
-   *ptransfer = transfer;
+   assert(resource->target == PIPE_BUFFER);
+   assert(resource->bind & PIPE_BIND_GLOBAL);
+   assert(box->x >= 0);
+   assert(box->y == 0);
+   assert(box->z == 0);
 
-   COMPUTE_DBG(rctx->screen, "Buffer: %p + %u (buffer offset in global 
memory) "
-   "+ %u (box.x)\n", map, buffer->chunk->start_in_dw, 
transfer->box.x);
-   return ((char*)(map + buffer->chunk->start_in_dw)) + transfer->box.x;
+   ///TODO: do it better, mapping is not possible if the pool is too big
+   return pipe_buffer_map_range(ctx_, (struct 
pipe_resource*)buffer->chunk->pool->bo,
+   box->x + (buffer->chunk->start_in_dw * 4),
+   box->width, usage, ptransfer);
 }
 
 void r600_compute_global_transfer_unmap(
struct pipe_context *ctx_,
struct pipe_transfer* transfer)
 {
-   struct r600_context *ctx = NULL;
-   struct r600_resource_global* buffer = NULL;
-
-   assert(transfer->resource->target == PIPE_BUFFER);
-   assert(transfer->resource->bind & PIPE_BIND_GLOBAL);
-
-   ctx = (struct r600_context *)ctx_;
-   buffer = (struct r600_resource_global*)transfer->resource;
-
-   COMPUTE_DBG(ctx->screen, "* r600_compute_global_transfer_unmap()\n");
-
-   ctx->ws->buffer_unmap(buffer->chunk->pool->bo->cs_buf);
-   util_slab_free(&ctx->pool_transfers, transfer);
+   /* struct r600_resource_global are not real resources, they just map
+* to an offset within the compute memory pool.  The function
+* r600_compute_global_transfer_map() maps the memory pool
+* resource rather than the struct r600_resource_global passed to
+* it as an argument and then initializes ptransfer->resource with
+* the memory pool resource (via pipe_buffer_map_range).
+* When transfer_unmap is called it uses the memory pool's
+* vtable which calls r600_buffer_transfer_map() rather than
+* this function.
+*/
+   assert (!"This function should not be called");
 }
 
 void r600_compute_global_transfer_flush_region(
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] r600g/compute: Use common transfer_{map, unmap} functions for kernel inputs

2013-05-17 Thread Tom Stellard
From: Tom Stellard 

---
 src/gallium/drivers/r600/evergreen_compute.c | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
b/src/gallium/drivers/r600/evergreen_compute.c
index 5f67759..4d490c4 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -271,6 +271,8 @@ void evergreen_compute_upload_input(
uint32_t * global_size_start;
uint32_t * local_size_start;
uint32_t * kernel_parameters_start;
+   struct pipe_box box;
+   struct pipe_transfer *transfer = NULL;
 
if (shader->input_size == 0) {
return;
@@ -278,11 +280,16 @@ void evergreen_compute_upload_input(
 
if (!shader->kernel_param) {
/* Add space for the grid dimensions */
-   shader->kernel_param = r600_compute_buffer_alloc_vram(
-   ctx->screen, input_size);
+   shader->kernel_param = (struct r600_resource *)
+   pipe_buffer_create(ctx_->screen, PIPE_BIND_CUSTOM,
+   PIPE_USAGE_IMMUTABLE, input_size);
}
 
-   num_work_groups_start = r600_buffer_mmap_sync_with_rings(ctx, 
shader->kernel_param, PIPE_TRANSFER_WRITE);
+   u_box_1d(0, input_size, &box);
+   num_work_groups_start = ctx_->transfer_map(ctx_,
+   (struct pipe_resource*)shader->kernel_param,
+   0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE,
+   &box, &transfer);
global_size_start = num_work_groups_start + (3 * (sizeof(uint) /4));
local_size_start = global_size_start + (3 * (sizeof(uint)) / 4);
kernel_parameters_start = local_size_start + (3 * (sizeof(uint)) / 4);
@@ -306,7 +313,7 @@ void evergreen_compute_upload_input(
((unsigned*)num_work_groups_start)[i]);
}
 
-   ctx->ws->buffer_unmap(shader->kernel_param->cs_buf);
+   ctx_->transfer_unmap(ctx_, transfer);
 
/* ID=0 is reserved for the parameters */
evergreen_cs_set_constant_buffer(ctx, 0, 0, input_size,
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] scons: Use LLVM shared library if found.

2013-05-17 Thread Aaron Watry
On Fri, May 17, 2013 at 2:31 PM, Jose Fonseca  wrote:
>
>
> - Original Message -
>> On Fri, May 17, 2013 at 7:44 AM, Jose Fonseca  wrote:
>> > Vinson,
>> >
>> > Why is this necessary?
>> >
>> > (I'd prefer that LLVM is statically linked by default. )
>> >
>> > Jose
>>
>> The SCons build fails on systems that only provide a LLVM shared
>> library. 'llvm-config --libs' always enumerates the libraries instead
>> of providing the shared library on such systems.
>>
>> $ cat /etc/redhat-release
>> Fedora release 18 (Spherical Cow)
>> $ llvm-config --libs
>> -lLLVMAsmParser -lLLVMInstrumentation -lLLVMLinker -lLLVMArchive
>> -lLLVMBitReader -lLLVMDebugInfo -lLLVMJIT -lLLVMipo -lLLVMVectorize
>> -lLLVMBitWriter -lLLVMTableGen -lLLVMHexagonCodeGen -lLLVMHexagonDesc
>> -lLLVMHexagonAsmPrinter -lLLVMHexagonInfo -lLLVMNVPTXCodeGen
>> -lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter
>> -lLLVMMBlazeDisassembler -lLLVMMBlazeAsmParser -lLLVMMBlazeCodeGen
>> -lLLVMMBlazeDesc -lLLVMMBlazeInfo -lLLVMMBlazeAsmPrinter
>> -lLLVMCppBackendCodeGen -lLLVMCppBackendInfo -lLLVMMSP430CodeGen
>> -lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter
>> -lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo
>> -lLLVMCellSPUCodeGen -lLLVMCellSPUDesc -lLLVMCellSPUInfo
>> -lLLVMMipsDisassembler -lLLVMMipsAsmParser -lLLVMMipsCodeGen
>> -lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter
>> -lLLVMARMDisassembler -lLLVMARMAsmParser -lLLVMARMCodeGen
>> -lLLVMARMDesc -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMPowerPCCodeGen
>> -lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter
>> -lLLVMSparcCodeGen -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMX86AsmParser
>> -lLLVMX86Disassembler -lLLVMX86CodeGen -lLLVMX86Desc -lLLVMX86Info
>> -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMR600CodeGen
>> -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMR600Desc -lLLVMR600Info
>> -lLLVMR600AsmPrinter -lLLVMMCDisassembler -lLLVMMCParser
>> -lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine
>> -lLLVMTransformUtils -lLLVMipa -lLLVMAnalysis -lLLVMMCJIT
>> -lLLVMRuntimeDyld -lLLVMExecutionEngine -lLLVMTarget -lLLVMMC
>> -lLLVMObject -lLLVMCore -lLLVMSupport
>> $ ls `llvm-config --libdir`
>> BugpointPasses.so  libclang.so  libLLVM-3.2svn.so  libLTO.so
>> libprofile_rt.so  LLVMgold.so
>
> Then Fedora 18's llvm-config is busted, as `llvm-config --libs` should return 
> libLLVM-3.2svn.so
>

I'm using upstream llvm git master, and the shared library isn't
listed in llvm-config here either.


~/src/llvm$ llvm-config --libs
-lLLVMR600CodeGen -lLLVMR600Desc -lLLVMR600Info -lLLVMR600AsmPrinter
-lLLVMTableGen -lLLVMDebugInfo -lLLVMOption -lLLVMX86Disassembler
-lLLVMX86AsmParser -lLLVMX86CodeGen -lLLVMSelectionDAG
-lLLVMAsmPrinter -lLLVMX86Desc -lLLVMX86Info -lLLVMX86AsmPrinter
-lLLVMX86Utils -lLLVMIRReader -lLLVMAsmParser -lLLVMMCDisassembler
-lLLVMMCParser -lLLVMInstrumentation -lLLVMArchive -lLLVMBitReader
-lLLVMInterpreter -lLLVMipo -lLLVMVectorize -lLLVMLinker
-lLLVMBitWriter -lLLVMMCJIT -lLLVMJIT -lLLVMCodeGen -lLLVMObjCARCOpts
-lLLVMScalarOpts -lLLVMInstCombine -lLLVMTransformUtils -lLLVMipa
-lLLVMAnalysis -lLLVMRuntimeDyld -lLLVMExecutionEngine -lLLVMTarget
-lLLVMMC -lLLVMObject -lLLVMCore -lLLVMSupport

~/src/llvm$ which llvm-config
/usr/local/bin/llvm-config

~/src/llvm$ ls /usr/local/lib/libLLVM*
/usr/local/lib/libLLVM-3.4svn.so
/usr/local/lib/libLLVMAnalysis.a
/usr/local/lib/libLLVMArchive.a
/usr/local/lib/libLLVMAsmParser.a


I'm guessing that 'llvm-config --libs' is only listing static libraries.

--Aaron


> So I believe this issue should be filed against Fedora, not worked around
> here.   Honestly, only shipping LLVM in a .so is already a bad idea, but
> breaking llvm-config is even worse -- what's the point of scripts like
> llvm-config if their output can't be trusted?
>
> BTW, configure.ac doesn't have this hack.  Does it fail the same way too?
>
> Jose
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Gen6+ hardware contexts & query object improvements

2013-05-17 Thread Eric Anholt
Kenneth Graunke  writes:

> Hello!
>
> This patch series bumps the kernel requirement to 3.6 for Gen6+,
> meaning that we actually get to rely on hardware context support.
> That's a little painful, but even Debian ships 3.8 now, and this
> isn't going to make it into an actual release for several more
> months.
>
> It then splits our query code into Gen4-5 and Gen6+ versions.  The new
> Gen6+ version is a lot simpler since hardware contexts guarantee that
> our statistics registers don't get polluted with data from other
> programs running on the system.  It should be more efficient, which
> may help games like Minecraft (though I haven't measured).
>
> Finally, it implements the GL_PRIMITIVES_GENERATED and
> GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN queries via hardware counters
> rather than manually counting on the CPU.  This paves the way for
> geometry shader support (which can output multiple primitives, breaking
> our CPU-side tracking), and should allow us to enable hardware primitive
> restart in a few more cases once a few more things are tidied.
>
> The next step is to eliminate the use of SOL reset and save/restore the
> transform feedback offsets directly.  Then we can turn on hardware
> primitive restart more aggressively and implement a few more transform
> feedback extensions.

Uncommented patches, or obvious changes to the commented patches are:

Reviewed-by: Eric Anholt 


pgpBjWfUsoRLd.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/10] i965: Handle rasterizer discard in the clipper rather than SOL on Gen7.

2013-05-17 Thread Eric Anholt
Kenneth Graunke  writes:

> In order to implement the GL_PRIMITIVES_GENERATED query in a sane
> fashion on our hardware, we can't discard primitives until the clipper.
> The patch after next explains the rationale.
>
> By setting the clipper to REJECT_ALL mode, all primitives get thrown away,
> so rendering is still appropriately disabled.
>
> This may negatively impact performance in the rasterizer discard case,
> but it's unclear how much and this hasn't been observed to be a
> bottleneck in any application we've looked at.  The clipper is the very
> next stage in the pipeline, so I don't think it will be terrible.

I'm tempted to drop a perf_debug() in this and the next patch to remind
us, if some important app starts doing discard, that we could use the
previous rasterizer discard support outside of a GL_PRIMITIVES_GENERATED
query.


pgpm9IL5zea2x.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/10] i965: Delete Gen7+ check for Kernel 3.3 now that we require 3.6+.

2013-05-17 Thread Eric Anholt
Kenneth Graunke  writes:

> It's just not necessary.

I'd squash this with the previous, and lower-case "Kernel" -- it seems
to be the convention when it's not part of some other proper noun.


pgpObc4KqS0OC.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 07/10] i965: Disable clipper statistics when meta operations are in progress.

2013-05-17 Thread Eric Anholt
Kenneth Graunke  writes:

> We don't currently use the clipper statistics, but we'll soon use
> CL_INVOCATIONS_COUNT to implement the GL_PRIMITIVES_GENERATED query.
> The number of primitives generated is not supposed to be altered during
> operations such as glGenerateMipmap.
>
> Prevents spec/EXT_transform_feedback/generatemipmap prims_generated
> from breaking when we start using pipeline statistics registers to
> implement the GL_PRIMITIVES_GENERATED query in a few commits.

Missing state flagging -- I guess we should check for new
_mesa_meta_in_progress() at the start of brw_state_upload, and flag a
BRW_NEW_META_IN_PROGRESS for it.


pgpzsXQ4J5i8d.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] i965: Add cases for ir_binop_vector_extract that assert.

2013-05-17 Thread Eric Anholt
Eric Anholt  writes:

> Kenneth Graunke  writes:
>
>> do_vec_index_to_swizzle() should remove any vector extract operations
>> with a constant index.  It's unconditionally called from
>> do_common_optimization().
>>
>> do_vec_index_to_cond_assign() should remove the rest, and it is
>> unconditionally called from brw_link_shader().  This means that we
>> should never see ir_binop_vector_extract in the backend.
>>
>> Silences compiler warnings.
>
> I found warnings also in brw_fs_visitor.cpp for the same, plus
> vector_insert, and was about to send that patch out.  I'm confused how
> you aren't seeing those.

Err, didn't see patch 2.  Still, brw_fs_visitor.cpp.


pgpb66QzZhLT7.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] A simple GLES2 shader application performance slower with h/w renderer

2013-05-17 Thread Divick Kishore
> vblank_mode was broken for a long time in EGL, but current 9.0 and 9.1
> have it fixed.  Not sure what version you're on.

I am using version 8.0.5. I have been unable to build 9.1 with the
softpipe renderer using the same options that I used for building
8.0.5. I posted about this separately on this mailing list, but I
have not received any response so far. It would be great if you
could suggest something. Let me know and I can forward you the thread
where I posted about this issue.

Thanks for pointing out that it has been fixed in latest revision.
Regards,
Divick
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] i965: Add cases for ir_binop_vector_extract that assert.

2013-05-17 Thread Eric Anholt
Kenneth Graunke  writes:

> do_vec_index_to_swizzle() should remove any vector extract operations
> with a constant index.  It's unconditionally called from
> do_common_optimization().
>
> do_vec_index_to_cond_assign() should remove the rest, and it is
> unconditionally called from brw_link_shader().  This means that we
> should never see ir_binop_vector_extract in the backend.
>
> Silences compiler warnings.

I found warnings also in brw_fs_visitor.cpp for the same, plus
vector_insert, and was about to send that patch out.  I'm confused how
you aren't seeing those.


pgpnmhn9FSfoy.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] scons: Use LLVM shared library if found.

2013-05-17 Thread Jose Fonseca


- Original Message -
> On Fri, May 17, 2013 at 7:44 AM, Jose Fonseca  wrote:
> > Vinson,
> >
> > Why is this necessary?
> >
> > (I'd prefer that LLVM is statically linked by default. )
> >
> > Jose
> 
> The SCons build fails on systems that only provide a LLVM shared
> library. 'llvm-config --libs' always enumerates the libraries instead
> of providing the shared library on such systems.
> 
> $ cat /etc/redhat-release
> Fedora release 18 (Spherical Cow)
> $ llvm-config --libs
> -lLLVMAsmParser -lLLVMInstrumentation -lLLVMLinker -lLLVMArchive
> -lLLVMBitReader -lLLVMDebugInfo -lLLVMJIT -lLLVMipo -lLLVMVectorize
> -lLLVMBitWriter -lLLVMTableGen -lLLVMHexagonCodeGen -lLLVMHexagonDesc
> -lLLVMHexagonAsmPrinter -lLLVMHexagonInfo -lLLVMNVPTXCodeGen
> -lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter
> -lLLVMMBlazeDisassembler -lLLVMMBlazeAsmParser -lLLVMMBlazeCodeGen
> -lLLVMMBlazeDesc -lLLVMMBlazeInfo -lLLVMMBlazeAsmPrinter
> -lLLVMCppBackendCodeGen -lLLVMCppBackendInfo -lLLVMMSP430CodeGen
> -lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter
> -lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo
> -lLLVMCellSPUCodeGen -lLLVMCellSPUDesc -lLLVMCellSPUInfo
> -lLLVMMipsDisassembler -lLLVMMipsAsmParser -lLLVMMipsCodeGen
> -lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter
> -lLLVMARMDisassembler -lLLVMARMAsmParser -lLLVMARMCodeGen
> -lLLVMARMDesc -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMPowerPCCodeGen
> -lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter
> -lLLVMSparcCodeGen -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMX86AsmParser
> -lLLVMX86Disassembler -lLLVMX86CodeGen -lLLVMX86Desc -lLLVMX86Info
> -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMR600CodeGen
> -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMR600Desc -lLLVMR600Info
> -lLLVMR600AsmPrinter -lLLVMMCDisassembler -lLLVMMCParser
> -lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine
> -lLLVMTransformUtils -lLLVMipa -lLLVMAnalysis -lLLVMMCJIT
> -lLLVMRuntimeDyld -lLLVMExecutionEngine -lLLVMTarget -lLLVMMC
> -lLLVMObject -lLLVMCore -lLLVMSupport
> $ ls `llvm-config --libdir`
> BugpointPasses.so  libclang.so  libLLVM-3.2svn.so  libLTO.so
> libprofile_rt.so  LLVMgold.so

Then Fedora 18's llvm-config is busted, as `llvm-config --libs` should return 
libLLVM-3.2svn.so

So I believe this issue should be filed against Fedora, not worked around
here.   Honestly, only shipping LLVM in a .so is already a bad idea, but
breaking llvm-config is even worse -- what's the point of scripts like
llvm-config if their output can't be trusted?

BTW, configure.ac doesn't have this hack.  Does it fail the same way too?

Jose
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] scons: Use LLVM shared library if found.

2013-05-17 Thread Vinson Lee
On Fri, May 17, 2013 at 7:44 AM, Jose Fonseca  wrote:
> Vinson,
>
> Why is this necessary?
>
> (I'd prefer that LLVM is statically linked by default. )
>
> Jose

The SCons build fails on systems that only provide an LLVM shared
library. 'llvm-config --libs' always enumerates the libraries instead
of providing the shared library on such systems.

$ cat /etc/redhat-release
Fedora release 18 (Spherical Cow)
$ llvm-config --libs
-lLLVMAsmParser -lLLVMInstrumentation -lLLVMLinker -lLLVMArchive
-lLLVMBitReader -lLLVMDebugInfo -lLLVMJIT -lLLVMipo -lLLVMVectorize
-lLLVMBitWriter -lLLVMTableGen -lLLVMHexagonCodeGen -lLLVMHexagonDesc
-lLLVMHexagonAsmPrinter -lLLVMHexagonInfo -lLLVMNVPTXCodeGen
-lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter
-lLLVMMBlazeDisassembler -lLLVMMBlazeAsmParser -lLLVMMBlazeCodeGen
-lLLVMMBlazeDesc -lLLVMMBlazeInfo -lLLVMMBlazeAsmPrinter
-lLLVMCppBackendCodeGen -lLLVMCppBackendInfo -lLLVMMSP430CodeGen
-lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter
-lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo
-lLLVMCellSPUCodeGen -lLLVMCellSPUDesc -lLLVMCellSPUInfo
-lLLVMMipsDisassembler -lLLVMMipsAsmParser -lLLVMMipsCodeGen
-lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter
-lLLVMARMDisassembler -lLLVMARMAsmParser -lLLVMARMCodeGen
-lLLVMARMDesc -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMPowerPCCodeGen
-lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter
-lLLVMSparcCodeGen -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMX86AsmParser
-lLLVMX86Disassembler -lLLVMX86CodeGen -lLLVMX86Desc -lLLVMX86Info
-lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMR600CodeGen
-lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMR600Desc -lLLVMR600Info
-lLLVMR600AsmPrinter -lLLVMMCDisassembler -lLLVMMCParser
-lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine
-lLLVMTransformUtils -lLLVMipa -lLLVMAnalysis -lLLVMMCJIT
-lLLVMRuntimeDyld -lLLVMExecutionEngine -lLLVMTarget -lLLVMMC
-lLLVMObject -lLLVMCore -lLLVMSupport
$ ls `llvm-config --libdir`
BugpointPasses.so  libclang.so  libLLVM-3.2svn.so  libLTO.so
libprofile_rt.so  LLVMgold.so




>
> - Original Message -
>> This patch fixes SCons builds on Fedora 18.
>>
>> Signed-off-by: Vinson Lee 
>> ---
>>  scons/llvm.py | 10 +-
>>  1 file changed, 9 insertions(+), 1 deletion(-)
>>
>> diff --git a/scons/llvm.py b/scons/llvm.py
>> index 7cd609c..432ece6 100644
>> --- a/scons/llvm.py
>> +++ b/scons/llvm.py
>> @@ -198,7 +198,15 @@ def generate(env):
>>  if llvm_version >= distutils.version.LooseVersion('3.2'):
>>  env.Append(CXXFLAGS = ('-fno-rtti',))
>>
>> -env.ParseConfig('llvm-config --libs ' + ' '.join(components))
>> +llvm_shared_library = os.path.join(
>> +env.backtick('llvm-config --libdir').strip(),
>> +'libLLVM-%s%s' % (llvm_version, env['SHLIBSUFFIX'])
>> +)
>> +if os.path.exists(llvm_shared_library):
>> +env.Append(LIBS = ['LLVM-%s' % llvm_version])
>> +else:
>> +env.ParseConfig('llvm-config --libs ' + '
>> '.join(components))
>> +
>>  env.ParseConfig('llvm-config --ldflags')
>>  except OSError:
>>  print 'scons: llvm-config version %s failed' % llvm_version
>> --
>> 1.8.2.1
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] A simple GLES2 shader application performance slower with h/w renderer

2013-05-17 Thread Eric Anholt
Divick Kishore  writes:

>>> By default we sync to vblank, which for you is 60.  The software
>>> rasterizer lacks this feature.
>
> I meant that even with h/w rasterizer I get fps = 60 with vblank=0 set.
>
>
>> The weird thing is that he said he ran it with vblank_mode=0.  Makes me
>> wonder if we have a bug in our handling of that with EGL still.
>
> I can attach the sample application along if it helps to reproduce the issue.

vblank_mode was broken for a long time in EGL, but current 9.0 and 9.1
have it fixed.  Not sure what version you're on.


pgpQiKeBNnTPX.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] i965: Add cases for ir_binop_vector_extract that assert.

2013-05-17 Thread Matt Turner
On Fri, May 17, 2013 at 10:43 AM, Kenneth Graunke  wrote:
> do_vec_index_to_swizzle() should remove any vector extract operations
> with a constant index.  It's unconditionally called from
> do_common_optimization().
>
> do_vec_index_to_cond_assign() should remove the rest, and it is
> unconditionally called from brw_link_shader().  This means that we
> should never see ir_binop_vector_extract in the backend.
>
> Silences compiler warnings.
>
> Cc: Ian Romanick 
> Cc: Paul Berry 
> Signed-off-by: Kenneth Graunke 
> ---
>  src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp | 1 +
>  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp   | 4 
>  2 files changed, 5 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
> index 0f3d4ab..ea714ec 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
> @@ -402,6 +402,7 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment 
> *ir)
> case ir_unop_unpack_unorm_2x16:
> case ir_unop_unpack_unorm_4x8:
> case ir_unop_unpack_half_2x16:
> +   case ir_binop_vector_extract:
> case ir_quadop_bitfield_insert:
> case ir_quadop_vector:
>assert(!"should have been lowered");
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> index f14529a..f7dd333 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> @@ -1671,6 +1671,10 @@ vec4_visitor::visit(ir_expression *ir)
>break;
> }
>
> +   case ir_binop_vector_extract:
> +  assert(!"should have been lowered by vec_index_to_cond_assign");
> +  break;
> +
> case ir_triop_lrp:
>op[0] = fix_3src_operand(op[0]);
>op[1] = fix_3src_operand(op[1]);
> --
> 1.8.2.3

Both are

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] i965: Add cases for ir_binop_vector_extract that assert.

2013-05-17 Thread Kenneth Graunke
do_vec_index_to_swizzle() should remove any vector extract operations
with a constant index.  It's unconditionally called from
do_common_optimization().

do_vec_index_to_cond_assign() should remove the rest, and it is
unconditionally called from brw_link_shader().  This means that we
should never see ir_binop_vector_extract in the backend.

Silences compiler warnings.

Cc: Ian Romanick 
Cc: Paul Berry 
Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp | 1 +
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp   | 4 
 2 files changed, 5 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
index 0f3d4ab..ea714ec 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
@@ -402,6 +402,7 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment 
*ir)
case ir_unop_unpack_unorm_2x16:
case ir_unop_unpack_unorm_4x8:
case ir_unop_unpack_half_2x16:
+   case ir_binop_vector_extract:
case ir_quadop_bitfield_insert:
case ir_quadop_vector:
   assert(!"should have been lowered");
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index f14529a..f7dd333 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1671,6 +1671,10 @@ vec4_visitor::visit(ir_expression *ir)
   break;
}
 
+   case ir_binop_vector_extract:
+  assert(!"should have been lowered by vec_index_to_cond_assign");
+  break;
+
case ir_triop_lrp:
   op[0] = fix_3src_operand(op[0]);
   op[1] = fix_3src_operand(op[1]);
-- 
1.8.2.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] i965: Add cases for ir_triop_vector_insert that assert.

2013-05-17 Thread Kenneth Graunke
brw_link_shader() unconditionally calls lower_vector_insert() with true
as the second parameter.  This means that both constant and variable
indexed expressions will get lowered, so we should never see this in the
backend.

Cc: Ian Romanick 
Cc: Paul Berry 
Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp | 1 +
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp   | 4 
 2 files changed, 5 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
index ea714ec..4afae24 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
@@ -403,6 +403,7 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment 
*ir)
case ir_unop_unpack_unorm_4x8:
case ir_unop_unpack_half_2x16:
case ir_binop_vector_extract:
+   case ir_triop_vector_insert:
case ir_quadop_bitfield_insert:
case ir_quadop_vector:
   assert(!"should have been lowered");
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index f7dd333..b2f8f00 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1702,6 +1702,10 @@ vec4_visitor::visit(ir_expression *ir)
   emit(BFE(result_dst, op[2], op[1], op[0]));
   break;
 
+   case ir_triop_vector_insert:
+  assert(!"should have been lowered by lower_vector_insert");
+  break;
+
case ir_quadop_bitfield_insert:
   assert(!"not reached: should be handled by "
   "bitfield_insert_to_bfm_bfi\n");
-- 
1.8.2.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa: declare UniformBufferBindings as an array with a static size

2013-05-17 Thread Marek Olšák
Some Gallium drivers were crashing, because the array was not large enough.

v2: clamp the per-shader maximum in st/mesa, then sum them all up

NOTE: This is a candidate for the stable branches.
---
 src/mesa/main/bufferobj.c  |   10 ++
 src/mesa/main/config.h |3 +++
 src/mesa/main/mtypes.h |3 ++-
 src/mesa/state_tracker/st_extensions.c |7 ++-
 4 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index 1566cb4..ffb67b9 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -619,13 +619,10 @@ _mesa_init_buffer_objects( struct gl_context *ctx )
_mesa_reference_buffer_object(ctx, &ctx->CopyWriteBuffer,
  ctx->Shared->NullBufferObj);
 
-   ctx->UniformBufferBindings = calloc(ctx->Const.MaxUniformBufferBindings,
- sizeof(*ctx->UniformBufferBindings));
-
_mesa_reference_buffer_object(ctx, &ctx->UniformBuffer,
 ctx->Shared->NullBufferObj);
 
-   for (i = 0; i < ctx->Const.MaxUniformBufferBindings; i++) {
+   for (i = 0; i < MAX_COMBINED_UNIFORM_BUFFERS; i++) {
   _mesa_reference_buffer_object(ctx,
&ctx->UniformBufferBindings[i].BufferObject,
ctx->Shared->NullBufferObj);
@@ -647,14 +644,11 @@ _mesa_free_buffer_objects( struct gl_context *ctx )
 
_mesa_reference_buffer_object(ctx, &ctx->UniformBuffer, NULL);
 
-   for (i = 0; i < ctx->Const.MaxUniformBufferBindings; i++) {
+   for (i = 0; i < MAX_COMBINED_UNIFORM_BUFFERS; i++) {
   _mesa_reference_buffer_object(ctx,
&ctx->UniformBufferBindings[i].BufferObject,
NULL);
}
-
-   free(ctx->UniformBufferBindings);
-   ctx->UniformBufferBindings = NULL;
 }
 
 static bool
diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h
index ea87b75..4a4fdc9 100644
--- a/src/mesa/main/config.h
+++ b/src/mesa/main/config.h
@@ -168,6 +168,9 @@
 /*@{*/
 #define MAX_PROGRAM_LOCAL_PARAMS   4096
 #define MAX_UNIFORMS   4096
+#define MAX_UNIFORM_BUFFERS15 /* + 1 default uniform buffer */
+/* 6 is for vertex, hull, domain, geometry, fragment, and compute shader. */
+#define MAX_COMBINED_UNIFORM_BUFFERS   (MAX_UNIFORM_BUFFERS * 6)
 /*@}*/
 
 /**
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 817633c..96c06ae 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -3527,7 +3527,8 @@ struct gl_context
 * associated with uniform blocks by glUniformBlockBinding()'s state in the
 * shader program.
 */
-   struct gl_uniform_buffer_binding *UniformBufferBindings;
+   struct gl_uniform_buffer_binding
+  UniformBufferBindings[MAX_COMBINED_UNIFORM_BUFFERS];
 
/*@}*/
 
diff --git a/src/mesa/state_tracker/st_extensions.c 
b/src/mesa/state_tracker/st_extensions.c
index 982e652..46acc87 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -197,6 +197,7 @@ void st_init_limits(struct st_context *st)
  screen->get_shader_param(screen, sh, 
PIPE_SHADER_CAP_MAX_CONST_BUFFERS);
   if (pc->MaxUniformBlocks)
  pc->MaxUniformBlocks -= 1; /* The first one is for ordinary uniforms. 
*/
+  pc->MaxUniformBlocks = _min(pc->MaxUniformBlocks, MAX_UNIFORM_BUFFERS);
 
   pc->MaxCombinedUniformComponents = (pc->MaxUniformComponents +
   c->MaxUniformBlockSize / 4 *
@@ -279,15 +280,11 @@ void st_init_limits(struct st_context *st)
   st->ctx->Extensions.ARB_uniform_buffer_object = GL_TRUE;
   c->UniformBufferOffsetAlignment =
  screen->get_param(screen, PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT);
-  /* FIXME: _mesa_init_buffer_objects() already has been, and
-   * ctx->UniformBufferBindings allocated, so unfortunately we can't just
-   * change MaxUniformBufferBindings a posteriori. */
-#if 0
   c->MaxCombinedUniformBlocks = c->MaxUniformBufferBindings =
  c->VertexProgram.MaxUniformBlocks +
  c->GeometryProgram.MaxUniformBlocks +
  c->FragmentProgram.MaxUniformBlocks;
-#endif
+  assert(c->MaxCombinedUniformBlocks <= MAX_COMBINED_UNIFORM_BUFFERS);
}
 }
 
-- 
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/10] i965: Implement transform feedback query support in hardware on Gen6+.

2013-05-17 Thread Kenneth Graunke
Now that we have hardware contexts and can use MI_STORE_REGISTER_MEM,
we can use the GPU's pipeline statistics counters rather than going out
of our way to count primitives in software.

Aside from being simpler, this also paves the way for Geometry Shaders,
which can output an arbitrary number of primitives on the GPU.

The GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN query is easy: it
corresponds to the SO_NUM_PRIMS_WRITTEN/SO_NUM_PRIMS_WRITTEN0_IVB
counters.

The GL_PRIMITIVES_GENERATED query is trickier.  Gen provides several
statistics registers which /almost/ match the semantics required:
- IA_PRIMITIVES_COUNT
  The number of primitives fetched by the VF or IA (input assembler).
  This undercounts when GS is enabled, as it can output many primitives.
- GS_PRIMITIVES_COUNT
  The number of primitives output by the GS.  Unfortunately, this
  doesn't increment unless the GS unit is actually enabled, and it
  usually isn't.
- SO_PRIM_STORAGE_NEEDED*_IVB
  The amount of space needed to write primitives output by transform
  feedback.  These naturally only work when transform feedback is on.
  We'd also have to add the counters for all four streams.
- CL_INVOCATION_COUNT
  The number of primitives processed by the clipper.  This doesn't work
  if the GS or SOL throw away primitives for rasterizer discard.
  However, it does increment even if the clipper is in REJECT_ALL mode.

Dynamically switching between counters would be painfully complicated,
especially since GS, rasterizer discard, and transform feedback can all
be switched on and off repeatedly during a single query.

The most usable counter is CL_INVOCATION_COUNT.  The previous two
patches reworked rasterizer discard support so that all primitives hit
the clipper, making this work.

Cc: Eric Anholt 
Cc: Paul Berry 
Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/gen6_queryobj.c | 105 +++---
 1 file changed, 66 insertions(+), 39 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c 
b/src/mesa/drivers/dri/i965/gen6_queryobj.c
index 28af8d7..a032227 100644
--- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
+++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
@@ -94,6 +94,57 @@ write_depth_count(struct intel_context *intel, drm_intel_bo 
*query_bo, int idx)
ADVANCE_BATCH();
 }
 
+/*
+ * Write an arbitrary 64-bit register to a buffer via MI_STORE_REGISTER_MEM.
+ *
+ * Only TIMESTAMP and PS_DEPTH_COUNT have special PIPE_CONTROL support; other
+ * counters have to be read via the generic MI_STORE_REGISTER_MEM.  This
+ * function also performs a pipeline flush for proper synchronization.
+ */
+static void
+write_reg(struct intel_context *intel,
+  drm_intel_bo *query_bo, uint32_t reg, int idx)
+{
+   assert(intel->gen >= 6);
+
+   intel_batchbuffer_emit_mi_flush(intel);
+
+   /* MI_STORE_REGISTER_MEM only stores a single 32-bit value, so to
+* read a full 64-bit register, we need to do two of them.
+*/
+   BEGIN_BATCH(3);
+   OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
+   OUT_BATCH(reg);
+   OUT_RELOC(query_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ idx * sizeof(uint64_t));
+   ADVANCE_BATCH();
+
+   BEGIN_BATCH(3);
+   OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
+   OUT_BATCH(reg + sizeof(uint32_t));
+   OUT_RELOC(query_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ sizeof(uint32_t) + idx * sizeof(uint64_t));
+   ADVANCE_BATCH();
+}
+
+static void
+write_primitives_generated(struct intel_context *intel,
+   drm_intel_bo *query_bo, int idx)
+{
+   write_reg(intel, query_bo, CL_INVOCATION_COUNT, idx);
+}
+
+static void
+write_xfb_primitives_written(struct intel_context *intel,
+ drm_intel_bo *query_bo, int idx)
+{
+   if (intel->gen >= 7) {
+  write_reg(intel, query_bo, SO_NUM_PRIMS_WRITTEN0_IVB, idx);
+   } else {
+  write_reg(intel, query_bo, SO_NUM_PRIMS_WRITTEN, idx);
+   }
+}
+
 /**
  * Wait on the query object's BO and calculate the final result.
  */
@@ -152,21 +203,20 @@ gen6_queryobj_get_results(struct gl_context *ctx,
   query->Base.Result &= (1ull << 36) - 1;
   break;
 
-   case GL_SAMPLES_PASSED_ARB:
-  query->Base.Result += results[1] - results[0];
-  break;
-
case GL_ANY_SAMPLES_PASSED:
case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
-  query->Base.Result = results[0] != results[1];
+  if (results[0] != results[1])
+ query->Base.Result = true;
   break;
 
+   case GL_SAMPLES_PASSED_ARB:
case GL_PRIMITIVES_GENERATED:
case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
-  /* We don't actually query the hardware for this value, so query->bo
-   * should always be NULL and execution should never reach here.
+  /* We need to use += rather than = here since some BLT-based operations
+   * may have added additional samples to our occlusion query value.
+   * It shouldn't matter for geometry queries, but is harmless.
*/
- 

[Mesa-dev] [PATCH 09/10] i965: Handle rasterizer discard in the clipper rather than GS on Gen6.

2013-05-17 Thread Kenneth Graunke
This has more of a negative impact than the previous patch, as on Gen6
passing primitives through to the clipper means we actually have to make
the GS thread write them to the URB.

I don't see another good solution though, and rasterizer discard is not
the most common of cases, so hopefully it won't be too terrible.

Cc: Eric Anholt 
Cc: Paul Berry 
Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_gs.c  |  9 +
 src/mesa/drivers/dri/i965/brw_gs_emit.c | 30 -
 src/mesa/drivers/dri/i965/gen6_clip_state.c |  6 +-
 3 files changed, 6 insertions(+), 39 deletions(-)

This patch prevents breakage in patch 10.  See patch 10 for the rationale.

diff --git a/src/mesa/drivers/dri/i965/brw_gs.c 
b/src/mesa/drivers/dri/i965/brw_gs.c
index a432b76..f354dd9 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -214,12 +214,6 @@ static void populate_key( struct brw_context *brw,
swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset];
  }
   }
-  /* On Gen6, GS is also used for rasterizer discard. */
-  /* BRW_NEW_RASTERIZER_DISCARD */
-  if (ctx->RasterDiscard) {
- key->need_gs_prog = true;
- key->rasterizer_discard = true;
-  }
} else {
   /* Pre-gen6, GS is used to transform QUADLIST, QUADSTRIP, and LINELOOP
* into simpler primitives.
@@ -259,8 +253,7 @@ const struct brw_tracked_state brw_gs_prog = {
.dirty = {
   .mesa  = (_NEW_LIGHT),
   .brw   = (BRW_NEW_PRIMITIVE |
-BRW_NEW_TRANSFORM_FEEDBACK |
-BRW_NEW_RASTERIZER_DISCARD),
+BRW_NEW_TRANSFORM_FEEDBACK),
   .cache = CACHE_NEW_VS_PROG
},
.emit = brw_upload_gs_prog
diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c 
b/src/mesa/drivers/dri/i965/brw_gs_emit.c
index 87ff9f0..cbfc6aa 100644
--- a/src/mesa/drivers/dri/i965/brw_gs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c
@@ -201,28 +201,6 @@ static void brw_gs_emit_vue(struct brw_gs_compile *c,
 }
 
 /**
- * De-allocate the URB entry that was previously allocated to this thread
- * (without writing any vertex data to it), and terminate the thread.  This is
- * used to implement RASTERIZER_DISCARD functionality.
- */
-static void brw_gs_terminate(struct brw_gs_compile *c)
-{
-   struct brw_compile *p = &c->func;
-   brw_urb_WRITE(p,
- retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), /* dest */
- 0, /* msg_reg_nr */
- c->reg.header, /* src0 */
- false, /* allocate */
- false, /* used */
- 1, /* msg_length */
- 0, /* response_length */
- true, /* eot */
- true, /* writes_complete */
- 0, /* offset */
- BRW_URB_SWIZZLE_NONE);
-}
-
-/**
  * Send an FF_SYNC message to ensure that all previously spawned GS threads
  * have finished sending primitives down the pipeline, and to allocate a URB
  * entry for the first output vertex.  Only needed when intel->needs_ff_sync
@@ -484,14 +462,6 @@ gen6_sol_program(struct brw_gs_compile *c, struct 
brw_gs_prog_key *key,
 
brw_gs_ff_sync(c, 1);
 
-   /* If RASTERIZER_DISCARD is enabled, we have nothing further to do, so
-* release the URB that was just allocated, and terminate the thread.
-*/
-   if (key->rasterizer_discard) {
-  brw_gs_terminate(c);
-  return;
-   }
-
brw_gs_overwrite_header_dw2_from_r0(c);
switch (num_verts) {
case 1:
diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c 
b/src/mesa/drivers/dri/i965/gen6_clip_state.c
index 1811a3f..b5e22dc 100644
--- a/src/mesa/drivers/dri/i965/gen6_clip_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c
@@ -77,6 +77,10 @@ upload_clip_state(struct brw_context *brw)
   dw2 |= GEN6_CLIP_GB_TEST;
}
 
+   /* BRW_NEW_RASTERIZER_DISCARD */
+   if (ctx->RasterDiscard)
+  dw2 |= GEN6_CLIP_MODE_REJECT_ALL;
+
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
OUT_BATCH(dw1);
@@ -94,7 +98,7 @@ upload_clip_state(struct brw_context *brw)
 const struct brw_tracked_state gen6_clip_state = {
.dirty = {
   .mesa  = _NEW_TRANSFORM | _NEW_LIGHT | _NEW_BUFFERS,
-  .brw   = (BRW_NEW_CONTEXT),
+  .brw   = BRW_NEW_CONTEXT | BRW_NEW_RASTERIZER_DISCARD,
   .cache = CACHE_NEW_WM_PROG
},
.emit = upload_clip_state,
-- 
1.8.2.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/10] i965: Handle rasterizer discard in the clipper rather than SOL on Gen7.

2013-05-17 Thread Kenneth Graunke
In order to implement the GL_PRIMITIVES_GENERATED query in a sane
fashion on our hardware, we can't discard primitives until the clipper.
The patch after next explains the rationale.

By setting the clipper to REJECT_ALL mode, all primitives get thrown away,
so rendering is still appropriately disabled.

This may negatively impact performance in the rasterizer discard case,
but it's unclear how much and this hasn't been observed to be a
bottleneck in any application we've looked at.  The clipper is the very
next stage in the pipeline, so I don't think it will be terrible.

Cc: Eric Anholt 
Cc: Paul Berry 
Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/gen7_clip_state.c | 6 +-
 src/mesa/drivers/dri/i965/gen7_sol_state.c  | 7 +--
 2 files changed, 6 insertions(+), 7 deletions(-)

This patch prevents breakage in patch 10.  See patch 10 for the rationale.

diff --git a/src/mesa/drivers/dri/i965/gen7_clip_state.c 
b/src/mesa/drivers/dri/i965/gen7_clip_state.c
index 2aa8c7f..68c08a3 100644
--- a/src/mesa/drivers/dri/i965/gen7_clip_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_clip_state.c
@@ -102,6 +102,10 @@ upload_clip_state(struct brw_context *brw)
   dw2 |= GEN6_CLIP_GB_TEST;
}
 
+   /* BRW_NEW_RASTERIZER_DISCARD */
+   if (ctx->RasterDiscard)
+  dw2 |= GEN6_CLIP_MODE_REJECT_ALL;
+
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
OUT_BATCH(dw1);
@@ -122,7 +126,7 @@ const struct brw_tracked_state gen7_clip_state = {
 _NEW_POLYGON |
 _NEW_LIGHT |
 _NEW_TRANSFORM),
-  .brw   = BRW_NEW_CONTEXT,
+  .brw   = BRW_NEW_CONTEXT | BRW_NEW_RASTERIZER_DISCARD,
   .cache = CACHE_NEW_WM_PROG
},
.emit = upload_clip_state,
diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c 
b/src/mesa/drivers/dri/i965/gen7_sol_state.c
index f570336..1e484dc 100644
--- a/src/mesa/drivers/dri/i965/gen7_sol_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c
@@ -195,10 +195,6 @@ upload_3dstate_streamout(struct brw_context *brw, bool 
active,
uint32_t dw1 = 0, dw2 = 0;
int i;
 
-   /* BRW_NEW_RASTERIZER_DISCARD */
-   if (ctx->RasterDiscard)
-  dw1 |= SO_RENDERING_DISABLE;
-
if (active) {
   int urb_entry_read_offset = 0;
   int urb_entry_read_length = (vue_map->num_slots + 1) / 2 -
@@ -267,8 +263,7 @@ const struct brw_tracked_state gen7_sol_state = {
   .brw   = (BRW_NEW_BATCH |
BRW_NEW_VERTEX_PROGRAM |
 BRW_NEW_VUE_MAP_GEOM_OUT |
-BRW_NEW_TRANSFORM_FEEDBACK |
-BRW_NEW_RASTERIZER_DISCARD)
+BRW_NEW_TRANSFORM_FEEDBACK)
},
.emit = upload_sol_state,
 };
-- 
1.8.2.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/10] i965: Disable clipper statistics when meta operations are in progress.

2013-05-17 Thread Kenneth Graunke
We don't currently use the clipper statistics, but we'll soon use
CL_INVOCATION_COUNT to implement the GL_PRIMITIVES_GENERATED query.
The number of primitives generated is not supposed to be altered during
operations such as glGenerateMipmap.

Prevents spec/EXT_transform_feedback/generatemipmap prims_generated
from breaking when we start using pipeline statistics registers to
implement the GL_PRIMITIVES_GENERATED query in a few commits.

Cc: Eric Anholt 
Cc: Paul Berry 
Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/gen6_clip_state.c | 4 +++-
 src/mesa/drivers/dri/i965/gen7_clip_state.c | 6 +-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c 
b/src/mesa/drivers/dri/i965/gen6_clip_state.c
index 51ffbae..1811a3f 100644
--- a/src/mesa/drivers/dri/i965/gen6_clip_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c
@@ -30,6 +30,7 @@
 #include "brw_defines.h"
 #include "brw_util.h"
 #include "intel_batchbuffer.h"
+#include "drivers/common/meta.h"
 #include "main/fbobject.h"
 
 static void
@@ -37,6 +38,7 @@ upload_clip_state(struct brw_context *brw)
 {
struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &intel->ctx;
+   uint32_t dw1 = _mesa_meta_in_progress(ctx) ? 0 : 
GEN6_CLIP_STATISTICS_ENABLE;
uint32_t dw2 = 0;
 
/* _NEW_BUFFERS */
@@ -77,7 +79,7 @@ upload_clip_state(struct brw_context *brw)
 
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
-   OUT_BATCH(GEN6_CLIP_STATISTICS_ENABLE);
+   OUT_BATCH(dw1);
OUT_BATCH(GEN6_CLIP_ENABLE |
 GEN6_CLIP_API_OGL |
 GEN6_CLIP_MODE_NORMAL |
diff --git a/src/mesa/drivers/dri/i965/gen7_clip_state.c 
b/src/mesa/drivers/dri/i965/gen7_clip_state.c
index 29a5ed5..2aa8c7f 100644
--- a/src/mesa/drivers/dri/i965/gen7_clip_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_clip_state.c
@@ -26,6 +26,7 @@
 #include "brw_defines.h"
 #include "brw_util.h"
 #include "intel_batchbuffer.h"
+#include "drivers/common/meta.h"
 #include "main/fbobject.h"
 
 static void
@@ -33,12 +34,15 @@ upload_clip_state(struct brw_context *brw)
 {
struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &intel->ctx;
-   uint32_t dw1 = GEN6_CLIP_STATISTICS_ENABLE, dw2 = 0;
+   uint32_t dw1 = 0, dw2 = 0;
 
/* _NEW_BUFFERS */
struct gl_framebuffer *fb = ctx->DrawBuffer;
bool render_to_fbo = _mesa_is_user_fbo(fb);
 
+   if (!_mesa_meta_in_progress(ctx))
+  dw1 |= GEN6_CLIP_STATISTICS_ENABLE;
+
/* CACHE_NEW_WM_PROG */
if (brw->wm.prog_data->barycentric_interp_modes &
BRW_WM_NONPERSPECTIVE_BARYCENTRIC_BITS) {
-- 
1.8.2.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/10] i965: Add #defines for the pipeline statistics counter registers.

2013-05-17 Thread Kenneth Graunke
These come from the Ivybridge PRM, Volume 1, Part 3.

Cc: Eric Anholt 
Cc: Paul Berry 
Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/intel/intel_reg.h | 13 +
 1 file changed, 13 insertions(+)

diff --git a/src/mesa/drivers/dri/intel/intel_reg.h 
b/src/mesa/drivers/dri/intel/intel_reg.h
index acbbcfb..1cfb464 100644
--- a/src/mesa/drivers/dri/intel/intel_reg.h
+++ b/src/mesa/drivers/dri/intel/intel_reg.h
@@ -266,6 +266,19 @@
 #define FENCE_XMAJOR 1
 #define FENCE_YMAJOR 2
 
+/* Pipeline Statistics Counter Registers */
+#define IA_VERTICES_COUNT   0x2310
+#define IA_PRIMITIVES_COUNT 0x2318
+#define VS_INVOCATION_COUNT 0x2320
+#define HS_INVOCATION_COUNT 0x2300
+#define DS_INVOCATION_COUNT 0x2308
+#define GS_INVOCATION_COUNT 0x2328
+#define GS_PRIMITIVES_COUNT 0x2330
+#define CL_INVOCATION_COUNT 0x2338
+#define CL_PRIMITIVES_COUNT 0x2340
+#define PS_INVOCATION_COUNT 0x2348
+#define PS_DEPTH_COUNT  0x2350
+
 #define SO_NUM_PRIM_STORAGE_NEEDED 0x2280
 #define SO_PRIM_STORAGE_NEEDED0_IVB0x5240
 #define SO_PRIM_STORAGE_NEEDED1_IVB0x5248
-- 
1.8.2.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/10] i965: Rely on hardware contexts for query objects on Gen6+.

2013-05-17 Thread Kenneth Graunke
Hardware contexts greatly simplify the query object code.  The pipeline
statistics counters get saved and restored with the context, which means
that we don't need to worry about other workloads polluting them.

This means that we can simply write a single pair of values (one at
BeginQuery and one at EndQuery) rather than a series of pairs.  This
also means we don't need to worry about the BO getting full.  We also
don't need to delay BO allocation and starting snapshot until the first
draw.

The generation split here is a little off: technically, Ironlake can also
support hardware contexts.  However, the kernel currently doesn't, and
even if it were to do so someday, we'd need to wait a while before
bumping the kernel requirement to take advantage of it.

Cc: Eric Anholt 
Cc: Paul Berry 
Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/Makefile.sources |   1 +
 src/mesa/drivers/dri/i965/brw_context.c|   2 +
 src/mesa/drivers/dri/i965/brw_context.h|   3 +
 src/mesa/drivers/dri/i965/brw_queryobj.c   |  83 ++-
 src/mesa/drivers/dri/i965/gen6_queryobj.c  | 354 +
 5 files changed, 383 insertions(+), 60 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/gen6_queryobj.c

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index a0ffd3a..d67a5a4 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -104,6 +104,7 @@ i965_FILES = \
gen6_depthstencil.c \
gen6_gs_state.c \
 gen6_multisample_state.c \
+   gen6_queryobj.c \
gen6_sampler_state.c \
gen6_scissor_state.c \
gen6_sf_state.c \
diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 2f5fedb..beade5c 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -88,6 +88,8 @@ static void brwInitDriverFunctions(struct intel_screen 
*screen,
 
brwInitFragProgFuncs( functions );
brw_init_queryobj_functions(functions);
+   if (screen->gen >= 6)
+  gen6_reinit_queryobj_functions(functions);
 
functions->QuerySamplesForFormat = brw_query_samples_for_format;
functions->BeginTransformFeedback = brw_begin_transform_feedback;
diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 9baf57b..9ef6aca 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1164,6 +1164,9 @@ void brw_init_queryobj_functions(struct dd_function_table 
*functions);
 void brw_emit_query_begin(struct brw_context *brw);
 void brw_emit_query_end(struct brw_context *brw);
 
+/** gen6_queryobj.c */
+void gen6_reinit_queryobj_functions(struct dd_function_table *functions);
+
 /*==
  * brw_state_dump.c
  */
diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c 
b/src/mesa/drivers/dri/i965/brw_queryobj.c
index 40f926b..1c1e0b4 100644
--- a/src/mesa/drivers/dri/i965/brw_queryobj.c
+++ b/src/mesa/drivers/dri/i965/brw_queryobj.c
@@ -94,40 +94,21 @@ write_timestamp(struct intel_context *intel, drm_intel_bo 
*query_bo, int idx)
 static void
 write_depth_count(struct intel_context *intel, drm_intel_bo *query_bo, int idx)
 {
-   if (intel->gen >= 6) {
-  /* Emit Sandybridge workaround flush: */
-  if (intel->gen == 6)
- intel_emit_post_sync_nonzero_flush(intel);
-
-  BEGIN_BATCH(5);
-  OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
-  OUT_BATCH(PIPE_CONTROL_DEPTH_STALL |
-PIPE_CONTROL_WRITE_DEPTH_COUNT);
-  OUT_RELOC(query_bo,
-I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
-PIPE_CONTROL_GLOBAL_GTT_WRITE |
-(idx * sizeof(uint64_t)));
-  OUT_BATCH(0);
-  OUT_BATCH(0);
-  ADVANCE_BATCH();
-   } else {
-  BEGIN_BATCH(4);
-  OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2) |
-PIPE_CONTROL_DEPTH_STALL |
-PIPE_CONTROL_WRITE_DEPTH_COUNT);
-  /* This object could be mapped cacheable, but we don't have an exposed
-   * mechanism to support that.  Since it's going uncached, tell GEM that
-   * we're writing to it.  The usual clflush should be all that's required
-   * to pick up the results.
-   */
-  OUT_RELOC(query_bo,
-I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
-PIPE_CONTROL_GLOBAL_GTT_WRITE |
-(idx * sizeof(uint64_t)));
-  OUT_BATCH(0);
-  OUT_BATCH(0);
-  ADVANCE_BATCH();
-   }
+   BEGIN_BATCH(4);
+   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2) |
+ PIPE_CONTROL_DEPTH_STALL | PIPE_CONTROL_WRITE_DEPTH_COUNT);
+   /* This object could be mapped cacheable, but we don't have an exposed
+* mechanism to support that.  Since it's going uncached, tell GEM that
+* we're writing to it.  The

[Mesa-dev] [PATCH 04/10] i965: Disable pixel statistics in BLORP.

2013-05-17 Thread Kenneth Graunke
BLORP is used for operations like glClear, glCopyTexImage, and
glBlitFramebuffer which aren't supposed to contribute fragments toward
occlusion queries.

This prevents Piglit tests from breaking in the next commit.

Cc: Eric Anholt 
Cc: Paul Berry 
Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/gen6_blorp.cpp | 1 -
 src/mesa/drivers/dri/i965/gen7_blorp.cpp | 1 -
 2 files changed, 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp 
b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
index 0ed5bee..c7bb815 100644
--- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -750,7 +750,6 @@ gen6_blorp_emit_wm_config(struct brw_context *brw,
   assert(0);
   break;
}
-   dw4 |= GEN6_WM_STATISTICS_ENABLE;
dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0;
dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5;
dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp 
b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
index f55805c..f83c7f2 100644
--- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
@@ -516,7 +516,6 @@ gen7_blorp_emit_wm_config(struct brw_context *brw,
   assert(0);
   break;
}
-   dw1 |= GEN7_WM_STATISTICS_ENABLE;
dw1 |= GEN7_WM_LINE_AA_WIDTH_1_0;
dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5;
dw1 |= 0 << GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; /* No interp */
-- 
1.8.2.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/10] i965: Require hardware contexts (and thus Kernel 3.6) on Gen6+.

2013-05-17 Thread Kenneth Graunke
Hardware contexts are necessary to reasonably support OpenGL 3.2.
In particular, we currently track transform feedback buffer offsets and
counters in software, which relies on knowing the number of primitives
generated.  Geometry shaders violate that assumption.

At the time of writing, Debian has moved to Kernel 3.8, which means most
people probably have a newer kernel by now.  It's also worth noting that
this patch won't land until Mesa 10 which is currently targeted for
September.  By that point, even more people will have a newer kernel.

This patch leaves the code for flagging BRW_NEW_CONTEXT on new
batchbuffers if hw_ctx == NULL since that still occurs pre-Gen6.

Cc: Eric Anholt 
Cc: Paul Berry 
Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_context.c | 18 +-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 69b7e4d..2f5fedb 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -143,6 +143,23 @@ brwCreateContext(int api,
   return false;
}
 
+   if (intel->gen >= 6) {
+  /* Create a new hardware context.  Using a hardware context means that
+   * our GPU state will be saved/restored on context switch, allowing us
+   * to assume that the GPU is in the same state we left it in.
+   *
+   * This is required for transform feedback buffer offsets, query objects,
+   * and also allows us to reduce how much state we have to emit.
+   */
+  intel->hw_ctx = drm_intel_gem_context_create(intel->bufmgr);
+
+  if (!intel->hw_ctx) {
+ fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
+ ralloc_free(brw);
+ return false;
+  }
+   }
+
brw_init_surface_formats(brw);
 
/* Initialize swrast, tnl driver tables: */
@@ -374,7 +391,6 @@ brwCreateContext(int api,
 
brw->prim_restart.in_progress = false;
brw->prim_restart.enable_cut_index = false;
-   intel->hw_ctx = drm_intel_gem_context_create(intel->bufmgr);
 
brw_init_state( brw );
 
-- 
1.8.2.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/10] i965: Delete Gen7+ check for Kernel 3.3 now that we require 3.6+.

2013-05-17 Thread Kenneth Graunke
It's just not necessary.

Cc: Eric Anholt 
Cc: Paul Berry 
Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/intel/intel_screen.c | 7 ---
 1 file changed, 7 deletions(-)

diff --git a/src/mesa/drivers/dri/intel/intel_screen.c 
b/src/mesa/drivers/dri/intel/intel_screen.c
index f3dc908..e058c7e 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -1300,13 +1300,6 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
   intelScreen->gen = 2;
}
 
-   if (intelScreen->gen == 7 &&
-   !intel_get_boolean(intelScreen->driScrnPriv,
-  I915_PARAM_HAS_GEN7_SOL_RESET)) {
-  fprintf(stderr, "i965 requires Kernel 3.3 or later.\n");
-  return false;
-   }
-
intelScreen->hw_has_separate_stencil = intelScreen->gen >= 6;
intelScreen->hw_must_use_separate_stencil = intelScreen->gen >= 7;
 
-- 
1.8.2.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/10] i965: Bump kernel requirement to 3.3 on Ivybridge.

2013-05-17 Thread Kenneth Graunke
Kernel 3.3 introduced the SOL reset execbuf parameter, needed for GL 3.0
on Ivybridge.  Bumping the requirement will give an obvious error
message rather than simply reporting GL 2.1.

Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/intel/intel_extensions.c |  5 +
 src/mesa/drivers/dri/intel/intel_screen.c | 26 +++---
 src/mesa/drivers/dri/intel/intel_screen.h |  2 --
 3 files changed, 12 insertions(+), 21 deletions(-)

diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c 
b/src/mesa/drivers/dri/intel/intel_extensions.c
index 8d8e325..5cb2fa3 100644
--- a/src/mesa/drivers/dri/intel/intel_extensions.c
+++ b/src/mesa/drivers/dri/intel/intel_extensions.c
@@ -91,12 +91,9 @@ intelInitExtensions(struct gl_context *ctx)
   ctx->Const.GLSLVersion = 120;
_mesa_override_glsl_version(ctx);
 
-   if (intel->gen == 6 ||
-   (intel->gen == 7 && intel->intelScreen->kernel_has_gen7_sol_reset))
-  ctx->Extensions.EXT_transform_feedback = true;
-
if (intel->gen >= 6) {
   ctx->Extensions.EXT_framebuffer_multisample = true;
+  ctx->Extensions.EXT_transform_feedback = true;
   ctx->Extensions.ARB_blend_func_extended = 
!driQueryOptionb(&intel->optionCache, "disable_blend_func_extended");
   ctx->Extensions.ARB_draw_buffers_blend = true;
   ctx->Extensions.ARB_ES3_compatibility = true;
diff --git a/src/mesa/drivers/dri/intel/intel_screen.c 
b/src/mesa/drivers/dri/intel/intel_screen.c
index ad1b351..f3dc908 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -1195,17 +1195,10 @@ set_max_gl_versions(struct intel_screen *screen)
 
switch (screen->gen) {
case 7:
-  if (screen->kernel_has_gen7_sol_reset) {
- screen->max_gl_core_version = 31;
- screen->max_gl_compat_version = 30;
- screen->max_gl_es1_version = 11;
- screen->max_gl_es2_version = 30;
-  } else {
- screen->max_gl_core_version = 0;
- screen->max_gl_compat_version = 21;
- screen->max_gl_es1_version = 11;
- screen->max_gl_es2_version = 20;
-  }
+  screen->max_gl_core_version = 31;
+  screen->max_gl_compat_version = 30;
+  screen->max_gl_es1_version = 11;
+  screen->max_gl_es2_version = 30;
   break;
case 6:
   screen->max_gl_core_version = 31;
@@ -1293,10 +1286,6 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
 
intelScreen->deviceID = drm_intel_bufmgr_gem_get_devid(intelScreen->bufmgr);
 
-   intelScreen->kernel_has_gen7_sol_reset =
-  intel_get_boolean(intelScreen->driScrnPriv,
-   I915_PARAM_HAS_GEN7_SOL_RESET);
-
if (IS_GEN7(intelScreen->deviceID)) {
   intelScreen->gen = 7;
} else if (IS_GEN6(intelScreen->deviceID)) {
@@ -1311,6 +1300,13 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
   intelScreen->gen = 2;
}
 
+   if (intelScreen->gen == 7 &&
+   !intel_get_boolean(intelScreen->driScrnPriv,
+  I915_PARAM_HAS_GEN7_SOL_RESET)) {
+  fprintf(stderr, "i965 requires Kernel 3.3 or later.\n");
+  return false;
+   }
+
intelScreen->hw_has_separate_stencil = intelScreen->gen >= 6;
intelScreen->hw_must_use_separate_stencil = intelScreen->gen >= 7;
 
diff --git a/src/mesa/drivers/dri/intel/intel_screen.h 
b/src/mesa/drivers/dri/intel/intel_screen.h
index 7da9895..4833937 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.h
+++ b/src/mesa/drivers/dri/intel/intel_screen.h
@@ -60,8 +60,6 @@ struct intel_screen
bool hw_has_separate_stencil;
bool hw_must_use_separate_stencil;
 
-   bool kernel_has_gen7_sol_reset;
-
bool hw_has_llc;
bool hw_has_swizzling;
 
-- 
1.8.2.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Gen6+ hardware contexts & query object improvements

2013-05-17 Thread Kenneth Graunke
Hello!

This patch series bumps the kernel requirement to 3.6 for Gen6+,
meaning that we actually get to rely on hardware context support.
That's a little painful, but even Debian ships 3.8 now, and this
isn't going to make it into an actual release for several more
months.

It then splits our query code into Gen4-5 and Gen6+ versions.  The new
Gen6+ version is a lot simpler since hardware contexts guarantee that
our statistics registers don't get polluted with data from other
programs running on the system.  It should be more efficient, which
may help games like Minecraft (though I haven't measured).

Finally, it implements the GL_PRIMITIVES_GENERATED and
GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN queries via hardware counters
rather than manually counting on the CPU.  This paves the way for
geometry shader support (which can output multiple primitives, breaking
our CPU-side tracking), and should allow us to enable hardware primitive
restart in a few more cases once a few more things are tidied.

The next step is to eliminate the use of SOL reset and save/restore the
transform feedback offsets directly.  Then we can turn on hardware
primitive restart more aggressively and implement a few more transform
feedback extensions.

Thanks in advance for the review.

--Ken

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] gallivm: Eliminate 8.8 fixed point intermediates from AoS sampling path.

2013-05-17 Thread jfonseca
From: José Fonseca 

This change was meant as a stepping stone to using the PMADDUBSW SSSE3
instruction, but actually this refactoring by itself yields a 10%
speedup on texture intensive shaders (e.g., Google Earth's ocean water
w/o S3TC on an Ivy Bridge machine), while yielding exactly the
same results, whereas PMADDUBSW only gave an extra 5%, at the expense of
2 bits of precision in the interpolation.

I believe that the speedup of this change comes from the reduced register
pressure (as 8.8 fixed point intermediates take twice the space of 8bit
unorm).

Also, not dealing with 8.8 simplifies lp_bld_sample_aos.c code
substantially -- it's no longer necessary to have code duplicated for
low and high register halves.

Note about lp_build_sample_mipmap(): the path for num_quads > 1 is never
executed (as it is faster on AVX to split the 256bit wide texture
computation into two 128bit chunks, in order to leverage integer
opcodes).  This path might be useful in the future, so in order to
verify my change did not break that path, I had to apply this
change:

  @@ -1662,11 +1662,11 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
 /*
  * we only try 8-wide sampling with soa as it appears to
  * be a loss with aos with AVX (but it should work).
  * (It should be faster if we'd support avx2)
  */
  -  if (num_quads == 1 || !use_aos) {
  +  if (/* num_quads == 1 || ! */ use_aos) {

if (num_quads > 1) {
   if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
  LLVMValueRef index0 = lp_build_const_int32(gallivm, 0);
  /*

and then run texfilt mesademo:

  LP_NATIVE_VECTOR_WIDTH=256 ./texfilt

Ran whole piglit without regressions.
---
 src/gallium/auxiliary/gallivm/lp_bld_arit.c   |   60 ++--
 src/gallium/auxiliary/gallivm/lp_bld_arit.h   |   28 +-
 src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c |  321 -
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c |   15 +-
 src/gallium/drivers/llvmpipe/lp_bld_blend.c   |4 +-
 5 files changed, 186 insertions(+), 242 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c 
b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 8f8410c..3291ec4 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -974,7 +974,7 @@ lp_build_lerp_simple(struct lp_build_context *bld,
  LLVMValueRef x,
  LLVMValueRef v0,
  LLVMValueRef v1,
- bool normalized)
+ unsigned flags)
 {
unsigned half_width = bld->type.width/2;
LLVMBuilderRef builder = bld->gallivm->builder;
@@ -987,14 +987,17 @@ lp_build_lerp_simple(struct lp_build_context *bld,
 
delta = lp_build_sub(bld, v1, v0);
 
-   if (normalized) {
+   if (flags & LP_BLD_LERP_WIDE_NORMALIZED) {
   if (!bld->type.sign) {
- /*
-  * Scale x from [0, 2**n - 1] to [0, 2**n] by adding the
-  * most-significant-bit to the lowest-significant-bit, so that
-  * later we can just divide by 2**n instead of 2**n - 1.
-  */
- x = lp_build_add(bld, x, lp_build_shr_imm(bld, x, half_width - 1));
+ if (!(flags & LP_BLD_LERP_PRESCALED_WEIGHTS)) {
+/*
+ * Scale x from [0, 2**n - 1] to [0, 2**n] by adding the
+ * most-significant-bit to the lowest-significant-bit, so that
+ * later we can just divide by 2**n instead of 2**n - 1.
+ */
+
+x = lp_build_add(bld, x, lp_build_shr_imm(bld, x, half_width - 1));
+ }
 
  /* (x * delta) >> n */
  res = lp_build_mul(bld, x, delta);
@@ -1005,15 +1008,18 @@ lp_build_lerp_simple(struct lp_build_context *bld,
   * use the 2**n - 1 divison approximation in lp_build_mul_norm
   * instead.
   */
+ assert(!(flags & LP_BLD_LERP_PRESCALED_WEIGHTS));
  res = lp_build_mul_norm(bld->gallivm, bld->type, x, delta);
   }
} else {
+  assert(!(flags & LP_BLD_LERP_PRESCALED_WEIGHTS));
   res = lp_build_mul(bld, x, delta);
}
 
res = lp_build_add(bld, v0, res);
 
-   if ((normalized && !bld->type.sign) || bld->type.fixed) {
+   if (((flags & LP_BLD_LERP_WIDE_NORMALIZED) && !bld->type.sign) ||
+   bld->type.fixed) {
   /* We need to mask out the high order bits when lerping 8bit normalized 
colors stored on 16bits */
   /* XXX: This step is necessary for lerping 8bit colors stored on 16bits,
* but it will be wrong for true fixed point use cases. Basically we need
@@ -1033,7 +1039,8 @@ LLVMValueRef
 lp_build_lerp(struct lp_build_context *bld,
   LLVMValueRef x,
   LLVMValueRef v0,
-  LLVMValueRef v1)
+  LLVMValueRef v1,
+  unsigned flags)
 {
const struct lp_type type = bld->type;
LLVMValueRef res;
@@ -1042,6 +1049,8 @@ lp_build_lerp(struct 

[Mesa-dev] [PATCH 1/2] gallivm: Add and use lp_build_lerp_3d.

2013-05-17 Thread jfonseca
From: José Fonseca 

---
 src/gallium/auxiliary/gallivm/lp_bld_arit.c   |   20 
 src/gallium/auxiliary/gallivm/lp_bld_arit.h   |   15 ++
 src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c |   51 ++---
 3 files changed, 60 insertions(+), 26 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c 
b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 524a8e7..8f8410c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1095,6 +1095,26 @@ lp_build_lerp_2d(struct lp_build_context *bld,
 }
 
 
+LLVMValueRef
+lp_build_lerp_3d(struct lp_build_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef y,
+ LLVMValueRef z,
+ LLVMValueRef v000,
+ LLVMValueRef v001,
+ LLVMValueRef v010,
+ LLVMValueRef v011,
+ LLVMValueRef v100,
+ LLVMValueRef v101,
+ LLVMValueRef v110,
+ LLVMValueRef v111)
+{
+   LLVMValueRef v0 = lp_build_lerp_2d(bld, x, y, v000, v001, v010, v011);
+   LLVMValueRef v1 = lp_build_lerp_2d(bld, x, y, v100, v101, v110, v111);
+   return lp_build_lerp(bld, z, v0, v1);
+}
+
+
 /**
  * Generate min(a, b)
  * Do checks for special cases.
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h 
b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
index 60b9907..45886d5 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
@@ -106,6 +106,21 @@ lp_build_lerp_2d(struct lp_build_context *bld,
  LLVMValueRef v11);
 
 LLVMValueRef
+lp_build_lerp_3d(struct lp_build_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef y,
+ LLVMValueRef z,
+ LLVMValueRef v000,
+ LLVMValueRef v001,
+ LLVMValueRef v010,
+ LLVMValueRef v011,
+ LLVMValueRef v100,
+ LLVMValueRef v101,
+ LLVMValueRef v110,
+ LLVMValueRef v111);
+
+
+LLVMValueRef
 lp_build_min(struct lp_build_context *bld,
  LLVMValueRef a,
  LLVMValueRef b);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c 
b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
index 16d5718..9eaca02 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
@@ -982,8 +982,7 @@ lp_build_sample_fetch_image_linear(struct 
lp_build_sample_context *bld,
s_fpart_hi,
neighbors_hi[0][0][0],
neighbors_hi[0][0][1]);
-  }
-  else {
+  } else if (dims == 2) {
  /* 2-D lerp */
  packed_lo = lp_build_lerp_2d(&h16,
   s_fpart_lo, t_fpart_lo,
@@ -998,30 +997,30 @@ lp_build_sample_fetch_image_linear(struct 
lp_build_sample_context *bld,
   neighbors_hi[0][0][1],
   neighbors_hi[0][1][0],
   neighbors_hi[0][1][1]);
-
- if (dims >= 3) {
-LLVMValueRef packed_lo2, packed_hi2;
-
-/* lerp in the second z slice */
-packed_lo2 = lp_build_lerp_2d(&h16,
-  s_fpart_lo, t_fpart_lo,
-  neighbors_lo[1][0][0],
-  neighbors_lo[1][0][1],
-  neighbors_lo[1][1][0],
-  neighbors_lo[1][1][1]);
-
-packed_hi2 = lp_build_lerp_2d(&h16,
-  s_fpart_hi, t_fpart_hi,
-  neighbors_hi[1][0][0],
-  neighbors_hi[1][0][1],
-  neighbors_hi[1][1][0],
-  neighbors_hi[1][1][1]);
-/* interp between two z slices */
-packed_lo = lp_build_lerp(&h16, r_fpart_lo,
-  packed_lo, packed_lo2);
-packed_hi = lp_build_lerp(&h16, r_fpart_hi,
-  packed_hi, packed_hi2);
- }
+  } else {
+ /* 3-D lerp */
+ assert(dims == 3);
+ packed_lo = lp_build_lerp_3d(&h16,
+  s_fpart_lo, t_fpart_lo, r_fpart_lo,
+  neighbors_lo[0][0][0],
+  neighbors_lo[0][0][1],
+  neighbors_lo[0][1][0],
+  neighbors_lo[0][1][1],
+  neighbors_lo[1][0][0],
+  neighbors_lo[1][0][1],
+  

Re: [Mesa-dev] [PATCH] llvmpipe: get rid of unused tiled/linear logic

2013-05-17 Thread Jose Fonseca
Thanks for doing this Roland.

- Original Message -
> From: Roland Scheidegger 
> 
> We do rendering to linear color buffers for quite some time, and since
> switching to linear depth buffers all the tiled/linear logic was unused.
> So get rid of (most) of it - there's still some LAYOUT_NONE things and
> late allocation of resources which probably could be simplified.

Yeah, we should remove llvmpipe_resource::layout too.  But maybe in a follow on 
change.

Jose


> ---
>  src/gallium/drivers/llvmpipe/Makefile.am |3 +-
>  src/gallium/drivers/llvmpipe/SConscript  |3 +-
>  src/gallium/drivers/llvmpipe/lp_rast_priv.h  |4 +-
>  src/gallium/drivers/llvmpipe/lp_texture.c|  388
>  +++---
>  src/gallium/drivers/llvmpipe/lp_texture.h|   10 -
>  src/gallium/drivers/llvmpipe/lp_tile_image.c |  294 ---
>  src/gallium/drivers/llvmpipe/lp_tile_image.h |   61 
>  7 files changed, 50 insertions(+), 713 deletions(-)
>  delete mode 100644 src/gallium/drivers/llvmpipe/lp_tile_image.c
>  delete mode 100644 src/gallium/drivers/llvmpipe/lp_tile_image.h
> 
> diff --git a/src/gallium/drivers/llvmpipe/Makefile.am
> b/src/gallium/drivers/llvmpipe/Makefile.am
> index f1ba5d1..9059053 100644
> --- a/src/gallium/drivers/llvmpipe/Makefile.am
> +++ b/src/gallium/drivers/llvmpipe/Makefile.am
> @@ -72,8 +72,7 @@ libllvmpipe_la_SOURCES = \
>   lp_state_vs.c \
>   lp_surface.c \
>   lp_tex_sample.c \
> - lp_texture.c \
> - lp_tile_image.c
> + lp_texture.c
>  
>  libllvmpipe_la_LDFLAGS = $(LLVM_LDFLAGS)
>  
> diff --git a/src/gallium/drivers/llvmpipe/SConscript
> b/src/gallium/drivers/llvmpipe/SConscript
> index a81cf23..22314c2 100644
> --- a/src/gallium/drivers/llvmpipe/SConscript
> +++ b/src/gallium/drivers/llvmpipe/SConscript
> @@ -52,8 +52,7 @@ llvmpipe = env.ConvenienceLibrary(
>   'lp_state_vs.c',
>   'lp_surface.c',
>   'lp_tex_sample.c',
> - 'lp_texture.c',
> - 'lp_tile_image.c',
> + 'lp_texture.c'
>   ])
>  
>  env.Alias('llvmpipe', llvmpipe)
> diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
> b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
> index 7d01da1..85febff 100644
> --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
> +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
> @@ -36,10 +36,12 @@
>  #include "lp_scene.h"
>  #include "lp_state.h"
>  #include "lp_texture.h"
> -#include "lp_tile_image.h"
>  #include "lp_limits.h"
>  
>  
> +#define TILE_VECTOR_HEIGHT 4
> +#define TILE_VECTOR_WIDTH 4
> +
>  /* If we crash in a jitted function, we can examine jit_line and jit_state
>   * to get some info.  This is not thread-safe, however.
>   */
> diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c
> b/src/gallium/drivers/llvmpipe/lp_texture.c
> index 0804619..d10a4ce 100644
> --- a/src/gallium/drivers/llvmpipe/lp_texture.c
> +++ b/src/gallium/drivers/llvmpipe/lp_texture.c
> @@ -46,7 +46,6 @@
>  #include "lp_context.h"
>  #include "lp_flush.h"
>  #include "lp_screen.h"
> -#include "lp_tile_image.h"
>  #include "lp_texture.h"
>  #include "lp_setup.h"
>  #include "lp_state.h"
> @@ -334,11 +333,6 @@ llvmpipe_resource_destroy(struct pipe_screen *pscreen,
>struct sw_winsys *winsys = screen->winsys;
>winsys->displaytarget_destroy(winsys, lpr->dt);
>  
> -  if (lpr->tiled_img.data) {
> - align_free(lpr->tiled_img.data);
> - lpr->tiled_img.data = NULL;
> -  }
> -
>FREE(lpr->layout[0]);
> }
> else if (llvmpipe_resource_is_texture(pt)) {
> @@ -351,12 +345,6 @@ llvmpipe_resource_destroy(struct pipe_screen *pscreen,
>   lpr->linear_img.data = NULL;
>}
>  
> -  /* free tiled image data */
> -  if (lpr->tiled_img.data) {
> - align_free(lpr->tiled_img.data);
> - lpr->tiled_img.data = NULL;
> -  }
> -
>/* free layout flag arrays */
>for (level = 0; level < Elements(lpr->layout); level++) {
>   FREE(lpr->layout[level]);
> @@ -398,7 +386,6 @@ llvmpipe_resource_map(struct pipe_resource *resource,
>tex_usage == LP_TEX_USAGE_WRITE_ALL);
>  
> assert(layout == LP_TEX_LAYOUT_NONE ||
> -  layout == LP_TEX_LAYOUT_TILED ||
>layout == LP_TEX_LAYOUT_LINEAR);
>  
> if (lpr->dt) {
> @@ -850,27 +837,10 @@ static unsigned
>  tex_image_face_size(const struct llvmpipe_resource *lpr, unsigned level,
>  enum lp_texture_layout layout)
>  {
> -   const unsigned width = u_minify(lpr->base.width0, level);
> -   const unsigned height = u_minify(lpr->base.height0, level);
> -
> -   assert(layout == LP_TEX_LAYOUT_TILED ||
> -  layout == LP_TEX_LAYOUT_LINEAR);
> +   assert(layout == LP_TEX_LAYOUT_LINEAR);
>  
> -   if (layout == LP_TEX_LAYOUT_TILED) {
> -  /* for tiled layout, force a 32bpp format */
> -  const enum pipe_format format = PIPE_FORMAT_B8G8R8A8_UNORM;
> -  const unsigned block_size = 

Re: [Mesa-dev] [PATCH 03/13] gallium: Introduce 32-bit bytewise format names

2013-05-17 Thread Jose Fonseca
- Original Message -
> From: Richard Sandiford 
> 
> RGBA has R at byte 0 and A at byte 3, regardless of platform
> endianness.

Maybe I'm missing something, but this naming convention seems to me the exact 
opposite of what was decided [1], which is:

 - R at byte 0, ..., and A at byte 3, regardless of platform endianness would 
be called "R8G8B8A8"

 - R at bit 0, ..., A at bit 24, encoded as integers that match the platform 
endianness would be called "RGBA"

which would be consistent with (as in a superset of) D3D10 format naming.  I'm 
afraid I must insist on this, as I don't want D3D10 formats to change in 
any way.  It shouldn't be hard to do this -- you can easily craft a script that 
swaps these using statements like:

   git ls-files | xargs sed -i -e 's@foo@boo@'

But other than this naming convention issue, the actual implementation looks 
quite nice.

Jose

[1] http://lists.freedesktop.org/archives/mesa-dev/2013-February/034378.html , 
from "Actually, on second thought ..."


> 
> Reviewed-by: Adam Jackson 
> ---
>  src/gallium/include/pipe/p_format.h | 38
>  +
>  1 file changed, 30 insertions(+), 8 deletions(-)
> 
> diff --git a/src/gallium/include/pipe/p_format.h
> b/src/gallium/include/pipe/p_format.h
> index 098b25b..1289983 100644
> --- a/src/gallium/include/pipe/p_format.h
> +++ b/src/gallium/include/pipe/p_format.h
> @@ -33,6 +33,7 @@
>  extern "C" {
>  #endif
>  
> +#include "p_config.h"
>  
>  enum pipe_type {
> PIPE_TYPE_UNORM = 0,
> @@ -53,10 +54,10 @@ enum pipe_type {
>  
>  enum pipe_format {
> PIPE_FORMAT_NONE= 0,
> -   PIPE_FORMAT_B8G8R8A8_UNORM  = 1,
> -   PIPE_FORMAT_B8G8R8X8_UNORM  = 2,
> -   PIPE_FORMAT_A8R8G8B8_UNORM  = 3,
> -   PIPE_FORMAT_X8R8G8B8_UNORM  = 4,
> +   PIPE_FORMAT_ARGB_UNORM  = 1,
> +   PIPE_FORMAT_XRGB_UNORM  = 2,
> +   PIPE_FORMAT_BGRA_UNORM  = 3,
> +   PIPE_FORMAT_BGRX_UNORM  = 4,
> PIPE_FORMAT_B5G5R5A1_UNORM  = 5,
> PIPE_FORMAT_B4G4R4A4_UNORM  = 6,
> PIPE_FORMAT_B5G6R5_UNORM= 7,
> @@ -119,8 +120,8 @@ enum pipe_format {
> PIPE_FORMAT_R8_UNORM= 64,
> PIPE_FORMAT_R8G8_UNORM  = 65,
> PIPE_FORMAT_R8G8B8_UNORM= 66,
> -   PIPE_FORMAT_R8G8B8A8_UNORM  = 67,
> -   PIPE_FORMAT_X8B8G8R8_UNORM  = 68,
> +   PIPE_FORMAT_ABGR_UNORM  = 67,
> +   PIPE_FORMAT_RGBX_UNORM  = 68,
> PIPE_FORMAT_R8_USCALED  = 69,
> PIPE_FORMAT_R8G8_USCALED= 70,
> PIPE_FORMAT_R8G8B8_USCALED  = 71,
> @@ -180,7 +181,7 @@ enum pipe_format {
> PIPE_FORMAT_R5SG5SB6U_NORM  = 120,
>  
> /* TODO: re-order these */
> -   PIPE_FORMAT_A8B8G8R8_UNORM  = 121,
> +   PIPE_FORMAT_RGBA_UNORM  = 121,
> PIPE_FORMAT_B5G5R5X1_UNORM  = 122,
> PIPE_FORMAT_R10G10B10A2_USCALED = 123,
> PIPE_FORMAT_R11G11B10_FLOAT = 124,
> @@ -193,7 +194,7 @@ enum pipe_format {
> PIPE_FORMAT_B10G10R10A2_UNORM   = 131,
> PIPE_FORMAT_R10SG10SB10SA2U_NORM= 132,
> PIPE_FORMAT_R8G8Bx_SNORM= 133,
> -   PIPE_FORMAT_R8G8B8X8_UNORM  = 134,
> +   PIPE_FORMAT_XBGR_UNORM  = 134,
> PIPE_FORMAT_B4G4R4X4_UNORM  = 135,
>  
> /* some stencil samplers formats */
> @@ -343,6 +344,27 @@ enum pipe_format {
> PIPE_FORMAT_COUNT
>  };
>  
> +#if defined(PIPE_ARCH_LITTLE_ENDIAN)
> +#define PIPE_FORMAT_R8G8B8A8_UNORM PIPE_FORMAT_ABGR_UNORM
> +#define PIPE_FORMAT_R8G8B8X8_UNORM PIPE_FORMAT_XBGR_UNORM
> +#define PIPE_FORMAT_B8G8R8X8_UNORM PIPE_FORMAT_XRGB_UNORM
> +#define PIPE_FORMAT_B8G8R8A8_UNORM PIPE_FORMAT_ARGB_UNORM
> +#define PIPE_FORMAT_B8G8R8X8_UNORM PIPE_FORMAT_XRGB_UNORM
> +#define PIPE_FORMAT_A8R8G8B8_UNORM PIPE_FORMAT_BGRA_UNORM
> +#define PIPE_FORMAT_X8R8G8B8_UNORM PIPE_FORMAT_BGRX_UNORM
> +#define PIPE_FORMAT_A8B8G8R8_UNORM PIPE_FORMAT_RGBA_UNORM
> +#define PIPE_FORMAT_X8B8G8R8_UNORM PIPE_FORMAT_RGBX_UNORM
> +#elif defined(PIPE_ARCH_BIG_ENDIAN)
> +#define PIPE_FORMAT_R8G8B8A8_UNORM PIPE_FORMAT_RGBA_UNORM
> +#define PIPE_FORMAT_R8G8B8X8_UNORM PIPE_FORMAT_RGBX_UNORM
> +#define PIPE_FORMAT_B8G8R8A8_UNORM PIPE_FORMAT_BGRA_UNORM
> +#define PIPE_FORMAT_B8G8R8X8_UNORM PIPE_FORMAT_BGRX_UNORM
> +#define PIPE_FORMAT_A8R8G8B8_UNORM PIPE_FORMAT_ARGB_UNORM
> +#define PIPE_FORMAT_X8R8G8B8_UNORM PIPE_FORMAT_XRGB_UNORM
> +#define PIPE_FORMAT_A8B8G8R8_UNORM PIPE_FORMAT_ABGR_UNORM
> +#define PIPE_FORMAT_X8B8G8R8_UNORM PIPE_FORMAT_XBGR_UNORM
> +#endif
> +
>  enum pipe_video_chroma_format
>  {
> PIPE_VIDEO_CHROMA_FORMAT_420,
> --
> 1.8.2.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 

Re: [Mesa-dev] [PATCH 1/5] radeonsi: increase array size for shader inputs and outputs

2013-05-17 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Fri, May 17, 2013 at 11:27 AM, Michel Dänzer  wrote:
> From: Marek Olšák 
>
> and add assertions to prevent buffer overflow. This fixes corruption
> of the si_shader struct.
>
> NOTE: This is a candidate for the 9.1 branch.
>
> [ Cherry-pick of r600g commit da33f9b919039442e9ab51f9b1d1c83a73607133 ]
>
> Signed-off-by: Michel Dänzer 
> ---
>  src/gallium/drivers/radeonsi/radeonsi_shader.c | 2 ++
>  src/gallium/drivers/radeonsi/radeonsi_shader.h | 4 ++--
>  2 files changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c 
> b/src/gallium/drivers/radeonsi/radeonsi_shader.c
> index f942436..bea2895 100644
> --- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
> +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
> @@ -589,6 +589,7 @@ static void si_llvm_emit_epilogue(struct 
> lp_build_tgsi_context * bld_base)
> switch (d->Declaration.File) {
> case TGSI_FILE_INPUT:
> i = shader->ninput++;
> +   assert(i < Elements(shader->input));
> shader->input[i].name = d->Semantic.Name;
> shader->input[i].sid = d->Semantic.Index;
> shader->input[i].interpolate = d->Interp.Interpolate;
> @@ -597,6 +598,7 @@ static void si_llvm_emit_epilogue(struct 
> lp_build_tgsi_context * bld_base)
>
> case TGSI_FILE_OUTPUT:
> i = shader->noutput++;
> +   assert(i < Elements(shader->output));
> shader->output[i].name = d->Semantic.Name;
> shader->output[i].sid = d->Semantic.Index;
> shader->output[i].interpolate = d->Interp.Interpolate;
> diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.h 
> b/src/gallium/drivers/radeonsi/radeonsi_shader.h
> index 1552dc2..0fbd601 100644
> --- a/src/gallium/drivers/radeonsi/radeonsi_shader.h
> +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.h
> @@ -101,10 +101,10 @@ struct si_pipe_shader_selector {
>
>  struct si_shader {
> unsignedninput;
> -   struct si_shader_io input[32];
> +   struct si_shader_io input[40];
>
> unsignednoutput;
> -   struct si_shader_io output[32];
> +   struct si_shader_io output[40];
>
> unsignedninterp;
> booluses_kill;
> --
> 1.8.3.rc1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/mesa: Remove unused variable 'texImage'.

2013-05-17 Thread Jose Fonseca
_mesa_get_attachment_teximage has no side effects so looks good to me.

Jose


- Original Message -
> All uses of 'texImage' were removed in commit
> 77a405dba7f70f8a47655e90774a5ecf5c88a6ed.
> 
> Fixes "Unused pointer value" defect reported by Coverity.
> 
> Signed-off-by: Vinson Lee 
> ---
>  src/mesa/state_tracker/st_cb_fbo.c | 4 
>  1 file changed, 4 deletions(-)
> 
> diff --git a/src/mesa/state_tracker/st_cb_fbo.c
> b/src/mesa/state_tracker/st_cb_fbo.c
> index aa245d3..457cec1 100644
> --- a/src/mesa/state_tracker/st_cb_fbo.c
> +++ b/src/mesa/state_tracker/st_cb_fbo.c
> @@ -394,7 +394,6 @@ st_render_texture(struct gl_context *ctx,
> struct st_renderbuffer *strb = st_renderbuffer(rb);
> struct pipe_resource *pt;
> struct st_texture_object *stObj;
> -   const struct gl_texture_image *texImage;
> struct pipe_surface surf_tmpl;
>  
> if (!st_finalize_texture(ctx, pipe, att->Texture))
> @@ -403,9 +402,6 @@ st_render_texture(struct gl_context *ctx,
> pt = st_get_texobj_resource(att->Texture);
> assert(pt);
>  
> -   /* get pointer to texture image we're rendeing to */
> -   texImage = _mesa_get_attachment_teximage(att);
> -
> /* get the texture for the texture object */
> stObj = st_texture_object(att->Texture);
>  
> --
> 1.8.2.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] scons: Use LLVM shared library if found.

2013-05-17 Thread Jose Fonseca
Vinson,

Why is this necessary?

(I'd prefer that LLVM is statically linked by default. )

Jose

- Original Message -
> This patch fixes SCons builds on Fedora 18.
> 
> Signed-off-by: Vinson Lee 
> ---
>  scons/llvm.py | 10 +-
>  1 file changed, 9 insertions(+), 1 deletion(-)
> 
> diff --git a/scons/llvm.py b/scons/llvm.py
> index 7cd609c..432ece6 100644
> --- a/scons/llvm.py
> +++ b/scons/llvm.py
> @@ -198,7 +198,15 @@ def generate(env):
>  if llvm_version >= distutils.version.LooseVersion('3.2'):
>  env.Append(CXXFLAGS = ('-fno-rtti',))
>  
> -env.ParseConfig('llvm-config --libs ' + ' '.join(components))
> +llvm_shared_library = os.path.join(
> +env.backtick('llvm-config --libdir').strip(),
> +'libLLVM-%s%s' % (llvm_version, env['SHLIBSUFFIX'])
> +)
> +if os.path.exists(llvm_shared_library):
> +env.Append(LIBS = ['LLVM-%s' % llvm_version])
> +else:
> +env.ParseConfig('llvm-config --libs ' + '
> '.join(components))
> +
>  env.ParseConfig('llvm-config --ldflags')
>  except OSError:
>  print 'scons: llvm-config version %s failed' % llvm_version
> --
> 1.8.2.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] llvmpipe: Remove x/y from cmd_bin

2013-05-17 Thread Jose Fonseca


- Original Message -
> Am 16.05.2013 21:44, schrieb Adam Jackson:
> > These were mostly just a waste of memory and cache pressure, and were
> > really only used for debugging.
> > 
> > This change reduces instruction count (as measured by callgrind's Ir
> > event) of gnome-shell-perf-tool on Ivybridge by 3.5% ± 0.015% (n=20).
> > 
> > Signed-off-by: Adam Jackson 
> > ---
> >  src/gallium/drivers/llvmpipe/lp_rast.c   | 37
> >  +++-
> >  src/gallium/drivers/llvmpipe/lp_rast_debug.c | 19 +++---
> >  src/gallium/drivers/llvmpipe/lp_rast_priv.h  |  2 +-
> >  src/gallium/drivers/llvmpipe/lp_scene.c  |  4 ++-
> >  src/gallium/drivers/llvmpipe/lp_scene.h  |  4 +--
> >  src/gallium/drivers/llvmpipe/lp_setup.c  | 11 +
> >  6 files changed, 30 insertions(+), 47 deletions(-)
> > 
> > diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c
> > b/src/gallium/drivers/llvmpipe/lp_rast.c
> > index a557db4..3dc00ef 100644
> > --- a/src/gallium/drivers/llvmpipe/lp_rast.c
> > +++ b/src/gallium/drivers/llvmpipe/lp_rast.c
> > @@ -87,13 +87,14 @@ lp_rast_end( struct lp_rasterizer *rast )
> >   */
> >  static void
> >  lp_rast_tile_begin(struct lp_rasterizer_task *task,
> > -   const struct cmd_bin *bin)
> > +   const struct cmd_bin *bin,
> > +   int x, int y)
> >  {
> > -   LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, bin->x, bin->y);
> > +   LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
> >  
> > task->bin = bin;
> > -   task->x = bin->x * TILE_SIZE;
> > -   task->y = bin->y * TILE_SIZE;
> > +   task->x = x * TILE_SIZE;
> > +   task->y = y * TILE_SIZE;
> >  
> > /* reset pointers to color and depth tile(s) */
> > memset(task->color_tiles, 0, sizeof(task->color_tiles));
> > @@ -551,13 +552,14 @@ static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] =
> >  
> >  static void
> >  do_rasterize_bin(struct lp_rasterizer_task *task,
> > - const struct cmd_bin *bin)
> > + const struct cmd_bin *bin,
> > + int x, int y)
> >  {
> > const struct cmd_block *block;
> > unsigned k;
> >  
> > if (0)
> > -  lp_debug_bin(bin);
> > +  lp_debug_bin(bin, x, y);
> >  
> > for (block = bin->head; block; block = block->next) {
> >for (k = 0; k < block->count; k++) {
> > @@ -576,11 +578,11 @@ do_rasterize_bin(struct lp_rasterizer_task *task,
> >   */
> >  static void
> >  rasterize_bin(struct lp_rasterizer_task *task,
> > -  const struct cmd_bin *bin )
> > +  const struct cmd_bin *bin, int x, int y )
> >  {
> > -   lp_rast_tile_begin( task, bin );
> > +   lp_rast_tile_begin( task, bin, x, y );
> >  
> > -   do_rasterize_bin(task, bin);
> > +   do_rasterize_bin(task, bin, x, y);
> >  
> > lp_rast_tile_end(task);
> >  
> > @@ -622,27 +624,16 @@ rasterize_scene(struct lp_rasterizer_task *task,
> >  
> > if (!task->rast->no_rast && !scene->discard) {
> >/* loop over scene bins, rasterize each */
> > -#if 0
> > -  {
> > - unsigned i, j;
> > - for (i = 0; i < scene->tiles_x; i++) {
> > -for (j = 0; j < scene->tiles_y; j++) {
> > -   struct cmd_bin *bin = lp_scene_get_bin(scene, i, j);
> > -   rasterize_bin(task, bin, i, j);
> > -}
> > - }
> > -  }
> > -#else
> >{
> >   struct cmd_bin *bin;
> > + int i, j;
> >  
> >   assert(scene);
> > - while ((bin = lp_scene_bin_iter_next(scene))) {
> > + while ((bin = lp_scene_bin_iter_next(scene, &i, &j))) {
> >  if (!is_empty_bin( bin ))
> > -   rasterize_bin(task, bin);
> > +   rasterize_bin(task, bin, i, j);
> >   }
> >}
> > -#endif
> > }
> >  
> >  
> > diff --git a/src/gallium/drivers/llvmpipe/lp_rast_debug.c
> > b/src/gallium/drivers/llvmpipe/lp_rast_debug.c
> > index 4008251..3bc75aa 100644
> > --- a/src/gallium/drivers/llvmpipe/lp_rast_debug.c
> > +++ b/src/gallium/drivers/llvmpipe/lp_rast_debug.c
> > @@ -90,13 +90,13 @@ is_blend( const struct lp_rast_state *state,
> >  
> >  
> >  static void
> > -debug_bin( const struct cmd_bin *bin )
> > +debug_bin( const struct cmd_bin *bin, int x, int y )
> >  {
> > const struct lp_rast_state *state = NULL;
> > const struct cmd_block *head = bin->head;
> > int i, j = 0;
> >  
> > -   debug_printf("bin %d,%d:\n", bin->x, bin->y);
> > +   debug_printf("bin %d,%d:\n", x, y);
> >  
> > while (head) {
> >for (i = 0; i < head->count; i++, j++) {
> > @@ -231,13 +231,14 @@ debug_triangle(int tilex, int tiley,
> >  static void
> >  do_debug_bin( struct tile *tile,
> >const struct cmd_bin *bin,
> > +  int x, int y,
> >boolean print_cmds)
> >  {
> > unsigned k, j = 0;
> > const struct cmd_block *block;
> >  
> > -   int tx = bin->x * TILE_SIZE;
> > -   int ty = bin->y * TI

Re: [Mesa-dev] R600/SI Patches: A few cleanups for compute

2013-05-17 Thread Michel Dänzer
On Mit, 2013-05-15 at 14:26 -0700, Tom Stellard wrote:
> 
> The attached patches add some new patterns and instructions for SI and
> are a prerequisite for more invasive compute shader changes that I'm
> working on.
> 
> Please Review.

The SI changes are

Reviewed-by: Michel Dänzer 


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast |  Debian, X and DRI developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] scons: Don't force stabs debug format for Mingw.

2013-05-17 Thread jfonseca
From: José Fonseca 

- recent gdb handles DWARF fine (tested both with version
  7.1.90.20100730 from mingw-w64 project, and 7.5-1 from mingw project)

- http://people.freedesktop.org/~jrfonseca/bfdhelp/ was updated to
  handle DWARF

- it requires ugly hacks to prevent compilation failures

- it prevents proper backtraces when stabs/dwarf is mixed (which is
  inevitable, given that the MinGW C runtime is pre-built with dwarf)

For example, without this change I get:

  (gdb) bt
  #0  _wassert (_Message=0xf925060 L"Num < NumOperands && \"Invalid child # of 
SDNode!\"",
  _File=0xf60b488 L"llvm/include/llvm/CodeGen/SelectionDAGNodes.h", 
_Line=534)
  at ../../../../mingw-w64-crt/misc/wassert.c:51
  #1  0x0368996b in _assert (_Message=0x39d7ee4 "Num < NumOperands && \"Invalid 
child # of SDNode!\"",
  _File=0x39d7e94 "llvm/include/llvm/CodeGen/SelectionDAGNodes.h", 
_Line=534)
  at ../../../../mingw-w64-crt/misc/wassert.c:44
  #2  0x0004 in ?? ()
  #3  0x0004 in ?? ()
  #4  0x0f60b488 in ?? ()
  #5  0x in ?? ()

While with this change I get:

  (gdb) bt
  #0  _wassert (_Message=0xfb982e8 L"Num < NumOperands && \"Invalid child # of 
SDNode!\"",
  _File=0xefbcb40 L"llvm/include/llvm/CodeGen/SelectionDAGNodes.h", 
_Line=534)
  at ../../../../mingw-w64-crt/misc/wassert.c:51
  #1  0x039c996b in _assert (_Message=0x3d17f24 "Num < NumOperands && \"Invalid 
child # of SDNode!\"",
  _File=0x3d17ed4 "llvm/include/llvm/CodeGen/SelectionDAGNodes.h", 
_Line=534)
  at ../../../../mingw-w64-crt/misc/wassert.c:44
  #2  0x033111cc in getOperand (Num=4, this=)
  at llvm/include/llvm/CodeGen/SelectionDAGNodes.h:534
  #3  getOperand (i=4, this=)
  at llvm/include/llvm/CodeGen/SelectionDAGNodes.h:779
  #4  llvm::SelectionDAG::getNode (this=0xf00cb08, Opcode=79, DL=..., VT=..., 
N1=..., N2=...)
  at llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp:2859
  #5  0x03377b20 in llvm::SelectionDAGBuilder::visitExtractElement 
(this=0xfb45028, I=...)
  at llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp:2803
  [...]
---
 scons/crossmingw.py  |   42 --
 src/gallium/auxiliary/SConscript |4 
 src/mapi/glapi/SConscript|5 -
 3 files changed, 51 deletions(-)

diff --git a/scons/crossmingw.py b/scons/crossmingw.py
index 23c56c0..1287e0e 100644
--- a/scons/crossmingw.py
+++ b/scons/crossmingw.py
@@ -130,40 +130,6 @@ SCons.Tool.SourceFileScanner.add_scanner('.rc', 
SCons.Defaults.CScan)
 
 
 
-def compile_without_gstabs(env, sources, c_file):
-'''This is a hack used to compile some source files without the
--gstabs option.
-
-It seems that some versions of mingw32's gcc (4.4.2 at least) die
-when compiling large files with the -gstabs option.  -gstabs is
-related to debug symbols and can be omitted from the effected
-files.
-
-This function compiles the given c_file without -gstabs, removes
-the c_file from the sources list, then appends the new .o file to
-sources.  Then return the new sources list.
-'''
-
-# Modify CCFLAGS to not have -gstabs option:
-env2 = env.Clone()
-flags = str(env2['CCFLAGS'])
-flags = flags.replace("-gstabs", "")
-env2['CCFLAGS'] = SCons.Util.CLVar(flags)
-
-# Build the special-case files:
-obj_file = env2.SharedObject(c_file)
-
-# Replace ".cpp" or ".c" with ".o"
-o_file = c_file.replace(".cpp", ".o")
-o_file = o_file.replace(".c", ".o")
-
-# Replace the .c files with the specially-compiled .o file
-sources.remove(c_file)
-sources.append(o_file)
-
-return sources
-
-
 def generate(env):
 mingw_prefix = find(env)
 
@@ -221,13 +187,5 @@ def generate(env):
 env['LIBPREFIXES']= [ 'lib', '' ]
 env['LIBSUFFIXES']= [ '.a', '.lib' ]
 
-# MinGW x86 port of gdb does not handle well dwarf debug info which is the
-# default in recent gcc versions.  The x64 port gdb from mingw-w64 seems to
-# handle it fine though, so stick with the default there.
-if env['machine'] != 'x86_64':
-env.AppendUnique(CCFLAGS = ['-gstabs'])
-
-env.AddMethod(compile_without_gstabs, 'compile_without_gstabs')
-
 def exists(env):
 return find(env)
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index bfd5ec3..31dfed3 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -51,10 +51,6 @@ if env['llvm']:
 'GALLIVM_CPP_SOURCES'
 ])
 
-if env['toolchain'] == 'crossmingw':
-# compile lp_bld_misc.cpp without -gstabs option
-source = env.compile_without_gstabs(source, "gallivm/lp_bld_misc.cpp")
-
 gallium = env.ConvenienceLibrary(
 target = 'gallium',
 source = source,
diff --git a/src/mapi/glapi/SConscript b/src/mapi/glapi/SConscript
index ac11148..c4ac080 100644
--- a/src/mapi/glapi/SConscript
+++ b/src/mapi/glapi/SConscript
@@ -95,11 +95,6 @@ if (env['gcc'] or env['clang']) and 

[Mesa-dev] [Bug 64668] Clipping is performed incorrectly when using shaders with intel/nouveau/radeon drivers.

2013-05-17 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=64668

--- Comment #13 from edg...@yandex.ru ---
> As far as I've been able to tell from experimenting with the nVidia
> proprietary driver, its behaviour in this corner case is to not clip at all.

You're absolutely correct, it doesn't clip.
I see that the OpenGL specification is garbage. :)
In this case I'm mostly interested in portability and compatibility: since it
doesn't clip on either Windows or Linux with the proprietary drivers, it seems
logical to implement the same behaviour in the much less widespread open
implementation, Mesa, and the open drivers...

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 64649] Anomaly 2 (Steam) exits with GLX_EXT_swap_control not supported, unable to set vertical sync

2013-05-17 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=64649

--- Comment #4 from bartosz.brzos...@11bitstudios.com ---
The swap control extension is not required by the game to function. The exit
must be caused by something else. What exactly happens? Does it look like
graceful exit or a segfault? What kind of GPU are you using?

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/5] radeonsi: increase array size for shader inputs and outputs

2013-05-17 Thread Michel Dänzer
From: Marek Olšák 

and add assertions to prevent buffer overflow. This fixes corruption
of the si_shader struct.

NOTE: This is a candidate for the 9.1 branch.

[ Cherry-pick of r600g commit da33f9b919039442e9ab51f9b1d1c83a73607133 ]

Signed-off-by: Michel Dänzer 
---
 src/gallium/drivers/radeonsi/radeonsi_shader.c | 2 ++
 src/gallium/drivers/radeonsi/radeonsi_shader.h | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c 
b/src/gallium/drivers/radeonsi/radeonsi_shader.c
index f942436..bea2895 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
@@ -589,6 +589,7 @@ static void si_llvm_emit_epilogue(struct 
lp_build_tgsi_context * bld_base)
switch (d->Declaration.File) {
case TGSI_FILE_INPUT:
i = shader->ninput++;
+   assert(i < Elements(shader->input));
shader->input[i].name = d->Semantic.Name;
shader->input[i].sid = d->Semantic.Index;
shader->input[i].interpolate = d->Interp.Interpolate;
@@ -597,6 +598,7 @@ static void si_llvm_emit_epilogue(struct 
lp_build_tgsi_context * bld_base)
 
case TGSI_FILE_OUTPUT:
i = shader->noutput++;
+   assert(i < Elements(shader->output));
shader->output[i].name = d->Semantic.Name;
shader->output[i].sid = d->Semantic.Index;
shader->output[i].interpolate = d->Interp.Interpolate;
diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.h 
b/src/gallium/drivers/radeonsi/radeonsi_shader.h
index 1552dc2..0fbd601 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.h
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.h
@@ -101,10 +101,10 @@ struct si_pipe_shader_selector {
 
 struct si_shader {
unsignedninput;
-   struct si_shader_io input[32];
+   struct si_shader_io input[40];
 
unsignednoutput;
-   struct si_shader_io output[32];
+   struct si_shader_io output[40];
 
unsignedninterp;
booluses_kill;
-- 
1.8.3.rc1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/5] radeonsi: Initial support for multiple constant buffers

2013-05-17 Thread Michel Dänzer
From: Michel Dänzer 

Just enough to support an additional internal constant buffer for the user
clip planes.

NOTE: This is a candidate for the 9.1 branch.
Signed-off-by: Michel Dänzer 
---
 src/gallium/drivers/radeonsi/r600_buffer.c   | 30 ---
 src/gallium/drivers/radeonsi/radeonsi_pipe.h |  8 +++
 src/gallium/drivers/radeonsi/si_state.c  | 81 +---
 src/gallium/drivers/radeonsi/si_state_draw.c | 73 +
 4 files changed, 119 insertions(+), 73 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/r600_buffer.c 
b/src/gallium/drivers/radeonsi/r600_buffer.c
index 0c33c1e..cdf9988 100644
--- a/src/gallium/drivers/radeonsi/r600_buffer.c
+++ b/src/gallium/drivers/radeonsi/r600_buffer.c
@@ -24,7 +24,6 @@
  *  Jerome Glisse
  *  Corbin Simpson 
  */
-#include 
 
 #include "pipe/p_screen.h"
 #include "util/u_format.h"
@@ -169,32 +168,3 @@ void r600_upload_index_buffer(struct r600_context *rctx,
u_upload_data(rctx->uploader, 0, count * ib->index_size,
  ib->user_buffer, &ib->offset, &ib->buffer);
 }
-
-void r600_upload_const_buffer(struct r600_context *rctx, struct si_resource 
**rbuffer,
- const uint8_t *ptr, unsigned size,
- uint32_t *const_offset)
-{
-   *rbuffer = NULL;
-
-   if (R600_BIG_ENDIAN) {
-   uint32_t *tmpPtr;
-   unsigned i;
-
-   if (!(tmpPtr = malloc(size))) {
-   R600_ERR("Failed to allocate BE swap buffer.\n");
-   return;
-   }
-
-   for (i = 0; i < size / 4; ++i) {
-   tmpPtr[i] = bswap_32(((uint32_t *)ptr)[i]);
-   }
-
-   u_upload_data(rctx->uploader, 0, size, tmpPtr, const_offset,
- (struct pipe_resource**)rbuffer);
-
-   free(tmpPtr);
-   } else {
-   u_upload_data(rctx->uploader, 0, size, ptr, const_offset,
- (struct pipe_resource**)rbuffer);
-   }
-}
diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h 
b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
index c5b33f7..e50088f 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
@@ -124,6 +124,13 @@ struct r600_fence_block {
 #define R600_CONSTANT_ARRAY_SIZE 256
 #define R600_RESOURCE_ARRAY_SIZE 160
 
+struct r600_constbuf_state
+{
+   struct pipe_constant_buffer cb[2];
+   uint32_tenabled_mask;
+   uint32_tdirty_mask;
+};
+
 struct r600_context {
struct pipe_context context;
struct blitter_context  *blitter;
@@ -152,6 +159,7 @@ struct r600_context {
/* shader information */
unsignedsprite_coord_enable;
unsignedexport_16bpc;
+   struct r600_constbuf_state  constbuf_state[PIPE_SHADER_TYPES];
struct r600_textures_info   vs_samplers;
struct r600_textures_info   ps_samplers;
struct si_resource  *border_color_table;
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 6d072ef..de86b1e 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -24,12 +24,15 @@
  *  Christian König 
  */
 
+#include 
+
 #include "util/u_memory.h"
 #include "util/u_framebuffer.h"
 #include "util/u_blitter.h"
 #include "util/u_helpers.h"
 #include "util/u_math.h"
 #include "util/u_pack_color.h"
+#include "util/u_upload_mgr.h"
 #include "util/u_format_s3tc.h"
 #include "tgsi/tgsi_parse.h"
 #include "radeonsi_pipe.h"
@@ -2492,64 +2495,56 @@ static void si_delete_sampler_state(struct pipe_context 
*ctx, void *state)
  * Constants
  */
 static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint 
index,
-  struct pipe_constant_buffer *cb)
+  struct pipe_constant_buffer *input)
 {
struct r600_context *rctx = (struct r600_context *)ctx;
-   struct si_resource *rbuffer = cb ? si_resource(cb->buffer) : NULL;
-   struct si_pm4_state *pm4;
-   uint32_t offset;
-   uint64_t va;
+   struct r600_constbuf_state *state = &rctx->constbuf_state[shader];
+   struct pipe_constant_buffer *cb;
+   const uint8_t *ptr;
 
/* Note that the state tracker can unbind constant buffers by
 * passing NULL here.
 */
-   if (cb == NULL || (!cb->buffer && !cb->user_buffer))
+   if (unlikely(!input || (!input->buffer && !input->user_buffer))) {
+   state->enabled_mask &= ~(1 << index);
+   state->dirty_mask &= ~(1 << index);
+   pipe_resource_reference(&state->cb[index].buffer, NULL);
return;
+   }
 
-   pm4 = CALLOC_STRUCT(si_pm4_state);

[Mesa-dev] [PATCH 2/5] radeonsi: Fix handling of TGSI_SEMANTIC_PSIZE

2013-05-17 Thread Michel Dänzer
From: Michel Dänzer 

Two more little piglits.

NOTE: This is a candidate for the 9.1 branch.
Signed-off-by: Michel Dänzer 
---
 src/gallium/drivers/radeonsi/radeonsi_pipe.h   |  1 -
 src/gallium/drivers/radeonsi/radeonsi_shader.c |  4 +++-
 src/gallium/drivers/radeonsi/radeonsi_shader.h |  2 ++
 src/gallium/drivers/radeonsi/si_state.c|  4 
 src/gallium/drivers/radeonsi/si_state_draw.c   | 15 ---
 5 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h 
b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
index 388f6df..c5b33f7 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
@@ -140,7 +140,6 @@ struct r600_context {
unsignedpa_sc_line_stipple;
unsignedpa_su_sc_mode_cntl;
unsignedpa_cl_clip_cntl;
-   unsignedpa_cl_vs_out_cntl;
/* for saving when using blitter */
struct pipe_stencil_ref stencil_ref;
struct si_pipe_shader_selector  *ps_shader;
diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c 
b/src/gallium/drivers/radeonsi/radeonsi_shader.c
index bea2895..e6ed545 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
@@ -612,7 +612,9 @@ static void si_llvm_emit_epilogue(struct 
lp_build_tgsi_context * bld_base)
/* Select the correct target */
switch(d->Semantic.Name) {
case TGSI_SEMANTIC_PSIZE:
-   target = V_008DFC_SQ_EXP_POS;
+   shader->vs_out_misc_write = 1;
+   shader->vs_out_point_size = 1;
+   target = V_008DFC_SQ_EXP_POS + 1;
break;
case TGSI_SEMANTIC_POSITION:
if (si_shader_ctx->type == 
TGSI_PROCESSOR_VERTEX) {
diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.h 
b/src/gallium/drivers/radeonsi/radeonsi_shader.h
index 0fbd601..667f2c3 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.h
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.h
@@ -110,6 +110,8 @@ struct si_shader {
booluses_kill;
booluses_instanceid;
boolfs_write_all;
+   boolvs_out_misc_write;
+   boolvs_out_point_size;
unsignednr_cbufs;
 };
 
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index ed95b1d..6d072ef 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -412,9 +412,6 @@ static void *si_create_rs_state(struct pipe_context *ctx,
S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) |
S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) |
S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);
-   rs->pa_cl_vs_out_cntl =
-   S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) |
-   S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex);
 
clip_rule = state->scissor ? 0xAAAA : 0xFFFF;
 
@@ -485,7 +482,6 @@ static void si_bind_rs_state(struct pipe_context *ctx, void 
*state)
rctx->pa_sc_line_stipple = rs->pa_sc_line_stipple;
rctx->pa_su_sc_mode_cntl = rs->pa_su_sc_mode_cntl;
rctx->pa_cl_clip_cntl = rs->pa_cl_clip_cntl;
-   rctx->pa_cl_vs_out_cntl = rs->pa_cl_vs_out_cntl;
 
si_pm4_bind_state(rctx, rasterizer, rs);
si_update_fb_rs_state(rctx);
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c 
b/src/gallium/drivers/radeonsi/si_state_draw.c
index 8d16907..a9ecc64 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -55,8 +55,13 @@ static void si_pipe_shader_vs(struct pipe_context *ctx, 
struct si_pipe_shader *s
 * takes care of adding a dummy export.
 */
for (nparams = 0, i = 0 ; i < shader->shader.noutput; i++) {
-   if (shader->shader.output[i].name != TGSI_SEMANTIC_POSITION)
+   switch (shader->shader.output[i].name) {
+   case TGSI_SEMANTIC_POSITION:
+   case TGSI_SEMANTIC_PSIZE:
+   break;
+   default:
nparams++;
+   }
}
if (nparams < 1)
nparams = 1;
@@ -66,7 +71,9 @@ static void si_pipe_shader_vs(struct pipe_context *ctx, 
struct si_pipe_shader *s
 
si_pm4_set_reg(pm4, R_02870C_SPI_SHADER_POS_FORMAT,
   S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
-  S_02870C_POS1_EXPORT_FORMAT(V_02870C_SPI_SHADER_NONE) |
+  
S_02870C_POS1_EXPORT_

[Mesa-dev] [PATCH 4/5] radeonsi: Handle TGSI_SEMANTIC_CLIPVERTEX

2013-05-17 Thread Michel Dänzer
From: Michel Dänzer 

17 more little piglits.

NOTE: This is a candidate for the 9.1 branch.
Signed-off-by: Michel Dänzer 
---
 src/gallium/drivers/radeonsi/radeonsi_pipe.h   |  1 -
 src/gallium/drivers/radeonsi/radeonsi_shader.c | 62 ++
 src/gallium/drivers/radeonsi/radeonsi_shader.h |  1 +
 src/gallium/drivers/radeonsi/si_state.c| 10 -
 src/gallium/drivers/radeonsi/si_state.h|  1 +
 src/gallium/drivers/radeonsi/si_state_draw.c   | 19 +---
 6 files changed, 86 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h 
b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
index e50088f..3274049 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
@@ -146,7 +146,6 @@ struct r600_context {
struct pipe_framebuffer_state   framebuffer;
unsignedpa_sc_line_stipple;
unsignedpa_su_sc_mode_cntl;
-   unsignedpa_cl_clip_cntl;
/* for saving when using blitter */
struct pipe_stencil_ref stencil_ref;
struct si_pipe_shader_selector  *ps_shader;
diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c 
b/src/gallium/drivers/radeonsi/radeonsi_shader.c
index e6ed545..484f7ec 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
@@ -554,6 +554,64 @@ static void si_alpha_test(struct lp_build_tgsi_context 
*bld_base,
}
 }
 
+static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context * bld_base,
+   unsigned index)
+{
+   struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+   struct lp_build_context *base = &bld_base->base;
+   struct lp_build_context *uint = 
&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
+   LLVMValueRef args[9];
+   unsigned reg_index;
+   unsigned chan;
+   unsigned const_chan;
+   LLVMValueRef out_elts[4];
+   LLVMValueRef base_elt;
+   LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, 
SI_PARAM_CONST);
+   LLVMValueRef const_resource = build_indexed_load(si_shader_ctx, ptr, 
uint->one);
+
+   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+   LLVMValueRef out_ptr = 
si_shader_ctx->radeon_bld.soa.outputs[index][chan];
+   out_elts[chan] = LLVMBuildLoad(base->gallivm->builder, out_ptr, 
"");
+   }
+
+   for (reg_index = 0; reg_index < 2; reg_index ++) {
+   args[5] =
+   args[6] =
+   args[7] =
+   args[8] = lp_build_const_float(base->gallivm, 0.0f);
+
+   /* Compute dot products of position and user clip plane vectors 
*/
+   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+   for (const_chan = 0; const_chan < TGSI_NUM_CHANNELS; 
const_chan++) {
+   args[0] = const_resource;
+   args[1] = lp_build_const_int32(base->gallivm,
+  ((reg_index * 4 
+ chan) * 4 +
+   const_chan) * 
4);
+   base_elt = 
build_intrinsic(base->gallivm->builder,
+  "llvm.SI.load.const",
+  base->elem_type,
+  args, 2,
+  
LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
+   args[5 + chan] =
+   lp_build_add(base, args[5 + chan],
+lp_build_mul(base, 
base_elt,
+ 
out_elts[const_chan]));
+   }
+   }
+
+   args[0] = lp_build_const_int32(base->gallivm, 0xf);
+   args[1] = uint->zero;
+   args[2] = uint->zero;
+   args[3] = lp_build_const_int32(base->gallivm,
+  V_008DFC_SQ_EXP_POS + 2 + 
reg_index);
+   args[4] = uint->zero;
+   lp_build_intrinsic(base->gallivm->builder,
+  "llvm.SI.export",
+  
LLVMVoidTypeInContext(base->gallivm->context),
+  args, 9);
+   }
+}
+
 /* XXX: This is partially implemented for VS only at this point.  It is not 
complete */
 static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
 {
@@ -642,6 +700,10 @@ static void si_llvm_emit_epilogue(struct 
lp_build_tgsi_context * bld_base)
color_count++;
}
  

[Mesa-dev] [PATCH 5/5] radeonsi: Fix user clip planes

2013-05-17 Thread Michel Dänzer
From: Michel Dänzer 

4 more little piglits.

NOTE: This is a candidate for the 9.1 branch.
Signed-off-by: Michel Dänzer 
---
 src/gallium/drivers/radeonsi/si_state_draw.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c 
b/src/gallium/drivers/radeonsi/si_state_draw.c
index 4380d2c..ae571a4 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -316,10 +316,9 @@ static bool si_update_draw_info_state(struct r600_context 
*rctx,
   (rctx->queued.named.rasterizer->clip_plane_enable &
vs->clip_dist_write));
si_pm4_set_reg(pm4, R_028810_PA_CL_CLIP_CNTL,
-  rctx->queued.named.rasterizer->pa_cl_clip_cntl
-   /*| (rctx->vs_shader->shader.clip_dist_write ||
-   rctx->vs_shader->shader.vs_prohibit_ucps ?
-   0 : rctx->rasterizer->clip_plane_enable & 0x3F)*/);
+  rctx->queued.named.rasterizer->pa_cl_clip_cntl |
+  (vs->clip_dist_write ? 0 :
+   rctx->queued.named.rasterizer->clip_plane_enable & 
0x3F));
 
si_pm4_set_state(rctx, draw_info, pm4);
return true;
-- 
1.8.3.rc1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev