[Mesa-dev] [PATCH 1/4] swrast: do depth/stencil clearing with Map/UnmapRenderbuffer()

2011-12-15 Thread Brian Paul
Another step toward getting rid of the renderbuffer PutRow/etc functions.

v2: fix assorted depth/stencil clear bugs found by Eric
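
(For illustration: the converted clearing functions all follow roughly this
Map/UnmapRenderbuffer pattern.  A simplified sketch, not the literal patch
code, assuming a 4-byte-per-pixel Z format:)

   static void
   clear_depth_sketch(struct gl_context *ctx, struct gl_renderbuffer *rb,
                      GLuint clearValue, GLint x, GLint y,
                      GLint width, GLint height)
   {
      GLubyte *map;
      GLint rowStride, i, j;

      /* map the region of the renderbuffer we want to write */
      ctx->Driver.MapRenderbuffer(ctx, rb, x, y, width, height,
                                  GL_MAP_WRITE_BIT, &map, &rowStride);
      if (!map)
         return;

      /* fill each mapped row with the clear value */
      for (i = 0; i < height; i++) {
         GLuint *row = (GLuint *) (map + i * rowStride);
         for (j = 0; j < width; j++)
            row[j] = clearValue;
      }

      ctx->Driver.UnmapRenderbuffer(ctx, rb);
   }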
---
 src/mesa/swrast/s_clear.c   |   49 ++---
 src/mesa/swrast/s_depth.c   |  235 ---
 src/mesa/swrast/s_depth.h   |5 +-
 src/mesa/swrast/s_stencil.c |  154 +---
 src/mesa/swrast/s_stencil.h |2 +-
 5 files changed, 305 insertions(+), 140 deletions(-)

diff --git a/src/mesa/swrast/s_clear.c b/src/mesa/swrast/s_clear.c
index 851f6d1..3566370 100644
--- a/src/mesa/swrast/s_clear.c
+++ b/src/mesa/swrast/s_clear.c
@@ -195,6 +195,8 @@ clear_color_buffers(struct gl_context *ctx)
 void
 _swrast_Clear(struct gl_context *ctx, GLbitfield buffers)
 {
+   const GLbitfield BUFFER_DS = BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL;
+
 #ifdef DEBUG_FOO
{
   const GLbitfield legalBits =
@@ -216,24 +218,39 @@ _swrast_Clear(struct gl_context *ctx, GLbitfield buffers)
    if (SWRAST_CONTEXT(ctx)->NewState)
       _swrast_validate_derived(ctx);
 
-   swrast_render_start(ctx);
+   if ((buffers & BUFFER_BITS_COLOR)
+       && (ctx->DrawBuffer->_NumColorDrawBuffers > 0)) {
+      /* XXX remove the swrast_render_start/finish() calls after
+       * clear_color_buffers() is converted to use Map/UnmapRenderbuffer()
+       * The other clearing functions don't need these calls.
+       */
+      swrast_render_start(ctx);
+      clear_color_buffers(ctx);
+      swrast_render_finish(ctx);
+   }
 
-   /* do software clearing here */
-   if (buffers) {
-      if ((buffers & BUFFER_BITS_COLOR)
-          && (ctx->DrawBuffer->_NumColorDrawBuffers > 0)) {
-         clear_color_buffers(ctx);
-      }
-      if (buffers & BUFFER_BIT_DEPTH) {
-         _swrast_clear_depth_buffer(ctx, ctx->DrawBuffer->_DepthBuffer);
-      }
-      if (buffers & BUFFER_BIT_ACCUM) {
-         _mesa_clear_accum_buffer(ctx);
+   if (buffers & BUFFER_BIT_ACCUM) {
+      _mesa_clear_accum_buffer(ctx);
+   }
+
+   if (buffers & BUFFER_DS) {
+      struct gl_renderbuffer *depthRb =
+         ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Renderbuffer;
+      struct gl_renderbuffer *stencilRb =
+         ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Renderbuffer;
+
+      if ((buffers & BUFFER_DS) == BUFFER_DS && depthRb == stencilRb) {
+         /* clear depth and stencil together */
+         _swrast_clear_depth_stencil_buffer(ctx);
       }
-      if (buffers & BUFFER_BIT_STENCIL) {
-         _swrast_clear_stencil_buffer(ctx, ctx->DrawBuffer->_StencilBuffer);
+      else {
+         /* clear depth, stencil separately */
+         if (buffers & BUFFER_BIT_DEPTH) {
+            _swrast_clear_depth_buffer(ctx);
+         }
+         if (buffers & BUFFER_BIT_STENCIL) {
+            _swrast_clear_stencil_buffer(ctx);
+         }
       }
    }
-
-   swrast_render_finish(ctx);
 }
diff --git a/src/mesa/swrast/s_depth.c b/src/mesa/swrast/s_depth.c
index 8d3ad10..6a45c89 100644
--- a/src/mesa/swrast/s_depth.c
+++ b/src/mesa/swrast/s_depth.c
@@ -26,6 +26,7 @@
 #include "main/glheader.h"
 #include "main/context.h"
 #include "main/formats.h"
+#include "main/format_pack.h"
 #include "main/macros.h"
 #include "main/imports.h"
 
@@ -1312,14 +1313,21 @@ _swrast_read_depth_span_float( struct gl_context *ctx, struct gl_renderbuffer *r
    }
 }
 
+
 /**
- * Clear the given z/depth renderbuffer.
+ * Clear the given z/depth renderbuffer.  If the buffer is a combined
+ * depth+stencil buffer, only the Z bits will be touched.
  */
 void
-_swrast_clear_depth_buffer( struct gl_context *ctx, struct gl_renderbuffer *rb )
+_swrast_clear_depth_buffer(struct gl_context *ctx)
 {
+   struct gl_renderbuffer *rb =
+      ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Renderbuffer;
    GLuint clearValue;
    GLint x, y, width, height;
+   GLubyte *map;
+   GLint rowStride, i, j;
+   GLbitfield mapMode;
 
    if (!rb || !ctx->Depth.Mask) {
       /* no depth buffer, or writing to it is disabled */
@@ -1334,67 +1342,218 @@ _swrast_clear_depth_buffer( struct gl_context *ctx, struct gl_renderbuffer *rb )
       clearValue = (GLuint) (ctx->Depth.Clear * ctx->DrawBuffer->_DepthMaxF);
    }
 
-   assert(rb->_BaseFormat == GL_DEPTH_COMPONENT);
-
    /* compute region to clear */
    x = ctx->DrawBuffer->_Xmin;
    y = ctx->DrawBuffer->_Ymin;
    width  = ctx->DrawBuffer->_Xmax - ctx->DrawBuffer->_Xmin;
    height = ctx->DrawBuffer->_Ymax - ctx->DrawBuffer->_Ymin;
 
-   if (rb->GetPointer(ctx, rb, 0, 0)) {
-      /* Direct buffer access is possible.  Either this is just malloc'd
-       * memory, or perhaps the driver mmap'd the zbuffer memory.
-       */
-      if (rb->DataType == GL_UNSIGNED_SHORT) {
-         if ((clearValue & 0xff) == ((clearValue >> 8) & 0xff) &&
-             ((GLushort *) rb->GetPointer(ctx, rb, 0, 0) + width ==
-              (GLushort *) rb->GetPointer(ctx, rb, 0, 1))) {
-            /* optimized case */
-            GLushort *dst = (GLushort *) rb->GetPointer(ctx, rb, x, y);
-            GLuint len = width * height * sizeof(GLushort);
-            memset(dst, 

[Mesa-dev] [PATCH 2/4] mesa: add _mesa_get_format_max_bits()

2011-12-15 Thread Brian Paul
Returns max bits per channel for the given format.
---
 src/mesa/main/formats.c |   16 
 src/mesa/main/formats.h |3 +++
 2 files changed, 19 insertions(+), 0 deletions(-)

diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c
index 1f83a53..cca0014 100644
--- a/src/mesa/main/formats.c
+++ b/src/mesa/main/formats.c
@@ -27,6 +27,7 @@
 #include "imports.h"
 #include "formats.h"
 #include "mfeatures.h"
+#include "macros.h"
 
 
 /**
@@ -1613,6 +1614,21 @@ _mesa_get_format_bits(gl_format format, GLenum pname)
 }
 
 
+GLuint
+_mesa_get_format_max_bits(gl_format format)
+{
+   const struct gl_format_info *info = _mesa_get_format_info(format);
+   GLuint max = MAX2(info->RedBits, info->GreenBits);
+   max = MAX2(max, info->BlueBits);
+   max = MAX2(max, info->AlphaBits);
+   max = MAX2(max, info->LuminanceBits);
+   max = MAX2(max, info->IntensityBits);
+   max = MAX2(max, info->DepthBits);
+   max = MAX2(max, info->StencilBits);
+   return max;
+}
+
+
 /**
  * Return the data type (or more specifically, the data representation)
  * for the given format.
diff --git a/src/mesa/main/formats.h b/src/mesa/main/formats.h
index e6b429d..9609343 100644
--- a/src/mesa/main/formats.h
+++ b/src/mesa/main/formats.h
@@ -290,6 +290,9 @@ _mesa_get_format_bytes(gl_format format);
 extern GLint
 _mesa_get_format_bits(gl_format format, GLenum pname);
 
+extern GLuint
+_mesa_get_format_max_bits(gl_format format);
+
 extern GLenum
 _mesa_get_format_datatype(gl_format format);
 
-- 
1.7.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] mesa: add _mesa_pack_colormask()

2011-12-15 Thread Brian Paul
For generating bit-wise colormasks for arbitrary pixel formats.
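
(For example, with a MESA_FORMAT_ARGB8888 renderbuffer and
glColorMask(GL_TRUE, GL_FALSE, GL_TRUE, GL_TRUE), the packed mask comes out
as 0xffff00ff: all ones in the alpha, red and blue bytes, zeros in the green
byte.  A caller can then do a read-modify-write per pixel.  A minimal sketch,
where rb is the target renderbuffer, dstPixel points at a mapped 32-bit pixel
and newPixel is the value to write (these surrounding names are assumed, not
part of this patch):)

   GLuint pixelMask;
   _mesa_pack_colormask(rb->Format, ctx->Color.ColorMask[0], &pixelMask);
   /* keep disabled channels from the existing pixel, take the rest from newPixel */
   *dstPixel = (*dstPixel & ~pixelMask) | (newPixel & pixelMask);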
---
 src/mesa/main/format_pack.c |   75 +++
 src/mesa/main/format_pack.h |3 ++
 2 files changed, 78 insertions(+), 0 deletions(-)

diff --git a/src/mesa/main/format_pack.c b/src/mesa/main/format_pack.c
index 390b494..840559b 100644
--- a/src/mesa/main/format_pack.c
+++ b/src/mesa/main/format_pack.c
@@ -2494,3 +2494,78 @@ _mesa_pack_uint_24_8_depth_stencil_row(gl_format format, 
GLuint n,
   return;
}
 }
+
+
+
+/**
+ * Convert a boolean color mask to a packed color where each channel of
+ * the packed value at dst will be 0 or ~0 depending on the colorMask.
+ */
+void
+_mesa_pack_colormask(gl_format format, const GLubyte colorMask[4], void *dst)
+{
+   GLfloat maskColor[4];
+
+   switch (_mesa_get_format_datatype(format)) {
+   case GL_UNSIGNED_NORMALIZED:
+  /* simple: 1.0 will convert to ~0 in the right bit positions */
+  maskColor[0] = colorMask[0] ? 1.0 : 0.0;
+  maskColor[1] = colorMask[1] ? 1.0 : 0.0;
+  maskColor[2] = colorMask[2] ? 1.0 : 0.0;
+  maskColor[3] = colorMask[3] ? 1.0 : 0.0;
+  _mesa_pack_float_rgba_row(format, 1,
+(const GLfloat (*)[4]) maskColor, dst);
+  break;
+   case GL_SIGNED_NORMALIZED:
+   case GL_FLOAT:
+  /* These formats are harder because it's hard to know the floating
+   * point values that will convert to ~0 for each color channel's bits.
+   * This solution just generates a non-zero value for each color channel
+   * then fixes up the non-zero values to be ~0.
+   * Note: we'll need to add special case code if we ever have to deal
+   * with formats with unequal color channel sizes, like R11_G11_B10.
+   * We issue a warning below for channel sizes other than 8,16,32.
+   */
+  {
+ GLuint bits = _mesa_get_format_max_bits(format); /* bits per chan */
+ GLuint bytes = _mesa_get_format_bytes(format);
+ GLuint i;
+
+ /* this should put non-zero values into the channels of dst */
+ maskColor[0] = colorMask[0] ? -1.0 : 0.0;
+ maskColor[1] = colorMask[1] ? -1.0 : 0.0;
+ maskColor[2] = colorMask[2] ? -1.0 : 0.0;
+ maskColor[3] = colorMask[3] ? -1.0 : 0.0;
+ _mesa_pack_float_rgba_row(format, 1,
+   (const GLfloat (*)[4]) maskColor, dst);
+
+ /* fix-up the dst channels by converting non-zero values to ~0 */
+         if (bits == 8) {
+            GLubyte *d = (GLubyte *) dst;
+            for (i = 0; i < bytes; i++) {
+               d[i] = d[i] ? 0xff : 0x0;
+            }
+         }
+         else if (bits == 16) {
+            GLushort *d = (GLushort *) dst;
+            for (i = 0; i < bytes / 2; i++) {
+               d[i] = d[i] ? 0xffff : 0x0;
+            }
+         }
+         else if (bits == 32) {
+            GLuint *d = (GLuint *) dst;
+            for (i = 0; i < bytes / 4; i++) {
+               d[i] = d[i] ? 0xffffffffU : 0x0;
+            }
+         }
+         else {
+            _mesa_problem(NULL, "unexpected size in _mesa_pack_colormask()");
+            return;
+         }
+      }
+      break;
+   default:
+      _mesa_problem(NULL, "unexpected format data type in gen_color_mask()");
+      return;
+   }
+}
diff --git a/src/mesa/main/format_pack.h b/src/mesa/main/format_pack.h
index 7df1356..f1b4805 100644
--- a/src/mesa/main/format_pack.h
+++ b/src/mesa/main/format_pack.h
@@ -95,4 +95,7 @@ _mesa_pack_uint_24_8_depth_stencil_row(gl_format format, 
GLuint n,
const GLuint *src, void *dst);
 
 
+extern void
+_mesa_pack_colormask(gl_format format, const GLubyte colorMask[4], void *dst);
+
 #endif
-- 
1.7.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/4] swrast: rewrite color buffer clearing to use Map/UnmapRenderbuffer()

2011-12-15 Thread Brian Paul
v2: use _mesa_pack_colormask() helper and fix incorrect masking arithmetic
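
(Roughly, the new per-format clear loop works like this sketch for the
4-byte-per-pixel case, reusing the variable names from the function below;
this is a simplified illustration, not the literal macro body from the patch:)

   GLuint pixel, pixelMask;
   _mesa_pack_float_rgba_row(rb->Format, 1, clearColor, &pixel);
   if (doMasking) {
      _mesa_pack_colormask(rb->Format, colorMask, &pixelMask);
      pixel &= pixelMask;   /* pre-mask the packed clear value once */
   }
   for (i = 0; i < height; i++) {
      GLuint *row = (GLuint *) (map + i * rowStride);
      for (j = 0; j < width; j++) {
         if (doMasking)
            row[j] = (row[j] & ~pixelMask) | pixel;
         else
            row[j] = pixel;
      }
   }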
---
 src/mesa/swrast/s_clear.c |  225 +++--
 1 files changed, 113 insertions(+), 112 deletions(-)

diff --git a/src/mesa/swrast/s_clear.c b/src/mesa/swrast/s_clear.c
index 3566370..d942e6e 100644
--- a/src/mesa/swrast/s_clear.c
+++ b/src/mesa/swrast/s_clear.c
@@ -24,131 +24,146 @@
 
 #include "main/glheader.h"
 #include "main/accum.h"
-#include "main/colormac.h"
 #include "main/condrender.h"
+#include "main/format_pack.h"
 #include "main/macros.h"
 #include "main/imports.h"
 #include "main/mtypes.h"
 
 #include "s_context.h"
 #include "s_depth.h"
-#include "s_masking.h"
 #include "s_stencil.h"
 
 
+
 /**
- * Clear the color buffer when glColorMask is in effect.
+ * Clear an rgba color buffer with masking if needed.
  */
 static void
-clear_rgba_buffer_with_masking(struct gl_context *ctx, struct gl_renderbuffer *rb,
-                               GLuint buf)
+clear_rgba_buffer(struct gl_context *ctx, struct gl_renderbuffer *rb,
+                  const GLubyte colorMask[4])
 {
    const GLint x = ctx->DrawBuffer->_Xmin;
    const GLint y = ctx->DrawBuffer->_Ymin;
    const GLint height = ctx->DrawBuffer->_Ymax - ctx->DrawBuffer->_Ymin;
    const GLint width  = ctx->DrawBuffer->_Xmax - ctx->DrawBuffer->_Xmin;
-   SWspan span;
-   GLint i;
-
-   ASSERT(rb->PutRow);
+   const GLuint pixelSize = _mesa_get_format_bytes(rb->Format);
+   const GLboolean doMasking = (colorMask[0] == 0 ||
+                                colorMask[1] == 0 ||
+                                colorMask[2] == 0 ||
+                                colorMask[3] == 0);
+   const GLfloat (*clearColor)[4] =
+      (const GLfloat (*)[4]) ctx->Color.ClearColor.f;
+   GLbitfield mapMode = GL_MAP_WRITE_BIT;
+   GLubyte *map;
+   GLint rowStride;
+   GLint i, j;
 
-   /* Initialize color span with clear color */
-   /* XXX optimize for clearcolor == black/zero (bzero) */
-   INIT_SPAN(span, GL_BITMAP);
-   span.end = width;
-   span.arrayMask = SPAN_RGBA;
-   span.array->ChanType = rb->DataType;
-   if (span.array->ChanType == GL_UNSIGNED_BYTE) {
-      GLubyte clearColor[4];
-      _mesa_unclamped_float_rgba_to_ubyte(clearColor, ctx->Color.ClearColor.f);
-      for (i = 0; i < width; i++) {
-         COPY_4UBV(span.array->rgba[i], clearColor);
-      }
-   }
-   else if (span.array->ChanType == GL_UNSIGNED_SHORT) {
-      GLushort clearColor[4];
-      UNCLAMPED_FLOAT_TO_USHORT(clearColor[RCOMP], ctx->Color.ClearColor.f[0]);
-      UNCLAMPED_FLOAT_TO_USHORT(clearColor[GCOMP], ctx->Color.ClearColor.f[1]);
-      UNCLAMPED_FLOAT_TO_USHORT(clearColor[BCOMP], ctx->Color.ClearColor.f[2]);
-      UNCLAMPED_FLOAT_TO_USHORT(clearColor[ACOMP], ctx->Color.ClearColor.f[3]);
-      for (i = 0; i < width; i++) {
-         COPY_4V_CAST(span.array->rgba[i], clearColor, GLchan);
-      }
-   }
-   else {
-      ASSERT(span.array->ChanType == GL_FLOAT);
-      for (i = 0; i < width; i++) {
-         UNCLAMPED_FLOAT_TO_CHAN(span.array->rgba[i][0], ctx->Color.ClearColor.f[0]);
-         UNCLAMPED_FLOAT_TO_CHAN(span.array->rgba[i][1], ctx->Color.ClearColor.f[1]);
-         UNCLAMPED_FLOAT_TO_CHAN(span.array->rgba[i][2], ctx->Color.ClearColor.f[2]);
-         UNCLAMPED_FLOAT_TO_CHAN(span.array->rgba[i][3], ctx->Color.ClearColor.f[3]);
-      }
+   if (doMasking) {
+      /* we'll need to read buffer values too */
+      mapMode |= GL_MAP_READ_BIT;
    }
 
-   /* Note that masking will change the color values, but only the
-    * channels for which the write mask is GL_FALSE.  The channels
-    * which which are write-enabled won't get modified.
-    */
-   for (i = 0; i < height; i++) {
-      span.x = x;
-      span.y = y + i;
-      _swrast_mask_rgba_span(ctx, rb, &span, buf);
-      /* write masked row */
-      rb->PutRow(ctx, rb, width, x, y + i, span.array->rgba, NULL);
+   /* map dest buffer */
+   ctx->Driver.MapRenderbuffer(ctx, rb, x, y, width, height,
+                               mapMode, &map, &rowStride);
+   if (!map) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glClear(color)");
+      return;
    }
-}
-
 
-/**
- * Clear an rgba color buffer without channel masking.
- */
-static void
-clear_rgba_buffer(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint buf)
-{
-   const GLint x = ctx->DrawBuffer->_Xmin;
-   const GLint y = ctx->DrawBuffer->_Ymin;
-   const GLint height = ctx->DrawBuffer->_Ymax - ctx->DrawBuffer->_Ymin;
-   const GLint width  = ctx->DrawBuffer->_Xmax - ctx->DrawBuffer->_Xmin;
-   GLubyte clear8[4];
-   GLushort clear16[4];
-   GLvoid *clearVal;
-   GLfloat clearFloat[4];
-   GLint i;
+   /* for 1, 2, 4-byte clearing */
+#define SIMPLE_TYPE_CLEAR(TYPE) \
+   do { \
+      TYPE pixel, pixelMask;                                        \
+      _mesa_pack_float_rgba_row(rb->Format, 1, clearColor, &pixel); \
+  if (doMasking) {  \

[Mesa-dev] [PATCH] mesa: add MESA_FORMAT_RGB565[_REV] as candidates for GL_R3_G3_B2

2011-12-15 Thread Brian Paul
---
 src/mesa/main/texformat.c |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c
index c776b41..7e60541 100644
--- a/src/mesa/main/texformat.c
+++ b/src/mesa/main/texformat.c
@@ -118,6 +118,8 @@ _mesa_choose_tex_format( struct gl_context *ctx, GLint 
internalFormat,
 break;
   case GL_R3_G3_B2:
 RETURN_IF_SUPPORTED(MESA_FORMAT_RGB332);
+RETURN_IF_SUPPORTED(MESA_FORMAT_RGB565);
+RETURN_IF_SUPPORTED(MESA_FORMAT_RGB565_REV);
 RETURN_IF_SUPPORTED(MESA_FORMAT_RGB888);
RETURN_IF_SUPPORTED(MESA_FORMAT_XRGB8888);
RETURN_IF_SUPPORTED(MESA_FORMAT_ARGB8888);
-- 
1.7.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] gbm: Introduce a new API gbm_bo_create_from_name.

2011-12-15 Thread zhigang . gong
From: Zhigang Gong zhigang.g...@linux.intel.com

Glamor needs a function to create a texture from a BO that was
allocated directly with libdrm in the DDX layer.  The EGL image
extension API eglCreateImageKHR does support this, but that extension
only supports one color format - ARGB32 - which is not sufficient for
us.  I discussed this on the mailing list and KRH suggested that
extending GBM to support more color formats would be better.  I took
his advice and decided to use a gbm bo to create the image.  Before
extending the color formats, I first have to add a new API to create
a gbm bo from a name.  This commit is for that purpose.

Signed-off-by: Zhigang Gong zhigang.g...@linux.intel.com
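
(A hypothetical caller, for illustration only; the public prototype is added
in gbm.h, and the parameter order here assumes it mirrors the backend
function.  flink_name and pitch are illustrative names:)

   struct gbm_bo *bo =
      gbm_bo_create_from_name(gbm, width, height,
                              GBM_BO_FORMAT_ARGB8888, flink_name, pitch);
   if (bo == NULL)
      return;   /* unsupported format, or the name could not be imported */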
---
 src/gbm/backends/dri/gbm_dri.c |   87 ++--
 src/gbm/main/gbm.c |   14 ++
 src/gbm/main/gbm.h |6 +++
 src/gbm/main/gbmint.h  |4 ++
 4 files changed, 98 insertions(+), 13 deletions(-)

diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c
index 9de8cb6..d78cf5f 100644
--- a/src/gbm/backends/dri/gbm_dri.c
+++ b/src/gbm/backends/dri/gbm_dri.c
@@ -284,6 +284,22 @@ gbm_dri_bo_create_from_egl_image(struct gbm_device *gbm,
    return &bo->base.base;
 }
 
+static int
+gbm_dri_match_format(enum gbm_bo_format format, int *dri_format)
+{
+   switch (format) {
+   case GBM_BO_FORMAT_XRGB8888:
+      *dri_format = __DRI_IMAGE_FORMAT_XRGB8888;
+      break;
+   case GBM_BO_FORMAT_ARGB8888:
+      *dri_format = __DRI_IMAGE_FORMAT_ARGB8888;
+      break;
+   default:
+      return -1;
+   }
+   return 0;
+}
+
 static struct gbm_bo *
 gbm_dri_bo_create(struct gbm_device *gbm,
   uint32_t width, uint32_t height,
@@ -296,23 +312,15 @@ gbm_dri_bo_create(struct gbm_device *gbm,
 
    bo = calloc(1, sizeof *bo);
    if (bo == NULL)
-      return NULL;
+      goto fail;
 
    bo->base.base.gbm = gbm;
    bo->base.base.width = width;
    bo->base.base.height = height;
 
-   switch (format) {
-   case GBM_BO_FORMAT_XRGB8888:
-      dri_format = __DRI_IMAGE_FORMAT_XRGB8888;
-      break;
-   case GBM_BO_FORMAT_ARGB8888:
-      dri_format = __DRI_IMAGE_FORMAT_ARGB8888;
-      break;
-   default:
-      return NULL;
-   }
-
+   if (gbm_dri_match_format(format, &dri_format) != 0)
+      goto fail;
+
    if (usage & GBM_BO_USE_SCANOUT)
       dri_use |= __DRI_IMAGE_USE_SCANOUT;
    if (usage & GBM_BO_USE_CURSOR_64X64)
@@ -323,8 +331,9 @@ gbm_dri_bo_create(struct gbm_device *gbm,
   width, height,
   dri_format, dri_use,
   bo);
+
    if (bo->image == NULL)
-      return NULL;
+      goto fail;
 
    dri->image->queryImage(bo->image, __DRI_IMAGE_ATTRIB_HANDLE,
                           &bo->base.base.handle.s32);
@@ -332,8 +341,59 @@ gbm_dri_bo_create(struct gbm_device *gbm,
                           (int *) &bo->base.base.pitch);
 
    return &bo->base.base;
+
+fail:
+
+   if (bo)
+  free(bo);
+   return NULL;
 }
 
+static struct gbm_bo *
+gbm_dri_bo_create_from_name(struct gbm_device *gbm,
+                            uint32_t width, uint32_t height,
+                            enum gbm_bo_format format, int name,
+                            int pitch)
+{
+   struct gbm_dri_device *dri = gbm_dri_device(gbm);
+   struct gbm_dri_bo *bo;
+   int dri_format;
+
+   bo = calloc(1, sizeof *bo);
+   if (bo == NULL)
+      goto fail;
+
+   bo->base.base.gbm = gbm;
+   bo->base.base.width = width;
+   bo->base.base.height = height;
+
+   if (gbm_dri_match_format(format, &dri_format) != 0)
+      goto fail;
+
+   bo->image =
+      dri->image->createImageFromName(dri->screen,
+                                      width, height,
+                                      dri_format, name, pitch,
+                                      bo);
+   if (bo->image == NULL)
+      goto fail;
+
+   dri->image->queryImage(bo->image, __DRI_IMAGE_ATTRIB_HANDLE,
+                          &bo->base.base.handle.s32);
+   dri->image->queryImage(bo->image, __DRI_IMAGE_ATTRIB_STRIDE,
+                          (int *) &bo->base.base.pitch);
+
+   return &bo->base.base;
+
+fail:
+
+   if (bo)
+      free(bo);
+   return NULL;
+
+}
+
+
 static void
 dri_destroy(struct gbm_device *gbm)
 {
@@ -358,6 +418,7 @@ dri_device_create(int fd)
    dri->base.base.fd = fd;
    dri->base.base.bo_create = gbm_dri_bo_create;
    dri->base.base.bo_create_from_egl_image = gbm_dri_bo_create_from_egl_image;
+   dri->base.base.bo_create_from_name = gbm_dri_bo_create_from_name;
    dri->base.base.is_format_supported = gbm_dri_is_format_supported;
    dri->base.base.bo_destroy = gbm_dri_bo_destroy;
    dri->base.base.destroy = dri_destroy;
diff --git a/src/gbm/main/gbm.c b/src/gbm/main/gbm.c
index 8440b2c..93e06f3 100644
--- a/src/gbm/main/gbm.c
+++ b/src/gbm/main/gbm.c
@@ -177,6 +177,20 @@ gbm_bo_create(struct gbm_device *gbm,
 }
 
 GBM_EXPORT struct gbm_bo *
+gbm_bo_create_from_name(struct gbm_device *gbm,
+uint32_t width, uint32_t 

[Mesa-dev] [PATCH 2/2] gbm/dri: Added more color formats.

2011-12-15 Thread zhigang . gong
From: Zhigang Gong zhigang.g...@linux.intel.com

Add two color formats, RGB565 and A8, to the gbm layer.
Add A8 to the dri layer.

Signed-off-by: Zhigang Gong zhigang.g...@linux.intel.com
---
 include/GL/internal/dri_interface.h   |1 +
 src/gbm/backends/dri/gbm_dri.c|6 ++
 src/gbm/main/gbm.h|2 ++
 src/mesa/drivers/dri/intel/intel_screen.c |5 +
 4 files changed, 14 insertions(+), 0 deletions(-)

diff --git a/include/GL/internal/dri_interface.h 
b/include/GL/internal/dri_interface.h
index 4f768f0..40034e2 100644
--- a/include/GL/internal/dri_interface.h
+++ b/include/GL/internal/dri_interface.h
@@ -840,6 +840,7 @@ struct __DRIdri2ExtensionRec {
 #define __DRI_IMAGE_FORMAT_XRGB8888    0x1002
 #define __DRI_IMAGE_FORMAT_ARGB8888    0x1003
 #define __DRI_IMAGE_FORMAT_ABGR8888    0x1004
+#define __DRI_IMAGE_FORMAT_A8          0x1005
 
 #define __DRI_IMAGE_USE_SHARE  0x0001
 #define __DRI_IMAGE_USE_SCANOUT0x0002
diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c
index d78cf5f..206ddb4 100644
--- a/src/gbm/backends/dri/gbm_dri.c
+++ b/src/gbm/backends/dri/gbm_dri.c
@@ -294,6 +294,12 @@ gbm_dri_match_format(enum gbm_bo_format format, int *dri_format)
    case GBM_BO_FORMAT_ARGB8888:
       *dri_format = __DRI_IMAGE_FORMAT_ARGB8888;
       break;
+   case GBM_BO_FORMAT_RGB565:
+      *dri_format = __DRI_IMAGE_FORMAT_RGB565;
+      break;
+   case GBM_BO_FORMAT_A8:
+      *dri_format = __DRI_IMAGE_FORMAT_A8;
+      break;
default:
   return -1;
}
diff --git a/src/gbm/main/gbm.h b/src/gbm/main/gbm.h
index aeed0df..889efa2 100644
--- a/src/gbm/main/gbm.h
+++ b/src/gbm/main/gbm.h
@@ -51,6 +51,8 @@ union gbm_bo_handle {
 enum gbm_bo_format {
    GBM_BO_FORMAT_XRGB8888,
    GBM_BO_FORMAT_ARGB8888,
+   GBM_BO_FORMAT_RGB565,
+   GBM_BO_FORMAT_A8
 };
 
 enum gbm_bo_flags {
diff --git a/src/mesa/drivers/dri/intel/intel_screen.c 
b/src/mesa/drivers/dri/intel/intel_screen.c
index e4cc5b0..6284071 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -143,6 +143,11 @@ intel_create_image_from_name(__DRIscreen *screen,
return NULL;
 
 switch (format) {
+    case __DRI_IMAGE_FORMAT_A8:
+       image->format = MESA_FORMAT_A8;
+       image->internal_format = GL_ALPHA;
+       image->data_type = GL_UNSIGNED_BYTE;
+       break;
     case __DRI_IMAGE_FORMAT_RGB565:
        image->format = MESA_FORMAT_RGB565;
        image->internal_format = GL_RGB;
-- 
1.7.4.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 13/14] i965/vs: Implement EXT_texture_swizzle support for VS texturing.

2011-12-15 Thread Eric Anholt
On Wed, 14 Dec 2011 15:11:53 -0800, Kenneth Graunke kenn...@whitecape.org 
wrote:
 On 12/13/2011 01:19 PM, Eric Anholt wrote:
  On Thu,  8 Dec 2011 17:08:04 -0800, Kenneth Graunke kenn...@whitecape.org 
  wrote:
  Signed-off-by: Kenneth Graunke kenn...@whitecape.org
  +   if (one_mask) {
  +  swizzled_result.writemask = one_mask;
  +  emit(MOV(swizzled_result, src_reg(1.0f)));
  +   }
   }
  
  I think this would be wrong for SWIZZLE_ONE of integer textures.
 
 My tests indicate that it works.  swizzled_result gets the appropriate
 register type, so this ends up being something like:
 
 mov(8)   m4<1>.yw:UD   1F   { align16 WE_normal 1Q };
 
 and the MOV instruction does the float-int conversion for us.
 
 Is that okay?  If not, what would you prefer?

Oh, you're right.  I was not thinking correctly.


pgpwzhr2ElOtj.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] gbm: Introduce a new API gbm_bo_create_from_name.

2011-12-15 Thread Eric Anholt
On Thu, 15 Dec 2011 17:58:45 +0800, zhigang.g...@linux.intel.com wrote:
 From: Zhigang Gong zhigang.g...@linux.intel.com
 
 Glamor need a function to create a texture from a
 BO allocated by using libdrm directly in DDX layer.
 EGL image extension API eglCreateImageKHR does support
 this function, but that extension only support one
 colore format - ARGB32 which is not sufficent for
 us. I discussed this in the mail list and KRH suggest
 me to extent GBM to support more color formats should
 be better. I took his advice and decide to use gbm
 bo to create image. Now before extent the color formats,
 I have to add a new API to the create a gbo from a
 name. This commit is for that purpose.

If the DDX is going to be doing the memory management, then the DRI
driver needs to be using the same bufmgr struct and DRI fd as the DDX so
you can share the handle (and thus the mapping and userland caching)
instead of the name.


pgpzbnHLU93VL.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: add MESA_FORMAT_RGB565[_REV] as candidates for GL_R3_G3_B2

2011-12-15 Thread Eric Anholt
On Thu, 15 Dec 2011 07:43:20 -0700, Brian Paul bri...@vmware.com wrote:
 ---
  src/mesa/main/texformat.c |2 ++
  1 files changed, 2 insertions(+), 0 deletions(-)
 
 diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c
 index c776b41..7e60541 100644
 --- a/src/mesa/main/texformat.c
 +++ b/src/mesa/main/texformat.c
 @@ -118,6 +118,8 @@ _mesa_choose_tex_format( struct gl_context *ctx, GLint 
 internalFormat,
break;
case GL_R3_G3_B2:
RETURN_IF_SUPPORTED(MESA_FORMAT_RGB332);
 +  RETURN_IF_SUPPORTED(MESA_FORMAT_RGB565);
 +  RETURN_IF_SUPPORTED(MESA_FORMAT_RGB565_REV);
RETURN_IF_SUPPORTED(MESA_FORMAT_RGB888);
RETURN_IF_SUPPORTED(MESA_FORMAT_XRGB);
RETURN_IF_SUPPORTED(MESA_FORMAT_ARGB);

Reviewed-by: Eric Anholt e...@anholt.net


pgpYmVwRncQ8g.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] mesa: Set the correct ctx->NewState bitfield for rasterizer discard.

2011-12-15 Thread Eric Anholt
On Thu, 15 Dec 2011 00:00:49 +0100, Marek Olšák mar...@gmail.com wrote:
 On Wed, Dec 14, 2011 at 11:25 PM, Paul Berry stereotype...@gmail.com wrote:
  (c) Do nothing, and rely on programmers to remember that RasterDiscard is an
  exception to the usual correspondence between dirty bits and substructures
  of gl_context.
 
  I'm really not comfortable with (c) because of the risk of future bugs.  I
  suppose I could be talked into (b) if there's popular support for it, but
  it's not my favourite, because as I said earlier, I think there are actually
  a lot of good reasons to think of rasterizer discard as related to transform
  feedback.  My preference is to do (a).
 
 (d) Rework the _NEW_* flags such that they roughly match hardware
 state groups, not OpenGL state groups. Direct3D 11 and Gallium are two
 examples of how it could be done.

The problem is that everyone disagrees on what hardware state group a
piece of state is in.  On i965, rasterizer discard is really in the
transform feedback state -- the SOL (transform feedback) unit on gen7,
and the GS on gen6.


pgpAWYgOwtiRd.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] meta: rework dest image allocation in mipmap generation code

2011-12-15 Thread Eric Anholt
On Wed, 14 Dec 2011 14:38:52 -0700, Brian Paul bri...@vmware.com wrote:
 On 12/14/2011 02:13 PM, Eric Anholt wrote:
  On Tue, 13 Dec 2011 18:07:05 -0700, Brian Paulbrian.e.p...@gmail.com  
  wrote:
  From: Brian Paulbri...@vmware.com
 
  This fixes two things:
  1. If the texture object was created with glTexStorage2D, the call
  to _mesa_TexImage2D() would generate INVALID_OPERATION since the
  texture is marked as immutable.
  2. _mesa_TexImage2D() always frees any existing texture image memory
  before allocating new memory.  That's inefficient since the existing
  image is usually the right size already.  Now we only make the call
  when necessary.
 
  v2: use _mesa_TexImage() in prepare_dest_image() to make sure side-effects
  of changing a texture image are observed (like FBO completeness).
 
  Should this live in main/mipmap.c?  It looks like
  generate_mipmap_uncompressed() needs it.
 
 I think the broader question is are we updating fbo completeness when 
 we change any texture image by mipmap generation, right?
 
 We're probably missing that in a few places.
 
 How about I look into that after this meta fix?  R-b?

If you put the function in main/mipmap.c where you're about to move it
to when you look at it after the meta fix :)


pgp9mfh48V3j9.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] gbm: Introduce a new API gbm_bo_create_from_name.

2011-12-15 Thread zhigang gong
On Thu, Dec 15, 2011 at 11:57 PM, Eric Anholt e...@anholt.net wrote:
 On Thu, 15 Dec 2011 17:58:45 +0800, zhigang.g...@linux.intel.com wrote:
 From: Zhigang Gong zhigang.g...@linux.intel.com

 Glamor need a function to create a texture from a
 BO allocated by using libdrm directly in DDX layer.
 EGL image extension API eglCreateImageKHR does support
 this function, but that extension only support one
 colore format - ARGB32 which is not sufficent for
 us. I discussed this in the mail list and KRH suggest
 me to extent GBM to support more color formats should
 be better. I took his advice and decide to use gbm
 bo to create image. Now before extent the color formats,
 I have to add a new API to the create a gbo from a
 name. This commit is for that purpose.

 If the DDX is going to be doing the memory management, then the DRI
 driver needs to be using the same bufmgr struct and DRI fd as the DDX so
 you can share the handle (and thus the mapping and userland caching)
 instead of the name.
That looks better than just creating an image from the name, since the DDX and
the DRI driver are really running in the same process in this case. But I don't
know how to share the DDX's bufmgr with the DRI driver, or how to make a texture
from a handle directly using the EGL/GBM interfaces. Can you give me more hints
about how to achieve that?  Thanks.


 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] vertex array regression

2011-12-15 Thread Brian Paul

Mathias,

There's a regression in vertex array drawing with this commit:

commit ed42c2580717527b2005580940fc766d95bb6b0b
Author: Mathias Fröhlich mathias.froehl...@web.de
Date:   Mon Oct 31 16:23:40 2011 +0100

vbo: Use The VERT_{ATTRIB,BIT} defines.

Signed-off-by: Mathias Froehlich mathias.froehl...@web.de
Reviewed-by: Brian Paul bri...@vmware.com
Reviewed-by: Eric Anholt e...@anholt.net


To see the problem, run mesa/demos/src/demos/isosurf, choose 
glDrawArrays or glDrawElements from the pop-up menu (right mouse 
button).  I see the problem (random/missing vertices or failed 
assertion) with all gallium drivers.  The swrast/i965 drivers seem 
uneffected.


I'll try to debug it further, but maybe you could double-check your work.

-Brian
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] mesa: Set the correct ctx->NewState bitfield for rasterizer discard.

2011-12-15 Thread Paul Berry
On 15 December 2011 08:02, Eric Anholt e...@anholt.net wrote:

 On Thu, 15 Dec 2011 00:00:49 +0100, Marek Olšák mar...@gmail.com wrote:
  On Wed, Dec 14, 2011 at 11:25 PM, Paul Berry stereotype...@gmail.com
 wrote:
   (c) Do nothing, and rely on programmers to remember that RasterDiscard
 is an
   exception to the usual correspondence between dirty bits and
 substructures
   of gl_context.
  
   I'm really not comfortable with (c) because of the risk of future
 bugs.  I
   suppose I could be talked into (b) if there's popular support for it,
 but
   it's not my favourite, because as I said earlier, I think there are
 actually
   a lot of good reasons to think of rasterizer discard as related to
 transform
   feedback.  My preference is to do (a).
 
  (d) Rework the _NEW_* flags such that they roughly match hardware
  state groups, not OpenGL state groups. Direct3D 11 and Gallium are two
  examples of how it could be done.

 The problem is that everyone disagrees on what hardware state group a
 piece of state is in.  On i965, rasterizer discard is really in the
 transform feedback state -- the SOL (transform feedback) unit on gen7,
 and the GS on gen6.


I have been thinking about this more this morning, and I have an idea for
how to accomplish (d) that I think would address this problem.  It's not a
trivial change, but it's something we could implement incrementally, so we
apply it to rasterizer discard now, and over time extend it to cover other
pieces of state.  Here's the idea:

The key problem is that there are so many distinct pieces of state that we
could never possibly assign a separate bit to each one--we would run out of
space in the bitfield.  So instead of having core Mesa decide how they are
grouped (and, inevitably, wind up grouping them in a way that doesn't work
well for some drivers), let each driver decide how they are grouped.  The
drivers communicate this grouping to core Mesa by populating a new data
structure (at initialization time) called ctx->StateFlags.  ctx->StateFlags
has an entry for each distinct piece of state, which tells which bits in
ctx->NewState should be set when that state changes.
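
A rough sketch of what such a structure might look like (all names here are
hypothetical, not an existing Mesa interface):

   struct gl_state_flags
   {
      /* bit(s) to set in ctx->NewState when the given state changes */
      GLbitfield TransformFeedback_Active;   /* Begin/EndTransformFeedback */
      GLbitfield TransformFeedback_Paused;   /* Pause/ResumeTransformFeedback */
      GLbitfield RasterizerDiscard;          /* GL_RASTERIZER_DISCARD enable */
      /* ... one entry per fine-grained piece of state ... */
   };

   /* i965: all three features live in the same hardware unit, so one bit */
   ctx->StateFlags->TransformFeedback_Active = _NEW_TRANSFORM_FEEDBACK;
   ctx->StateFlags->TransformFeedback_Paused = _NEW_TRANSFORM_FEEDBACK;
   ctx->StateFlags->RasterizerDiscard        = _NEW_TRANSFORM_FEEDBACK;

   /* r600: rasterizer discard is implemented with face culling, so it would
    * point at a different bit, e.g.:
    */
   ctx->StateFlags->RasterizerDiscard        = _NEW_POLYGON;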

So, for example, in BeginTransformFeedback() and EndTransformFeedback(),
instead of doing this:

FLUSH_VERTICES(ctx, _NEW_TRANSFORM_FEEDBACK);

We would do this:

FLUSH_VERTICES(ctx, ctx->StateFlags->TransformFeedback_Active);

In PauseTransformFeedback() and ResumeTransformFeedback() we would do:

FLUSH_VERTICES(ctx, ctx->StateFlags->TransformFeedback_Paused);

And in enable.c, when rasterizer discard is turned on or off, we would do:

FLUSH_VERTICES(ctx, ctx->StateFlags->RasterizerDiscard);

In the i965 driver, where all of these features map to the GS stage of the
pipeline, we would initialize TransformFeedback_Active,
TransformFeedback_Paused, and RasterizerDiscard all to the same value.  In
the r600 driver, where rasterizer discard is implemented using face
culling, StateFlags->RasterizerDiscard would indicate a totally different
bit than those used for transform feedback.

In the short term, we could implement this technique just for rasterizer
discard, to address the differences between r600 and i965 that we're
discussing in this email thread.  In the long term, our goal would be to
replace all of the _NEW_* constants with a fine-grained set of values in
StateFlags.  Once we've done that, each driver can set up StateFlags in a
way that precisely matches how state is grouped for that particular piece
of hardware.

What do y'all think?  If there's support for this idea I'd be glad to make
an RFC patch.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] gbm: Introduce a new API gbm_bo_create_from_name.

2011-12-15 Thread Kristian Høgsberg
On Thu, Dec 15, 2011 at 11:08 AM, zhigang gong zhigang.g...@gmail.com wrote:
 On Thu, Dec 15, 2011 at 11:57 PM, Eric Anholt e...@anholt.net wrote:
 On Thu, 15 Dec 2011 17:58:45 +0800, zhigang.g...@linux.intel.com wrote:
 From: Zhigang Gong zhigang.g...@linux.intel.com

 Glamor need a function to create a texture from a
 BO allocated by using libdrm directly in DDX layer.
 EGL image extension API eglCreateImageKHR does support
 this function, but that extension only support one
 colore format - ARGB32 which is not sufficent for
 us. I discussed this in the mail list and KRH suggest
 me to extent GBM to support more color formats should
 be better. I took his advice and decide to use gbm
 bo to create image. Now before extent the color formats,
 I have to add a new API to the create a gbo from a
 name. This commit is for that purpose.

 If the DDX is going to be doing the memory management, then the DRI
 driver needs to be using the same bufmgr struct and DRI fd as the DDX so
 you can share the handle (and thus the mapping and userland caching)
 instead of the name.
 Looks better than just create an image from the name as the DDX and DRI
 are really running in the same process in this case. But I don't know how to
 share the DDX's bufmgr with DRI driver, and how to make a texture from
 a handle directly by using the EGL/GBM interfaces. Can you give me more hints
 about how to achieve that?  Thanks.

I think you need to make the DDX use GBM for all allocations then.

Kristian
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 43864] src/gallium/drivers/llvmpipe/lp_state_so.c:46: error: ‘struct pipe_stream_output_info’ has no member named ‘output_buffer’

2011-12-15 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=43864

--- Comment #1 from Vinson Lee v...@vmware.com 2011-12-15 10:46:10 PST ---
861a029ddb31e91bb4d8e18ab708d0d172f63aad is the first bad commit
commit 861a029ddb31e91bb4d8e18ab708d0d172f63aad
Author: Marek Olšák mar...@gmail.com
Date:   Thu Dec 15 18:42:21 2011 +0100

gallium: interface changes necessary to implement transform feedback (v5)

Namely:
- EXT_transform_feedback
- ARB_transform_feedback2
- ARB_transform_feedback_instanced

The old interface was not useful for OpenGL and had to be reworked.

This interface was originally designed for OpenGL, but additional
changes have been made in order to make st/d3d1x support easier.

The most notable change is the stream-out info must be linked
with a vertex or geometry shader and cannot be set independently.
This is due to limitations of existing hardware (special shader
instructions must be used to write into stream-out buffers),
and it's also how OpenGL works (stream outputs must be specified
prior to linking shaders).

Other than that, each stream output buffer has a view into it that
internally maintains the number of bytes which have been written
into it. (one buffer can be bound in several different transform
feedback objects in OpenGL, so we must be able to have several views
around) The set_stream_output_targets function contains a parameter
saying whether new data should be appended or not.

Also, the view can optionally be used to provide the vertex
count for draw_vbo. Note that the count is supposed to be stored
in device memory and the CPU never gets to know its value.

OpenGL way | Gallium way

BeginTF= set_so_targets(append_bitmask = 0)
PauseTF= set_so_targets(num_targets = 0)
ResumeTF   = set_so_targets(append_bitmask = ~0)
EndTF  = set_so_targets(num_targets = 0)
DrawTF = use pipe_draw_info::count_from_stream_output

v2: * removed the reset_stream_output_targets function
* added a parameter append_bitmask to set_stream_output_targets,
  each bit specifies whether new data should be appended to each
  buffer or not.
v3: * added PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME for ARB_tfb2,
  note that the draw-auto subset is always required (for d3d10),
  only the pause/resume functionality is limited if the CAP is not
  advertised
v4: * update gallium/docs
v5: * compactified struct pipe_stream_output_info, updated dump/trace

:04 04 d6f5d1a415057bae4ea6ebf55a892eb499f2cd49
0e27123f5b6e2fe920c151d83a807911589b2b22 Msrc
bisect run success

-- 
Configure bugmail: https://bugs.freedesktop.org/userprefs.cgi?tab=email
--- You are receiving this mail because: ---
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] mesa: Set the correct ctx->NewState bitfield for rasterizer discard.

2011-12-15 Thread Paul Berry
On 14 December 2011 13:42, Marek Olšák mar...@gmail.com wrote:

 I think RASTERIZER_DISCARD has nothing to do with transform feedback.
 I think it's part of the same spec because it's not useful without it.
 As I understand it, _NEW_TRANSFORM_FEEDBACK is dirty when transform
 feedback buffer bindings are changed or just enabled/disabled. On the
 other hand, RASTERIZER_DISCARD enables or disables the rasterizer, so
 it should fall into the same category as face culling for example. (I
 even implemented it using face culling on r600)


Another thought which just came up in a discussion: in i965, face culling
only applies to triangles, so this wouldn't work for discarding points and
lines.  Is it possible that the same situation applies to r600?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/2 v2] Add support for clip distances in Gallium

2011-12-15 Thread Jose Fonseca
- Original Message -
 On 12/14/2011 12:58 AM, Ian Romanick wrote:
  On 12/13/2011 01:25 PM, Jose Fonseca wrote:
 
 
  - Original Message -
  On 12/13/2011 03:09 PM, Jose Fonseca wrote:
 
  - Original Message -
  On 12/13/2011 12:26 PM, Bryan Cain wrote:
  On 12/13/2011 02:11 PM, Jose Fonseca wrote:
  - Original Message -
  This is an updated version of the patch set I sent to the
  list
  a
  few
  hours
  ago.
  There is now a TGSI property called
  TGSI_PROPERTY_NUM_CLIP_DISTANCES
  that drivers can use to determine how many of the 8
  available
  clip
  distances
  are actually used by a shader.
  Can't the info in TGSI_PROPERTY_NUM_CLIP_DISTANCES be easily
  derived from the shader, and queried through
  src/gallium/auxiliary/tgsi/tgsi_scan.h ?
  No.  The clip distances can be indirectly addressed (there are
  up
  to 2
  of them in vec4 form for a total of 8 floats), which makes it
  impossible
  to determine which ones are used by analyzing the shader.
  The description is almost complete. :)  The issue is that the
  shader
  may
  declare
 
  out float gl_ClipDistance[4];
 
  the use non-constant addressing of the array.  The compiler
  knows
  that
  gl_ClipDistance has at most 4 elements, but post-hoc analysis
  would
  not
  be able to determine that.  Often the fixed-function hardware
  (see
  below) needs to know which clip distance values are actually
  written.
  But don't all the clip distances written by the shader need to
  be
  declared?
 
  E.g.:
 
  DCL OUT[0], CLIPDIST[0]
  DCL OUT[1], CLIPDIST[1]
  DCL OUT[2], CLIPDIST[2]
  DCL OUT[3], CLIPDIST[3]
 
  therefore a trivial analysis of the declarations convey that?
 
  No.  Clip distance is an array of up to 8 floats in GLSL, but
  it's
  represented in the hardware as 2 vec4s.  You can tell by
  analyzing
  the
  declarations whether there are more than 4 clip distances in use,
  but
  not which components the shader writes to.
  TGSI_PROPERTY_NUM_CLIP_DISTANCES is the number of components in
  use,
  not
  the number of full vectors.
 
  Lets imagine
 
 out float gl_ClipDistance[6];
 
  Each a clip distance is a scalar float.
 
  Either all hardware represents the 8 clip distances as two 4
  vectors,
  and we do:
 
 DCL OUT[0].xywz, CLIPDIST[0]
 DCL OUT[1].xy, CLIPDIST[1]
 
  using the full range of struct tgsi_declaration::UsageMask [1] or
  we
  represent them as as scalars:
 
 DCL OUT[0].x, CLIPDIST[0]
 DCL OUT[1].x, CLIPDIST[1]
 DCL OUT[2].x, CLIPDIST[2]
 DCL OUT[3].x, CLIPDIST[3]
 DCL OUT[4].x, CLIPDIST[4]
 DCL OUT[5].x, CLIPDIST[5]
 
  If indirect addressing is allowed as I read bore, then maybe the
  later
  is better.
  
  As far as I'm aware, all hardware represents it as the former, and
  we
  have a lowering pass to fix-up the float[] accesses to be vec4[]
  accesses.
 
 GeForce8+ = scalar architecture, no vectors, addresses are byte
 based,
 can access individual components just fine.

Ok. So we should avoid baking this vec4 assumption in TGSI semantics.

 Something like:
 
 gl_ClipDistance[i - 12] = some_value;
 
 DCL OUT[0].xyzw, POSITION
 DCL OUT[1-8].x, CLIPDIST[0-7]
 
 MOV OUT1[ADDR[0].x - 12].x, TEMP[0].
 *  **
 
 *   - tgsi_dimension.Index specifying the base address by referencing
 a
 declaration
 **  - tgsi_src_register.Index
 
 is the only way I see to make this work nicely on all hardware. 
 (This is also needed if OUT[i] and OUT[i + 1] cannot be assigned to
 contiguous hardware resources because of semantic.)

I think that having indexable temps, like D3D10, would be a cleaner:

  DCL OUT[0].xyzw, POSITION
  DCL OUT[1][0-7].x, CLIPDIST[0-7]

  MOV OUT[1][ADDR[0].x - 12].x, TEMP[0].

I propose we first add this new kind of temp at a first stage, then prohibit 
indirect addressing of all but this kind of temps.

 For constrained hardware the driver can build the clunky
 
 c := ADDR[0].x % 4
 i := ADDR[0].x / 4
 IF [c == 0]
   MOV OUT[i].x, TEMP[0].
 ELSE
 IF [c == 1]
   MOV OUT[i].y, TEMP[0].
 ELSE
 IF [c == 2]
   MOV OUT[i].z, TEMP[0].
 ELSE
   MOV OUT[i].w, TEMP[0].
 ENDIF
 
 itself.

Sounds good plan to me.

BTW, I took a look at the input/output UsageMasks and although we don't use them,
I really think we should, as having that info readily accessible would let us
avoid wasting time/bandwidth copying attributes which are not needed.

Jose
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/2 v2] Add support for clip distances in Gallium

2011-12-15 Thread Jose Fonseca


- Original Message -
 On 12/13/2011 02:12 PM, Jose Fonseca wrote:
 
 
  - Original Message -
  On 12/13/2011 03:48 PM, Jose Fonseca wrote:
  - Original Message -
  On 12/13/2011 03:25 PM, Jose Fonseca wrote:
  - Original Message -
  On 12/13/2011 03:09 PM, Jose Fonseca wrote:
  - Original Message -
  On 12/13/2011 12:26 PM, Bryan Cain wrote:
  On 12/13/2011 02:11 PM, Jose Fonseca wrote:
  - Original Message -
  This is an updated version of the patch set I sent to the
  list
  a
  few
  hours
  ago.
  There is now a TGSI property called
  TGSI_PROPERTY_NUM_CLIP_DISTANCES
  that drivers can use to determine how many of the 8
  available
  clip
  distances
  are actually used by a shader.
  Can't the info in TGSI_PROPERTY_NUM_CLIP_DISTANCES be
  easily
  derived from the shader, and queried through
  src/gallium/auxiliary/tgsi/tgsi_scan.h ?
  No.  The clip distances can be indirectly addressed (there
  are
  up
  to 2
  of them in vec4 form for a total of 8 floats), which makes
  it
  impossible
  to determine which ones are used by analyzing the shader.
  The description is almost complete. :)  The issue is that
  the
  shader
  may
  declare
 
  out float gl_ClipDistance[4];
 
  the use non-constant addressing of the array.  The compiler
  knows
  that
  gl_ClipDistance has at most 4 elements, but post-hoc
  analysis
  would
  not
  be able to determine that.  Often the fixed-function
  hardware
  (see
  below) needs to know which clip distance values are actually
  written.
  But don't all the clip distances written by the shader need
  to
  be
  declared?
 
  E.g.:
 
  DCL OUT[0], CLIPDIST[0]
  DCL OUT[1], CLIPDIST[1]
  DCL OUT[2], CLIPDIST[2]
  DCL OUT[3], CLIPDIST[3]
 
  therefore a trivial analysis of the declarations convey that?
  No.  Clip distance is an array of up to 8 floats in GLSL, but
  it's
  represented in the hardware as 2 vec4s.  You can tell by
  analyzing
  the
  declarations whether there are more than 4 clip distances in
  use,
  but
  not which components the shader writes to.
  TGSI_PROPERTY_NUM_CLIP_DISTANCES is the number of components
  in
  use,
  not
  the number of full vectors.
  Lets imagine
 
 out float gl_ClipDistance[6];
 
  Each a clip distance is a scalar float.
 
  Either all hardware represents the 8 clip distances as two 4
  vectors, and we do:
 
 DCL OUT[0].xywz, CLIPDIST[0]
 DCL OUT[1].xy, CLIPDIST[1]
 
  using the full range of struct tgsi_declaration::UsageMask [1]
  or
  we represent them as as scalars:
 
 DCL OUT[0].x, CLIPDIST[0]
 DCL OUT[1].x, CLIPDIST[1]
 DCL OUT[2].x, CLIPDIST[2]
 DCL OUT[3].x, CLIPDIST[3]
 DCL OUT[4].x, CLIPDIST[4]
 DCL OUT[5].x, CLIPDIST[5]
 
  If indirect addressing is allowed as I read bore, then maybe
  the
  later is better.
 
  I confess my ignorance about clipping and maybe I'm being
  dense,
  but I still don't see the need for this
  TGSI_PROPERTY_NUM_CLIP_DISTANCES.  Could you please draft an
  example TGSI shader showing this property (or just paste one
  generated with your change)?  I think that would help a lot.
 
 
  Jose
 
 
  [1] I don't know if tgsi_dump pays much attention to
tgsi_declaration::UsageMask, but it does exist.
  UsageMask might work, but before that can be considered a viable
  solution, someone will need to make it possible to actually
  declare
  it
  from ureg.  As it is, ureg is hardcoded to set UsageMask to xyzw
  no
  matter what on all declared inputs and outputs.
  ureg automatically fills the UsageMask from the destionation
  register masks, since it easy to determine from the opcodes.
 
  Which leads me to my second point, if indirect addressing of
  CLIPDIST is allowed, then we can't really pack the clip distance
  as 4-elem vectors in TGSI: not only the syntax would be very
  weird, but it would create havoc on all tgsi-translating code
  that
  makes decisions based on indirect addressing of registers.
 
  That is,
 
 float gl_ClipDistance[6];
 
 gl_ClipDistance[i] = foo;
 
  would become
 
  DCL OUT[0].x, CLIPDIST[0]
  DCL OUT[1].x, CLIPDIST[1]
  DCL OUT[2].x, CLIPDIST[2]
  DCL OUT[3].x, CLIPDIST[3]
  DCL OUT[4].x, CLIPDIST[4]
  DCL OUT[5].x, CLIPDIST[5]
  MOV OUT[ADDR[0].x].x, foo
 
  and the info from TGSI_PROPERTY_NUM_CLIP_DISTANCES can be
  obtained
  by walking the declaration (which can/should be done only once in
  tgsi_scan).
 
  But this just doesn't look like it would ever work:
 
  DCL OUT[0].xyzw, CLIPDIST[0]
  DCL OUT[1].xy  , CLIPDIST[1]
  MOV OUT[ADDR[0].x]., foo
 
  Jose
 
  If ureg automatically fills the UsageMask from the accessed
  components,
  it's probably a better solution than the property.
 
  About the indirect addressing of components: the GLSL compiler
  lowers
  indirect addressing of the gl_ClipDistance array to indirect
  addressing
  of the 2 vec4s, combined with conditional moves to the different
  components.  Which is 

Re: [Mesa-dev] [PATCH 0/2 v2] Add support for clip distances in Gallium

2011-12-15 Thread Christoph Bumiller
On 15.12.2011 20:09, Jose Fonseca wrote:
 - Original Message -
 On 12/14/2011 12:58 AM, Ian Romanick wrote:
 On 12/13/2011 01:25 PM, Jose Fonseca wrote:

 - Original Message -
 On 12/13/2011 03:09 PM, Jose Fonseca wrote:
 - Original Message -
 On 12/13/2011 12:26 PM, Bryan Cain wrote:
 On 12/13/2011 02:11 PM, Jose Fonseca wrote:
 - Original Message -
 This is an updated version of the patch set I sent to the
 list
 a
 few
 hours
 ago.
 There is now a TGSI property called
 TGSI_PROPERTY_NUM_CLIP_DISTANCES
 that drivers can use to determine how many of the 8
 available
 clip
 distances
 are actually used by a shader.
 Can't the info in TGSI_PROPERTY_NUM_CLIP_DISTANCES be easily
 derived from the shader, and queried through
 src/gallium/auxiliary/tgsi/tgsi_scan.h ?
 No.  The clip distances can be indirectly addressed (there are
 up
 to 2
 of them in vec4 form for a total of 8 floats), which makes it
 impossible
 to determine which ones are used by analyzing the shader.
 The description is almost complete. :)  The issue is that the
 shader
 may
 declare

 out float gl_ClipDistance[4];

 the use non-constant addressing of the array.  The compiler
 knows
 that
 gl_ClipDistance has at most 4 elements, but post-hoc analysis
 would
 not
 be able to determine that.  Often the fixed-function hardware
 (see
 below) needs to know which clip distance values are actually
 written.
 But don't all the clip distances written by the shader need to
 be
 declared?

 E.g.:

 DCL OUT[0], CLIPDIST[0]
 DCL OUT[1], CLIPDIST[1]
 DCL OUT[2], CLIPDIST[2]
 DCL OUT[3], CLIPDIST[3]

 therefore a trivial analysis of the declarations convey that?
 No.  Clip distance is an array of up to 8 floats in GLSL, but
 it's
 represented in the hardware as 2 vec4s.  You can tell by
 analyzing
 the
 declarations whether there are more than 4 clip distances in use,
 but
 not which components the shader writes to.
 TGSI_PROPERTY_NUM_CLIP_DISTANCES is the number of components in
 use,
 not
 the number of full vectors.
 Lets imagine

out float gl_ClipDistance[6];

 Each a clip distance is a scalar float.

 Either all hardware represents the 8 clip distances as two 4
 vectors,
 and we do:

DCL OUT[0].xywz, CLIPDIST[0]
DCL OUT[1].xy, CLIPDIST[1]

 using the full range of struct tgsi_declaration::UsageMask [1] or
 we
 represent them as as scalars:

DCL OUT[0].x, CLIPDIST[0]
DCL OUT[1].x, CLIPDIST[1]
DCL OUT[2].x, CLIPDIST[2]
DCL OUT[3].x, CLIPDIST[3]
DCL OUT[4].x, CLIPDIST[4]
DCL OUT[5].x, CLIPDIST[5]

 If indirect addressing is allowed as I read bore, then maybe the
 later
 is better.
 As far as I'm aware, all hardware represents it as the former, and
 we
 have a lowering pass to fix-up the float[] accesses to be vec4[]
 accesses.
 GeForce8+ = scalar architecture, no vectors, addresses are byte
 based,
 can access individual components just fine.
 Ok. So we should avoid baking this vec4 assumption in TGSI semantics.

 Something like:

 gl_ClipDistance[i - 12] = some_value;

 DCL OUT[0].xyzw, POSITION
 DCL OUT[1-8].x, CLIPDIST[0-7]

 MOV OUT1[ADDR[0].x - 12].x, TEMP[0].
 *  **

 *   - tgsi_dimension.Index specifying the base address by referencing
 a
 declaration
 **  - tgsi_src_register.Index

 is the only way I see to make this work nicely on all hardware. 
 (This is also needed if OUT[i] and OUT[i + 1] cannot be assigned to
 contiguous hardware resources because of semantic.)
 I think that having indexable temps, like D3D10, would be a cleaner:

The problem is that we need an indexable version of every file then (at
least INPUT, OUTPUT), and then all the nice 32 bit structs break down
when we get more than 16 files.

D3D doesn't have these because indirect IN/OUT isn't allowed there, but
it is in GL and the hardware can do it.

Also, having an indexable version of every file seems odd, especially
since we need a way to distinguish individual arrays inside that file
anyway (just SM4 uses 2 indices to access INDEXABLE_TEMP; for INPUT
we'll need 3 indices).

   DCL OUT[0].xyzw, POSITION
   DCL OUT[1][0-7].x, CLIPDIST[0-7]

   MOV OUT[1][ADDR[0].x - 12].x, TEMP[0].

 I propose we first add this new kind of temp at a first stage, then prohibit 
 indirect addressing of all but this kind of temps.

There's already TEMPORARY_ARRAY, but no one wants to use it because it's
not clear how to distinguish individual arrays ...

 For constrained hardware the driver can build the clunky

 c := ADDR[0].x % 4
 i := ADDR[0].x / 4
 IF [c == 0]
   MOV OUT[i].x, TEMP[0].
 ELSE
 IF [c == 1]
   MOV OUT[i].y, TEMP[0].
 ELSE
 IF [c == 2]
   MOV OUT[i].z, TEMP[0].
 ELSE
   MOV OUT[i].w, TEMP[0].
 ENDIF

 itself.
 Sounds good plan to me.

 BTW, I took a look at inputs/outputs UsageMasks and although we don't use 
 them, I really think we really should, as having that info readily accessible 
 would allow to avoid wasting time/bandwidth copying attributes which are not 
 

Re: [Mesa-dev] [PATCH 0/2 v2] Add support for clip distances in Gallium

2011-12-15 Thread Jose Fonseca
- Original Message -
 On 12/13/2011 04:22 PM, Jose Fonseca wrote:
  - Original Message -
 
  - Original Message -
  On 12/13/2011 03:48 PM, Jose Fonseca wrote:
  - Original Message -
  On 12/13/2011 03:25 PM, Jose Fonseca wrote:
  - Original Message -
  On 12/13/2011 03:09 PM, Jose Fonseca wrote:
  - Original Message -
  On 12/13/2011 12:26 PM, Bryan Cain wrote:
  On 12/13/2011 02:11 PM, Jose Fonseca wrote:
  - Original Message -
  This is an updated version of the patch set I sent to
  the
  list
  a
  few
  hours
  ago.
  There is now a TGSI property called
  TGSI_PROPERTY_NUM_CLIP_DISTANCES
  that drivers can use to determine how many of the 8
  available
  clip
  distances
  are actually used by a shader.
  Can't the info in TGSI_PROPERTY_NUM_CLIP_DISTANCES be
  easily
  derived from the shader, and queried through
  src/gallium/auxiliary/tgsi/tgsi_scan.h ?
  No.  The clip distances can be indirectly addressed (there
  are
  up
  to 2
  of them in vec4 form for a total of 8 floats), which makes
  it
  impossible
  to determine which ones are used by analyzing the shader.
  The description is almost complete. :)  The issue is that
  the
  shader
  may
  declare
 
  out float gl_ClipDistance[4];
 
  the use non-constant addressing of the array.  The compiler
  knows
  that
  gl_ClipDistance has at most 4 elements, but post-hoc
  analysis
  would
  not
  be able to determine that.  Often the fixed-function
  hardware
  (see
  below) needs to know which clip distance values are
  actually
  written.
  But don't all the clip distances written by the shader need
  to
  be
  declared?
 
  E.g.:
 
  DCL OUT[0], CLIPDIST[0]
  DCL OUT[1], CLIPDIST[1]
  DCL OUT[2], CLIPDIST[2]
  DCL OUT[3], CLIPDIST[3]
 
  therefore a trivial analysis of the declarations convey
  that?
  No.  Clip distance is an array of up to 8 floats in GLSL, but
  it's
  represented in the hardware as 2 vec4s.  You can tell by
  analyzing
  the
  declarations whether there are more than 4 clip distances in
  use,
  but
  not which components the shader writes to.
  TGSI_PROPERTY_NUM_CLIP_DISTANCES is the number of components
  in
  use,
  not
  the number of full vectors.
  Let's imagine
 
out float gl_ClipDistance[6];
 
  Each clip distance is a scalar float.
 
  Either all hardware represents the 8 clip distances as two 4
  vectors, and we do:
 
DCL OUT[0].xywz, CLIPDIST[0]
DCL OUT[1].xy, CLIPDIST[1]
 
  using the full range of struct tgsi_declaration::UsageMask [1]
  or
  we represent them as scalars:
 
DCL OUT[0].x, CLIPDIST[0]
DCL OUT[1].x, CLIPDIST[1]
DCL OUT[2].x, CLIPDIST[2]
DCL OUT[3].x, CLIPDIST[3]
DCL OUT[4].x, CLIPDIST[4]
DCL OUT[5].x, CLIPDIST[5]
 
  If indirect addressing is allowed as I read before, then maybe
  the
  latter is better.
 
  I confess my ignorance about clipping and maybe I'm being
  dense,
  but I still don't see the need for this
  TGSI_PROPERTY_NUM_CLIP_DISTANCES.  Could you please draft an
  example TGSI shader showing this property (or just paste one
  generated with your change)?  I think that would help a lot.
 
 
  Jose
 
 
  [1] I don't know if tgsi_dump pays much attention to
   tgsi_declaration::UsageMask, but it does exist.
  UsageMask might work, but before that can be considered a
  viable
  solution, someone will need to make it possible to actually
  declare
  it
  from ureg.  As it is, ureg is hardcoded to set UsageMask to
  xyzw
  no
  matter what on all declared inputs and outputs.
  ureg automatically fills the UsageMask from the destination
  register masks, since it is easy to determine from the opcodes.
  Wait, where does it do that?  When I search through tgsi_ureg.c
  for
  UsageMask, all it shows are assignments of TGSI_WRITEMASK_XYZW
  to
  the
  UsageMask property.
  ah. I may be lying. But I'm pretty sure I wrote such code
  somewhere,
  sometime. Let me dig it.
  I was lying.  I wrote tgsi_util_get_inst_usage_mask() in
  src/gallium/auxiliary/tgsi/tgsi_util.c , but it only analyses
  which registers are _read_, and never got hooked into ureg anyway.
 
  I don't want you to jump through hoops just to pass a scalar quantity.
  So may be just add ability to ureg to specify declaration's output
  mask?
 
 One problem with this is that the output mask would have to be a
 parameter to a new declaration function in ureg, like
 ureg_DECL_output_with_mask() instead of just ureg_DECL_output().  In
 this case, ureg_DECL_output_with_mask() would be the only DECL
 function
 with a usage mask specified.  If that asymmetry is okay with you, I
 think I could go the UsageMask route.

It looks from Christoph Bumiller's reply that it might be better to describe
CLIPDIST as an array after all.

But FWIW ureg_DECL_output_with_mask() makes sense in its own right, as I really 
think we should start filling these masks, instead of assuming that everything 
takes 4 components.

Ureg to fill them automatically from the 
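
For illustration only, a sketch of the masked declaration helper being
discussed; ureg_DECL_output_with_mask() is the proposal from this thread,
not an existing ureg entry point, and it simply mirrors ureg_DECL_output()
with an explicit UsageMask (e.g. TGSI_WRITEMASK_X for a scalar CLIPDIST
element):

/* Hypothetical prototype, not in tgsi_ureg.h today. */
#include "tgsi/tgsi_ureg.h"

struct ureg_dst
ureg_DECL_output_with_mask(struct ureg_program *ureg,
                           unsigned semantic_name,
                           unsigned semantic_index,
                           unsigned usage_mask);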

Re: [Mesa-dev] [PATCH 0/2 v2] Add support for clip distances in Gallium

2011-12-15 Thread Jose Fonseca


- Original Message -
 On 15.12.2011 20:09, Jose Fonseca wrote:
  - Original Message -
  On 12/14/2011 12:58 AM, Ian Romanick wrote:
  On 12/13/2011 01:25 PM, Jose Fonseca wrote:
 
  - Original Message -
  On 12/13/2011 03:09 PM, Jose Fonseca wrote:
  - Original Message -
  On 12/13/2011 12:26 PM, Bryan Cain wrote:
  On 12/13/2011 02:11 PM, Jose Fonseca wrote:
  - Original Message -
  This is an updated version of the patch set I sent to the
  list
  a
  few
  hours
  ago.
  There is now a TGSI property called
  TGSI_PROPERTY_NUM_CLIP_DISTANCES
  that drivers can use to determine how many of the 8
  available
  clip
  distances
  are actually used by a shader.
  Can't the info in TGSI_PROPERTY_NUM_CLIP_DISTANCES be
  easily
  derived from the shader, and queried through
  src/gallium/auxiliary/tgsi/tgsi_scan.h ?
  No.  The clip distances can be indirectly addressed (there
  are
  up
  to 2
  of them in vec4 form for a total of 8 floats), which makes
  it
  impossible
  to determine which ones are used by analyzing the shader.
  The description is almost complete. :)  The issue is that the
  shader
  may
  declare
 
  out float gl_ClipDistance[4];
 
  the use non-constant addressing of the array.  The compiler
  knows
  that
  gl_ClipDistance has at most 4 elements, but post-hoc analysis
  would
  not
  be able to determine that.  Often the fixed-function hardware
  (see
  below) needs to know which clip distance values are actually
  written.
  But don't all the clip distances written by the shader need to
  be
  declared?
 
  E.g.:
 
  DCL OUT[0], CLIPDIST[0]
  DCL OUT[1], CLIPDIST[1]
  DCL OUT[2], CLIPDIST[2]
  DCL OUT[3], CLIPDIST[3]
 
  therefore a trivial analysis of the declarations convey that?
  No.  Clip distance is an array of up to 8 floats in GLSL, but
  it's
  represented in the hardware as 2 vec4s.  You can tell by
  analyzing
  the
  declarations whether there are more than 4 clip distances in
  use,
  but
  not which components the shader writes to.
  TGSI_PROPERTY_NUM_CLIP_DISTANCES is the number of components in
  use,
  not
  the number of full vectors.
  Lets imagine
 
 out float gl_ClipDistance[6];
 
  Each a clip distance is a scalar float.
 
  Either all hardware represents the 8 clip distances as two 4
  vectors,
  and we do:
 
 DCL OUT[0].xywz, CLIPDIST[0]
 DCL OUT[1].xy, CLIPDIST[1]
 
  using the full range of struct tgsi_declaration::UsageMask [1]
  or
  we
  represent them as as scalars:
 
 DCL OUT[0].x, CLIPDIST[0]
 DCL OUT[1].x, CLIPDIST[1]
 DCL OUT[2].x, CLIPDIST[2]
 DCL OUT[3].x, CLIPDIST[3]
 DCL OUT[4].x, CLIPDIST[4]
 DCL OUT[5].x, CLIPDIST[5]
 
  If indirect addressing is allowed as I read bore, then maybe the
  later
  is better.
  As far as I'm aware, all hardware represents it as the former,
  and
  we
  have a lowering pass to fix-up the float[] accesses to be vec4[]
  accesses.
  GeForce8+ = scalar architecture, no vectors, addresses are byte
  based,
  can access individual components just fine.
  Ok. So we should avoid baking this vec4 assumption in TGSI
  semantics.
 
  Something like:
 
  gl_ClipDistance[i - 12] = some_value;
 
  DCL OUT[0].xyzw, POSITION
  DCL OUT[1-8].x, CLIPDIST[0-7]
 
  MOV OUT1[ADDR[0].x - 12].x, TEMP[0].
  *  **
 
  *   - tgsi_dimension.Index specifying the base address by
  referencing
  a
  declaration
  **  - tgsi_src_register.Index
 
  is the only way I see to make this work nicely on all hardware.
  (This is also needed if OUT[i] and OUT[i + 1] cannot be assigned
  to
  contiguous hardware resources because of semantic.)
  I think that having indexable temps, like D3D10, would be a
  cleaner:
 
 The problem is that we need an indexable version of every file then
 (at
 least INPUT, OUTPUT), and then all the nice 32 bit structs break down
 when we get more than 16 files.
 
 D3D doesn't have these because indirect IN/OUT isn't allowed there,
 but
 it is in GL and the hardware can do it.

Indirect IN/OUT is allowed on D3D9,
http://msdn.microsoft.com/en-us/library/windows/desktop/bb172963%28v=vs.85%29.aspx
, but it looks like SM4 indeed doesn't allow it,
http://msdn.microsoft.com/en-us/library/windows/desktop/ff471378%28v=VS.85%29.aspx
, which means that indirect input needs spilling the inputs into an indexable
temporary.

 Also, having an indexable version of every file seems odd, especially
 since we need a way to distinguish individual arrays inside that file
 anyway (just SM4 uses 2 indices to access INDEXABLE_TEMP; for INPUT
 we'll need 3 indices).

Fair enough.

DCL OUT[0].xyzw, POSITION
DCL OUT[1][0-7].x, CLIPDIST[0-7]
 
MOV OUT[1][ADDR[0].x - 12].x, TEMP[0].
 
  I propose we first add this new kind of temp at a first stage, then
  prohibit indirect addressing of all but this kind of temps.
 
 There's already TEMPORARY_ARRAY, but no one wants to use it because
 it's
 not clear how to distinguish individual arrays 

Re: [Mesa-dev] [PATCH 0/2 v2] Add support for clip distances in Gallium

2011-12-15 Thread Jose Fonseca


- Original Message -
 
 
 - Original Message -
  On 15.12.2011 20:09, Jose Fonseca wrote:
   - Original Message -
   On 12/14/2011 12:58 AM, Ian Romanick wrote:
   On 12/13/2011 01:25 PM, Jose Fonseca wrote:
  
   - Original Message -
   On 12/13/2011 03:09 PM, Jose Fonseca wrote:
   - Original Message -
   On 12/13/2011 12:26 PM, Bryan Cain wrote:
   On 12/13/2011 02:11 PM, Jose Fonseca wrote:
   - Original Message -
   This is an updated version of the patch set I sent to
   the
   list
   a
   few
   hours
   ago.
   There is now a TGSI property called
   TGSI_PROPERTY_NUM_CLIP_DISTANCES
   that drivers can use to determine how many of the 8
   available
   clip
   distances
   are actually used by a shader.
   Can't the info in TGSI_PROPERTY_NUM_CLIP_DISTANCES be
   easily
   derived from the shader, and queried through
   src/gallium/auxiliary/tgsi/tgsi_scan.h ?
   No.  The clip distances can be indirectly addressed (there
   are
   up
   to 2
   of them in vec4 form for a total of 8 floats), which makes
   it
   impossible
   to determine which ones are used by analyzing the shader.
   The description is almost complete. :)  The issue is that
   the
   shader
   may
   declare
  
   out float gl_ClipDistance[4];
  
   the use non-constant addressing of the array.  The compiler
   knows
   that
   gl_ClipDistance has at most 4 elements, but post-hoc
   analysis
   would
   not
   be able to determine that.  Often the fixed-function
   hardware
   (see
   below) needs to know which clip distance values are
   actually
   written.
   But don't all the clip distances written by the shader need
   to
   be
   declared?
  
   E.g.:
  
   DCL OUT[0], CLIPDIST[0]
   DCL OUT[1], CLIPDIST[1]
   DCL OUT[2], CLIPDIST[2]
   DCL OUT[3], CLIPDIST[3]
  
   therefore a trivial analysis of the declarations convey
   that?
   No.  Clip distance is an array of up to 8 floats in GLSL, but
   it's
   represented in the hardware as 2 vec4s.  You can tell by
   analyzing
   the
   declarations whether there are more than 4 clip distances in
   use,
   but
   not which components the shader writes to.
   TGSI_PROPERTY_NUM_CLIP_DISTANCES is the number of components
   in
   use,
   not
   the number of full vectors.
   Lets imagine
  
  out float gl_ClipDistance[6];
  
   Each a clip distance is a scalar float.
  
   Either all hardware represents the 8 clip distances as two 4
   vectors,
   and we do:
  
  DCL OUT[0].xywz, CLIPDIST[0]
  DCL OUT[1].xy, CLIPDIST[1]
  
   using the full range of struct tgsi_declaration::UsageMask [1]
   or
   we
   represent them as as scalars:
  
  DCL OUT[0].x, CLIPDIST[0]
  DCL OUT[1].x, CLIPDIST[1]
  DCL OUT[2].x, CLIPDIST[2]
  DCL OUT[3].x, CLIPDIST[3]
  DCL OUT[4].x, CLIPDIST[4]
  DCL OUT[5].x, CLIPDIST[5]
  
   If indirect addressing is allowed as I read bore, then maybe
   the
   later
   is better.
   As far as I'm aware, all hardware represents it as the former,
   and
   we
   have a lowering pass to fix-up the float[] accesses to be
   vec4[]
   accesses.
   GeForce8+ = scalar architecture, no vectors, addresses are byte
   based,
   can access individual components just fine.
   Ok. So we should avoid baking this vec4 assumption in TGSI
   semantics.
  
   Something like:
  
   gl_ClipDistance[i - 12] = some_value;
  
   DCL OUT[0].xyzw, POSITION
   DCL OUT[1-8].x, CLIPDIST[0-7]
  
   MOV OUT1[ADDR[0].x - 12].x, TEMP[0].
   *  **
  
   *   - tgsi_dimension.Index specifying the base address by
   referencing
   a
   declaration
   **  - tgsi_src_register.Index
  
   is the only way I see to make this work nicely on all hardware.
   (This is also needed if OUT[i] and OUT[i + 1] cannot be assigned
   to
   contiguous hardware resources because of semantic.)
   I think that having indexable temps, like D3D10, would be a
   cleaner:
  
  The problem is that we need an indexable version of every file then
  (at
  least INPUT, OUTPUT), and then all the nice 32 bit structs break
  down
  when we get more than 16 files.
  
  D3D doesn't have these because indirect IN/OUT isn't allowed there,
  but
  it is in GL and the hardware can do it.
 
 Indirect IN/OUT is allowed on D3D9 ,
 http://msdn.microsoft.com/en-us/library/windows/desktop/bb172963%28v=vs.85%29.aspx
 , but it looks like SM4 indeed doens't allow,
  
 http://msdn.microsoft.com/en-us/library/windows/desktop/ff471378%28v=VS.85%29.aspx
 , which means that indirect input needs spilling the inputs into a
 indexable temporary.
 
  Also, having an indexable version of every file seems odd,
  especially
  since we need a way to distinguish individual arrays inside that
  file
  anyway (just SM4 uses 2 indices to access INDEXABLE_TEMP; for INPUT
  we'll need 3 indices).
 
 Fair enough.
 
 DCL OUT[0].xyzw, POSITION
 DCL OUT[1][0-7].x, CLIPDIST[0-7]
  
 MOV OUT[1][ADDR[0].x - 12].x, TEMP[0].
  
   I propose we first 

Re: [Mesa-dev] vertex array regression

2011-12-15 Thread Mathias Fröhlich

Brian,

On Thursday, December 15, 2011 17:08:57 Brian Paul wrote:
 There's a regression in vertex array drawing with this commit:
 
 commit ed42c2580717527b2005580940fc766d95bb6b0b
 Author: Mathias Fröhlich mathias.froehl...@web.de
 Date:   Mon Oct 31 16:23:40 2011 +0100
 
  vbo: Use The VERT_{ATTRIB,BIT} defines.
 
  Signed-off-by: Mathias Froehlich mathias.froehl...@web.de
  Reviewed-by: Brian Paul bri...@vmware.com
  Reviewed-by: Eric Anholt e...@anholt.net
 
 
 To see the problem, run mesa/demos/src/demos/isosurf, choose
 glDrawArrays or glDrawElements from the pop-up menu (right mouse
 button).  I see the problem (random/missing vertices or failed
 assertion) with all gallium drivers.  The swrast/i965 drivers seem
 unaffected.
 
 I'll try to debug it further, but maybe you could double-check your work.

I will look into that. Probably not today, but at the latest this weekend.

Mathias


Re: [Mesa-dev] vertex array regression

2011-12-15 Thread Brian Paul
2011/12/15 Mathias Fröhlich mathias.froehl...@web.de:

 Brian,

 On Thursday, December 15, 2011 17:08:57 Brian Paul wrote:
 There's a regression in vertex array drawing with this commit:

 commit ed42c2580717527b2005580940fc766d95bb6b0b
 Author: Mathias Fröhlich mathias.froehl...@web.de
 Date:   Mon Oct 31 16:23:40 2011 +0100

      vbo: Use The VERT_{ATTRIB,BIT} defines.

      Signed-off-by: Mathias Froehlich mathias.froehl...@web.de
      Reviewed-by: Brian Paul bri...@vmware.com
      Reviewed-by: Eric Anholt e...@anholt.net


 To see the problem, run mesa/demos/src/demos/isosurf, choose
 glDrawArrays or glDrawElements from the pop-up menu (right mouse
 button).  I see the problem (random/missing vertices or failed
 assertion) with all gallium drivers.  The swrast/i965 drivers seem
 unaffected.

 I'll try to debug it further, but maybe you could double-check your work.

 I will look into that. Probably not today, but at the latest this weekend.

I found the problem.  It's this chunk in vbo_context.c:

@@ -182,14 +177,15 @@ GLboolean _vbo_CreateContext( struct gl_context *ctx )
   GLuint i;

   /* When no vertex program, pull in the material attributes in
-   * the 16..32 generic range.
+   * the generic range.
*/
-  for (i = 0; i  16; i++)
+  for (i = 0; i  VERT_ATTRIB_FF_MAX; i++)
 vbo-map_vp_none[i] = i;
-  for (i = 0; i  12; i++)
-vbo-map_vp_none[16+i] = VBO_ATTRIB_MAT_FRONT_AMBIENT + i;
-  for (i = 0; i  4; i++)
-vbo-map_vp_none[28+i] = i;
+  for (i = 0; i  NR_MAT_ATTRIBS; i++)
+vbo-map_vp_none[VERT_ATTRIB_GENERIC(i)]
+= VBO_ATTRIB_MAT_FRONT_AMBIENT + i;
+  for (i = NR_MAT_ATTRIBS; i  VERT_ATTRIB_GENERIC_MAX; i++)
+vbo-map_vp_none[VERT_ATTRIB_GENERIC(i)] = i;

   for (i = 0; i  Elements(vbo-map_vp_arb); i++)
 vbo-map_vp_arb[i] = i;

Or more precisely:

-  for (i = 0; i  4; i++)
-vbo-map_vp_none[28+i] = i;
+  for (i = NR_MAT_ATTRIBS; i  VERT_ATTRIB_GENERIC_MAX; i++)
+vbo-map_vp_none[VERT_ATTRIB_GENERIC(i)] = i;

This change resulted in a different mapping in the map_vp_none[] array.

The original code produced:
vbo-map_vp_none = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 0, 1, 2, 3}

The new code produces:
vbo-map_vp_none = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 12, 13, 14,
15}

The last four/five elements are different.  Though, I don't really
know what their purpose is.  It seems to me that they could/should
just be identity entries, ex: map_vp_none[i]=i as is the case for the
map_vp_arb[] array.

And I found that simply removing those lines fixes the problem:

@@ -184,8 +184,6 @@ GLboolean _vbo_CreateContext( struct gl_context *ctx )
   for (i = 0; i  NR_MAT_ATTRIBS; i++)
 vbo-map_vp_none[VERT_ATTRIB_GENERIC(i)]
 = VBO_ATTRIB_MAT_FRONT_AMBIENT + i;
-  for (i = NR_MAT_ATTRIBS; i  VERT_ATTRIB_GENERIC_MAX; i++)
-vbo-map_vp_none[VERT_ATTRIB_GENERIC(i)] = i;

   for (i = 0; i  Elements(vbo-map_vp_arb); i++)
 vbo-map_vp_arb[i] = i;

For fixed function, the point is to simply place the per-vertex
material attributes in the generic attribute arrays.  There are 12
such material attributes, so there are four slots left over.
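
To make the layout concrete, here is a small self-contained C sketch of the
mapping described above: legacy attributes map to themselves, the first 12
generic slots carry the material attributes, and the four leftover generic
slots are left as identity entries as suggested.  The constants (16 legacy
slots, material base of 32) are illustrative assumptions, not the real Mesa
defines.

#include <stdio.h>

enum { FF_ATTRIBS = 16, NR_MAT_ATTRIBS = 12, NR_GENERICS = 16 };
enum { VBO_ATTRIB_MAT_FRONT_AMBIENT = 32 };      /* illustrative value */

int main(void)
{
   int map_vp_none[FF_ATTRIBS + NR_GENERICS];
   int i;

   for (i = 0; i < FF_ATTRIBS; i++)                /* legacy attributes   */
      map_vp_none[i] = i;
   for (i = 0; i < NR_MAT_ATTRIBS; i++)            /* material attributes */
      map_vp_none[FF_ATTRIBS + i] = VBO_ATTRIB_MAT_FRONT_AMBIENT + i;
   for (i = NR_MAT_ATTRIBS; i < NR_GENERICS; i++)  /* leftovers: identity */
      map_vp_none[FF_ATTRIBS + i] = FF_ATTRIBS + i;

   for (i = 0; i < FF_ATTRIBS + NR_GENERICS; i++)
      printf("map_vp_none[%d] = %d\n", i, map_vp_none[i]);
   return 0;
}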

-Brian


[Mesa-dev] [PATCH] i965: Implement bounds checking for transform feedback output.

2011-12-15 Thread Kenneth Graunke
Signed-off-by: Kenneth Graunke kenn...@whitecape.org
---
 src/mesa/drivers/dri/i965/brw_context.c |1 +
 src/mesa/drivers/dri/i965/brw_context.h |3 ++
 src/mesa/drivers/dri/i965/brw_gs_emit.c |   10 
 src/mesa/drivers/dri/i965/gen6_sol.c|   38 +++
 4 files changed, 52 insertions(+), 0 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 7d360b0..fd60853 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -117,6 +117,7 @@ static void brwInitDriverFunctions( struct 
dd_function_table *functions )
brw_init_queryobj_functions(functions);
 
functions-PrepareExecBegin = brwPrepareExecBegin;
+   functions-BeginTransformFeedback = brw_begin_transform_feedback;
functions-EndTransformFeedback = brw_end_transform_feedback;
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 8e52488..20623d4 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1073,6 +1073,9 @@ brw_fprog_uses_noperspective(const struct 
gl_fragment_program *fprog);
 
 /* gen6_sol.c */
 void
+brw_begin_transform_feedback(struct gl_context *ctx,
+struct gl_transform_feedback_object *obj);
+void
 brw_end_transform_feedback(struct gl_context *ctx,
struct gl_transform_feedback_object *obj);
 
diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c 
b/src/mesa/drivers/dri/i965/brw_gs_emit.c
index 72d4eca..5dd3734 100644
--- a/src/mesa/drivers/dri/i965/brw_gs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c
@@ -352,6 +352,15 @@ gen6_sol_program(struct brw_gs_compile *c, struct 
brw_gs_prog_key *key,
*/
   brw_MOV(p, get_element_ud(c-reg.header, 5),
   get_element_ud(c-reg.SVBI, 0));
+
+  /* Make sure that the buffers have enough room for all the vertices. */
+  brw_ADD(p, get_element_ud(c-reg.temp, 0),
+get_element_ud(c-reg.SVBI, 0), brw_imm_ud(num_verts));
+  brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L,
+get_element_ud(c-reg.temp, 0),
+get_element_ud(c-reg.SVBI, 4));
+  brw_IF(p, BRW_EXECUTE_1);
+
   /* For each vertex, generate code to output each varying using the
* appropriate binding table entry.
*/
@@ -392,6 +401,7 @@ gen6_sol_program(struct brw_gs_compile *c, struct 
brw_gs_prog_key *key,
 get_element_ud(c-reg.header, 5), brw_imm_ud(1));
  }
   }
+  brw_ENDIF(p);
 
   /* Now, reinitialize the header register from R0 to restore the parts of
* the register that we overwrote while streaming out transform feedback
diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c 
b/src/mesa/drivers/dri/i965/gen6_sol.c
index b11bce2..56d4a6a 100644
--- a/src/mesa/drivers/dri/i965/gen6_sol.c
+++ b/src/mesa/drivers/dri/i965/gen6_sol.c
@@ -26,6 +26,7 @@
  * Code to initialize the binding table entries used by transform feedback.
  */
 
+#include main/macros.h
 #include brw_context.h
 #include intel_buffer_objects.h
 #include intel_batchbuffer.h
@@ -101,6 +102,43 @@ const struct brw_tracked_state gen6_sol_surface = {
 };
 
 void
+brw_begin_transform_feedback(struct gl_context *ctx,
+struct gl_transform_feedback_object *obj)
+{
+   struct intel_context *intel = intel_context(ctx);
+   const struct gl_shader_program *vs_prog =
+  ctx-Shader.CurrentVertexProgram;
+   const struct gl_transform_feedback_info *linked_xfb_info =
+  vs_prog-LinkedTransformFeedback;
+   struct gl_transform_feedback_object *xfb_obj =
+  ctx-TransformFeedback.CurrentObject;
+
+   unsigned max_index = 0x;
+
+   /* Compute the maximum number of vertices that we can write without
+* overflowing any of the buffers currently being used for feedback.
+*/
+   for (int i = 0; i  MAX_FEEDBACK_ATTRIBS; ++i) {
+  unsigned stride = linked_xfb_info-BufferStride[i];
+
+  /* Skip any inactive buffers, which have a stride of 0. */
+  if (stride == 0)
+continue;
+
+  unsigned max_for_this_buffer = xfb_obj-Size[i] / (4 * stride);
+  max_index = MIN2(max_index, max_for_this_buffer);
+   }
+
+   /* Initialize the SVBI 0 register to zero and set the maximum index. */
+   BEGIN_BATCH(4);
+   OUT_BATCH(_3DSTATE_GS_SVB_INDEX  16 | (4 - 2));
+   OUT_BATCH(0); /* SVBI 0 */
+   OUT_BATCH(0);
+   OUT_BATCH(max_index);
+   ADVANCE_BATCH();
+}
+
+void
 brw_end_transform_feedback(struct gl_context *ctx,
struct gl_transform_feedback_object *obj)
 {
-- 
1.7.7.3
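
For intuition, a standalone worked example (sizes and strides made up) of the
bounds computation this patch adds in brw_begin_transform_feedback(): the
number of vertices that can be written is the minimum over the active buffers
of Size[i] / (4 * BufferStride[i]).

#include <stdio.h>

int main(void)
{
   unsigned size[2]   = { 4096, 1024 };   /* buffer sizes in bytes         */
   unsigned stride[2] = { 4, 2 };         /* vertex strides in components  */
   unsigned max_index = 0xffffffff;
   unsigned i;

   for (i = 0; i < 2; i++) {
      unsigned max_for_this_buffer;

      if (stride[i] == 0)                 /* inactive buffer */
         continue;

      /* Each vertex consumes 4 * stride bytes in this buffer. */
      max_for_this_buffer = size[i] / (4 * stride[i]);
      if (max_for_this_buffer < max_index)
         max_index = max_for_this_buffer;
   }

   /* 4096 / (4*4) = 256 and 1024 / (4*2) = 128, so max_index is 128. */
   printf("max_index = %u\n", max_index);
   return 0;
}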



[Mesa-dev] [PATCH 1/3] mesa: make update_fbo_texture() non-static

2011-12-15 Thread Brian Paul
We'll call this from the mipmap generation code.
---
 src/mesa/main/teximage.c |   11 ++-
 src/mesa/main/teximage.h |4 
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 8a002b6..eccc0fd 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -2205,9 +2205,10 @@ check_rtt_cb(GLuint key, void *data, void *userData)
  * in size or format since that effects FBO completeness.
  * Any FBOs rendering into the texture must be re-validated.
  */
-static void
-update_fbo_texture(struct gl_context *ctx, struct gl_texture_object *texObj,
-   GLuint face, GLuint level)
+void
+_mesa_update_fbo_texture(struct gl_context *ctx,
+ struct gl_texture_object *texObj,
+ GLuint face, GLuint level)
 {
/* Only check this texture if it's been marked as RenderToTexture */
if (texObj-_RenderToTexture) {
@@ -2502,7 +2503,7 @@ teximage(struct gl_context *ctx, GLuint dims,
 
check_gen_mipmap(ctx, target, texObj, level);
 
-   update_fbo_texture(ctx, texObj, face, level);
+   _mesa_update_fbo_texture(ctx, texObj, face, level);
 
/* state update */
texObj-_Complete = GL_FALSE;
@@ -2844,7 +2845,7 @@ copyteximage(struct gl_context *ctx, GLuint dims,
 
 check_gen_mipmap(ctx, target, texObj, level);
 
-update_fbo_texture(ctx, texObj, face, level);
+_mesa_update_fbo_texture(ctx, texObj, face, level);
 
 /* state update */
 texObj-_Complete = GL_FALSE;
diff --git a/src/mesa/main/teximage.h b/src/mesa/main/teximage.h
index 9cc7d5a..d756646 100644
--- a/src/mesa/main/teximage.h
+++ b/src/mesa/main/teximage.h
@@ -80,6 +80,10 @@ _mesa_choose_texture_format(struct gl_context *ctx,
 GLenum target, GLint level,
 GLenum internalFormat, GLenum format, GLenum type);
 
+extern void
+_mesa_update_fbo_texture(struct gl_context *ctx,
+ struct gl_texture_object *texObj,
+ GLuint face, GLuint level);
 
 extern void
 _mesa_clear_texture_image(struct gl_context *ctx,
-- 
1.7.3.4



[Mesa-dev] [PATCH 2/3] mesa: new _mesa_prepare_mipmap_level() function for mipmap generation

2011-12-15 Thread Brian Paul
This helper function is used during mipmap generation to prepare space
for the destination mipmap levels.

This improves/fixes two things:
1. If the texture object was created with glTexStorage2D, calling
   _mesa_TexImage2D() to allocate the new image would generate
   INVALID_OPERATION since the texture is marked as immutable.
2. _mesa_TexImage2D() always frees any existing texture image memory
   before allocating new memory.  That's inefficient if the existing
   image is the right size already.
---
 src/mesa/main/mipmap.c |  128 +++
 src/mesa/main/mipmap.h |6 ++
 2 files changed, 101 insertions(+), 33 deletions(-)

diff --git a/src/mesa/main/mipmap.c b/src/mesa/main/mipmap.c
index fd6e582..867cb22 100644
--- a/src/mesa/main/mipmap.c
+++ b/src/mesa/main/mipmap.c
@@ -1803,6 +1803,81 @@ next_mipmap_level_size(GLenum target, GLint border,
}
 }
 
+
+/**
+ * Helper function for mipmap generation.
+ * Make sure the specified destination mipmap level is the right size/format
+ * for mipmap generation.  If not, (re) allocate it.
+ * \return GL_TRUE if successful, GL_FALSE if mipmap generation should stop
+ */
+GLboolean
+_mesa_prepare_mipmap_level(struct gl_context *ctx,
+   struct gl_texture_object *texObj, GLuint level,
+   GLsizei width, GLsizei height, GLsizei depth,
+   GLsizei border, GLenum intFormat, gl_format format)
+{
+   const GLuint numFaces = texObj-Target == GL_TEXTURE_CUBE_MAP ? 6 : 1;
+   GLuint face;
+
+   if (texObj-Immutable) {
+  /* The texture was created with glTexStorage() so the number/size of
+   * mipmap levels is fixed and the storage for all images is already
+   * allocated.
+   */
+  if (!texObj-Image[0][level]) {
+ /* No more levels to create - we're done */
+ return GL_FALSE;
+  }
+  else {
+ /* Nothing to do - the texture memory must have already been
+  * allocated to the right size so we're all set.
+  */
+ return GL_TRUE;
+  }
+   }
+
+   for (face = 0; face  numFaces; face++) {
+  struct gl_texture_image *dstImage;
+  GLenum target;
+
+  if (numFaces == 1)
+ target = texObj-Target;
+  else
+ target = GL_TEXTURE_CUBE_MAP_POSITIVE_X + face;
+
+  dstImage = _mesa_get_tex_image(ctx, texObj, target, level);
+  if (!dstImage) {
+ /* out of memory */
+ return GL_FALSE;
+  }
+
+  if (dstImage-Width != width ||
+  dstImage-Height != height ||
+  dstImage-Depth != depth ||
+  dstImage-Border != border ||
+  dstImage-InternalFormat != intFormat ||
+  dstImage-TexFormat != format) {
+ /* need to (re)allocate image */
+ ctx-Driver.FreeTextureImageBuffer(ctx, dstImage);
+
+ _mesa_init_teximage_fields(ctx, target, dstImage,
+width, height, depth,
+border, intFormat, format);
+
+ ctx-Driver.AllocTextureImageBuffer(ctx, dstImage,
+ format, width, height, depth);
+
+ /* in case the mipmap level is part of an FBO: */
+ _mesa_update_fbo_texture(ctx, texObj, face, level);
+
+ ctx-NewState |= _NEW_TEXTURE;
+  }
+   }
+
+   return GL_TRUE;
+}
+
+
 static void
 generate_mipmap_uncompressed(struct gl_context *ctx, GLenum target,
 struct gl_texture_object *texObj,
@@ -1841,31 +1916,20 @@ generate_mipmap_uncompressed(struct gl_context *ctx, 
GLenum target,
   if (!nextLevel)
  return;
 
-  /* get dest gl_texture_image */
-  dstImage = _mesa_get_tex_image(ctx, texObj, target, level + 1);
-  if (!dstImage) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, generating mipmaps);
+  if (!_mesa_prepare_mipmap_level(ctx, texObj, level + 1,
+  dstWidth, dstHeight, dstDepth,
+  border, srcImage-InternalFormat,
+  srcImage-TexFormat)) {
  return;
   }
 
-  /* Free old image data */
-  ctx-Driver.FreeTextureImageBuffer(ctx, dstImage);
-
-  _mesa_init_teximage_fields(ctx, target, dstImage, dstWidth, dstHeight,
- dstDepth, border, srcImage-InternalFormat,
- srcImage-TexFormat);
-
-  /* Alloc storage for new texture image */
-  if (!ctx-Driver.AllocTextureImageBuffer(ctx, dstImage,
-   dstImage-TexFormat,
-   dstWidth, dstHeight,
-   dstDepth)) {
+  /* get dest gl_texture_image */
+  dstImage = _mesa_get_tex_image(ctx, texObj, target, level + 1);
+  if (!dstImage) {
  _mesa_error(ctx, GL_OUT_OF_MEMORY, generating mipmaps);
  

[Mesa-dev] [PATCH 3/3] meta: use _mesa_prepare_mipmap_level() in the mipmap generation code

2011-12-15 Thread Brian Paul
See previous commit for more information.
---
 src/mesa/drivers/common/meta.c |   47 ++-
 1 files changed, 12 insertions(+), 35 deletions(-)

diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index 259041f..1683c85 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -2943,43 +2943,20 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, 
GLenum target,
  break;
   }
 
-  /* Set MaxLevel large enough to hold the new level when we allocate it  
*/
+  /* Allocate storage for the destination mipmap image(s) */
+
+  /* Set MaxLevel large enough to hold the new level when we allocate it */
   _mesa_TexParameteri(target, GL_TEXTURE_MAX_LEVEL, dstLevel);
 
-  /* Create empty dest image */
-  if (target == GL_TEXTURE_1D) {
- _mesa_TexImage1D(target, dstLevel, srcImage-InternalFormat,
-  dstWidth, border,
-  GL_RGBA, GL_UNSIGNED_BYTE, NULL);
-  }
-  else if (target == GL_TEXTURE_3D) {
- _mesa_TexImage3D(target, dstLevel, srcImage-InternalFormat,
-  dstWidth, dstHeight, dstDepth, border,
-  GL_RGBA, GL_UNSIGNED_BYTE, NULL);
-  }
-  else {
- /* 2D or cube */
- _mesa_TexImage2D(faceTarget, dstLevel, srcImage-InternalFormat,
-  dstWidth, dstHeight, border,
-  GL_RGBA, GL_UNSIGNED_BYTE, NULL);
-
- if (target == GL_TEXTURE_CUBE_MAP) {
-/* If texturing from a cube, we need to make sure all src faces
- * have been defined (even if we're not sampling from them.)
- * Otherwise the texture object will be 'incomplete' and
- * texturing from it will not be allowed.
- */
-GLuint face;
-for (face = 0; face  6; face++) {
-   if (!texObj-Image[face][srcLevel] ||
-   texObj-Image[face][srcLevel]-Width != srcWidth) {
-  _mesa_TexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face,
-   srcLevel, srcImage-InternalFormat,
-   srcWidth, srcHeight, border,
-   GL_RGBA, GL_UNSIGNED_BYTE, NULL);
-   }
-}
- }
+  if (!_mesa_prepare_mipmap_level(ctx, texObj, dstLevel,
+  dstWidth, dstHeight, dstDepth,
+  srcImage-Border,
+  srcImage-InternalFormat,
+  srcImage-TexFormat)) {
+ /* All done.  We either ran out of memory or we would go beyond the
+  * last valid level of an immutable texture if we continued.
+  */
+ break;
   }
 
   /* limit minification to src level */
-- 
1.7.3.4



[Mesa-dev] [PATCH 3/8] i965: Properly demote the depth mt format for fake packed depth/stencil.

2011-12-15 Thread Eric Anholt
gen7 only supports the non-packed formats, even if you associate a
real separate stencil buffer -- otherwise it's as if the depth test
always fails.

This requires a little bit of care in the match_texture_image case,
since the miptree doesn't track the effective format of a mapping
(with S8 merged in).
---
 src/mesa/drivers/dri/i965/gen7_misc_state.c|1 +
 src/mesa/drivers/dri/intel/intel_fbo.c |3 ++-
 src/mesa/drivers/dri/intel/intel_mipmap_tree.c |   16 +++-
 src/mesa/drivers/dri/intel/intel_mipmap_tree.h |2 +-
 4 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen7_misc_state.c 
b/src/mesa/drivers/dri/i965/gen7_misc_state.c
index 3946cb3..89a4e71 100644
--- a/src/mesa/drivers/dri/i965/gen7_misc_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_misc_state.c
@@ -53,6 +53,7 @@ static void emit_depthbuffer(struct brw_context *brw)
 
/* Gen7 doesn't support packed depth/stencil */
assert(stencil_mt == NULL || depth_mt != stencil_mt);
+   assert(!depth_mt || 
!_mesa_is_format_packed_depth_stencil(depth_mt-format));
 
intel_emit_depth_stall_flushes(intel);
 
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c 
b/src/mesa/drivers/dri/intel/intel_fbo.c
index 6f518ee..1711803 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -279,7 +279,8 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, 
struct gl_renderbuffer
 return false;
   }
 
-  assert(irb-mt-format == MESA_FORMAT_S8_Z24);
+  assert(irb-mt-format == MESA_FORMAT_S8_Z24 ||
+irb-mt-format == MESA_FORMAT_X8_Z24);
   ok = intel_renderbuffer_update_wrapper(intel, depth_irb, irb-mt,
 0, 0, /* level, layer */
 MESA_FORMAT_X8_Z24,
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
index 989038e..ee2d1e0 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@@ -136,6 +136,16 @@ intel_miptree_create_internal(struct intel_context *intel,
 intel_miptree_release(mt);
 return NULL;
   }
+
+  /* Fix up the Z miptree format for how we're splitting out separate
+   * stencil.  Gen7 expects there to be no stencil bits in its depth 
buffer.
+   */
+  if (mt-format == MESA_FORMAT_S8_Z24) {
+mt-format = MESA_FORMAT_X8_Z24;
+  } else {
+_mesa_problem(Unknown format %s in separate stencil\n,
+  _mesa_get_format_name(mt-format));
+  }
}
 
return mt;
@@ -320,8 +330,12 @@ intel_miptree_match_image(struct intel_mipmap_tree *mt,
GLuint level = intelImage-base.Base.Level;
int width, height, depth;
 
-   if (image-TexFormat != mt-format)
+   if (image-TexFormat != mt-format 
+   !(image-TexFormat == MESA_FORMAT_S8_Z24 
+mt-format == MESA_FORMAT_X8_Z24 
+mt-stencil_mt)) {
   return false;
+   }
 
intel_miptree_get_dimensions_for_image(image, width, height, depth);
 
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h 
b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
index fef1dcf..9082864 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
@@ -149,7 +149,7 @@ struct intel_mipmap_tree
 * two miptrees for storing the data.  If the depthstencil texture or rb is
 * MESA_FORMAT_Z32_FLOAT_X24S8, then mt-format will be
 * MESA_FORMAT_Z32_FLOAT, otherwise for MESA_FORMAT_S8_Z24 objects it will 
be
-* MESA_FORMAT_S8_Z24.
+* MESA_FORMAT_X8_Z24.
 */
gl_format format;
 
-- 
1.7.7.3



[Mesa-dev] [PATCH 1/8] intel: Stop creating the wrapped stencil irb.

2011-12-15 Thread Eric Anholt
There were only two places it was really used at this point, which were
in the batchbuffer emit of the separate stencil packets for gen6/7.
Just write in the ->stencil_mt reference in those two places and ditch
all this flailing around with allocation and refcounts.

v2: Fix separate stencil on gen7.

Reviewed-by: Kenneth Graunke kenn...@whitecape.org (v1)
---
 src/mesa/drivers/dri/i965/brw_misc_state.c |   40 +--
 src/mesa/drivers/dri/i965/gen7_misc_state.c|   32 
 src/mesa/drivers/dri/intel/intel_fbo.c |   64 
 src/mesa/drivers/dri/intel/intel_fbo.h |6 --
 src/mesa/drivers/dri/intel/intel_mipmap_tree.h |3 +-
 5 files changed, 67 insertions(+), 78 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c 
b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 3e8cb3f..8a6a694 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -208,6 +208,7 @@ brw_depthbuffer_format(struct brw_context *brw)
 
if (!drb 
(srb = intel_get_renderbuffer(fb, BUFFER_STENCIL)) 
+   !srb-mt-stencil_mt 
srb-Base.Format == MESA_FORMAT_S8_Z24) {
   drb = srb;
}
@@ -239,8 +240,10 @@ static void emit_depthbuffer(struct brw_context *brw)
/* _NEW_BUFFERS */
struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, 
BUFFER_DEPTH);
struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, 
BUFFER_STENCIL);
+   struct intel_mipmap_tree *stencil_mt = NULL;
struct intel_region *hiz_region = NULL;
unsigned int len;
+   bool separate_stencil = false;
 
if (depth_irb 
depth_irb-mt 
@@ -256,17 +259,21 @@ static void emit_depthbuffer(struct brw_context *brw)
   intel_emit_depth_stall_flushes(intel);
}
 
-   /*
-* If either depth or stencil buffer has packed depth/stencil format,
-* then don't use separate stencil. Emit only a depth buffer.
+   /* Find the real separate stencil mt if present. */
+   if (stencil_irb) {
+  stencil_mt = stencil_irb-mt;
+  if (stencil_mt-stencil_mt)
+stencil_mt = stencil_mt-stencil_mt;
+
+  if (stencil_mt-format == MESA_FORMAT_S8)
+separate_stencil = true;
+   }
+
+   /* If there's a packed depth/stencil bound to stencil only, we need to
+* emit the packed depth/stencil buffer packet.
 */
-   if (depth_irb  depth_irb-Base.Format == MESA_FORMAT_S8_Z24) {
-  stencil_irb = NULL;
-   } else if (!depth_irb  stencil_irb
-  stencil_irb-Base.Format == MESA_FORMAT_S8_Z24) {
+   if (!depth_irb  stencil_irb  !separate_stencil)
   depth_irb = stencil_irb;
-  stencil_irb = NULL;
-   }
 
if (intel-gen = 6)
   len = 7;
@@ -275,7 +282,7 @@ static void emit_depthbuffer(struct brw_context *brw)
else
   len = 5;
 
-   if (!depth_irb  !stencil_irb) {
+   if (!depth_irb  !separate_stencil) {
   BEGIN_BATCH(len);
   OUT_BATCH(_3DSTATE_DEPTH_BUFFER  16 | (len - 2));
   OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT  18) |
@@ -292,7 +299,7 @@ static void emit_depthbuffer(struct brw_context *brw)
 
   ADVANCE_BATCH();
 
-   } else if (!depth_irb  stencil_irb) {
+   } else if (!depth_irb  separate_stencil) {
   /*
* There exists a separate stencil buffer but no depth buffer.
*
@@ -317,10 +324,9 @@ static void emit_depthbuffer(struct brw_context *brw)
* Section 7.5.5.1.1 3DSTATE_DEPTH_BUFFER, Bit 1.27 Tiled Surface:
* [DevGT+]: This field must be set to TRUE.
*/
-  struct intel_region *region = stencil_irb-mt-region;
+  struct intel_region *region = stencil_mt-region;
 
   assert(intel-has_separate_stencil);
-  assert(stencil_irb-Base.Format == MESA_FORMAT_S8);
 
   BEGIN_BATCH(len);
   OUT_BATCH(_3DSTATE_DEPTH_BUFFER  16 | (len - 2));
@@ -346,7 +352,7 @@ static void emit_depthbuffer(struct brw_context *brw)
   uint32_t tile_x, tile_y, offset;
 
   /* If using separate stencil, hiz must be enabled. */
-  assert(!stencil_irb || hiz_region);
+  assert(!separate_stencil || hiz_region);
 
   offset = intel_renderbuffer_tile_offsets(depth_irb, tile_x, tile_y);
 
@@ -381,7 +387,7 @@ static void emit_depthbuffer(struct brw_context *brw)
   ADVANCE_BATCH();
}
 
-   if (hiz_region || stencil_irb) {
+   if (hiz_region || separate_stencil) {
   /*
* In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate
* stencil enable' and 'hiz enable' bits were set. Therefore we must
@@ -408,8 +414,8 @@ static void emit_depthbuffer(struct brw_context *brw)
   }
 
   /* Emit stencil buffer. */
-  if (stencil_irb) {
-struct intel_region *region = stencil_irb-mt-region;
+  if (separate_stencil) {
+struct intel_region *region = stencil_mt-region;
 BEGIN_BATCH(3);
 OUT_BATCH((_3DSTATE_STENCIL_BUFFER  16) | (3 - 2));
 OUT_BATCH(region-pitch * region-cpp - 1);
diff --git 

[Mesa-dev] [PATCH 2/8] intel: Reuse intel_miptree_match_image().

2011-12-15 Thread Eric Anholt
This little bit of logic was duplicated, which isn't much, but I was
going to need to duplicate a bit of additional logic in the next
commit.
---
 src/mesa/drivers/dri/intel/intel_tex_validate.c |   15 ++-
 1 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c 
b/src/mesa/drivers/dri/intel/intel_tex_validate.c
index 748fbdc..b0daa2c 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c
@@ -52,9 +52,6 @@ intel_finalize_mipmap_tree(struct intel_context *intel, 
GLuint unit)
intel_update_max_level(intelObj, sampler);
firstImage = intel_texture_image(tObj-Image[0][tObj-BaseLevel]);
 
-   intel_miptree_get_dimensions_for_image(firstImage-base.Base,
-  width, height, depth);
-
/* Check tree can hold all active levels.  Check tree matches
 * target, imageFormat, etc.
 *
@@ -64,13 +61,10 @@ intel_finalize_mipmap_tree(struct intel_context *intel, 
GLuint unit)
 * of that, we just always relayout on baselevel change.
 */
if (intelObj-mt 
-   (intelObj-mt-target != intelObj-base.Target ||
-   intelObj-mt-format != firstImage-base.Base.TexFormat ||
+   (!intel_miptree_match_image(intelObj-mt, firstImage-base.Base) ||
+   intelObj-mt-target != intelObj-base.Target ||
intelObj-mt-first_level != tObj-BaseLevel ||
-   intelObj-mt-last_level  intelObj-_MaxLevel ||
-   intelObj-mt-width0 != width ||
-   intelObj-mt-height0 != height ||
-   intelObj-mt-depth0 != depth)) {
+   intelObj-mt-last_level  intelObj-_MaxLevel)) {
   intel_miptree_release(intelObj-mt);
}
 
@@ -78,6 +72,9 @@ intel_finalize_mipmap_tree(struct intel_context *intel, 
GLuint unit)
/* May need to create a new tree:
 */
if (!intelObj-mt) {
+  intel_miptree_get_dimensions_for_image(firstImage-base.Base,
+width, height, depth);
+
   intelObj-mt = intel_miptree_create(intel,
   intelObj-base.Target,
  firstImage-base.Base.TexFormat,
-- 
1.7.7.3



[Mesa-dev] [PATCH 4/8] intel: Stop creating the wrapped depth irb.

2011-12-15 Thread Eric Anholt
All the operations were just trying to get at irb->wrapped_depth->mt,
which is the same as irb->mt now.

Reviewed-by: Kenneth Graunke kenn...@whitecape.org
---
 src/mesa/drivers/dri/intel/intel_fbo.c |   92 +++-
 src/mesa/drivers/dri/intel/intel_fbo.h |   27 +-
 2 files changed, 8 insertions(+), 111 deletions(-)

diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c 
b/src/mesa/drivers/dri/intel/intel_fbo.c
index 1711803..c60b57e 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -113,8 +113,6 @@ intel_delete_renderbuffer(struct gl_renderbuffer *rb)
 
intel_miptree_release(irb-mt);
 
-   _mesa_reference_renderbuffer(irb-wrapped_depth, NULL);
-
free(irb);
 }
 
@@ -260,34 +258,6 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, 
struct gl_renderbuffer
   }
}
 
-   if (irb-mt-stencil_mt) {
-  bool ok;
-  struct intel_renderbuffer *depth_irb;
-
-  /* The RB got allocated as separate stencil.  Hook up our wrapped
-   * renderbuffer so that consumers of intel_get_renderbuffer(BUFFER_DEPTH)
-   * end up with pointers to the separate depth.
-   */
-  if (!irb-wrapped_depth) {
-_mesa_reference_renderbuffer(irb-wrapped_depth,
- intel_new_renderbuffer(ctx, ~0));
-  }
-
-  depth_irb = intel_renderbuffer(irb-wrapped_depth);
-  if (!depth_irb) {
-intel_miptree_release(irb-mt);
-return false;
-  }
-
-  assert(irb-mt-format == MESA_FORMAT_S8_Z24 ||
-irb-mt-format == MESA_FORMAT_X8_Z24);
-  ok = intel_renderbuffer_update_wrapper(intel, depth_irb, irb-mt,
-0, 0, /* level, layer */
-MESA_FORMAT_X8_Z24,
-GL_DEPTH_COMPONENT24);
-  assert(ok);
-   }
-
return true;
 }
 
@@ -533,45 +503,14 @@ intel_renderbuffer_update_wrapper(struct intel_context 
*intel,
irb-mt_layer = layer;
 
intel_miptree_reference(irb-mt, mt);
-   if (mt-stencil_mt  _mesa_is_depthstencil_format(rb-InternalFormat)) {
-  struct intel_renderbuffer *depth_irb;
-
-  if (!irb-wrapped_depth) {
-depth_irb = intel_renderbuffer_wrap_miptree(intel,
-mt, level, layer,
-MESA_FORMAT_X8_Z24,
-GL_DEPTH_COMPONENT24);
-_mesa_reference_renderbuffer(irb-wrapped_depth, depth_irb-Base);
-
-if (!irb-wrapped_depth) {
-   intel_miptree_release(irb-mt);
-   return false;
-}
-  } else {
-bool ok = true;
-
-depth_irb = intel_renderbuffer(irb-wrapped_depth);
-
-ok = intel_renderbuffer_update_wrapper(intel,
-depth_irb,
-mt,
-level, layer,
-MESA_FORMAT_X8_Z24,
-GL_DEPTH_COMPONENT24);
-if (!ok) {
-   intel_miptree_release(irb-mt);
-   return false;
-}
-  }
-   } else {
-  intel_renderbuffer_set_draw_offset(irb);
 
-  if (mt-hiz_mt == NULL 
- intel-vtbl.is_hiz_depth_format(intel, rb-Format)) {
-intel_miptree_alloc_hiz(intel, mt);
- if (!mt-hiz_mt)
-return false;
-  }
+   intel_renderbuffer_set_draw_offset(irb);
+
+   if (mt-hiz_mt == NULL 
+   intel-vtbl.is_hiz_depth_format(intel, rb-Format)) {
+  intel_miptree_alloc_hiz(intel, mt);
+  if (!mt-hiz_mt)
+return false;
}
 
return true;
@@ -982,11 +921,6 @@ intel_renderbuffer_set_needs_hiz_resolve(struct 
intel_renderbuffer *irb)
   intel_miptree_slice_set_needs_hiz_resolve(irb-mt,
 irb-mt_level,
 irb-mt_layer);
-   } else if (irb-wrapped_depth) {
-  intel_renderbuffer_set_needs_hiz_resolve(
-   intel_renderbuffer(irb-wrapped_depth));
-   } else {
-  return;
}
 }
 
@@ -997,11 +931,6 @@ intel_renderbuffer_set_needs_depth_resolve(struct 
intel_renderbuffer *irb)
   intel_miptree_slice_set_needs_depth_resolve(irb-mt,
   irb-mt_level,
   irb-mt_layer);
-   } else if (irb-wrapped_depth) {
-  intel_renderbuffer_set_needs_depth_resolve(
-   intel_renderbuffer(irb-wrapped_depth));
-   } else {
-  return;
}
 }
 
@@ -1014,9 +943,6 @@ intel_renderbuffer_resolve_hiz(struct intel_context *intel,
  irb-mt,
  irb-mt_level,
  irb-mt_layer);
- 

[Mesa-dev] [PATCH 5/8] i965: Add support for mapping Z32_FLOAT_X24S8 fake packed depth/stencil.

2011-12-15 Thread Eric Anholt
The format handling here is tricky, because we're not actually
generating a Z32_FLOAT_X24S8 miptree, so we're guessing the format
that GL wants based on seeing Z32_FLOAT with a separate stencil.

Reviewed-by: Kenneth Graunke kenn...@whitecape.org
---
 src/mesa/drivers/dri/intel/intel_mipmap_tree.c |   22 +-
 1 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
index ee2d1e0..0d49fec 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@@ -876,7 +876,8 @@ intel_miptree_map_depthstencil(struct intel_context *intel,
 {
struct intel_mipmap_tree *z_mt = mt;
struct intel_mipmap_tree *s_mt = mt-stencil_mt;
-   int packed_bpp = 4;
+   bool map_z32f_x24s8 = mt-format == MESA_FORMAT_Z32_FLOAT;
+   int packed_bpp = map_z32f_x24s8 ? 8 : 4;
 
map-stride = map-w * packed_bpp;
map-buffer = map-ptr = malloc(map-stride * map-h);
@@ -911,7 +912,12 @@ intel_miptree_map_depthstencil(struct intel_context *intel,
uint8_t s = s_map[s_offset];
uint32_t z = z_map[z_offset];
 
-   packed_map[y * map-w + x] = (s  24) | (z  0x00ff);
+   if (map_z32f_x24s8) {
+  packed_map[(y * map-w + x) * 2 + 0] = z;
+  packed_map[(y * map-w + x) * 2 + 1] = s;
+   } else {
+  packed_map[y * map-w + x] = (s  24) | (z  0x00ff);
+   }
 }
   }
 
@@ -940,6 +946,7 @@ intel_miptree_unmap_depthstencil(struct intel_context 
*intel,
 {
struct intel_mipmap_tree *z_mt = mt;
struct intel_mipmap_tree *s_mt = mt-stencil_mt;
+   bool map_z32f_x24s8 = mt-format == MESA_FORMAT_Z32_FLOAT;
 
if (map-mode  GL_MAP_WRITE_BIT) {
   uint32_t *packed_map = map-ptr;
@@ -960,10 +967,15 @@ intel_miptree_unmap_depthstencil(struct intel_context 
*intel,
 y + s_image_y + map-y);
ptrdiff_t z_offset = ((y + z_image_y) * z_mt-region-pitch +
  (x + z_image_x));
-   uint32_t packed = packed_map[y * map-w + x];
 
-   s_map[s_offset] = packed  24;
-   z_map[z_offset] = packed;
+   if (map_z32f_x24s8) {
+  z_map[z_offset] = packed_map[(y * map-w + x) * 2 + 0];
+  s_map[s_offset] = packed_map[(y * map-w + x) * 2 + 1];
+   } else {
+  uint32_t packed = packed_map[y * map-w + x];
+  s_map[s_offset] = packed  24;
+  z_map[z_offset] = packed;
+   }
 }
   }
 
-- 
1.7.7.3



[Mesa-dev] [PATCH 7/8] i965: Add separate stencil/HiZ setup for MESA_FORMAT_Z32_FLOAT_X24S8.

2011-12-15 Thread Eric Anholt
This is a little more unusual than the separate MESA_FORMAT_S8_Z24
support, because in addition to storing the real stencil data in a
MESA_FORMAT_S8 miptree, we also make the Z miptree be
MESA_FORMAT_Z32_FLOAT instead of the requested format.

Reviewed-by: Kenneth Graunke kenn...@whitecape.org
---
 src/mesa/drivers/dri/i965/brw_misc_state.c |2 +-
 src/mesa/drivers/dri/i965/brw_vtbl.c   |2 +
 src/mesa/drivers/dri/intel/intel_mipmap_tree.c |   31 +--
 3 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c 
b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 8a6a694..ad49c8f 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -216,7 +216,7 @@ brw_depthbuffer_format(struct brw_context *brw)
if (!drb)
   return BRW_DEPTHFORMAT_D32_FLOAT;
 
-   switch (drb-Base.Format) {
+   switch (drb-mt-format) {
case MESA_FORMAT_Z16:
   return BRW_DEPTHFORMAT_D16_UNORM;
case MESA_FORMAT_Z32_FLOAT:
diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c 
b/src/mesa/drivers/dri/i965/brw_vtbl.c
index bc76ec2..d348806 100644
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -202,6 +202,8 @@ static bool brw_is_hiz_depth_format(struct intel_context 
*intel,
   return false;
 
switch (format) {
+   case MESA_FORMAT_Z32_FLOAT:
+   case MESA_FORMAT_Z32_FLOAT_X24S8:
case MESA_FORMAT_X8_Z24:
case MESA_FORMAT_S8_Z24:
   return true;
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
index 0d49fec..e0f9632 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@@ -89,9 +89,6 @@ intel_miptree_create_internal(struct intel_context *intel,
mt-compressed = compress_byte ? 1 : 0;
mt-refcount = 1; 
 
-   intel_get_texture_alignment_unit(intel, format,
-   mt-align_w, mt-align_h);
-
if (target == GL_TEXTURE_CUBE_MAP) {
   assert(depth0 == 1);
   mt-depth0 = 6;
@@ -109,16 +106,6 @@ intel_miptree_create_internal(struct intel_context *intel,
   mt-cpp = 2;
}
 
-#ifdef I915
-   (void) intel;
-   if (intel-is_945)
-  i945_miptree_layout(mt);
-   else
-  i915_miptree_layout(mt);
-#else
-   brw_miptree_layout(intel, mt);
-#endif
-
if (_mesa_is_depthstencil_format(_mesa_get_format_base_format(format)) 
(intel-must_use_separate_stencil ||
(intel-has_separate_stencil 
@@ -142,12 +129,28 @@ intel_miptree_create_internal(struct intel_context *intel,
*/
   if (mt-format == MESA_FORMAT_S8_Z24) {
 mt-format = MESA_FORMAT_X8_Z24;
+  } else if (mt-format == MESA_FORMAT_Z32_FLOAT_X24S8) {
+mt-format = MESA_FORMAT_Z32_FLOAT;
+mt-cpp = 4;
   } else {
-_mesa_problem(Unknown format %s in separate stencil\n,
+_mesa_problem(NULL, Unknown format %s in separate stencil mt\n,
   _mesa_get_format_name(mt-format));
   }
}
 
+   intel_get_texture_alignment_unit(intel, mt-format,
+   mt-align_w, mt-align_h);
+
+#ifdef I915
+   (void) intel;
+   if (intel-is_945)
+  i945_miptree_layout(mt);
+   else
+  i915_miptree_layout(mt);
+#else
+   brw_miptree_layout(intel, mt);
+#endif
+
return mt;
 }
 
-- 
1.7.7.3



[Mesa-dev] [PATCH 6/8] i965: Use the miptree format for texture surface format choice.

2011-12-15 Thread Eric Anholt
With separate stencil GL_DEPTH32F_STENCIL8, the miptree will have a
really different format (MESA_FORMAT_Z32_FLOAT) from the teximage
(MESA_FORMAT_Z32_FLOAT_X24S8).

v2: Do it for gen7, too.

Reviewed-by: Kenneth Graunke kenn...@whitecape.org (v1)
---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c  |2 +-
 src/mesa/drivers/dri/i965/gen7_wm_surface_state.c |2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index f9b0b71..d36b6f8 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -632,7 +632,7 @@ brw_update_texture_surface( struct gl_context *ctx, GLuint 
unit )
surf[0] = (translate_tex_target(tObj-Target)  BRW_SURFACE_TYPE_SHIFT |
  BRW_SURFACE_MIPMAPLAYOUT_BELOW  BRW_SURFACE_MIPLAYOUT_SHIFT |
  BRW_SURFACE_CUBEFACE_ENABLES |
- (translate_tex_format(firstImage-TexFormat,
+ (translate_tex_format(mt-format,
firstImage-InternalFormat,
sampler-DepthMode,
sampler-sRGBDecode) 
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
index 0defa67..1c0183b 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
@@ -76,7 +76,7 @@ gen7_update_texture_surface(struct gl_context *ctx, GLuint 
unit)
   surf-ss0.vertical_alignment = 1;
 
surf-ss0.surface_type = translate_tex_target(tObj-Target);
-   surf-ss0.surface_format = translate_tex_format(firstImage-TexFormat,
+   surf-ss0.surface_format = translate_tex_format(mt-format,
firstImage-InternalFormat,
sampler-DepthMode,
sampler-sRGBDecode);
-- 
1.7.7.3



[Mesa-dev] [PATCH 8/8] i965: Add support for GL_ARB_depth_buffer_float under 3.0 override.

2011-12-15 Thread Eric Anholt
This is not exposed generally yet because some of the swrast paths hit
in piglit (drawpixels, copypixels, blit) aren't yet converted to
MapRenderbuffer.

Reviewed-by: Kenneth Graunke kenn...@whitecape.org
---
 src/mesa/drivers/dri/i965/brw_misc_state.c   |5 -
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c |   10 ++
 src/mesa/drivers/dri/intel/intel_extensions.c|2 ++
 src/mesa/drivers/dri/intel/intel_tex_format.c|4 
 4 files changed, 20 insertions(+), 1 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c 
b/src/mesa/drivers/dri/i965/brw_misc_state.c
index ad49c8f..e76901a 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -209,7 +209,8 @@ brw_depthbuffer_format(struct brw_context *brw)
if (!drb 
(srb = intel_get_renderbuffer(fb, BUFFER_STENCIL)) 
!srb-mt-stencil_mt 
-   srb-Base.Format == MESA_FORMAT_S8_Z24) {
+   (srb-Base.Format == MESA_FORMAT_S8_Z24 ||
+   srb-Base.Format == MESA_FORMAT_Z32_FLOAT_X24S8)) {
   drb = srb;
}
 
@@ -225,6 +226,8 @@ brw_depthbuffer_format(struct brw_context *brw)
   return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
case MESA_FORMAT_S8_Z24:
   return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
+   case MESA_FORMAT_Z32_FLOAT_X24S8:
+  return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
default:
   _mesa_problem(ctx, Unexpected depth format %s\n,
_mesa_get_format_name(drb-Base.Format));
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index d36b6f8..7a95952 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -541,12 +541,16 @@ brw_init_surface_formats(struct brw_context *brw)
brw-format_supported_as_render_target[MESA_FORMAT_X8_Z24] = true;
brw-format_supported_as_render_target[MESA_FORMAT_S8] = true;
brw-format_supported_as_render_target[MESA_FORMAT_Z16] = true;
+   brw-format_supported_as_render_target[MESA_FORMAT_Z32_FLOAT] = true;
+   brw-format_supported_as_render_target[MESA_FORMAT_Z32_FLOAT_X24S8] = true;
 
/* We remap depth formats to a supported texturing format in
 * translate_tex_format().
 */
ctx-TextureFormatSupported[MESA_FORMAT_S8_Z24] = true;
ctx-TextureFormatSupported[MESA_FORMAT_X8_Z24] = true;
+   ctx-TextureFormatSupported[MESA_FORMAT_Z32_FLOAT] = true;
+   ctx-TextureFormatSupported[MESA_FORMAT_Z32_FLOAT_X24S8] = true;
 }
 
 bool
@@ -574,6 +578,12 @@ translate_tex_format(gl_format mesa_format,
case MESA_FORMAT_X8_Z24:
   return BRW_SURFACEFORMAT_I24X8_UNORM;
 
+   case MESA_FORMAT_Z32_FLOAT:
+  return BRW_SURFACEFORMAT_I32_FLOAT;
+
+   case MESA_FORMAT_Z32_FLOAT_X24S8:
+  return BRW_SURFACEFORMAT_R32G32_FLOAT;
+
case MESA_FORMAT_SARGB8:
case MESA_FORMAT_SLA8:
case MESA_FORMAT_SL8:
diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c 
b/src/mesa/drivers/dri/intel/intel_extensions.c
index 681f5f2..2e2a31f 100644
--- a/src/mesa/drivers/dri/intel/intel_extensions.c
+++ b/src/mesa/drivers/dri/intel/intel_extensions.c
@@ -109,6 +109,8 @@ intelInitExtensions(struct gl_context *ctx)
 
if (intel-gen = 4) {
   ctx-Extensions.ARB_color_buffer_float = true;
+  if (override_version = 30)
+ctx-Extensions.ARB_depth_buffer_float = true;
   ctx-Extensions.ARB_depth_clamp = true;
   ctx-Extensions.ARB_fragment_coord_conventions = true;
   ctx-Extensions.ARB_fragment_program_shadow = true;
diff --git a/src/mesa/drivers/dri/intel/intel_tex_format.c 
b/src/mesa/drivers/dri/intel/intel_tex_format.c
index 898846f..f53054d 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_format.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_format.c
@@ -41,6 +41,10 @@ intel_mesa_format_to_rb_datatype(gl_format format)
case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32:
   return GL_FLOAT;
 
+  /* The core depthstencil wrappers demand this. */
+   case MESA_FORMAT_Z32_FLOAT_X24S8:
+  return GL_FLOAT_32_UNSIGNED_INT_24_8_REV;
+
default:
   /* Unsupported format.  We may hit this when people ask for 
FBO-incomplete
* formats.
-- 
1.7.7.3
