date:20101009

Mesa (master): dri/nv10: Fake fast Z clears for pre-nv17 cards.

2010-10-09 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: e2acc7be2683fd3c295480724b02f5a497309cfd
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=e2acc7be2683fd3c295480724b02f5a497309cfd

Author: Francisco Jerez 
Date:   Sun Oct 10 01:39:13 2010 +0200

dri/nv10: Fake fast Z clears for pre-nv17 cards.

---

 src/mesa/drivers/dri/nouveau/nv10_context.c   |  130 +
 src/mesa/drivers/dri/nouveau/nv10_driver.h|6 +
 src/mesa/drivers/dri/nouveau/nv10_state_fb.c  |8 ++
 src/mesa/drivers/dri/nouveau/nv10_state_tnl.c |3 +
 4 files changed, 127 insertions(+), 20 deletions(-)

diff --git a/src/mesa/drivers/dri/nouveau/nv10_context.c 
b/src/mesa/drivers/dri/nouveau/nv10_context.c
index f0e2744..3d898fd 100644
--- a/src/mesa/drivers/dri/nouveau/nv10_context.c
+++ b/src/mesa/drivers/dri/nouveau/nv10_context.c
@@ -61,39 +61,129 @@ use_fast_zclear(GLcontext *ctx, GLbitfield buffers)
fb->_Ymax == fb->Height && fb->_Ymin == 0;
 }
 
+GLboolean
+nv10_use_viewport_zclear(GLcontext *ctx)
+{
+   struct nouveau_context *nctx = to_nouveau_context(ctx);
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+
+   return context_chipset(ctx) < 0x17 &&
+   !nctx->hierz.clear_blocked && fb->_DepthBuffer &&
+   (_mesa_get_format_bits(fb->_DepthBuffer->Format,
+  GL_DEPTH_BITS) >= 24);
+}
+
+float
+nv10_transform_depth(GLcontext *ctx, float z)
+{
+   struct nouveau_context *nctx = to_nouveau_context(ctx);
+
+   if (nv10_use_viewport_zclear(ctx))
+   return 2097152.0 * (z + (nctx->hierz.clear_seq & 7));
+   else
+   return ctx->DrawBuffer->_DepthMaxF * z;
+}
+
 static void
-nv10_clear(GLcontext *ctx, GLbitfield buffers)
+nv10_zclear(GLcontext *ctx, GLbitfield *buffers)
+{
+   /*
+* Pre-nv17 cards don't have native support for fast Z clears,
+* but in some cases we can still "clear" the Z buffer without
+* actually blitting to it if we're willing to sacrifice a few
+* bits of depth precision.
+*
+* Each time a clear is requested we modify the viewport
+* transform in such a way that the old contents of the depth
+* buffer are clamped to the requested clear value when
+* they're read by the GPU.
+*/
+   struct nouveau_context *nctx = to_nouveau_context(ctx);
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   struct nouveau_framebuffer *nfb = to_nouveau_framebuffer(fb);
+   struct nouveau_surface *s = &to_nouveau_renderbuffer(
+   fb->_DepthBuffer->Wrapped)->surface;
+
+   if (nv10_use_viewport_zclear(ctx)) {
+   int x, y, w, h;
+   float z = ctx->Depth.Clear;
+   uint32_t value = pack_zs_f(s->format, z, 0);
+
+   get_scissors(fb, &x, &y, &w, &h);
+   *buffers &= ~BUFFER_BIT_DEPTH;
+
+   if (use_fast_zclear(ctx, *buffers)) {
+   if (nfb->hierz.clear_value != value) {
+   /* Don't fast clear if we're changing
+* the depth value. */
+   nfb->hierz.clear_value = value;
+
+   } else if (z == 0.0) {
+   nctx->hierz.clear_seq++;
+   context_dirty(ctx, ZCLEAR);
+
+   if ((nctx->hierz.clear_seq & 7) != 0 &&
+   nctx->hierz.clear_seq != 1)
+   /* We didn't wrap around -- no need to
+* clear the depth buffer for real. */
+   return;
+
+   } else if (z == 1.0) {
+   nctx->hierz.clear_seq--;
+   context_dirty(ctx, ZCLEAR);
+
+   if ((nctx->hierz.clear_seq & 7) != 7)
+   /* No wrap around */
+   return;
+   }
+   }
+
+   value = pack_zs_f(s->format,
+ (z + (nctx->hierz.clear_seq & 7)) / 8, 0);
+   context_drv(ctx)->surface_fill(ctx, s, ~0, value, x, y, w, h);
+   }
+}
+
+static void
+nv17_zclear(GLcontext *ctx, GLbitfield *buffers)
 {
struct nouveau_context *nctx = to_nouveau_context(ctx);
struct nouveau_channel *chan = context_chan(ctx);
struct nouveau_grobj *celsius = context_eng3d(ctx);
struct nouveau_framebuffer *nfb = to_nouveau_framebuffer(
ctx->DrawBuffer);
+   struct nouveau_surface *s = &to_nouveau_renderbuffer(
+   nfb->base._DepthBuffer->Wrapped)->surface;
 
-   nouveau_validate_framebuffer(ctx);
+   /* Clear the hierarchical depth buffer */
+   BEGIN_RING(chan, celsius, NV17TCL_LMA_DEPTH_FILL_VALUE, 1);
+   OUT_RING(chan,

Mesa (master): dri/nouveau: Minor cleanup.

2010-10-09 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 35a1893fd1993932a428f5f83051383d51c8135e
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=35a1893fd1993932a428f5f83051383d51c8135e

Author: Francisco Jerez 
Date:   Sun Oct 10 01:45:23 2010 +0200

dri/nouveau: Minor cleanup.

---

 src/mesa/drivers/dri/nouveau/nouveau_context.c |3 +--
 src/mesa/drivers/dri/nouveau/nv10_context.c|2 +-
 src/mesa/drivers/dri/nouveau/nv10_render.c |   20 ++--
 src/mesa/drivers/dri/nouveau/nv20_render.c |   20 ++--
 4 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.c 
b/src/mesa/drivers/dri/nouveau/nouveau_context.c
index 1121d2d..6369e8d 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_context.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_context.c
@@ -353,6 +353,5 @@ nouveau_validate_framebuffer(GLcontext *ctx)
validate_framebuffer(dri_ctx, dri_read,
 &dri_ctx->dri2.read_stamp);
 
-   if (nouveau_next_dirty_state(ctx) >= 0)
-   nouveau_state_emit(ctx);
+   nouveau_state_emit(ctx);
 }
diff --git a/src/mesa/drivers/dri/nouveau/nv10_context.c 
b/src/mesa/drivers/dri/nouveau/nv10_context.c
index 41723ff..f0e2744 100644
--- a/src/mesa/drivers/dri/nouveau/nv10_context.c
+++ b/src/mesa/drivers/dri/nouveau/nv10_context.c
@@ -305,7 +305,7 @@ nv10_hwctx_init(GLcontext *ctx)
OUT_RING(chan, 1);
 
BEGIN_RING(chan, celsius, NV10TCL_DEPTH_RANGE_NEAR, 2);
-   OUT_RING(chan, 0.0);
+   OUT_RINGf(chan, 0.0);
OUT_RINGf(chan, 16777216.0);
 
FIRE_RING(chan);
diff --git a/src/mesa/drivers/dri/nouveau/nv10_render.c 
b/src/mesa/drivers/dri/nouveau/nv10_render.c
index 54245ea..e4c51f8 100644
--- a/src/mesa/drivers/dri/nouveau/nv10_render.c
+++ b/src/mesa/drivers/dri/nouveau/nv10_render.c
@@ -167,35 +167,35 @@ nv10_render_bind_vertices(GLcontext *ctx)
 
 #define BATCH_BEGIN(prim)  \
BEGIN_RING(chan, celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);  \
-   OUT_RING(chan, prim);
+   OUT_RING(chan, prim)
 #define BATCH_END()\
BEGIN_RING(chan, celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);  \
-   OUT_RING(chan, 0);
+   OUT_RING(chan, 0)
 
 #define MAX_PACKET 0x400
 
 #define MAX_OUT_L 0x100
 #define BATCH_PACKET_L(n)  \
-   BEGIN_RING_NI(chan, celsius, NV10TCL_VERTEX_BUFFER_DRAW_ARRAYS, n);
+   BEGIN_RING_NI(chan, celsius, NV10TCL_VERTEX_BUFFER_DRAW_ARRAYS, n)
 #define BATCH_OUT_L(i, n)  \
-   OUT_RING(chan, ((n) - 1) << 24 | (i));
+   OUT_RING(chan, ((n) - 1) << 24 | (i))
 
 #define MAX_OUT_I16 0x2
 #define BATCH_PACKET_I16(n)\
-   BEGIN_RING_NI(chan, celsius, NV10TCL_VB_ELEMENT_U16, n);
+   BEGIN_RING_NI(chan, celsius, NV10TCL_VB_ELEMENT_U16, n)
 #define BATCH_OUT_I16(i0, i1)  \
-   OUT_RING(chan, (i1) << 16 | (i0));
+   OUT_RING(chan, (i1) << 16 | (i0))
 
 #define MAX_OUT_I32 0x1
 #define BATCH_PACKET_I32(n)\
-   BEGIN_RING_NI(chan, celsius, NV10TCL_VB_ELEMENT_U32, n);
+   BEGIN_RING_NI(chan, celsius, NV10TCL_VB_ELEMENT_U32, n)
 #define BATCH_OUT_I32(i)   \
-   OUT_RING(chan, i);
+   OUT_RING(chan, i)
 
 #define IMM_PACKET(m, n)   \
-   BEGIN_RING(chan, celsius, m, n);
+   BEGIN_RING(chan, celsius, m, n)
 #define IMM_OUT(x) \
-   OUT_RINGf(chan, x);
+   OUT_RINGf(chan, x)
 
 #define TAG(x) nv10_##x
 #include "nouveau_render_t.c"
diff --git a/src/mesa/drivers/dri/nouveau/nv20_render.c 
b/src/mesa/drivers/dri/nouveau/nv20_render.c
index a696ac1..d7c3e74 100644
--- a/src/mesa/drivers/dri/nouveau/nv20_render.c
+++ b/src/mesa/drivers/dri/nouveau/nv20_render.c
@@ -191,35 +191,35 @@ nv20_render_bind_vertices(GLcontext *ctx)
 
 #define BATCH_BEGIN(prim)  \
BEGIN_RING(chan, kelvin, NV20TCL_VERTEX_BEGIN_END, 1);  \
-   OUT_RING(chan, prim);
+   OUT_RING(chan, prim)
 #define BATCH_END()\
BEGIN_RING(chan, kelvin, NV20TCL_VERTEX_BEGIN_END, 1);  \
-   OUT_RING(chan, 0);
+   OUT_RING(chan, 0)
 
 #define MAX_PACKET 0x400
 
 #define MAX_OUT_L 0x100
 #define BATCH_PACKET_L(n)  \
-   BEGIN_RING_NI(chan, kelvin, NV20TCL_VB_VERTEX_BATCH, n);
+   BEGIN_RING_NI(chan, kelvin, NV20TCL_VB_VERTEX_BATCH, n)
 #define BATCH_OUT_L(i, n)  \
-   OUT_RING(chan, ((n) - 1) << 24 | (i));
+   OUT_RING(chan, ((n) - 1) << 24 | (i))
 
 #define MAX_OUT_I16 0x2
 #define BATCH_PACKET_I16(n)\
-   BEGIN_RING_NI(chan, kelvin, NV20TCL_VB_ELEMEN

Mesa (master): gallivm: Cleanup the rest of the flow module.

2010-10-09 Thread Jose Fonseca

Module: Mesa
Branch: master
Commit: 307df6a858dcab1bc10f3f52d9968acb3ea6d74f
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=307df6a858dcab1bc10f3f52d9968acb3ea6d74f

Author: José Fonseca 
Date:   Sat Oct  9 21:39:14 2010 +0100

gallivm: Cleanup the rest of the flow module.

---

 src/gallium/auxiliary/gallivm/lp_bld_flow.c |  210 +++
 src/gallium/auxiliary/gallivm/lp_bld_flow.h |   28 ++--
 src/gallium/drivers/llvmpipe/lp_state_fs.c  |   12 +--
 3 files changed, 39 insertions(+), 211 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c 
b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
index ac63bd5..99a49df 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
@@ -38,146 +38,6 @@
 #include "lp_bld_flow.h"
 
 
-#define LP_BUILD_FLOW_MAX_VARIABLES 64
-#define LP_BUILD_FLOW_MAX_DEPTH 32
-
-/**
- * Enumeration of all possible flow constructs.
- */
-enum lp_build_flow_construct_kind {
-   LP_BUILD_FLOW_SKIP,
-   LP_BUILD_FLOW_IF
-};
-
-
-/**
- * Early exit. Useful to skip to the end of a function or block when
- * the execution mask becomes zero or when there is an error condition.
- */
-struct lp_build_flow_skip
-{
-   /** Block to skip to */
-   LLVMBasicBlockRef block;
-};
-
-
-/**
- * Union of all possible flow constructs' data
- */
-union lp_build_flow_construct_data
-{
-   struct lp_build_flow_skip skip;
-};
-
-
-/**
- * Element of the flow construct stack.
- */
-struct lp_build_flow_construct
-{
-   enum lp_build_flow_construct_kind kind;
-   union lp_build_flow_construct_data data;
-};
-
-
-/**
- * All necessary data to generate LLVM control flow constructs.
- *
- * Besides keeping track of the control flow construct themselves we also
- * need to keep track of variables in order to generate SSA Phi values.
- */
-struct lp_build_flow_context
-{
-   LLVMBuilderRef builder;
-
-   /**
-* Control flow stack.
-*/
-   struct lp_build_flow_construct constructs[LP_BUILD_FLOW_MAX_DEPTH];
-   unsigned num_constructs;
-};
-
-
-struct lp_build_flow_context *
-lp_build_flow_create(LLVMBuilderRef builder)
-{
-   struct lp_build_flow_context *flow;
-
-   flow = CALLOC_STRUCT(lp_build_flow_context);
-   if(!flow)
-  return NULL;
-
-   flow->builder = builder;
-
-   return flow;
-}
-
-
-void
-lp_build_flow_destroy(struct lp_build_flow_context *flow)
-{
-   assert(flow->num_constructs == 0);
-   FREE(flow);
-}
-
-
-/**
- * Begin/push a new flow control construct, such as a loop, skip block
- * or variable scope.
- */
-static union lp_build_flow_construct_data *
-lp_build_flow_push(struct lp_build_flow_context *flow,
-   enum lp_build_flow_construct_kind kind)
-{
-   assert(flow->num_constructs < LP_BUILD_FLOW_MAX_DEPTH);
-   if(flow->num_constructs >= LP_BUILD_FLOW_MAX_DEPTH)
-  return NULL;
-
-   flow->constructs[flow->num_constructs].kind = kind;
-   return &flow->constructs[flow->num_constructs++].data;
-}
-
-
-/**
- * Return the current/top flow control construct on the stack.
- * \param kind  the expected type of the top-most construct
- */
-static union lp_build_flow_construct_data *
-lp_build_flow_peek(struct lp_build_flow_context *flow,
-   enum lp_build_flow_construct_kind kind)
-{
-   assert(flow->num_constructs);
-   if(!flow->num_constructs)
-  return NULL;
-
-   assert(flow->constructs[flow->num_constructs - 1].kind == kind);
-   if(flow->constructs[flow->num_constructs - 1].kind != kind)
-  return NULL;
-
-   return &flow->constructs[flow->num_constructs - 1].data;
-}
-
-
-/**
- * End/pop the current/top flow control construct on the stack.
- * \param kind  the expected type of the top-most construct
- */
-static union lp_build_flow_construct_data *
-lp_build_flow_pop(struct lp_build_flow_context *flow,
-  enum lp_build_flow_construct_kind kind)
-{
-   assert(flow->num_constructs);
-   if(!flow->num_constructs)
-  return NULL;
-
-   assert(flow->constructs[flow->num_constructs - 1].kind == kind);
-   if(flow->constructs[flow->num_constructs - 1].kind != kind)
-  return NULL;
-
-   return &flow->constructs[--flow->num_constructs].data;
-}
-
-
 /**
  * Note: this function has no dependencies on the flow code and could
  * be used elsewhere.
@@ -208,34 +68,18 @@ lp_build_insert_new_block(LLVMBuilderRef builder, const 
char *name)
 }
 
 
-static LLVMBasicBlockRef
-lp_build_flow_insert_block(struct lp_build_flow_context *flow)
-{
-   return lp_build_insert_new_block(flow->builder, "");
-}
-
-
 /**
  * Begin a "skip" block.  Inside this block we can test a condition and
  * skip to the end of the block if the condition is false.
  */
 void
-lp_build_flow_skip_begin(struct lp_build_flow_context *flow)
+lp_build_flow_skip_begin(struct lp_build_skip_context *skip,
+ LLVMBuilderRef builder)
 {
-   struct lp_build_flow_skip *skip;
-   LLVMBuilderRef builder;
-
-   skip = &lp_build_flow_push(flow, LP_BUILD_FLOW_SKIP

Mesa (master): gallivm: Simplify if/then/else implementation.

2010-10-09 Thread Jose Fonseca

Module: Mesa
Branch: master
Commit: d0ea4641597d23df2198fd76ed7430c06cef8c5d
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=d0ea4641597d23df2198fd76ed7430c06cef8c5d

Author: José Fonseca 
Date:   Sat Oct  9 21:14:05 2010 +0100

gallivm: Simplify if/then/else implementation.

No need for a flow stack anymore.

---

 src/gallium/auxiliary/gallivm/lp_bld_flow.c   |   79 +---
 src/gallium/auxiliary/gallivm/lp_bld_flow.h   |   10 ++-
 src/gallium/auxiliary/gallivm/lp_bld_sample.c |8 +--
 src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c |   14 +---
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c |   14 +---
 5 files changed, 34 insertions(+), 91 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c 
b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
index 22c2db8..ac63bd5 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
@@ -62,22 +62,11 @@ struct lp_build_flow_skip
 
 
 /**
- * if/else/endif.
- */
-struct lp_build_flow_if
-{
-   LLVMValueRef condition;
-   LLVMBasicBlockRef entry_block, true_block, false_block, merge_block;
-};
-
-
-/**
  * Union of all possible flow constructs' data
  */
 union lp_build_flow_construct_data
 {
struct lp_build_flow_skip skip;
-   struct lp_build_flow_if ifthen;
 };
 
 
@@ -468,24 +457,16 @@ lp_build_loop_end_cond(LLVMBuilderRef builder,
 
   Is built with:
 
- LLVMValueRef x = LLVMGetUndef();  // or something else
-
- flow = lp_build_flow_create(builder);
-
-lp_build_flow_scope_begin(flow);
+ // x needs an alloca variable
+ x = lp_build_alloca(builder, type, "x");
 
-   // x needs a phi node
-   lp_build_flow_scope_declare(flow, &x);
 
-   lp_build_if(ctx, flow, builder, cond);
-  x = LLVMAdd(1, 2);
-   lp_build_else(ctx);
-  x = LLVMAdd(2, 3);
-   lp_build_endif(ctx);
+ lp_build_if(ctx, builder, cond);
+LLVMBuildStore(LLVMBuildAdd(1, 2), x);
+ lp_build_else(ctx);
+LLVMBuildStore(LLVMBuildAdd(2, 3). x);
+ lp_build_endif(ctx);
 
-lp_build_flow_scope_end(flow);
-
- lp_build_flow_destroy(flow);
  */
 
 
@@ -494,22 +475,14 @@ lp_build_loop_end_cond(LLVMBuilderRef builder,
  * Begin an if/else/endif construct.
  */
 void
-lp_build_if(struct lp_build_if_state *ctx,
-struct lp_build_flow_context *flow,
+lp_build_if(struct lp_build_if_state *ifthen,
 LLVMBuilderRef builder,
 LLVMValueRef condition)
 {
LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
-   struct lp_build_flow_if *ifthen;
-
-   memset(ctx, 0, sizeof(*ctx));
-   ctx->builder = builder;
-   ctx->flow = flow;
-
-   /* push/create new scope */
-   ifthen = &lp_build_flow_push(flow, LP_BUILD_FLOW_IF)->ifthen;
-   assert(ifthen);
 
+   memset(ifthen, 0, sizeof *ifthen);
+   ifthen->builder = builder;
ifthen->condition = condition;
ifthen->entry_block = block;
 
@@ -529,19 +502,13 @@ lp_build_if(struct lp_build_if_state *ctx,
  * Begin else-part of a conditional
  */
 void
-lp_build_else(struct lp_build_if_state *ctx)
+lp_build_else(struct lp_build_if_state *ifthen)
 {
-   struct lp_build_flow_context *flow = ctx->flow;
-   struct lp_build_flow_if *ifthen;
-
-   ifthen = &lp_build_flow_peek(flow, LP_BUILD_FLOW_IF)->ifthen;
-   assert(ifthen);
-
/* create/insert false_block before the merge block */
ifthen->false_block = LLVMInsertBasicBlock(ifthen->merge_block, 
"if-false-block");
 
/* successive code goes into the else block */
-   LLVMPositionBuilderAtEnd(ctx->builder, ifthen->false_block);
+   LLVMPositionBuilderAtEnd(ifthen->builder, ifthen->false_block);
 }
 
 
@@ -549,39 +516,33 @@ lp_build_else(struct lp_build_if_state *ctx)
  * End a conditional.
  */
 void
-lp_build_endif(struct lp_build_if_state *ctx)
+lp_build_endif(struct lp_build_if_state *ifthen)
 {
-   struct lp_build_flow_context *flow = ctx->flow;
-   struct lp_build_flow_if *ifthen;
-
-   ifthen = &lp_build_flow_pop(flow, LP_BUILD_FLOW_IF)->ifthen;
-   assert(ifthen);
-
/* Insert branch to the merge block from current block */
-   LLVMBuildBr(ctx->builder, ifthen->merge_block);
+   LLVMBuildBr(ifthen->builder, ifthen->merge_block);
 
/***
 *** Now patch in the various branch instructions.
 ***/
 
/* Insert the conditional branch instruction at the end of entry_block */
-   LLVMPositionBuilderAtEnd(ctx->builder, ifthen->entry_block);
+   LLVMPositionBuilderAtEnd(ifthen->builder, ifthen->entry_block);
if (ifthen->false_block) {
   /* we have an else clause */
-  LLVMBuildCondBr(ctx->builder, ifthen->condition,
+  LLVMBuildCondBr(ifthen->builder, ifthen->condition,
   ifthen->true_block, ifthen->false_block);
}
else {
   /* no else clause */
-  LLVMBuildCondBr(ctx->builder, ifthen->condition,
+  LLVMBuildCondBr(ifthen->builder, ifthen->condition,
   ifthen->t

Mesa (master): gallivm: Factor out the SI->FP texture size conversion for SoA path too

2010-10-09 Thread Jose Fonseca

Module: Mesa
Branch: master
Commit: 1949f8c31507ed4a8774c380e6b604c328f4ec98
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=1949f8c31507ed4a8774c380e6b604c328f4ec98

Author: José Fonseca 
Date:   Sat Oct  9 20:26:11 2010 +0100

gallivm: Factor out the SI->FP texture size conversion for SoA path too

---

 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c |   90 +
 1 files changed, 56 insertions(+), 34 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 1af0318..3b63ac6 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -230,6 +230,7 @@ static void
 lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
 LLVMValueRef coord,
 LLVMValueRef length,
+LLVMValueRef length_f,
 boolean is_pot,
 unsigned wrap_mode,
 LLVMValueRef *x0_out,
@@ -240,7 +241,6 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context 
*bld,
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
-   LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, 
uint_coord_bld->one);
LLVMValueRef coord0, coord1, weight;
 
@@ -442,13 +442,13 @@ static LLVMValueRef
 lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
  LLVMValueRef coord,
  LLVMValueRef length,
+ LLVMValueRef length_f,
  boolean is_pot,
  unsigned wrap_mode)
 {
struct lp_build_context *coord_bld = &bld->coord_bld;
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
-   LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, 
uint_coord_bld->one);
LLVMValueRef icoord;

@@ -563,9 +563,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context 
*bld,
 static void
 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
   unsigned unit,
-  LLVMValueRef width_vec,
-  LLVMValueRef height_vec,
-  LLVMValueRef depth_vec,
+  LLVMValueRef size,
   LLVMValueRef row_stride_vec,
   LLVMValueRef img_stride_vec,
   LLVMValueRef data_ptr,
@@ -575,24 +573,45 @@ lp_build_sample_image_nearest(struct 
lp_build_sample_context *bld,
   LLVMValueRef colors_out[4])
 {
const unsigned dims = bld->dims;
+   LLVMValueRef width_vec;
+   LLVMValueRef height_vec;
+   LLVMValueRef depth_vec;
+   LLVMValueRef flt_size;
+   LLVMValueRef flt_width_vec;
+   LLVMValueRef flt_height_vec;
+   LLVMValueRef flt_depth_vec;
LLVMValueRef x, y, z;
 
+   lp_build_extract_image_sizes(bld,
+bld->int_size_type,
+bld->int_coord_type,
+size,
+&width_vec, &height_vec, &depth_vec);
+
+   flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
+
+   lp_build_extract_image_sizes(bld,
+bld->float_size_type,
+bld->coord_type,
+flt_size,
+&flt_width_vec, &flt_height_vec, 
&flt_depth_vec);
+
/*
 * Compute integer texcoords.
 */
-   x = lp_build_sample_wrap_nearest(bld, s, width_vec,
+   x = lp_build_sample_wrap_nearest(bld, s, width_vec, flt_width_vec,
 bld->static_state->pot_width,
 bld->static_state->wrap_s);
lp_build_name(x, "tex.x.wrapped");
 
if (dims >= 2) {
-  y = lp_build_sample_wrap_nearest(bld, t, height_vec,
+  y = lp_build_sample_wrap_nearest(bld, t, height_vec, flt_height_vec,
bld->static_state->pot_height,
bld->static_state->wrap_t);
   lp_build_name(y, "tex.y.wrapped");
 
   if (dims == 3) {
- z = lp_build_sample_wrap_nearest(bld, r, depth_vec,
+ z = lp_build_sample_wrap_nearest(bld, r, depth_vec, flt_depth_vec,
   bld->static_state->pot_depth,
   bld->static_state->wrap_r);
  lp_build_name(z, "t

Mesa (master): gallivm: Remove support for Phi generation.

2010-10-09 Thread Jose Fonseca

Module: Mesa
Branch: master
Commit: d45c379027054e563c4f4379fb69fc9f68612f75
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=d45c379027054e563c4f4379fb69fc9f68612f75

Author: José Fonseca 
Date:   Sat Oct  9 20:14:03 2010 +0100

gallivm: Remove support for Phi generation.

Simply rely on mem2reg pass. It's easier and more reliable.

---

 src/gallium/auxiliary/gallivm/lp_bld_flow.c |  211 ---
 src/gallium/auxiliary/gallivm/lp_bld_flow.h |   10 --
 src/gallium/drivers/llvmpipe/lp_state_fs.c  |4 -
 3 files changed, 0 insertions(+), 225 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c 
b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
index a5d65e9..22c2db8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
@@ -45,23 +45,12 @@
  * Enumeration of all possible flow constructs.
  */
 enum lp_build_flow_construct_kind {
-   LP_BUILD_FLOW_SCOPE,
LP_BUILD_FLOW_SKIP,
LP_BUILD_FLOW_IF
 };
 
 
 /**
- * Variable declaration scope.
- */
-struct lp_build_flow_scope
-{
-   /** Number of variables declared in this scope */
-   unsigned num_variables;
-};
-
-
-/**
  * Early exit. Useful to skip to the end of a function or block when
  * the execution mask becomes zero or when there is an error condition.
  */
@@ -69,11 +58,6 @@ struct lp_build_flow_skip
 {
/** Block to skip to */
LLVMBasicBlockRef block;
-
-   /** Number of variables declared at the beginning */
-   unsigned num_variables;
-
-   LLVMValueRef *phi;  /**< array [num_variables] */
 };
 
 
@@ -82,10 +66,6 @@ struct lp_build_flow_skip
  */
 struct lp_build_flow_if
 {
-   unsigned num_variables;
-
-   LLVMValueRef *phi;  /**< array [num_variables] */
-
LLVMValueRef condition;
LLVMBasicBlockRef entry_block, true_block, false_block, merge_block;
 };
@@ -96,7 +76,6 @@ struct lp_build_flow_if
  */
 union lp_build_flow_construct_data
 {
-   struct lp_build_flow_scope scope;
struct lp_build_flow_skip skip;
struct lp_build_flow_if ifthen;
 };
@@ -127,12 +106,6 @@ struct lp_build_flow_context
 */
struct lp_build_flow_construct constructs[LP_BUILD_FLOW_MAX_DEPTH];
unsigned num_constructs;
-
-   /**
-* Variable stack
-*/
-   LLVMValueRef *variables[LP_BUILD_FLOW_MAX_VARIABLES];
-   unsigned num_variables;
 };
 
 
@@ -155,7 +128,6 @@ void
 lp_build_flow_destroy(struct lp_build_flow_context *flow)
 {
assert(flow->num_constructs == 0);
-   assert(flow->num_variables == 0);
FREE(flow);
 }
 
@@ -218,93 +190,6 @@ lp_build_flow_pop(struct lp_build_flow_context *flow,
 
 
 /**
- * Begin a variable scope.
- *
- *
- */
-void
-lp_build_flow_scope_begin(struct lp_build_flow_context *flow)
-{
-   struct lp_build_flow_scope *scope;
-
-   scope = &lp_build_flow_push(flow, LP_BUILD_FLOW_SCOPE)->scope;
-   if(!scope)
-  return;
-
-   scope->num_variables = 0;
-}
-
-
-/**
- * Declare a variable.
- *
- * A variable is a named entity which can have different LLVMValueRef's at
- * different points of the program. This is relevant for control flow because
- * when there are multiple branches to a same location we need to replace
- * the variable's value with a Phi function as explained in
- * http://en.wikipedia.org/wiki/Static_single_assignment_form .
- *
- * We keep track of variables by keeping around a pointer to where they're
- * current.
- *
- * There are a few cautions to observe:
- *
- * - Variable's value must not be NULL. If there is no initial value then
- *   LLVMGetUndef() should be used.
- *
- * - Variable's value must be kept up-to-date. If the variable is going to be
- *   modified by a function then a pointer should be passed so that its value
- *   is accurate. Failure to do this will cause some of the variables'
- *   transient values to be lost, leading to wrong results.
- *
- * - A program should be written from top to bottom, by always appending
- *   instructions to the bottom with a single LLVMBuilderRef. Inserting and/or
- *   modifying existing statements will most likely lead to wrong results.
- *
- */
-void
-lp_build_flow_scope_declare(struct lp_build_flow_context *flow,
-LLVMValueRef *variable)
-{
-   struct lp_build_flow_scope *scope;
-
-   scope = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SCOPE)->scope;
-   if(!scope)
-  return;
-
-   assert(*variable);
-   if(!*variable)
-  return;
-
-   assert(flow->num_variables < LP_BUILD_FLOW_MAX_VARIABLES);
-   if(flow->num_variables >= LP_BUILD_FLOW_MAX_VARIABLES)
-  return;
-
-   flow->variables[flow->num_variables++] = variable;
-   ++scope->num_variables;
-}
-
-
-void
-lp_build_flow_scope_end(struct lp_build_flow_context *flow)
-{
-   struct lp_build_flow_scope *scope;
-
-   scope = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SCOPE)->scope;
-   if(!scope)
-  return;
-
-   assert(flow->num_variables >= scope->num_variables);
-   if(flow->num_variables < scope->num_variables) {
-  flow->num_variables = 0;
-

Mesa (master): gallivm: Use variables instead of Phis for cubemap selection.

2010-10-09 Thread Jose Fonseca

Module: Mesa
Branch: master
Commit: ea7b49028b15364a32988ec77ec88f2a6a591437
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=ea7b49028b15364a32988ec77ec88f2a6a591437

Author: José Fonseca 
Date:   Sat Oct  9 19:53:21 2010 +0100

gallivm: Use variables instead of Phis for cubemap selection.

---

 src/gallium/auxiliary/gallivm/lp_bld_sample.c |   62 ++--
 1 files changed, 26 insertions(+), 36 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c 
b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index 43ea8b1..acceae2 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -927,21 +927,15 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
{
   struct lp_build_flow_context *flow_ctx;
   struct lp_build_if_state if_ctx;
+  LLVMValueRef face_s_var;
+  LLVMValueRef face_t_var;
+  LLVMValueRef face_var;
 
   flow_ctx = lp_build_flow_create(bld->builder);
-  lp_build_flow_scope_begin(flow_ctx);
 
-  *face_s = bld->coord_bld.undef;
-  *face_t = bld->coord_bld.undef;
-  *face = bld->int_bld.undef;
-
-  lp_build_name(*face_s, "face_s");
-  lp_build_name(*face_t, "face_t");
-  lp_build_name(*face, "face");
-
-  lp_build_flow_scope_declare(flow_ctx, face_s);
-  lp_build_flow_scope_declare(flow_ctx, face_t);
-  lp_build_flow_scope_declare(flow_ctx, face);
+  face_s_var = lp_build_alloca(bld->builder, bld->coord_bld.vec_type, 
"face_s_var");
+  face_t_var = lp_build_alloca(bld->builder, bld->coord_bld.vec_type, 
"face_t_var");
+  face_var = lp_build_alloca(bld->builder, bld->int_bld.vec_type, 
"face_var");
 
   lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz);
   {
@@ -953,57 +947,53 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
  *face = lp_build_cube_face(bld, rx,
 PIPE_TEX_FACE_POS_X,
 PIPE_TEX_FACE_NEG_X);
+ LLVMBuildStore(bld->builder, *face_s, face_s_var);
+ LLVMBuildStore(bld->builder, *face_t, face_t_var);
+ LLVMBuildStore(bld->builder, *face, face_var);
   }
   lp_build_else(&if_ctx);
   {
- struct lp_build_flow_context *flow_ctx2;
  struct lp_build_if_state if_ctx2;
 
- LLVMValueRef face_s2 = bld->coord_bld.undef;
- LLVMValueRef face_t2 = bld->coord_bld.undef;
- LLVMValueRef face2 = bld->int_bld.undef;
-
- flow_ctx2 = lp_build_flow_create(bld->builder);
- lp_build_flow_scope_begin(flow_ctx2);
- lp_build_flow_scope_declare(flow_ctx2, &face_s2);
- lp_build_flow_scope_declare(flow_ctx2, &face_t2);
- lp_build_flow_scope_declare(flow_ctx2, &face2);
-
  ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, 
"");
 
- lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz);
+ lp_build_if(&if_ctx2, flow_ctx, bld->builder, ary_ge_arx_arz);
  {
 /* +/- Y face */
 LLVMValueRef sign = lp_build_sgn(float_bld, ry);
 LLVMValueRef ima = lp_build_cube_ima(coord_bld, t);
-face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
-face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
-face2 = lp_build_cube_face(bld, ry,
+*face_s = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
+*face_t = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
+*face = lp_build_cube_face(bld, ry,
PIPE_TEX_FACE_POS_Y,
PIPE_TEX_FACE_NEG_Y);
+LLVMBuildStore(bld->builder, *face_s, face_s_var);
+LLVMBuildStore(bld->builder, *face_t, face_t_var);
+LLVMBuildStore(bld->builder, *face, face_var);
  }
  lp_build_else(&if_ctx2);
  {
 /* +/- Z face */
 LLVMValueRef sign = lp_build_sgn(float_bld, rz);
 LLVMValueRef ima = lp_build_cube_ima(coord_bld, r);
-face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
-face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
-face2 = lp_build_cube_face(bld, rz,
+*face_s = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
+*face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
+*face = lp_build_cube_face(bld, rz,
PIPE_TEX_FACE_POS_Z,
PIPE_TEX_FACE_NEG_Z);
+LLVMBuildStore(bld->builder, *face_s, face_s_var);
+LLVMBuildStore(bld->builder, *face_t, face_t_var);
+LLVMBuildStore(bld->builder, *face, face_var);
  }
  lp_build_endif(&if_ctx2);
- lp_build_flow_scope_end(flow_ctx2);
- lp_build_flow_destroy(flow_ctx2);
- *face_s = face_s2;
-

Mesa (master): gallivm: Don't generate Phis for execution mask.

2010-10-09 Thread Jose Fonseca

Module: Mesa
Branch: master
Commit: cc40abad519cc0f765c6d8f6fad4154bed8dd9c2
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=cc40abad519cc0f765c6d8f6fad4154bed8dd9c2

Author: José Fonseca 
Date:   Sat Oct  9 12:55:31 2010 +0100

gallivm: Don't generate Phis for execution mask.

---

 src/gallium/auxiliary/gallivm/lp_bld_flow.c |   28 --
 src/gallium/auxiliary/gallivm/lp_bld_flow.h |5 +++-
 src/gallium/drivers/llvmpipe/lp_bld_depth.c |8 +++---
 src/gallium/drivers/llvmpipe/lp_state_fs.c  |8 ++
 4 files changed, 32 insertions(+), 17 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c 
b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
index 1ec33c7..a5d65e9 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
@@ -454,12 +454,15 @@ void
 lp_build_mask_check(struct lp_build_mask_context *mask)
 {
LLVMBuilderRef builder = mask->flow->builder;
+   LLVMValueRef value;
LLVMValueRef cond;
 
+   value = lp_build_mask_value(mask);
+
/* cond = (mask == 0) */
cond = LLVMBuildICmp(builder,
 LLVMIntEQ,
-LLVMBuildBitCast(builder, mask->value, mask->reg_type, 
""),
+LLVMBuildBitCast(builder, value, mask->reg_type, ""),
 LLVMConstNull(mask->reg_type),
 "");
 
@@ -485,14 +488,23 @@ lp_build_mask_begin(struct lp_build_mask_context *mask,
 
mask->flow = flow;
mask->reg_type = LLVMIntType(type.width * type.length);
-   mask->value = value;
+   mask->var = lp_build_alloca(flow->builder,
+   lp_build_int_vec_type(type),
+   "execution_mask");
+
+   LLVMBuildStore(flow->builder, value, mask->var);
 
-   lp_build_flow_scope_begin(flow);
-   lp_build_flow_scope_declare(flow, &mask->value);
lp_build_flow_skip_begin(flow);
 }
 
 
+LLVMValueRef
+lp_build_mask_value(struct lp_build_mask_context *mask)
+{
+   return LLVMBuildLoad(mask->flow->builder, mask->var, "");
+}
+
+
 /**
  * Update boolean mask with given value (bitwise AND).
  * Typically used to update the quad's pixel alive/killed mask
@@ -502,7 +514,10 @@ void
 lp_build_mask_update(struct lp_build_mask_context *mask,
  LLVMValueRef value)
 {
-   mask->value = LLVMBuildAnd( mask->flow->builder, mask->value, value, "");
+   value = LLVMBuildAnd(mask->flow->builder,
+lp_build_mask_value(mask),
+value, "");
+   LLVMBuildStore(mask->flow->builder, value, mask->var);
 }
 
 
@@ -513,8 +528,7 @@ LLVMValueRef
 lp_build_mask_end(struct lp_build_mask_context *mask)
 {
lp_build_flow_skip_end(mask->flow);
-   lp_build_flow_scope_end(mask->flow);
-   return mask->value;
+   return lp_build_mask_value(mask);
 }
 
 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h 
b/src/gallium/auxiliary/gallivm/lp_bld_flow.h
index 095c781..0fc6317 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h
@@ -77,7 +77,7 @@ struct lp_build_mask_context
 
LLVMTypeRef reg_type;
 
-   LLVMValueRef value;
+   LLVMValueRef var;
 };
 
 
@@ -87,6 +87,9 @@ lp_build_mask_begin(struct lp_build_mask_context *mask,
 struct lp_type type,
 LLVMValueRef value);
 
+LLVMValueRef
+lp_build_mask_value(struct lp_build_mask_context *mask);
+
 /**
  * Bitwise AND the mask with the given value, if a previous mask was set.
  */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c 
b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index 8d9be2e..e768493 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -473,7 +473,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
LLVMValueRef stencil_vals = NULL;
LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
-   LLVMValueRef orig_mask = mask->value;
+   LLVMValueRef orig_mask = lp_build_mask_value(mask);
LLVMValueRef front_facing = NULL;
 
/* Prototype a simpler path:
@@ -527,7 +527,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
  type.sign = 1;
  lp_build_context_init(&bld, builder, type);
 
- z_dst = lp_build_select(&bld, mask->value, z_src, z_dst);
+ z_dst = lp_build_select(&bld, lp_build_mask_value(mask), z_src, 
z_dst);
  z_dst = LLVMBuildShl(builder, z_dst, const_8_int, "z_dst");
  *zs_value = z_dst;
   }
@@ -710,7 +710,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
   }
 
   if (depth->writemask) {
- LLVMValueRef zselectmask = mask->value;
+ LLVMValueRef zselectmask = lp_build_mask_value(mask);
 
  /* mask off bits that failed Z test */
  zselectmask = LLVMBuildAnd(builder, zselectmask, z_pass, "");
@@ -810,7 +810,7 @@ lp_build_deferred_depth_write(LLVMBuilde

Mesa (master): gallivm: Special bri-linear computation path for unmodified rho.

2010-10-09 Thread Jose Fonseca

Module: Mesa
Branch: master
Commit: 679dd26623a53b5a052845bf4c6aef224cfdd5a2
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=679dd26623a53b5a052845bf4c6aef224cfdd5a2

Author: José Fonseca 
Date:   Sat Oct  9 12:12:03 2010 +0100

gallivm: Special bri-linear computation path for unmodified rho.

---

 src/gallium/auxiliary/gallivm/lp_bld_sample.c |  111 -
 1 files changed, 91 insertions(+), 20 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c 
b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index 5bc3c26..43ea8b1 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -294,31 +294,30 @@ lp_build_rho(struct lp_build_sample_context *bld,
  * TODO: This could be done in fixed point, where applicable.
  */
 static void
-lp_build_brilinear_lod(struct lp_build_sample_context *bld,
+lp_build_brilinear_lod(struct lp_build_context *bld,
LLVMValueRef lod,
double factor,
LLVMValueRef *out_lod_ipart,
LLVMValueRef *out_lod_fpart)
 {
-   struct lp_build_context *float_bld = &bld->float_bld;
LLVMValueRef lod_fpart;
-   float pre_offset = (factor - 0.5)/factor - 0.5;
-   float post_offset = 1 - factor;
+   double pre_offset = (factor - 0.5)/factor - 0.5;
+   double post_offset = 1 - factor;
 
if (0) {
   lp_build_printf(bld->builder, "lod = %f\n", lod);
}
 
-   lod = lp_build_add(float_bld, lod,
-  lp_build_const_vec(float_bld->type, pre_offset));
+   lod = lp_build_add(bld, lod,
+  lp_build_const_vec(bld->type, pre_offset));
 
-   lp_build_ifloor_fract(float_bld, lod, out_lod_ipart, &lod_fpart);
+   lp_build_ifloor_fract(bld, lod, out_lod_ipart, &lod_fpart);
 
-   lod_fpart = lp_build_mul(float_bld, lod_fpart,
-lp_build_const_vec(float_bld->type, factor));
+   lod_fpart = lp_build_mul(bld, lod_fpart,
+lp_build_const_vec(bld->type, factor));
 
-   lod_fpart = lp_build_add(float_bld, lod_fpart,
-lp_build_const_vec(float_bld->type, post_offset));
+   lod_fpart = lp_build_add(bld, lod_fpart,
+lp_build_const_vec(bld->type, post_offset));
 
/*
 * It's not necessary to clamp lod_fpart since:
@@ -335,6 +334,61 @@ lp_build_brilinear_lod(struct lp_build_sample_context *bld,
 }
 
 
+/*
+ * Combined log2 and brilinear lod computation.
+ *
+ * It's in all respects identical to calling lp_build_fast_log2() and
+ * lp_build_brilinear_lod() above, but by combining we can compute the integer
+ * and fractional part independently.
+ */
+static void
+lp_build_brilinear_rho(struct lp_build_context *bld,
+   LLVMValueRef rho,
+   double factor,
+   LLVMValueRef *out_lod_ipart,
+   LLVMValueRef *out_lod_fpart)
+{
+   LLVMValueRef lod_ipart;
+   LLVMValueRef lod_fpart;
+
+   const double pre_factor = (2*factor - 0.5)/(M_SQRT2*factor);
+   const double post_offset = 1 - 2*factor;
+
+   assert(bld->type.floating);
+
+   assert(lp_check_value(bld->type, rho));
+
+   /*
+* The pre factor will make the intersections with the exact powers of two
+* happen precisely where we want them to be, which means that the integer
+* part will not need any post adjustments.
+*/
+   rho = lp_build_mul(bld, rho,
+  lp_build_const_vec(bld->type, pre_factor));
+
+   /* ipart = ifloor(log2(rho)) */
+   lod_ipart = lp_build_extract_exponent(bld, rho, 0);
+
+   /* fpart = rho / 2**ipart */
+   lod_fpart = lp_build_extract_mantissa(bld, rho);
+
+   lod_fpart = lp_build_mul(bld, lod_fpart,
+lp_build_const_vec(bld->type, factor));
+
+   lod_fpart = lp_build_add(bld, lod_fpart,
+lp_build_const_vec(bld->type, post_offset));
+
+   /*
+* Like lp_build_brilinear_lod, it's not necessary to clamp lod_fpart since:
+* - the above expression will never produce numbers greater than one.
+* - the mip filtering branch is only taken if lod_fpart is positive
+*/
+
+   *out_lod_ipart = lod_ipart;
+   *out_lod_fpart = lod_fpart;
+}
+
+
 /**
  * Generate code to compute texture level of detail (lambda).
  * \param ddx  partial derivatives of (s, t, r, q) with respect to X
@@ -389,16 +443,32 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
 
  rho = lp_build_rho(bld, ddx, ddy);
 
- /* compute lod = log2(rho) */
- if ((mip_filter == PIPE_TEX_MIPFILTER_NONE ||
-  mip_filter == PIPE_TEX_MIPFILTER_NEAREST) &&
- !lod_bias &&
+ /*
+  * Compute lod = log2(rho)
+  */
+
+ if (!lod_bias &&
  !bld->static_state->lod_bias_non_zero &&
  !bld->static_state->apply_max_lod &&
  !bld->static_state->apply_min_lod) {
-

Mesa (master): gallivm: Less code duplication in log computation.

2010-10-09 Thread Jose Fonseca

Module: Mesa
Branch: master
Commit: 81a09c8a975ec1e727a7863823e39549c5096746
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=81a09c8a975ec1e727a7863823e39549c5096746

Author: José Fonseca 
Date:   Sat Oct  9 12:11:20 2010 +0100

gallivm: Less code duplication in log computation.

---

 src/gallium/auxiliary/gallivm/lp_bld_arit.c |  113 +++
 src/gallium/auxiliary/gallivm/lp_bld_arit.h |   10 ++-
 2 files changed, 88 insertions(+), 35 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c 
b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 6ab1350..2c049d0 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -2262,6 +2262,71 @@ lp_build_exp2(struct lp_build_context *bld,
 
 
 /**
+ * Extract the exponent of an IEEE-754 floating point value.
+ *
+ * Optionally apply an integer bias.
+ *
+ * Result is an integer value with
+ *
+ *   ifloor(log2(x)) + bias
+ */
+LLVMValueRef
+lp_build_extract_exponent(struct lp_build_context *bld,
+  LLVMValueRef x,
+  int bias)
+{
+   const struct lp_type type = bld->type;
+   unsigned mantissa = lp_mantissa(type);
+   LLVMValueRef res;
+
+   assert(type.floating);
+
+   assert(lp_check_value(bld->type, x));
+
+   x = LLVMBuildBitCast(bld->builder, x, bld->int_vec_type, "");
+
+   res = LLVMBuildLShr(bld->builder, x, lp_build_const_int_vec(type, 
mantissa), "");
+   res = LLVMBuildAnd(bld->builder, res, lp_build_const_int_vec(type, 255), 
"");
+   res = LLVMBuildSub(bld->builder, res, lp_build_const_int_vec(type, 127 - 
bias), "");
+
+   return res;
+}
+
+
+/**
+ * Extract the mantissa of a floating point value.
+ *
+ * Result is a floating point value with
+ *
+ *   x / 2**floor(log2(x))
+ */
+LLVMValueRef
+lp_build_extract_mantissa(struct lp_build_context *bld,
+  LLVMValueRef x)
+{
+   const struct lp_type type = bld->type;
+   unsigned mantissa = lp_mantissa(type);
+   LLVMValueRef mantmask = lp_build_const_int_vec(type, (1ULL << mantissa) - 
1);
+   LLVMValueRef one = LLVMConstBitCast(bld->one, bld->int_vec_type);
+   LLVMValueRef res;
+
+   assert(lp_check_value(bld->type, x));
+
+   assert(type.floating);
+
+   x = LLVMBuildBitCast(bld->builder, x, bld->int_vec_type, "");
+
+   /* res = x / 2**ipart */
+   res = LLVMBuildAnd(bld->builder, x, mantmask, "");
+   res = LLVMBuildOr(bld->builder, res, one, "");
+   res = LLVMBuildBitCast(bld->builder, res, bld->vec_type, "");
+
+   return res;
+}
+
+
+
+/**
  * Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[
  * These coefficients can be generate with
  * 
http://www.boost.org/doc/libs/1_36_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html
@@ -2385,7 +2450,10 @@ lp_build_log2(struct lp_build_context *bld,
 /**
  * Faster (and less accurate) log2.
  *
- *log2(x) = floor(log2(x)) + frac(x)
+ *log2(x) = floor(log2(x)) - 1 + x / 2**floor(log2(x))
+ *
+ * Piece-wise linear approximation, with exact results when x is a
+ * power of two.
  *
  * See http://www.flipcode.com/archives/Fast_log_Function.shtml
  */
@@ -2393,35 +2461,21 @@ LLVMValueRef
 lp_build_fast_log2(struct lp_build_context *bld,
LLVMValueRef x)
 {
-   const struct lp_type type = bld->type;
-   LLVMTypeRef vec_type = bld->vec_type;
-   LLVMTypeRef int_vec_type = bld->int_vec_type;
-
-   unsigned mantissa = lp_mantissa(type);
-   LLVMValueRef mantmask = lp_build_const_int_vec(type, (1ULL << mantissa) - 
1);
-   LLVMValueRef one = LLVMConstBitCast(bld->one, int_vec_type);
-
LLVMValueRef ipart;
LLVMValueRef fpart;
 
assert(lp_check_value(bld->type, x));
 
-   assert(type.floating);
-
-   x = LLVMBuildBitCast(bld->builder, x, int_vec_type, "");
+   assert(bld->type.floating);
 
/* ipart = floor(log2(x)) - 1 */
-   ipart = LLVMBuildLShr(bld->builder, x, lp_build_const_int_vec(type, 
mantissa), "");
-   ipart = LLVMBuildAnd(bld->builder, ipart, lp_build_const_int_vec(type, 
255), "");
-   ipart = LLVMBuildSub(bld->builder, ipart, lp_build_const_int_vec(type, 
128), "");
-   ipart = LLVMBuildSIToFP(bld->builder, ipart, vec_type, "");
+   ipart = lp_build_extract_exponent(bld, x, -1);
+   ipart = LLVMBuildSIToFP(bld->builder, ipart, bld->vec_type, "");
 
-   /* fpart = 1.0 + frac(x) */
-   fpart = LLVMBuildAnd(bld->builder, x, mantmask, "");
-   fpart = LLVMBuildOr(bld->builder, fpart, one, "");
-   fpart = LLVMBuildBitCast(bld->builder, fpart, vec_type, "");
+   /* fpart = x / 2**ipart */
+   fpart = lp_build_extract_mantissa(bld, x);
 
-   /* floor(log2(x)) + frac(x) */
+   /* ipart + fpart */
return LLVMBuildFAdd(bld->builder, ipart, fpart, "");
 }
 
@@ -2435,27 +2489,18 @@ LLVMValueRef
 lp_build_ilog2(struct lp_build_context *bld,
LLVMValueRef x)
 {
-   const struct lp_type type = bld->type;
-   LLVMTypeRef int_vec_type = bld->int_vec_type;
-
-   unsigned mantissa = lp_mantissa(type);
-   LLVMValue

Mesa (master): util: Defined M_SQRT2 when not available.

2010-10-09 Thread Jose Fonseca

Module: Mesa
Branch: master
Commit: 52427f0ba703f933b70d669ae565c7aeb733236d
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=52427f0ba703f933b70d669ae565c7aeb733236d

Author: José Fonseca 
Date:   Sat Oct  9 12:10:07 2010 +0100

util: Defined M_SQRT2 when not available.

---

 src/gallium/auxiliary/util/u_math.h |5 +
 1 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_math.h 
b/src/gallium/auxiliary/util/u_math.h
index 69a7681..37294b7 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -118,6 +118,11 @@ __inline double __cdecl atan2(double val)
 #endif
 
 
+#ifndef M_SQRT2
+#define M_SQRT2 1.41421356237309504880
+#endif
+
+
 #if defined(_MSC_VER) 
 
 #if _MSC_VER < 1400 && !defined(__cplusplus) || 
defined(PIPE_SUBSYSTEM_WINDOWS_CE)

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): gallivm: Handle code have ret correctly.

2010-10-09 Thread Jose Fonseca

Module: Mesa
Branch: master
Commit: 53d7f5e107b82550024a57232fd2f76e39de
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=53d7f5e107b82550024a57232fd2f76e39de

Author: José Fonseca 
Date:   Sat Oct  9 12:08:25 2010 +0100

gallivm: Handle code have ret correctly.

Stop disassembling on unconditional backwards jumps.

---

 src/gallium/auxiliary/gallivm/lp_bld_debug.c |6 --
 1 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.c 
b/src/gallium/auxiliary/gallivm/lp_bld_debug.c
index d3a5aff..8c1df0d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.c
@@ -115,8 +115,10 @@ lp_disassemble(const void* func)
  }
   }
 
-  if ((ud_insn_off(&ud_obj) >= max_jmp_pc && ud_obj.mnemonic == UD_Iret) ||
-   ud_obj.mnemonic == UD_Iinvalid)
+  if (ud_obj.mnemonic == UD_Iinvalid ||
+  (ud_insn_off(&ud_obj) >= max_jmp_pc &&
+   (ud_obj.mnemonic == UD_Iret ||
+ud_obj.mnemonic == UD_Ijmp)))
  break;
}
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): llvmpipe: Fix MSVC build. Enable the new SSE2 code on non SSE3 systems.

2010-10-09 Thread Jose Fonseca

Module: Mesa
Branch: master
Commit: edba53024f85a27fcbca7cbe139ceda172406653
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=edba53024f85a27fcbca7cbe139ceda172406653

Author: José Fonseca 
Date:   Wed Oct  6 21:01:38 2010 +0100

llvmpipe: Fix MSVC build. Enable the new SSE2 code on non SSE3 systems.

---

 src/gallium/drivers/llvmpipe/lp_tile_soa.py |   86 ++-
 1 files changed, 44 insertions(+), 42 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py 
b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
index c76549c..e49f9c6 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py
+++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
@@ -289,29 +289,30 @@ def generate_format_write(format, src_channel, 
src_native_type, src_suffix):
 print
 
 
-def generate_ssse3():
+def generate_sse2():
 print '''
 #if defined(PIPE_ARCH_SSE)
 
 #include "util/u_sse.h"
 
-static INLINE void swz4( __m128i x, 
- __m128i y, 
- __m128i z, 
- __m128i w, 
- __m128i *a, 
- __m128i *b, 
- __m128i *c, 
- __m128i *d)
+static ALWAYS_INLINE void 
+swz4( const __m128i * restrict x, 
+  const __m128i * restrict y, 
+  const __m128i * restrict z, 
+  const __m128i * restrict w, 
+  __m128i * restrict a, 
+  __m128i * restrict b, 
+  __m128i * restrict c, 
+  __m128i * restrict d)
 {
__m128i i, j, k, l;
__m128i m, n, o, p;
__m128i e, f, g, h;
 
-   m = _mm_unpacklo_epi8(x,y);
-   n = _mm_unpackhi_epi8(x,y);
-   o = _mm_unpacklo_epi8(z,w);
-   p = _mm_unpackhi_epi8(z,w);
+   m = _mm_unpacklo_epi8(*x,*y);
+   n = _mm_unpackhi_epi8(*x,*y);
+   o = _mm_unpacklo_epi8(*z,*w);
+   p = _mm_unpackhi_epi8(*z,*w);
 
i = _mm_unpacklo_epi16(m,n);
j = _mm_unpackhi_epi16(m,n);
@@ -329,22 +330,23 @@ static INLINE void swz4( __m128i x,
*d = _mm_unpackhi_epi64(f,h);
 }
 
-static INLINE void unswz4( __m128i a, 
-   __m128i b, 
-   __m128i c, 
-   __m128i d, 
-   __m128i *x, 
-   __m128i *y, 
-   __m128i *z, 
-   __m128i *w)
+static ALWAYS_INLINE void
+unswz4( const __m128i * restrict a, 
+const __m128i * restrict b, 
+const __m128i * restrict c, 
+const __m128i * restrict d, 
+__m128i * restrict x, 
+__m128i * restrict y, 
+__m128i * restrict z, 
+__m128i * restrict w)
 {
__m128i i, j, k, l;
__m128i m, n, o, p;
 
-   i = _mm_unpacklo_epi8(a,b);
-   j = _mm_unpackhi_epi8(a,b);
-   k = _mm_unpacklo_epi8(c,d);
-   l = _mm_unpackhi_epi8(c,d);
+   i = _mm_unpacklo_epi8(*a,*b);
+   j = _mm_unpackhi_epi8(*a,*b);
+   k = _mm_unpacklo_epi8(*c,*d);
+   l = _mm_unpackhi_epi8(*c,*d);
 
m = _mm_unpacklo_epi16(i,k);
n = _mm_unpackhi_epi16(i,k);
@@ -358,9 +360,9 @@ static INLINE void unswz4( __m128i a,
 }
 
 static void
-lp_tile_b8g8r8a8_unorm_swizzle_4ub_ssse3(uint8_t *dst,
- const uint8_t *src, unsigned 
src_stride,
- unsigned x0, unsigned y0)
+lp_tile_b8g8r8a8_unorm_swizzle_4ub_sse2(uint8_t * restrict dst,
+const uint8_t * restrict src, unsigned 
src_stride,
+unsigned x0, unsigned y0)
 {
__m128i *dst128 = (__m128i *) dst;
unsigned x, y;
@@ -372,10 +374,10 @@ lp_tile_b8g8r8a8_unorm_swizzle_4ub_ssse3(uint8_t *dst,
   const uint8_t *src_row = src;
 
   for (x = 0; x < TILE_SIZE; x += 4) {
- swz4(*(__m128i *) (src_row + 0 * src_stride),
-  *(__m128i *) (src_row + 1 * src_stride),
-  *(__m128i *) (src_row + 2 * src_stride),
-  *(__m128i *) (src_row + 3 * src_stride),
+ swz4((const __m128i *) (src_row + 0 * src_stride),
+  (const __m128i *) (src_row + 1 * src_stride),
+  (const __m128i *) (src_row + 2 * src_stride),
+  (const __m128i *) (src_row + 3 * src_stride),
   dst128 + 2, /* b */
   dst128 + 1, /* g */
   dst128 + 0, /* r */
@@ -390,8 +392,8 @@ lp_tile_b8g8r8a8_unorm_swizzle_4ub_ssse3(uint8_t *dst,
 }
 
 static void
-lp_tile_b8g8r8a8_unorm_unswizzle_4ub_ssse3(const uint8_t *src,
-  uint8_t *dst, unsigned dst_stride,
+lp_tile_b8g8r8a8_unorm_unswizzle_4ub_sse2(const uint8_t * restrict src,
+  uint8_t * restrict dst, unsigned 
dst_stride,
   unsigned x0, unsigned y0)
 {
unsigned int x, y;
@@ -404,10 +406,10 @@ lp_tile_b8g8r8a8_unorm_unswizzle_4ub_ssse3(const uint8_t 
*src,
   const uint8_t *dst_row = dst;
 
   for (x = 0; x < TILE_S

Mesa (master): llvmpipe: simplified SSE2 swz/unswz routines

2010-10-09 Thread Jose Fonseca

Module: Mesa
Branch: master
Commit: 2de720dc8ff89676aa7bb5eb74aeb6d44e028fa2
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=2de720dc8ff89676aa7bb5eb74aeb6d44e028fa2

Author: Keith Whitwell 
Date:   Fri Oct  1 15:13:51 2010 +0100

llvmpipe: simplified SSE2 swz/unswz routines

We've been using these in the linear path for a while now.  Based on
Chris's SSSE3 code, but using only sse2 opcodes.  Speed seems to be
identical, but code is simpler & removes dependency on SSE3.

Should be easier to extend to other rgba8 formats.

---

 src/gallium/drivers/llvmpipe/SConscript |8 +-
 src/gallium/drivers/llvmpipe/lp_tile_soa.py |  245 ---
 2 files changed, 107 insertions(+), 146 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/SConscript 
b/src/gallium/drivers/llvmpipe/SConscript
index 650435f..774ad91 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -27,13 +27,7 @@ env.Depends('lp_tile_soa.c', [
 ])
 
 
-# Only enable SSSE3 for lp_tile_soa_sse3.c
-ssse3_env = env.Clone()
-if env['gcc'] \
-   and distutils.version.LooseVersion(env['CCVERSION']) >= 
distutils.version.LooseVersion('4.3') \
-   and env['machine'] in ('x86', 'x86_64') :
-ssse3_env.Append(CCFLAGS = ['-mssse3'])
-lp_tile_soa_os = ssse3_env.SharedObject('lp_tile_soa.c')
+lp_tile_soa_os = env.SharedObject('lp_tile_soa.c')
 
 
 llvmpipe = env.ConvenienceLibrary(
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py 
b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
index 2ba3905..c76549c 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py
+++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
@@ -295,87 +295,98 @@ def generate_ssse3():
 
 #include "util/u_sse.h"
 
+static INLINE void swz4( __m128i x, 
+ __m128i y, 
+ __m128i z, 
+ __m128i w, 
+ __m128i *a, 
+ __m128i *b, 
+ __m128i *c, 
+ __m128i *d)
+{
+   __m128i i, j, k, l;
+   __m128i m, n, o, p;
+   __m128i e, f, g, h;
+
+   m = _mm_unpacklo_epi8(x,y);
+   n = _mm_unpackhi_epi8(x,y);
+   o = _mm_unpacklo_epi8(z,w);
+   p = _mm_unpackhi_epi8(z,w);
+
+   i = _mm_unpacklo_epi16(m,n);
+   j = _mm_unpackhi_epi16(m,n);
+   k = _mm_unpacklo_epi16(o,p);
+   l = _mm_unpackhi_epi16(o,p);
+
+   e = _mm_unpacklo_epi8(i,j);
+   f = _mm_unpackhi_epi8(i,j);
+   g = _mm_unpacklo_epi8(k,l);
+   h = _mm_unpackhi_epi8(k,l);
+
+   *a = _mm_unpacklo_epi64(e,g);
+   *b = _mm_unpackhi_epi64(e,g);
+   *c = _mm_unpacklo_epi64(f,h);
+   *d = _mm_unpackhi_epi64(f,h);
+}
+
+static INLINE void unswz4( __m128i a, 
+   __m128i b, 
+   __m128i c, 
+   __m128i d, 
+   __m128i *x, 
+   __m128i *y, 
+   __m128i *z, 
+   __m128i *w)
+{
+   __m128i i, j, k, l;
+   __m128i m, n, o, p;
+
+   i = _mm_unpacklo_epi8(a,b);
+   j = _mm_unpackhi_epi8(a,b);
+   k = _mm_unpacklo_epi8(c,d);
+   l = _mm_unpackhi_epi8(c,d);
+
+   m = _mm_unpacklo_epi16(i,k);
+   n = _mm_unpackhi_epi16(i,k);
+   o = _mm_unpacklo_epi16(j,l);
+   p = _mm_unpackhi_epi16(j,l);
+
+   *x = _mm_unpacklo_epi64(m,n);
+   *y = _mm_unpackhi_epi64(m,n);
+   *z = _mm_unpacklo_epi64(o,p);
+   *w = _mm_unpackhi_epi64(o,p);
+}
+
 static void
 lp_tile_b8g8r8a8_unorm_swizzle_4ub_ssse3(uint8_t *dst,
  const uint8_t *src, unsigned 
src_stride,
  unsigned x0, unsigned y0)
 {
-
+   __m128i *dst128 = (__m128i *) dst;
unsigned x, y;
-   __m128i *pdst = (__m128i*) dst;
-   const uint8_t *ysrc0 = src + y0*src_stride + x0*sizeof(uint32_t);
-   unsigned int tile_stridex = src_stride*(TILE_VECTOR_HEIGHT - 1) - 
sizeof(uint32_t)*TILE_VECTOR_WIDTH;
-   unsigned int tile_stridey = src_stride*TILE_VECTOR_HEIGHT;
-
-   const __m128i shuffle00 = 
_mm_setr_epi8(0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
-   const __m128i shuffle01 = 
_mm_setr_epi8(0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
-   const __m128i shuffle02 = 
_mm_setr_epi8(0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
-   const __m128i shuffle03 = 
_mm_setr_epi8(0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
-
-   const __m128i shuffle10 = 
_mm_setr_epi8(0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
-   const __m128i shuffle11 = 
_mm_setr_epi8(0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
-   const __m128i shuffle12 = 
_mm_setr_epi8(0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
-   const __m128i shuffle13 = 
_mm_setr_epi8(0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff,0xff,0x

Mesa (master): llvmpipe: clean up shader pre/postamble, try to catch more early-z

2010-10-09 Thread Keith Whitwell

Module: Mesa
Branch: master
Commit: 5b7eb868fde98388d80601d8dea39e679828f42f
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=5b7eb868fde98388d80601d8dea39e679828f42f

Author: Keith Whitwell 
Date:   Sat Oct  9 11:28:00 2010 +0100

llvmpipe: clean up shader pre/postamble, try to catch more early-z

Specifically, can do early-depth-test even when alphatest or
kill-pixel are active, providing we defer the actual z write until the
final mask is available.

Improves demos/fire.c especially in the case where you get close to
the trees.

---

 src/gallium/drivers/llvmpipe/lp_bld_depth.c |   40 -
 src/gallium/drivers/llvmpipe/lp_bld_depth.h |   15 ++-
 src/gallium/drivers/llvmpipe/lp_state_fs.c  |  241 +--
 3 files changed, 193 insertions(+), 103 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c 
b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index 6b8ffb6..8d9be2e 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -410,7 +410,7 @@ get_s_shift_and_mask(const struct util_format_description 
*format_desc,
  * \param maskvalue is the depth test mask.
  * \param counter is a pointer of the uint32 counter.
  */
-static void
+void
 lp_build_occlusion_count(LLVMBuilderRef builder,
  struct lp_type type,
  LLVMValueRef maskvalue,
@@ -462,7 +462,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
 LLVMValueRef z_src,
 LLVMValueRef zs_dst_ptr,
 LLVMValueRef face,
-LLVMValueRef counter,
+LLVMValueRef *zs_value,
 boolean do_branch)
 {
struct lp_type type;
@@ -524,17 +524,14 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
* storage.
*/
   if (depth->writemask) {
- type.sign = 0;
+ type.sign = 1;
  lp_build_context_init(&bld, builder, type);
 
  z_dst = lp_build_select(&bld, mask->value, z_src, z_dst);
  z_dst = LLVMBuildShl(builder, z_dst, const_8_int, "z_dst");
- LLVMBuildStore(builder, z_dst, zs_dst_ptr);
+ *zs_value = z_dst;
   }
 
-  if (counter)
- lp_build_occlusion_count(builder, type, mask->value, counter);
-
   return;
}
 
@@ -779,7 +776,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
   else
  zs_dst = stencil_vals;
 
-  LLVMBuildStore(builder, zs_dst, zs_dst_ptr);
+  *zs_value = zs_dst;
}
 
if (s_pass_mask)
@@ -791,6 +788,29 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
if (do_branch)
   lp_build_mask_check(mask);
 
-   if (counter)
-  lp_build_occlusion_count(builder, type, mask->value, counter);
+}
+
+
+
+void
+lp_build_deferred_depth_write(LLVMBuilderRef builder,
+  struct lp_type z_src_type,
+  const struct util_format_description 
*format_desc,
+  struct lp_build_mask_context *mask,
+  LLVMValueRef zs_dst_ptr,
+  LLVMValueRef zs_value)
+{
+   struct lp_type type;
+   struct lp_build_context bld;
+   LLVMValueRef z_dst;
+
+   /* XXX: pointlessly redo type logic:
+*/
+   type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length);
+   lp_build_context_init(&bld, builder, type);
+
+   z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval");
+   z_dst = lp_build_select(&bld, mask->value, zs_value, z_dst);
+
+   LLVMBuildStore(builder, z_dst, zs_dst_ptr);
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h 
b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
index 2a63bb9..0f89668 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
@@ -61,8 +61,21 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
 LLVMValueRef zs_src,
 LLVMValueRef zs_dst_ptr,
 LLVMValueRef facing,
-LLVMValueRef counter,
+LLVMValueRef *zs_value,
 boolean do_branch);
 
+void
+lp_build_deferred_depth_write(LLVMBuilderRef builder,
+  struct lp_type z_src_type,
+  const struct util_format_description 
*format_desc,
+  struct lp_build_mask_context *mask,
+  LLVMValueRef zs_dst_ptr,
+  LLVMValueRef zs_value);
+
+void
+lp_build_occlusion_count(LLVMBuilderRef builder,
+ struct lp_type type,
+ LLVMValueRef maskvalue,
+ LLVMValueRef counter);
 
 #endif /* !LP_BLD_DEPTH_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c 
b/src/gallium/drivers/llvmpipe/lp_state_fs.c
inde

Mesa (master): llvmpipe: try to be sensible about whether to branch after mask updates

2010-10-09 Thread Keith Whitwell

Module: Mesa
Branch: master
Commit: aa4cb5e2d8d48c7dcc9653c61a9e25494e3e7b2a
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=aa4cb5e2d8d48c7dcc9653c61a9e25494e3e7b2a

Author: Keith Whitwell 
Date:   Thu Oct  7 15:01:07 2010 +0100

llvmpipe: try to be sensible about whether to branch after mask updates

Don't branch more than once in quick succession.  Don't branch at the
end of the shader.

---

 src/gallium/auxiliary/gallivm/lp_bld_flow.c |6 +--
 src/gallium/auxiliary/gallivm/lp_bld_flow.h |3 +
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c |   11 +++-
 src/gallium/drivers/llvmpipe/lp_bld_alpha.c |6 ++-
 src/gallium/drivers/llvmpipe/lp_bld_alpha.h |3 +-
 src/gallium/drivers/llvmpipe/lp_bld_depth.c |   14 -
 src/gallium/drivers/llvmpipe/lp_bld_depth.h |3 +-
 src/gallium/drivers/llvmpipe/lp_state_fs.c  |   69 ++
 8 files changed, 80 insertions(+), 35 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c 
b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
index cd5fbc2..1ec33c7 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
@@ -450,7 +450,7 @@ lp_build_flow_skip_end(struct lp_build_flow_context *flow)
 /**
  * Check if the mask predicate is zero.  If so, jump to the end of the block.
  */
-static void
+void
 lp_build_mask_check(struct lp_build_mask_context *mask)
 {
LLVMBuilderRef builder = mask->flow->builder;
@@ -490,8 +490,6 @@ lp_build_mask_begin(struct lp_build_mask_context *mask,
lp_build_flow_scope_begin(flow);
lp_build_flow_scope_declare(flow, &mask->value);
lp_build_flow_skip_begin(flow);
-
-   lp_build_mask_check(mask);
 }
 
 
@@ -505,8 +503,6 @@ lp_build_mask_update(struct lp_build_mask_context *mask,
  LLVMValueRef value)
 {
mask->value = LLVMBuildAnd( mask->flow->builder, mask->value, value, "");
-
-   lp_build_mask_check(mask);
 }
 
 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h 
b/src/gallium/auxiliary/gallivm/lp_bld_flow.h
index fffb493..095c781 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h
@@ -94,6 +94,9 @@ void
 lp_build_mask_update(struct lp_build_mask_context *mask,
  LLVMValueRef value);
 
+void
+lp_build_mask_check(struct lp_build_mask_context *mask);
+
 LLVMValueRef
 lp_build_mask_end(struct lp_build_mask_context *mask);
 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 441aeba..03020a6 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -959,8 +959,13 @@ emit_kil(
   }
}
 
-   if(mask)
+   if(mask) {
   lp_build_mask_update(bld->mask, mask);
+
+  /* XXX: figure out if we are at the end of the shader and skip this:
+   */
+  lp_build_mask_check(bld->mask);
+   }
 }
 
 
@@ -987,6 +992,10 @@ emit_kilp(struct lp_build_tgsi_soa_context *bld,
}
 
lp_build_mask_update(bld->mask, mask);
+
+   /* XXX: figure out if we are at the end of the shader and skip this:
+*/
+   lp_build_mask_check(bld->mask);
 }
 
 static void
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c 
b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c
index e28efe7..e506437 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c
@@ -48,7 +48,8 @@ lp_build_alpha_test(LLVMBuilderRef builder,
 struct lp_type type,
 struct lp_build_mask_context *mask,
 LLVMValueRef alpha,
-LLVMValueRef ref)
+LLVMValueRef ref,
+boolean do_branch)
 {
struct lp_build_context bld;
LLVMValueRef test;
@@ -60,4 +61,7 @@ lp_build_alpha_test(LLVMBuilderRef builder,
lp_build_name(test, "alpha_mask");
 
lp_build_mask_update(mask, test);
+
+   if (do_branch)
+  lp_build_mask_check(mask);
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h 
b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
index 44603b4..27ca8aa 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
@@ -48,7 +48,8 @@ lp_build_alpha_test(LLVMBuilderRef builder,
 struct lp_type type,
 struct lp_build_mask_context *mask,
 LLVMValueRef alpha,
-LLVMValueRef ref);
+LLVMValueRef ref,
+boolean do_branch);
 
 
 #endif /* !LP_BLD_ALPHA_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c 
b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index 09b82fb..6b8ffb6 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -462,7 +462,8 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
 LLVMValueRef z_src,

Mesa (master): gallivm: simpler uint8->float conversions

2010-10-09 Thread Keith Whitwell

Module: Mesa
Branch: master
Commit: 2ef6f75ab410bb188e028024e18891d7877febad
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=2ef6f75ab410bb188e028024e18891d7877febad

Author: Keith Whitwell 
Date:   Wed Oct  6 19:09:03 2010 +0100

gallivm: simpler uint8->float conversions

LLVM seems to find it easier to reason about these than our
mantissa-manipulation code.

---

 src/gallium/auxiliary/gallivm/lp_bld_conv.c |   10 ++
 1 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c 
b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
index 3abb192..20aa257 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
@@ -178,6 +178,16 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
 
assert(dst_type.floating);
 
+   /* Special-case int8->float, though most cases could be handled
+* this way:
+*/
+   if (src_width == 8) {
+  scale = 1.0/255.0;
+  res = LLVMBuildSIToFP(builder, src, vec_type, "");
+  res = LLVMBuildFMul(builder, res, lp_build_const_vec(dst_type, scale), 
"");
+  return res;
+   }
+
mantissa = lp_mantissa(dst_type);
 
n = MIN2(mantissa, src_width);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): gallivm: prefer blendvb for integer arguments

2010-10-09 Thread Keith Whitwell

Module: Mesa
Branch: master
Commit: c79f162367b99d9438bd1589ecfdeba69baa9d3d
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=c79f162367b99d9438bd1589ecfdeba69baa9d3d

Author: Keith Whitwell 
Date:   Wed Oct  6 19:10:30 2010 +0100

gallivm: prefer blendvb for integer arguments

---

 src/gallium/auxiliary/gallivm/lp_bld_logic.c |6 --
 1 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c 
b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
index ce5d021..026b60a 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
@@ -462,10 +462,12 @@ lp_build_select(struct lp_build_context *bld,
   LLVMTypeRef arg_type;
   LLVMValueRef args[3];
 
-  if (type.width == 64) {
+  if (type.floating &&
+  type.width == 64) {
  intrinsic = "llvm.x86.sse41.blendvpd";
  arg_type = LLVMVectorType(LLVMDoubleType(), 2);
-  } else if (type.width == 32) {
+  } else if (type.floating &&
+ type.width == 32) {
  intrinsic = "llvm.x86.sse41.blendvps";
  arg_type = LLVMVectorType(LLVMFloatType(), 4);
   } else {

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): gallivm: specialized x8z24 depthtest path

2010-10-09 Thread Keith Whitwell

Module: Mesa
Branch: master
Commit: d2cf757f44f4ee5554243f3279483a25886d9927
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=d2cf757f44f4ee5554243f3279483a25886d9927

Author: Keith Whitwell 
Date:   Wed Oct  6 18:21:56 2010 +0100

gallivm: specialized x8z24 depthtest path

Avoid unnecessary masking of non-existent stencil component.

---

 src/gallium/drivers/llvmpipe/lp_bld_depth.c |   95 ++-
 src/gallium/drivers/llvmpipe/lp_state_fs.c  |   30 +
 2 files changed, 94 insertions(+), 31 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c 
b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index 7eabe05..09b82fb 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -71,6 +71,7 @@
 #include "gallivm/lp_bld_arit.h"
 #include "gallivm/lp_bld_bitarit.h"
 #include "gallivm/lp_bld_const.h"
+#include "gallivm/lp_bld_conv.h"
 #include "gallivm/lp_bld_logic.h"
 #include "gallivm/lp_bld_flow.h"
 #include "gallivm/lp_bld_intr.h"
@@ -446,7 +447,7 @@ lp_build_occlusion_count(LLVMBuilderRef builder,
  * \param format_desc  description of the depth/stencil surface
  * \param mask  the alive/dead pixel mask for the quad (vector)
  * \param stencil_refs  the front/back stencil ref values (scalar)
- * \param z_src  the incoming depth/stencil values (a 2x2 quad)
+ * \param z_src  the incoming depth/stencil values (a 2x2 quad, float32)
  * \param zs_dst_ptr  pointer to depth/stencil values in framebuffer
  * \param facing  contains float value indicating front/back facing polygon
  */
@@ -454,7 +455,7 @@ void
 lp_build_depth_stencil_test(LLVMBuilderRef builder,
 const struct pipe_depth_state *depth,
 const struct pipe_stencil_state stencil[2],
-struct lp_type type,
+struct lp_type z_src_type,
 const struct util_format_description *format_desc,
 struct lp_build_mask_context *mask,
 LLVMValueRef stencil_refs[2],
@@ -463,6 +464,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
 LLVMValueRef face,
 LLVMValueRef counter)
 {
+   struct lp_type type;
struct lp_build_context bld;
struct lp_build_context sbld;
struct lp_type s_type;
@@ -473,6 +475,95 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
LLVMValueRef orig_mask = mask->value;
LLVMValueRef front_facing = NULL;
 
+   /* Prototype a simpler path:
+*/
+   if (z_src_type.floating &&
+   format_desc->format == PIPE_FORMAT_X8Z24_UNORM &&
+   depth->enabled) 
+   {
+  LLVMValueRef zscaled;
+  LLVMValueRef const_ff_float;
+  LLVMValueRef const_8_int;
+  LLVMTypeRef int32_vec_type;
+
+  /* We know the values in z_dst are all >= 0, so allow
+   * lp_build_compare to use signed compare intrinsics:
+   */
+  type.floating = 0;
+  type.fixed = 0;
+  type.sign = 1;
+  type.norm = 1;
+  type.width = 32;
+  type.length = z_src_type.length;
+
+  int32_vec_type = LLVMVectorType(LLVMInt32Type(), z_src_type.length);
+
+  const_8_int = lp_build_const_int_vec(type, 8);
+  const_ff_float = lp_build_const_vec(z_src_type, (float)0xff);
+
+  zscaled = LLVMBuildFMul(builder, z_src, const_ff_float, "zscaled");
+  z_src = LLVMBuildFPToSI(builder, zscaled, int32_vec_type, "z_src");
+  
+  /* Load current z/stencil value from z/stencil buffer */
+  z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval");
+  z_dst = LLVMBuildLShr(builder, z_dst, const_8_int, "z_dst");
+
+  /* compare src Z to dst Z, returning 'pass' mask */
+  z_pass = lp_build_compare(builder,
+type,
+depth->func, z_src, z_dst);
+
+  lp_build_mask_update(mask, z_pass);
+
+  /* No need to worry about old stencil contents, just blend the
+   * old and new values and shift into the correct position for
+   * storage.
+   */
+  if (depth->writemask) {
+ type.sign = 0;
+ lp_build_context_init(&bld, builder, type);
+
+ z_dst = lp_build_select(&bld, mask->value, z_src, z_dst);
+ z_dst = LLVMBuildShl(builder, z_dst, const_8_int, "z_dst");
+ LLVMBuildStore(builder, z_dst, zs_dst_ptr);
+  }
+
+  if (counter)
+ lp_build_occlusion_count(builder, type, mask->value, counter);
+
+  return;
+   }
+
+   /*
+* Depths are expected to be between 0 and 1, even if they are stored in
+* floats. Setting these bits here will ensure that the lp_build_conv() call
+* below won't try to unnecessarily clamp the incoming values.
+*/
+   if(z_src_type.floating) {
+  z_src_type.sign = FALSE;
+  z_src_type.norm = TRUE;
+   }
+   else {
+  assert(!z_src_type.sign);
+  assert(z_src_type.norm

Mesa (master): llvmpipe: store zero into all alloca'd values

2010-10-09 Thread Keith Whitwell

Module: Mesa
Branch: master
Commit: 6da29f36111edc821a4aa10128e9681fc75a43d7
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=6da29f36111edc821a4aa10128e9681fc75a43d7

Author: Keith Whitwell 
Date:   Thu Oct  7 19:49:20 2010 +0100

llvmpipe: store zero into all alloca'd values

Fixes slowdown in isosurf with earlier versions of llvm.

---

 src/gallium/auxiliary/gallivm/lp_bld_flow.c |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c 
b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
index 5bc9c74..cd5fbc2 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
@@ -830,6 +830,7 @@ lp_build_alloca(LLVMBuilderRef builder,
}
 
res = LLVMBuildAlloca(first_builder, type, name);
+   LLVMBuildStore(builder, LLVMConstNull(type), res);
 
LLVMDisposeBuilder(first_builder);
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): llvmpipe: dump fragment shader ir and asm when LP_DEBUG=fs

2010-10-09 Thread Keith Whitwell

Module: Mesa
Branch: master
Commit: 954965366fee3fa2eec8a11b6663d4cf218e1d5d
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=954965366fee3fa2eec8a11b6663d4cf218e1d5d

Author: Keith Whitwell 
Date:   Thu Oct  7 19:01:12 2010 +0100

llvmpipe: dump fragment shader ir and asm when LP_DEBUG=fs

Better than GALLIVM_DEBUG if you're only interested in fragment shaders.

---

 src/gallium/drivers/llvmpipe/lp_state_fs.c |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c 
b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index f75ae28..07b4f74 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -730,7 +730,7 @@ generate_fragment(struct llvmpipe_context *lp,
/* Apply optimizations to LLVM IR */
LLVMRunFunctionPassManager(screen->pass, function);
 
-   if (gallivm_debug & GALLIVM_DEBUG_IR) {
+   if ((gallivm_debug & GALLIVM_DEBUG_IR) || (LP_DEBUG & DEBUG_FS)) {
   /* Print the LLVM IR to stderr */
   lp_debug_dump_value(function);
   debug_printf("\n");
@@ -744,7 +744,7 @@ generate_fragment(struct llvmpipe_context *lp,
 
   variant->jit_function[partial_mask] = 
(lp_jit_frag_func)pointer_to_func(f);
 
-  if (gallivm_debug & GALLIVM_DEBUG_ASM) {
+  if ((gallivm_debug & GALLIVM_DEBUG_ASM) || (LP_DEBUG & DEBUG_FS)) {
  lp_disassemble(f);
   }
   lp_func_delete_body(function);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): llvmpipe: use alloca for fs color outputs

2010-10-09 Thread Keith Whitwell

Module: Mesa
Branch: master
Commit: 40d7be52619fbff2479dcdf56929e3e0c5b12e72
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=40d7be52619fbff2479dcdf56929e3e0c5b12e72

Author: Keith Whitwell 
Date:   Thu Oct  7 18:59:54 2010 +0100

llvmpipe: use alloca for fs color outputs

Don't try to emit our own phi's, let llvm mem2reg do it for us.

---

 src/gallium/drivers/llvmpipe/lp_state_fs.c |   16 
 1 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c 
b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 0530c61..f75ae28 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -303,8 +303,7 @@ generate_fs(struct llvmpipe_context *lp,
/* Declare the color and z variables */
for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
   for(chan = 0; chan < NUM_CHANNELS; ++chan) {
-color[cbuf][chan] = LLVMGetUndef(vec_type);
-lp_build_flow_scope_declare(flow, &color[cbuf][chan]);
+color[cbuf][chan] = lp_build_alloca(builder, vec_type, "color");
   }
}
 
@@ -369,7 +368,7 @@ generate_fs(struct llvmpipe_context *lp,
  &mask, alpha, alpha_ref_value);
   }
 
- color[cbuf][chan] = out;
+  LLVMBuildStore(builder, out, color[cbuf][chan]);
   break;
}
 
@@ -665,9 +664,18 @@ generate_fragment(struct llvmpipe_context *lp,
* Convert the fs's output color and mask to fit to the blending type. 
*/
   for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+ LLVMValueRef fs_color_vals[LP_MAX_VECTOR_LENGTH];
+ 
+ for (i = 0; i < num_fs; i++) {
+fs_color_vals[i] =
+   LLVMBuildLoad(builder, fs_out_color[cbuf][chan][i], 
"fs_color_vals");
+ }
+
 lp_build_conv(builder, fs_type, blend_type,
-  fs_out_color[cbuf][chan], num_fs,
+   fs_color_vals,
+   num_fs,
   &blend_in_color[chan], 1);
+
 lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]);
   }
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): llvmpipe: defer attribute interpolation until after mask and ztest

2010-10-09 Thread Keith Whitwell

Module: Mesa
Branch: master
Commit: 8009886b0092df2783472deaac1bcaad4a802c19
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=8009886b0092df2783472deaac1bcaad4a802c19

Author: Keith Whitwell 
Date:   Wed Oct  6 22:25:48 2010 +0100

llvmpipe: defer attribute interpolation until after mask and ztest

Don't calculate 1/w for quads which aren't visible...

---

 src/gallium/drivers/llvmpipe/lp_bld_interp.c |   25 ++---
 src/gallium/drivers/llvmpipe/lp_bld_interp.h |6 +-
 src/gallium/drivers/llvmpipe/lp_state_fs.c   |   17 +++--
 3 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c 
b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
index ee92ce3..c9da890 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -272,7 +272,10 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
  * This is called when we move from one quad to the next.
  */
 static void
-attribs_update(struct lp_build_interp_soa_context *bld, int quad_index)
+attribs_update(struct lp_build_interp_soa_context *bld,
+   int quad_index,
+   int start,
+   int end)
 {
struct lp_build_context *coeff_bld = &bld->coeff_bld;
LLVMValueRef shuffle = lp_build_const_int_vec(coeff_bld->type, quad_index);
@@ -282,7 +285,7 @@ attribs_update(struct lp_build_interp_soa_context *bld, int 
quad_index)
 
assert(quad_index < 4);
 
-   for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
+   for(attrib = start; attrib < end; ++attrib) {
   const unsigned mask = bld->mask[attrib];
   const unsigned interp = bld->interp[attrib];
   for(chan = 0; chan < NUM_CHANNELS; ++chan) {
@@ -442,8 +445,6 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context 
*bld,
pos_init(bld, x0, y0);
 
coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
-
-   attribs_update(bld, 0);
 }
 
 
@@ -451,10 +452,20 @@ lp_build_interp_soa_init(struct 
lp_build_interp_soa_context *bld,
  * Advance the position and inputs to the given quad within the block.
  */
 void
-lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld,
-   int quad_index)
+lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld,
+  int quad_index)
+{
+   assert(quad_index < 4);
+
+   attribs_update(bld, quad_index, 1, bld->num_attribs);
+}
+
+void
+lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld,
+  int quad_index)
 {
assert(quad_index < 4);
 
-   attribs_update(bld, quad_index);
+   attribs_update(bld, quad_index, 0, 1);
 }
+
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h 
b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
index 3054030..6588f7f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
@@ -89,7 +89,11 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context 
*bld,
  LLVMValueRef y);
 
 void
-lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld,
+lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld,
+   int quad_index);
+
+void
+lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld,
int quad_index);
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c 
b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 3ce8be5..0530c61 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -262,7 +262,7 @@ generate_fs(struct llvmpipe_context *lp,
 struct lp_type type,
 LLVMValueRef context_ptr,
 unsigned i,
-const struct lp_build_interp_soa_context *interp,
+struct lp_build_interp_soa_context *interp,
 struct lp_build_sampler_soa *sampler,
 LLVMValueRef *pmask,
 LLVMValueRef (*color)[4],
@@ -276,7 +276,7 @@ generate_fs(struct llvmpipe_context *lp,
LLVMTypeRef vec_type;
LLVMValueRef consts_ptr;
LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
-   LLVMValueRef z = interp->pos[2];
+   LLVMValueRef z;
LLVMValueRef stencil_refs[2];
struct lp_build_flow_context *flow;
struct lp_build_mask_context mask;
@@ -307,7 +307,6 @@ generate_fs(struct llvmpipe_context *lp,
 lp_build_flow_scope_declare(flow, &color[cbuf][chan]);
   }
}
-   lp_build_flow_scope_declare(flow, &z);
 
/* do triangle edge testing */
if (partial_mask) {
@@ -321,6 +320,13 @@ generate_fs(struct llvmpipe_context *lp,
/* 'mask' will control execution based on quad's pixel alive/killed state */
lp_build_mask_begin(&mask, flow, type, *pmask);
 
+   lp_build_interp_soa_update_pos(interp, i);
+
+   /* Try to avoid the 1/w for quads where mask is zero.  TODO: avoid
+* this for depth-fail quads also.

Mesa (master): llvmpipe: Prevent z > 1.0

2010-10-09 Thread Jose Fonseca

Module: Mesa
Branch: master
Commit: d0bfb3c5144a9434efd4d53ced149d42016b5bdc
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=d0bfb3c5144a9434efd4d53ced149d42016b5bdc

Author: José Fonseca 
Date:   Wed Oct  6 20:42:30 2010 +0100

llvmpipe: Prevent z > 1.0

The current interpolation scheme causes precision loss.

Changing the operation order helps, but does not completely avoid the
problem.

The only short term solution is to clamp z to 1.0.

This is unfortunate, but probably unavoidable until interpolation is
improved.

---

 src/gallium/drivers/llvmpipe/lp_bld_interp.c |   20 ++--
 1 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c 
b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
index 2a374f8..ee92ce3 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -206,7 +206,7 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
 dadq2 = LLVMBuildFAdd(builder, dadq, dadq, "");
 
 /*
- * a = a0 + x * dadx + y * dady
+ * a = a0 + (x * dadx + y * dady)
  */
 
 if (attrib == 0 && chan == 0) {
@@ -219,11 +219,11 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
a = a0;
if (interp != LP_INTERP_CONSTANT &&
interp != LP_INTERP_FACING) {
-  LLVMValueRef tmp;
-  tmp = LLVMBuildFMul(builder, bld->x, dadx, "");
-  a = LLVMBuildFAdd(builder, a, tmp, "");
-  tmp = LLVMBuildFMul(builder, bld->y, dady, "");
-  a = LLVMBuildFAdd(builder, a, tmp, "");
+  LLVMValueRef ax, ay, axy;
+  ax = LLVMBuildFMul(builder, bld->x, dadx, "");
+  ay = LLVMBuildFMul(builder, bld->y, dady, "");
+  axy = LLVMBuildFAdd(builder, ax, ay, "");
+  a = LLVMBuildFAdd(builder, a, axy, "");
}
 }
 
@@ -350,6 +350,14 @@ attribs_update(struct lp_build_interp_soa_context *bld, 
int quad_index)
}
 #endif
 
+   if (attrib == 0 && chan == 2) {
+  /* FIXME: Depth values can exceed 1.0, due to the fact that
+   * setup interpolation coefficients refer to (0,0) which 
causes
+   * precision loss. So we must clamp to 1.0 here to avoid 
artifacts
+   */
+  a = lp_build_min(coeff_bld, a, coeff_bld->one);
+   }
+
attrib_name(a, attrib, chan, "");
 }
 bld->attribs[attrib][chan] = a;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): gallivm: Do size computations simultaneously for all dimensions (AoS).

2010-10-09 Thread Jose Fonseca

Module: Mesa
Branch: master
Commit: 34c11c87e4e3b5639764abee413c45e918749477
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=34c11c87e4e3b5639764abee413c45e918749477

Author: José Fonseca 
Date:   Sat Oct  9 09:34:31 2010 +0100

gallivm: Do size computations simultaneously for all dimensions (AoS).

Operate simultaneously on the (width, height, depth) vector as much as possible,
instead of doing the operations on vectors with broadcasted scalars.

Also do the 24.8 fixed point scalar with integer shift of the texture size,
for unnormalized coordinates.

AoS path only for now -- the same thing can be done for SoA.

---

 src/gallium/auxiliary/gallivm/lp_bld_sample.c |  106 +
 src/gallium/auxiliary/gallivm/lp_bld_sample.h |   22 +++-
 src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c |  125 +++--
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c |   16 +++-
 4 files changed, 177 insertions(+), 92 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c 
b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index 7a64392..5bc3c26 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -630,37 +630,21 @@ lp_build_get_level_stride_vec(struct 
lp_build_sample_context *bld,
 void
 lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
 LLVMValueRef ilevel,
-LLVMValueRef *out_width_vec,
-LLVMValueRef *out_height_vec,
-LLVMValueRef *out_depth_vec,
+LLVMValueRef *out_size,
 LLVMValueRef *row_stride_vec,
 LLVMValueRef *img_stride_vec)
 {
const unsigned dims = bld->dims;
LLVMValueRef ilevel_vec;
-   LLVMValueRef size_vec;
-   LLVMTypeRef i32t = LLVMInt32Type();
 
ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
 
/*
 * Compute width, height, depth at mipmap level 'ilevel'
 */
-   size_vec = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec);
+   *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec);
 
-   *out_width_vec = lp_build_extract_broadcast(bld->builder,
-   bld->int_size_type,
-   bld->int_coord_type,
-   size_vec,
-   LLVMConstInt(i32t, 0, 0));
if (dims >= 2) {
-
-  *out_height_vec = lp_build_extract_broadcast(bld->builder,
-   bld->int_size_type,
-   bld->int_coord_type,
-   size_vec,
-   LLVMConstInt(i32t, 1, 0));
-
   *row_stride_vec = lp_build_get_level_stride_vec(bld,
   bld->row_stride_array,
   ilevel);
@@ -668,18 +652,90 @@ lp_build_mipmap_level_sizes(struct 
lp_build_sample_context *bld,
  *img_stride_vec = lp_build_get_level_stride_vec(bld,
  bld->img_stride_array,
  ilevel);
- if (dims == 3) {
-*out_depth_vec = lp_build_extract_broadcast(bld->builder,
-bld->int_size_type,
-bld->int_coord_type,
-size_vec,
-LLVMConstInt(i32t, 2, 
0));
- }
   }
}
 }
 
 
+/**
+ * Extract and broadcast texture size.
+ *
+ * @param size_type   type of the texture size vector (either
+ *bld->int_size_type or bld->float_size_type)
+ * @param coord_type  type of the texture size vector (either
+ *bld->int_coord_type or bld->coord_type)
+ * @param int_sizevector with the integer texture size (width, height,
+ *depth)
+ */
+void
+lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
+ struct lp_type size_type,
+ struct lp_type coord_type,
+ LLVMValueRef size,
+ LLVMValueRef *out_width,
+ LLVMValueRef *out_height,
+ LLVMValueRef *out_depth)
+{
+   const unsigned dims = bld->dims;
+   LLVMTypeRef i32t = LLVMInt32Type();
+
+   *out_width = lp_build_extract_broadcast(bld->builder,
+   size_type,
+   coord_type,
+   size,
+

Mesa (master): llvmpipe: fix rasterization of vertical lines on pixel boundaries

2010-10-09 Thread Jose Fonseca

Module: Mesa
Branch: master
Commit: 6316d540564d116460bfd1382e3eee98480e28ff
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=6316d540564d116460bfd1382e3eee98480e28ff

Author: Zack Rusin 
Date:   Thu Oct  7 16:26:17 2010 -0400

llvmpipe: fix rasterization of vertical lines on pixel boundaries

---

 src/gallium/drivers/llvmpipe/lp_setup_line.c |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c 
b/src/gallium/drivers/llvmpipe/lp_setup_line.c
index 4d7d623..693ac28 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_line.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c
@@ -475,7 +475,7 @@ try_setup_line( struct lp_setup_context *setup,
   else {
  /* do intersection test */
  float xintersect = fracf(v2[0][0]) + y2diff * dxdy;
- draw_end = (xintersect < 1.0 && xintersect > 0.0);
+ draw_end = (xintersect < 1.0 && xintersect >= 0.0);
   }
 
   /* Are we already drawing start/end?
@@ -513,7 +513,7 @@ try_setup_line( struct lp_setup_context *setup,
 x_offset_end = y_offset_end * dxdy;
  }
   }
- 
+
   /* x/y positions in fixed point */
   x[0] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset) - 
fixed_width/2;
   x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) - 
fixed_width/2;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): dri/nv10: Fake fast Z clears for pre-nv17 cards.

Mesa (master): dri/nouveau: Minor cleanup.

Mesa (master): gallivm: Cleanup the rest of the flow module.

Mesa (master): gallivm: Simplify if/then/else implementation.

Mesa (master): gallivm: Factor out the SI-> FP texture size conversion for SoA path too

Mesa (master): gallivm: Remove support for Phi generation.

Mesa (master): gallivm: Use variables instead of Phis for cubemap selection.

Mesa (master): gallivm: Don't generate Phis for execution mask.

Mesa (master): gallivm: Special bri-linear computation path for unmodified rho.

Mesa (master): gallivm: Less code duplication in log computation.

Mesa (master): util: Defined M_SQRT2 when not available.

Mesa (master): gallivm: Handle code have ret correctly.

Mesa (master): llvmpipe: Fix MSVC build. Enable the new SSE2 code on non SSE3 systems.

Mesa (master): llvmpipe: simplified SSE2 swz/unswz routines

Mesa (master): llvmpipe: clean up shader pre/postamble, try to catch more early-z

Mesa (master): llvmpipe: try to be sensible about whether to branch after mask updates

Mesa (master): gallivm: simpler uint8->float conversions

Mesa (master): gallivm: prefer blendvb for integer arguments

Mesa (master): gallivm: specialized x8z24 depthtest path

Mesa (master): llvmpipe: store zero into all alloca'd values

Mesa (master): llvmpipe: dump fragment shader ir and asm when LP_DEBUG=fs

Mesa (master): llvmpipe: use alloca for fs color outputs

Mesa (master): llvmpipe: defer attribute interpolation until after mask and ztest

Mesa (master): llvmpipe: Prevent z > 1.0

Mesa (master): gallivm: Do size computations simultaneously for all dimensions (AoS).

Mesa (master): llvmpipe: fix rasterization of vertical lines on pixel boundaries

26 matches

Site Navigation

Mail list logo

Footer information