Mesa (master): r600g: only init GS_VERT_ITEMSIZE on r600

2014-12-09 Thread Dave Airlie
Module: Mesa
Branch: master
Commit: 7f21cf71989ba780639594ebb34d6e7345b08436
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=7f21cf71989ba780639594ebb34d6e7345b08436

Author: Dave Airlie 
Date:   Wed Dec 10 13:48:29 2014 +1000

r600g: only init GS_VERT_ITEMSIZE on r600

On evergreen there are 4 regs, on r600/700 there is only one.

Don't initialise regs and trash someone else's state.

Not sure this fixes anything, but hey one less stupid.

Reviewed-By: Glenn Kennard 
Cc: "10.3 10.4" mesa-sta...@lists.freedesktop.org
Signed-off-by: Dave Airlie 

---

 src/gallium/drivers/r600/r600_state.c |7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_state.c 
b/src/gallium/drivers/r600/r600_state.c
index 61f5c5a..9a4b972 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2659,11 +2659,8 @@ void r600_update_gs_state(struct pipe_context *ctx, 
struct r600_pipe_shader *sha
r600_store_context_reg(cb, R_028A6C_VGT_GS_OUT_PRIM_TYPE,
   
r600_conv_prim_to_gs_out(rshader->gs_output_prim));
 
-   r600_store_context_reg_seq(cb, R_0288C8_SQ_GS_VERT_ITEMSIZE, 4);
-   r600_store_value(cb, cp_shader->ring_item_size >> 2);
-   r600_store_value(cb, 0);
-   r600_store_value(cb, 0);
-   r600_store_value(cb, 0);
+   r600_store_context_reg(cb, R_0288C8_SQ_GS_VERT_ITEMSIZE,
+  cp_shader->ring_item_size >> 2);
 
r600_store_context_reg(cb, R_0288A8_SQ_ESGS_RING_ITEMSIZE,
   (rshader->ring_item_size) >> 2);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): vc4: Do QPU scheduling across uniform loads.

2014-12-09 Thread Eric Anholt
Module: Mesa
Branch: master
Commit: 8812dc503eb48bac5b9c9b5740f76025c046f90d
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=8812dc503eb48bac5b9c9b5740f76025c046f90d

Author: Eric Anholt 
Date:   Tue Dec  9 18:54:29 2014 -0800

vc4: Do QPU scheduling across uniform loads.

This means another pass of reordering the uniform data store, but it lets
us pair up a lot more instructions.

total instructions in shared programs: 44639 -> 43176 (-3.28%)
instructions in affected programs: 36938 -> 35475 (-3.96%)

---

 src/gallium/drivers/vc4/vc4_qpu_schedule.c |   88 +++-
 1 file changed, 60 insertions(+), 28 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c 
b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
index c733e6e..0700b0d 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_schedule.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
@@ -62,6 +62,12 @@ struct schedule_node {
  * can be consumed.
  */
 uint32_t latency;
+
+/**
+ * Which uniform from uniform_data[] this instruction read, or -1 if
+ * not reading a uniform.
+ */
+int uniform;
 };
 
 struct schedule_node_child {
@@ -80,7 +86,6 @@ struct schedule_state {
 struct schedule_node *last_rb[32];
 struct schedule_node *last_sf;
 struct schedule_node *last_vpm_read;
-struct schedule_node *last_unif_read;
 struct schedule_node *last_tmu_write;
 struct schedule_node *last_tlb;
 struct schedule_node *last_vpm;
@@ -174,9 +179,6 @@ process_raddr_deps(struct schedule_state *state, struct 
schedule_node *n,
 break;
 
 case QPU_R_UNIF:
-add_write_dep(state, &state->last_unif_read, n);
-break;
-
 case QPU_R_NOP:
 case QPU_R_ELEM_QPU:
 case QPU_R_XY_PIXEL_COORD:
@@ -215,6 +217,18 @@ is_tmu_write(uint32_t waddr)
 }
 }
 
+static bool
+reads_uniform(uint64_t inst)
+{
+if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_LOAD_IMM)
+return false;
+
+return (QPU_GET_FIELD(inst, QPU_RADDR_A) == QPU_R_UNIF ||
+QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_UNIF ||
+is_tmu_write(QPU_GET_FIELD(inst, QPU_WADDR_ADD)) ||
+is_tmu_write(QPU_GET_FIELD(inst, QPU_WADDR_MUL)));
+}
+
 static void
 process_mux_deps(struct schedule_state *state, struct schedule_node *n,
  uint32_t mux)
@@ -224,17 +238,6 @@ process_mux_deps(struct schedule_state *state, struct 
schedule_node *n,
 }
 
 
-static bool
-is_direct_tmu_read(uint64_t inst)
-{
-/* If it's a direct read, we happen to structure the code such that
- * there's an explicit uniform read in the instruction (for kernel
- * texture reloc processing).
- */
-return (QPU_GET_FIELD(inst, QPU_RADDR_A) == QPU_R_UNIF ||
-QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_UNIF);
-}
-
 static void
 process_waddr_deps(struct schedule_state *state, struct schedule_node *n,
uint32_t waddr, bool is_add)
@@ -250,14 +253,6 @@ process_waddr_deps(struct schedule_state *state, struct 
schedule_node *n,
 }
 } else if (is_tmu_write(waddr)) {
 add_write_dep(state, &state->last_tmu_write, n);
-
-/* There is an implicit uniform read in texture ops in
- * hardware, unless this is a direct-addressed uniform read,
- * so we need to keep it in the same order as the other
- * uniforms.
- */
-if (!is_direct_tmu_read(n->inst->inst))
-add_write_dep(state, &state->last_unif_read, n);
 } else if (qpu_waddr_is_tlb(waddr)) {
 add_write_dep(state, &state->last_tlb, n);
 } else {
@@ -509,7 +504,7 @@ get_instruction_priority(uint64_t inst)
 static struct schedule_node *
 choose_instruction_to_schedule(struct choose_scoreboard *scoreboard,
struct simple_node *schedule_list,
-   uint64_t prev_inst)
+   struct schedule_node *prev_inst)
 {
 struct schedule_node *chosen = NULL;
 struct simple_node *node;
@@ -537,8 +532,11 @@ choose_instruction_to_schedule(struct choose_scoreboard 
*scoreboard,
 /* If we're trying to pair with another instruction, check
  * that they're compatible.
  */
-if (prev_inst != 0) {
-inst = qpu_merge_inst(prev_inst, inst);
+if (prev_inst) {
+if (prev_inst->uniform != -1 && n->uniform != -1)
+continue;
+
+inst = qpu_merge_inst(prev_inst->inst->inst, inst);
 if (!inst)
 continue;
 }
@@ -668,6 +666,17 @@ schedule_instruc

Mesa (master): vc4: Mark VPM read setup as impacting VPM reads, not writes.

2014-12-09 Thread Eric Anholt
Module: Mesa
Branch: master
Commit: f431b4f110946a0f6e3822f870fbcd3d23c8317a
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=f431b4f110946a0f6e3822f870fbcd3d23c8317a

Author: Eric Anholt 
Date:   Tue Dec  9 14:20:54 2014 -0800

vc4: Mark VPM read setup as impacting VPM reads, not writes.

Fixes assertion failures if we adjust scheduling priorities to emphasize
VPM reads more.

---

 src/gallium/drivers/vc4/vc4_qpu_schedule.c |8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c 
b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
index 6bba66a..4bb9b3a 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_schedule.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
@@ -260,10 +260,16 @@ process_waddr_deps(struct schedule_state *state, struct 
schedule_node *n,
 break;
 
 case QPU_W_VPM:
-case QPU_W_VPMVCD_SETUP:
 add_write_dep(state, &state->last_vpm, n);
 break;
 
+case QPU_W_VPMVCD_SETUP:
+if (is_a)
+add_write_dep(state, &state->last_vpm_read, n);
+else
+add_write_dep(state, &state->last_vpm, n);
+break;
+
 case QPU_W_SFU_RECIP:
 case QPU_W_SFU_RECIPSQRT:
 case QPU_W_SFU_EXP:

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): vc4: Refuse to merge instructions involving 32-bit immediate loads.

2014-12-09 Thread Eric Anholt
Module: Mesa
Branch: master
Commit: cff8c96a0d418f41e00aa97a13dc55e3ed213eb7
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=cff8c96a0d418f41e00aa97a13dc55e3ed213eb7

Author: Eric Anholt 
Date:   Tue Dec  9 16:34:37 2014 -0800

vc4: Refuse to merge instructions involving 32-bit immediate loads.

An immediate load overwrites the mul and add operations, so you can't
merge with them.

---

 src/gallium/drivers/vc4/vc4_qpu.c |5 +
 1 file changed, 5 insertions(+)

diff --git a/src/gallium/drivers/vc4/vc4_qpu.c 
b/src/gallium/drivers/vc4/vc4_qpu.c
index 6daa072..faf8790 100644
--- a/src/gallium/drivers/vc4/vc4_qpu.c
+++ b/src/gallium/drivers/vc4/vc4_qpu.c
@@ -356,6 +356,11 @@ qpu_merge_inst(uint64_t a, uint64_t b)
 if (qpu_num_sf_accesses(a) && qpu_num_sf_accesses(b))
 return 0;
 
+if (QPU_GET_FIELD(a, QPU_SIG) == QPU_SIG_LOAD_IMM ||
+QPU_GET_FIELD(b, QPU_SIG) == QPU_SIG_LOAD_IMM) {
+return 0;
+}
+
 ok = ok && merge_fields(&merge, a, b, QPU_SIG_MASK,
 QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG));
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): vc4: Skip raddr dependencies for 32-bit immediate loads.

2014-12-09 Thread Eric Anholt
Module: Mesa
Branch: master
Commit: 45a89237711acff7ee31c854361f8f580ccdcc9f
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=45a89237711acff7ee31c854361f8f580ccdcc9f

Author: Eric Anholt 
Date:   Tue Dec  9 14:23:39 2014 -0800

vc4: Skip raddr dependencies for 32-bit immediate loads.

These don't have raddr fields.

---

 src/gallium/drivers/vc4/vc4_qpu_schedule.c |7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c 
b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
index 4bb9b3a..8df816f 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_schedule.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
@@ -334,8 +334,11 @@ calculate_deps(struct schedule_state *state, struct 
schedule_node *n)
 uint32_t mul_b = QPU_GET_FIELD(inst, QPU_MUL_B);
 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
 
-process_raddr_deps(state, n, raddr_a, true);
-process_raddr_deps(state, n, raddr_b, false);
+if (sig != QPU_SIG_LOAD_IMM) {
+process_raddr_deps(state, n, raddr_a, true);
+process_raddr_deps(state, n, raddr_b, false);
+}
+
 if (add_op != QPU_A_NOP) {
 process_mux_deps(state, n, add_a);
 process_mux_deps(state, n, add_b);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): vc4: Populate the delay field better, and schedule high delay first.

2014-12-09 Thread Eric Anholt
Module: Mesa
Branch: master
Commit: c5b544403fbc955dd441fb5a2e11f0de2a75e9e4
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=c5b544403fbc955dd441fb5a2e11f0de2a75e9e4

Author: Eric Anholt 
Date:   Tue Dec  9 14:05:52 2014 -0800

vc4: Populate the delay field better, and schedule high delay first.

This is a standard scheduling heuristic, and clearly helps.

total instructions in shared programs: 46418 -> 44467 (-4.20%)
instructions in affected programs: 42531 -> 40580 (-4.59%)

---

 src/gallium/drivers/vc4/vc4_qpu_schedule.c |   50 +++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c 
b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
index 8df816f..c733e6e 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_schedule.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
@@ -49,7 +49,19 @@ struct schedule_node {
 uint32_t child_count;
 uint32_t child_array_size;
 uint32_t parent_count;
+
+/**
+ * Minimum number of cycles from scheduling this instruction until the
+ * end of the program, based on the slowest dependency chain through
+ * the children.
+ */
 uint32_t delay;
+
+/**
+ * cycles between this instruction being scheduled and when its result
+ * can be consumed.
+ */
+uint32_t latency;
 };
 
 struct schedule_node_child {
@@ -548,6 +560,13 @@ choose_instruction_to_schedule(struct choose_scoreboard 
*scoreboard,
 } else if (prio < chosen_prio) {
 continue;
 }
+
+if (n->delay > chosen->delay) {
+chosen = n;
+chosen_prio = prio;
+} else if (n->delay < chosen->delay) {
+continue;
+}
 }
 
 return chosen;
@@ -612,7 +631,7 @@ compute_delay(struct schedule_node *n)
 if (!n->children[i].node->delay)
 compute_delay(n->children[i].node);
 n->delay = MAX2(n->delay,
-n->children[i].node->delay + 1);
+n->children[i].node->delay + 
n->latency);
 }
 }
 }
@@ -734,6 +753,33 @@ schedule_instructions(struct vc4_compile *c, struct 
simple_node *schedule_list)
 }
 }
 
+static uint32_t waddr_latency(uint32_t waddr)
+{
+if (waddr < 32)
+return 2;
+
+/* Some huge number, really. */
+if (waddr >= QPU_W_TMU0_S && waddr <= QPU_W_TMU1_B)
+return 10;
+
+switch(waddr) {
+case QPU_W_SFU_RECIP:
+case QPU_W_SFU_RECIPSQRT:
+case QPU_W_SFU_EXP:
+case QPU_W_SFU_LOG:
+return 3;
+default:
+return 1;
+}
+}
+
+static uint32_t
+instruction_latency(uint64_t inst)
+{
+return MAX2(waddr_latency(QPU_GET_FIELD(inst, QPU_WADDR_ADD)),
+waddr_latency(QPU_GET_FIELD(inst, QPU_WADDR_MUL)));
+}
+
 void
 qpu_schedule_instructions(struct vc4_compile *c)
 {
@@ -761,6 +807,8 @@ qpu_schedule_instructions(struct vc4_compile *c)
 struct schedule_node *n = rzalloc(mem_ctx, struct 
schedule_node);
 
 n->inst = inst;
+n->latency = instruction_latency(inst->inst);
+
 remove_from_list(&inst->link);
 insert_at_tail(&schedule_list, &n->link);
 }

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): clover: Fix build after llvm r223802

2014-12-09 Thread Aaron Watry
Module: Mesa
Branch: master
Commit: 25db8729dc53b60ee0caade5e797e99d6ad13fa3
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=25db8729dc53b60ee0caade5e797e99d6ad13fa3

Author: Aaron Watry 
Date:   Tue Dec  9 19:28:50 2014 -0600

clover: Fix build after llvm r223802

Signed-off-by: Aaron Watry 
Reviewed-by: Tom Stellard 

---

 src/gallium/state_trackers/clover/llvm/invocation.cpp |4 
 1 file changed, 4 insertions(+)

diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp 
b/src/gallium/state_trackers/clover/llvm/invocation.cpp
index cda447d..5265d10 100644
--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -281,7 +281,11 @@ namespace {
   }
 
   for (unsigned i = 0; i < kernel_node->getNumOperands(); ++i) {
+#if HAVE_LLVM >= 0x0306
+ kernels.push_back(llvm::mdconst::dyn_extract(
+#else
  kernels.push_back(llvm::dyn_cast(
+#endif
 
kernel_node->getOperand(i)->getOperand(0)));
   }
}

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): freedreno/a4xx: frag-coord / face fixes

2014-12-09 Thread Rob Clark
Module: Mesa
Branch: master
Commit: 69d23809d06cb1bb20a92430e18720baff5994bc
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=69d23809d06cb1bb20a92430e18720baff5994bc

Author: Rob Clark 
Date:   Sun Dec  7 14:12:15 2014 -0500

freedreno/a4xx: frag-coord / face fixes

Signed-off-by: Rob Clark 

---

 src/gallium/drivers/freedreno/a4xx/fd4_program.c |   25 --
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c 
b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
index 76cadcc..cbfd8b2 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
@@ -200,6 +200,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct 
fd4_emit *emit)
 {
struct stage s[MAX_STAGES];
uint32_t pos_regid, posz_regid, psize_regid, color_regid;
+   uint32_t face_regid, coord_regid;
int constmode;
int i, j, k;
 
@@ -217,6 +218,10 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct 
fd4_emit *emit)
color_regid = ir3_find_output_regid(s[FS].v,
ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
 
+   /* TODO get these dynamically: */
+   face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0);
+   coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0);
+
/* we could probably divide this up into things that need to be
 * emitted if frag-prog is dirty vs if vert-prog is dirty..
 */
@@ -235,11 +240,14 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct 
fd4_emit *emit)
A4XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART |
A4XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
OUT_RING(ring, A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
-   0xfcfc |  /* XXX */
+   0xfc00 |  /* XXX */
A4XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE |
-   COND(s[FS].v->frag_coord, 
A4XX_HLSQ_CONTROL_1_REG_ZWCOORD));
-   OUT_RING(ring, A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31));
-   OUT_RING(ring, A4XX_HLSQ_CONTROL_3_REG_REGID(s[FS].v->pos_regid));
+   A4XX_HLSQ_CONTROL_1_REG_COORDREGID(coord_regid));
+   OUT_RING(ring, A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(63) |
+   0x3f3f000 |   /* XXX */
+   A4XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid));
+   OUT_RING(ring, A4XX_HLSQ_CONTROL_3_REG_REGID(s[FS].v->pos_regid) |
+   0xfcfcfc00);
 
OUT_PKT0(ring, REG_A4XX_HLSQ_VS_CONTROL_REG, 5);
OUT_RING(ring, A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(s[VS].constlen) |
@@ -349,7 +357,9 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct 
fd4_emit *emit)
COND(s[FS].v->has_samp, 
A4XX_SP_FS_CTRL_REG0_PIXLODENABLE));
OUT_RING(ring, A4XX_SP_FS_CTRL_REG1_CONSTLENGTH(s[FS].constlen) |
0x8000 |  /* XXX */
-   COND(s[FS].v->total_in > 0, 
A4XX_SP_FS_CTRL_REG1_VARYING));
+   COND(s[FS].v->frag_face, A4XX_SP_FS_CTRL_REG1_FACENESS) 
|
+   COND(s[FS].v->total_in > 0, 
A4XX_SP_FS_CTRL_REG1_VARYING) |
+   COND(s[FS].v->frag_coord, 
A4XX_SP_FS_CTRL_REG1_FRAGCOORD));
 
OUT_PKT0(ring, REG_A4XX_SP_FS_OBJ_OFFSET_REG, 2);
OUT_RING(ring, 
A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[FS].constoff) |
@@ -373,7 +383,10 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct 
fd4_emit *emit)
 
OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL2, 1);
OUT_RING(ring, A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES(0) |
-   COND(s[FS].v->total_in > 0, 
A4XX_RB_RENDER_CONTROL2_VARYING));
+   COND(s[FS].v->total_in > 0, 
A4XX_RB_RENDER_CONTROL2_VARYING) |
+   COND(s[FS].v->frag_face, 
A4XX_RB_RENDER_CONTROL2_FACENESS) |
+   COND(s[FS].v->frag_coord, 
A4XX_RB_RENDER_CONTROL2_XCOORD |
+   A4XX_RB_RENDER_CONTROL2_YCOORD));
 
OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT_REG, 1);
OUT_RING(ring, A4XX_RB_FS_OUTPUT_REG_COLOR_PIPE_ENABLE |

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): freedreno/a4xx: temp hack for FLAT varyings

2014-12-09 Thread Rob Clark
Module: Mesa
Branch: master
Commit: 6a5ba23fa6156abb7d643080e2a2b477aa1ed559
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=6a5ba23fa6156abb7d643080e2a2b477aa1ed559

Author: Rob Clark 
Date:   Sat Dec  6 16:29:53 2014 -0500

freedreno/a4xx: temp hack for FLAT varyings

Signed-off-by: Rob Clark 

---

 src/gallium/drivers/freedreno/a4xx/fd4_program.c |   19 +++
 1 file changed, 19 insertions(+)

diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c 
b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
index 4f2a88f..76cadcc 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
@@ -431,6 +431,25 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct 
fd4_emit *emit)
}
}
 
+   /* HACK: looks like we need to do int varyings in the frag
+* shader on a4xx (no flatshad reg?):
+*
+*(sy)(ss)nop
+*(sy)ldlv.u32 r0.x,l[r0.x], 1
+*ldlv.u32 r0.y,l[r0.x+1], 1
+*(ss)bary.f (ei)r63.x, 0, r0.x
+*(ss)(rpt1)cov.s32f16 hr0.x, (r)r0.x
+*(rpt5)nop
+*sam (f16)(xyzw)hr0.x, hr0.x, s#0, t#0
+*
+* for now, don't set FLAT on vinterp[], since that
+* at least works well enough for pure float impl (ie.
+* pre glsl130).. we'll have to do a bit more work to
+* handle this properly:
+*/
+   for (i = 0; i < ARRAY_SIZE(vinterp); i++)
+   vinterp[i] = 0;
+
OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2);
OUT_RING(ring, A4XX_VPC_ATTR_TOTALATTR(s[FS].v->total_in) |
A4XX_VPC_ATTR_THRDASSIGN(1) |

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): freedreno/ir3: lower TXP as needed

2014-12-09 Thread Rob Clark
Module: Mesa
Branch: master
Commit: eb6fd3b8eb9c19bb501a091d1696e5db1ac4c690
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=eb6fd3b8eb9c19bb501a091d1696e5db1ac4c690

Author: Rob Clark 
Date:   Sat Dec  6 15:24:23 2014 -0500

freedreno/ir3: lower TXP as needed

On a3xx, lower TXP for 3D textures, on a4xx lower all TXP.

Signed-off-by: Rob Clark 

---

 src/gallium/drivers/freedreno/ir3/ir3_compiler.c |8 
 src/gallium/drivers/freedreno/ir3/ir3_shader.c   |   12 ++--
 src/gallium/drivers/freedreno/ir3/ir3_shader.h   |2 +-
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c 
b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
index 6cc21ac..ade4b1c 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
@@ -170,6 +170,14 @@ compile_init(struct ir3_compile_context *ctx, struct 
ir3_shader_variant *so,
break;
}
 
+   if (ir3_shader_gpuid(so->shader) >= 400) {
+   /* a4xx seems to have *no* sam.p */
+   lconfig.lower_TXP = ~0;  /* lower all txp */
+   } else {
+   /* a3xx just needs to avoid sam.p for 3d tex */
+   lconfig.lower_TXP = (1 << TGSI_TEXTURE_3D);
+   }
+
ctx->tokens = tgsi_transform_lowering(&lconfig, tokens, &ctx->info);
ctx->free_tokens = !!ctx->tokens;
if (!ctx->tokens) {
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c 
b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
index 0c74f2f..c21d0a2 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
@@ -54,9 +54,10 @@ static void
 assemble_variant(struct ir3_shader_variant *v)
 {
struct fd_context *ctx = fd_context(v->shader->pctx);
+   uint32_t gpu_id = ir3_shader_gpuid(v->shader);
uint32_t sz, *bin;
 
-   bin = ir3_assemble(v->ir, &v->info, ctx->screen->gpu_id);
+   bin = ir3_assemble(v->ir, &v->info, gpu_id);
sz = v->info.sizedwords * 4;
 
v->bo = fd_bo_new(ctx->dev, sz,
@@ -67,7 +68,7 @@ assemble_variant(struct ir3_shader_variant *v)
 
free(bin);
 
-   if (ctx->screen->gpu_id >= 400) {
+   if (gpu_id >= 400) {
v->instrlen = v->info.sizedwords / (2 * 16);
} else {
v->instrlen = v->info.sizedwords / (2 * 4);
@@ -177,6 +178,13 @@ fail:
return NULL;
 }
 
+uint32_t
+ir3_shader_gpuid(struct ir3_shader *shader)
+{
+   struct fd_context *ctx = fd_context(shader->pctx);
+   return ctx->screen->gpu_id;
+}
+
 struct ir3_shader_variant *
 ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key)
 {
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h 
b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index 89442ce..fcd5895 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -214,7 +214,7 @@ struct ir3_shader {
 struct ir3_shader * ir3_shader_create(struct pipe_context *pctx,
const struct tgsi_token *tokens, enum shader_t type);
 void ir3_shader_destroy(struct ir3_shader *shader);
-
+uint32_t ir3_shader_gpuid(struct ir3_shader *shader);
 struct ir3_shader_variant * ir3_shader_variant(struct ir3_shader *shader,
struct ir3_shader_key key);
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): tgsi/lowering: add support to lower TXP (v2)

2014-12-09 Thread Rob Clark
Module: Mesa
Branch: master
Commit: 219440ddebcd804d6b8cb0a79c4bbdd7701ea355
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=219440ddebcd804d6b8cb0a79c4bbdd7701ea355

Author: Rob Clark 
Date:   Sat Dec  6 13:36:02 2014 -0500

tgsi/lowering: add support to lower TXP (v2)

v2: actually do perspective divide for RECT/SHADOWRECT

Signed-off-by: Rob Clark 
Reviewed-by: Ilia Mirkin 

---

 src/gallium/auxiliary/tgsi/tgsi_lowering.c |   46 +++-
 src/gallium/auxiliary/tgsi/tgsi_lowering.h |3 ++
 2 files changed, 34 insertions(+), 15 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.c 
b/src/gallium/auxiliary/tgsi/tgsi_lowering.c
index b6b18db..dee6c41 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_lowering.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.c
@@ -1031,7 +1031,10 @@ transform_samp(struct tgsi_transform_context *tctx,
struct tgsi_full_instruction new_inst;
/* mask is clamped coords, pmask is all coords (for projection): */
unsigned mask = 0, pmask = 0, smask;
+   unsigned tex = inst->Texture.Texture;
unsigned opcode = inst->Instruction.Opcode;
+   bool lower_txp = (opcode == TGSI_OPCODE_TXP) &&
+  (ctx->config->lower_TXP & (1 << tex));
 
if (opcode == TGSI_OPCODE_TXB2) {
   samp = &inst->Src[2];
@@ -1043,14 +1046,14 @@ transform_samp(struct tgsi_transform_context *tctx,
smask = 1 << samp->Register.Index;
 
/* check if we actually need to lower this one: */
-   if (!(ctx->saturate & smask))
+   if (!(ctx->saturate & smask) && !lower_txp)
   return -1;
 
/* figure out which coordinates need saturating:
 *   - RECT textures should not get saturated
 *   - array index coords should not get saturated
 */
-   switch (inst->Texture.Texture) {
+   switch (tex) {
case TGSI_TEXTURE_3D:
case TGSI_TEXTURE_CUBE:
case TGSI_TEXTURE_CUBE_ARRAY:
@@ -1081,16 +1084,19 @@ transform_samp(struct tgsi_transform_context *tctx,
   pmask |= TGSI_WRITEMASK_X;
   break;
 
-  /* TODO: I think we should ignore these?
- case TGSI_TEXTURE_RECT:
- case TGSI_TEXTURE_SHADOWRECT:
-  */
+   case TGSI_TEXTURE_RECT:
+   case TGSI_TEXTURE_SHADOWRECT:
+  /* we don't saturate, but in case of lower_txp we
+   * still need to do the perspective divide:
+   */
+   pmask = TGSI_WRITEMASK_XY;
+   break;
}
 
/* sanity check.. driver could be asking to saturate a non-
 * existent coordinate component:
 */
-   if (!mask)
+   if (!mask && !lower_txp)
   return -1;
 
/* MOV tmpA, src0 */
@@ -1126,8 +1132,10 @@ transform_samp(struct tgsi_transform_context *tctx,
}
 
/* MOV_SAT tmpA., tmpA */
-   create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask,
-  TGSI_SAT_ZERO_ONE);
+   if (mask) {
+  create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask,
+ TGSI_SAT_ZERO_ONE);
+   }
 
/* modify the texture samp instruction to take fixed up coord: */
new_inst = *inst;
@@ -1462,6 +1470,7 @@ tgsi_transform_lowering(const struct tgsi_lowering_config 
*config,
  OPCS(DPH) ||
  OPCS(DP2) ||
  OPCS(DP2A) ||
+ OPCS(TXP) ||
  ctx.two_side_colors ||
  ctx.saturate))
   return NULL;
@@ -1529,12 +1538,19 @@ tgsi_transform_lowering(const struct 
tgsi_lowering_config *config,
   newlen += DP2A_GROW * OPCS(DP2A);
   numtmp = MAX2(numtmp, DOTP_TMP);
}
-   if (ctx.saturate) {
-  int n = info->opcode_count[TGSI_OPCODE_TEX] +
- info->opcode_count[TGSI_OPCODE_TXP] +
- info->opcode_count[TGSI_OPCODE_TXB] +
- info->opcode_count[TGSI_OPCODE_TXB2] +
- info->opcode_count[TGSI_OPCODE_TXL];
+   if (ctx.saturate || config->lower_TXP) {
+  int n = 0;
+
+  if (ctx.saturate) {
+ n = info->opcode_count[TGSI_OPCODE_TEX] +
+info->opcode_count[TGSI_OPCODE_TXP] +
+info->opcode_count[TGSI_OPCODE_TXB] +
+info->opcode_count[TGSI_OPCODE_TXB2] +
+info->opcode_count[TGSI_OPCODE_TXL];
+  } else if (config->lower_TXP) {
+  n = info->opcode_count[TGSI_OPCODE_TXP];
+  }
+
   newlen += SAMP_GROW * n;
   numtmp = MAX2(numtmp, SAMP_TMP);
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.h 
b/src/gallium/auxiliary/tgsi/tgsi_lowering.h
index 55e1507..52c204f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_lowering.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.h
@@ -69,6 +69,9 @@ struct tgsi_lowering_config
unsigned lower_DP2:1;
unsigned lower_DP2A:1;
 
+   /* bitmask of (1 << TGSI_TEXTURE_type): */
+   unsigned lower_TXP;
+
/* To emulate certain texture wrap modes, this can be used
 * to saturate the specified tex coord to [0.0, 1.0].  The
 * bits are according to sampler #, ie. if, for example:

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): freedreno/a4xx: XA gpu hang at startup

2014-12-09 Thread Rob Clark
Module: Mesa
Branch: master
Commit: 5b38a1740beccf1f33b9dfe4d38f00a711b6b2e0
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=5b38a1740beccf1f33b9dfe4d38f00a711b6b2e0

Author: Rob Clark 
Date:   Sat Dec  6 12:39:19 2014 -0500

freedreno/a4xx: XA gpu hang at startup

Signed-off-by: Rob Clark 

---

 src/gallium/drivers/freedreno/a4xx/fd4_emit.c |6 ++
 src/gallium/drivers/freedreno/a4xx/fd4_gmem.c |4 +++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c 
b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index 5b47158..839d3e8 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -689,5 +689,11 @@ fd4_emit_restore(struct fd_context *ctx)
OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL3, 1);
OUT_RING(ring, A4XX_RB_RENDER_CONTROL3_COMPONENT_ENABLE(0xf));
 
+   OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1);
+   OUT_RING(ring, A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR);
+
+   OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
+   OUT_RING(ring, 0x0);
+
ctx->needs_rb_fbd = true;
 }
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c 
b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
index 89ae260..8ad0039 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
@@ -500,10 +500,12 @@ fd4_emit_tile_prep(struct fd_context *ctx, struct fd_tile 
*tile)
OUT_RING(ring, 0x);
}
 
+   OUT_PKT0(ring, REG_A4XX_GRAS_DEPTH_CONTROL, 1);
if (pfb->zsbuf) {
-   OUT_PKT0(ring, REG_A4XX_GRAS_DEPTH_CONTROL, 1);
OUT_RING(ring, A4XX_GRAS_DEPTH_CONTROL_FORMAT(
fd4_pipe2depth(pfb->zsbuf->format)));
+   } else {
+   OUT_RING(ring, A4XX_GRAS_DEPTH_CONTROL_FORMAT(DEPTH4_NONE));
}
 
if (ctx->needs_rb_fbd) {

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): freedreno/a4xx: fix rendering to layer != 0

2014-12-09 Thread Rob Clark
Module: Mesa
Branch: master
Commit: 3dbcd25022d0bd62484ac4a9498e4a1bbb5af0b0
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=3dbcd25022d0bd62484ac4a9498e4a1bbb5af0b0

Author: Rob Clark 
Date:   Sun Dec  7 12:10:38 2014 -0500

freedreno/a4xx: fix rendering to layer != 0

Signed-off-by: Rob Clark 

---

 src/gallium/drivers/freedreno/a4xx/fd4_gmem.c |5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c 
b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
index 8ad0039..3c90052 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
@@ -126,12 +126,15 @@ emit_gmem2mem_surf(struct fd_context *ctx,
struct fd_ringbuffer *ring = ctx->ring;
struct fd_resource *rsc = fd_resource(psurf->texture);
struct fd_resource_slice *slice = &rsc->slices[psurf->u.tex.level];
+   uint32_t layer_offset = slice->size0 * psurf->u.tex.first_layer;
+
+   debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
 
OUT_PKT0(ring, REG_A4XX_RB_COPY_CONTROL, 4);
OUT_RING(ring, A4XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
A4XX_RB_COPY_CONTROL_MODE(RB_COPY_RESOLVE) |
A4XX_RB_COPY_CONTROL_GMEM_BASE(base));
-   OUT_RELOCW(ring, rsc->bo, slice->offset, 0, 0);   /* RB_COPY_DEST_BASE 
*/
+   OUT_RELOCW(ring, rsc->bo, slice->offset + layer_offset, 0, 0);   /* 
RB_COPY_DEST_BASE */
OUT_RING(ring, A4XX_RB_COPY_DEST_PITCH_PITCH(slice->pitch * rsc->cpp));
OUT_RING(ring, A4XX_RB_COPY_DEST_INFO_TILE(TILE4_LINEAR) |

A4XX_RB_COPY_DEST_INFO_FORMAT(fd4_pipe2color(psurf->format)) |

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): freedreno/a4xx: texture fixes

2014-12-09 Thread Rob Clark
Module: Mesa
Branch: master
Commit: 1e3a732603a4a4d5b3e7102cf0d7840f79ecf5c4
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=1e3a732603a4a4d5b3e7102cf0d7840f79ecf5c4

Author: Rob Clark 
Date:   Fri Dec  5 11:43:03 2014 -0500

freedreno/a4xx: texture fixes

Signed-off-by: Rob Clark 

---

 src/gallium/drivers/freedreno/a4xx/fd4_emit.c  |   23 +++-
 src/gallium/drivers/freedreno/a4xx/fd4_format.c|   17 +++
 src/gallium/drivers/freedreno/a4xx/fd4_format.h|1 +
 src/gallium/drivers/freedreno/a4xx/fd4_texture.c   |   12 ++
 src/gallium/drivers/freedreno/a4xx/fd4_texture.h   |2 +-
 src/gallium/drivers/freedreno/freedreno_resource.c |6 +
 6 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c 
b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index c7be161..5b47158 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -162,12 +162,20 @@ emit_textures(struct fd_context *ctx, struct 
fd_ringbuffer *ring,
unsigned i;
 
if (tex->num_samplers > 0) {
+   int num_samplers;
+
+   /* not sure if this is an a420.0 workaround, but we seem
+* to need to emit these in pairs.. emit a final dummy
+* entry if odd # of samplers:
+*/
+   num_samplers = align(tex->num_samplers, 2);
+
/* output sampler state: */
-   OUT_PKT3(ring, CP_LOAD_STATE, 2 + 2 + (2 * tex->num_samplers));
+   OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * num_samplers));
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
CP_LOAD_STATE_0_STATE_BLOCK(sb) |
-   CP_LOAD_STATE_0_NUM_UNIT(tex->num_samplers));
+   CP_LOAD_STATE_0_NUM_UNIT(num_samplers));
OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
for (i = 0; i < tex->num_samplers; i++) {
@@ -178,9 +186,11 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer 
*ring,
OUT_RING(ring, sampler->texsamp0);
OUT_RING(ring, sampler->texsamp1);
}
-   /* maybe an a420.0 (or a4xx.0) workaround?? or just driver bug? */
-   OUT_RING(ring, 0x00000000);
-   OUT_RING(ring, 0x00000000);
+
+   for (; i < num_samplers; i++) {
+   OUT_RING(ring, 0x00000000);
+   OUT_RING(ring, 0x00000000);
+   }
}
 
if (tex->num_textures > 0) {
@@ -203,7 +213,8 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer 
*ring,
OUT_RING(ring, view->texconst1);
OUT_RING(ring, view->texconst2);
OUT_RING(ring, view->texconst3);
-   OUT_RELOC(ring, rsc->bo, slice->offset, 0, 0);
+   OUT_RELOC(ring, rsc->bo, slice->offset,
+   view->textconst4, 0);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.c 
b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
index bbece83..9cff134 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_format.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
@@ -232,6 +232,23 @@ fd4_pipe2swap(enum pipe_format format)
return formats[format].swap;
 }
 
+enum a4xx_tex_fetchsize
+fd4_pipe2fetchsize(enum pipe_format format)
+{
+   switch (util_format_get_blocksizebits(format)) {
+   case 8:   return TFETCH4_1_BYTE;
+   case 16:  return TFETCH4_2_BYTE;
+   case 32:  return TFETCH4_4_BYTE;
+   case 64:  return TFETCH4_8_BYTE;
+   case 128: return TFETCH4_16_BYTE;
+   default:
+   debug_printf("Unknown block size for format %s: %d\n",
+   util_format_name(format),
+   util_format_get_blocksizebits(format));
+   return TFETCH4_1_BYTE;
+   }
+}
+
 /* we need to special case a bit the depth/stencil restore, because we are
  * using the texture sampler to blit into the depth/stencil buffer, *not*
  * into a color buffer.  Otherwise fd4_tex_swiz() will do the wrong thing,
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.h 
b/src/gallium/drivers/freedreno/a4xx/fd4_format.h
index 5d6d1ae..04837da 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_format.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.h
@@ -38,6 +38,7 @@ enum a4xx_tex_fmt fd4_pipe2tex(enum pipe_format format);
 enum a4xx_color_fmt fd4_pipe2color(enum pipe_format forma

Mesa (master): freedreno: cleanup slice alignment/setup

2014-12-09 Thread Rob Clark
Module: Mesa
Branch: master
Commit: 5d7c9c9160e0d425df220e5e1898d0ab7dee2c83
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=5d7c9c9160e0d425df220e5e1898d0ab7dee2c83

Author: Rob Clark 
Date:   Thu Dec  4 16:56:33 2014 -0500

freedreno: cleanup slice alignment/setup

Collapse things back into a setup_slices() which takes the desired
alignment as a param.  This gets things ready for a4xx which has some
slightly different requirements.

Signed-off-by: Rob Clark 

---

 src/gallium/drivers/freedreno/freedreno_resource.c |   50 ++--
 1 file changed, 14 insertions(+), 36 deletions(-)

diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c 
b/src/gallium/drivers/freedreno/freedreno_resource.c
index 6b31d26..461e378 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -188,7 +188,7 @@ static const struct u_resource_vtbl fd_resource_vtbl = {
 };
 
 static uint32_t
-setup_slices(struct fd_resource *rsc)
+setup_slices(struct fd_resource *rsc, uint32_t alignment)
 {
struct pipe_resource *prsc = &rsc->base.b;
uint32_t level, size = 0;
@@ -201,7 +201,7 @@ setup_slices(struct fd_resource *rsc)
 
slice->pitch = align(width, 32);
slice->offset = size;
-   slice->size0 = slice->pitch * height * rsc->cpp;
+   slice->size0 = align(slice->pitch * height * rsc->cpp, alignment);
 
size += slice->size0 * depth * prsc->array_size;
 
@@ -213,33 +213,20 @@ setup_slices(struct fd_resource *rsc)
return size;
 }
 
-/* 2d array and 3d textures seem to want their layers aligned to
- * page boundaries
- */
 static uint32_t
-setup_slices_array(struct fd_resource *rsc)
+slice_alignment(struct pipe_screen *pscreen, const struct pipe_resource *tmpl)
 {
-   struct pipe_resource *prsc = &rsc->base.b;
-   uint32_t level, size = 0;
-   uint32_t width = prsc->width0;
-   uint32_t height = prsc->height0;
-   uint32_t depth = prsc->depth0;
-
-   for (level = 0; level <= prsc->last_level; level++) {
-   struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
-
-   slice->pitch = align(width, 32);
-   slice->offset = size;
-   slice->size0 = align(slice->pitch * height * rsc->cpp, 4096);
-
-   size += slice->size0 * depth * prsc->array_size;
-
-   width = u_minify(width, 1);
-   height = u_minify(height, 1);
-   depth = u_minify(depth, 1);
+   /* on a3xx, 2d array and 3d textures seem to want their
+* layers aligned to page boundaries:
+*/
+   switch (tmpl->target) {
+   case PIPE_TEXTURE_3D:
+   case PIPE_TEXTURE_1D_ARRAY:
+   case PIPE_TEXTURE_2D_ARRAY:
+   return 4096;
+   default:
+   return 1;
}
-
-   return size;
 }
 
 /**
@@ -273,16 +260,7 @@ fd_resource_create(struct pipe_screen *pscreen,
 
assert(rsc->cpp);
 
-   switch (tmpl->target) {
-   case PIPE_TEXTURE_3D:
-   case PIPE_TEXTURE_1D_ARRAY:
-   case PIPE_TEXTURE_2D_ARRAY:
-   size = setup_slices_array(rsc);
-   break;
-   default:
-   size = setup_slices(rsc);
-   break;
-   }
+   size = setup_slices(rsc, slice_alignment(pscreen, tmpl));
 
realloc_bo(rsc, size);
if (!rsc->bo)

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): freedreno: update generated headers

2014-12-09 Thread Rob Clark
Module: Mesa
Branch: master
Commit: 8ecbcbf0aab60e044dc4a9dabef2bdfb8db5abe9
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=8ecbcbf0aab60e044dc4a9dabef2bdfb8db5abe9

Author: Rob Clark 
Date:   Fri Dec  5 11:42:44 2014 -0500

freedreno: update generated headers

Signed-off-by: Rob Clark 

---

 src/gallium/drivers/freedreno/a2xx/a2xx.xml.h |2 +-
 src/gallium/drivers/freedreno/a3xx/a3xx.xml.h |2 +-
 src/gallium/drivers/freedreno/a4xx/a4xx.xml.h |   69 +
 src/gallium/drivers/freedreno/a4xx/fd4_program.c  |6 +-
 src/gallium/drivers/freedreno/adreno_common.xml.h |2 +-
 src/gallium/drivers/freedreno/adreno_pm4.xml.h|2 +-
 6 files changed, 65 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h 
b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
index d3d93c6..c1a0309 100644
--- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
+++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
@@ -14,7 +14,7 @@ The rules-ng-ng source files this header was generated from 
are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  
10551 bytes, from 2014-11-13 22:44:30)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml(  
15076 bytes, from 2014-12-01 22:40:01)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml  (  
64344 bytes, from 2014-12-03 14:14:54)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml  (  
49060 bytes, from 2014-12-03 22:36:15)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml  (  
50255 bytes, from 2014-12-07 18:43:56)
 
 Copyright (C) 2013-2014 by the following authors:
 - Rob Clark  (robclark)
diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h 
b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
index 8ee835b..d4c52e1 100644
--- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
+++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
@@ -14,7 +14,7 @@ The rules-ng-ng source files this header was generated from 
are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  
10551 bytes, from 2014-11-13 22:44:30)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml(  
15076 bytes, from 2014-12-01 22:40:01)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml  (  
64344 bytes, from 2014-12-03 14:14:54)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml  (  
49060 bytes, from 2014-12-03 22:36:15)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml  (  
50255 bytes, from 2014-12-07 18:43:56)
 
 Copyright (C) 2013-2014 by the following authors:
 - Rob Clark  (robclark)
diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h 
b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index bf49527..3f84c32 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -14,7 +14,7 @@ The rules-ng-ng source files this header was generated from 
are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  
10551 bytes, from 2014-11-13 22:44:30)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml(  
15076 bytes, from 2014-12-01 22:40:01)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml  (  
64344 bytes, from 2014-12-03 14:14:54)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml  (  
49060 bytes, from 2014-12-03 22:36:15)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml  (  
50255 bytes, from 2014-12-07 18:43:56)
 
 Copyright (C) 2013-2014 by the following authors:
 - Rob Clark  (robclark)
@@ -91,6 +91,7 @@ enum a4xx_vtx_fmt {
VFMT4_16_16_UNORM = 29,
VFMT4_16_16_16_UNORM = 30,
VFMT4_16_16_16_16_UNORM = 31,
+   VFMT4_32_32_SINT = 37,
VFMT4_8_UINT = 40,
VFMT4_8_8_UINT = 41,
VFMT4_8_8_8_UINT = 42,
@@ -132,6 +133,14 @@ enum a4xx_tex_fmt {
TFMT4_32_32_32_32_FLOAT = 63,
 };
 
+enum a4xx_tex_fetchsize {
+   TFETCH4_1_BYTE = 0,
+   TFETCH4_2_BYTE = 1,
+   TFETCH4_4_BYTE = 2,
+   TFETCH4_8_BYTE = 3,
+   TFETCH4_16_BYTE = 4,
+};
+
 enum a4xx_depth_format {
DEPTH4_NONE = 0,
DEPTH4_16 = 1,
@@ -265,14 +274,19 @@ static inline uint32_t 
A4XX_RB_MSAA_CONTROL_SAMPLES(uint32_t val)
return ((val) << A4XX_RB_MSAA_CONTROL_SAMPLES__SHIFT) & 
A4XX_RB_MSAA_CONTROL_SAMPLES__MASK;
 }
 
-#define REG_A4XX_RB_MSAA_CONTROL2  0x20a3
-#define A4XX_RB_MSAA_CONTROL2_MSAA_SAMPLES__MASK   0x0380
-#define A4XX_RB_MSAA_CONTROL2_MSAA_SAMPLES__SHIFT  7
-static inline uint32_t A4XX_RB_MSAA_CONTROL2_MSAA_SAMPLES(uint32_t val)
+#define REG_A4XX_RB_RENDER_CONTROL2  0x20a3
+#define A4XX_RB_RENDER_CONTROL2_XCOORD 0x0001
+#define A4XX_RB_RENDER_CONTROL2_YCOORD 0x0002
+

Mesa (master): mesa: use build flag to ensure stack is realigned on x86

2014-12-09 Thread Timothy Arceri
Module: Mesa
Branch: master
Commit: f1b5f2b157a092e93590bd43544fbf2671edab36
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=f1b5f2b157a092e93590bd43544fbf2671edab36

Author: Timothy Arceri 
Date:   Sun Dec  7 00:09:40 2014 +1100

mesa: use build flag to ensure stack is realigned on x86

Nowadays GCC assumes the stack pointer is 16-byte aligned even on 32-bit x86,
but that is an assumption OpenGL drivers (or any dynamic library, for that
matter) can't afford to make, as there are many closed- and open-source
application binaries out there that only assume 4-byte stack alignment.

V4: fix comment and indentation

V3: move all sse4.1 build flag config to the same location
 and add comment as to why we need to do the realign

V2: use $target_cpu rather than $host_cpu
  and setup build flags in config rather than makefile

https://bugs.freedesktop.org/show_bug.cgi?id=86788
Signed-off-by: Timothy Arceri 
Reviewed-by: Matt Turner 
CC: "10.4" 

---

 configure.ac   |   11 ++-
 src/mesa/Makefile.am   |2 +-
 src/mesa/main/sse_minmax.c |3 ---
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/configure.ac b/configure.ac
index b0df1bb..4bdf75d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -253,8 +253,16 @@ AC_SUBST([VISIBILITY_CXXFLAGS])
 dnl
 dnl Optional flags, check for compiler support
 dnl
+SSE41_CFLAGS="-msse4.1"
+dnl Code compiled by GCC with -msse* assumes a 16 byte aligned
+dnl stack, but on x86-32 such alignment is not guaranteed.
+case "$target_cpu" in
+i?86)
+SSE41_CFLAGS="$SSE41_CFLAGS -mstackrealign"
+;;
+esac
 save_CFLAGS="$CFLAGS"
-CFLAGS="-msse4.1 $CFLAGS"
+CFLAGS="$SSE41_CFLAGS $CFLAGS"
 AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
 #include <smmintrin.h>
 int main () {
@@ -267,6 +275,7 @@ if test "x$SSE41_SUPPORTED" = x1; then
 DEFINES="$DEFINES -DUSE_SSE41"
 fi
 AM_CONDITIONAL([SSE41_SUPPORTED], [test x$SSE41_SUPPORTED = x1])
+AC_SUBST([SSE41_CFLAGS], $SSE41_CFLAGS)
 
 dnl Can't have static and shared libraries, default to static if user
 dnl explicitly requested. If both disabled, set to static since shared
diff --git a/src/mesa/Makefile.am b/src/mesa/Makefile.am
index 932db4f..3b68573 100644
--- a/src/mesa/Makefile.am
+++ b/src/mesa/Makefile.am
@@ -153,7 +153,7 @@ libmesagallium_la_LIBADD = \
 libmesa_sse41_la_SOURCES = \
main/streaming-load-memcpy.c \
main/sse_minmax.c
-libmesa_sse41_la_CFLAGS = $(AM_CFLAGS) -msse4.1
+libmesa_sse41_la_CFLAGS = $(AM_CFLAGS) $(SSE41_CFLAGS)
 
 pkgconfigdir = $(libdir)/pkgconfig
 pkgconfig_DATA = gl.pc
diff --git a/src/mesa/main/sse_minmax.c b/src/mesa/main/sse_minmax.c
index 93cf2a6..222ac14 100644
--- a/src/mesa/main/sse_minmax.c
+++ b/src/mesa/main/sse_minmax.c
@@ -31,9 +31,6 @@
 #include 
 
 void
-#if !defined(__x86_64__)
-   __attribute__((force_align_arg_pointer))
-#endif
 _mesa_uint_array_min_max(const unsigned *ui_indices, unsigned *min_index,
  unsigned *max_index, const unsigned count)
 {

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): draw: implement TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION

2014-12-09 Thread Marek Olšák
Module: Mesa
Branch: master
Commit: 65ef78e8611556780fce0bee1feba805347ec627
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=65ef78e8611556780fce0bee1feba805347ec627

Author: Marek Olšák 
Date:   Mon Nov 17 22:30:31 2014 +0100

draw: implement TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION

Required by Nine. Tested with util_run_tests.
It's added to softpipe, llvmpipe, and r300g/swtcl.

Tested-by: David Heidelberg 

---

 src/gallium/auxiliary/draw/draw_context.c  |   40 +---
 src/gallium/auxiliary/draw/draw_llvm.c |2 +-
 src/gallium/auxiliary/draw/draw_private.h  |4 ++
 .../auxiliary/draw/draw_pt_fetch_shade_emit.c  |2 +-
 .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c  |2 +-
 .../draw/draw_pt_fetch_shade_pipeline_llvm.c   |2 +-
 src/gallium/auxiliary/draw/draw_vs.c   |2 +
 src/gallium/drivers/llvmpipe/lp_screen.c   |2 +
 src/gallium/drivers/r300/r300_screen.c |2 +-
 src/gallium/drivers/softpipe/sp_screen.c   |2 +
 10 files changed, 49 insertions(+), 11 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_context.c 
b/src/gallium/auxiliary/draw/draw_context.c
index 7bd2d39..04cf5b7 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -254,21 +254,48 @@ void draw_set_zs_format(struct draw_context *draw, enum 
pipe_format format)
 }
 
 
-static void update_clip_flags( struct draw_context *draw )
+static bool
+draw_is_vs_window_space(struct draw_context *draw)
 {
-   draw->clip_xy = !draw->driver.bypass_clip_xy;
+   if (draw->vs.vertex_shader) {
+  struct tgsi_shader_info *info = &draw->vs.vertex_shader->info;
+
+  return info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION] != 0;
+   }
+   return false;
+}
+
+
+void
+draw_update_clip_flags(struct draw_context *draw)
+{
+   bool window_space = draw_is_vs_window_space(draw);
+
+   draw->clip_xy = !draw->driver.bypass_clip_xy && !window_space;
draw->guard_band_xy = (!draw->driver.bypass_clip_xy &&
   draw->driver.guard_band_xy);
draw->clip_z = (!draw->driver.bypass_clip_z &&
-   draw->rasterizer && draw->rasterizer->depth_clip);
+   draw->rasterizer && draw->rasterizer->depth_clip) &&
+  !window_space;
draw->clip_user = draw->rasterizer &&
- draw->rasterizer->clip_plane_enable != 0;
+ draw->rasterizer->clip_plane_enable != 0 &&
+ !window_space;
draw->guard_band_points_xy = draw->guard_band_xy ||
 (draw->driver.bypass_clip_points &&
 (draw->rasterizer &&
  draw->rasterizer->point_tri_clip));
 }
 
+
+void
+draw_update_viewport_flags(struct draw_context *draw)
+{
+   bool window_space = draw_is_vs_window_space(draw);
+
+   draw->bypass_viewport = window_space || draw->identity_viewport;
+}
+
+
 /**
  * Register new primitive rasterization/rendering state.
  * This causes the drawing pipeline to be rebuilt.
@@ -282,7 +309,7 @@ void draw_set_rasterizer_state( struct draw_context *draw,
 
   draw->rasterizer = raster;
   draw->rast_handle = rast_handle;
-  update_clip_flags(draw);
+  draw_update_clip_flags(draw);
}
 }
 
@@ -309,7 +336,7 @@ void draw_set_driver_clipping( struct draw_context *draw,
draw->driver.bypass_clip_z = bypass_clip_z;
draw->driver.guard_band_xy = guard_band_xy;
draw->driver.bypass_clip_points = bypass_clip_points;
-   update_clip_flags(draw);
+   draw_update_clip_flags(draw);
 }
 
 
@@ -363,6 +390,7 @@ void draw_set_viewport_states( struct draw_context *draw,
viewport->translate[0] == 0.0f &&
viewport->translate[1] == 0.0f &&
viewport->translate[2] == 0.0f);
+   draw_update_viewport_flags(draw);
 }
 
 
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c 
b/src/gallium/auxiliary/draw/draw_llvm.c
index dbaece3..8326072 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -1836,7 +1836,7 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, char 
*store)
key->clip_xy = llvm->draw->clip_xy;
key->clip_z = llvm->draw->clip_z;
key->clip_user = llvm->draw->clip_user;
-   key->bypass_viewport = llvm->draw->identity_viewport;
+   key->bypass_viewport = llvm->draw->bypass_viewport;
key->clip_halfz = llvm->draw->rasterizer->clip_halfz;
key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE);
key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable;
diff --git a/src/gallium/auxiliary/draw/draw_private.h 
b/src/gallium/auxiliary/draw/draw_private.h
index 37045eb..7b893cb 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -252,6 +252,7 @@ struct draw_context
 
struct pipe_viewport_state viewports[PIPE_MAX_VIEWPORTS];

Mesa (master): mesa: Enables GL_RGB and GL_RGBA unsized internal formats for OpenGL ES 3.0

2014-12-09 Thread Iago Toral Quiroga
Module: Mesa
Branch: master
Commit: 78942787170615c9333810cf3a4819a13c9eb8e8
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=78942787170615c9333810cf3a4819a13c9eb8e8

Author: Eduardo Lima Mitev 
Date:   Thu Nov 20 14:02:46 2014 +0100

mesa: Enables GL_RGB and GL_RGBA unsized internal formats for OpenGL ES 3.0

GL_RGB and GL_RGBA are valid internal formats on a GLES3 profile. See
"Table 1. Unsized Internal Formats" at
https://www.khronos.org/opengles/sdk/docs/man3/html/glTexImage2D.xhtml.

Fixes 2 dEQP tests:
- dEQP-GLES3.functional.state_query.internal_format.rgb_samples
- dEQP-GLES3.functional.state_query.internal_format.rgba_samples

Reviewed-by: Brian Paul 

---

 src/mesa/main/fbobject.c |6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index f5c11c4..4c3c157 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -1430,6 +1430,9 @@ _mesa_base_fbo_format(struct gl_context *ctx, GLenum 
internalFormat)
case GL_RGB8:
   return GL_RGB;
case GL_RGB:
+  if (_mesa_is_gles3(ctx))
+ return GL_RGB;
+  /* fallthrough */
case GL_R3_G3_B2:
case GL_RGB4:
case GL_RGB5:
@@ -1444,6 +1447,9 @@ _mesa_base_fbo_format(struct gl_context *ctx, GLenum 
internalFormat)
case GL_RGBA8:
   return GL_RGBA;
case GL_RGBA:
+  if (_mesa_is_gles3(ctx))
+ return GL_RGBA;
+  /* fallthrough */
case GL_RGBA2:
case GL_RGBA12:
case GL_RGBA16:

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): mesa: Returns zero samples when querying GL_NUM_SAMPLE_COUNTS when internal format is integer

2014-12-09 Thread Iago Toral Quiroga
Module: Mesa
Branch: master
Commit: 09cb149ba745302e366c2f965a033103d398748c
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=09cb149ba745302e366c2f965a033103d398748c

Author: Eduardo Lima Mitev 
Date:   Thu Nov 20 14:52:35 2014 +0100

mesa: Returns zero samples when querying GL_NUM_SAMPLE_COUNTS when internal 
format is integer

From GL ES 3.0 specification, section 6.1.15 Internal Format Queries (page 236),
multisampling is not supported for signed and unsigned integer internal formats.

Fixes 19 dEQP tests under 'dEQP-GLES3.functional.state_query.internal_format.*'.

Reviewed-by: Ian Romanick 

---

 src/mesa/main/formatquery.c |   57 ++-
 1 file changed, 34 insertions(+), 23 deletions(-)

diff --git a/src/mesa/main/formatquery.c b/src/mesa/main/formatquery.c
index 40eca87..f6274fe 100644
--- a/src/mesa/main/formatquery.c
+++ b/src/mesa/main/formatquery.c
@@ -115,29 +115,40 @@ _mesa_GetInternalformativ(GLenum target, GLenum 
internalformat, GLenum pname,
 internalformat, buffer);
   break;
case GL_NUM_SAMPLE_COUNTS: {
-  /* The driver can return 0, and we should pass that along to the
-   * application.  The ARB decided that ARB_internalformat_query should
-   * behave as ARB_internalformat_query2 in this situation.
-   *
-   * The ARB_internalformat_query2 spec says:
-   *
-   * "- NUM_SAMPLE_COUNTS: The number of sample counts that would be
-   *returned by querying SAMPLES is returned in <params>.
-   ** If <internalformat> is not color-renderable,
-   *  depth-renderable, or stencil-renderable (as defined in
-   *  section 4.4.4), or if <target> does not support multiple
-   *  samples (ie other than TEXTURE_2D_MULTISAMPLE,
-   *  TEXTURE_2D_MULTISAMPLE_ARRAY, or RENDERBUFFER), 0 is
-   *  returned."
-   */
-  const size_t num_samples =
- ctx->Driver.QuerySamplesForFormat(ctx, target, internalformat, buffer);
-
-  /* QuerySamplesForFormat writes some stuff to buffer, so we have to
-   * separately over-write it with the requested value.
-   */
-  buffer[0] = (GLint) num_samples;
-  count = 1;
+  if (_mesa_is_gles3(ctx) && _mesa_is_enum_format_integer(internalformat)) {
+ /* From GL ES 3.0 specification, section 6.1.15 page 236: "Since
+  * multisampling is not supported for signed and unsigned integer
+  * internal formats, the value of NUM_SAMPLE_COUNTS will be zero
+  * for such formats.
+  */
+ buffer[0] = 0;
+ count = 1;
+  } else {
+ size_t num_samples;
+
+ /* The driver can return 0, and we should pass that along to the
+  * application.  The ARB decided that ARB_internalformat_query should
+  * behave as ARB_internalformat_query2 in this situation.
+  *
+  * The ARB_internalformat_query2 spec says:
+  *
+  * "- NUM_SAMPLE_COUNTS: The number of sample counts that would be
+  *returned by querying SAMPLES is returned in <params>.
+  ** If <internalformat> is not color-renderable,
+  *  depth-renderable, or stencil-renderable (as defined in
+  *  section 4.4.4), or if <target> does not support multiple
+  *  samples (ie other than TEXTURE_2D_MULTISAMPLE,
+  *  TEXTURE_2D_MULTISAMPLE_ARRAY, or RENDERBUFFER), 0 is
+  *  returned."
+  */
+ num_samples =  ctx->Driver.QuerySamplesForFormat(ctx, target, internalformat, buffer);
+
+ /* QuerySamplesForFormat writes some stuff to buffer, so we have to
+  * separately over-write it with the requested value.
+  */
+ buffer[0] = (GLint) num_samples;
+ count = 1;
+  }
   break;
}
default:

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): glsl: invariant qualifier is not valid for shader inputs in GLSL ES 3.00

2014-12-09 Thread Iago Toral Quiroga
Module: Mesa
Branch: master
Commit: 426a50e2089b12d33f5c075aa5622f64076914a3
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=426a50e2089b12d33f5c075aa5622f64076914a3

Author: Samuel Iglesias Gonsalvez 
Date:   Tue Nov 25 12:23:10 2014 +0100

glsl: invariant qualifier is not valid for shader inputs in GLSL ES 3.00

GLSL ES 3.00 spec, chapter 4.6.1 "The Invariant Qualifier",

Only variables output from a shader can be candidates for invariance. This
includes user-defined output variables and the built-in output variables.
As only outputs can be declared as invariant, an invariant output from one
shader stage will still match an input of a subsequent stage without the
input being declared as invariant.

This patch fixes the following dEQP tests:

dEQP-GLES3.functional.shaders.qualification_order.variables.valid.invariant_interp_storage_precision
dEQP-GLES3.functional.shaders.qualification_order.variables.valid.invariant_interp_storage
dEQP-GLES3.functional.shaders.qualification_order.variables.valid.invariant_storage_precision
dEQP-GLES3.functional.shaders.qualification_order.variables.valid.invariant_storage
dEQP-GLES3.functional.shaders.qualification_order.variables.invalid.invariant_interp_storage_precision_invariant_input
dEQP-GLES3.functional.shaders.qualification_order.variables.invalid.invariant_interp_storage_invariant_input
dEQP-GLES3.functional.shaders.qualification_order.variables.invalid.invariant_storage_precision_invariant_input
dEQP-GLES3.functional.shaders.qualification_order.variables.invalid.invariant_storage_invariant_input

No piglit regressions observed.

v2:
- Add spec content in the code

Signed-off-by: Samuel Iglesias Gonsalvez 
Reviewed-by: Ian Romanick 

---

 src/glsl/glsl_parser.yy|   11 +++
 src/glsl/link_varyings.cpp |2 +-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index 6a55a4e..7fb8c38 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -1602,6 +1602,17 @@ type_qualifier:
 
   $$ = $2;
   $$.flags.q.invariant = 1;
+
+  /* GLSL ES 3.00 spec, section 4.6.1 "The Invariant Qualifier":
+   *
+   * "Only variables output from a shader can be candidates for invariance.
+   * This includes user-defined output variables and the built-in output
+   * variables. As only outputs can be declared as invariant, an invariant
+   * output from one shader stage will still match an input of a subsequent
+   * stage without the input being declared as invariant."
+   */
+  if (state->es_shader && state->language_version >= 300 && $$.flags.q.in)
+ _mesa_glsl_error(&@1, state, "invariant qualifiers cannot be used with shader inputs");
}
| interpolation_qualifier type_qualifier
{
diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp
index 43da2c6..2261799 100644
--- a/src/glsl/link_varyings.cpp
+++ b/src/glsl/link_varyings.cpp
@@ -116,7 +116,7 @@ cross_validate_types_and_qualifiers(struct 
gl_shader_program *prog,
   return;
}
 
-   if (input->data.invariant != output->data.invariant) {
+   if (!prog->IsES && input->data.invariant != output->data.invariant) {
   linker_error(prog,
"%s shader output `%s' %s invariant qualifier, "
"but %s shader input %s invariant qualifier\n",

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): mesa: Recompute LegalTypesMask if the GL API has changed

2014-12-09 Thread Iago Toral Quiroga
Module: Mesa
Branch: master
Commit: e1ed4f2532b4e9bafb5663cccbe28033c49b2e77
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=e1ed4f2532b4e9bafb5663cccbe28033c49b2e77

Author: Iago Toral Quiroga 
Date:   Tue Dec  2 12:10:14 2014 +0100

mesa: Recompute LegalTypesMask if the GL API has changed

The current code computes ctx->Array.LegalTypesMask just once,
however, computing this needs to consider ctx->API so we need
to make sure that the API for that context has not changed if
we intend to reuse the result.

The context API can change, at least, if we go through
_mesa_meta_begin, since that will always force
API_OPENGL_COMPAT until we call _mesa_meta_end. If any
operation in between these two calls triggers a call to
update_array_format, then we might be caching a value for
LegalTypesMask that will not be right once we have called
_mesa_meta_end and restored the context API.

Fixes the following 179 dEQP tests in i965:
dEQP-GLES3.functional.vertex_arrays.single_attribute.strides.fixed.*
dEQP-GLES3.functional.vertex_arrays.single_attribute.normalize.fixed.*
dEQP-GLES3.functional.vertex_arrays.single_attribute.output_types.fixed.*
dEQP-GLES3.functional.vertex_arrays.single_attribute.usages.static_draw.*fixed*
dEQP-GLES3.functional.vertex_arrays.single_attribute.usages.stream_draw.*fixed*
dEQP-GLES3.functional.vertex_arrays.single_attribute.usages.dynamic_draw.*fixed*
dEQP-GLES3.functional.vertex_arrays.single_attribute.usages.static_copy.*fixed*
dEQP-GLES3.functional.vertex_arrays.single_attribute.usages.stream_copy.*fixed*
dEQP-GLES3.functional.vertex_arrays.single_attribute.usages.dynamic_copy.*fixed*
dEQP-GLES3.functional.vertex_arrays.single_attribute.usages.static_read.*fixed*
dEQP-GLES3.functional.vertex_arrays.single_attribute.usages.stream_read.*fixed*
dEQP-GLES3.functional.vertex_arrays.single_attribute.usages.dynamic_read.*fixed*
dEQP-GLES3.functional.vertex_arrays.multiple_attributes.input_types.3_*fixed2*
dEQP-GLES3.functional.draw.random.{2,18,28,68,83,106,109,156,181,191}

Reviewed-by: Brian Paul 

---

 src/mesa/main/mtypes.h |   32 
 src/mesa/main/varray.c |9 ++---
 2 files changed, 22 insertions(+), 19 deletions(-)

diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index cee11a3..b95dfb9 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1657,6 +1657,20 @@ typedef enum {
DRAW_ARRAYS
 } gl_draw_method;
 
+/**
+ * Enum for the OpenGL APIs we know about and may support.
+ *
+ * NOTE: This must match the api_enum table in
+ * src/mesa/main/get_hash_generator.py
+ */
+typedef enum
+{
+   API_OPENGL_COMPAT,  /* legacy / compatibility contexts */
+   API_OPENGLES,
+   API_OPENGLES2,
+   API_OPENGL_CORE,
+   API_OPENGL_LAST = API_OPENGL_CORE
+} gl_api;
 
 /**
  * Vertex array state
@@ -1701,8 +1715,9 @@ struct gl_array_attrib
/** One of the DRAW_xxx flags, not consumed by drivers */
gl_draw_method DrawMethod;
 
-   /** Legal array datatypes */
+   /** Legal array datatypes and the API for which they have been computed */
GLbitfield LegalTypesMask;
+   gl_api LegalTypesMaskAPI;
 };
 
 
@@ -4040,21 +4055,6 @@ enum mesa_debug_severity {
 /** @} */
 
 /**
- * Enum for the OpenGL APIs we know about and may support.
- *
- * NOTE: This must match the api_enum table in
- * src/mesa/main/get_hash_generator.py
- */
-typedef enum
-{
-   API_OPENGL_COMPAT,  /* legacy / compatibility contexts */
-   API_OPENGLES,
-   API_OPENGLES2,
-   API_OPENGL_CORE,
-   API_OPENGL_LAST = API_OPENGL_CORE
-} gl_api;
-
-/**
  * Driver-specific state flags.
  *
  * These are or'd with gl_context::NewDriverState to notify a driver about
diff --git a/src/mesa/main/varray.c b/src/mesa/main/varray.c
index 96c2b26..89aaad1 100644
--- a/src/mesa/main/varray.c
+++ b/src/mesa/main/varray.c
@@ -258,11 +258,14 @@ update_array_format(struct gl_context *ctx,
GLuint elementSize;
GLenum format = GL_RGBA;
 
-   if (ctx->Array.LegalTypesMask == 0) {
-  /* One-time initialization.  We can't do this in _mesa_init_varrays()
-   * below because extensions are not yet enabled at that point.
+   if (ctx->Array.LegalTypesMask == 0 || ctx->Array.LegalTypesMaskAPI != 
ctx->API) {
+  /* Compute the LegalTypesMask only once, unless the context API has
+   * changed, in which case we want to compute it again.  We can't do this
+   * in _mesa_init_varrays() below because extensions are not yet enabled
+   * at that point.
*/
   ctx->Array.LegalTypesMask = get_legal_types_mask(ctx);
+  ctx->Array.LegalTypesMaskAPI = ctx->API;
}
 
legalTypesMask &= ctx->Array.LegalTypesMask;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): mesa: Considers GL_DEPTH_STENCIL_ATTACHMENT a valid argument for FBO invalidation under GLES3

2014-12-09 Thread Iago Toral Quiroga
Module: Mesa
Branch: master
Commit: 242ad326552b10a31667eba0be5677a4d8397dc4
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=242ad326552b10a31667eba0be5677a4d8397dc4

Author: Eduardo Lima Mitev 
Date:   Tue Nov 18 16:28:18 2014 +0100

mesa: Considers GL_DEPTH_STENCIL_ATTACHMENT a valid argument for FBO invalidation under GLES3

In OpenGL and OpenGL ES 3.0+, GL_DEPTH_STENCIL_ATTACHMENT is a valid attachment
point for the family of functions that invalidate a framebuffer object (e.g.,
glInvalidateFramebuffer, glInvalidateSubFramebuffer, etc.).
Currently, a GL_INVALID_ENUM error is emitted for this attachment point.

Fixes 21 dEQP test failures under 'dEQP-GLES3.functional.fbo.invalidate.*'.

Reviewed-by: Ian Romanick 

---

 src/mesa/main/fbobject.c |8 
 1 file changed, 8 insertions(+)

diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 02b7633..f5c11c4 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -3074,6 +3074,14 @@ invalidate_framebuffer_storage(GLenum target, GLsizei 
numAttachments,
  case GL_DEPTH_ATTACHMENT:
  case GL_STENCIL_ATTACHMENT:
 break;
+ case GL_DEPTH_STENCIL_ATTACHMENT:
+/* GL_DEPTH_STENCIL_ATTACHMENT is a valid attachment point only
+ * in desktop and ES 3.0 profiles. Note that OES_packed_depth_stencil
+ * extension does not make this attachment point valid on ES 2.0.
+ */
+if (_mesa_is_desktop_gl(ctx) || _mesa_is_gles3(ctx))
+   break;
+/* fallthrough */
  case GL_COLOR_ATTACHMENT0:
  case GL_COLOR_ATTACHMENT1:
  case GL_COLOR_ATTACHMENT2:

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): main: return two minor digits for ES shading language version

2014-12-09 Thread Iago Toral Quiroga
Module: Mesa
Branch: master
Commit: 6cc72511850961eba408a44f648c7067b6e68594
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=6cc72511850961eba408a44f648c7067b6e68594

Author: Samuel Iglesias Gonsalvez 
Date:   Wed Nov 26 13:16:38 2014 +0100

main: return two minor digits for ES shading language version

According to the OpenGL ES 3.0 spec, the minor number for
SHADING_LANGUAGE_VERSION is always two digits, matching the OpenGL ES Shading
Language Specification release number. For example, this query might return
the string "3.00".

This patch fixes the following dEQP test:

   dEQP-GLES3.functional.state_query.string.shading_language_version

No piglit regression observed.

Signed-off-by: Samuel Iglesias Gonsalvez 
Reviewed-by: Ian Romanick 

---

 src/mesa/main/getstring.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/getstring.c b/src/mesa/main/getstring.c
index f9d13a7..1b2c7f0 100644
--- a/src/mesa/main/getstring.c
+++ b/src/mesa/main/getstring.c
@@ -74,7 +74,7 @@ shading_language_version(struct gl_context *ctx)
case API_OPENGLES2:
   return (ctx->Version < 30)
  ? (const GLubyte *) "OpenGL ES GLSL ES 1.0.16"
- : (const GLubyte *) "OpenGL ES GLSL ES 3.0";
+ : (const GLubyte *) "OpenGL ES GLSL ES 3.00";
 
case API_OPENGLES:
   /* fall-through */

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): vc4: Prioritize allocating accumulators to short-lived values.

2014-12-09 Thread Eric Anholt
Module: Mesa
Branch: master
Commit: ab1b1fa6fbd72b05c48f83c9df5036c2bfe893a3
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=ab1b1fa6fbd72b05c48f83c9df5036c2bfe893a3

Author: Eric Anholt 
Date:   Mon Dec  8 17:43:29 2014 -0800

vc4: Prioritize allocating accumulators to short-lived values.

The register allocator walks from the end of the nodes array looking for
trivially-allocatable things to put on the stack, meaning (assuming
everything is trivially colorable and gets put on the stack in a single
pass) the low node numbers get allocated first.  The things allocated
first happen to get the lower-numbered registers, which is to say the fast
accumulators that can be paired more easily.

When we previously made the nodes match the temporary register numbers,
we'd end up putting the shader inputs (VS or FS) in the accumulators,
which are often long-lived values.  By prioritizing the shortest-lived
values for allocation, we can get a lot more instructions that involve
accumulators, and thus fewer conflicts for raddr and WS.

total instructions in shared programs: 52870 -> 46428 (-12.18%)
instructions in affected programs: 52260 -> 45818 (-12.33%)

---

 src/gallium/drivers/vc4/vc4_register_allocate.c |   73 ++-
 1 file changed, 59 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c 
b/src/gallium/drivers/vc4/vc4_register_allocate.c
index b62669f..3001900 100644
--- a/src/gallium/drivers/vc4/vc4_register_allocate.c
+++ b/src/gallium/drivers/vc4/vc4_register_allocate.c
@@ -139,6 +139,20 @@ vc4_alloc_reg_set(struct vc4_context *vc4)
 ra_set_finalize(vc4->regs, NULL);
 }
 
+struct node_to_temp_map {
+uint32_t temp;
+uint32_t priority;
+};
+
+static int
+node_to_temp_priority(const void *in_a, const void *in_b)
+{
+const struct node_to_temp_map *a = in_a;
+const struct node_to_temp_map *b = in_b;
+
+return a->priority - b->priority;
+}
+
 /**
  * Returns a mapping from QFILE_TEMP indices to struct qpu_regs.
  *
@@ -148,6 +162,8 @@ struct qpu_reg *
 vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
 {
 struct simple_node *node;
+struct node_to_temp_map map[c->num_temps];
+uint32_t temp_to_node[c->num_temps];
 uint32_t def[c->num_temps];
 uint32_t use[c->num_temps];
 struct qpu_reg *temp_registers = calloc(c->num_temps,
@@ -166,11 +182,11 @@ vc4_register_allocate(struct vc4_context *vc4, struct 
vc4_compile *c)
 struct ra_graph *g = ra_alloc_interference_graph(vc4->regs,
  c->num_temps);
 
-for (uint32_t i = 0; i < c->num_temps; i++)
+for (uint32_t i = 0; i < c->num_temps; i++) {
 ra_set_node_class(g, i, vc4->reg_class_any);
+}
 
-/* Compute the live ranges so we can figure out interference, and
- * figure out our register classes and preallocated registers.
+/* Compute the live ranges so we can figure out interference.
  */
 uint32_t ip = 0;
 foreach(node, &c->instructions) {
@@ -188,27 +204,54 @@ vc4_register_allocate(struct vc4_context *vc4, struct 
vc4_compile *c)
 
 switch (inst->op) {
 case QOP_FRAG_Z:
+case QOP_FRAG_W:
+/* The payload registers have values implicitly loaded
+ * at the start of the program.
+ */
 def[inst->dst.index] = 0;
-ra_set_node_reg(g, inst->dst.index,
+break;
+default:
+break;
+}
+
+ip++;
+}
+
+for (uint32_t i = 0; i < c->num_temps; i++) {
+map[i].temp = i;
+map[i].priority = use[i] - def[i];
+}
+qsort(map, c->num_temps, sizeof(map[0]), node_to_temp_priority);
+for (uint32_t i = 0; i < c->num_temps; i++) {
+temp_to_node[map[i].temp] = i;
+}
+
+/* Figure out our register classes and preallocated registers*/
+foreach(node, &c->instructions) {
+struct qinst *inst = (struct qinst *)node;
+
+switch (inst->op) {
+case QOP_FRAG_Z:
+ra_set_node_reg(g, temp_to_node[inst->dst.index],
 AB_INDEX + QPU_R_FRAG_PAYLOAD_ZW * 2 + 
1);
 break;
 
 case QOP_FRAG_W:
-def[inst->dst.index] = 0;
-ra_set_node_reg(g, inst->dst.index,
+ra_set_node_reg(g, temp_to_node[inst->dst.index],
 AB_INDEX + QPU_R_FRAG_PAYLOAD_ZW * 2);
 break;
 
 case QOP_TEX_RESULT:
 case QOP_TLB_COLOR_READ:
 

Mesa (master): vc4: Reserve rb31 instead of r3 for raddr conflict spills.

2014-12-09 Thread Eric Anholt
Module: Mesa
Branch: master
Commit: 8420a956924c720b3c4932a577623f836758c21c
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=8420a956924c720b3c4932a577623f836758c21c

Author: Eric Anholt 
Date:   Mon Dec  8 16:52:53 2014 -0800

vc4: Reserve rb31 instead of r3 for raddr conflict spills.

This increases the cost of a raddr b conflict spill (save r3 to rb31, move
src1 to r3, move rb31 back to r3 when done, instead of just move src1 to
r3), but on average thanks to instruction pairing it's more worthwhile to
have another accumulator.

total instructions in shared programs: 46428 -> 46171 (-0.55%)
instructions in affected programs: 38030 -> 37773 (-0.68%)

---

 src/gallium/drivers/vc4/vc4_qpu_emit.c  |   50 +++
 src/gallium/drivers/vc4/vc4_register_allocate.c |6 +--
 2 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c 
b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 856f844..f2620c0 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -93,21 +93,41 @@ swap_file(struct qpu_reg *src)
  * In that case, we need to move one to a temporary that can be used in the
  * instruction, instead.
  */
-static void
+static bool
 fixup_raddr_conflict(struct vc4_compile *c,
- struct qpu_reg *src0, struct qpu_reg *src1)
+ struct qpu_reg dst,
+ struct qpu_reg *src0, struct qpu_reg *src1,
+ bool r3_live)
 {
 if ((src0->mux != QPU_MUX_A && src0->mux != QPU_MUX_B) ||
 src0->mux != src1->mux ||
 src0->addr == src1->addr) {
-return;
+return false;
 }
 
 if (swap_file(src0) || swap_file(src1))
-return;
+return false;
+
+if (src0->mux == QPU_MUX_A) {
+/* If we're conflicting over the A regfile, then we can just
+ * use the reserved rb31.
+ */
+queue(c, qpu_a_MOV(qpu_rb(31), *src1));
+*src1 = qpu_rb(31);
+return false;
+} else {
+/* Otherwise, we need a non-B regfile.  So, we spill r3 out to
+ * rb31, then store our desired value in r3, and tell the
+ * caller to put rb31 back into r3 when we're done.
+ */
+if (r3_live)
+queue(c, qpu_a_MOV(qpu_rb(31), qpu_r3()));
+queue(c, qpu_a_MOV(qpu_r3(), *src1));
+
+*src1 = qpu_r3();
 
-queue(c, qpu_a_MOV(qpu_r3(), *src1));
-*src1 = qpu_r3();
+return r3_live && dst.mux != QPU_MUX_R3;
+}
 }
 
 void
@@ -118,6 +138,8 @@ vc4_generate_code(struct vc4_context *vc4, struct 
vc4_compile *c)
 uint32_t inputs_remaining = c->num_inputs;
 uint32_t vpm_read_fifo_count = 0;
 uint32_t vpm_read_offset = 0;
+bool written_r3 = false;
+bool needs_restore;
 
 make_empty_list(&c->qpu_inst_list);
 
@@ -416,8 +438,12 @@ vc4_generate_code(struct vc4_context *vc4, struct 
vc4_compile *c)
 break;
 
 case QOP_TEX_DIRECT:
-fixup_raddr_conflict(c, &src[0], &src[1]);
+needs_restore = fixup_raddr_conflict(c, dst,
+ &src[0], &src[1],
+ written_r3);
 queue(c, qpu_a_ADD(qpu_rb(QPU_W_TMU0_S), src[0], 
src[1]));
+if (needs_restore)
+queue(c, qpu_a_MOV(qpu_r3(), qpu_rb(31)));
 break;
 
 case QOP_TEX_RESULT:
@@ -477,7 +503,9 @@ vc4_generate_code(struct vc4_context *vc4, struct 
vc4_compile *c)
 if (qir_get_op_nsrc(qinst->op) == 1)
 src[1] = src[0];
 
-fixup_raddr_conflict(c, &src[0], &src[1]);
+needs_restore = fixup_raddr_conflict(c, dst,
+ &src[0], &src[1],
+ written_r3);
 
 if (translate[qinst->op].is_mul) {
 queue(c, qpu_m_alu2(translate[qinst->op].op,
@@ -488,8 +516,14 @@ vc4_generate_code(struct vc4_context *vc4, struct 
vc4_compile *c)
 dst,
 src[0], src[1]));
 }
+if (needs_restore)
+queue(c, qpu_a_MOV(qpu_r3(), qpu_rb(31)));
+
 break;
 }
+
+if (dst.mux == QPU_MUX_R3)
+written_r3 = true;
 }
 
 qpu_sc