Mesa (master): r600g: only init GS_VERT_ITEMSIZE on r600
Module: Mesa Branch: master Commit: 7f21cf71989ba780639594ebb34d6e7345b08436 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7f21cf71989ba780639594ebb34d6e7345b08436 Author: Dave Airlie Date: Wed Dec 10 13:48:29 2014 +1000 r600g: only init GS_VERT_ITEMSIZE on r600 On evergreen there are 4 regs, on r600/700 there is only one. Don't initialise regs and trash someone else's state. Not sure this fixes anything, but hey one less stupid. Reviewed-By: Glenn Kennard Cc: "10.3 10.4" mesa-sta...@lists.freedesktop.org Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_state.c |7 ++- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 61f5c5a..9a4b972 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -2659,11 +2659,8 @@ void r600_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *sha r600_store_context_reg(cb, R_028A6C_VGT_GS_OUT_PRIM_TYPE, r600_conv_prim_to_gs_out(rshader->gs_output_prim)); - r600_store_context_reg_seq(cb, R_0288C8_SQ_GS_VERT_ITEMSIZE, 4); - r600_store_value(cb, cp_shader->ring_item_size >> 2); - r600_store_value(cb, 0); - r600_store_value(cb, 0); - r600_store_value(cb, 0); + r600_store_context_reg(cb, R_0288C8_SQ_GS_VERT_ITEMSIZE, + cp_shader->ring_item_size >> 2); r600_store_context_reg(cb, R_0288A8_SQ_ESGS_RING_ITEMSIZE, (rshader->ring_item_size) >> 2); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Do QPU scheduling across uniform loads.
Module: Mesa Branch: master Commit: 8812dc503eb48bac5b9c9b5740f76025c046f90d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8812dc503eb48bac5b9c9b5740f76025c046f90d Author: Eric Anholt Date: Tue Dec 9 18:54:29 2014 -0800 vc4: Do QPU scheduling across uniform loads. This means another pass of reordering the uniform data store, but it lets us pair up a lot more instructions. total instructions in shared programs: 44639 -> 43176 (-3.28%) instructions in affected programs: 36938 -> 35475 (-3.96%) --- src/gallium/drivers/vc4/vc4_qpu_schedule.c | 88 +++- 1 file changed, 60 insertions(+), 28 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c b/src/gallium/drivers/vc4/vc4_qpu_schedule.c index c733e6e..0700b0d 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_schedule.c +++ b/src/gallium/drivers/vc4/vc4_qpu_schedule.c @@ -62,6 +62,12 @@ struct schedule_node { * can be consumed. */ uint32_t latency; + +/** + * Which uniform from uniform_data[] this instruction read, or -1 if + * not reading a uniform. 
+ */ +int uniform; }; struct schedule_node_child { @@ -80,7 +86,6 @@ struct schedule_state { struct schedule_node *last_rb[32]; struct schedule_node *last_sf; struct schedule_node *last_vpm_read; -struct schedule_node *last_unif_read; struct schedule_node *last_tmu_write; struct schedule_node *last_tlb; struct schedule_node *last_vpm; @@ -174,9 +179,6 @@ process_raddr_deps(struct schedule_state *state, struct schedule_node *n, break; case QPU_R_UNIF: -add_write_dep(state, &state->last_unif_read, n); -break; - case QPU_R_NOP: case QPU_R_ELEM_QPU: case QPU_R_XY_PIXEL_COORD: @@ -215,6 +217,18 @@ is_tmu_write(uint32_t waddr) } } +static bool +reads_uniform(uint64_t inst) +{ +if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_LOAD_IMM) +return false; + +return (QPU_GET_FIELD(inst, QPU_RADDR_A) == QPU_R_UNIF || +QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_UNIF || +is_tmu_write(QPU_GET_FIELD(inst, QPU_WADDR_ADD)) || +is_tmu_write(QPU_GET_FIELD(inst, QPU_WADDR_MUL))); +} + static void process_mux_deps(struct schedule_state *state, struct schedule_node *n, uint32_t mux) @@ -224,17 +238,6 @@ process_mux_deps(struct schedule_state *state, struct schedule_node *n, } -static bool -is_direct_tmu_read(uint64_t inst) -{ -/* If it's a direct read, we happen to structure the code such that - * there's an explicit uniform read in the instruction (for kernel - * texture reloc processing). - */ -return (QPU_GET_FIELD(inst, QPU_RADDR_A) == QPU_R_UNIF || -QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_UNIF); -} - static void process_waddr_deps(struct schedule_state *state, struct schedule_node *n, uint32_t waddr, bool is_add) @@ -250,14 +253,6 @@ process_waddr_deps(struct schedule_state *state, struct schedule_node *n, } } else if (is_tmu_write(waddr)) { add_write_dep(state, &state->last_tmu_write, n); - -/* There is an implicit uniform read in texture ops in - * hardware, unless this is a direct-addressed uniform read, - * so we need to keep it in the same order as the other - * uniforms. 
- */ -if (!is_direct_tmu_read(n->inst->inst)) -add_write_dep(state, &state->last_unif_read, n); } else if (qpu_waddr_is_tlb(waddr)) { add_write_dep(state, &state->last_tlb, n); } else { @@ -509,7 +504,7 @@ get_instruction_priority(uint64_t inst) static struct schedule_node * choose_instruction_to_schedule(struct choose_scoreboard *scoreboard, struct simple_node *schedule_list, - uint64_t prev_inst) + struct schedule_node *prev_inst) { struct schedule_node *chosen = NULL; struct simple_node *node; @@ -537,8 +532,11 @@ choose_instruction_to_schedule(struct choose_scoreboard *scoreboard, /* If we're trying to pair with another instruction, check * that they're compatible. */ -if (prev_inst != 0) { -inst = qpu_merge_inst(prev_inst, inst); +if (prev_inst) { +if (prev_inst->uniform != -1 && n->uniform != -1) +continue; + +inst = qpu_merge_inst(prev_inst->inst->inst, inst); if (!inst) continue; } @@ -668,6 +666,17 @@ schedule_instruc
Mesa (master): vc4: Mark VPM read setup as impacting VPM reads, not writes.
Module: Mesa Branch: master Commit: f431b4f110946a0f6e3822f870fbcd3d23c8317a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f431b4f110946a0f6e3822f870fbcd3d23c8317a Author: Eric Anholt Date: Tue Dec 9 14:20:54 2014 -0800 vc4: Mark VPM read setup as impacting VPM reads, not writes. Fixes assertion failures if we adjust scheduling priorities to emphasize VPM reads more. --- src/gallium/drivers/vc4/vc4_qpu_schedule.c |8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c b/src/gallium/drivers/vc4/vc4_qpu_schedule.c index 6bba66a..4bb9b3a 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_schedule.c +++ b/src/gallium/drivers/vc4/vc4_qpu_schedule.c @@ -260,10 +260,16 @@ process_waddr_deps(struct schedule_state *state, struct schedule_node *n, break; case QPU_W_VPM: -case QPU_W_VPMVCD_SETUP: add_write_dep(state, &state->last_vpm, n); break; +case QPU_W_VPMVCD_SETUP: +if (is_a) +add_write_dep(state, &state->last_vpm_read, n); +else +add_write_dep(state, &state->last_vpm, n); +break; + case QPU_W_SFU_RECIP: case QPU_W_SFU_RECIPSQRT: case QPU_W_SFU_EXP: ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Refuse to merge instructions involving 32-bit immediate loads.
Module: Mesa Branch: master Commit: cff8c96a0d418f41e00aa97a13dc55e3ed213eb7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cff8c96a0d418f41e00aa97a13dc55e3ed213eb7 Author: Eric Anholt Date: Tue Dec 9 16:34:37 2014 -0800 vc4: Refuse to merge instructions involving 32-bit immediate loads. An immediate load overwrites the mul and add operations, so you can't merge with them. --- src/gallium/drivers/vc4/vc4_qpu.c |5 + 1 file changed, 5 insertions(+) diff --git a/src/gallium/drivers/vc4/vc4_qpu.c b/src/gallium/drivers/vc4/vc4_qpu.c index 6daa072..faf8790 100644 --- a/src/gallium/drivers/vc4/vc4_qpu.c +++ b/src/gallium/drivers/vc4/vc4_qpu.c @@ -356,6 +356,11 @@ qpu_merge_inst(uint64_t a, uint64_t b) if (qpu_num_sf_accesses(a) && qpu_num_sf_accesses(b)) return 0; +if (QPU_GET_FIELD(a, QPU_SIG) == QPU_SIG_LOAD_IMM || +QPU_GET_FIELD(b, QPU_SIG) == QPU_SIG_LOAD_IMM) { +return 0; +} + ok = ok && merge_fields(&merge, a, b, QPU_SIG_MASK, QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG)); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Skip raddr dependencies for 32-bit immediate loads.
Module: Mesa Branch: master Commit: 45a89237711acff7ee31c854361f8f580ccdcc9f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=45a89237711acff7ee31c854361f8f580ccdcc9f Author: Eric Anholt Date: Tue Dec 9 14:23:39 2014 -0800 vc4: Skip raddr dependencies for 32-bit immediate loads. These don't have raddr fields. --- src/gallium/drivers/vc4/vc4_qpu_schedule.c |7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c b/src/gallium/drivers/vc4/vc4_qpu_schedule.c index 4bb9b3a..8df816f 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_schedule.c +++ b/src/gallium/drivers/vc4/vc4_qpu_schedule.c @@ -334,8 +334,11 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n) uint32_t mul_b = QPU_GET_FIELD(inst, QPU_MUL_B); uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); -process_raddr_deps(state, n, raddr_a, true); -process_raddr_deps(state, n, raddr_b, false); +if (sig != QPU_SIG_LOAD_IMM) { +process_raddr_deps(state, n, raddr_a, true); +process_raddr_deps(state, n, raddr_b, false); +} + if (add_op != QPU_A_NOP) { process_mux_deps(state, n, add_a); process_mux_deps(state, n, add_b); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Populate the delay field better, and schedule high delay first.
Module: Mesa Branch: master Commit: c5b544403fbc955dd441fb5a2e11f0de2a75e9e4 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c5b544403fbc955dd441fb5a2e11f0de2a75e9e4 Author: Eric Anholt Date: Tue Dec 9 14:05:52 2014 -0800 vc4: Populate the delay field better, and schedule high delay first. This is a standard scheduling heuristic, and clearly helps. total instructions in shared programs: 46418 -> 44467 (-4.20%) instructions in affected programs: 42531 -> 40580 (-4.59%) --- src/gallium/drivers/vc4/vc4_qpu_schedule.c | 50 +++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c b/src/gallium/drivers/vc4/vc4_qpu_schedule.c index 8df816f..c733e6e 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_schedule.c +++ b/src/gallium/drivers/vc4/vc4_qpu_schedule.c @@ -49,7 +49,19 @@ struct schedule_node { uint32_t child_count; uint32_t child_array_size; uint32_t parent_count; + +/** + * Minimum number of cycles from scheduling this instruction until the + * end of the program, based on the slowest dependency chain through + * the children. + */ uint32_t delay; + +/** + * cycles between this instruction being scheduled and when its result + * can be consumed. + */ +uint32_t latency; }; struct schedule_node_child { @@ -548,6 +560,13 @@ choose_instruction_to_schedule(struct choose_scoreboard *scoreboard, } else if (prio < chosen_prio) { continue; } + +if (n->delay > chosen->delay) { +chosen = n; +chosen_prio = prio; +} else if (n->delay < chosen->delay) { +continue; +} } return chosen; @@ -612,7 +631,7 @@ compute_delay(struct schedule_node *n) if (!n->children[i].node->delay) compute_delay(n->children[i].node); n->delay = MAX2(n->delay, -n->children[i].node->delay + 1); +n->children[i].node->delay + n->latency); } } } @@ -734,6 +753,33 @@ schedule_instructions(struct vc4_compile *c, struct simple_node *schedule_list) } } +static uint32_t waddr_latency(uint32_t waddr) +{ +if (waddr < 32) +return 2; + +/* Some huge number, really. 
*/ +if (waddr >= QPU_W_TMU0_S && waddr <= QPU_W_TMU1_B) +return 10; + +switch(waddr) { +case QPU_W_SFU_RECIP: +case QPU_W_SFU_RECIPSQRT: +case QPU_W_SFU_EXP: +case QPU_W_SFU_LOG: +return 3; +default: +return 1; +} +} + +static uint32_t +instruction_latency(uint64_t inst) +{ +return MAX2(waddr_latency(QPU_GET_FIELD(inst, QPU_WADDR_ADD)), +waddr_latency(QPU_GET_FIELD(inst, QPU_WADDR_MUL))); +} + void qpu_schedule_instructions(struct vc4_compile *c) { @@ -761,6 +807,8 @@ qpu_schedule_instructions(struct vc4_compile *c) struct schedule_node *n = rzalloc(mem_ctx, struct schedule_node); n->inst = inst; +n->latency = instruction_latency(inst->inst); + remove_from_list(&inst->link); insert_at_tail(&schedule_list, &n->link); } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): clover: Fix build after llvm r223802
Module: Mesa Branch: master Commit: 25db8729dc53b60ee0caade5e797e99d6ad13fa3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=25db8729dc53b60ee0caade5e797e99d6ad13fa3 Author: Aaron Watry Date: Tue Dec 9 19:28:50 2014 -0600 clover: Fix build after llvm r223802 Signed-off-by: Aaron Watry Reviewed-by: Tom Stellard --- src/gallium/state_trackers/clover/llvm/invocation.cpp |4 1 file changed, 4 insertions(+) diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp index cda447d..5265d10 100644 --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp @@ -281,7 +281,11 @@ namespace { } for (unsigned i = 0; i < kernel_node->getNumOperands(); ++i) { +#if HAVE_LLVM >= 0x0306 + kernels.push_back(llvm::mdconst::dyn_extract( +#else kernels.push_back(llvm::dyn_cast( +#endif kernel_node->getOperand(i)->getOperand(0))); } } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): freedreno/a4xx: frag-coord / face fixes
Module: Mesa Branch: master Commit: 69d23809d06cb1bb20a92430e18720baff5994bc URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=69d23809d06cb1bb20a92430e18720baff5994bc Author: Rob Clark Date: Sun Dec 7 14:12:15 2014 -0500 freedreno/a4xx: frag-coord / face fixes Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a4xx/fd4_program.c | 25 -- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c index 76cadcc..cbfd8b2 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c @@ -200,6 +200,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit) { struct stage s[MAX_STAGES]; uint32_t pos_regid, posz_regid, psize_regid, color_regid; + uint32_t face_regid, coord_regid; int constmode; int i, j, k; @@ -217,6 +218,10 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit) color_regid = ir3_find_output_regid(s[FS].v, ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0)); + /* TODO get these dynamically: */ + face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0); + coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0); + /* we could probably divide this up into things that need to be * emitted if frag-prog is dirty vs if vert-prog is dirty.. 
*/ @@ -235,11 +240,14 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit) A4XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART | A4XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE); OUT_RING(ring, A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) | - 0xfcfc | /* XXX */ + 0xfc00 | /* XXX */ A4XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE | - COND(s[FS].v->frag_coord, A4XX_HLSQ_CONTROL_1_REG_ZWCOORD)); - OUT_RING(ring, A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31)); - OUT_RING(ring, A4XX_HLSQ_CONTROL_3_REG_REGID(s[FS].v->pos_regid)); + A4XX_HLSQ_CONTROL_1_REG_COORDREGID(coord_regid)); + OUT_RING(ring, A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(63) | + 0x3f3f000 | /* XXX */ + A4XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid)); + OUT_RING(ring, A4XX_HLSQ_CONTROL_3_REG_REGID(s[FS].v->pos_regid) | + 0xfcfcfc00); OUT_PKT0(ring, REG_A4XX_HLSQ_VS_CONTROL_REG, 5); OUT_RING(ring, A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(s[VS].constlen) | @@ -349,7 +357,9 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit) COND(s[FS].v->has_samp, A4XX_SP_FS_CTRL_REG0_PIXLODENABLE)); OUT_RING(ring, A4XX_SP_FS_CTRL_REG1_CONSTLENGTH(s[FS].constlen) | 0x8000 | /* XXX */ - COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG1_VARYING)); + COND(s[FS].v->frag_face, A4XX_SP_FS_CTRL_REG1_FACENESS) | + COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG1_VARYING) | + COND(s[FS].v->frag_coord, A4XX_SP_FS_CTRL_REG1_FRAGCOORD)); OUT_PKT0(ring, REG_A4XX_SP_FS_OBJ_OFFSET_REG, 2); OUT_RING(ring, A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[FS].constoff) | @@ -373,7 +383,10 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit) OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL2, 1); OUT_RING(ring, A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES(0) | - COND(s[FS].v->total_in > 0, A4XX_RB_RENDER_CONTROL2_VARYING)); + COND(s[FS].v->total_in > 0, A4XX_RB_RENDER_CONTROL2_VARYING) | + COND(s[FS].v->frag_face, A4XX_RB_RENDER_CONTROL2_FACENESS) | + COND(s[FS].v->frag_coord, A4XX_RB_RENDER_CONTROL2_XCOORD | + 
A4XX_RB_RENDER_CONTROL2_YCOORD)); OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT_REG, 1); OUT_RING(ring, A4XX_RB_FS_OUTPUT_REG_COLOR_PIPE_ENABLE | ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): freedreno/a4xx: temp hack for FLAT varyings
Module: Mesa Branch: master Commit: 6a5ba23fa6156abb7d643080e2a2b477aa1ed559 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6a5ba23fa6156abb7d643080e2a2b477aa1ed559 Author: Rob Clark Date: Sat Dec 6 16:29:53 2014 -0500 freedreno/a4xx: temp hack for FLAT varyings Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a4xx/fd4_program.c | 19 +++ 1 file changed, 19 insertions(+) diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c index 4f2a88f..76cadcc 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c @@ -431,6 +431,25 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit) } } + /* HACK: looks like we need to do int varyings in the frag +* shader on a4xx (no flatshad reg?): +* +*(sy)(ss)nop +*(sy)ldlv.u32 r0.x,l[r0.x], 1 +*ldlv.u32 r0.y,l[r0.x+1], 1 +*(ss)bary.f (ei)r63.x, 0, r0.x +*(ss)(rpt1)cov.s32f16 hr0.x, (r)r0.x +*(rpt5)nop +*sam (f16)(xyzw)hr0.x, hr0.x, s#0, t#0 +* +* for now, don't set FLAT on vinterp[], since that +* at least works well enough for pure float impl (ie. +* pre glsl130).. we'll have to do a bit more work to +* handle this properly: +*/ + for (i = 0; i < ARRAY_SIZE(vinterp); i++) + vinterp[i] = 0; + OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2); OUT_RING(ring, A4XX_VPC_ATTR_TOTALATTR(s[FS].v->total_in) | A4XX_VPC_ATTR_THRDASSIGN(1) | ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): freedreno/ir3: lower TXP as needed
Module: Mesa Branch: master Commit: eb6fd3b8eb9c19bb501a091d1696e5db1ac4c690 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=eb6fd3b8eb9c19bb501a091d1696e5db1ac4c690 Author: Rob Clark Date: Sat Dec 6 15:24:23 2014 -0500 freedreno/ir3: lower TXP as needed On a3xx, lower TXP for 3D textures, on a4xx lower all TXP. Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/ir3/ir3_compiler.c |8 src/gallium/drivers/freedreno/ir3/ir3_shader.c | 12 ++-- src/gallium/drivers/freedreno/ir3/ir3_shader.h |2 +- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c index 6cc21ac..ade4b1c 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c @@ -170,6 +170,14 @@ compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so, break; } + if (ir3_shader_gpuid(so->shader) >= 400) { + /* a4xx seems to have *no* sam.p */ + lconfig.lower_TXP = ~0; /* lower all txp */ + } else { + /* a3xx just needs to avoid sam.p for 3d tex */ + lconfig.lower_TXP = (1 << TGSI_TEXTURE_3D); + } + ctx->tokens = tgsi_transform_lowering(&lconfig, tokens, &ctx->info); ctx->free_tokens = !!ctx->tokens; if (!ctx->tokens) { diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c index 0c74f2f..c21d0a2 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c @@ -54,9 +54,10 @@ static void assemble_variant(struct ir3_shader_variant *v) { struct fd_context *ctx = fd_context(v->shader->pctx); + uint32_t gpu_id = ir3_shader_gpuid(v->shader); uint32_t sz, *bin; - bin = ir3_assemble(v->ir, &v->info, ctx->screen->gpu_id); + bin = ir3_assemble(v->ir, &v->info, gpu_id); sz = v->info.sizedwords * 4; v->bo = fd_bo_new(ctx->dev, sz, @@ -67,7 +68,7 @@ assemble_variant(struct ir3_shader_variant *v) free(bin); - if 
(ctx->screen->gpu_id >= 400) { + if (gpu_id >= 400) { v->instrlen = v->info.sizedwords / (2 * 16); } else { v->instrlen = v->info.sizedwords / (2 * 4); @@ -177,6 +178,13 @@ fail: return NULL; } +uint32_t +ir3_shader_gpuid(struct ir3_shader *shader) +{ + struct fd_context *ctx = fd_context(shader->pctx); + return ctx->screen->gpu_id; +} + struct ir3_shader_variant * ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key) { diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h index 89442ce..fcd5895 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -214,7 +214,7 @@ struct ir3_shader { struct ir3_shader * ir3_shader_create(struct pipe_context *pctx, const struct tgsi_token *tokens, enum shader_t type); void ir3_shader_destroy(struct ir3_shader *shader); - +uint32_t ir3_shader_gpuid(struct ir3_shader *shader); struct ir3_shader_variant * ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): tgsi/lowering: add support to lower TXP (v2)
Module: Mesa Branch: master Commit: 219440ddebcd804d6b8cb0a79c4bbdd7701ea355 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=219440ddebcd804d6b8cb0a79c4bbdd7701ea355 Author: Rob Clark Date: Sat Dec 6 13:36:02 2014 -0500 tgsi/lowering: add support to lower TXP (v2) v2: actually do perspective divide for RECT/SHADOWRECT Signed-off-by: Rob Clark Reviewed-by: Ilia Mirkin --- src/gallium/auxiliary/tgsi/tgsi_lowering.c | 46 +++- src/gallium/auxiliary/tgsi/tgsi_lowering.h |3 ++ 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.c b/src/gallium/auxiliary/tgsi/tgsi_lowering.c index b6b18db..dee6c41 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_lowering.c +++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.c @@ -1031,7 +1031,10 @@ transform_samp(struct tgsi_transform_context *tctx, struct tgsi_full_instruction new_inst; /* mask is clamped coords, pmask is all coords (for projection): */ unsigned mask = 0, pmask = 0, smask; + unsigned tex = inst->Texture.Texture; unsigned opcode = inst->Instruction.Opcode; + bool lower_txp = (opcode == TGSI_OPCODE_TXP) && + (ctx->config->lower_TXP & (1 << tex)); if (opcode == TGSI_OPCODE_TXB2) { samp = &inst->Src[2]; @@ -1043,14 +1046,14 @@ transform_samp(struct tgsi_transform_context *tctx, smask = 1 << samp->Register.Index; /* check if we actually need to lower this one: */ - if (!(ctx->saturate & smask)) + if (!(ctx->saturate & smask) && !lower_txp) return -1; /* figure out which coordinates need saturating: * - RECT textures should not get saturated * - array index coords should not get saturated */ - switch (inst->Texture.Texture) { + switch (tex) { case TGSI_TEXTURE_3D: case TGSI_TEXTURE_CUBE: case TGSI_TEXTURE_CUBE_ARRAY: @@ -1081,16 +1084,19 @@ transform_samp(struct tgsi_transform_context *tctx, pmask |= TGSI_WRITEMASK_X; break; - /* TODO: I think we should ignore these? 
- case TGSI_TEXTURE_RECT: - case TGSI_TEXTURE_SHADOWRECT: - */ + case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_SHADOWRECT: + /* we don't saturate, but in case of lower_txp we + * still need to do the perspective divide: + */ + pmask = TGSI_WRITEMASK_XY; + break; } /* sanity check.. driver could be asking to saturate a non- * existent coordinate component: */ - if (!mask) + if (!mask && !lower_txp) return -1; /* MOV tmpA, src0 */ @@ -1126,8 +1132,10 @@ transform_samp(struct tgsi_transform_context *tctx, } /* MOV_SAT tmpA., tmpA */ - create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask, - TGSI_SAT_ZERO_ONE); + if (mask) { + create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask, + TGSI_SAT_ZERO_ONE); + } /* modify the texture samp instruction to take fixed up coord: */ new_inst = *inst; @@ -1462,6 +1470,7 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config, OPCS(DPH) || OPCS(DP2) || OPCS(DP2A) || + OPCS(TXP) || ctx.two_side_colors || ctx.saturate)) return NULL; @@ -1529,12 +1538,19 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config, newlen += DP2A_GROW * OPCS(DP2A); numtmp = MAX2(numtmp, DOTP_TMP); } - if (ctx.saturate) { - int n = info->opcode_count[TGSI_OPCODE_TEX] + - info->opcode_count[TGSI_OPCODE_TXP] + - info->opcode_count[TGSI_OPCODE_TXB] + - info->opcode_count[TGSI_OPCODE_TXB2] + - info->opcode_count[TGSI_OPCODE_TXL]; + if (ctx.saturate || config->lower_TXP) { + int n = 0; + + if (ctx.saturate) { + n = info->opcode_count[TGSI_OPCODE_TEX] + +info->opcode_count[TGSI_OPCODE_TXP] + +info->opcode_count[TGSI_OPCODE_TXB] + +info->opcode_count[TGSI_OPCODE_TXB2] + +info->opcode_count[TGSI_OPCODE_TXL]; + } else if (config->lower_TXP) { + n = info->opcode_count[TGSI_OPCODE_TXP]; + } + newlen += SAMP_GROW * n; numtmp = MAX2(numtmp, SAMP_TMP); } diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.h b/src/gallium/auxiliary/tgsi/tgsi_lowering.h index 55e1507..52c204f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_lowering.h +++ 
b/src/gallium/auxiliary/tgsi/tgsi_lowering.h @@ -69,6 +69,9 @@ struct tgsi_lowering_config unsigned lower_DP2:1; unsigned lower_DP2A:1; + /* bitmask of (1 << TGSI_TEXTURE_type): */ + unsigned lower_TXP; + /* To emulate certain texture wrap modes, this can be used * to saturate the specified tex coord to [0.0, 1.0]. The * bits are according to sampler #, ie. if, for example: ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/li
Mesa (master): freedreno/a4xx: XA gpu hang at startup
Module: Mesa Branch: master Commit: 5b38a1740beccf1f33b9dfe4d38f00a711b6b2e0 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5b38a1740beccf1f33b9dfe4d38f00a711b6b2e0 Author: Rob Clark Date: Sat Dec 6 12:39:19 2014 -0500 freedreno/a4xx: XA gpu hang at startup Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a4xx/fd4_emit.c |6 ++ src/gallium/drivers/freedreno/a4xx/fd4_gmem.c |4 +++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index 5b47158..839d3e8 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -689,5 +689,11 @@ fd4_emit_restore(struct fd_context *ctx) OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL3, 1); OUT_RING(ring, A4XX_RB_RENDER_CONTROL3_COMPONENT_ENABLE(0xf)); + OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1); + OUT_RING(ring, A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR); + + OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1); + OUT_RING(ring, 0x0); + ctx->needs_rb_fbd = true; } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c index 89ae260..8ad0039 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c @@ -500,10 +500,12 @@ fd4_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, 0x); } + OUT_PKT0(ring, REG_A4XX_GRAS_DEPTH_CONTROL, 1); if (pfb->zsbuf) { - OUT_PKT0(ring, REG_A4XX_GRAS_DEPTH_CONTROL, 1); OUT_RING(ring, A4XX_GRAS_DEPTH_CONTROL_FORMAT( fd4_pipe2depth(pfb->zsbuf->format))); + } else { + OUT_RING(ring, A4XX_GRAS_DEPTH_CONTROL_FORMAT(DEPTH4_NONE)); } if (ctx->needs_rb_fbd) { ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): freedreno/a4xx: fix rendering to layer != 0
Module: Mesa Branch: master Commit: 3dbcd25022d0bd62484ac4a9498e4a1bbb5af0b0 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3dbcd25022d0bd62484ac4a9498e4a1bbb5af0b0 Author: Rob Clark Date: Sun Dec 7 12:10:38 2014 -0500 freedreno/a4xx: fix rendering to layer != 0 Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a4xx/fd4_gmem.c |5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c index 8ad0039..3c90052 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c @@ -126,12 +126,15 @@ emit_gmem2mem_surf(struct fd_context *ctx, struct fd_ringbuffer *ring = ctx->ring; struct fd_resource *rsc = fd_resource(psurf->texture); struct fd_resource_slice *slice = &rsc->slices[psurf->u.tex.level]; + uint32_t layer_offset = slice->size0 * psurf->u.tex.first_layer; + + debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer); OUT_PKT0(ring, REG_A4XX_RB_COPY_CONTROL, 4); OUT_RING(ring, A4XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) | A4XX_RB_COPY_CONTROL_MODE(RB_COPY_RESOLVE) | A4XX_RB_COPY_CONTROL_GMEM_BASE(base)); - OUT_RELOCW(ring, rsc->bo, slice->offset, 0, 0); /* RB_COPY_DEST_BASE */ + OUT_RELOCW(ring, rsc->bo, slice->offset + layer_offset, 0, 0); /* RB_COPY_DEST_BASE */ OUT_RING(ring, A4XX_RB_COPY_DEST_PITCH_PITCH(slice->pitch * rsc->cpp)); OUT_RING(ring, A4XX_RB_COPY_DEST_INFO_TILE(TILE4_LINEAR) | A4XX_RB_COPY_DEST_INFO_FORMAT(fd4_pipe2color(psurf->format)) | ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): freedreno/a4xx: texture fixes
Module: Mesa Branch: master Commit: 1e3a732603a4a4d5b3e7102cf0d7840f79ecf5c4 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1e3a732603a4a4d5b3e7102cf0d7840f79ecf5c4 Author: Rob Clark Date: Fri Dec 5 11:43:03 2014 -0500 freedreno/a4xx: texture fixes Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 23 +++- src/gallium/drivers/freedreno/a4xx/fd4_format.c| 17 +++ src/gallium/drivers/freedreno/a4xx/fd4_format.h|1 + src/gallium/drivers/freedreno/a4xx/fd4_texture.c | 12 ++ src/gallium/drivers/freedreno/a4xx/fd4_texture.h |2 +- src/gallium/drivers/freedreno/freedreno_resource.c |6 + 6 files changed, 54 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index c7be161..5b47158 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -162,12 +162,20 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, unsigned i; if (tex->num_samplers > 0) { + int num_samplers; + + /* not sure if this is an a420.0 workaround, but we seem +* to need to emit these in pairs.. emit a final dummy +* entry if odd # of samplers: +*/ + num_samplers = align(tex->num_samplers, 2); + /* output sampler state: */ - OUT_PKT3(ring, CP_LOAD_STATE, 2 + 2 + (2 * tex->num_samplers)); + OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * num_samplers)); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | CP_LOAD_STATE_0_STATE_BLOCK(sb) | - CP_LOAD_STATE_0_NUM_UNIT(tex->num_samplers)); + CP_LOAD_STATE_0_NUM_UNIT(num_samplers)); OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) | CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); for (i = 0; i < tex->num_samplers; i++) { @@ -178,9 +186,11 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, sampler->texsamp0); OUT_RING(ring, sampler->texsamp1); } - /* maybe an a420.0 (or a4xx.0) workaround?? or just driver bug? 
*/ - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); + + for (; i < num_samples; i++) { + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + } } if (tex->num_textures > 0) { @@ -203,7 +213,8 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, view->texconst1); OUT_RING(ring, view->texconst2); OUT_RING(ring, view->texconst3); - OUT_RELOC(ring, rsc->bo, slice->offset, 0, 0); + OUT_RELOC(ring, rsc->bo, slice->offset, + view->textconst4, 0); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.c b/src/gallium/drivers/freedreno/a4xx/fd4_format.c index bbece83..9cff134 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_format.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.c @@ -232,6 +232,23 @@ fd4_pipe2swap(enum pipe_format format) return formats[format].swap; } +enum a4xx_tex_fetchsize +fd4_pipe2fetchsize(enum pipe_format format) +{ + switch (util_format_get_blocksizebits(format)) { + case 8: return TFETCH4_1_BYTE; + case 16: return TFETCH4_2_BYTE; + case 32: return TFETCH4_4_BYTE; + case 64: return TFETCH4_8_BYTE; + case 128: return TFETCH4_16_BYTE; + default: + debug_printf("Unknown block size for format %s: %d\n", + util_format_name(format), + util_format_get_blocksizebits(format)); + return TFETCH4_1_BYTE; + } +} + /* we need to special case a bit the depth/stencil restore, because we are * using the texture sampler to blit into the depth/stencil buffer, *not* * into a color buffer. Otherwise fd4_tex_swiz() will do the wrong thing, diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.h b/src/gallium/drivers/freedreno/a4xx/fd4_format.h index 5d6d1ae..04837da 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_format.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.h @@ -38,6 +38,7 @@ enum a4xx_tex_fmt fd4_pipe2tex(enum pipe_format format); enum a4xx_color_fmt fd4_pipe2color(enum pipe_format forma
Mesa (master): freedreno: cleanup slice alignment/setup
Module: Mesa Branch: master Commit: 5d7c9c9160e0d425df220e5e1898d0ab7dee2c83 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5d7c9c9160e0d425df220e5e1898d0ab7dee2c83 Author: Rob Clark Date: Thu Dec 4 16:56:33 2014 -0500 freedreno: cleanup slice alignment/setup Collapse things back into a setup_slices() which takes the desired alignment as a param. This gets things ready for a4xx which has some slightly different requirements. Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/freedreno_resource.c | 50 ++-- 1 file changed, 14 insertions(+), 36 deletions(-) diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c index 6b31d26..461e378 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.c +++ b/src/gallium/drivers/freedreno/freedreno_resource.c @@ -188,7 +188,7 @@ static const struct u_resource_vtbl fd_resource_vtbl = { }; static uint32_t -setup_slices(struct fd_resource *rsc) +setup_slices(struct fd_resource *rsc, uint32_t alignment) { struct pipe_resource *prsc = &rsc->base.b; uint32_t level, size = 0; @@ -201,7 +201,7 @@ setup_slices(struct fd_resource *rsc) slice->pitch = align(width, 32); slice->offset = size; - slice->size0 = slice->pitch * height * rsc->cpp; + slice->size0 = align(slice->pitch * height * rsc->cpp, alignment); size += slice->size0 * depth * prsc->array_size; @@ -213,33 +213,20 @@ setup_slices(struct fd_resource *rsc) return size; } -/* 2d array and 3d textures seem to want their layers aligned to - * page boundaries - */ static uint32_t -setup_slices_array(struct fd_resource *rsc) +slice_alignment(struct pipe_screen *pscreen, const struct pipe_resource *tmpl) { - struct pipe_resource *prsc = &rsc->base.b; - uint32_t level, size = 0; - uint32_t width = prsc->width0; - uint32_t height = prsc->height0; - uint32_t depth = prsc->depth0; - - for (level = 0; level <= prsc->last_level; level++) { - struct fd_resource_slice *slice = fd_resource_slice(rsc, level); - - 
slice->pitch = align(width, 32); - slice->offset = size; - slice->size0 = align(slice->pitch * height * rsc->cpp, 4096); - - size += slice->size0 * depth * prsc->array_size; - - width = u_minify(width, 1); - height = u_minify(height, 1); - depth = u_minify(depth, 1); + /* on a3xx, 2d array and 3d textures seem to want their +* layers aligned to page boundaries: +*/ + switch (tmpl->target) { + case PIPE_TEXTURE_3D: + case PIPE_TEXTURE_1D_ARRAY: + case PIPE_TEXTURE_2D_ARRAY: + return 4096; + default: + return 1; } - - return size; } /** @@ -273,16 +260,7 @@ fd_resource_create(struct pipe_screen *pscreen, assert(rsc->cpp); - switch (tmpl->target) { - case PIPE_TEXTURE_3D: - case PIPE_TEXTURE_1D_ARRAY: - case PIPE_TEXTURE_2D_ARRAY: - size = setup_slices_array(rsc); - break; - default: - size = setup_slices(rsc); - break; - } + size = setup_slices(rsc, slice_alignment(pscreen, tmpl)); realloc_bo(rsc, size); if (!rsc->bo) ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): freedreno: update generated headers
Module: Mesa Branch: master Commit: 8ecbcbf0aab60e044dc4a9dabef2bdfb8db5abe9 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8ecbcbf0aab60e044dc4a9dabef2bdfb8db5abe9 Author: Rob Clark Date: Fri Dec 5 11:42:44 2014 -0500 freedreno: update generated headers Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a2xx/a2xx.xml.h |2 +- src/gallium/drivers/freedreno/a3xx/a3xx.xml.h |2 +- src/gallium/drivers/freedreno/a4xx/a4xx.xml.h | 69 + src/gallium/drivers/freedreno/a4xx/fd4_program.c |6 +- src/gallium/drivers/freedreno/adreno_common.xml.h |2 +- src/gallium/drivers/freedreno/adreno_pm4.xml.h|2 +- 6 files changed, 65 insertions(+), 18 deletions(-) diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h index d3d93c6..c1a0309 100644 --- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h +++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h @@ -14,7 +14,7 @@ The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml( 15076 bytes, from 2014-12-01 22:40:01) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 64344 bytes, from 2014-12-03 14:14:54) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 49060 bytes, from 2014-12-03 22:36:15) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 50255 bytes, from 2014-12-07 18:43:56) Copyright (C) 2013-2014 by the following authors: - Rob Clark (robclark) diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h index 8ee835b..d4c52e1 100644 --- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h +++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h @@ -14,7 +14,7 @@ The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 
bytes, from 2014-11-13 22:44:30) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml( 15076 bytes, from 2014-12-01 22:40:01) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 64344 bytes, from 2014-12-03 14:14:54) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 49060 bytes, from 2014-12-03 22:36:15) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 50255 bytes, from 2014-12-07 18:43:56) Copyright (C) 2013-2014 by the following authors: - Rob Clark (robclark) diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h index bf49527..3f84c32 100644 --- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h +++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h @@ -14,7 +14,7 @@ The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml( 15076 bytes, from 2014-12-01 22:40:01) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 64344 bytes, from 2014-12-03 14:14:54) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 49060 bytes, from 2014-12-03 22:36:15) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 50255 bytes, from 2014-12-07 18:43:56) Copyright (C) 2013-2014 by the following authors: - Rob Clark (robclark) @@ -91,6 +91,7 @@ enum a4xx_vtx_fmt { VFMT4_16_16_UNORM = 29, VFMT4_16_16_16_UNORM = 30, VFMT4_16_16_16_16_UNORM = 31, + VFMT4_32_32_SINT = 37, VFMT4_8_UINT = 40, VFMT4_8_8_UINT = 41, VFMT4_8_8_8_UINT = 42, @@ -132,6 +133,14 @@ enum a4xx_tex_fmt { TFMT4_32_32_32_32_FLOAT = 63, }; +enum a4xx_tex_fetchsize { + TFETCH4_1_BYTE = 0, + TFETCH4_2_BYTE = 1, + TFETCH4_4_BYTE = 2, + TFETCH4_8_BYTE = 3, + TFETCH4_16_BYTE = 4, +}; + enum a4xx_depth_format { DEPTH4_NONE = 0, DEPTH4_16 = 1, @@ -265,14 +274,19 @@ static inline uint32_t 
A4XX_RB_MSAA_CONTROL_SAMPLES(uint32_t val) return ((val) << A4XX_RB_MSAA_CONTROL_SAMPLES__SHIFT) & A4XX_RB_MSAA_CONTROL_SAMPLES__MASK; } -#define REG_A4XX_RB_MSAA_CONTROL2 0x000020a3 -#define A4XX_RB_MSAA_CONTROL2_MSAA_SAMPLES__MASK 0x00000380 -#define A4XX_RB_MSAA_CONTROL2_MSAA_SAMPLES__SHIFT 7 -static inline uint32_t A4XX_RB_MSAA_CONTROL2_MSAA_SAMPLES(uint32_t val) +#define REG_A4XX_RB_RENDER_CONTROL2 0x000020a3 +#define A4XX_RB_RENDER_CONTROL2_XCOORD 0x00000001 +#define A4XX_RB_RENDER_CONTROL2_YCOORD 0x00000002 +
Mesa (master): mesa: use build flag to ensure stack is realigned on x86
Module: Mesa Branch: master Commit: f1b5f2b157a092e93590bd43544fbf2671edab36 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f1b5f2b157a092e93590bd43544fbf2671edab36 Author: Timothy Arceri Date: Sun Dec 7 00:09:40 2014 +1100 mesa: use build flag to ensure stack is realigned on x86 Nowadays GCC assumes stack pointer is 16-byte aligned even on 32-bits, but that is an assumption OpenGL drivers (or any dynamic library for that matter) can't afford to make as there are many closed- and open- source application binaries out there that only assume 4-byte stack alignment. V4: fix comment and indentation V3: move all sse4.1 build flag config to the same location and add comment as to why we need to do the realign V2: use $target_cpu rather than $host_cpu and setup build flags in config rather than makefile https://bugs.freedesktop.org/show_bug.cgi?id=86788 Signed-off-by: Timothy Arceri Reviewed-by: Matt Turner CC: "10.4" --- configure.ac | 11 ++- src/mesa/Makefile.am |2 +- src/mesa/main/sse_minmax.c |3 --- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/configure.ac b/configure.ac index b0df1bb..4bdf75d 100644 --- a/configure.ac +++ b/configure.ac @@ -253,8 +253,16 @@ AC_SUBST([VISIBILITY_CXXFLAGS]) dnl dnl Optional flags, check for compiler support dnl +SSE41_CFLAGS="-msse4.1" +dnl Code compiled by GCC with -msse* assumes a 16 byte aligned +dnl stack, but on x86-32 such alignment is not guaranteed. +case "$target_cpu" in +i?86) +SSE41_CFLAGS="$SSE41_CFLAGS -mstackrealign" +;; +esac save_CFLAGS="$CFLAGS" -CFLAGS="-msse4.1 $CFLAGS" +CFLAGS="$SSE41_CFLAGS $CFLAGS" AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ #include <smmintrin.h> int main () { @@ -267,6 +275,7 @@ if test "x$SSE41_SUPPORTED" = x1; then DEFINES="$DEFINES -DUSE_SSE41" fi AM_CONDITIONAL([SSE41_SUPPORTED], [test x$SSE41_SUPPORTED = x1]) +AC_SUBST([SSE41_CFLAGS], $SSE41_CFLAGS) dnl Can't have static and shared libraries, default to static if user dnl explicitly requested. 
If both disabled, set to static since shared diff --git a/src/mesa/Makefile.am b/src/mesa/Makefile.am index 932db4f..3b68573 100644 --- a/src/mesa/Makefile.am +++ b/src/mesa/Makefile.am @@ -153,7 +153,7 @@ libmesagallium_la_LIBADD = \ libmesa_sse41_la_SOURCES = \ main/streaming-load-memcpy.c \ main/sse_minmax.c -libmesa_sse41_la_CFLAGS = $(AM_CFLAGS) -msse4.1 +libmesa_sse41_la_CFLAGS = $(AM_CFLAGS) $(SSE41_CFLAGS) pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = gl.pc diff --git a/src/mesa/main/sse_minmax.c b/src/mesa/main/sse_minmax.c index 93cf2a6..222ac14 100644 --- a/src/mesa/main/sse_minmax.c +++ b/src/mesa/main/sse_minmax.c @@ -31,9 +31,6 @@ #include void -#if !defined(__x86_64__) - __attribute__((force_align_arg_pointer)) -#endif _mesa_uint_array_min_max(const unsigned *ui_indices, unsigned *min_index, unsigned *max_index, const unsigned count) { ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): draw: implement TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION
Module: Mesa Branch: master Commit: 65ef78e8611556780fce0bee1feba805347ec627 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=65ef78e8611556780fce0bee1feba805347ec627 Author: Marek Olšák Date: Mon Nov 17 22:30:31 2014 +0100 draw: implement TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION Required by Nine. Tested with util_run_tests. It's added to softpipe, llvmpipe, and r300g/swtcl. Tested-by: David Heidelberg --- src/gallium/auxiliary/draw/draw_context.c | 40 +--- src/gallium/auxiliary/draw/draw_llvm.c |2 +- src/gallium/auxiliary/draw/draw_private.h |4 ++ .../auxiliary/draw/draw_pt_fetch_shade_emit.c |2 +- .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c |2 +- .../draw/draw_pt_fetch_shade_pipeline_llvm.c |2 +- src/gallium/auxiliary/draw/draw_vs.c |2 + src/gallium/drivers/llvmpipe/lp_screen.c |2 + src/gallium/drivers/r300/r300_screen.c |2 +- src/gallium/drivers/softpipe/sp_screen.c |2 + 10 files changed, 49 insertions(+), 11 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 7bd2d39..04cf5b7 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -254,21 +254,48 @@ void draw_set_zs_format(struct draw_context *draw, enum pipe_format format) } -static void update_clip_flags( struct draw_context *draw ) +static bool +draw_is_vs_window_space(struct draw_context *draw) { - draw->clip_xy = !draw->driver.bypass_clip_xy; + if (draw->vs.vertex_shader) { + struct tgsi_shader_info *info = &draw->vs.vertex_shader->info; + + return info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION] != 0; + } + return false; +} + + +void +draw_update_clip_flags(struct draw_context *draw) +{ + bool window_space = draw_is_vs_window_space(draw); + + draw->clip_xy = !draw->driver.bypass_clip_xy && !window_space; draw->guard_band_xy = (!draw->driver.bypass_clip_xy && draw->driver.guard_band_xy); draw->clip_z = (!draw->driver.bypass_clip_z && - draw->rasterizer && 
draw->rasterizer->depth_clip); + draw->rasterizer && draw->rasterizer->depth_clip) && + !window_space; draw->clip_user = draw->rasterizer && - draw->rasterizer->clip_plane_enable != 0; + draw->rasterizer->clip_plane_enable != 0 && + !window_space; draw->guard_band_points_xy = draw->guard_band_xy || (draw->driver.bypass_clip_points && (draw->rasterizer && draw->rasterizer->point_tri_clip)); } + +void +draw_update_viewport_flags(struct draw_context *draw) +{ + bool window_space = draw_is_vs_window_space(draw); + + draw->bypass_viewport = window_space || draw->identity_viewport; +} + + /** * Register new primitive rasterization/rendering state. * This causes the drawing pipeline to be rebuilt. @@ -282,7 +309,7 @@ void draw_set_rasterizer_state( struct draw_context *draw, draw->rasterizer = raster; draw->rast_handle = rast_handle; - update_clip_flags(draw); + draw_update_clip_flags(draw); } } @@ -309,7 +336,7 @@ void draw_set_driver_clipping( struct draw_context *draw, draw->driver.bypass_clip_z = bypass_clip_z; draw->driver.guard_band_xy = guard_band_xy; draw->driver.bypass_clip_points = bypass_clip_points; - update_clip_flags(draw); + draw_update_clip_flags(draw); } @@ -363,6 +390,7 @@ void draw_set_viewport_states( struct draw_context *draw, viewport->translate[0] == 0.0f && viewport->translate[1] == 0.0f && viewport->translate[2] == 0.0f); + draw_update_viewport_flags(draw); } diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index dbaece3..8326072 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -1836,7 +1836,7 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store) key->clip_xy = llvm->draw->clip_xy; key->clip_z = llvm->draw->clip_z; key->clip_user = llvm->draw->clip_user; - key->bypass_viewport = llvm->draw->identity_viewport; + key->bypass_viewport = llvm->draw->bypass_viewport; key->clip_halfz = llvm->draw->rasterizer->clip_halfz; key->need_edgeflags = 
(llvm->draw->vs.edgeflag_output ? TRUE : FALSE); key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable; diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 37045eb..7b893cb 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -252,6 +252,7 @@ struct draw_context struct pipe_viewport_state viewports[PIPE_MAX_VIEWPORTS];
Mesa (master): mesa: Enables GL_RGB and GL_RGBA unsized internal formats for OpenGL ES 3.0
Module: Mesa Branch: master Commit: 78942787170615c9333810cf3a4819a13c9eb8e8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=78942787170615c9333810cf3a4819a13c9eb8e8 Author: Eduardo Lima Mitev Date: Thu Nov 20 14:02:46 2014 +0100 mesa: Enables GL_RGB and GL_RGBA unsized internal formats for OpenGL ES 3.0 GL_RGB and GL_RGBA are valid internal formats on a GLES3 profile. See "Table 1. Unsized Internal Formats" at https://www.khronos.org/opengles/sdk/docs/man3/html/glTexImage2D.xhtml. Fixes 2 dEQP tests: - dEQP-GLES3.functional.state_query.internal_format.rgb_samples - dEQP-GLES3.functional.state_query.internal_format.rgba_samples Reviewed-by: Brian Paul --- src/mesa/main/fbobject.c |6 ++ 1 file changed, 6 insertions(+) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index f5c11c4..4c3c157 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -1430,6 +1430,9 @@ _mesa_base_fbo_format(struct gl_context *ctx, GLenum internalFormat) case GL_RGB8: return GL_RGB; case GL_RGB: + if (_mesa_is_gles3(ctx)) + return GL_RGB; + /* fallthrough */ case GL_R3_G3_B2: case GL_RGB4: case GL_RGB5: @@ -1444,6 +1447,9 @@ _mesa_base_fbo_format(struct gl_context *ctx, GLenum internalFormat) case GL_RGBA8: return GL_RGBA; case GL_RGBA: + if (_mesa_is_gles3(ctx)) + return GL_RGBA; + /* fallthrough */ case GL_RGBA2: case GL_RGBA12: case GL_RGBA16: ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): mesa: Returns zero samples when querying GL_NUM_SAMPLE_COUNTS when internal format is integer
Module: Mesa Branch: master Commit: 09cb149ba745302e366c2f965a033103d398748c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=09cb149ba745302e366c2f965a033103d398748c Author: Eduardo Lima Mitev Date: Thu Nov 20 14:52:35 2014 +0100 mesa: Returns zero samples when querying GL_NUM_SAMPLE_COUNTS when internal format is integer From GL ES 3.0 specification, section 6.1.15 Internal Format Queries (page 236), multisampling is not supported for signed and unsigned integer internal formats. Fixes 19 dEQP tests under 'dEQP-GLES3.functional.state_query.internal_format.*'. Reviewed-by: Ian Romanick --- src/mesa/main/formatquery.c | 57 ++- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/src/mesa/main/formatquery.c b/src/mesa/main/formatquery.c index 40eca87..f6274fe 100644 --- a/src/mesa/main/formatquery.c +++ b/src/mesa/main/formatquery.c @@ -115,29 +115,40 @@ _mesa_GetInternalformativ(GLenum target, GLenum internalformat, GLenum pname, internalformat, buffer); break; case GL_NUM_SAMPLE_COUNTS: { - /* The driver can return 0, and we should pass that along to the - * application. The ARB decided that ARB_internalformat_query should - * behave as ARB_internalformat_query2 in this situation. - * - * The ARB_internalformat_query2 spec says: - * - * "- NUM_SAMPLE_COUNTS: The number of sample counts that would be - *returned by querying SAMPLES is returned in <params>. - ** If <internalformat> is not color-renderable, - * depth-renderable, or stencil-renderable (as defined in - * section 4.4.4), or if <target> does not support multiple - * samples (ie other than TEXTURE_2D_MULTISAMPLE, - * TEXTURE_2D_MULTISAMPLE_ARRAY, or RENDERBUFFER), 0 is - * returned." - */ - const size_t num_samples = - ctx->Driver.QuerySamplesForFormat(ctx, target, internalformat, buffer); - - /* QuerySamplesForFormat writes some stuff to buffer, so we have to - * separately over-write it with the requested value. 
- */ - buffer[0] = (GLint) num_samples; - count = 1; + if (_mesa_is_gles3(ctx) && _mesa_is_enum_format_integer(internalformat)) { + /* From GL ES 3.0 specification, section 6.1.15 page 236: "Since + * multisampling is not supported for signed and unsigned integer + * internal formats, the value of NUM_SAMPLE_COUNTS will be zero + * for such formats. + */ + buffer[0] = 0; + count = 1; + } else { + size_t num_samples; + + /* The driver can return 0, and we should pass that along to the + * application. The ARB decided that ARB_internalformat_query should + * behave as ARB_internalformat_query2 in this situation. + * + * The ARB_internalformat_query2 spec says: + * + * "- NUM_SAMPLE_COUNTS: The number of sample counts that would be + *returned by querying SAMPLES is returned in <params>. + ** If <internalformat> is not color-renderable, + * depth-renderable, or stencil-renderable (as defined in + * section 4.4.4), or if <target> does not support multiple + * samples (ie other than TEXTURE_2D_MULTISAMPLE, + * TEXTURE_2D_MULTISAMPLE_ARRAY, or RENDERBUFFER), 0 is + * returned." + */ + num_samples = ctx->Driver.QuerySamplesForFormat(ctx, target, internalformat, buffer); + + /* QuerySamplesForFormat writes some stuff to buffer, so we have to + * separately over-write it with the requested value. + */ + buffer[0] = (GLint) num_samples; + count = 1; + } break; } default: ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): glsl: invariant qualifier is not valid for shader inputs in GLSL ES 3.00
Module: Mesa Branch: master Commit: 426a50e2089b12d33f5c075aa5622f64076914a3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=426a50e2089b12d33f5c075aa5622f64076914a3 Author: Samuel Iglesias Gonsalvez Date: Tue Nov 25 12:23:10 2014 +0100 glsl: invariant qualifier is not valid for shader inputs in GLSL ES 3.00 GLSL ES 3.00 spec, chapter 4.6.1 "The Invariant Qualifier", Only variables output from a shader can be candidates for invariance. This includes user-defined output variables and the built-in output variables. As only outputs can be declared as invariant, an invariant output from one shader stage will still match an input of a subsequent stage without the input being declared as invariant. This patch fixes the following dEQP tests: dEQP-GLES3.functional.shaders.qualification_order.variables.valid.invariant_interp_storage_precision dEQP-GLES3.functional.shaders.qualification_order.variables.valid.invariant_interp_storage dEQP-GLES3.functional.shaders.qualification_order.variables.valid.invariant_storage_precision dEQP-GLES3.functional.shaders.qualification_order.variables.valid.invariant_storage dEQP-GLES3.functional.shaders.qualification_order.variables.invalid.invariant_interp_storage_precision_invariant_input dEQP-GLES3.functional.shaders.qualification_order.variables.invalid.invariant_interp_storage_invariant_input dEQP-GLES3.functional.shaders.qualification_order.variables.invalid.invariant_storage_precision_invariant_input dEQP-GLES3.functional.shaders.qualification_order.variables.invalid.invariant_storage_invariant_input No piglit regressions observed. 
v2: - Add spec content in the code Signed-off-by: Samuel Iglesias Gonsalvez Reviewed-by: Ian Romanick --- src/glsl/glsl_parser.yy| 11 +++ src/glsl/link_varyings.cpp |2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy index 6a55a4e..7fb8c38 100644 --- a/src/glsl/glsl_parser.yy +++ b/src/glsl/glsl_parser.yy @@ -1602,6 +1602,17 @@ type_qualifier: $$ = $2; $$.flags.q.invariant = 1; + + /* GLSL ES 3.00 spec, section 4.6.1 "The Invariant Qualifier": + * + * "Only variables output from a shader can be candidates for invariance. + * This includes user-defined output variables and the built-in output + * variables. As only outputs can be declared as invariant, an invariant + * output from one shader stage will still match an input of a subsequent + * stage without the input being declared as invariant." + */ + if (state->es_shader && state->language_version >= 300 && $$.flags.q.in) + _mesa_glsl_error(&@1, state, "invariant qualifiers cannot be used with shader inputs"); } | interpolation_qualifier type_qualifier { diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp index 43da2c6..2261799 100644 --- a/src/glsl/link_varyings.cpp +++ b/src/glsl/link_varyings.cpp @@ -116,7 +116,7 @@ cross_validate_types_and_qualifiers(struct gl_shader_program *prog, return; } - if (input->data.invariant != output->data.invariant) { + if (!prog->IsES && input->data.invariant != output->data.invariant) { linker_error(prog, "%s shader output `%s' %s invariant qualifier, " "but %s shader input %s invariant qualifier\n", ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): mesa: Recompute LegalTypesMask if the GL API has changed
Module: Mesa Branch: master Commit: e1ed4f2532b4e9bafb5663cccbe28033c49b2e77 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e1ed4f2532b4e9bafb5663cccbe28033c49b2e77 Author: Iago Toral Quiroga Date: Tue Dec 2 12:10:14 2014 +0100 mesa: Recompute LegalTypesMask if the GL API has changed The current code computes ctx->Array.LegalTypesMask just once, however, computing this needs to consider ctx->API so we need to make sure that the API for that context has not changed if we intend to reuse the result. The context API can change, at least, if we go through _mesa_meta_begin, since that will always force API_OPENGL_COMPAT until we call _mesa_meta_end. If any operation in between these two calls triggers a call to update_array_format, then we might be caching a value for LegalTypesMask that will not be right once we have called _mesa_meta_end and restored the context API. Fixes the following 179 dEQP tests in i965: dEQP-GLES3.functional.vertex_arrays.single_attribute.strides.fixed.* dEQP-GLES3.functional.vertex_arrays.single_attribute.normalize.fixed.* dEQP-GLES3.functional.vertex_arrays.single_attribute.output_types.fixed.* dEQP-GLES3.functional.vertex_arrays.single_attribute.usages.static_draw.*fixed* dEQP-GLES3.functional.vertex_arrays.single_attribute.usages.stream_draw.*fixed* dEQP-GLES3.functional.vertex_arrays.single_attribute.usages.dynamic_draw.*fixed* dEQP-GLES3.functional.vertex_arrays.single_attribute.usages.static_copy.*fixed* dEQP-GLES3.functional.vertex_arrays.single_attribute.usages.stream_copy.*fixed* dEQP-GLES3.functional.vertex_arrays.single_attribute.usages.dynamic_copy.*fixed* dEQP-GLES3.functional.vertex_arrays.single_attribute.usages.static_read.*fixed* dEQP-GLES3.functional.vertex_arrays.single_attribute.usages.stream_read.*fixed* dEQP-GLES3.functional.vertex_arrays.single_attribute.usages.dynamic_read.*fixed* dEQP-GLES3.functional.vertex_arrays.multiple_attributes.input_types.3_*fixed2* 
dEQP-GLES3.functional.draw.random.{2,18,28,68,83,106,109,156,181,191} Reviewed-by: Brian Paul --- src/mesa/main/mtypes.h | 32 src/mesa/main/varray.c |9 ++--- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index cee11a3..b95dfb9 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -1657,6 +1657,20 @@ typedef enum { DRAW_ARRAYS } gl_draw_method; +/** + * Enum for the OpenGL APIs we know about and may support. + * + * NOTE: This must match the api_enum table in + * src/mesa/main/get_hash_generator.py + */ +typedef enum +{ + API_OPENGL_COMPAT, /* legacy / compatibility contexts */ + API_OPENGLES, + API_OPENGLES2, + API_OPENGL_CORE, + API_OPENGL_LAST = API_OPENGL_CORE +} gl_api; /** * Vertex array state @@ -1701,8 +1715,9 @@ struct gl_array_attrib /** One of the DRAW_xxx flags, not consumed by drivers */ gl_draw_method DrawMethod; - /** Legal array datatypes */ + /** Legal array datatypes and the API for which they have been computed */ GLbitfield LegalTypesMask; + gl_api LegalTypesMaskAPI; }; @@ -4040,21 +4055,6 @@ enum mesa_debug_severity { /** @} */ /** - * Enum for the OpenGL APIs we know about and may support. - * - * NOTE: This must match the api_enum table in - * src/mesa/main/get_hash_generator.py - */ -typedef enum -{ - API_OPENGL_COMPAT, /* legacy / compatibility contexts */ - API_OPENGLES, - API_OPENGLES2, - API_OPENGL_CORE, - API_OPENGL_LAST = API_OPENGL_CORE -} gl_api; - -/** * Driver-specific state flags. * * These are or'd with gl_context::NewDriverState to notify a driver about diff --git a/src/mesa/main/varray.c b/src/mesa/main/varray.c index 96c2b26..89aaad1 100644 --- a/src/mesa/main/varray.c +++ b/src/mesa/main/varray.c @@ -258,11 +258,14 @@ update_array_format(struct gl_context *ctx, GLuint elementSize; GLenum format = GL_RGBA; - if (ctx->Array.LegalTypesMask == 0) { - /* One-time initialization. 
We can't do this in _mesa_init_varrays() - * below because extensions are not yet enabled at that point. + if (ctx->Array.LegalTypesMask == 0 || ctx->Array.LegalTypesMaskAPI != ctx->API) { + /* Compute the LegalTypesMask only once, unless the context API has + * changed, in which case we want to compute it again. We can't do this + * in _mesa_init_varrays() below because extensions are not yet enabled + * at that point. */ ctx->Array.LegalTypesMask = get_legal_types_mask(ctx); + ctx->Array.LegalTypesMaskAPI = ctx->API; } legalTypesMask &= ctx->Array.LegalTypesMask; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): mesa: Considers GL_DEPTH_STENCIL_ATTACHMENT a valid argument for FBO invalidation under GLES3
Module: Mesa Branch: master Commit: 242ad326552b10a31667eba0be5677a4d8397dc4 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=242ad326552b10a31667eba0be5677a4d8397dc4 Author: Eduardo Lima Mitev Date: Tue Nov 18 16:28:18 2014 +0100 mesa: Considers GL_DEPTH_STENCIL_ATTACHMENT a valid argument for FBO invalidation under GLES3 In OpenGL and OpenGL-ES 3+, GL_DEPTH_STENCIL_ATTACHMENT is a valid attachment point for the family of functions that invalidate a framebuffer object (e.g, glInvalidateFramebuffer, glInvalidateSubFramebuffer, etc). Currently, a GL_INVALID_ENUM error is emitted for this attachment point. Fixes 21 dEQP test failures under 'dEQP-GLES3.functional.fbo.invalidate.*'. Reviewed-by: Ian Romanick --- src/mesa/main/fbobject.c |8 1 file changed, 8 insertions(+) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 02b7633..f5c11c4 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -3074,6 +3074,14 @@ invalidate_framebuffer_storage(GLenum target, GLsizei numAttachments, case GL_DEPTH_ATTACHMENT: case GL_STENCIL_ATTACHMENT: break; + case GL_DEPTH_STENCIL_ATTACHMENT: +/* GL_DEPTH_STENCIL_ATTACHMENT is a valid attachment point only + * in desktop and ES 3.0 profiles. Note that OES_packed_depth_stencil + * extension does not make this attachment point valid on ES 2.0. + */ +if (_mesa_is_desktop_gl(ctx) || _mesa_is_gles3(ctx)) + break; +/* fallthrough */ case GL_COLOR_ATTACHMENT0: case GL_COLOR_ATTACHMENT1: case GL_COLOR_ATTACHMENT2: ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): main: return two minor digits for ES shading language version
Module: Mesa Branch: master Commit: 6cc72511850961eba408a44f648c7067b6e68594 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6cc72511850961eba408a44f648c7067b6e68594 Author: Samuel Iglesias Gonsalvez Date: Wed Nov 26 13:16:38 2014 +0100 main: return two minor digits for ES shading language version For OpenGL ES 3.0 spec, the minor number for SHADING_LANGUAGE_VERSION is always two digits, matching the OpenGL ES Shading Language Specification release number. For example, this query might return the string "3.00". This patch fixes the following dEQP test: dEQP-GLES3.functional.state_query.string.shading_language_version No piglit regression observed. Signed-off-by: Samuel Iglesias Gonsalvez Reviewed-by: Ian Romanick --- src/mesa/main/getstring.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/main/getstring.c b/src/mesa/main/getstring.c index f9d13a7..1b2c7f0 100644 --- a/src/mesa/main/getstring.c +++ b/src/mesa/main/getstring.c @@ -74,7 +74,7 @@ shading_language_version(struct gl_context *ctx) case API_OPENGLES2: return (ctx->Version < 30) ? (const GLubyte *) "OpenGL ES GLSL ES 1.0.16" - : (const GLubyte *) "OpenGL ES GLSL ES 3.0"; + : (const GLubyte *) "OpenGL ES GLSL ES 3.00"; case API_OPENGLES: /* fall-through */ ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Prioritize allocating accumulators to short-lived values.
Module: Mesa Branch: master Commit: ab1b1fa6fbd72b05c48f83c9df5036c2bfe893a3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ab1b1fa6fbd72b05c48f83c9df5036c2bfe893a3 Author: Eric Anholt Date: Mon Dec 8 17:43:29 2014 -0800 vc4: Prioritize allocating accumulators to short-lived values. The register allocator walks from the end of the nodes array looking for trivially-allocatable things to put on the stack, meaning (assuming everything is trivially colorable and gets put on the stack in a single pass) the low node numbers get allocated first. The things allocated first happen to get the lower-numbered registers, which is to say the fast accumulators that can be paired more easily. When we previously made the nodes match the temporary register numbers, we'd end up putting the shader inputs (VS or FS) in the accumulators, which are often long-lived values. By prioritizing the shortest-lived values for allocation, we can get a lot more instructions that involve accumulators, and thus fewer conflicts for raddr and WS. total instructions in shared programs: 52870 -> 46428 (-12.18%) instructions in affected programs: 52260 -> 45818 (-12.33%) --- src/gallium/drivers/vc4/vc4_register_allocate.c | 73 ++- 1 file changed, 59 insertions(+), 14 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c index b62669f..3001900 100644 --- a/src/gallium/drivers/vc4/vc4_register_allocate.c +++ b/src/gallium/drivers/vc4/vc4_register_allocate.c @@ -139,6 +139,20 @@ vc4_alloc_reg_set(struct vc4_context *vc4) ra_set_finalize(vc4->regs, NULL); } +struct node_to_temp_map { +uint32_t temp; +uint32_t priority; +}; + +static int +node_to_temp_priority(const void *in_a, const void *in_b) +{ +const struct node_to_temp_map *a = in_a; +const struct node_to_temp_map *b = in_b; + +return a->priority - b->priority; +} + /** * Returns a mapping from QFILE_TEMP indices to struct qpu_regs. 
* @@ -148,6 +162,8 @@ struct qpu_reg * vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) { struct simple_node *node; +struct node_to_temp_map map[c->num_temps]; +uint32_t temp_to_node[c->num_temps]; uint32_t def[c->num_temps]; uint32_t use[c->num_temps]; struct qpu_reg *temp_registers = calloc(c->num_temps, @@ -166,11 +182,11 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) struct ra_graph *g = ra_alloc_interference_graph(vc4->regs, c->num_temps); -for (uint32_t i = 0; i < c->num_temps; i++) +for (uint32_t i = 0; i < c->num_temps; i++) { ra_set_node_class(g, i, vc4->reg_class_any); +} -/* Compute the live ranges so we can figure out interference, and - * figure out our register classes and preallocated registers. +/* Compute the live ranges so we can figure out interference. */ uint32_t ip = 0; foreach(node, &c->instructions) { @@ -188,27 +204,54 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) switch (inst->op) { case QOP_FRAG_Z: +case QOP_FRAG_W: +/* The payload registers have values implicitly loaded + * at the start of the program. + */ def[inst->dst.index] = 0; -ra_set_node_reg(g, inst->dst.index, +break; +default: +break; +} + +ip++; +} + +for (uint32_t i = 0; i < c->num_temps; i++) { +map[i].temp = i; +map[i].priority = use[i] - def[i]; +} +qsort(map, c->num_temps, sizeof(map[0]), node_to_temp_priority); +for (uint32_t i = 0; i < c->num_temps; i++) { +temp_to_node[map[i].temp] = i; +} + +/* Figure out our register classes and preallocated registers*/ +foreach(node, &c->instructions) { +struct qinst *inst = (struct qinst *)node; + +switch (inst->op) { +case QOP_FRAG_Z: +ra_set_node_reg(g, temp_to_node[inst->dst.index], AB_INDEX + QPU_R_FRAG_PAYLOAD_ZW * 2 + 1); break; case QOP_FRAG_W: -def[inst->dst.index] = 0; -ra_set_node_reg(g, inst->dst.index, +ra_set_node_reg(g, temp_to_node[inst->dst.index], AB_INDEX + QPU_R_FRAG_PAYLOAD_ZW * 2); break; case QOP_TEX_RESULT: case QOP_TLB_COLOR_READ:
Mesa (master): vc4: Reserve rb31 instead of r3 for raddr conflict spills.
Module: Mesa Branch: master Commit: 8420a956924c720b3c4932a577623f836758c21c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8420a956924c720b3c4932a577623f836758c21c Author: Eric Anholt Date: Mon Dec 8 16:52:53 2014 -0800 vc4: Reserve rb31 instead of r3 for raddr conflict spills. This increases the cost of a raddr b conflict spill (save r3 to rb31, move src1 to r3, move rb31 back to r3 when done, instead of just move src1 to r3), but on average thanks to instruction pairing it's more worthwhile to have another accumulator. total instructions in shared programs: 46428 -> 46171 (-0.55%) instructions in affected programs: 38030 -> 37773 (-0.68%) --- src/gallium/drivers/vc4/vc4_qpu_emit.c | 50 +++ src/gallium/drivers/vc4/vc4_register_allocate.c |6 +-- 2 files changed, 45 insertions(+), 11 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index 856f844..f2620c0 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -93,21 +93,41 @@ swap_file(struct qpu_reg *src) * In that case, we need to move one to a temporary that can be used in the * instruction, instead. */ -static void +static bool fixup_raddr_conflict(struct vc4_compile *c, - struct qpu_reg *src0, struct qpu_reg *src1) + struct qpu_reg dst, + struct qpu_reg *src0, struct qpu_reg *src1, + bool r3_live) { if ((src0->mux != QPU_MUX_A && src0->mux != QPU_MUX_B) || src0->mux != src1->mux || src0->addr == src1->addr) { -return; +return false; } if (swap_file(src0) || swap_file(src1)) -return; +return false; + +if (src0->mux == QPU_MUX_A) { +/* If we're conflicting over the A regfile, then we can just + * use the reserved rb31. + */ +queue(c, qpu_a_MOV(qpu_rb(31), *src1)); +*src1 = qpu_rb(31); +return false; +} else { +/* Otherwise, we need a non-B regfile. So, we spill r3 out to + * rb31, then store our desired value in r3, and tell the + * caller to put rb31 back into r3 when we're done. 
+ */ +if (r3_live) +queue(c, qpu_a_MOV(qpu_rb(31), qpu_r3())); +queue(c, qpu_a_MOV(qpu_r3(), *src1)); + +*src1 = qpu_r3(); -queue(c, qpu_a_MOV(qpu_r3(), *src1)); -*src1 = qpu_r3(); +return r3_live && dst.mux != QPU_MUX_R3; +} } void @@ -118,6 +138,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) uint32_t inputs_remaining = c->num_inputs; uint32_t vpm_read_fifo_count = 0; uint32_t vpm_read_offset = 0; +bool written_r3 = false; +bool needs_restore; make_empty_list(&c->qpu_inst_list); @@ -416,8 +438,12 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) break; case QOP_TEX_DIRECT: -fixup_raddr_conflict(c, &src[0], &src[1]); +needs_restore = fixup_raddr_conflict(c, dst, + &src[0], &src[1], + written_r3); queue(c, qpu_a_ADD(qpu_rb(QPU_W_TMU0_S), src[0], src[1])); +if (needs_restore) +queue(c, qpu_a_MOV(qpu_r3(), qpu_rb(31))); break; case QOP_TEX_RESULT: @@ -477,7 +503,9 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) if (qir_get_op_nsrc(qinst->op) == 1) src[1] = src[0]; -fixup_raddr_conflict(c, &src[0], &src[1]); +needs_restore = fixup_raddr_conflict(c, dst, + &src[0], &src[1], + written_r3); if (translate[qinst->op].is_mul) { queue(c, qpu_m_alu2(translate[qinst->op].op, @@ -488,8 +516,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) dst, src[0], src[1])); } +if (needs_restore) +queue(c, qpu_a_MOV(qpu_r3(), qpu_rb(31))); + break; } + +if (dst.mux == QPU_MUX_R3) +written_r3 = true; } qpu_sc