On Fri, Feb 6, 2015 at 6:43 AM, Francisco Jerez <curroje...@riseup.net> wrote: > --- > src/mesa/drivers/dri/i965/brw_ir_vec4.h | 1 + > src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp | 15 ++++++++++----- > src/mesa/drivers/dri/i965/brw_vec4.cpp | 17 > +++++++++++++++++ > 3 files changed, 28 insertions(+), 5 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h > b/src/mesa/drivers/dri/i965/brw_ir_vec4.h > index f11a2d2..941086f 100644 > --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h > +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h > @@ -171,6 +171,7 @@ public: > unsigned sol_vertex; /**< gen6: used for setting dst index in SVB header > */ > > bool is_send_from_grf(); > + unsigned regs_read(unsigned arg) const; > bool can_reswizzle(int dst_writemask, int swizzle, int swizzle_mask); > void reswizzle(int dst_writemask, int swizzle); > bool can_do_source_mods(struct brw_context *brw); > diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp > b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp > index 88feeca..2b22b2c 100644 > --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp > +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp > @@ -1063,7 +1063,8 @@ vec4_instruction_scheduler::calculate_deps() > /* read-after-write deps. */ > for (int i = 0; i < 3; i++) { > if (inst->src[i].file == GRF) { > - add_dep(last_grf_write[inst->src[i].reg], n); > + for (unsigned j = 0; j < inst->regs_read(i); ++j) > + add_dep(last_grf_write[inst->src[i].reg + j], n); > } else if (inst->src[i].file == HW_REG && > (inst->src[i].fixed_hw_reg.file == > BRW_GENERAL_REGISTER_FILE)) { > @@ -1103,8 +1104,10 @@ vec4_instruction_scheduler::calculate_deps() > > /* write-after-write deps. */ > if (inst->dst.file == GRF) { > - add_dep(last_grf_write[inst->dst.reg], n); > - last_grf_write[inst->dst.reg] = n; > + for (unsigned j = 0; j < inst->regs_written; ++j) { > + add_dep(last_grf_write[inst->dst.reg + j], n); > + last_grf_write[inst->dst.reg + j] = n; > + } > } else if (inst->dst.file == MRF) { > add_dep(last_mrf_write[inst->dst.reg], n); > last_mrf_write[inst->dst.reg] = n; > @@ -1156,7 +1159,8 @@ vec4_instruction_scheduler::calculate_deps() > /* write-after-read deps. */ > for (int i = 0; i < 3; i++) { > if (inst->src[i].file == GRF) { > - add_dep(n, last_grf_write[inst->src[i].reg]); > + for (unsigned j = 0; j < inst->regs_read(i); ++j) > + add_dep(n, last_grf_write[inst->src[i].reg + j]); > } else if (inst->src[i].file == HW_REG && > (inst->src[i].fixed_hw_reg.file == > BRW_GENERAL_REGISTER_FILE)) { > @@ -1194,7 +1198,8 @@ vec4_instruction_scheduler::calculate_deps() > * can mark this as WAR dependency. > */ > if (inst->dst.file == GRF) { > - last_grf_write[inst->dst.reg] = n; > + for (unsigned j = 0; j < inst->regs_written; ++j) > + last_grf_write[inst->dst.reg + j] = n; > } else if (inst->dst.file == MRF) { > last_mrf_write[inst->dst.reg] = n; > } else if (inst->dst.file == HW_REG && > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp > b/src/mesa/drivers/dri/i965/brw_vec4.cpp > index a4fd136..b2f3151 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp > @@ -262,6 +262,23 @@ vec4_instruction::is_send_from_grf() > } > } > > +unsigned > +vec4_instruction::regs_read(unsigned arg) const > +{ > + switch (opcode) { > + case SHADER_OPCODE_SHADER_TIME_ADD: > + return (arg == 0 ? mlen : > + src[arg].file != BAD_FILE ? 1 : 0); > + > + case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: > + return (arg == 1 ? mlen : > + src[arg].file != BAD_FILE ? 1 : 0); > + > + default: > + return (src[arg].file != BAD_FILE ? 1 : 0); > + } > +}
Better to just do if (src[arg].file == BAD_FILE) return 0; before the switch statement to avoid the nested ternary, or do you actually need to test arg == 0/1 before checking .file in those two cases? _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev