Hi Matt, are you okay with excluding FS_OPCODE_CINTERP from the list of
virtual instructions to consider in Gen 6? As far as I can see it is
implemented as a single MOV instruction and the accumulator does not
seem to be used but maybe I am missing something.
Iago
On Wed, 2014-05-07 at 09:58 +0200, Iago Toral Quiroga wrote:
Many instructions implicitly update the accumulator on Gen 6. The
instruction
scheduling code just calls add_barrier_deps() for each accumulator access on
these platforms, but a large class of operations don't actually update the
accumulator -- mostly move and logical instructions. Teaching the scheduling
code about this would allow more flexibility to schedule instructions.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=77740
---
This version properly identifies virtual instructions that write to the
accumulator in Gen 6 as indicated by Matt. FS_OPCODE_CINTERP is excluded
because it seems to be implemented as a MOV.
Passes piglit tests on IronLake.
.../drivers/dri/i965/brw_schedule_instructions.cpp | 84
+++---
src/mesa/drivers/dri/i965/brw_shader.cpp | 10 +++
src/mesa/drivers/dri/i965/brw_shader.h | 1 +
3 files changed, 36 insertions(+), 59 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index 8cc6908..6f8f405 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -742,8 +742,6 @@ fs_instruction_scheduler::is_compressed(fs_inst *inst)
void
fs_instruction_scheduler::calculate_deps()
{
- const bool gen6plus = v-brw-gen = 6;
-
/* Pre-register-allocation, this tracks the last write per VGRF (so
* different reg_offsets within it can interfere when they shouldn't).
* After register allocation, reg_offsets are gone and we track individual
@@ -803,7 +801,7 @@ fs_instruction_scheduler::calculate_deps()
} else {
add_dep(last_fixed_grf_write, n);
}
- } else if (inst-src[i].is_accumulator() gen6plus) {
+ } else if (inst-src[i].is_accumulator()) {
add_dep(last_accumulator_write, n);
} else if (inst-src[i].file != BAD_FILE
inst-src[i].file != IMM
@@ -828,11 +826,7 @@ fs_instruction_scheduler::calculate_deps()
}
if (inst-reads_accumulator_implicitly()) {
- if (gen6plus) {
-add_dep(last_accumulator_write, n);
- } else {
-add_barrier_deps(n);
- }
+ add_dep(last_accumulator_write, n);
}
/* write-after-write deps. */
@@ -867,7 +861,7 @@ fs_instruction_scheduler::calculate_deps()
} else {
last_fixed_grf_write = n;
}
- } else if (inst-dst.is_accumulator() gen6plus) {
+ } else if (inst-dst.is_accumulator()) {
add_dep(last_accumulator_write, n);
last_accumulator_write = n;
} else if (inst-dst.file != BAD_FILE
@@ -887,13 +881,10 @@ fs_instruction_scheduler::calculate_deps()
last_conditional_mod[inst-flag_subreg] = n;
}
- if (inst-writes_accumulator) {
- if (gen6plus) {
-add_dep(last_accumulator_write, n);
-last_accumulator_write = n;
- } else {
-add_barrier_deps(n);
- }
+ if (inst-writes_accumulator_implicitly(v-brw-gen)
+ !inst-dst.is_accumulator()) {
+ add_dep(last_accumulator_write, n);
+ last_accumulator_write = n;
}
}
@@ -933,7 +924,7 @@ fs_instruction_scheduler::calculate_deps()
} else {
add_dep(n, last_fixed_grf_write);
}
- } else if (inst-src[i].is_accumulator() gen6plus) {
+ } else if (inst-src[i].is_accumulator()) {
add_dep(n, last_accumulator_write);
} else if (inst-src[i].file != BAD_FILE
inst-src[i].file != IMM
@@ -958,11 +949,7 @@ fs_instruction_scheduler::calculate_deps()
}
if (inst-reads_accumulator_implicitly()) {
- if (gen6plus) {
-add_dep(n, last_accumulator_write);
- } else {
-add_barrier_deps(n);
- }
+ add_dep(n, last_accumulator_write);
}
/* Update the things this instruction wrote, so earlier reads
@@ -996,7 +983,7 @@ fs_instruction_scheduler::calculate_deps()
} else {
last_fixed_grf_write = n;
}
- } else if (inst-dst.is_accumulator() gen6plus) {
+ } else if (inst-dst.is_accumulator()) {
last_accumulator_write = n;
} else if (inst-dst.file != BAD_FILE
!inst-dst.is_null()) {
@@ -1013,12 +1000,8 @@ fs_instruction_scheduler::calculate_deps()