Since LIFO fails on some shaders in one particular way, and non-LIFO
systematically fails in another way on different kinds of shaders, try
them both, and pick whichever one successfully register allocates first.
Slightly prefer non-LIFO in case we produce extra dependencies in register
allocation, since it should start out with fewer stalls than LIFO.
This is madness, but I haven't come up with another way to get unigine
tropics to not spill while keeping other programs from spilling and
retaining the non-unigine performance wins from texture-grf.
total instructions in shared programs: 1626728 -> 1626288 (-0.03%)
instructions in affected programs: 1015 -> 575 (-43.35%)
GAINED: 50
LOST: 0
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 25 +--
src/mesa/drivers/dri/i965/brw_fs.h | 4 +-
src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 10 +--
.../drivers/dri/i965/brw_schedule_instructions.cpp | 85 --
src/mesa/drivers/dri/i965/brw_shader.h | 6 ++
5 files changed, 76 insertions(+), 54 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 65fedfb..5d7 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3283,15 +3283,28 @@ fs_visitor::run()
assign_curb_setup();
assign_urb_setup();
- schedule_instructions(false);
+ schedule_instructions(SCHEDULE_PRE_NON_LIFO);
if (0)
assign_regs_trivial();
else {
-while (!assign_regs()) {
- if (failed)
- break;
-}
+ if (!assign_regs(false)) {
+/* Try a non-spilling register allocation again with a different
+ * scheduling heuristic.
+ */
+schedule_instructions(SCHEDULE_PRE_LIFO);
+if (!assign_regs(false)) {
+ if (dispatch_width == 16) {
+ fail(Failure to register allocate. Reduce number of
+ live scalar values to avoid this.);
+ } else {
+ while (!assign_regs(true)) {
+ if (failed)
+break;
+ }
+ }
+}
+ }
}
}
assert(force_uncompressed_stack == 0);
@@ -3306,7 +3319,7 @@ fs_visitor::run()
if (failed)
return false;
- schedule_instructions(true);
+ schedule_instructions(SCHEDULE_POST);
if (dispatch_width == 8) {
c-prog_data.reg_blocks = brw_register_blocks(grf_used);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h
b/src/mesa/drivers/dri/i965/brw_fs.h
index 4f97a67..be86b15 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -291,7 +291,7 @@ public:
void assign_curb_setup();
void calculate_urb_setup();
void assign_urb_setup();
- bool assign_regs();
+ bool assign_regs(bool allow_spilling);
void assign_regs_trivial();
void get_used_mrfs(bool *mrf_used);
void setup_payload_interference(struct ra_graph *g, int payload_reg_count,
@@ -322,7 +322,7 @@ public:
bool remove_dead_constants();
bool remove_duplicate_mrf_writes();
bool virtual_grf_interferes(int a, int b);
- void schedule_instructions(bool post_reg_alloc);
+ void schedule_instructions(instruction_scheduler_mode mode);
void insert_gen4_send_dependency_workarounds();
void insert_gen4_pre_send_dependency_workarounds(fs_inst *inst);
void insert_gen4_post_send_dependency_workarounds(fs_inst *inst);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index d9e80d0..8567afd 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -417,7 +417,7 @@ fs_visitor::setup_mrf_hack_interference(struct ra_graph *g,
int first_mrf_node)
}
bool
-fs_visitor::assign_regs()
+fs_visitor::assign_regs(bool allow_spilling)
{
/* Most of this allocation was written for a reg_width of 1
* (dispatch_width == 8). In extending to 16-wide, the code was
@@ -496,14 +496,10 @@ fs_visitor::assign_regs()
if (reg == -1) {
fail(no register to spill:\n);
dump_instructions();
- } else if (dispatch_width == 16) {
-fail(Failure to register allocate. Reduce number of live scalar
- values to avoid this.);
- } else {
-spill_reg(reg);
+ } else if (allow_spilling) {
+ spill_reg(reg);
}
-
ralloc_free(g);
return false;
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index 5710380..befea0a 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -391,14 +391,16 @@