Module: Mesa
Branch: master
Commit: 8dfc6ee317dd01016a26f1fb21829b89277e59d7
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=8dfc6ee317dd01016a26f1fb21829b89277e59d7

Author: Eric Anholt <[email protected]>
Date:   Fri Jul 20 12:05:57 2018 -0700

v3d: Rotate through registers to improve post-RA scheduling options.

Similarly to VC4's implementation, by not picking r0 immediately upon
freeing it, we give the scheduler more of a chance to fit later writes in
earlier.  I'm not clear on whether there's any real cost to picking phys
over accumulators, so keep that behavior for now.

shader-db:
total instructions in shared programs: 96831 -> 95669 (-1.20%)
instructions in affected programs:     77254 -> 76092 (-1.50%)

---

 src/broadcom/compiler/vir_register_allocate.c | 45 +++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
index 4ec5f23264..aa5e2139c1 100644
--- a/src/broadcom/compiler/vir_register_allocate.c
+++ b/src/broadcom/compiler/vir_register_allocate.c
@@ -238,6 +238,43 @@ v3d_spill_reg(struct v3d_compile *c, int spill_temp)
                 BITSET_CLEAR(c->spillable, i);
 }
 
+struct v3d_ra_select_callback_data {
+        uint32_t next_acc;  /* ACC offset where the next search starts */
+        uint32_t next_phys; /* PHYS offset where the next search starts */
+};
+
+static unsigned int
+v3d_ra_select_callback(struct ra_graph *g, BITSET_WORD *regs, void *data)
+{
+        struct v3d_ra_select_callback_data *v3d_ra = data;
+
+        /* Choose an accumulator if possible (I think it's lower power than
+         * phys regs), but round-robin through them, resuming after the last
+         * one picked, to give post-RA instruction scheduling more options.
+         */
+        for (int i = 0; i < ACC_COUNT; i++) {
+                int acc_off = (v3d_ra->next_acc + i) % ACC_COUNT;
+                int acc = ACC_INDEX + acc_off;
+
+                if (BITSET_TEST(regs, acc)) {
+                        v3d_ra->next_acc = acc_off + 1;
+                        return acc;
+                }
+        }
+
+        for (int i = 0; i < PHYS_COUNT; i++) { /* else fall back to phys */
+                int phys_off = (v3d_ra->next_phys + i) % PHYS_COUNT;
+                int phys = PHYS_INDEX + phys_off;
+
+                if (BITSET_TEST(regs, phys)) {
+                        v3d_ra->next_phys = phys_off + 1;
+                        return phys;
+                }
+        }
+
+        unreachable("RA must pass us at least one possible reg.");
+}
+
 bool
 vir_init_reg_sets(struct v3d_compiler *compiler)
 {
@@ -309,6 +346,13 @@ v3d_register_allocate(struct v3d_compile *c, bool *spilled)
         struct qpu_reg *temp_registers = calloc(c->num_temps,
                                                 sizeof(*temp_registers));
         int acc_nodes[ACC_COUNT];
+        struct v3d_ra_select_callback_data callback_data = {
+                .next_acc = 0,
+                /* Start at RF3, to try to keep the TLB writes from using
+                 * RF0-2.
+                 */
+                .next_phys = 3,
+        };
 
         *spilled = false;
 
@@ -328,6 +372,7 @@ v3d_register_allocate(struct v3d_compile *c, bool *spilled)
         struct ra_graph *g = ra_alloc_interference_graph(c->compiler->regs,
                                                          c->num_temps +
                                                          ARRAY_SIZE(acc_nodes));
+        ra_set_select_reg_callback(g, v3d_ra_select_callback, &callback_data);
 
         /* Make some fixed nodes for the accumulators, which we will need to
          * interfere with when ops have implied r3/r4 writes or for the thread

_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to