Module: Mesa
Branch: master
Commit: e103b52aec773537d2821d8acc42ac9caa2a4b17
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=e103b52aec773537d2821d8acc42ac9caa2a4b17

Author: Varad Gautam <varadgau...@gmail.com>
Date:   Tue Mar  8 01:19:59 2016 +0530

vc4: Coalesce instructions using VPM reads into the VPM read.

This is done instead of copy propagating the VPM reads into the
instructions using them, because VPM reads have to stay in order.

shader-db results:
total instructions in shared programs: 78509 -> 78114 (-0.50%)
instructions in affected programs:     5203 -> 4808 (-7.59%)
total estimated cycles in shared programs: 234670 -> 234318 (-0.15%)
estimated cycles in affected programs:     5345 -> 4993 (-6.59%)

Signed-off-by: Varad Gautam <varadgau...@gmail.com>
Reviewed-by: Eric Anholt <e...@anholt.net>
Tested-by: Rhys Kidd <rhysk...@gmail.com>

---

 src/gallium/drivers/vc4/vc4_opt_vpm.c | 74 ++++++++++++++++++++++++++++++++---
 src/gallium/drivers/vc4/vc4_qir.c     |  2 +-
 src/gallium/drivers/vc4/vc4_qir.h     |  2 +-
 3 files changed, 71 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_opt_vpm.c b/src/gallium/drivers/vc4/vc4_opt_vpm.c
index 0fcf1e5..d15b0c1 100644
--- a/src/gallium/drivers/vc4/vc4_opt_vpm.c
+++ b/src/gallium/drivers/vc4/vc4_opt_vpm.c
@@ -24,14 +24,16 @@
 /**
  * @file vc4_opt_vpm.c
  *
- * This modifies instructions that generate the value consumed by a VPM write
- * to write directly into the VPM.
+ * This modifies instructions that:
+ * 1. exclusively consume a value read from the VPM to directly read the VPM if
+ *    other operands allow it.
+ * 2. generate the value consumed by a VPM write to write directly into the VPM.
  */
 
 #include "vc4_qir.h"
 
 bool
-qir_opt_vpm_writes(struct vc4_compile *c)
+qir_opt_vpm(struct vc4_compile *c)
 {
         if (c->stage == QSTAGE_FRAG)
                 return false;
@@ -52,8 +54,70 @@ qir_opt_vpm_writes(struct vc4_compile *c)
                 }
 
                 for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
-                        if (inst->src[i].file == QFILE_TEMP)
-                                use_count[inst->src[i].index]++;
+                        if (inst->src[i].file == QFILE_TEMP) {
+                                uint32_t temp = inst->src[i].index;
+                                use_count[temp]++;
+                        }
+                }
+        }
+
+        /* For instructions reading from a temporary that contains a VPM read
+         * result, try to move the instruction up in place of the VPM read.
+         */
+        list_for_each_entry(struct qinst, inst, &c->instructions, link) {
+                if (!inst || qir_is_multi_instruction(inst))
+                        continue;
+
+                if (qir_depends_on_flags(inst) || inst->sf)
+                        continue;
+
+                if (qir_has_side_effects(c, inst) ||
+                    qir_has_side_effect_reads(c, inst) ||
+                    qir_is_tex(inst))
+                        continue;
+
+                for (int j = 0; j < qir_get_op_nsrc(inst->op); j++) {
+                        if (inst->src[j].file != QFILE_TEMP ||
+                            inst->src[j].pack)
+                                continue;
+
+                        uint32_t temp = inst->src[j].index;
+
+                        /* Since VPM reads pull from a FIFO, we only get to
+                         * read each VPM entry once (unless we reset the read
+                         * pointer).  That means we can't copy-propagate a VPM
+                         * read to multiple locations.
+                         */
+                        if (use_count[temp] != 1)
+                                continue;
+
+                        struct qinst *mov = c->defs[temp];
+                        if (!mov ||
+                            (mov->op != QOP_MOV &&
+                             mov->op != QOP_FMOV &&
+                             mov->op != QOP_MMOV) ||
+                            mov->src[0].file != QFILE_VPM) {
+                                continue;
+                        }
+
+                        uint32_t temps = 0;
+                        for (int k = 0; k < qir_get_op_nsrc(inst->op); k++) {
+                                if (inst->src[k].file == QFILE_TEMP)
+                                        temps++;
+                        }
+
+                        /* The instruction is safe to reorder if its other
+                         * sources are independent of previous instructions
+                         */
+                        if (temps == 1) {
+                                list_del(&inst->link);
+                                inst->src[j] = mov->src[0];
+                                list_replace(&mov->link, &inst->link);
+                                c->defs[temp] = NULL;
+                                free(mov);
+                                progress = true;
+                                break;
+                        }
                 }
         }
 
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index f9eb0e1..65f0067 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -526,7 +526,7 @@ qir_optimize(struct vc4_compile *c)
                 OPTPASS(qir_opt_copy_propagation);
                 OPTPASS(qir_opt_dead_code);
                 OPTPASS(qir_opt_small_immediates);
-                OPTPASS(qir_opt_vpm_writes);
+                OPTPASS(qir_opt_vpm);
 
                 if (!progress)
                         break;
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index bae3176..4f39d72 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -484,7 +484,7 @@ bool qir_opt_copy_propagation(struct vc4_compile *c);
 bool qir_opt_cse(struct vc4_compile *c);
 bool qir_opt_dead_code(struct vc4_compile *c);
 bool qir_opt_small_immediates(struct vc4_compile *c);
-bool qir_opt_vpm_writes(struct vc4_compile *c);
+bool qir_opt_vpm(struct vc4_compile *c);
 void vc4_nir_lower_blend(struct vc4_compile *c);
 void vc4_nir_lower_io(struct vc4_compile *c);
 nir_ssa_def *vc4_nir_get_state_uniform(struct nir_builder *b,

_______________________________________________
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to