On Broadwell, this reduces a NOT followed by a logical instruction to a
single operation by propagating the NOT into the logical instruction's source.

Signed-off-by: Abdiel Janulgue <abdiel.janul...@linux.intel.com>
---
v3 [mattst88]: Move bits not used by patch 4 into this.

 src/mesa/drivers/dri/i965/brw_vec4.h               |  4 +-
 .../drivers/dri/i965/brw_vec4_copy_propagation.cpp | 69 ++++++++++++++--------
 2 files changed, 46 insertions(+), 27 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index c2bbd68..6c4952f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -228,6 +228,8 @@ writemask(dst_reg reg, unsigned mask)
    return reg;
 }
 
+struct copy_entry;
+
 class vec4_instruction : public backend_instruction {
 public:
    DECLARE_RALLOC_CXX_OPERATORS(vec4_instruction)
@@ -498,7 +500,7 @@ public:
                               vec4_instruction *last_rhs_inst);
 
    bool try_copy_propagation(vec4_instruction *inst, int arg,
-                             src_reg *values[4]);
+                             struct copy_entry *entry);
 
    /** Walks an exec_list of ir_instruction and sends it through this visitor. 
*/
    void visit_instructions(const exec_list *list);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
index 3242c3a..3194c8d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -36,10 +36,17 @@ extern "C" {
 
 namespace brw {
 
+struct copy_entry {
+   src_reg *value[4];
+   enum opcode opcode;
+};
+
 static bool
-is_direct_copy(vec4_instruction *inst)
+can_propagate_from(struct brw_context *brw, vec4_instruction *inst)
+
 {
-   return (inst->opcode == BRW_OPCODE_MOV &&
+   return ((inst->opcode == BRW_OPCODE_MOV ||
+            (inst->opcode == BRW_OPCODE_NOT && brw->gen >= 8)) &&
           !inst->predicate &&
           inst->dst.file == GRF &&
           !inst->saturate &&
@@ -206,22 +213,22 @@ is_logic_op(enum opcode opcode)
 
 bool
 vec4_visitor::try_copy_propagation(vec4_instruction *inst, int arg,
-                                   src_reg *values[4])
+                                   struct copy_entry *entry)
 {
    /* For constant propagation, we only handle the same constant
     * across all 4 channels.  Some day, we should handle the 8-bit
     * float vector format, which would let us constant propagate
     * vectors better.
     */
-   src_reg value = *values[0];
+   src_reg value = *(entry->value[0]);
    for (int i = 1; i < 4; i++) {
       /* This is equals() except we don't care about the swizzle. */
-      if (value.file != values[i]->file ||
-         value.reg != values[i]->reg ||
-         value.reg_offset != values[i]->reg_offset ||
-         value.type != values[i]->type ||
-         value.negate != values[i]->negate ||
-         value.abs != values[i]->abs) {
+      if (value.file != entry->value[i]->file ||
+         value.reg != entry->value[i]->reg ||
+         value.reg_offset != entry->value[i]->reg_offset ||
+         value.type != entry->value[i]->type ||
+         value.negate != entry->value[i]->negate ||
+         value.abs != entry->value[i]->abs) {
         return false;
       }
    }
@@ -232,7 +239,7 @@ vec4_visitor::try_copy_propagation(vec4_instruction *inst, int arg,
     */
    int s[4];
    for (int i = 0; i < 4; i++) {
-      s[i] = BRW_GET_SWZ(values[i]->swizzle,
+      s[i] = BRW_GET_SWZ(entry->value[i]->swizzle,
                         BRW_GET_SWZ(inst->src[arg].swizzle, i));
    }
    value.swizzle = BRW_SWIZZLE4(s[0], s[1], s[2], s[3]);
@@ -243,7 +250,11 @@ vec4_visitor::try_copy_propagation(vec4_instruction *inst, int arg,
       return false;
 
    if (brw->gen >= 8) {
-      if (value.negate) {
+      if (entry->opcode == BRW_OPCODE_NOT) {
+         if (!is_logic_op(inst->opcode)) {
+            return false;
+         }
+      } else if (value.negate) {
          if (is_logic_op(inst->opcode)) {
             return false;
          }
@@ -294,6 +305,10 @@ vec4_visitor::try_copy_propagation(vec4_instruction *inst, int arg,
 
    value.type = inst->src[arg].type;
    inst->src[arg] = value;
+
+   if (brw->gen >= 8 && entry->opcode == BRW_OPCODE_NOT)
+      inst->src[arg].negate ^= !value.negate;
+
    return true;
 }
 
@@ -301,9 +316,9 @@ bool
 vec4_visitor::opt_copy_propagation()
 {
    bool progress = false;
-   src_reg *cur_value[virtual_grf_reg_count][4];
+   struct copy_entry entries[virtual_grf_reg_count];
 
-   memset(&cur_value, 0, sizeof(cur_value));
+   memset(&entries, 0, sizeof(entries));
 
    foreach_list(node, &this->instructions) {
       vec4_instruction *inst = (vec4_instruction *)node;
@@ -316,7 +331,7 @@ vec4_visitor::opt_copy_propagation()
        * src/glsl/opt_copy_propagation.cpp to track available copies.
        */
       if (!is_dominated_by_previous_instruction(inst)) {
-        memset(cur_value, 0, sizeof(cur_value));
+        memset(&entries, 0, sizeof(entries));
         continue;
       }
 
@@ -337,31 +352,32 @@ vec4_visitor::opt_copy_propagation()
 
         /* Find the regs that each swizzle component came from.
          */
-        src_reg *values[4];
+        struct copy_entry entry;
         int c;
         for (c = 0; c < 4; c++) {
-           values[c] = cur_value[reg][BRW_GET_SWZ(inst->src[i].swizzle, c)];
+           entry.value[c] = 
entries[reg].value[BRW_GET_SWZ(inst->src[i].swizzle, c)];
 
            /* If there's no available copy for this channel, bail.
             * We could be more aggressive here -- some channels might
             * not get used based on the destination writemask.
             */
-           if (!values[c])
+           if (!entry.value[c])
               break;
 
            /* We'll only be able to copy propagate if the sources are
             * all from the same file -- there's no ability to swizzle
             * 0 or 1 constants in with source registers like in i915.
             */
-           if (c > 0 && values[c - 1]->file != values[c]->file)
+           if (c > 0 && entry.value[c - 1]->file != entry.value[c]->file)
               break;
         }
 
         if (c != 4)
            continue;
 
-        if (try_constant_propagation(inst, i, values) ||
-            try_copy_propagation(inst, i, values))
+         entry.opcode = entries[reg].opcode;
+         if (try_constant_propagation(inst, i, entry.value) ||
+            try_copy_propagation(inst, i, &entry))
            progress = true;
       }
 
@@ -374,10 +390,11 @@ vec4_visitor::opt_copy_propagation()
          * the value is the newly propagated source.  Otherwise, we don't know
          * the new value, so clear it.
          */
-        bool direct_copy = is_direct_copy(inst);
+        bool propagate = can_propagate_from(brw, inst);
         for (int i = 0; i < 4; i++) {
            if (inst->dst.writemask & (1 << i)) {
-              cur_value[reg][i] = direct_copy ? &inst->src[0] : NULL;
+              entries[reg].value[i] = propagate ? &inst->src[0] : NULL;
+              entries[reg].opcode = inst->opcode;
            }
         }
 
@@ -385,12 +402,12 @@ vec4_visitor::opt_copy_propagation()
          * our destination's updated channels, as the two are no longer equal.
          */
         if (inst->dst.reladdr)
-           memset(cur_value, 0, sizeof(cur_value));
+           memset(&entries, 0, sizeof(entries));
         else {
            for (int i = 0; i < virtual_grf_reg_count; i++) {
               for (int j = 0; j < 4; j++) {
-                 if (is_channel_updated(inst, cur_value[i], j)){
-                    cur_value[i][j] = NULL;
+                 if (is_channel_updated(inst, entries[i].value, j)){
+                    entries[i].value[j] = NULL;
                  }
               }
            }
-- 
1.8.3.2

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to