On Broadwell, a NOT whose result feeds a logical instruction can be copy-propagated into that instruction's source negate flag, reducing the NOT plus logical-instruction pair to a single operation.
Signed-off-by: Abdiel Janulgue <abdiel.janul...@linux.intel.com> --- v3 [mattst88]: Move bits not used by patch 4 into this. src/mesa/drivers/dri/i965/brw_vec4.h | 4 +- .../drivers/dri/i965/brw_vec4_copy_propagation.cpp | 69 ++++++++++++++-------- 2 files changed, 46 insertions(+), 27 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index c2bbd68..6c4952f 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -228,6 +228,8 @@ writemask(dst_reg reg, unsigned mask) return reg; } +struct copy_entry; + class vec4_instruction : public backend_instruction { public: DECLARE_RALLOC_CXX_OPERATORS(vec4_instruction) @@ -498,7 +500,7 @@ public: vec4_instruction *last_rhs_inst); bool try_copy_propagation(vec4_instruction *inst, int arg, - src_reg *values[4]); + struct copy_entry *entry); /** Walks an exec_list of ir_instruction and sends it through this visitor. */ void visit_instructions(const exec_list *list); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp index 3242c3a..3194c8d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp @@ -36,10 +36,17 @@ extern "C" { namespace brw { +struct copy_entry { + src_reg *value[4]; + enum opcode opcode; +}; + static bool -is_direct_copy(vec4_instruction *inst) +can_propagate_from(struct brw_context *brw, vec4_instruction *inst) + { - return (inst->opcode == BRW_OPCODE_MOV && + return ((inst->opcode == BRW_OPCODE_MOV || + (inst->opcode == BRW_OPCODE_NOT && brw->gen >= 8)) && !inst->predicate && inst->dst.file == GRF && !inst->saturate && @@ -206,22 +213,22 @@ is_logic_op(enum opcode opcode) bool vec4_visitor::try_copy_propagation(vec4_instruction *inst, int arg, - src_reg *values[4]) + struct copy_entry *entry) { /* For constant propagation, we only handle the same constant * across all 4 
channels. Some day, we should handle the 8-bit * float vector format, which would let us constant propagate * vectors better. */ - src_reg value = *values[0]; + src_reg value = *(entry->value[0]); for (int i = 1; i < 4; i++) { /* This is equals() except we don't care about the swizzle. */ - if (value.file != values[i]->file || - value.reg != values[i]->reg || - value.reg_offset != values[i]->reg_offset || - value.type != values[i]->type || - value.negate != values[i]->negate || - value.abs != values[i]->abs) { + if (value.file != entry->value[i]->file || + value.reg != entry->value[i]->reg || + value.reg_offset != entry->value[i]->reg_offset || + value.type != entry->value[i]->type || + value.negate != entry->value[i]->negate || + value.abs != entry->value[i]->abs) { return false; } } @@ -232,7 +239,7 @@ vec4_visitor::try_copy_propagation(vec4_instruction *inst, int arg, */ int s[4]; for (int i = 0; i < 4; i++) { - s[i] = BRW_GET_SWZ(values[i]->swizzle, + s[i] = BRW_GET_SWZ(entry->value[i]->swizzle, BRW_GET_SWZ(inst->src[arg].swizzle, i)); } value.swizzle = BRW_SWIZZLE4(s[0], s[1], s[2], s[3]); @@ -243,7 +250,11 @@ vec4_visitor::try_copy_propagation(vec4_instruction *inst, int arg, return false; if (brw->gen >= 8) { - if (value.negate) { + if (entry->opcode == BRW_OPCODE_NOT) { + if (!is_logic_op(inst->opcode)) { + return false; + } + } else if (value.negate) { if (is_logic_op(inst->opcode)) { return false; } @@ -294,6 +305,10 @@ vec4_visitor::try_copy_propagation(vec4_instruction *inst, int arg, value.type = inst->src[arg].type; inst->src[arg] = value; + + if (brw->gen >= 8 && entry->opcode == BRW_OPCODE_NOT) + inst->src[arg].negate ^= !value.negate; + return true; } @@ -301,9 +316,9 @@ bool vec4_visitor::opt_copy_propagation() { bool progress = false; - src_reg *cur_value[virtual_grf_reg_count][4]; + struct copy_entry entries[virtual_grf_reg_count]; - memset(&cur_value, 0, sizeof(cur_value)); + memset(&entries, 0, sizeof(entries)); foreach_list(node, 
&this->instructions) { vec4_instruction *inst = (vec4_instruction *)node; @@ -316,7 +331,7 @@ vec4_visitor::opt_copy_propagation() * src/glsl/opt_copy_propagation.cpp to track available copies. */ if (!is_dominated_by_previous_instruction(inst)) { - memset(cur_value, 0, sizeof(cur_value)); + memset(&entries, 0, sizeof(entries)); continue; } @@ -337,31 +352,32 @@ vec4_visitor::opt_copy_propagation() /* Find the regs that each swizzle component came from. */ - src_reg *values[4]; + struct copy_entry entry; int c; for (c = 0; c < 4; c++) { - values[c] = cur_value[reg][BRW_GET_SWZ(inst->src[i].swizzle, c)]; + entry.value[c] = entries[reg].value[BRW_GET_SWZ(inst->src[i].swizzle, c)]; /* If there's no available copy for this channel, bail. * We could be more aggressive here -- some channels might * not get used based on the destination writemask. */ - if (!values[c]) + if (!entry.value[c]) break; /* We'll only be able to copy propagate if the sources are * all from the same file -- there's no ability to swizzle * 0 or 1 constants in with source registers like in i915. */ - if (c > 0 && values[c - 1]->file != values[c]->file) + if (c > 0 && entry.value[c - 1]->file != entry.value[c]->file) break; } if (c != 4) continue; - if (try_constant_propagation(inst, i, values) || - try_copy_propagation(inst, i, values)) + entry.opcode = entries[reg].opcode; + if (try_constant_propagation(inst, i, entry.value) || + try_copy_propagation(inst, i, &entry)) progress = true; } @@ -374,10 +390,11 @@ vec4_visitor::opt_copy_propagation() * the value is the newly propagated source. Otherwise, we don't know * the new value, so clear it. */ - bool direct_copy = is_direct_copy(inst); + bool propagate = can_propagate_from(brw, inst); for (int i = 0; i < 4; i++) { if (inst->dst.writemask & (1 << i)) { - cur_value[reg][i] = direct_copy ? &inst->src[0] : NULL; + entries[reg].value[i] = propagate ? 
&inst->src[0] : NULL; + entries[reg].opcode = inst->opcode; } } @@ -385,12 +402,12 @@ vec4_visitor::opt_copy_propagation() * our destination's updated channels, as the two are no longer equal. */ if (inst->dst.reladdr) - memset(cur_value, 0, sizeof(cur_value)); + memset(&entries, 0, sizeof(entries)); else { for (int i = 0; i < virtual_grf_reg_count; i++) { for (int j = 0; j < 4; j++) { - if (is_channel_updated(inst, cur_value[i], j)){ - cur_value[i][j] = NULL; + if (is_channel_updated(inst, entries[i].value, j)){ + entries[i].value[j] = NULL; } } } -- 1.8.3.2 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev