The negation source modifier on src registers has changed meaning in Broadwell
when used with logical operations. Don't copy propagate when the negate src
modifier is set and the destination instruction is a logical op.
Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com
---
src/mesa/drivers/dri/i965/brw_vec4.h | 4 +-
.../drivers/dri/i965/brw_vec4_copy_propagation.cpp | 68 +++---
2 files changed, 49 insertions(+), 23 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h
b/src/mesa/drivers/dri/i965/brw_vec4.h
index fd58b3c..51da46c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -228,6 +228,8 @@ writemask(dst_reg reg, unsigned mask)
return reg;
}
+struct copy_entry;
+
class vec4_instruction : public backend_instruction {
public:
DECLARE_RALLOC_CXX_OPERATORS(vec4_instruction)
@@ -498,7 +500,7 @@ public:
vec4_instruction *last_rhs_inst);
bool try_copy_propagation(vec4_instruction *inst, int arg,
- src_reg *values[4]);
+ struct copy_entry *entry);
/** Walks an exec_list of ir_instruction and sends it through this visitor.
*/
void visit_instructions(const exec_list *list);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
index 83cf191..e537895 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -36,6 +36,11 @@ extern "C" {
namespace brw {
+struct copy_entry {
+ src_reg *value[4];
+ enum opcode opcode;
+};
+
static bool
is_direct_copy(vec4_instruction *inst)
{
@@ -195,24 +200,33 @@ try_constant_propagation(vec4_instruction *inst, int arg,
src_reg *values[4])
return false;
}
+static bool
+is_logic_op(enum opcode opcode)
+{
+ return (opcode == BRW_OPCODE_AND ||
+ opcode == BRW_OPCODE_OR ||
+ opcode == BRW_OPCODE_XOR ||
+ opcode == BRW_OPCODE_NOT);
+}
+
bool
vec4_visitor::try_copy_propagation(vec4_instruction *inst, int arg,
- src_reg *values[4])
+ struct copy_entry *entry)
{
/* For constant propagation, we only handle the same constant
* across all 4 channels. Some day, we should handle the 8-bit
* float vector format, which would let us constant propagate
* vectors better.
*/
- src_reg value = *values[0];
+ src_reg value = *(entry->value[0]);
for (int i = 1; i < 4; i++) {
/* This is equals() except we don't care about the swizzle. */
- if (value.file != values[i]->file ||
- value.reg != values[i]->reg ||
- value.reg_offset != values[i]->reg_offset ||
- value.type != values[i]->type ||
- value.negate != values[i]->negate ||
- value.abs != values[i]->abs) {
+ if (value.file != entry->value[i]->file ||
+ value.reg != entry->value[i]->reg ||
+ value.reg_offset != entry->value[i]->reg_offset ||
+ value.type != entry->value[i]->type ||
+ value.negate != entry->value[i]->negate ||
+ value.abs != entry->value[i]->abs) {
return false;
}
}
@@ -223,7 +237,7 @@ vec4_visitor::try_copy_propagation(vec4_instruction *inst,
int arg,
*/
int s[4];
for (int i = 0; i < 4; i++) {
- s[i] = BRW_GET_SWZ(values[i]->swizzle,
+ s[i] = BRW_GET_SWZ(entry->value[i]->swizzle,
BRW_GET_SWZ(inst->src[arg].swizzle, i));
}
value.swizzle = BRW_SWIZZLE4(s[0], s[1], s[2], s[3]);
@@ -233,6 +247,14 @@ vec4_visitor::try_copy_propagation(vec4_instruction *inst,
int arg,
value.file != ATTR)
return false;
+ if (brw->gen >= 8) {
+ if (value.negate) {
+ if (is_logic_op(inst->opcode)) {
+return false;
+ }
+ }
+ }
+
if (inst->src[arg].abs) {
value.negate = false;
value.abs = true;
@@ -284,9 +306,9 @@ bool
vec4_visitor::opt_copy_propagation()
{
bool progress = false;
- src_reg *cur_value[virtual_grf_reg_count][4];
+ struct copy_entry entries[virtual_grf_reg_count];
- memset(cur_value, 0, sizeof(cur_value));
+ memset(entries, 0, sizeof(entries));
foreach_list(node, this->instructions) {
vec4_instruction *inst = (vec4_instruction *)node;
@@ -299,7 +321,7 @@ vec4_visitor::opt_copy_propagation()
* src/glsl/opt_copy_propagation.cpp to track available copies.
*/
if (!is_dominated_by_previous_instruction(inst)) {
-memset(cur_value, 0, sizeof(cur_value));
+memset(entries, 0, sizeof(entries));
continue;
}
@@ -320,31 +342,32 @@ vec4_visitor::opt_copy_propagation()
/* Find the regs that each swizzle component came from.
*/
-src_reg *values[4];
+struct copy_entry entry;
int c;
for (c = 0;