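We can optimize (vec_cond eq/ne vec_cond) when each vec_cond is the result of a vector comparison (vec CMP vec). The optimization relies on the observation that each lane of such a vec_cond is either -1 or 0, and (-1 != 0) is true, so the two vec_cond results differ in a lane exactly when their conditions differ. We can therefore generate a vec_cond on the XOR of the two conditions, resulting in a single VEC_COND_EXPR instead of three.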
The patch adds match pattern for vec a, b: (a ? -1 : 0) != (b ? -1 : 0) --> (a^b) ? -1 : 0 (a ? -1 : 0) == (b ? -1 : 0) --> ~(a^b) ? -1 : 0 PR tree-optimization/111150 gcc/ChangeLog: * match.pd: Optimization for above mentioned pattern. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/pr111150.c: New test. Signed-off-by: Eikansh Gupta <quic_eikag...@quicinc.com> --- gcc/match.pd | 18 ++++++++++++++++++ gcc/testsuite/gcc.dg/tree-ssa/pr111150.c | 19 +++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr111150.c diff --git a/gcc/match.pd b/gcc/match.pd index 3d0689c9312..5cb78bd7ff9 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -5522,6 +5522,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (vec_cond (bit_and (bit_not @0) @1) @2 @3))) #endif +/* (a ? -1 : 0) != (b ? -1 : 0) --> (a^b) ? -1 : 0 */ +/* (a ? -1 : 0) == (b ? -1 : 0) --> ~(a^b) ? -1 : 0 */ +(for eqne (eq ne) + (simplify + (eqne:c (vec_cond @0 uniform_integer_cst_p@2 uniform_integer_cst_p@3) + (vec_cond @1 @2 @3)) + (with + { + tree newop1 = @2; + tree newop2 = @3; + if (eqne == NE_EXPR) + std::swap (newop1, newop2); + } + (if (integer_all_onesp (@2) && integer_zerop (@3)) + (vec_cond (bit_xor @0 @1) {newop1;} {newop2;}) + (if (integer_all_onesp (@3) && integer_zerop (@2)) + (vec_cond (bit_xor @0 @1) {newop2;} {newop1;})))))) + /* Canonicalize mask ? { 0, ... } : { -1, ...} to ~mask if the mask types are compatible. 
*/ (simplify diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr111150.c b/gcc/testsuite/gcc.dg/tree-ssa/pr111150.c new file mode 100644 index 00000000000..d10564fd722 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr111150.c @@ -0,0 +1,19 @@ +/* PR tree-optimization/111150 */ +/* { dg-do compile } */ +/* { dg-options "-O1 -fdump-tree-forwprop1" } */ + +typedef int v4si __attribute((__vector_size__(4 * sizeof(int)))); + +v4si f1_(v4si a, v4si b, v4si c, v4si d) { + v4si X = a == b; + v4si Y = c == d; + return (X != Y); +} + +v4si f2_(v4si a, v4si b, v4si c, v4si d) { + v4si X = a == b; + v4si Y = c == d; + return (X == Y); +} + +/* { dg-final { scan-tree-dump-times " VEC_COND_EXPR " 2 "forwprop1" } } */ -- 2.17.1