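We can optimize (vec_cond eq/ne vec_cond) when each vec_cond is the
result of a vector comparison (vec CMP vec). The optimization relies
on the observation that in such a vec_cond, (-1 != 0) is true, so the
two results differ exactly in the lanes where their conditions differ.
We can therefore generate one vec_cond on the xor of the conditions,
resulting in a single VEC_COND_EXPR instead of 3.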

The patch adds match pattern for vec a, b:
(a ? -1 : 0) != (b ? -1 : 0) --> (a^b) ? -1 : 0
(a ? -1 : 0) == (b ? -1 : 0) --> ~(a^b) ? -1 : 0

        PR tree-optimization/111150

gcc/ChangeLog:

        * match.pd: Add pattern for (a ? -1 : 0) eq/ne (b ? -1 : 0).

gcc/testsuite/ChangeLog:

        * gcc.dg/tree-ssa/pr111150.c: New test.

Signed-off-by: Eikansh Gupta <quic_eikag...@quicinc.com>
---
 gcc/match.pd                             | 18 ++++++++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/pr111150.c | 19 +++++++++++++++++++
 2 files changed, 37 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr111150.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 3d0689c9312..5cb78bd7ff9 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5522,6 +5522,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (vec_cond (bit_and (bit_not @0) @1) @2 @3)))
 #endif
 
+/* (a ? -1 : 0) != (b ? -1 : 0) --> (a^b) ? -1 : 0 */
+/* (a ? -1 : 0) == (b ? -1 : 0) --> ~(a^b) ? -1 : 0, i.e. (a^b) ? 0 : -1 */
+(for eqne (eq ne)
+ (simplify
+  (eqne:c (vec_cond @0 uniform_integer_cst_p@2 uniform_integer_cst_p@3)
+         (vec_cond @1 @2 @3))
+  (with
+   {
+     /* NE yields -1 where a^b is set; EQ needs the arms swapped.  */
+     tree newop1 = @2, newop2 = @3;
+     if (eqne == EQ_EXPR)
+       std::swap (newop1, newop2);
+   }
+   (if (integer_all_onesp (@2) && integer_zerop (@3))
+    (vec_cond (bit_xor @0 @1) {newop1;} {newop2;})
+    (if (integer_all_onesp (@3) && integer_zerop (@2))
+     (vec_cond (bit_xor @0 @1) {newop2;} {newop1;}))))))
+
 /* Canonicalize mask ? { 0, ... } : { -1, ...} to ~mask if the mask
    types are compatible.  */
 (simplify
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr111150.c b/gcc/testsuite/gcc.dg/tree-ssa/pr111150.c
new file mode 100644
index 00000000000..d10564fd722
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr111150.c
@@ -0,0 +1,19 @@
+/* PR tree-optimization/111150 */
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-forwprop1" } */
+
+typedef int v4si __attribute((__vector_size__(4 * sizeof(int))));
+
+v4si f1_(v4si a, v4si b, v4si c, v4si d) {
+  v4si X = a == b;  /* Per-lane mask from a vector comparison.  */
+  v4si Y = c == d;  /* Second per-lane mask.  */
+  return (X != Y);  /* NE of two masks: expected to become one VEC_COND_EXPR.  */
+}
+
+v4si f2_(v4si a, v4si b, v4si c, v4si d) {
+  v4si X = a == b;  /* Per-lane mask from a vector comparison.  */
+  v4si Y = c == d;  /* Second per-lane mask.  */
+  return (X == Y);  /* EQ of two masks: expected to become one VEC_COND_EXPR.  */
+}
+
+/* { dg-final { scan-tree-dump-times " VEC_COND_EXPR " 2 "forwprop1" } } */
-- 
2.17.1

Reply via email to