Implement (X >> C) NE/EQ 0 -> X LT/GE 0 in match.pd instead of fold-const.cc.
Bootstrapped and tested on x86_64 and aarch64.
PR tree-optimization/123109
gcc/ChangeLog:
* fold-const.cc (fold_binary_loc): Remove (X >> C) NE/EQ 0 -> X LT/GE 0
folding.
* match.pd (`(X >> C) NE/EQ 0 -> X LT/GE 0`): New pattern.
gcc/testsuite/ChangeLog:
* gcc.dg/tree-ssa/vrp99.c: Update test.
* gcc.dg/pr123109.c: New test.
Signed-off-by: Pengxuan Zheng <[email protected]>
---
gcc/fold-const.cc | 34 ---------------------------
gcc/match.pd | 22 +++++++++++++++++
gcc/testsuite/gcc.dg/pr123109.c | 25 ++++++++++++++++++++
gcc/testsuite/gcc.dg/tree-ssa/vrp99.c | 2 +-
4 files changed, 48 insertions(+), 35 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/pr123109.c
diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 52c92ad66b5..7cab68c887c 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -12393,40 +12393,6 @@ fold_binary_loc (location_t loc, enum tree_code code, tree type,
build_int_cst (TREE_TYPE (iref), 0));
}
}
-
- /* Fold (X >> C) != 0 into X < 0 if C is one less than the width
- of X. Similarly fold (X >> C) == 0 into X >= 0. */
- if (TREE_CODE (arg0) == RSHIFT_EXPR
- && integer_zerop (arg1)
- && TREE_CODE (TREE_OPERAND (arg0, 1)) == INTEGER_CST)
- {
- tree arg00 = TREE_OPERAND (arg0, 0);
- tree arg01 = TREE_OPERAND (arg0, 1);
- tree itype = TREE_TYPE (arg00);
- if (wi::to_wide (arg01) == element_precision (itype) - 1)
- {
- if (TYPE_UNSIGNED (itype))
- {
- itype = signed_type_for (itype);
- arg00 = fold_convert_loc (loc, itype, arg00);
- }
- enum tree_code code2 = code == EQ_EXPR ? GE_EXPR : LT_EXPR;
- /* Make sure to transform vector compares only to supported
- ones or from unsupported ones and check that only after
- IPA so offloaded code is handled correctly in this regard. */
- if (!VECTOR_TYPE_P (itype)
- || (cfun
- && cfun->after_inlining
- /* We can jump on EQ/NE but not GE/LT. */
- && VECTOR_BOOLEAN_TYPE_P (type)
- && (expand_vec_cmp_expr_p (itype, type, code2)
- || !expand_vec_cmp_expr_p (TREE_TYPE (op0),
- type, code))))
- return fold_build2_loc (loc, code2,
- type, arg00, build_zero_cst (itype));
- }
- }
-
/* Fold (~X & C) == 0 into (X & C) != 0 and (~X & C) != 0 into
(X & C) == 0 when C is a single bit. */
if (TREE_CODE (arg0) == BIT_AND_EXPR
diff --git a/gcc/match.pd b/gcc/match.pd
index f73f66d19a9..287a430ae5a 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -2824,6 +2824,28 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(le (minus (convert:etype @0) { lo; }) { hi; })
(gt (minus (convert:etype @0) { lo; }) { hi; })))))))))
+/* Fold (X >> C) != 0 into X < 0 if C is one less than the width
+ of X. Similarly fold (X >> C) == 0 into X >= 0. */
+(for neeq (ne eq)
+ ltge (lt ge)
+ (simplify
+ (neeq
+ (rshift@2 @0 INTEGER_CST@1)
+ integer_zerop)
+ (with { tree itype = signed_type_for (TREE_TYPE (@0)); }
+ /* Make sure to transform vector compares only to supported
+ ones or from unsupported ones and check that only after
+ IPA so offloaded code is handled correctly in this regard. */
+ (if (wi::to_wide (@1) == element_precision (itype) - 1
+ && (!VECTOR_TYPE_P (itype)
+ || (cfun
+ && cfun->after_inlining
+ && VECTOR_BOOLEAN_TYPE_P (type)
+ && (expand_vec_cmp_expr_p (itype, type, ltge)
+ || !expand_vec_cmp_expr_p (TREE_TYPE (@2),
+ type, neeq)))))
+ (ltge (convert:itype @0) { build_zero_cst (itype); })))))
+
/* X + Z < Y + Z is the same as X < Y when there is no overflow. */
(for op (lt le ge gt)
(simplify
diff --git a/gcc/testsuite/gcc.dg/pr123109.c b/gcc/testsuite/gcc.dg/pr123109.c
new file mode 100644
index 00000000000..9b83d578713
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr123109.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+typedef int v4si __attribute__((vector_size(4 * sizeof(int))));
+typedef unsigned int v4usi __attribute__((vector_size(4 * sizeof(unsigned int))));
+
+#define TEST_NE(type) \
+ type test_ne_##type (type a) { return (a >> 31) != 0; }
+
+#define TEST_EQ(type) \
+ type test_eq_##type (type a) { return (a >> 31) == 0; }
+
+TEST_NE(int)
+TEST_NE(unsigned)
+TEST_NE(v4si)
+TEST_NE(v4usi)
+TEST_EQ(int)
+TEST_EQ(unsigned)
+TEST_EQ(v4si)
+TEST_EQ(v4usi)
+
+/* { dg-final { scan-tree-dump-times ">= 0" 2 optimized } } */
+/* { dg-final { scan-tree-dump-times "< 0" 2 optimized } } */
+/* { dg-final { scan-tree-dump-times ">= { 0, 0, 0, 0 }" 2 optimized } } */
+/* { dg-final { scan-tree-dump-times ">> 31" 2 optimized } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vrp99.c b/gcc/testsuite/gcc.dg/tree-ssa/vrp99.c
index baa7a706fd9..83c37c3771d 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/vrp99.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/vrp99.c
@@ -3,7 +3,7 @@
unsigned f(unsigned i){
i >>= __SIZEOF_INT__ * __CHAR_BIT__ - 1;
- return i == 0;
+ return i == 1;
}
/* { dg-final { scan-tree-dump-not "\\(unsigned int\\)" "vrp1" } } */
--
2.34.1