Add two patterns to eliminate mispredicts in the following bit-operation
scenarios:
- checking whether a single bit is clear and, if so, setting it: always
set the bit;
- checking whether any bit of a bitmask is set and, if so, clearing the
bitmask: always clear the bitmask.
Bootstrapped and tested on x86_64-pc-linux-gnu.
PR tree-optimization/64567
gcc/ChangeLog:
* match.pd (`cond (bit_and A IMM) (bit_ior A IMM) A`): New
pattern.
(`cond (bit_and A IMM) (bit_and A ~IMM) A`): New pattern.
gcc/testsuite/ChangeLog:
* gcc.dg/tree-ssa/pr64567-2.c: New test.
* gcc.dg/tree-ssa/pr64567.c: New test.
---
Changes from v2:
- add a single bit check for the bit_and+bit_or optimization;
- simplify mask check for bit_and+bit_and optimization;
- turn pr64567.c into a run test that checks that these optimizations
are not applied to volatile variables;
- add a new run test to assert that the new optimizations do not
produce wrong results.
- v2 link: https://gcc.gnu.org/pipermail/gcc-patches/2026-February/708558.html
gcc/match.pd | 20 +++++++++
gcc/testsuite/gcc.dg/tree-ssa/pr64567-2.c | 52 +++++++++++++++++++++++
gcc/testsuite/gcc.dg/tree-ssa/pr64567.c | 40 +++++++++++++++++
3 files changed, 112 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr64567-2.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr64567.c
diff --git a/gcc/match.pd b/gcc/match.pd
index 8910591a04b..bca21fb7a9f 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -6316,6 +6316,26 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
&& !expand_vec_cond_expr_p (TREE_TYPE (@1), TREE_TYPE (@0)))))
(vec_cond @0 (op!:type @3 @1) (op!:type @3 @2)))))
+/* If we have a "if a bit is not set, set it" case,
+ just set the bit all the time (PR 64567). Note that
+ this does not work if we're checking for more than one
+ bit, e.g. ((a & 5) == 0 ? a | 5 : a) must yield 1 for
+ a = 1, but always setting would return 5. */
+(simplify
+ (cond (eq (bit_and @0 INTEGER_CST@1) integer_zerop)
+ (bit_ior@2 @0 INTEGER_CST@1) @0)
+ (if (wi::popcount (wi::to_wide (@1)) == 1)
+ @2))
+
+/* A clear-bitmask version of the above: "if any bit of a
+ bitmask is set, clear the bitmask". If none is set, A & ~IMM
+ is A already, so always clear the bitmask (see PR 64567). */
+(simplify
+ (cond (ne (bit_and @0 INTEGER_CST@1) integer_zerop)
+ (bit_and@3 @0 INTEGER_CST@2) @0)
+ (if (wi::to_wide (@2) == ~wi::to_wide (@1))
+ @3))
+
#if GIMPLE
(match (nop_atomic_bit_test_and_p @0 @1 @4)
(bit_and (convert?@4 (ATOMIC_FETCH_OR_XOR_N @2 INTEGER_CST@0 @3))
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr64567-2.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr64567-2.c
new file mode 100644
index 00000000000..5b523d2cfe7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr64567-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+void abort(void);
+
+/* Macro adapted from builtin-object-size-common.h */
+#define FAIL() \
+ do { \
+ __builtin_printf ("Failure at line: %d\n", __LINE__); \
+ abort(); \
+ } while (0)
+
+
+__attribute__((noinline))
+unsigned mask_zero_set (unsigned val, unsigned mask)
+{
+ if ((val & mask) == 0)
+ val |= mask;
+
+ return val;
+}
+
+__attribute__((noinline))
+unsigned mask_notzero_clear (unsigned val, unsigned mask)
+{
+ if ((val & mask) != 0)
+ val &= ~mask;
+
+ return val;
+}
+
+int main (void) {
+ if (mask_zero_set (0, 1) != 1)
+ FAIL ();
+
+ if (mask_zero_set (4, 3) != 7)
+ FAIL ();
+
+ if (mask_zero_set (1, 5) != 1)
+ FAIL ();
+
+ if (mask_notzero_clear (7, 1) != 6)
+ FAIL ();
+
+ if (mask_notzero_clear (7, 3) != 4)
+ FAIL ();
+
+ if (mask_notzero_clear (8, 6) != 8)
+ FAIL ();
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr64567.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr64567.c
new file mode 100644
index 00000000000..51af71a3938
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr64567.c
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+#define F1 0x01
+#define F2 0x02
+
+#define DECLS(n,VOL) \
+__attribute__((noinline,noclone)) \
+unsigned foo##n(unsigned A) \
+{ \
+ VOL unsigned flags = A; \
+ if (flags & (F1 | F2)) \
+ flags &= ~(F1 | F2); \
+ return flags; \
+} \
+__attribute__((noinline,noclone)) \
+unsigned bar##n(unsigned A) \
+{ \
+ VOL unsigned flags = A; \
+ if (!(flags & F1)) \
+ flags |= F1; \
+ return flags; \
+} \
+
+DECLS(0,)
+DECLS(1,volatile)
+
+int main ()
+{
+ for(int A = 0; A < 4; A++)
+ {
+ if (foo0 (A) != foo1 (A)) __builtin_abort();
+ if (bar0 (A) != bar1 (A)) __builtin_abort();
+ }
+}
+
+/* foo1 and bar1 will add 2 gotos each since they are not
+ being optimized, 'main' will add +6 (2 gotos for the loop,
+ 2 gotos for each abort check). */
+/* { dg-final { scan-tree-dump-times " goto " 10 optimized } } */
--
2.43.0