Hi!

The following testcase is miscompiled because of a flawed optimization.
If one changes the 65 in the testcase to e.g. 66, one gets:
...
  _25 = .USUBC (0, _24, _14);
  _12 = IMAGPART_EXPR <_25>;
  _26 = REALPART_EXPR <_25>;
  if (_23 >= 1)
    goto <bb 8>; [80.00%]
  else
    goto <bb 11>; [20.00%]

  <bb 8> :
  if (_23 != 1)
    goto <bb 10>; [80.00%]
  else
    goto <bb 9>; [20.00%]

  <bb 9> :
  _27 = (signed long) _26;
  _28 = _27 >> 1;
  _29 = (unsigned long) _28;
  _31 = _29 + 1;
  _30 = _31 > 1;
  goto <bb 11>; [100.00%]

  <bb 10> :
  _32 = _26 != _18;
  _33 = _22 | _32;

  <bb 11> :
  # _17 = PHI <_30(9), _22(7), _33(10)>
  # _19 = PHI <_29(9), _18(7), _18(10)>
...
so there is one path for limbs below the boundary (in this case there are
actually no limbs there, maybe we could consider optimizing that further,
say with simply folding that _23 >= 1 condition to 1 == 1 and letting
cfg cleanup handle it), another case where it is exactly the limb on the
boundary (that is the bb 9 handling where it extracts the interesting
bits (the first 3 statements) and then checks if it is zero or all ones and
finally the case of limbs above that where it compares the current result
limb against the previously recorded 0 or all ones and ors differences into
accumulated result.

Now, the optimization which the first hunk removes was based on the idea
that for that case the extraction of the interesting bits from the limb
don't need anything special, so the _27/_28/_29 statements above aren't
needed, the whole limb is interesting bits, so it handled the >= 1
case like the bb 9 above without the first 3 statements and bb 10 wasn't
there at all.  There are 2 problems with that, for the higher limbs it
only checks if the the result limb bits are all zeros or all ones, but
doesn't check if they are the same as the other extension bits, and
it forgets the previous flag whether there was an overflow.
First I wanted to fix it just by adding the _33 = _22 | _30; statement
to the end of bb 9 above, which fixed the originally filed huge testcase
and the first 2 foo calls in the testcase included in the patch, it no
longer forgets about previously checked differences from 0/1.
But as the last 2 foo calls show, it still didn't check whether each
even (or each odd depending on the exact position) result limb is
equal to the first one, so every second limb it could choose some other
0 vs. all ones value and as long as it repeated in another limb above it
it would be ok.

So, the optimization just can't work properly and the following patch
removes it.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk/14.2?

2024-06-07  Jakub Jelinek  <ja...@redhat.com>

        PR middle-end/115352
        * gimple-lower-bitint.cc (lower_addsub_overflow): Don't disable
        single_comparison if cmp_code is GE_EXPR.

        * gcc.dg/torture/bitint-71.c: New test.

--- gcc/gimple-lower-bitint.cc.jj       2024-04-12 10:59:48.233153262 +0200
+++ gcc/gimple-lower-bitint.cc  2024-06-06 12:06:57.065717651 +0200
@@ -4286,11 +4286,7 @@ bitint_large_huge::lower_addsub_overflow
                  bool single_comparison
                    = (startlimb + 2 >= fin || (startlimb & 1) != (i & 1));
                  if (!single_comparison)
-                   {
-                     cmp_code = GE_EXPR;
-                     if (!check_zero && (start % limb_prec) == 0)
-                       single_comparison = true;
-                   }
+                   cmp_code = GE_EXPR;
                  else if ((startlimb & 1) == (i & 1))
                    cmp_code = EQ_EXPR;
                  else
--- gcc/testsuite/gcc.dg/torture/bitint-71.c.jj 2024-06-06 12:20:55.824913276 
+0200
+++ gcc/testsuite/gcc.dg/torture/bitint-71.c    2024-06-06 12:20:45.260044338 
+0200
@@ -0,0 +1,28 @@
+/* PR middle-end/115352 */
+/* { dg-do run { target bitint } } */
+/* { dg-options "-std=c23" } */
+/* { dg-skip-if "" { ! run_expensive_tests }  { "*" } { "-O0" "-O2" } } */
+/* { dg-skip-if "" { ! run_expensive_tests } { "-flto" } { "" } } */
+
+#if __BITINT_MAXWIDTH__ >= 385
+int
+foo (_BitInt (385) b)
+{
+  return __builtin_sub_overflow_p (0, b, (_BitInt (65)) 0);
+}
+#endif
+
+int
+main ()
+{
+#if __BITINT_MAXWIDTH__ >= 385
+  if (!foo (-(_BitInt (385)) 
0x00000000000000000c377e8a3fd1881fff035bb487a51c9ed1f7350befa7ec4450000000000000000a3cf8d1ebb723981wb))
+    __builtin_abort ();
+  if (!foo 
(-0x1ffffffffffffffffc377e8a3fd1881fff035bb487a51c9ed1f7350befa7ec445ffffffffffffffffa3cf8d1ebb723981uwb))
+    __builtin_abort ();
+  if (!foo (-(_BitInt (385)) 
0x00000000000000000ffffffffffffffffffffffffffffffff00000000000000000000000000000000a3cf8d1ebb723981wb))
+    __builtin_abort ();
+  if (!foo 
(-0x1ffffffffffffffff00000000000000000000000000000000ffffffffffffffffffffffffffffffffa3cf8d1ebb723981uwb))
+    __builtin_abort ();
+#endif
+}

        Jakub

Reply via email to