We stripped a paradoxical subreg, whose unspecified bits had been masked by an AND. Combine converted that into a ZERO_EXTRACT, but in the narrower mode.

The end of make_compound_operation deals with mode mismatches by creating another paradoxical subreg. However, this time nothing masks the unspecified bits any more, so they remain unspecified, which results in wrong code.

Fix this by re-extending to the outer mode with a ZERO_EXTEND.

Approved by Segher in the PR.  Tested on x86_64.


r~
        PR rtl-opt/69535
        * combine.c (make_compound_operation): When looking through a
        subreg, make sure to re-extend to the width of the outer mode.
testsuite/
        * gcc.dg/pr69535.c: New test.

diff --git a/gcc/combine.c b/gcc/combine.c
index 858552d..c307793 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -7887,11 +7887,25 @@ make_compound_operation (rtx x, enum rtx_code in_code)
               && GET_CODE (SUBREG_REG (XEXP (x, 0))) == LSHIFTRT
               && (i = exact_log2 (UINTVAL (XEXP (x, 1)) + 1)) >= 0)
        {
-         new_rtx = make_compound_operation (XEXP (SUBREG_REG (XEXP (x, 0)), 0),
-                                        next_code);
-         new_rtx = make_extraction (GET_MODE (SUBREG_REG (XEXP (x, 0))), new_rtx, 0,
-                                XEXP (SUBREG_REG (XEXP (x, 0)), 1), i, 1,
-                                0, in_code == COMPARE);
+         rtx inner_x0 = SUBREG_REG (XEXP (x, 0));
+         machine_mode inner_mode = GET_MODE (inner_x0);
+         new_rtx = make_compound_operation (XEXP (inner_x0, 0), next_code);
+         new_rtx = make_extraction (inner_mode, new_rtx, 0,
+                                    XEXP (inner_x0, 1),
+                                    i, 1, 0, in_code == COMPARE);
+
+         if (new_rtx)
+           {
+             /* If we narrowed the mode when dropping the subreg, then
+                we must zero-extend to keep the semantics of the AND.  */
+             if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
+               ;
+             else if (SCALAR_INT_MODE_P (inner_mode))
+               new_rtx = simplify_gen_unary (ZERO_EXTEND, mode,
+                                             new_rtx, inner_mode);
+             else
+               new_rtx = NULL;
+           }
 
          /* If that didn't give anything, see if the AND simplifies on
             its own.  */
diff --git a/gcc/testsuite/gcc.dg/pr69535.c b/gcc/testsuite/gcc.dg/pr69535.c
new file mode 100644
index 0000000..4b4dad1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr69535.c
@@ -0,0 +1,36 @@
+/* { dg-do run { target int128 } } */
+/* { dg-options "-O -fno-tree-bit-ccp -fno-tree-reassoc" } */
+
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned int u32;
+typedef unsigned long long u64;
+typedef unsigned __int128 u128;
+
+void __attribute__((noinline, noclone))
+dirtify_stack(void)
+{
+  volatile char a[] = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+                      "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+                      "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+                      "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
+}
+
+u128 __attribute__ ((noinline, noclone))
+foo(u32 u32_1, u64 u64_1, u128 u128_1, u8 u8_2, u16 u16_2, u32 u32_2, u8 u8_3)
+{
+  u128_1 /= ~u128_1 | 1;
+  u8_3 = ((u8_3 << 2) >> 1) << 7;
+  u32_2 >>= u8_3;
+  return u128_1 + u32_2 + u8_3;
+}
+
+int
+main ()
+{
+  dirtify_stack();
+  u128 x = foo(1, 1, 1, 1, 1, 1, 0);
+  if (x != 1)
+    __builtin_abort();
+  return 0;
+}

Reply via email to