https://gcc.gnu.org/g:a50b4406e5d64adb6bb0ebde3710f6742a0bdbed

commit r16-5022-ga50b4406e5d64adb6bb0ebde3710f6742a0bdbed
Author: Kishan Parmar <[email protected]>
Date:   Tue Nov 4 12:41:28 2025 +0530

    simplify-rtx: Canonicalize SUBREG and LSHIFTRT order for AND operations
    
    For a given rtx expression (and (lshiftrt (subreg X) shift) mask),
    the combine pass tries to simplify the RTL form to
    
       (and (subreg (lshiftrt X shift)) mask)
    
    where the SUBREG wraps the result of the shift.  This leaves the AND
    and the shift in different modes, which complicates recognition.  The
    patch canonicalizes such expressions back to
    
       (and (lshiftrt (subreg X) shift) mask)
    
    where the SUBREG is inside the shift and both operations share the same
    mode.  This form is easier to recognize across targets and enables
    cleaner pattern matching.
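    
    As a sketch of where this shape arises (illustrative only; the exact
    RTL depends on the target and options, and the function, shift
    amount, and mask below are hypothetical), narrowing a shifted wider
    value can produce it on LP64 targets:
    
       unsigned int
       f (unsigned long long x)
       {
         /* Shift in DImode, result truncated to SImode, then masked;
            combine can build (and:SI (subreg:SI (lshiftrt:DI ...)) ...).  */
         return (x >> 12) & 0xff;
       }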
    
    The transformation is implemented in simplify-rtx.cc and applied
    only when it is safe: the SUBREG must be a lowpart, the shift amount
    must be valid, and the precision of the operation must be preserved.
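    
    As a worked instance of the precision check (values hypothetical):
    with shift = 12 and mask = 0xff, floor_log2 (0xff) = 7 and
    12 + 7 = 19, which is less than the 32-bit precision of SImode, so
    every bit kept by the mask survives the lowpart truncation and the
    rewrite loses no information.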
    
    Tested on powerpc64le-linux-gnu, powerpc64-linux-gnu, and
    x86_64-pc-linux-gnu with no regressions.  On rs6000, the change reduces
    insn counts due to improved matching.
    
    2025-11-04  Kishan Parmar  <[email protected]>
    
    gcc/ChangeLog:
    
            PR rtl-optimization/93738
            * simplify-rtx.cc (simplify_binary_operation_1): Canonicalize
            SUBREG(LSHIFTRT) into LSHIFTRT(SUBREG) when valid.
    
    gcc/testsuite/ChangeLog:
    
            PR rtl-optimization/93738
            * gcc.target/powerpc/rlwimi-2.c: Update expected rldicl count.

Diff:
---
 gcc/simplify-rtx.cc                         | 40 +++++++++++++++++++++++++++++
 gcc/testsuite/gcc.target/powerpc/rlwimi-2.c |  2 +-
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index 53592d2a2f4d..59a86c6c6cd5 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -4184,6 +4184,46 @@ simplify_context::simplify_binary_operation_1 (rtx_code code,
                 not do an AND.  */
              if ((nzop0 & ~val1) == 0)
                return op0;
+
+             /* Canonicalize (and (subreg (lshiftrt X shift)) mask) into
+                (and (lshiftrt (subreg X) shift) mask).
+
+                Keeps shift and AND in the same mode, improving recognition.
+                Only applied when subreg is a lowpart, shift is valid,
+                and no precision is lost.  */
+             if (SUBREG_P (op0) && subreg_lowpart_p (op0)
+                 && GET_CODE (XEXP (op0, 0)) == LSHIFTRT
+                 && CONST_INT_P (XEXP (XEXP (op0, 0), 1))
+                 && INTVAL (XEXP (XEXP (op0, 0), 1)) >= 0
+                 && INTVAL (XEXP (XEXP (op0, 0), 1)) < HOST_BITS_PER_WIDE_INT
+                 && ((INTVAL (XEXP (XEXP (op0, 0), 1))
+                     + floor_log2 (val1))
+                     < GET_MODE_PRECISION (as_a <scalar_int_mode> (mode))))
+               {
+                 tem = XEXP (XEXP (op0, 0), 0);
+                 if (SUBREG_P (tem))
+                   {
+                     if (subreg_lowpart_p (tem))
+                       tem = SUBREG_REG (tem);
+                     else
+                       tem = NULL_RTX;
+                   }
+                 if (tem != NULL_RTX)
+                   {
+                     offset = subreg_lowpart_offset (mode, GET_MODE (tem));
+                     tem = simplify_gen_subreg (mode, tem, GET_MODE (tem),
+                                                offset);
+                     if (tem)
+                       {
+                         unsigned shiftamt = INTVAL (XEXP (XEXP (op0, 0), 1));
+                         rtx shiftamtrtx = gen_int_shift_amount (mode,
+                                                                 shiftamt);
+                         op0 = simplify_gen_binary (LSHIFTRT, mode, tem,
+                                                    shiftamtrtx);
+                         return simplify_gen_binary (AND, mode, op0, op1);
+                       }
+                   }
+               }
            }
          nzop1 = nonzero_bits (trueop1, mode);
          /* If we are clearing all the nonzero bits, the result is zero.  */
diff --git a/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c b/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
index bafa371db73f..afbde0e5fc60 100644
--- a/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
@@ -6,7 +6,7 @@
 /* { dg-final { scan-assembler-times {(?n)^\s+blr} 6750 } } */
 /* { dg-final { scan-assembler-times {(?n)^\s+mr} 643 { target ilp32 } } } */
 /* { dg-final { scan-assembler-times {(?n)^\s+mr} 11 { target lp64 } } } */
-/* { dg-final { scan-assembler-times {(?n)^\s+rldicl} 7790 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {(?n)^\s+rldicl} 6754 { target lp64 } } } */
 
 /* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1692 { target ilp32 } } } */
 /* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1666 { target lp64 } } } */
