sub(a, b)) [PR116815]

Dhruv Chawla via Gcc-cvs Tue, 25 Nov 2025 22:18:35 -0800

https://gcc.gnu.org/g:3c378398111f7fc3c026b705e3ac088b27d4c307


commit r16-5611-g3c378398111f7fc3c026b705e3ac088b27d4c307
Author: Dhruv Chawla <[email protected]>
Date:   Wed Jul 23 01:41:51 2025 -0700

    Make better use of overflowing operations in max/min(a, add/sub(a, b)) [PR116815]
    
    This patch folds the following patterns:
    - For add:
      - umax (a, add (a, b)) -> [sum,  ovf] = adds (a, b); !ovf ? sum : a
      - umin (a, add (a, b)) -> [sum,  ovf] = adds (a, b); !ovf ? a : sum
        ... along with the commutated versions:
      - umax (a, add (b, a)) -> [sum,  ovf] = adds (b, a); !ovf ? sum : a
      - umin (a, add (b, a)) -> [sum,  ovf] = adds (b, a); !ovf ? a : sum
    - For sub:
      - umax (a, sub (a, b)) -> [diff, udf] = subs (a, b); udf ? diff : a
      - umin (a, sub (a, b)) -> [diff, udf] = subs (a, b); udf ? a : diff
    
    Where ovf is the overflow flag and udf is the underflow flag. adds and subs
    are generated by emitting parallel compare+plus/minus patterns which map to
    add<mode>3_compareC and sub<mode>3_compare1.
    
    This patch is a respin of the patch posted at
    https://gcc.gnu.org/pipermail/gcc-patches/2025-May/685021.html as per
    the suggestion to turn it into a target-specific transform by Richard
    Biener.
    
    FIXME: This pattern cannot currently factor multiple occurrences of the
           add expression into a single adds, e.g. max (a, a + b) + min (a + b, b)
           ends up generating two adds instructions. This is something that
           was lost when going from GIMPLE to target-specific transforms.
    
    Bootstrapped and regtested on aarch64-unknown-linux-gnu.
    
    Signed-off-by: Dhruv Chawla <[email protected]>
    
            PR middle-end/116815
    
    gcc/ChangeLog:
    
            * config/aarch64/aarch64.md
            (*aarch64_plus_within_<optab><mode>3_<ovf_commutate>): New pattern.
            (*aarch64_minus_within_<optab><mode>3): Likewise.
            * config/aarch64/iterators.md (ovf_add_cmp): New code attribute.
            (udf_sub_cmp): Likewise.
            (UMAXMIN): New code iterator.
            (ovf_commutate): New iterator.
            (ovf_comm_opp): New int attribute.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/aarch64/pr116815-1.c: New test.
            * gcc.target/aarch64/pr116815-2.c: Likewise.
            * gcc.target/aarch64/pr116815-3.c: Likewise.

Diff:
---
 gcc/config/aarch64/aarch64.md                 |  60 +++++++++++++
 gcc/config/aarch64/iterators.md               |   9 ++
 gcc/testsuite/gcc.target/aarch64/pr116815-1.c | 120 ++++++++++++++++++++++++++
 gcc/testsuite/gcc.target/aarch64/pr116815-2.c |  44 ++++++++++
 gcc/testsuite/gcc.target/aarch64/pr116815-3.c |  60 +++++++++++++
 5 files changed, 293 insertions(+)

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index de6b1d0ed06b..8dcb5e3f0ecb 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4482,6 +4482,66 @@
   [(set_attr "type" "<su>div")]
 )
 
+;; umax (a, add (a, b)) => [sum, ovf] = adds (a, b); !ovf ? sum : a
+;; umin (a, add (a, b)) => [sum, ovf] = adds (a, b); !ovf ? a : sum
+;; ... and the commutated versions:
+;; umax (a, add (b, a)) => [sum, ovf] = adds (b, a); !ovf ? sum : a
+;; umin (a, add (b, a)) => [sum, ovf] = adds (b, a); !ovf ? a : sum
+(define_insn_and_split "*aarch64_plus_within_<optab><mode>3_<ovf_commutate>"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+       (UMAXMIN:GPI
+         (plus:GPI (match_operand:GPI 1 "register_operand" "r")
+                   (match_operand:GPI 2 "register_operand" "r"))
+         (match_dup ovf_commutate)))
+   (clobber (match_scratch:GPI 3 "=r"))]
+  "!TARGET_CSSC"
+  "#"
+  "&& 1"
+  [(parallel
+      [(set (reg:CC_C CC_REGNUM)
+           (compare:CC_C (plus:GPI (match_dup ovf_commutate)
+                                   (match_dup <ovf_comm_opp>))
+                         (match_dup ovf_commutate)))
+       (set (match_dup 3) (plus:GPI (match_dup ovf_commutate)
+                                   (match_dup <ovf_comm_opp>)))])
+   (set (match_dup 0)
+       (if_then_else:GPI (<ovf_add_cmp> (reg:CC_C CC_REGNUM)
+                                        (const_int 0))
+                         (match_dup 3)
+                         (match_dup ovf_commutate)))]
+  {
+    if (GET_CODE (operands[3]) == SCRATCH)
+      operands[3] = gen_reg_rtx (<MODE>mode);
+  }
+)
+
+;; umax (a, sub (a, b)) => [diff, udf] = subs (a, b); udf ? diff : a
+;; umin (a, sub (a, b)) => [diff, udf] = subs (a, b); udf ? a : diff
+(define_insn_and_split "*aarch64_minus_within_<optab><mode>3"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+       (UMAXMIN:GPI
+         (minus:GPI (match_operand:GPI 1 "register_operand" "r")
+                    (match_operand:GPI 2 "register_operand" "r"))
+         (match_dup 1)))
+   (clobber (match_scratch:GPI 3 "=r"))]
+  "!TARGET_CSSC"
+  "#"
+  "&& 1"
+  [(parallel
+      [(set (reg:CC CC_REGNUM)
+           (compare:CC (match_dup 1) (match_dup 2)))
+       (set (match_dup 3) (minus:GPI (match_dup 1) (match_dup 2)))])
+   (set (match_dup 0)
+           (if_then_else:GPI (<udf_sub_cmp> (reg:CC CC_REGNUM)
+                                               (const_int 0))
+                             (match_dup 3)
+                             (match_dup 1)))]
+  {
+    if (GET_CODE (operands[3]) == SCRATCH)
+      operands[3] = gen_reg_rtx (<MODE>mode);
+  }
+)
+
 ;; -------------------------------------------------------------------
 ;; Comparison insns
 ;; -------------------------------------------------------------------
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 82579b05ff7a..ff5688529d38 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -2853,6 +2853,8 @@
 
 (define_code_iterator FMAXMIN [smax smin])
 
+(define_code_iterator UMAXMIN [umax umin])
+
 ;; Signed and unsigned max operations.
 (define_code_iterator USMAX [smax umax])
 
@@ -3141,6 +3143,9 @@
 
 (define_code_attr maxminand [(smax "bic") (smin "and")])
 
+(define_code_attr ovf_add_cmp [(umax "geu") (umin "ltu")])
+(define_code_attr udf_sub_cmp [(umax "ltu") (umin "geu")])
+
 ;; MLA/MLS attributes.
 (define_code_attr as [(ss_plus "a") (ss_minus "s")])
 
@@ -5164,3 +5169,7 @@
    (UNSPEC_F2CVT "f2cvt")
    (UNSPEC_F1CVTLT "f1cvtlt")
    (UNSPEC_F2CVTLT "f2cvtlt")])
+
+;; Operand numbers for commutative operations
+(define_int_iterator ovf_commutate [1 2])
+(define_int_attr ovf_comm_opp [(1 "2") (2 "1")])
diff --git a/gcc/testsuite/gcc.target/aarch64/pr116815-1.c b/gcc/testsuite/gcc.target/aarch64/pr116815-1.c
new file mode 100644
index 000000000000..f3bdb7943188
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr116815-1.c
@@ -0,0 +1,120 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/* PR middle-end/116815 */
+
+/* Single-use tests.  */
+
+static inline unsigned __attribute__ ((always_inline))
+max (unsigned a, unsigned b)
+{
+  return a > b ? a : b;
+}
+
+static inline unsigned __attribute__ ((always_inline))
+min (unsigned a, unsigned b)
+{
+  return a < b ? a : b;
+}
+
+#define OPERATION(op, type, N, exp1, exp2)                                     \
+  unsigned u##op##type##N (unsigned a, unsigned b) { return op (exp1, exp2); }
+
+/*
+** umaxadd1:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cc
+**     ret
+*/
+OPERATION (max, add, 1, a, a + b)
+
+/*
+** umaxadd2:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cc
+**     ret
+*/
+OPERATION (max, add, 2, a, b + a)
+
+/*
+** umaxadd3:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cc
+**     ret
+*/
+OPERATION (max, add, 3, a + b, a)
+
+/*
+** umaxadd4:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cc
+**     ret
+*/
+OPERATION (max, add, 4, b + a, a)
+
+/*
+** uminadd1:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cs
+**     ret
+*/
+OPERATION (min, add, 1, a, a + b)
+
+/*
+** uminadd2:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cs
+**     ret
+*/
+OPERATION (min, add, 2, a, b + a)
+
+/*
+** uminadd3:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cs
+**     ret
+*/
+OPERATION (min, add, 3, a + b, a)
+
+/*
+** uminadd4:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cs
+**     ret
+*/
+OPERATION (min, add, 4, b + a, a)
+
+/* sub requires the inverse of the comparison from add.  */
+
+/*
+** umaxsub1:
+**     subs    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cc
+**     ret
+*/
+OPERATION (max, sub, 1, a, a - b)
+
+/*
+** umaxsub2:
+**     subs    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cc
+**     ret
+*/
+OPERATION (max, sub, 2, a - b, a)
+
+/*
+** uminsub1:
+**     subs    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cs
+**     ret
+*/
+OPERATION (min, sub, 1, a, a - b)
+
+/*
+** uminsub2:
+**     subs    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cs
+**     ret
+*/
+OPERATION (min, sub, 2, a - b, a)
diff --git a/gcc/testsuite/gcc.target/aarch64/pr116815-2.c b/gcc/testsuite/gcc.target/aarch64/pr116815-2.c
new file mode 100644
index 000000000000..015c868aec28
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr116815-2.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#pragma GCC target "+cssc"
+
+/* PR middle-end/116815 */
+
+/* Make sure that umax/umin instructions are generated with CSSC.  */
+
+static inline unsigned __attribute__ ((always_inline))
+max (unsigned a, unsigned b)
+{
+  return a > b ? a : b;
+}
+
+static inline unsigned __attribute__ ((always_inline))
+min (unsigned a, unsigned b)
+{
+  return a < b ? a : b;
+}
+
+#define OPERATION(op, type, N, exp1, exp2)                                     \
+  unsigned u##op##type##N (unsigned a, unsigned b) { return op (exp1, exp2); }
+
+OPERATION (max, add, 1, a, a + b)
+OPERATION (max, add, 2, a, b + a)
+OPERATION (max, add, 3, a + b, a)
+OPERATION (max, add, 4, b + a, a)
+
+OPERATION (min, add, 1, a, a + b)
+OPERATION (min, add, 2, a, b + a)
+OPERATION (min, add, 3, a + b, a)
+OPERATION (min, add, 4, b + a, a)
+
+OPERATION (max, sub, 1, a, a - b)
+OPERATION (max, sub, 2, a - b, a)
+
+OPERATION (min, sub, 1, a, a - b)
+OPERATION (min, sub, 2, a - b, a)
+
+/* { dg-final { scan-assembler-times "umax\\t" 6 } } */
+/* { dg-final { scan-assembler-times "umin\\t" 6 } } */
+/* { dg-final { scan-assembler-not "adds\\t" } } */
+/* { dg-final { scan-assembler-not "subs\\t" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr116815-3.c b/gcc/testsuite/gcc.target/aarch64/pr116815-3.c
new file mode 100644
index 000000000000..d262d2170f39
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr116815-3.c
@@ -0,0 +1,60 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+/* PR middle-end/116815 */
+
+/* Verify that the transformation gives correct results */
+
+static inline unsigned __attribute__ ((always_inline))
+min (unsigned a, unsigned b)
+{
+  return (a < b) ? a : b;
+}
+
+static inline unsigned __attribute__ ((always_inline))
+max (unsigned a, unsigned b)
+{
+  return (a > b) ? a : b;
+}
+
+__attribute__ ((noipa)) unsigned
+umaxadd (unsigned a, unsigned b)
+{
+  return max (a + b, a);
+}
+
+__attribute__ ((noipa)) unsigned
+umaxsub (unsigned a, unsigned b)
+{
+  return max (a - b, a);
+}
+
+__attribute__ ((noipa)) unsigned
+uminadd (unsigned a, unsigned b)
+{
+  return min (a + b, a);
+}
+
+__attribute__ ((noipa)) unsigned
+uminsub (unsigned a, unsigned b)
+{
+  return min (a - b, a);
+}
+
+int
+main ()
+{
+  /* Overflows to 0x30000000.  */
+  if (umaxadd (0x90000000, 0xa0000000) != 0x90000000)
+    __builtin_abort ();
+
+  if (uminadd (0x90000000, 0xa0000000) != 0x30000000)
+    __builtin_abort ();
+
+  /* Underflows to 0x60000000.  */
+  if (umaxsub (0x00000000, 0xa0000000) != 0x60000000)
+    __builtin_abort ();
+
+  if (uminsub (0x00000000, 0xa0000000) != 0x00000000)
+    __builtin_abort ();
+}

[gcc r16-5611] Make better use of overflowing operations in max/min(a, add/sub(a, b)) [PR116815]

Reply via email to