gcc-14-with-riscv-opts)] Internal-fn: Support new IFN SAT_ADD for unsigned scalar int

Jeff Law via Gcc-cvs Sun, 26 May 2024 17:08:19 -0700

https://gcc.gnu.org/g:592205a276422c31872bf41764c59589e4a17c85


commit 592205a276422c31872bf41764c59589e4a17c85
Author: Pan Li <pan2...@intel.com>
Date:   Wed May 15 10:14:05 2024 +0800

    Internal-fn: Support new IFN SAT_ADD for unsigned scalar int
    
    This patch adds the middle-end representation for saturating
    addition, i.e. the result of the add is set to the maximum value of
    the type when the addition overflows.  It matches a pattern similar
    to the one below.
    
    SAT_ADD (x, y) => (x + y) | (-(TYPE)((TYPE)(x + y) < x))
    
    Taking uint8_t as an example, we have:
    
    * SAT_ADD (1, 254)   => 255.
    * SAT_ADD (1, 255)   => 255.
    * SAT_ADD (2, 255)   => 255.
    * SAT_ADD (255, 255) => 255.
    
    Given the example below for the unsigned scalar integer type uint64_t:
    
    uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
    {
      return (x + y) | (- (uint64_t)((uint64_t)(x + y) < x));
    }
    
    Before this patch:
    uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
    {
      long unsigned int _1;
      _Bool _2;
      long unsigned int _3;
      long unsigned int _4;
      uint64_t _7;
      long unsigned int _10;
      __complex__ long unsigned int _11;
    
    ;;   basic block 2, loop depth 0
    ;;    pred:       ENTRY
      _11 = .ADD_OVERFLOW (x_5(D), y_6(D));
      _1 = REALPART_EXPR <_11>;
      _10 = IMAGPART_EXPR <_11>;
      _2 = _10 != 0;
      _3 = (long unsigned int) _2;
      _4 = -_3;
      _7 = _1 | _4;
      return _7;
    ;;    succ:       EXIT
    
    }
    
    After this patch:
    uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
    {
      uint64_t _7;
    
    ;;   basic block 2, loop depth 0
    ;;    pred:       ENTRY
      _7 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
      return _7;
    ;;    succ:       EXIT
    }
    
    The following tests pass with this patch:
    1. The riscv full regression tests.
    2. The x86 bootstrap tests.
    3. The x86 full regression tests.
    
            PR target/51492
            PR target/112600
    
    gcc/ChangeLog:
    
            * internal-fn.cc (commutative_binary_fn_p): Add type IFN_SAT_ADD
            to the return true switch case(s).
            * internal-fn.def (SAT_ADD):  Add new signed optab SAT_ADD.
            * match.pd: Add unsigned SAT_ADD match(es).
            * optabs.def (OPTAB_NL): Remove fixed-point limitation for
            us/ssadd.
            * tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_add): New
            extern func decl generated in match.pd match.
            (match_saturation_arith): New func impl to match the saturation
            arith.
            (math_opts_dom_walker::after_dom_children): Try match saturation
            arith when IOR expr.
    
    Signed-off-by: Pan Li <pan2...@intel.com>
    (cherry picked from commit 52b0536710ff3f3ace72ab00ce9ef6c630cd1183)

Diff:
---
 gcc/internal-fn.cc        |  1 +
 gcc/internal-fn.def       |  2 ++
 gcc/match.pd              | 51 +++++++++++++++++++++++++++++++++++++++++++++++
 gcc/optabs.def            |  4 ++--
 gcc/tree-ssa-math-opts.cc | 32 +++++++++++++++++++++++++++++
 5 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 0a7053c2286..73045ca8c8c 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -4202,6 +4202,7 @@ commutative_binary_fn_p (internal_fn fn)
     case IFN_UBSAN_CHECK_MUL:
     case IFN_ADD_OVERFLOW:
     case IFN_MUL_OVERFLOW:
+    case IFN_SAT_ADD:
     case IFN_VEC_WIDEN_PLUS:
     case IFN_VEC_WIDEN_PLUS_LO:
     case IFN_VEC_WIDEN_PLUS_HI:
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 848bb9dbff3..25badbb86e5 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -275,6 +275,8 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHS, ECF_CONST | ECF_NOTHROW, first,
 DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first,
                              smulhrs, umulhrs, binary)
 
+DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary)
+
 DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
 DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
 DEF_INTERNAL_COND_FN (MUL, ECF_CONST, smul, binary)
diff --git a/gcc/match.pd b/gcc/match.pd
index 4a0aa80cee1..d6bfb9a32ae 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3045,6 +3045,57 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
        || POINTER_TYPE_P (itype))
       && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype))))))
 
+/* Unsigned Saturation Add */
+(match (usadd_left_part_1 @0 @1)
+ (plus:c @0 @1)
+ (if (INTEGRAL_TYPE_P (type)
+      && TYPE_UNSIGNED (TREE_TYPE (@0))
+      && types_match (type, TREE_TYPE (@0))
+      && types_match (type, TREE_TYPE (@1)))))
+
+(match (usadd_left_part_2 @0 @1)
+ (realpart (IFN_ADD_OVERFLOW:c @0 @1))
+ (if (INTEGRAL_TYPE_P (type)
+      && TYPE_UNSIGNED (TREE_TYPE (@0))
+      && types_match (type, TREE_TYPE (@0))
+      && types_match (type, TREE_TYPE (@1)))))
+
+(match (usadd_right_part_1 @0 @1)
+ (negate (convert (lt (plus:c @0 @1) @0)))
+ (if (INTEGRAL_TYPE_P (type)
+      && TYPE_UNSIGNED (TREE_TYPE (@0))
+      && types_match (type, TREE_TYPE (@0))
+      && types_match (type, TREE_TYPE (@1)))))
+
+(match (usadd_right_part_1 @0 @1)
+ (negate (convert (gt @0 (plus:c @0 @1))))
+ (if (INTEGRAL_TYPE_P (type)
+      && TYPE_UNSIGNED (TREE_TYPE (@0))
+      && types_match (type, TREE_TYPE (@0))
+      && types_match (type, TREE_TYPE (@1)))))
+
+(match (usadd_right_part_2 @0 @1)
+ (negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)))
+ (if (INTEGRAL_TYPE_P (type)
+      && TYPE_UNSIGNED (TREE_TYPE (@0))
+      && types_match (type, TREE_TYPE (@0))
+      && types_match (type, TREE_TYPE (@1)))))
+
+/* We cannot merge or overload usadd_left_part_1 and usadd_left_part_2
+   because the sub part of left_part_2 cannot work with right_part_1.
+   For example, left_part_2 pattern focus one .ADD_OVERFLOW but the
+   right_part_1 has nothing to do with .ADD_OVERFLOW.  */
+
+/* Unsigned saturation add, case 1 (branchless):
+   SAT_U_ADD = (X + Y) | - ((X + Y) < X) or
+   SAT_U_ADD = (X + Y) | - (X > (X + Y)).  */
+(match (unsigned_integer_sat_add @0 @1)
+ (bit_ior:c (usadd_left_part_1 @0 @1) (usadd_right_part_1 @0 @1)))
+
+/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW).  */
+(match (unsigned_integer_sat_add @0 @1)
+ (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
+
 /* x >  y  &&  x != XXX_MIN  -->  x > y
    x >  y  &&  x == XXX_MIN  -->  false . */
 (for eqne (eq ne)
diff --git a/gcc/optabs.def b/gcc/optabs.def
index ad14f9328b9..3f2cb46aff8 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -111,8 +111,8 @@ OPTAB_NX(add_optab, "add$F$a3")
 OPTAB_NX(add_optab, "add$Q$a3")
 OPTAB_VL(addv_optab, "addv$I$a3", PLUS, "add", '3', gen_intv_fp_libfunc)
 OPTAB_VX(addv_optab, "add$F$a3")
-OPTAB_NL(ssadd_optab, "ssadd$Q$a3", SS_PLUS, "ssadd", '3', gen_signed_fixed_libfunc)
-OPTAB_NL(usadd_optab, "usadd$Q$a3", US_PLUS, "usadd", '3', gen_unsigned_fixed_libfunc)
+OPTAB_NL(ssadd_optab, "ssadd$a3", SS_PLUS, "ssadd", '3', gen_signed_fixed_libfunc)
+OPTAB_NL(usadd_optab, "usadd$a3", US_PLUS, "usadd", '3', gen_unsigned_fixed_libfunc)
 OPTAB_NL(sub_optab, "sub$P$a3", MINUS, "sub", '3', gen_int_fp_fixed_libfunc)
 OPTAB_NX(sub_optab, "sub$F$a3")
 OPTAB_NX(sub_optab, "sub$Q$a3")
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 705f4a4695a..757ddf17af4 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -4026,6 +4026,36 @@ arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
   return 0;
 }
 
+extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
+
+/*
+ * Try to match saturation arith pattern(s).
+ *   1. SAT_ADD (unsigned)
+ *      _7 = _4 + _6;
+ *      _8 = _4 > _7;
+ *      _9 = (long unsigned int) _8;
+ *      _10 = -_9;
+ *      _12 = _7 | _10;
+ *      =>
+ *      _12 = .SAT_ADD (_4, _6);  */
+static void
+match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
+{
+  gcall *call = NULL;
+
+  tree ops[2];
+  tree lhs = gimple_assign_lhs (stmt);
+
+  if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
+      && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (lhs),
+                                        OPTIMIZE_FOR_BOTH))
+    {
+      call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]);
+      gimple_call_set_lhs (call, lhs);
+      gsi_replace (gsi, call, true);
+    }
+}
+
 /* Recognize for unsigned x
    x = y - z;
    if (x > y)
@@ -5935,6 +5965,8 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
              break;
 
            case BIT_IOR_EXPR:
+             match_saturation_arith (&gsi, as_a<gassign *> (stmt));
+             /* fall-through  */
            case BIT_XOR_EXPR:
              match_uaddc_usubc (&gsi, stmt, code);
              break;

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] Internal-fn: Support new IFN SAT_ADD for unsigned scalar int

Reply via email to