From: Soumya AR <[email protected]>

This patch adds support for expanding the following atomic min/max builtins:

__atomic_fetch_min
__atomic_fetch_max
__atomic_min_fetch
__atomic_max_fetch

into compare-and-swap loops.

---

expand_binop() is extended to handle min/max operations, either via a
conditional move or a compare and branch sequence.

To do this, we migrate the code from expand_expr_real_2() to expand_binop()
for min/max operations.

Earlier, the MIN/MAX case in expand_expr_real_2() was handled by calling
expand_binop() with OPTAB_WIDEN. Since expand_binop() did not support min/max,
it would return NULL if no direct optab was available. expand_expr_real_2()
would then proceed to a compare and branch / conditional move sequence.

Since expand_binop() now handles this itself, we can remove the fallback from
expand_expr_real_2(), as expand_binop() will always return a non-NULL
expansion for min/max operations.

Would appreciate confirmation if this approach is OK.

---

When extending resolve_overloaded_builtin to handle the generic atomic min/max
builtins, we first resolve them to the sign-specific generic (_N) variant,
chosen from the signedness of the argument type. We then fall through to the
existing logic that resolves to the sized variant. This works because of the
order in which the atomic min/max builtins are defined in sync-builtins.def:

<generic_variant> eg: BUILT_IN_ATOMIC_FETCH_MIN_N

<generic_signed_variant> eg: BUILT_IN_ATOMIC_FETCH_SMIN_N
<all_sized_signed_variants> eg: BUILT_IN_ATOMIC_FETCH_SMIN_1,
                                BUILT_IN_ATOMIC_FETCH_SMIN_2, etc.

<generic_unsigned_variant> eg: BUILT_IN_ATOMIC_FETCH_UMIN_N
<all_sized_unsigned_variants> eg: BUILT_IN_ATOMIC_FETCH_UMIN_1,
                                  BUILT_IN_ATOMIC_FETCH_UMIN_2, etc.

---

Bootstrapped and regression tested on aarch64-linux-gnu and x86_64-linux-gnu.
Cross-compiled and regression tested for arm-linux-gnueabihf-armv7-a and
aarch64-linux-gnu without LSE.

Signed-off-by: Soumya AR <[email protected]>

gcc/ChangeLog:

        * builtins.cc (expand_builtin_atomic_fetch_op): Special handling for
        SMIN/SMAX.
        (expand_builtin): Handle BUILT_IN_ATOMIC_FETCH_{MIN,MAX}_*
        and BUILT_IN_ATOMIC_{MIN,MAX}_FETCH_*.
        * expr.cc (expand_expr_real_2): Remove fallback to compare and branch /
        conditional move sequence.
        * optabs.cc (expand_binop): Add support for expanding min/max operations
        using compare and branch / conditional move sequence.
        (expand_atomic_fetch_op_no_fallback): Special handling for SMIN/SMAX.
        (expand_atomic_fetch_op): Special handling for SMIN/SMAX.

gcc/c-family/ChangeLog:

        * c-common.cc (resolve_overloaded_builtin): Resolve generic atomic
        min/max builtins into size and sign specific builtins.
---
 gcc/builtins.cc          | 116 +++++++++++++++++++++++++++++++++++++++
 gcc/c-family/c-common.cc |  50 +++++++++++++++++
 gcc/expr.cc              |  88 +----------------------------
 gcc/optabs.cc            |  97 ++++++++++++++++++++++++++++++++
 4 files changed, 264 insertions(+), 87 deletions(-)

diff --git a/gcc/builtins.cc b/gcc/builtins.cc
index 7f580a3145f..c75bffd0136 100644
--- a/gcc/builtins.cc
+++ b/gcc/builtins.cc
@@ -6981,6 +6981,9 @@ expand_builtin_atomic_fetch_op (machine_mode mode, tree 
exp, rtx target,
                                     OPTAB_LIB_WIDEN);
          ret = expand_simple_unop (mode, NOT, ret, target, true);
        }
+      else if (code == SMIN || code == SMAX)
+       ret = expand_simple_binop (mode, code, ret, val, target, false,
+                                  OPTAB_LIB_WIDEN);
       else
        ret = expand_simple_binop (mode, code, ret, val, target, true,
                                   OPTAB_LIB_WIDEN);
@@ -8918,6 +8921,119 @@ expand_builtin (tree exp, rtx target, rtx subtarget, 
machine_mode mode,
        return target;
       break;
 
+    case BUILT_IN_ATOMIC_FETCH_SMAX_1:
+    case BUILT_IN_ATOMIC_FETCH_SMAX_2:
+    case BUILT_IN_ATOMIC_FETCH_SMAX_4:
+    case BUILT_IN_ATOMIC_FETCH_SMAX_8:
+    case BUILT_IN_ATOMIC_FETCH_SMAX_16:
+      mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_SMAX_1);
+      target = expand_builtin_atomic_fetch_op (mode, exp, target, SMAX, false,
+                                              ignore, BUILT_IN_NONE);
+      if (target)
+       return target;
+      break;
+
+    case BUILT_IN_ATOMIC_FETCH_SMIN_1:
+    case BUILT_IN_ATOMIC_FETCH_SMIN_2:
+    case BUILT_IN_ATOMIC_FETCH_SMIN_4:
+    case BUILT_IN_ATOMIC_FETCH_SMIN_8:
+    case BUILT_IN_ATOMIC_FETCH_SMIN_16:
+      mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_SMIN_1);
+      target = expand_builtin_atomic_fetch_op (mode, exp, target, SMIN, false,
+                                              ignore, BUILT_IN_NONE);
+      if (target)
+       return target;
+      break;
+
+    case BUILT_IN_ATOMIC_FETCH_UMAX_1:
+    case BUILT_IN_ATOMIC_FETCH_UMAX_2:
+    case BUILT_IN_ATOMIC_FETCH_UMAX_4:
+    case BUILT_IN_ATOMIC_FETCH_UMAX_8:
+    case BUILT_IN_ATOMIC_FETCH_UMAX_16:
+      mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_UMAX_1);
+      target = expand_builtin_atomic_fetch_op (mode, exp, target, UMAX, false,
+                                              ignore, BUILT_IN_NONE);
+      if (target)
+       return target;
+      break;
+
+    case BUILT_IN_ATOMIC_FETCH_UMIN_1:
+    case BUILT_IN_ATOMIC_FETCH_UMIN_2:
+    case BUILT_IN_ATOMIC_FETCH_UMIN_4:
+    case BUILT_IN_ATOMIC_FETCH_UMIN_8:
+    case BUILT_IN_ATOMIC_FETCH_UMIN_16:
+      mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_UMIN_1);
+      target = expand_builtin_atomic_fetch_op (mode, exp, target, UMIN, false,
+                                              ignore, BUILT_IN_NONE);
+      if (target)
+       return target;
+      break;
+
+    case BUILT_IN_ATOMIC_SMAX_FETCH_1:
+    case BUILT_IN_ATOMIC_SMAX_FETCH_2:
+    case BUILT_IN_ATOMIC_SMAX_FETCH_4:
+    case BUILT_IN_ATOMIC_SMAX_FETCH_8:
+    case BUILT_IN_ATOMIC_SMAX_FETCH_16:
+      {
+       enum built_in_function lib;
+       mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_SMAX_FETCH_1);
+       lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_SMAX_1 +
+                                      (fcode - BUILT_IN_ATOMIC_SMAX_FETCH_1));
+       target = expand_builtin_atomic_fetch_op (mode, exp, target, SMAX, true,
+                                                ignore, lib);
+       if (target)
+         return target;
+       break;
+      }
+    case BUILT_IN_ATOMIC_SMIN_FETCH_1:
+    case BUILT_IN_ATOMIC_SMIN_FETCH_2:
+    case BUILT_IN_ATOMIC_SMIN_FETCH_4:
+    case BUILT_IN_ATOMIC_SMIN_FETCH_8:
+    case BUILT_IN_ATOMIC_SMIN_FETCH_16:
+      {
+       enum built_in_function lib;
+       mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_SMIN_FETCH_1);
+       lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_SMIN_1 +
+                                      (fcode - BUILT_IN_ATOMIC_SMIN_FETCH_1));
+       target = expand_builtin_atomic_fetch_op (mode, exp, target, SMIN, true,
+                                                ignore, lib);
+       if (target)
+         return target;
+       break;
+      }
+    case BUILT_IN_ATOMIC_UMAX_FETCH_1:
+    case BUILT_IN_ATOMIC_UMAX_FETCH_2:
+    case BUILT_IN_ATOMIC_UMAX_FETCH_4:
+    case BUILT_IN_ATOMIC_UMAX_FETCH_8:
+    case BUILT_IN_ATOMIC_UMAX_FETCH_16:
+      {
+       enum built_in_function lib;
+       mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_UMAX_FETCH_1);
+       lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_UMAX_1 +
+                                      (fcode - BUILT_IN_ATOMIC_UMAX_FETCH_1));
+       target = expand_builtin_atomic_fetch_op (mode, exp, target, UMAX, true,
+                                                ignore, lib);
+       if (target)
+         return target;
+       break;
+      }
+    case BUILT_IN_ATOMIC_UMIN_FETCH_1:
+    case BUILT_IN_ATOMIC_UMIN_FETCH_2:
+    case BUILT_IN_ATOMIC_UMIN_FETCH_4:
+    case BUILT_IN_ATOMIC_UMIN_FETCH_8:
+    case BUILT_IN_ATOMIC_UMIN_FETCH_16:
+      {
+       enum built_in_function lib;
+       mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_UMIN_FETCH_1);
+       lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_UMIN_1 +
+                                      (fcode - BUILT_IN_ATOMIC_UMIN_FETCH_1));
+       target = expand_builtin_atomic_fetch_op (mode, exp, target, UMIN, true,
+                                                ignore, lib);
+       if (target)
+         return target;
+       break;
+      }
+
     case BUILT_IN_ATOMIC_TEST_AND_SET:
       target = expand_builtin_atomic_test_and_set (exp, target);
       if (target)
diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc
index e7dd4602ac1..cdf9e6c938d 100644
--- a/gcc/c-family/c-common.cc
+++ b/gcc/c-family/c-common.cc
@@ -8701,12 +8701,24 @@ resolve_overloaded_builtin (location_t loc, tree 
function,
     case BUILT_IN_ATOMIC_NAND_FETCH_N:
     case BUILT_IN_ATOMIC_XOR_FETCH_N:
     case BUILT_IN_ATOMIC_OR_FETCH_N:
+    case BUILT_IN_ATOMIC_MAX_FETCH_N:
+    case BUILT_IN_ATOMIC_SMAX_FETCH_N:
+    case BUILT_IN_ATOMIC_UMAX_FETCH_N:
+    case BUILT_IN_ATOMIC_MIN_FETCH_N:
+    case BUILT_IN_ATOMIC_SMIN_FETCH_N:
+    case BUILT_IN_ATOMIC_UMIN_FETCH_N:
     case BUILT_IN_ATOMIC_FETCH_ADD_N:
     case BUILT_IN_ATOMIC_FETCH_SUB_N:
     case BUILT_IN_ATOMIC_FETCH_AND_N:
     case BUILT_IN_ATOMIC_FETCH_NAND_N:
     case BUILT_IN_ATOMIC_FETCH_XOR_N:
     case BUILT_IN_ATOMIC_FETCH_OR_N:
+    case BUILT_IN_ATOMIC_FETCH_MAX_N:
+    case BUILT_IN_ATOMIC_FETCH_SMAX_N:
+    case BUILT_IN_ATOMIC_FETCH_UMAX_N:
+    case BUILT_IN_ATOMIC_FETCH_MIN_N:
+    case BUILT_IN_ATOMIC_FETCH_SMIN_N:
+    case BUILT_IN_ATOMIC_FETCH_UMIN_N:
       orig_format = false;
       /* FALLTHRU */
     case BUILT_IN_SYNC_FETCH_AND_ADD_N:
@@ -8756,6 +8768,44 @@ resolve_overloaded_builtin (location_t loc, tree 
function,
            return atomic_bitint_fetch_using_cas_loop (loc, orig_code, function,
                                                       params);
          }
+       /* Transform generic MIN/MAX to signed/unsigned variants based on type
+        */
+       if (orig_code == BUILT_IN_ATOMIC_FETCH_MIN_N
+           || orig_code == BUILT_IN_ATOMIC_FETCH_MAX_N
+           || orig_code == BUILT_IN_ATOMIC_MIN_FETCH_N
+           || orig_code == BUILT_IN_ATOMIC_MAX_FETCH_N)
+         {
+           /* Get the type of the value being operated on.  */
+           tree arg_type = TREE_TYPE ((*params)[0]);
+           if (TREE_CODE (arg_type) == POINTER_TYPE)
+             arg_type = TREE_TYPE (arg_type);
+
+           bool is_unsigned = TYPE_UNSIGNED (arg_type);
+           enum built_in_function signed_code, unsigned_code;
+
+           switch (orig_code)
+             {
+             case BUILT_IN_ATOMIC_FETCH_MIN_N:
+               signed_code = BUILT_IN_ATOMIC_FETCH_SMIN_N;
+               unsigned_code = BUILT_IN_ATOMIC_FETCH_UMIN_N;
+               break;
+             case BUILT_IN_ATOMIC_FETCH_MAX_N:
+               signed_code = BUILT_IN_ATOMIC_FETCH_SMAX_N;
+               unsigned_code = BUILT_IN_ATOMIC_FETCH_UMAX_N;
+               break;
+             case BUILT_IN_ATOMIC_MIN_FETCH_N:
+               signed_code = BUILT_IN_ATOMIC_SMIN_FETCH_N;
+               unsigned_code = BUILT_IN_ATOMIC_UMIN_FETCH_N;
+               break;
+             case BUILT_IN_ATOMIC_MAX_FETCH_N:
+               signed_code = BUILT_IN_ATOMIC_SMAX_FETCH_N;
+               unsigned_code = BUILT_IN_ATOMIC_UMAX_FETCH_N;
+               break;
+             default:
+               gcc_unreachable ();
+             }
+           orig_code = is_unsigned ? unsigned_code : signed_code;
+         }
 
        fncode = (enum built_in_function)((int)orig_code + exact_log2 (n) + 1);
        new_function = builtin_decl_explicit (fncode);
diff --git a/gcc/expr.cc b/gcc/expr.cc
index 3d2b2535158..e1fd4f3f0cb 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -9733,7 +9733,6 @@ expand_expr_real_2 (const_sepops ops, rtx target, 
machine_mode tmode,
                    enum expand_modifier modifier)
 {
   rtx op0, op1, op2, temp;
-  rtx_code_label *lab;
   tree type;
   int unsignedp;
   machine_mode mode;
@@ -10536,92 +10535,7 @@ expand_expr_real_2 (const_sepops ops, rtx target, 
machine_mode tmode,
       if (temp != 0)
        return temp;
 
-      if (VECTOR_TYPE_P (type))
-       gcc_unreachable ();
-
-      /* At this point, a MEM target is no longer useful; we will get better
-        code without it.  */
-
-      if (! REG_P (target))
-       target = gen_reg_rtx (mode);
-
-      /* If op1 was placed in target, swap op0 and op1.  */
-      if (target != op0 && target == op1)
-       std::swap (op0, op1);
-
-      /* We generate better code and avoid problems with op1 mentioning
-        target by forcing op1 into a pseudo if it isn't a constant.  */
-      if (! CONSTANT_P (op1))
-       op1 = force_reg (mode, op1);
-
-      {
-       enum rtx_code comparison_code;
-       rtx cmpop1 = op1;
-
-       if (code == MAX_EXPR)
-         comparison_code = unsignedp ? GEU : GE;
-       else
-         comparison_code = unsignedp ? LEU : LE;
-
-       /* Canonicalize to comparisons against 0.  */
-       if (op1 == const1_rtx)
-         {
-           /* Converting (a >= 1 ? a : 1) into (a > 0 ? a : 1)
-              or (a != 0 ? a : 1) for unsigned.
-              For MIN we are safe converting (a <= 1 ? a : 1)
-              into (a <= 0 ? a : 1)  */
-           cmpop1 = const0_rtx;
-           if (code == MAX_EXPR)
-             comparison_code = unsignedp ? NE : GT;
-         }
-       if (op1 == constm1_rtx && !unsignedp)
-         {
-           /* Converting (a >= -1 ? a : -1) into (a >= 0 ? a : -1)
-              and (a <= -1 ? a : -1) into (a < 0 ? a : -1) */
-           cmpop1 = const0_rtx;
-           if (code == MIN_EXPR)
-             comparison_code = LT;
-         }
-
-       /* Use a conditional move if possible.  */
-       if (can_conditionally_move_p (mode))
-         {
-           rtx insn;
-
-           start_sequence ();
-
-           /* Try to emit the conditional move.  */
-           insn = emit_conditional_move (target,
-                                         { comparison_code,
-                                           op0, cmpop1, mode },
-                                         op0, op1, mode,
-                                         unsignedp);
-
-           /* If we could do the conditional move, emit the sequence,
-              and return.  */
-           if (insn)
-             {
-               rtx_insn *seq = end_sequence ();
-               emit_insn (seq);
-               return target;
-             }
-
-           /* Otherwise discard the sequence and fall back to code with
-              branches.  */
-           end_sequence ();
-         }
-
-       if (target != op0)
-         emit_move_insn (target, op0);
-
-       lab = gen_label_rtx ();
-       do_compare_rtx_and_jump (target, cmpop1, comparison_code,
-                                unsignedp, mode, NULL_RTX, NULL, lab,
-                                profile_probability::uninitialized ());
-      }
-      emit_move_insn (target, op1);
-      emit_label (lab);
-      return target;
+      gcc_unreachable ();
 
     case BIT_NOT_EXPR:
       op0 = expand_expr (treeop0, subtarget,
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index cacb15bb946..c19f8c0ebed 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -1643,6 +1643,93 @@ expand_binop (machine_mode mode, optab binoptab, rtx 
op0, rtx op1,
            }
        }
     }
+  /* Implement MIN/MAX expansion as a compare and branch/conditional move.  */
+  if ((binoptab == smin_optab || binoptab == smax_optab
+       || binoptab == umin_optab || binoptab == umax_optab)
+      && !VECTOR_MODE_P (mode) && methods != OPTAB_DIRECT)
+    {
+      bool is_max = (binoptab == smax_optab || binoptab == umax_optab);
+      rtx_code_label *lab;
+
+      if (target == NULL_RTX || !REG_P (target))
+       target = gen_reg_rtx (mode);
+
+      /* If op1 was placed in target, swap op0 and op1.  */
+      if (target != op0 && target == op1)
+       std::swap (op0, op1);
+
+      /* We generate better code and avoid problems with op1 mentioning
+        target by forcing op1 into a pseudo if it isn't a constant.  */
+      if (!CONSTANT_P (op1))
+       op1 = force_reg (mode, op1);
+
+      {
+       enum rtx_code comparison_code;
+       rtx cmpop1 = op1;
+
+       if (is_max)
+         comparison_code = unsignedp ? GEU : GE;
+       else
+         comparison_code = unsignedp ? LEU : LE;
+
+       /* Canonicalize to comparisons against 0.  */
+       if (op1 == const1_rtx)
+         {
+           /* Converting (a >= 1 ? a : 1) into (a > 0 ? a : 1)
+              or (a != 0 ? a : 1) for unsigned.
+              For MIN we are safe converting (a <= 1 ? a : 1)
+              into (a <= 0 ? a : 1)  */
+           cmpop1 = const0_rtx;
+           if (is_max)
+             comparison_code = unsignedp ? NE : GT;
+         }
+       if (op1 == constm1_rtx && !unsignedp)
+         {
+           /* Converting (a >= -1 ? a : -1) into (a >= 0 ? a : -1)
+              and (a <= -1 ? a : -1) into (a < 0 ? a : -1) */
+           cmpop1 = const0_rtx;
+           if (!is_max)
+             comparison_code = LT;
+         }
+
+       /* Use a conditional move if possible.  */
+       if (can_conditionally_move_p (mode))
+         {
+           rtx insn;
+
+           start_sequence ();
+
+           /* Try to emit the conditional move.  */
+           insn = emit_conditional_move (target,
+                                         {comparison_code, op0, cmpop1, mode},
+                                         op0, op1, mode, unsignedp);
+
+           /* If we could do the conditional move, emit the sequence,
+              and return.  */
+           if (insn)
+             {
+               rtx_insn *seq = end_sequence ();
+               emit_insn (seq);
+               return target;
+             }
+
+           /* Otherwise discard the sequence and fall back to code with
+              branches.  */
+           end_sequence ();
+         }
+
+       if (target != op0)
+         emit_move_insn (target, op0);
+
+       lab = gen_label_rtx ();
+       do_compare_rtx_and_jump (target, cmpop1, comparison_code, unsignedp,
+                                mode, NULL_RTX, NULL, lab,
+                                profile_probability::uninitialized ());
+      }
+      emit_move_insn (target, op1);
+      emit_label (lab);
+      return target;
+    }
 
   /* Look for a wider mode of the same class for which we think we
      can open-code the operation.  Check for a widening multiply at the
@@ -7807,6 +7894,11 @@ expand_atomic_fetch_op_no_fallback (rtx target, rtx mem, 
rtx val,
                                            true, OPTAB_LIB_WIDEN);
              result = expand_simple_unop (mode, NOT, result, target, true);
            }
+         else if (code == SMIN || code == SMAX)
+           {
+             result = expand_simple_binop (mode, code, result, val, target,
+                                           false, OPTAB_LIB_WIDEN);
+           }
          else
            result = expand_simple_binop (mode, code, result, val, target,
                                          true, OPTAB_LIB_WIDEN);
@@ -7933,6 +8025,11 @@ expand_atomic_fetch_op (rtx target, rtx mem, rtx val, 
enum rtx_code code,
                                    true, OPTAB_LIB_WIDEN);
          t1 = expand_simple_unop (mode, code, t1, NULL_RTX, true);
        }
+      else if (code == SMIN || code == SMAX)
+       {
+         t1 = expand_simple_binop (mode, code, t1, val, NULL_RTX,
+                                   false, OPTAB_LIB_WIDEN);
+       }
       else
        t1 = expand_simple_binop (mode, code, t1, val, NULL_RTX, true,
                                  OPTAB_LIB_WIDEN);
-- 
2.44.0

Reply via email to