From: Soumya AR <[email protected]>
This patch adds support for expanding the following atomic min/max builtins:
__atomic_fetch_min
__atomic_fetch_max
__atomic_min_fetch
__atomic_max_fetch
into compare-and-swap loops.
---
expand_binop() is extended to handle min/max operations, either via a
conditional move or a compare and branch sequence.
To do this, we migrate the code from expand_expr_real_2() to expand_binop()
for min/max operations.
Earlier, the MIN/MAX case in expand_expr_real_2() was handled by calling
expand_binop() with OPTAB_WIDEN. Since expand_binop() did not support min/max,
it would return NULL if no direct optab was available. expand_expr_real_2()
would then proceed to a compare and branch / conditional move sequence.
Since we extend expand_binop() to do this now, we can remove the fallback from
expand_expr_real_2, as we would always return a non-NULL expansion from
expand_binop() for min/max operations.
I would appreciate confirmation that this approach is OK.
---
When extending resolve_overloaded_builtin to handle the generic atomic min/max
builtins, we first resolve them to the sign-specific generic (_N) variant based
on the signedness of the argument type. Then we fall through to the existing
logic that selects the sized variant. This works because of the order in which
the atomic min/max builtins are defined in sync-builtins.def:
<generic_variant> eg: BUILT_IN_ATOMIC_FETCH_MIN_N
<generic_signed_variant> eg: BUILT_IN_ATOMIC_FETCH_SMIN_N
<all_sized_signed_variants> eg: BUILT_IN_ATOMIC_FETCH_SMIN_1,
BUILT_IN_ATOMIC_FETCH_SMIN_2, etc.
<generic_unsigned_variant> eg: BUILT_IN_ATOMIC_FETCH_UMIN_N
<all_sized_unsigned_variants> eg: BUILT_IN_ATOMIC_FETCH_UMIN_1,
BUILT_IN_ATOMIC_FETCH_UMIN_2, etc.
---
Bootstrapped and regression tested on aarch64-linux-gnu and x86_64-linux-gnu.
Cross-compiled and regression tested for arm-linux-gnueabihf-armv7-a and
aarch64-linux-gnu without LSE.
Signed-off-by: Soumya AR <[email protected]>
gcc/ChangeLog:
* builtins.cc (expand_builtin_atomic_fetch_op): Special handling for
SMIN/SMAX.
(expand_builtin): Handle BUILT_IN_ATOMIC_FETCH_{MIN,MAX}_*
and BUILT_IN_ATOMIC_{MIN,MAX}_FETCH_*.
* expr.cc (expand_expr_real_2): Remove fallback to compare and branch /
conditional move sequence.
* optabs.cc (expand_binop): Add support for expanding min/max operations
using compare and branch / conditional move sequence.
(expand_atomic_fetch_op_no_fallback): Special handling for SMIN/SMAX.
(expand_atomic_fetch_op): Special handling for SMIN/SMAX.
gcc/c-family/ChangeLog:
* c-common.cc (resolve_overloaded_builtin): Resolve generic atomic
min/max builtins into size and sign specific builtins.
---
gcc/builtins.cc | 116 +++++++++++++++++++++++++++++++++++++++
gcc/c-family/c-common.cc | 50 +++++++++++++++++
gcc/expr.cc | 88 +----------------------------
gcc/optabs.cc | 97 ++++++++++++++++++++++++++++++++
4 files changed, 264 insertions(+), 87 deletions(-)
diff --git a/gcc/builtins.cc b/gcc/builtins.cc
index 7f580a3145f..c75bffd0136 100644
--- a/gcc/builtins.cc
+++ b/gcc/builtins.cc
@@ -6981,6 +6981,9 @@ expand_builtin_atomic_fetch_op (machine_mode mode, tree
exp, rtx target,
OPTAB_LIB_WIDEN);
ret = expand_simple_unop (mode, NOT, ret, target, true);
}
+ else if (code == SMIN || code == SMAX)
+ ret = expand_simple_binop (mode, code, ret, val, target, false,
+ OPTAB_LIB_WIDEN);
else
ret = expand_simple_binop (mode, code, ret, val, target, true,
OPTAB_LIB_WIDEN);
@@ -8918,6 +8921,119 @@ expand_builtin (tree exp, rtx target, rtx subtarget,
machine_mode mode,
return target;
break;
+ case BUILT_IN_ATOMIC_FETCH_SMAX_1:
+ case BUILT_IN_ATOMIC_FETCH_SMAX_2:
+ case BUILT_IN_ATOMIC_FETCH_SMAX_4:
+ case BUILT_IN_ATOMIC_FETCH_SMAX_8:
+ case BUILT_IN_ATOMIC_FETCH_SMAX_16:
+ mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_SMAX_1);
+ target = expand_builtin_atomic_fetch_op (mode, exp, target, SMAX, false,
+ ignore, BUILT_IN_NONE);
+ if (target)
+ return target;
+ break;
+
+ case BUILT_IN_ATOMIC_FETCH_SMIN_1:
+ case BUILT_IN_ATOMIC_FETCH_SMIN_2:
+ case BUILT_IN_ATOMIC_FETCH_SMIN_4:
+ case BUILT_IN_ATOMIC_FETCH_SMIN_8:
+ case BUILT_IN_ATOMIC_FETCH_SMIN_16:
+ mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_SMIN_1);
+ target = expand_builtin_atomic_fetch_op (mode, exp, target, SMIN, false,
+ ignore, BUILT_IN_NONE);
+ if (target)
+ return target;
+ break;
+
+ case BUILT_IN_ATOMIC_FETCH_UMAX_1:
+ case BUILT_IN_ATOMIC_FETCH_UMAX_2:
+ case BUILT_IN_ATOMIC_FETCH_UMAX_4:
+ case BUILT_IN_ATOMIC_FETCH_UMAX_8:
+ case BUILT_IN_ATOMIC_FETCH_UMAX_16:
+ mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_UMAX_1);
+ target = expand_builtin_atomic_fetch_op (mode, exp, target, UMAX, false,
+ ignore, BUILT_IN_NONE);
+ if (target)
+ return target;
+ break;
+
+ case BUILT_IN_ATOMIC_FETCH_UMIN_1:
+ case BUILT_IN_ATOMIC_FETCH_UMIN_2:
+ case BUILT_IN_ATOMIC_FETCH_UMIN_4:
+ case BUILT_IN_ATOMIC_FETCH_UMIN_8:
+ case BUILT_IN_ATOMIC_FETCH_UMIN_16:
+ mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_UMIN_1);
+ target = expand_builtin_atomic_fetch_op (mode, exp, target, UMIN, false,
+ ignore, BUILT_IN_NONE);
+ if (target)
+ return target;
+ break;
+
+ case BUILT_IN_ATOMIC_SMAX_FETCH_1:
+ case BUILT_IN_ATOMIC_SMAX_FETCH_2:
+ case BUILT_IN_ATOMIC_SMAX_FETCH_4:
+ case BUILT_IN_ATOMIC_SMAX_FETCH_8:
+ case BUILT_IN_ATOMIC_SMAX_FETCH_16:
+ {
+ enum built_in_function lib;
+ mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_SMAX_FETCH_1);
+ lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_SMAX_1 +
+ (fcode - BUILT_IN_ATOMIC_SMAX_FETCH_1));
+ target = expand_builtin_atomic_fetch_op (mode, exp, target, SMAX, true,
+ ignore, lib);
+ if (target)
+ return target;
+ break;
+ }
+ case BUILT_IN_ATOMIC_SMIN_FETCH_1:
+ case BUILT_IN_ATOMIC_SMIN_FETCH_2:
+ case BUILT_IN_ATOMIC_SMIN_FETCH_4:
+ case BUILT_IN_ATOMIC_SMIN_FETCH_8:
+ case BUILT_IN_ATOMIC_SMIN_FETCH_16:
+ {
+ enum built_in_function lib;
+ mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_SMIN_FETCH_1);
+ lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_SMIN_1 +
+ (fcode - BUILT_IN_ATOMIC_SMIN_FETCH_1));
+ target = expand_builtin_atomic_fetch_op (mode, exp, target, SMIN, true,
+ ignore, lib);
+ if (target)
+ return target;
+ break;
+ }
+ case BUILT_IN_ATOMIC_UMAX_FETCH_1:
+ case BUILT_IN_ATOMIC_UMAX_FETCH_2:
+ case BUILT_IN_ATOMIC_UMAX_FETCH_4:
+ case BUILT_IN_ATOMIC_UMAX_FETCH_8:
+ case BUILT_IN_ATOMIC_UMAX_FETCH_16:
+ {
+ enum built_in_function lib;
+ mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_UMAX_FETCH_1);
+ lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_UMAX_1 +
+ (fcode - BUILT_IN_ATOMIC_UMAX_FETCH_1));
+ target = expand_builtin_atomic_fetch_op (mode, exp, target, UMAX, true,
+ ignore, lib);
+ if (target)
+ return target;
+ break;
+ }
+ case BUILT_IN_ATOMIC_UMIN_FETCH_1:
+ case BUILT_IN_ATOMIC_UMIN_FETCH_2:
+ case BUILT_IN_ATOMIC_UMIN_FETCH_4:
+ case BUILT_IN_ATOMIC_UMIN_FETCH_8:
+ case BUILT_IN_ATOMIC_UMIN_FETCH_16:
+ {
+ enum built_in_function lib;
+ mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_UMIN_FETCH_1);
+ lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_UMIN_1 +
+ (fcode - BUILT_IN_ATOMIC_UMIN_FETCH_1));
+ target = expand_builtin_atomic_fetch_op (mode, exp, target, UMIN, true,
+ ignore, lib);
+ if (target)
+ return target;
+ break;
+ }
+
case BUILT_IN_ATOMIC_TEST_AND_SET:
target = expand_builtin_atomic_test_and_set (exp, target);
if (target)
diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc
index e7dd4602ac1..cdf9e6c938d 100644
--- a/gcc/c-family/c-common.cc
+++ b/gcc/c-family/c-common.cc
@@ -8701,12 +8701,24 @@ resolve_overloaded_builtin (location_t loc, tree
function,
case BUILT_IN_ATOMIC_NAND_FETCH_N:
case BUILT_IN_ATOMIC_XOR_FETCH_N:
case BUILT_IN_ATOMIC_OR_FETCH_N:
+ case BUILT_IN_ATOMIC_MAX_FETCH_N:
+ case BUILT_IN_ATOMIC_SMAX_FETCH_N:
+ case BUILT_IN_ATOMIC_UMAX_FETCH_N:
+ case BUILT_IN_ATOMIC_MIN_FETCH_N:
+ case BUILT_IN_ATOMIC_SMIN_FETCH_N:
+ case BUILT_IN_ATOMIC_UMIN_FETCH_N:
case BUILT_IN_ATOMIC_FETCH_ADD_N:
case BUILT_IN_ATOMIC_FETCH_SUB_N:
case BUILT_IN_ATOMIC_FETCH_AND_N:
case BUILT_IN_ATOMIC_FETCH_NAND_N:
case BUILT_IN_ATOMIC_FETCH_XOR_N:
case BUILT_IN_ATOMIC_FETCH_OR_N:
+ case BUILT_IN_ATOMIC_FETCH_MAX_N:
+ case BUILT_IN_ATOMIC_FETCH_SMAX_N:
+ case BUILT_IN_ATOMIC_FETCH_UMAX_N:
+ case BUILT_IN_ATOMIC_FETCH_MIN_N:
+ case BUILT_IN_ATOMIC_FETCH_SMIN_N:
+ case BUILT_IN_ATOMIC_FETCH_UMIN_N:
orig_format = false;
/* FALLTHRU */
case BUILT_IN_SYNC_FETCH_AND_ADD_N:
@@ -8756,6 +8768,44 @@ resolve_overloaded_builtin (location_t loc, tree
function,
return atomic_bitint_fetch_using_cas_loop (loc, orig_code, function,
params);
}
+ /* Transform generic MIN/MAX to signed/unsigned variants based on type
+ */
+ if (orig_code == BUILT_IN_ATOMIC_FETCH_MIN_N
+ || orig_code == BUILT_IN_ATOMIC_FETCH_MAX_N
+ || orig_code == BUILT_IN_ATOMIC_MIN_FETCH_N
+ || orig_code == BUILT_IN_ATOMIC_MAX_FETCH_N)
+ {
+ /* Get the type of the value being operated on. */
+ tree arg_type = TREE_TYPE ((*params)[0]);
+ if (TREE_CODE (arg_type) == POINTER_TYPE)
+ arg_type = TREE_TYPE (arg_type);
+
+ bool is_unsigned = TYPE_UNSIGNED (arg_type);
+ enum built_in_function signed_code, unsigned_code;
+
+ switch (orig_code)
+ {
+ case BUILT_IN_ATOMIC_FETCH_MIN_N:
+ signed_code = BUILT_IN_ATOMIC_FETCH_SMIN_N;
+ unsigned_code = BUILT_IN_ATOMIC_FETCH_UMIN_N;
+ break;
+ case BUILT_IN_ATOMIC_FETCH_MAX_N:
+ signed_code = BUILT_IN_ATOMIC_FETCH_SMAX_N;
+ unsigned_code = BUILT_IN_ATOMIC_FETCH_UMAX_N;
+ break;
+ case BUILT_IN_ATOMIC_MIN_FETCH_N:
+ signed_code = BUILT_IN_ATOMIC_SMIN_FETCH_N;
+ unsigned_code = BUILT_IN_ATOMIC_UMIN_FETCH_N;
+ break;
+ case BUILT_IN_ATOMIC_MAX_FETCH_N:
+ signed_code = BUILT_IN_ATOMIC_SMAX_FETCH_N;
+ unsigned_code = BUILT_IN_ATOMIC_UMAX_FETCH_N;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ orig_code = is_unsigned ? unsigned_code : signed_code;
+ }
fncode = (enum built_in_function)((int)orig_code + exact_log2 (n) + 1);
new_function = builtin_decl_explicit (fncode);
diff --git a/gcc/expr.cc b/gcc/expr.cc
index 3d2b2535158..e1fd4f3f0cb 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -9733,7 +9733,6 @@ expand_expr_real_2 (const_sepops ops, rtx target,
machine_mode tmode,
enum expand_modifier modifier)
{
rtx op0, op1, op2, temp;
- rtx_code_label *lab;
tree type;
int unsignedp;
machine_mode mode;
@@ -10536,92 +10535,7 @@ expand_expr_real_2 (const_sepops ops, rtx target,
machine_mode tmode,
if (temp != 0)
return temp;
- if (VECTOR_TYPE_P (type))
- gcc_unreachable ();
-
- /* At this point, a MEM target is no longer useful; we will get better
- code without it. */
-
- if (! REG_P (target))
- target = gen_reg_rtx (mode);
-
- /* If op1 was placed in target, swap op0 and op1. */
- if (target != op0 && target == op1)
- std::swap (op0, op1);
-
- /* We generate better code and avoid problems with op1 mentioning
- target by forcing op1 into a pseudo if it isn't a constant. */
- if (! CONSTANT_P (op1))
- op1 = force_reg (mode, op1);
-
- {
- enum rtx_code comparison_code;
- rtx cmpop1 = op1;
-
- if (code == MAX_EXPR)
- comparison_code = unsignedp ? GEU : GE;
- else
- comparison_code = unsignedp ? LEU : LE;
-
- /* Canonicalize to comparisons against 0. */
- if (op1 == const1_rtx)
- {
- /* Converting (a >= 1 ? a : 1) into (a > 0 ? a : 1)
- or (a != 0 ? a : 1) for unsigned.
- For MIN we are safe converting (a <= 1 ? a : 1)
- into (a <= 0 ? a : 1) */
- cmpop1 = const0_rtx;
- if (code == MAX_EXPR)
- comparison_code = unsignedp ? NE : GT;
- }
- if (op1 == constm1_rtx && !unsignedp)
- {
- /* Converting (a >= -1 ? a : -1) into (a >= 0 ? a : -1)
- and (a <= -1 ? a : -1) into (a < 0 ? a : -1) */
- cmpop1 = const0_rtx;
- if (code == MIN_EXPR)
- comparison_code = LT;
- }
-
- /* Use a conditional move if possible. */
- if (can_conditionally_move_p (mode))
- {
- rtx insn;
-
- start_sequence ();
-
- /* Try to emit the conditional move. */
- insn = emit_conditional_move (target,
- { comparison_code,
- op0, cmpop1, mode },
- op0, op1, mode,
- unsignedp);
-
- /* If we could do the conditional move, emit the sequence,
- and return. */
- if (insn)
- {
- rtx_insn *seq = end_sequence ();
- emit_insn (seq);
- return target;
- }
-
- /* Otherwise discard the sequence and fall back to code with
- branches. */
- end_sequence ();
- }
-
- if (target != op0)
- emit_move_insn (target, op0);
-
- lab = gen_label_rtx ();
- do_compare_rtx_and_jump (target, cmpop1, comparison_code,
- unsignedp, mode, NULL_RTX, NULL, lab,
- profile_probability::uninitialized ());
- }
- emit_move_insn (target, op1);
- emit_label (lab);
- return target;
+ gcc_unreachable ();
case BIT_NOT_EXPR:
op0 = expand_expr (treeop0, subtarget,
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index cacb15bb946..c19f8c0ebed 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -1643,6 +1643,93 @@ expand_binop (machine_mode mode, optab binoptab, rtx
op0, rtx op1,
}
}
}
+ /* Implement MIN/MAX expansion as a compare and branch/conditional move. */
+ if ((binoptab == smin_optab || binoptab == smax_optab
+ || binoptab == umin_optab || binoptab == umax_optab)
+ && !VECTOR_MODE_P (mode) && methods != OPTAB_DIRECT)
+ {
+ bool is_max = (binoptab == smax_optab || binoptab == umax_optab);
+ rtx_code_label *lab;
+
+ if (target == NULL_RTX || !REG_P (target))
+ target = gen_reg_rtx (mode);
+
+ /* If op1 was placed in target, swap op0 and op1. */
+ if (target != op0 && target == op1)
+ std::swap (op0, op1);
+
+ /* We generate better code and avoid problems with op1 mentioning
+ target by forcing op1 into a pseudo if it isn't a constant. */
+ if (!CONSTANT_P (op1))
+ op1 = force_reg (mode, op1);
+
+ {
+ enum rtx_code comparison_code;
+ rtx cmpop1 = op1;
+
+ if (is_max)
+ comparison_code = unsignedp ? GEU : GE;
+ else
+ comparison_code = unsignedp ? LEU : LE;
+
+ /* Canonicalize to comparisons against 0. */
+ if (op1 == const1_rtx)
+ {
+ /* Converting (a >= 1 ? a : 1) into (a > 0 ? a : 1)
+ or (a != 0 ? a : 1) for unsigned.
+ For MIN we are safe converting (a <= 1 ? a : 1)
+ into (a <= 0 ? a : 1) */
+ cmpop1 = const0_rtx;
+ if (is_max)
+ comparison_code = unsignedp ? NE : GT;
+ }
+ if (op1 == constm1_rtx && !unsignedp)
+ {
+ /* Converting (a >= -1 ? a : -1) into (a >= 0 ? a : -1)
+ and (a <= -1 ? a : -1) into (a < 0 ? a : -1) */
+ cmpop1 = const0_rtx;
+ if (!is_max)
+ comparison_code = LT;
+ }
+
+ /* Use a conditional move if possible. */
+ if (can_conditionally_move_p (mode))
+ {
+ rtx insn;
+
+ start_sequence ();
+
+ /* Try to emit the conditional move. */
+ insn = emit_conditional_move (target,
+ {comparison_code, op0, cmpop1, mode},
+ op0, op1, mode, unsignedp);
+
+ /* If we could do the conditional move, emit the sequence,
+ and return. */
+ if (insn)
+ {
+ rtx_insn *seq = end_sequence ();
+ emit_insn (seq);
+ return target;
+ }
+
+ /* Otherwise discard the sequence and fall back to code with
+ branches. */
+ end_sequence ();
+ }
+
+ if (target != op0)
+ emit_move_insn (target, op0);
+
+ lab = gen_label_rtx ();
+ do_compare_rtx_and_jump (target, cmpop1, comparison_code, unsignedp,
+ mode, NULL_RTX, NULL, lab,
+ profile_probability::uninitialized ());
+ }
+ emit_move_insn (target, op1);
+ emit_label (lab);
+ return target;
+ }
/* Look for a wider mode of the same class for which we think we
can open-code the operation. Check for a widening multiply at the
@@ -7807,6 +7894,11 @@ expand_atomic_fetch_op_no_fallback (rtx target, rtx mem,
rtx val,
true, OPTAB_LIB_WIDEN);
result = expand_simple_unop (mode, NOT, result, target, true);
}
+ else if (code == SMIN || code == SMAX)
+ {
+ result = expand_simple_binop (mode, code, result, val, target,
+ false, OPTAB_LIB_WIDEN);
+ }
else
result = expand_simple_binop (mode, code, result, val, target,
true, OPTAB_LIB_WIDEN);
@@ -7933,6 +8025,11 @@ expand_atomic_fetch_op (rtx target, rtx mem, rtx val,
enum rtx_code code,
true, OPTAB_LIB_WIDEN);
t1 = expand_simple_unop (mode, code, t1, NULL_RTX, true);
}
+ else if (code == SMIN || code == SMAX)
+ {
+ t1 = expand_simple_binop (mode, code, t1, val, NULL_RTX,
+ false, OPTAB_LIB_WIDEN);
+ }
else
t1 = expand_simple_binop (mode, code, t1, val, NULL_RTX, true,
OPTAB_LIB_WIDEN);
--
2.44.0