On Mon, Dec 29, 2025 at 4:27 PM Tamar Christina <[email protected]> wrote:
>
> The following testcase
>
> void f (float *__restrict c, int *__restrict d, int n)
> {
> for (int i = 0; i < n; i++)
> {
> if (d[i] > 1000)
> c[i] = __builtin_sqrtf (c[i]);
> }
> }
>
> compiled with -O3 -march=armv9-a -fno-math-errno -ftrapping-math needs the sqrt
> to be predicated on the condition. It is invalid to execute the conditional
> code unconditionally and use a select to extract the result later, unless
> -fno-trapping-math is in effect.
>
> This change in if-conversion changes what we used to generate:
>
> _26 = _4 > 1000;
> _34 = _33 + _2;
> _5 = (float *) _34;
> _6 = .MASK_LOAD (_5, 32B, _26, 0.0);
> _7 = __builtin_sqrtf (_6);
> .MASK_STORE (_5, 32B, _26, _7);
>
> into
>
> _26 = _4 > 1000;
> _34 = _33 + _2;
> _5 = (float *) _34;
> _6 = .MASK_LOAD (_5, 32B, _26, 0.0);
> _7 = .COND_SQRT (_26, _6, _6);
> .MASK_STORE (_5, 32B, _26, _7);
>
> which correctly results in
>
> .L3:
> ld1w z0.s, p7/z, [x1, x3, lsl 2]
> cmpgt p7.s, p7/z, z0.s, z31.s
> ld1w z30.s, p7/z, [x0, x3, lsl 2]
> fsqrt z30.s, p7/m, z30.s
> st1w z30.s, p7, [x0, x3, lsl 2]
> incw x3
> whilelo p7.s, w3, w2
> b.any .L3
>
> instead of
>
> .L3:
> ld1w z0.s, p7/z, [x1, x3, lsl 2]
> cmpgt p7.s, p7/z, z0.s, z31.s
> ld1w z30.s, p7/z, [x0, x3, lsl 2]
> fsqrt z30.s, p6/m, z30.s
> st1w z30.s, p7, [x0, x3, lsl 2]
> incw x3
> whilelo p7.s, w3, w2
> b.any .L3
>
> Bootstrapped and regtested on aarch64-none-linux-gnu,
> arm-none-linux-gnueabihf, x86_64-pc-linux-gnu
> (-m32 and -m64) with no issues.
>
> Any comments?
OK, but I'd like to see a testcase added.
Thanks,
Richard.
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> PR tree-optimization/122103
> * tree-if-conv.cc (ifcvt_can_predicate): Support
> gimple_call_builtin_p.
> (if_convertible_stmt_p, predicate_rhs_code,
> predicate_statements): Likewise.
>
> ---
> diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
> index bb30c4fb35facf3289a7239af0d39d2d1b8e47c6..4666a06b98425191cc99343c0b837f88a5b8fa1a 100644
> --- a/gcc/tree-if-conv.cc
> +++ b/gcc/tree-if-conv.cc
> @@ -1006,6 +1006,19 @@ ifcvt_can_predicate (gimple *stmt)
> if (gimple_assign_single_p (stmt))
> return ifcvt_can_use_mask_load_store (stmt);
>
> + if (gimple_call_builtin_p (stmt))
> + if (tree callee = gimple_call_fndecl (stmt))
> + {
> + auto ifn = associated_internal_fn (callee);
> + auto cond_ifn = get_conditional_internal_fn (ifn);
> + tree type = TREE_TYPE (gimple_call_fntype (stmt));
> + return (cond_ifn != IFN_LAST
> + && vectorized_internal_fn_supported_p (cond_ifn, type));
> + }
> +
> + if (!is_gimple_assign (stmt))
> + return false;
> +
> tree_code code = gimple_assign_rhs_code (stmt);
> tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
> tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
> @@ -1150,6 +1163,23 @@ if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
> }
> }
>
> + /* Check if the call can trap and if so require predication. */
> + if (gimple_could_trap_p (stmt))
> + {
> + if (ifcvt_can_predicate (stmt))
> + {
> + gimple_set_plf (stmt, GF_PLF_2, true);
> + need_to_predicate = true;
> + return true;
> + }
> + else
> + {
> + if (dump_file && (dump_flags & TDF_DETAILS))
> + fprintf (dump_file, "stmt could trap...\n");
> + return false;
> + }
> + }
> +
> /* There are some IFN_s that are used to replace builtins but have the
> same semantics. Even if MASK_CALL cannot handle them vectorable_call
> will insert the proper selection, so do not block conversion. */
> @@ -2840,20 +2870,38 @@ value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names,
> SSA names defined earlier in STMT's block. */
>
> static gimple *
> -predicate_rhs_code (gassign *stmt, tree mask, tree cond,
> +predicate_rhs_code (gimple *stmt, tree mask, tree cond,
> hash_set<tree_ssa_name_hash> *ssa_names)
> {
> - tree lhs = gimple_assign_lhs (stmt);
> - tree_code code = gimple_assign_rhs_code (stmt);
> - unsigned int nops = gimple_num_ops (stmt);
> - internal_fn cond_fn = get_conditional_internal_fn (code);
> + internal_fn cond_fn;
> + if (is_gimple_assign (stmt))
> + {
> + tree_code code = gimple_assign_rhs_code (stmt);
> + cond_fn = get_conditional_internal_fn (code);
> + }
> + else if (tree callee = gimple_call_fndecl (stmt))
> + {
> + auto ifn = associated_internal_fn (callee);
> + cond_fn = get_conditional_internal_fn (ifn);
> + }
> + else
> + return NULL;
> +
> + if (cond_fn == IFN_LAST)
> + {
> + gcc_assert (!gimple_could_trap_p (stmt));
> + return NULL;
> + }
> +
> + tree lhs = gimple_get_lhs (stmt);
> + unsigned int nops = gimple_num_args (stmt) + 1;
>
> /* Construct the arguments to the conditional internal function. */
> auto_vec<tree, 8> args;
> args.safe_grow (nops + 1, true);
> args[0] = mask;
> - for (unsigned int i = 1; i < nops; ++i)
> - args[i] = gimple_op (stmt, i);
> + for (unsigned int i = 0; i < nops - 1; ++i)
> + args[i+1] = gimple_arg (stmt, i);
> args[nops] = NULL_TREE;
>
> /* Look for uses of the result to see whether they are COND_EXPRs that can
> @@ -3030,8 +3078,9 @@ predicate_statements (loop_p loop)
>
> for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
> {
> - gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
> - if (!stmt)
> + gimple *stmt = gsi_stmt (gsi);
> + if (!is_gimple_assign (stmt)
> + && !gimple_call_builtin_p (stmt))
> ;
> else if (is_false_predicate (cond)
> && gimple_vdef (stmt))
> @@ -3042,9 +3091,14 @@ predicate_statements (loop_p loop)
> continue;
> }
> else if (gimple_plf (stmt, GF_PLF_2)
> - && is_gimple_assign (stmt))
> + && (is_gimple_assign (stmt)
> + || gimple_call_builtin_p (stmt)))
> {
> - tree lhs = gimple_assign_lhs (stmt);
> + tree lhs = gimple_get_lhs (stmt);
> + /* ??? Assume that calls without an LHS do no data processing, so
> + trapping is not a concern for them. */
> + if (!lhs)
> + continue;
> tree mask;
> gimple *new_stmt;
> gimple_seq stmts = NULL;
> @@ -3080,11 +3134,14 @@ predicate_statements (loop_p loop)
> vect_masks.safe_push (mask);
> }
> if (gimple_assign_single_p (stmt))
> - new_stmt = predicate_load_or_store (&gsi, stmt, mask);
> + new_stmt = predicate_load_or_store (&gsi,
> + as_a <gassign *> (stmt),
> + mask);
> else
> new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
>
> - gsi_replace (&gsi, new_stmt, true);
> + if (new_stmt)
> + gsi_replace (&gsi, new_stmt, true);
> }
> else if (gimple_needing_rewrite_undefined (stmt))
> rewrite_to_defined_unconditional (&gsi);
>
>
> --