> -----Original Message-----
> From: Richard Biener <[email protected]>
> Sent: 05 January 2026 09:51
> To: Tamar Christina <[email protected]>
> Cc: [email protected]; nd <[email protected]>; [email protected];
> [email protected]
> Subject: Re: [PATCH 5/6][vect]: teach if-convert to predicate __builtin calls
> [PR122103]
>
> On Mon, Dec 29, 2025 at 4:27 PM Tamar Christina
> <[email protected]> wrote:
> >
> > The following testcase
> >
> > void f (float *__restrict c, int *__restrict d, int n)
> > {
> > for (int i = 0; i < n; i++)
> > {
> > if (d[i] > 1000)
> > c[i] = __builtin_sqrtf (c[i]);
> > }
> > }
> >
> > compiled with -O3 -march=armv9-a -fno-math-errno -ftrapping-math needs the
> > call to be predicated on the condition. It's invalid to execute the branch
> > unconditionally and use a select to extract the result later unless using
> > -fno-trapping-math.
> >
> > This change in if-conversion changes what we used to generate:
> >
> > _26 = _4 > 1000;
> > _34 = _33 + _2;
> > _5 = (float *) _34;
> > _6 = .MASK_LOAD (_5, 32B, _26, 0.0);
> > _7 = __builtin_sqrtf (_6);
> > .MASK_STORE (_5, 32B, _26, _7);
> >
> > into
> >
> > _26 = _4 > 1000;
> > _34 = _33 + _2;
> > _5 = (float *) _34;
> > _6 = .MASK_LOAD (_5, 32B, _26, 0.0);
> > _7 = .COND_SQRT (_26, _6, _6);
> > .MASK_STORE (_5, 32B, _26, _7);
> >
> > which correctly results in
> >
> > .L3:
> > ld1w z0.s, p7/z, [x1, x3, lsl 2]
> > cmpgt p7.s, p7/z, z0.s, z31.s
> > ld1w z30.s, p7/z, [x0, x3, lsl 2]
> > fsqrt z30.s, p7/m, z30.s
> > st1w z30.s, p7, [x0, x3, lsl 2]
> > incw x3
> > whilelo p7.s, w3, w2
> > b.any .L3
> >
> > instead of
> >
> > .L3:
> > ld1w z0.s, p7/z, [x1, x3, lsl 2]
> > cmpgt p7.s, p7/z, z0.s, z31.s
> > ld1w z30.s, p7/z, [x0, x3, lsl 2]
> > fsqrt z30.s, p6/m, z30.s
> > st1w z30.s, p7, [x0, x3, lsl 2]
> > incw x3
> > whilelo p7.s, w3, w2
> > b.any .L3
> >
> > Bootstrapped and regtested on aarch64-none-linux-gnu,
> > arm-none-linux-gnueabihf and x86_64-pc-linux-gnu (-m32 and -m64)
> > with no issues.
> >
> > Any comments?
>
> OK. But I'd like to see a testcase?
I should have mentioned that I included compile and run tests in patch 4 of
this series. They fail until the whole series is applied.
Thanks,
Tamar
>
> Thanks,
> Richard.
>
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> > PR tree-optimization/122103
> > * tree-if-conv.cc (ifcvt_can_predicate): Support
> > gimple_call_builtin_p.
> > (if_convertible_stmt_p, predicate_rhs_code,
> > predicate_statements): Likewise.
> >
> > ---
> > diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
> > index bb30c4fb35facf3289a7239af0d39d2d1b8e47c6..4666a06b98425191cc99343c0b837f88a5b8fa1a 100644
> > --- a/gcc/tree-if-conv.cc
> > +++ b/gcc/tree-if-conv.cc
> > @@ -1006,6 +1006,19 @@ ifcvt_can_predicate (gimple *stmt)
> > if (gimple_assign_single_p (stmt))
> > return ifcvt_can_use_mask_load_store (stmt);
> >
> > + if (gimple_call_builtin_p (stmt))
> > + if (tree callee = gimple_call_fndecl (stmt))
> > + {
> > + auto ifn = associated_internal_fn (callee);
> > + auto cond_ifn = get_conditional_internal_fn (ifn);
> > + tree type = TREE_TYPE (gimple_call_fntype (stmt));
> > + return (cond_ifn != IFN_LAST
> > + && vectorized_internal_fn_supported_p (cond_ifn, type));
> > + }
> > +
> > + if (!is_gimple_assign (stmt))
> > + return false;
> > +
> > tree_code code = gimple_assign_rhs_code (stmt);
> > tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
> > tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
> > @@ -1150,6 +1163,23 @@ if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
> > }
> > }
> >
> > + /* Check if the call can trap and if so require predication. */
> > + if (gimple_could_trap_p (stmt))
> > + {
> > + if (ifcvt_can_predicate (stmt))
> > + {
> > + gimple_set_plf (stmt, GF_PLF_2, true);
> > + need_to_predicate = true;
> > + return true;
> > + }
> > + else
> > + {
> > + if (dump_file && (dump_flags & TDF_DETAILS))
> > + fprintf (dump_file, "stmt could trap...\n");
> > + return false;
> > + }
> > + }
> > +
> > /* There are some IFN_s that are used to replace builtins but have the
> > same semantics. Even if MASK_CALL cannot handle them vectorable_call
> > will insert the proper selection, so do not block conversion. */
> > @@ -2840,20 +2870,38 @@ value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names,
> > SSA names defined earlier in STMT's block. */
> >
> > static gimple *
> > -predicate_rhs_code (gassign *stmt, tree mask, tree cond,
> > +predicate_rhs_code (gimple *stmt, tree mask, tree cond,
> > hash_set<tree_ssa_name_hash> *ssa_names)
> > {
> > - tree lhs = gimple_assign_lhs (stmt);
> > - tree_code code = gimple_assign_rhs_code (stmt);
> > - unsigned int nops = gimple_num_ops (stmt);
> > - internal_fn cond_fn = get_conditional_internal_fn (code);
> > + internal_fn cond_fn;
> > + if (is_gimple_assign (stmt))
> > + {
> > + tree_code code = gimple_assign_rhs_code (stmt);
> > + cond_fn = get_conditional_internal_fn (code);
> > + }
> > + else if (tree callee = gimple_call_fndecl (stmt))
> > + {
> > + auto ifn = associated_internal_fn (callee);
> > + cond_fn = get_conditional_internal_fn (ifn);
> > + }
> > + else
> > + return NULL;
> > +
> > + if (cond_fn == IFN_LAST)
> > + {
> > + gcc_assert (!gimple_could_trap_p (stmt));
> > + return NULL;
> > + }
> > +
> > + tree lhs = gimple_get_lhs (stmt);
> > + unsigned int nops = gimple_num_args (stmt) + 1;
> >
> > /* Construct the arguments to the conditional internal function. */
> > auto_vec<tree, 8> args;
> > args.safe_grow (nops + 1, true);
> > args[0] = mask;
> > - for (unsigned int i = 1; i < nops; ++i)
> > - args[i] = gimple_op (stmt, i);
> > + for (unsigned int i = 0; i < nops - 1; ++i)
> > + args[i+1] = gimple_arg (stmt, i);
> > args[nops] = NULL_TREE;
> >
> > /* Look for uses of the result to see whether they are COND_EXPRs that can
> > @@ -3030,8 +3078,9 @@ predicate_statements (loop_p loop)
> >
> > for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
> > {
> > - gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
> > - if (!stmt)
> > + gimple *stmt = gsi_stmt (gsi);
> > + if (!is_gimple_assign (stmt)
> > + && !gimple_call_builtin_p (stmt))
> > ;
> > else if (is_false_predicate (cond)
> > && gimple_vdef (stmt))
> > @@ -3042,9 +3091,14 @@ predicate_statements (loop_p loop)
> > continue;
> > }
> > else if (gimple_plf (stmt, GF_PLF_2)
> > - && is_gimple_assign (stmt))
> > + && (is_gimple_assign (stmt)
> > + || gimple_call_builtin_p (stmt)))
> > {
> > - tree lhs = gimple_assign_lhs (stmt);
> > + tree lhs = gimple_get_lhs (stmt);
> > + /* ?? Assume that calls without an LHS are not data processing
> > + and so no issues with traps. */
> > + if (!lhs)
> > + continue;
> > tree mask;
> > gimple *new_stmt;
> > gimple_seq stmts = NULL;
> > @@ -3080,11 +3134,14 @@ predicate_statements (loop_p loop)
> > vect_masks.safe_push (mask);
> > }
> > if (gimple_assign_single_p (stmt))
> > - new_stmt = predicate_load_or_store (&gsi, stmt, mask);
> > + new_stmt = predicate_load_or_store (&gsi,
> > + as_a <gassign *> (stmt),
> > + mask);
> > else
> > new_stmt = predicate_rhs_code (stmt, mask, cond,
> > &ssa_names);
> >
> > - gsi_replace (&gsi, new_stmt, true);
> > + if (new_stmt)
> > + gsi_replace (&gsi, new_stmt, true);
> > }
> > else if (gimple_needing_rewrite_undefined (stmt))
> > rewrite_to_defined_unconditional (&gsi);
> >
> >
> > --