> -----Original Message-----
> From: Richard Biener <[email protected]>
> Sent: 05 January 2026 09:51
> To: Tamar Christina <[email protected]>
> Cc: [email protected]; nd <[email protected]>; [email protected];
> [email protected]
> Subject: Re: [PATCH 5/6][vect]: teach if-convert to predicate __builtin calls
> [PR122103]
> 
> On Mon, Dec 29, 2025 at 4:27 PM Tamar Christina
> <[email protected]> wrote:
> >
> > The following testcase
> >
> > void f (float *__restrict c, int *__restrict d, int n)
> > {
> >     for (int i = 0; i < n; i++)
> >     {
> >       if (d[i] > 1000)
> >         c[i] = __builtin_sqrtf (c[i]);
> >     }
> > }
> >
> > compiled with -O3 -march=armv9-a -fno-math-errno -ftrapping-math needs the
> > call to be predicated on the conditional.  It's invalid to execute the
> > branch unconditionally and use a select to extract the result later unless
> > using -fno-trapping-math.
> >
> > This change in if-conversion changes what we used to generate:
> >
> >   _26 = _4 > 1000;
> >   _34 = _33 + _2;
> >   _5 = (float *) _34;
> >   _6 = .MASK_LOAD (_5, 32B, _26, 0.0);
> >   _7 = __builtin_sqrtf (_6);
> >   .MASK_STORE (_5, 32B, _26, _7);
> >
> > into
> >
> >   _26 = _4 > 1000;
> >   _34 = _33 + _2;
> >   _5 = (float *) _34;
> >   _6 = .MASK_LOAD (_5, 32B, _26, 0.0);
> >   _7 = .COND_SQRT (_26, _6, _6);
> >   .MASK_STORE (_5, 32B, _26, _7);
> >
> > which correctly results in
> >
> > .L3:
> >         ld1w    z0.s, p7/z, [x1, x3, lsl 2]
> >         cmpgt   p7.s, p7/z, z0.s, z31.s
> >         ld1w    z30.s, p7/z, [x0, x3, lsl 2]
> >         fsqrt   z30.s, p7/m, z30.s
> >         st1w    z30.s, p7, [x0, x3, lsl 2]
> >         incw    x3
> >         whilelo p7.s, w3, w2
> >         b.any   .L3
> >
> > instead of
> >
> > .L3:
> >         ld1w    z0.s, p7/z, [x1, x3, lsl 2]
> >         cmpgt   p7.s, p7/z, z0.s, z31.s
> >         ld1w    z30.s, p7/z, [x0, x3, lsl 2]
> >         fsqrt   z30.s, p6/m, z30.s
> >         st1w    z30.s, p7, [x0, x3, lsl 2]
> >         incw    x3
> >         whilelo p7.s, w3, w2
> >         b.any   .L3
> >
> > Bootstrapped and regtested on aarch64-none-linux-gnu,
> > arm-none-linux-gnueabihf and x86_64-pc-linux-gnu
> > (-m32 and -m64) with no issues.
> >
> > Any comments?
> 
> OK.  But I'd like to see a testcase?

I should have mentioned that I included compile and run tests in patch 4.
They fail until the last patch of the series is applied.

Thanks,
Tamar

> 
> Thanks,
> Richard.
> 
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> >         PR tree-optimization/122103
> >         * tree-if-conv.cc (ifcvt_can_predicate): Support
> >         gimple_call_builtin_p.
> >         (if_convertible_stmt_p, predicate_rhs_code,
> >         predicate_statements): Likewise.
> >
> > ---
> > diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
> > index bb30c4fb35facf3289a7239af0d39d2d1b8e47c6..4666a06b98425191cc99343c0b837f88a5b8fa1a 100644
> > --- a/gcc/tree-if-conv.cc
> > +++ b/gcc/tree-if-conv.cc
> > @@ -1006,6 +1006,19 @@ ifcvt_can_predicate (gimple *stmt)
> >    if (gimple_assign_single_p (stmt))
> >      return ifcvt_can_use_mask_load_store (stmt);
> >
> > +  if (gimple_call_builtin_p (stmt))
> > +    if (tree callee = gimple_call_fndecl (stmt))
> > +      {
> > +       auto ifn = associated_internal_fn (callee);
> > +       auto cond_ifn = get_conditional_internal_fn (ifn);
> > +       tree type = TREE_TYPE (gimple_call_fntype (stmt));
> > +       return (cond_ifn != IFN_LAST
> > +               && vectorized_internal_fn_supported_p (cond_ifn, type));
> > +      }
> > +
> > +  if (!is_gimple_assign (stmt))
> > +    return false;
> > +
> >    tree_code code = gimple_assign_rhs_code (stmt);
> >    tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
> >    tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
> > @@ -1150,6 +1163,23 @@ if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
> >                   }
> >           }
> >
> > +       /* Check if the call can trap and if so require predication.  */
> > +       if (gimple_could_trap_p (stmt))
> > +         {
> > +           if (ifcvt_can_predicate (stmt))
> > +             {
> > +               gimple_set_plf (stmt, GF_PLF_2, true);
> > +               need_to_predicate = true;
> > +               return true;
> > +             }
> > +           else
> > +             {
> > +               if (dump_file && (dump_flags & TDF_DETAILS))
> > +                 fprintf (dump_file, "stmt could trap...\n");
> > +               return false;
> > +             }
> > +         }
> > +
> >         /* There are some IFN_s that are used to replace builtins but have the
> >            same semantics.  Even if MASK_CALL cannot handle them vectorable_call
> >            will insert the proper selection, so do not block conversion.  */
> > @@ -2840,20 +2870,38 @@ value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names,
> >     SSA names defined earlier in STMT's block.  */
> >
> >  static gimple *
> > -predicate_rhs_code (gassign *stmt, tree mask, tree cond,
> > +predicate_rhs_code (gimple *stmt, tree mask, tree cond,
> >                     hash_set<tree_ssa_name_hash> *ssa_names)
> >  {
> > -  tree lhs = gimple_assign_lhs (stmt);
> > -  tree_code code = gimple_assign_rhs_code (stmt);
> > -  unsigned int nops = gimple_num_ops (stmt);
> > -  internal_fn cond_fn = get_conditional_internal_fn (code);
> > +  internal_fn cond_fn;
> > +  if (is_gimple_assign (stmt))
> > +    {
> > +      tree_code code = gimple_assign_rhs_code (stmt);
> > +      cond_fn = get_conditional_internal_fn (code);
> > +    }
> > +  else if (tree callee = gimple_call_fndecl (stmt))
> > +    {
> > +      auto ifn = associated_internal_fn (callee);
> > +      cond_fn = get_conditional_internal_fn (ifn);
> > +    }
> > +  else
> > +    return NULL;
> > +
> > +  if (cond_fn == IFN_LAST)
> > +    {
> > +      gcc_assert (!gimple_could_trap_p (stmt));
> > +      return NULL;
> > +    }
> > +
> > +  tree lhs = gimple_get_lhs (stmt);
> > +  unsigned int nops = gimple_num_args (stmt) + 1;
> >
> >    /* Construct the arguments to the conditional internal function.   */
> >    auto_vec<tree, 8> args;
> >    args.safe_grow (nops + 1, true);
> >    args[0] = mask;
> > -  for (unsigned int i = 1; i < nops; ++i)
> > -    args[i] = gimple_op (stmt, i);
> > +  for (unsigned int i = 0; i < nops - 1; ++i)
> > +    args[i+1] = gimple_arg (stmt, i);
> >    args[nops] = NULL_TREE;
> >
> >    /* Look for uses of the result to see whether they are COND_EXPRs that can
> > @@ -3030,8 +3078,9 @@ predicate_statements (loop_p loop)
> >
> >        for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
> >         {
> > -         gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
> > -         if (!stmt)
> > +         gimple *stmt = gsi_stmt (gsi);
> > +         if (!is_gimple_assign (stmt)
> > +             && !gimple_call_builtin_p (stmt))
> >             ;
> >           else if (is_false_predicate (cond)
> >                    && gimple_vdef (stmt))
> > @@ -3042,9 +3091,14 @@ predicate_statements (loop_p loop)
> >               continue;
> >             }
> >           else if (gimple_plf (stmt, GF_PLF_2)
> > -                  && is_gimple_assign (stmt))
> > +                  && (is_gimple_assign (stmt)
> > +                      || gimple_call_builtin_p (stmt)))
> >             {
> > -             tree lhs = gimple_assign_lhs (stmt);
> > +             tree lhs = gimple_get_lhs (stmt);
> > +             /* ?? Assume that calls without an LHS are not data processing
> > +                and so no issues with traps.  */
> > +             if (!lhs)
> > +               continue;
> >               tree mask;
> >               gimple *new_stmt;
> >               gimple_seq stmts = NULL;
> > @@ -3080,11 +3134,14 @@ predicate_statements (loop_p loop)
> >                   vect_masks.safe_push (mask);
> >                 }
> >               if (gimple_assign_single_p (stmt))
> > -               new_stmt = predicate_load_or_store (&gsi, stmt, mask);
> > +               new_stmt = predicate_load_or_store (&gsi,
> > +                                                   as_a <gassign *> (stmt),
> > +                                                   mask);
> >               else
> >                 new_stmt = predicate_rhs_code (stmt, mask, cond, 
> > &ssa_names);
> >
> > -             gsi_replace (&gsi, new_stmt, true);
> > +             if (new_stmt)
> > +               gsi_replace (&gsi, new_stmt, true);
> >             }
> >           else if (gimple_needing_rewrite_undefined (stmt))
> >             rewrite_to_defined_unconditional (&gsi);
> >
> >
> > --

Reply via email to