On Mon, Dec 29, 2025 at 4:27 PM Tamar Christina <[email protected]> wrote:
>
> The following testcase
>
> void f (float *__restrict c, int *__restrict d, int n)
> {
>     for (int i = 0; i < n; i++)
>     {
>       if (d[i] > 1000)
>         c[i] = __builtin_sqrtf (c[i]);
>     }
> }
>
> when compiled with -O3 -march=armv9-a -fno-math-errno -ftrapping-math, needs
> the sqrt call to be predicated on the condition.  It is invalid to execute the
> conditional code unconditionally and use a select to pick the result
> afterwards, unless -fno-trapping-math is in effect.
>
> This change in if-conversion changes what we used to generate:
>
>   _26 = _4 > 1000;
>   _34 = _33 + _2;
>   _5 = (float *) _34;
>   _6 = .MASK_LOAD (_5, 32B, _26, 0.0);
>   _7 = __builtin_sqrtf (_6);
>   .MASK_STORE (_5, 32B, _26, _7);
>
> into
>
>   _26 = _4 > 1000;
>   _34 = _33 + _2;
>   _5 = (float *) _34;
>   _6 = .MASK_LOAD (_5, 32B, _26, 0.0);
>   _7 = .COND_SQRT (_26, _6, _6);
>   .MASK_STORE (_5, 32B, _26, _7);
>
> which correctly results in
>
> .L3:
>         ld1w    z0.s, p7/z, [x1, x3, lsl 2]
>         cmpgt   p7.s, p7/z, z0.s, z31.s
>         ld1w    z30.s, p7/z, [x0, x3, lsl 2]
>         fsqrt   z30.s, p7/m, z30.s
>         st1w    z30.s, p7, [x0, x3, lsl 2]
>         incw    x3
>         whilelo p7.s, w3, w2
>         b.any   .L3
>
> instead of
>
> .L3:
>         ld1w    z0.s, p7/z, [x1, x3, lsl 2]
>         cmpgt   p7.s, p7/z, z0.s, z31.s
>         ld1w    z30.s, p7/z, [x0, x3, lsl 2]
>         fsqrt   z30.s, p6/m, z30.s
>         st1w    z30.s, p7, [x0, x3, lsl 2]
>         incw    x3
>         whilelo p7.s, w3, w2
>         b.any   .L3
>
> Bootstrapped and regtested on aarch64-none-linux-gnu,
> arm-none-linux-gnueabihf and x86_64-pc-linux-gnu
> (-m32 and -m64) with no issues.
>
> Any comments?

OK.  But I'd like to see a testcase?

Thanks,
Richard.

> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
>         PR tree-optimization/122103
>         * tree-if-conv.cc (ifcvt_can_predicate): Support
>         gimple_call_builtin_p.
>         (if_convertible_stmt_p, predicate_rhs_code,
>         predicate_statements): Likewise.
>
> ---
> diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
> index bb30c4fb35facf3289a7239af0d39d2d1b8e47c6..4666a06b98425191cc99343c0b837f88a5b8fa1a 100644
> --- a/gcc/tree-if-conv.cc
> +++ b/gcc/tree-if-conv.cc
> @@ -1006,6 +1006,19 @@ ifcvt_can_predicate (gimple *stmt)
>    if (gimple_assign_single_p (stmt))
>      return ifcvt_can_use_mask_load_store (stmt);
>
> +  if (gimple_call_builtin_p (stmt))
> +    if (tree callee = gimple_call_fndecl (stmt))
> +      {
> +       auto ifn = associated_internal_fn (callee);
> +       auto cond_ifn = get_conditional_internal_fn (ifn);
> +       tree type = TREE_TYPE (gimple_call_fntype (stmt));
> +       return (cond_ifn != IFN_LAST
> +               && vectorized_internal_fn_supported_p (cond_ifn, type));
> +      }
> +
> +  if (!is_gimple_assign (stmt))
> +    return false;
> +
>    tree_code code = gimple_assign_rhs_code (stmt);
>    tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
>    tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
> @@ -1150,6 +1163,23 @@ if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
>                   }
>           }
>
> +       /* Check if the call can trap and if so require predication.  */
> +       if (gimple_could_trap_p (stmt))
> +         {
> +           if (ifcvt_can_predicate (stmt))
> +             {
> +               gimple_set_plf (stmt, GF_PLF_2, true);
> +               need_to_predicate = true;
> +               return true;
> +             }
> +           else
> +             {
> +               if (dump_file && (dump_flags & TDF_DETAILS))
> +                 fprintf (dump_file, "stmt could trap...\n");
> +               return false;
> +             }
> +         }
> +
>         /* There are some IFN_s that are used to replace builtins but have the
>            same semantics.  Even if MASK_CALL cannot handle them vectorable_call
>            will insert the proper selection, so do not block conversion.  */
> @@ -2840,20 +2870,38 @@ value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names,
>     SSA names defined earlier in STMT's block.  */
>
>  static gimple *
> -predicate_rhs_code (gassign *stmt, tree mask, tree cond,
> +predicate_rhs_code (gimple *stmt, tree mask, tree cond,
>                     hash_set<tree_ssa_name_hash> *ssa_names)
>  {
> -  tree lhs = gimple_assign_lhs (stmt);
> -  tree_code code = gimple_assign_rhs_code (stmt);
> -  unsigned int nops = gimple_num_ops (stmt);
> -  internal_fn cond_fn = get_conditional_internal_fn (code);
> +  internal_fn cond_fn;
> +  if (is_gimple_assign (stmt))
> +    {
> +      tree_code code = gimple_assign_rhs_code (stmt);
> +      cond_fn = get_conditional_internal_fn (code);
> +    }
> +  else if (tree callee = gimple_call_fndecl (stmt))
> +    {
> +      auto ifn = associated_internal_fn (callee);
> +      cond_fn = get_conditional_internal_fn (ifn);
> +    }
> +  else
> +    return NULL;
> +
> +  if (cond_fn == IFN_LAST)
> +    {
> +      gcc_assert (!gimple_could_trap_p (stmt));
> +      return NULL;
> +    }
> +
> +  tree lhs = gimple_get_lhs (stmt);
> +  unsigned int nops = gimple_num_args (stmt) + 1;
>
>    /* Construct the arguments to the conditional internal function.   */
>    auto_vec<tree, 8> args;
>    args.safe_grow (nops + 1, true);
>    args[0] = mask;
> -  for (unsigned int i = 1; i < nops; ++i)
> -    args[i] = gimple_op (stmt, i);
> +  for (unsigned int i = 0; i < nops - 1; ++i)
> +    args[i+1] = gimple_arg (stmt, i);
>    args[nops] = NULL_TREE;
>
>    /* Look for uses of the result to see whether they are COND_EXPRs that can
> @@ -3030,8 +3078,9 @@ predicate_statements (loop_p loop)
>
>        for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
>         {
> -         gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
> -         if (!stmt)
> +         gimple *stmt = gsi_stmt (gsi);
> +         if (!is_gimple_assign (stmt)
> +             && !gimple_call_builtin_p (stmt))
>             ;
>           else if (is_false_predicate (cond)
>                    && gimple_vdef (stmt))
> @@ -3042,9 +3091,14 @@ predicate_statements (loop_p loop)
>               continue;
>             }
>           else if (gimple_plf (stmt, GF_PLF_2)
> -                  && is_gimple_assign (stmt))
> +                  && (is_gimple_assign (stmt)
> +                      || gimple_call_builtin_p (stmt)))
>             {
> -             tree lhs = gimple_assign_lhs (stmt);
> +             tree lhs = gimple_get_lhs (stmt);
> +             /* ?? Assume that calls without an LHS are not data processing
> +                and so no issues with traps.  */
> +             if (!lhs)
> +               continue;
>               tree mask;
>               gimple *new_stmt;
>               gimple_seq stmts = NULL;
> @@ -3080,11 +3134,14 @@ predicate_statements (loop_p loop)
>                   vect_masks.safe_push (mask);
>                 }
>               if (gimple_assign_single_p (stmt))
> -               new_stmt = predicate_load_or_store (&gsi, stmt, mask);
> +               new_stmt = predicate_load_or_store (&gsi,
> +                                                   as_a <gassign *> (stmt),
> +                                                   mask);
>               else
>                 new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
>
> -             gsi_replace (&gsi, new_stmt, true);
> +             if (new_stmt)
> +               gsi_replace (&gsi, new_stmt, true);
>             }
>           else if (gimple_needing_rewrite_undefined (stmt))
>             rewrite_to_defined_unconditional (&gsi);
>
>
> --

Reply via email to