On Mon, Jul 14, 2025 at 7:04 PM Andrew Pinski <quic_apin...@quicinc.com> wrote:
>
> For possible reductions, ifconv currently handles if the addition
> is on one side of the if. But in the case of PR 119920, the reduction
> addition is on both sides of the if.
> E.g.
> ```
>   if (_27 == 0)
>     goto <bb 14>; [50.00%]
>   else
>     goto <bb 13>; [50.00%]
>
>   <bb 14>
>   a_29 = b_14(D) + a_17;
>   goto <bb 15>; [100.00%]
>
>   <bb 13>
>   a_28 = c_12(D) + a_17;
>
>   <bb 15>
>   # a_30 = PHI <a_28(13), a_29(14)>
> ```
>
> Which ifcvt converts into:
> ```
>   _34 = _32 + _33;
>   a_15 = (int) _34;
>   _23 = _4 == 0;
>   _37 = _33 + _35;
>   a_13 = (int) _37;
>   a_5 = _23 ? a_15 : a_13;
> ```
>
> But the vectorizer does not recognize this as a reduction.
> To fix this, we should factor out the addition from the `if`.
> This allows us to get:
> ```
>   iftmp.0_7 = _22 ? b_13(D) : c_12(D);
>   a_14 = iftmp.0_7 + a_18;
> ```
>
> Which then the vectorizer recognizes as a reduction.
>
> In the case of PR 112324 and PR 110015, it is similar but with MAX_EXPR 
> reduction
> instead of an addition.
>
> Note while this should be done in phiopt, there are regressions
> due to other passes not able to handle the factored out cases
> (see linked bug to PR 64700). I have not had time to fix all of the passes
> that could handle the addition being in the if/then/else rather than being 
> outside yet.
> So this is I thought it would be useful just to have a localized version in 
> ifconv which
> is then only used for the vectorizer.

OK.

Thanks,
Richard.

> Bootstrapped and tested on x86_64-linux-gnu.
>
>         PR tree-optimization/119920
>         PR tree-optimization/112324
>         PR tree-optimization/110015
>
> gcc/ChangeLog:
>
>         * tree-if-conv.cc (find_different_opnum): New function.
>         (factor_out_operators): New function.
>         (predicate_scalar_phi): Call factor_out_operators when
>         there is only 2 elements of a phi.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/vect/vect-reduc-cond-1.c: New test.
>         * gcc.dg/vect/vect-reduc-cond-2.c: New test.
>         * gcc.dg/vect/vect-reduc-cond-3.c: New test.
>
> Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com>
> ---
>  gcc/testsuite/gcc.dg/vect/vect-reduc-cond-1.c |  59 ++++++
>  gcc/testsuite/gcc.dg/vect/vect-reduc-cond-2.c |  61 ++++++
>  gcc/testsuite/gcc.dg/vect/vect-reduc-cond-3.c |  56 ++++++
>  gcc/tree-if-conv.cc                           | 186 ++++++++++++++++++
>  4 files changed, 362 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/vect/vect-reduc-cond-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/vect/vect-reduc-cond-2.c
>  create mode 100644 gcc/testsuite/gcc.dg/vect/vect-reduc-cond-3.c
>
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-cond-1.c 
> b/gcc/testsuite/gcc.dg/vect/vect-reduc-cond-1.c
> new file mode 100644
> index 00000000000..d8356b4685c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-cond-1.c
> @@ -0,0 +1,59 @@
> +/* { dg-require-effective-target vect_int } */
> +
> +#include <stdarg.h>
> +#include "tree-vect.h"
> +
> +/* PR tree-optimization/119920 */
> +
> +#define N 32
> +
> +unsigned int ub[N];
> +
> +/* Test vectorization of reduction of unsigned-int.  */
> +
> +__attribute__ ((noinline, noipa))
> +void init(void)
> +{
> +  #pragma GCC novector
> +  for(int i = 0;i < N; i++)
> +    ub[i] = i;
> +}
> +
> +
> +__attribute__ ((noinline, noipa))
> +void main1 (unsigned int b, unsigned int c)
> +{
> +  int i;
> +  unsigned int usum = 0;
> +
> +  init();
> +
> +  /* Summation.  */
> +  for (i = 0; i < N; i++) {
> +    if ( ub[i] < N/2 )
> +    {
> +      usum += b;
> +    }
> +    else
> +    {
> +      usum += c;
> +    }
> +  }
> +
> +  /* check results:  */
> + /* __builtin_printf("%d : %d\n", usum, (N/2*b + N/2*c)); */
> +  if (usum != N/2*b + N/2*c)
> +    abort ();
> +}
> +
> +int main (void)
> +{
> +  check_vect ();
> +
> +  main1 (0, 0);
> +  main1 (1, 1);
> +  main1 (10, 1);
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { 
> vect_no_int_add } } } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-cond-2.c 
> b/gcc/testsuite/gcc.dg/vect/vect-reduc-cond-2.c
> new file mode 100644
> index 00000000000..80c1dba9fc1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-cond-2.c
> @@ -0,0 +1,61 @@
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-additional-options "-fdump-tree-ifcvt-details" } */
> +
> +#include <stdarg.h>
> +#include "tree-vect.h"
> +
> +/* PR tree-optimization/119920 */
> +
> +#define N 32
> +
> +unsigned int ub[N];
> +unsigned int ua[N];
> +
> +/* Test vectorization of reduction of unsigned-int.  */
> +
> +__attribute__ ((noinline, noipa))
> +void init(void)
> +{
> +  #pragma GCC novector
> +  for(int i = 0;i < N; i++) {
> +    ub[i] = i;
> +    ua[i] = 1;
> +  }
> +}
> +
> +
> +__attribute__ ((noinline, noipa))
> +void main1 (unsigned int b, unsigned int c)
> +{
> +  int i;
> +  unsigned int usum = 0;
> +
> +  init();
> +
> +  /* Summation.  */
> +  for (i = 0; i < N; i++) {
> +    unsigned t = ua[i];
> +    if ( ub[i] < N/2 )
> +      usum += b * t;
> +    else
> +      usum += c * t;
> +  }
> +
> +  /* check results:  */
> +  /* __builtin_printf("%d : %d\n", usum, (N/2*b*1 + N/2*c*1)); */
> +  if (usum != N/2*b + N/2*c)
> +    abort ();
> +}
> +
> +int main (void)
> +{
> +  check_vect ();
> +
> +  main1 (0, 0);
> +  main1 (1, 1);
> +  main1 (10, 1);
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { 
> vect_no_int_add } } } } */
> +/* { dg-final { scan-tree-dump-times "changed to factor operation out from 
> COND_EXPR" 2 "ifcvt" } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-cond-3.c 
> b/gcc/testsuite/gcc.dg/vect/vect-reduc-cond-3.c
> new file mode 100644
> index 00000000000..e425869ccf9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-cond-3.c
> @@ -0,0 +1,56 @@
> +/* { dg-require-effective-target vect_int } */
> +
> +#include <stdarg.h>
> +#include "tree-vect.h"
> +
> +/* PR tree-optimization/112324 */
> +/* PR tree-optimization/110015 */
> +
> +#define N 32
> +
> +int ub[N];
> +
> +/* Test vectorization of reduction of int max with some extra code involed.  
> */
> +
> +__attribute__ ((noinline, noipa))
> +void init(void)
> +{
> +  #pragma GCC novector
> +  for(int i = 0;i < N; i++)
> +    ub[i] = (i&4) && (i&1) ? -i : i;
> +}
> +
> +#define MAX(a, b) ((a) > (b) ? (a) : (b))
> +
> +__attribute__ ((noinline, noipa))
> +void main1 (void)
> +{
> +  int i;
> +  int max = 0;
> +
> +  init();
> +
> +  /* Summation.  */
> +  for (i = 0; i < N; i++) {
> +    int tmp = ub[i];
> +    if (tmp < 0)
> +      max = MAX (-tmp, max);
> +    else
> +      max = MAX (tmp, max);
> +  }
> +
> +  /* check results:  */
> +  /* __builtin_printf("%d : %d\n", max, N); */
> +  if (max != N - 1)
> +    abort ();
> +}
> +
> +int main (void)
> +{
> +  check_vect ();
> +
> +  main1 ();
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { 
> vect_no_int_min_max } } } } */
> diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
> index 366e959fd77..ba25c19c50c 100644
> --- a/gcc/tree-if-conv.cc
> +++ b/gcc/tree-if-conv.cc
> @@ -2114,6 +2114,187 @@ gen_phi_arg_condition (gphi *phi, ifcvt_arg_entry_t 
> &arg,
>    return cond;
>  }
>
> +/* Find the operand which is different between ARG0_OP and ARG1_OP.
> +   Returns the operand num where the difference is.
> +   Set NEWARG0 and NEWARG1 from the different argument.
> +   Returns -1 if none is found.
> +   If ARG0_OP/ARG1_OP is commutative also try swapping the
> +   two commutative operands and return the operand number where
> +   the difference happens in ARG0_OP. */
> +
> +static int
> +find_different_opnum (const gimple_match_op &arg0_op,
> +                     const gimple_match_op &arg1_op,
> +                     tree *new_arg0, tree *new_arg1)
> +{
> +  unsigned opnum = -1;
> +  unsigned first;
> +  first = first_commutative_argument (arg1_op.code, arg1_op.type);
> +  for (unsigned i = 0; i < arg0_op.num_ops; i++)
> +    {
> +      if (!operand_equal_for_phi_arg_p (arg0_op.ops[i],
> +                                       arg1_op.ops[i]))
> +       {
> +         /* Can handle only one non equal operand. */
> +         if (opnum != -1u)
> +           {
> +             /* Though if opnum is right before i and opnum is equal
> +                to the first communtative argument, handle communtative
> +                specially. */
> +             if (i == opnum + 1 && opnum == first)
> +               goto commutative;
> +             return -1;
> +           }
> +         opnum = i;
> +       }
> +  }
> +  /* If all operands are equal only do this is there was single
> +     operand.  */
> +  if (opnum == -1u)
> +    {
> +      if (arg0_op.num_ops != 1)
> +       return -1;
> +      opnum = 0;
> +    }
> +  *new_arg0 = arg0_op.ops[opnum];
> +  *new_arg1 = arg1_op.ops[opnum];
> +  return opnum;
> +
> +/* Handle commutative operations. */
> +commutative:
> +  gcc_assert (first != -1u);
> +
> +  /* Check the rest of the arguments to make sure they are the same. */
> +  for (unsigned i = first + 2; i < arg0_op.num_ops; i++)
> +    if (!operand_equal_for_phi_arg_p (arg0_op.ops[i],
> +                                     arg1_op.ops[i]))
> +      return -1;
> +
> +  /* If the arg0[first+1] and arg1[first] are the same
> +     then the one which is different is arg0[first] and arg1[first+1]
> +     return first since this is based on arg0.  */
> +  if (operand_equal_for_phi_arg_p (arg0_op.ops[first + 1],
> +                                  arg1_op.ops[first]))
> +    {
> +       *new_arg0 = arg0_op.ops[first];
> +       *new_arg1 = arg1_op.ops[first + 1];
> +       return first;
> +    }
> +  /* If the arg0[first] and arg1[first+1] are the same
> +     then the one which is different is arg0[first+1] and arg1[first]
> +     return first+1 since this is based on arg0.  */
> +  if (operand_equal_for_phi_arg_p (arg0_op.ops[first],
> +                                  arg1_op.ops[first + 1]))
> +    {
> +       *new_arg0 = arg0_op.ops[first + 1];
> +       *new_arg1 = arg1_op.ops[first];
> +       return first + 1;
> +    }
> +  return -1;
> +}
> +
> +/* Factors out an operation from *ARG0 and *ARG1 and
> +   create the new statement at GSI. *RES is the
> +   result of that new statement. Update *ARG0 and *ARG1
> +   and *RES to the new values if the factoring happened.
> +   Loops until all of the factoring is completed.  */
> +
> +static void
> +factor_out_operators (tree *res, gimple_stmt_iterator *gsi,
> +                     tree *arg0, tree *arg1, gphi *phi)
> +{
> +  gimple_match_op arg0_op, arg1_op;
> +  bool repeated = false;
> +
> +again:
> +  if (TREE_CODE (*arg0) != SSA_NAME || TREE_CODE (*arg1) != SSA_NAME)
> +    return;
> +
> +  if (operand_equal_p (*arg0, *arg1))
> +    return;
> +
> +  /* If either args have > 1 use, then this transformation actually
> +     increases the number of expressions evaluated at runtime.  */
> +  if (repeated
> +      ? (!has_zero_uses (*arg0) || !has_zero_uses (*arg1))
> +      : (!has_single_use (*arg0) || !has_single_use (*arg1)))
> +    return;
> +
> +  gimple *arg0_def_stmt = SSA_NAME_DEF_STMT (*arg0);
> +  if (!gimple_extract_op (arg0_def_stmt, &arg0_op))
> +    return;
> +
> +  /* At this point there should be no ssa names occuring in abnormals.  */
> +  gcc_assert (!arg0_op.operands_occurs_in_abnormal_phi ());
> +
> +  gimple *arg1_def_stmt = SSA_NAME_DEF_STMT (*arg1);
> +  if (!gimple_extract_op (arg1_def_stmt, &arg1_op))
> +    return;
> +
> +  /* At this point there should be no ssa names occuring in abnormals.  */
> +  gcc_assert (!arg1_op.operands_occurs_in_abnormal_phi ());
> +
> +  /* No factoring can happen if the codes are different
> +     or the number operands.  */
> +  if (arg1_op.code != arg0_op.code
> +      || arg1_op.num_ops != arg0_op.num_ops)
> +    return;
> +
> +  tree new_arg0, new_arg1;
> +  int opnum = find_different_opnum (arg0_op, arg1_op, &new_arg0, &new_arg1);
> +  if (opnum == -1)
> +    return;
> +
> +  if (!types_compatible_p (TREE_TYPE (new_arg0), TREE_TYPE (new_arg1)))
> +    return;
> +  tree new_res = make_ssa_name (TREE_TYPE (new_arg0), NULL);
> +
> +  /* Create the operation stmt if possible and insert it.  */
> +
> +  gimple_match_op new_op = arg0_op;
> +  new_op.ops[opnum] = new_res;
> +  gimple_seq seq = NULL;
> +  tree result = *res;
> +  result = maybe_push_res_to_seq (&new_op, &seq, result);
> +
> +  /* If we can't create the new statement, release the temp name
> +     and return back.  */
> +  if (!result)
> +    {
> +      release_ssa_name (new_res);
> +      return;
> +    }
> +  gsi_insert_seq_before (gsi, seq, GSI_CONTINUE_LINKING);
> +
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    {
> +      fprintf (dump_file, "PHI ");
> +      print_generic_expr (dump_file, gimple_phi_result (phi));
> +      fprintf (dump_file,
> +              " changed to factor operation out from COND_EXPR.\n");
> +      fprintf (dump_file, "New stmt with OPERATION that defines ");
> +      print_generic_expr (dump_file, result);
> +      fprintf (dump_file, ".\n");
> +    }
> +
> +  /* Remove the old operation(s) that has single use.  */
> +  gimple_stmt_iterator gsi_for_def;
> +
> +  gsi_for_def = gsi_for_stmt (arg0_def_stmt);
> +  gsi_remove (&gsi_for_def, true);
> +  release_defs (arg0_def_stmt);
> +  gsi_for_def = gsi_for_stmt (arg1_def_stmt);
> +  gsi_remove (&gsi_for_def, true);
> +  release_defs (arg1_def_stmt);
> +
> +  /* Update the arguments and try again.  */
> +  *arg0 = new_arg0;
> +  *arg1 = new_arg1;
> +  *res = new_res;
> +  repeated = true;
> +  goto again;
> +}
> +
>  /* Create the smallest nested conditional possible.  On pre-order we record
>     which conditionals are live, and on post-order rewrite the chain by 
> removing
>     already active conditions.
> @@ -2293,6 +2474,11 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator 
> *gsi, bool loop_versioned)
>           arg0 = gimple_phi_arg_def (phi, 0);
>           arg1 = gimple_phi_arg_def (phi, 1);
>         }
> +
> +      /* Factor out operand if possible. This can only be done easily
> +        for PHI with 2 elements.  */
> +      factor_out_operators (&res, gsi, &arg0, &arg1, phi);
> +
>        if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
>                                     &op0, &op1, false, &has_nop,
>                                     &nop_reduc))
> --
> 2.43.0
>

Reply via email to