On Mon, Jul 14, 2025 at 7:04 PM Andrew Pinski <quic_apin...@quicinc.com> wrote: > > For possible reductions, ifconv currently handles if the addition > is on one side of the if. But in the case of PR 119920, the reduction > addition is on both sides of the if. > E.g. > ``` > if (_27 == 0) > goto <bb 14>; [50.00%] > else > goto <bb 13>; [50.00%] > > <bb 14> > a_29 = b_14(D) + a_17; > goto <bb 15>; [100.00%] > > <bb 13> > a_28 = c_12(D) + a_17; > > <bb 15> > # a_30 = PHI <a_28(13), a_29(14)> > ``` > > Which ifcvt converts into: > ``` > _34 = _32 + _33; > a_15 = (int) _34; > _23 = _4 == 0; > _37 = _33 + _35; > a_13 = (int) _37; > a_5 = _23 ? a_15 : a_13; > ``` > > But the vectorizer does not recognize this as a reduction. > To fix this, we should factor out the addition from the `if`. > This allows us to get: > ``` > iftmp.0_7 = _22 ? b_13(D) : c_12(D); > a_14 = iftmp.0_7 + a_18; > ``` > > Which then the vectorizer recognizes as a reduction. > > In the case of PR 112324 and PR 110015, it is similar but with MAX_EXPR > reduction > instead of an addition. > > Note while this should be done in phiopt, there are regressions > due to other passes not able to handle the factored out cases > (see linked bug to PR 64700). I have not had time to fix all of the passes > that could handle the addition being in the if/then/else rather than being > outside yet. > So this is I thought it would be useful just to have a localized version in > ifconv which > is then only used for the vectorizer.
OK. Thanks, Richard. > Bootstrapped and tested on x86_64-linux-gnu. > > PR tree-optimization/119920 > PR tree-optimization/112324 > PR tree-optimization/110015 > > gcc/ChangeLog: > > * tree-if-conv.cc (find_different_opnum): New function. > (factor_out_operators): New function. > (predicate_scalar_phi): Call factor_out_operators when > there is only 2 elements of a phi. > > gcc/testsuite/ChangeLog: > > * gcc.dg/vect/vect-reduc-cond-1.c: New test. > * gcc.dg/vect/vect-reduc-cond-2.c: New test. > * gcc.dg/vect/vect-reduc-cond-3.c: New test. > > Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com> > --- > gcc/testsuite/gcc.dg/vect/vect-reduc-cond-1.c | 59 ++++++ > gcc/testsuite/gcc.dg/vect/vect-reduc-cond-2.c | 61 ++++++ > gcc/testsuite/gcc.dg/vect/vect-reduc-cond-3.c | 56 ++++++ > gcc/tree-if-conv.cc | 186 ++++++++++++++++++ > 4 files changed, 362 insertions(+) > create mode 100644 gcc/testsuite/gcc.dg/vect/vect-reduc-cond-1.c > create mode 100644 gcc/testsuite/gcc.dg/vect/vect-reduc-cond-2.c > create mode 100644 gcc/testsuite/gcc.dg/vect/vect-reduc-cond-3.c > > diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-cond-1.c > b/gcc/testsuite/gcc.dg/vect/vect-reduc-cond-1.c > new file mode 100644 > index 00000000000..d8356b4685c > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-cond-1.c > @@ -0,0 +1,59 @@ > +/* { dg-require-effective-target vect_int } */ > + > +#include <stdarg.h> > +#include "tree-vect.h" > + > +/* PR tree-optimization/119920 */ > + > +#define N 32 > + > +unsigned int ub[N]; > + > +/* Test vectorization of reduction of unsigned-int. */ > + > +__attribute__ ((noinline, noipa)) > +void init(void) > +{ > + #pragma GCC novector > + for(int i = 0;i < N; i++) > + ub[i] = i; > +} > + > + > +__attribute__ ((noinline, noipa)) > +void main1 (unsigned int b, unsigned int c) > +{ > + int i; > + unsigned int usum = 0; > + > + init(); > + > + /* Summation. */ > + for (i = 0; i < N; i++) { > + if ( ub[i] < N/2 ) > + { > + usum += b; > + } > + else > + { > + usum += c; > + } > + } > + > + /* check results: */ > + /* __builtin_printf("%d : %d\n", usum, (N/2*b + N/2*c)); */ > + if (usum != N/2*b + N/2*c) > + abort (); > +} > + > +int main (void) > +{ > + check_vect (); > + > + main1 (0, 0); > + main1 (1, 1); > + main1 (10, 1); > + return 0; > +} > + > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { > vect_no_int_add } } } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-cond-2.c > b/gcc/testsuite/gcc.dg/vect/vect-reduc-cond-2.c > new file mode 100644 > index 00000000000..80c1dba9fc1 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-cond-2.c > @@ -0,0 +1,61 @@ > +/* { dg-require-effective-target vect_int } */ > +/* { dg-additional-options "-fdump-tree-ifcvt-details" } */ > + > +#include <stdarg.h> > +#include "tree-vect.h" > + > +/* PR tree-optimization/119920 */ > + > +#define N 32 > + > +unsigned int ub[N]; > +unsigned int ua[N]; > + > +/* Test vectorization of reduction of unsigned-int. */ > + > +__attribute__ ((noinline, noipa)) > +void init(void) > +{ > + #pragma GCC novector > + for(int i = 0;i < N; i++) { > + ub[i] = i; > + ua[i] = 1; > + } > +} > + > + > +__attribute__ ((noinline, noipa)) > +void main1 (unsigned int b, unsigned int c) > +{ > + int i; > + unsigned int usum = 0; > + > + init(); > + > + /* Summation. */ > + for (i = 0; i < N; i++) { > + unsigned t = ua[i]; > + if ( ub[i] < N/2 ) > + usum += b * t; > + else > + usum += c * t; > + } > + > + /* check results: */ > + /* __builtin_printf("%d : %d\n", usum, (N/2*b*1 + N/2*c*1)); */ > + if (usum != N/2*b + N/2*c) > + abort (); > +} > + > +int main (void) > +{ > + check_vect (); > + > + main1 (0, 0); > + main1 (1, 1); > + main1 (10, 1); > + return 0; > +} > + > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { > vect_no_int_add } } } } */ > +/* { dg-final { scan-tree-dump-times "changed to factor operation out from > COND_EXPR" 2 "ifcvt" } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-cond-3.c > b/gcc/testsuite/gcc.dg/vect/vect-reduc-cond-3.c > new file mode 100644 > index 00000000000..e425869ccf9 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-cond-3.c > @@ -0,0 +1,56 @@ > +/* { dg-require-effective-target vect_int } */ > + > +#include <stdarg.h> > +#include "tree-vect.h" > + > +/* PR tree-optimization/112324 */ > +/* PR tree-optimization/110015 */ > + > +#define N 32 > + > +int ub[N]; > + > +/* Test vectorization of reduction of int max with some extra code involed. > */ > + > +__attribute__ ((noinline, noipa)) > +void init(void) > +{ > + #pragma GCC novector > + for(int i = 0;i < N; i++) > + ub[i] = (i&4) && (i&1) ? -i : i; > +} > + > +#define MAX(a, b) ((a) > (b) ? (a) : (b)) > + > +__attribute__ ((noinline, noipa)) > +void main1 (void) > +{ > + int i; > + int max = 0; > + > + init(); > + > + /* Summation. */ > + for (i = 0; i < N; i++) { > + int tmp = ub[i]; > + if (tmp < 0) > + max = MAX (-tmp, max); > + else > + max = MAX (tmp, max); > + } > + > + /* check results: */ > + /* __builtin_printf("%d : %d\n", max, N); */ > + if (max != N - 1) > + abort (); > +} > + > +int main (void) > +{ > + check_vect (); > + > + main1 (); > + return 0; > +} > + > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { > vect_no_int_min_max } } } } */ > diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc > index 366e959fd77..ba25c19c50c 100644 > --- a/gcc/tree-if-conv.cc > +++ b/gcc/tree-if-conv.cc > @@ -2114,6 +2114,187 @@ gen_phi_arg_condition (gphi *phi, ifcvt_arg_entry_t > &arg, > return cond; > } > > +/* Find the operand which is different between ARG0_OP and ARG1_OP. > + Returns the operand num where the difference is. > + Set NEWARG0 and NEWARG1 from the different argument. > + Returns -1 if none is found. > + If ARG0_OP/ARG1_OP is commutative also try swapping the > + two commutative operands and return the operand number where > + the difference happens in ARG0_OP. */ > + > +static int > +find_different_opnum (const gimple_match_op &arg0_op, > + const gimple_match_op &arg1_op, > + tree *new_arg0, tree *new_arg1) > +{ > + unsigned opnum = -1; > + unsigned first; > + first = first_commutative_argument (arg1_op.code, arg1_op.type); > + for (unsigned i = 0; i < arg0_op.num_ops; i++) > + { > + if (!operand_equal_for_phi_arg_p (arg0_op.ops[i], > + arg1_op.ops[i])) > + { > + /* Can handle only one non equal operand. */ > + if (opnum != -1u) > + { > + /* Though if opnum is right before i and opnum is equal > + to the first communtative argument, handle communtative > + specially. */ > + if (i == opnum + 1 && opnum == first) > + goto commutative; > + return -1; > + } > + opnum = i; > + } > + } > + /* If all operands are equal only do this is there was single > + operand. */ > + if (opnum == -1u) > + { > + if (arg0_op.num_ops != 1) > + return -1; > + opnum = 0; > + } > + *new_arg0 = arg0_op.ops[opnum]; > + *new_arg1 = arg1_op.ops[opnum]; > + return opnum; > + > +/* Handle commutative operations. */ > +commutative: > + gcc_assert (first != -1u); > + > + /* Check the rest of the arguments to make sure they are the same. */ > + for (unsigned i = first + 2; i < arg0_op.num_ops; i++) > + if (!operand_equal_for_phi_arg_p (arg0_op.ops[i], > + arg1_op.ops[i])) > + return -1; > + > + /* If the arg0[first+1] and arg1[first] are the same > + then the one which is different is arg0[first] and arg1[first+1] > + return first since this is based on arg0. */ > + if (operand_equal_for_phi_arg_p (arg0_op.ops[first + 1], > + arg1_op.ops[first])) > + { > + *new_arg0 = arg0_op.ops[first]; > + *new_arg1 = arg1_op.ops[first + 1]; > + return first; > + } > + /* If the arg0[first] and arg1[first+1] are the same > + then the one which is different is arg0[first+1] and arg1[first] > + return first+1 since this is based on arg0. */ > + if (operand_equal_for_phi_arg_p (arg0_op.ops[first], > + arg1_op.ops[first + 1])) > + { > + *new_arg0 = arg0_op.ops[first + 1]; > + *new_arg1 = arg1_op.ops[first]; > + return first + 1; > + } > + return -1; > +} > + > +/* Factors out an operation from *ARG0 and *ARG1 and > + create the new statement at GSI. *RES is the > + result of that new statement. Update *ARG0 and *ARG1 > + and *RES to the new values if the factoring happened. > + Loops until all of the factoring is completed. */ > + > +static void > +factor_out_operators (tree *res, gimple_stmt_iterator *gsi, > + tree *arg0, tree *arg1, gphi *phi) > +{ > + gimple_match_op arg0_op, arg1_op; > + bool repeated = false; > + > +again: > + if (TREE_CODE (*arg0) != SSA_NAME || TREE_CODE (*arg1) != SSA_NAME) > + return; > + > + if (operand_equal_p (*arg0, *arg1)) > + return; > + > + /* If either args have > 1 use, then this transformation actually > + increases the number of expressions evaluated at runtime. */ > + if (repeated > + ? (!has_zero_uses (*arg0) || !has_zero_uses (*arg1)) > + : (!has_single_use (*arg0) || !has_single_use (*arg1))) > + return; > + > + gimple *arg0_def_stmt = SSA_NAME_DEF_STMT (*arg0); > + if (!gimple_extract_op (arg0_def_stmt, &arg0_op)) > + return; > + > + /* At this point there should be no ssa names occuring in abnormals. */ > + gcc_assert (!arg0_op.operands_occurs_in_abnormal_phi ()); > + > + gimple *arg1_def_stmt = SSA_NAME_DEF_STMT (*arg1); > + if (!gimple_extract_op (arg1_def_stmt, &arg1_op)) > + return; > + > + /* At this point there should be no ssa names occuring in abnormals. */ > + gcc_assert (!arg1_op.operands_occurs_in_abnormal_phi ()); > + > + /* No factoring can happen if the codes are different > + or the number operands. */ > + if (arg1_op.code != arg0_op.code > + || arg1_op.num_ops != arg0_op.num_ops) > + return; > + > + tree new_arg0, new_arg1; > + int opnum = find_different_opnum (arg0_op, arg1_op, &new_arg0, &new_arg1); > + if (opnum == -1) > + return; > + > + if (!types_compatible_p (TREE_TYPE (new_arg0), TREE_TYPE (new_arg1))) > + return; > + tree new_res = make_ssa_name (TREE_TYPE (new_arg0), NULL); > + > + /* Create the operation stmt if possible and insert it. */ > + > + gimple_match_op new_op = arg0_op; > + new_op.ops[opnum] = new_res; > + gimple_seq seq = NULL; > + tree result = *res; > + result = maybe_push_res_to_seq (&new_op, &seq, result); > + > + /* If we can't create the new statement, release the temp name > + and return back. */ > + if (!result) > + { > + release_ssa_name (new_res); > + return; > + } > + gsi_insert_seq_before (gsi, seq, GSI_CONTINUE_LINKING); > + > + if (dump_file && (dump_flags & TDF_DETAILS)) > + { > + fprintf (dump_file, "PHI "); > + print_generic_expr (dump_file, gimple_phi_result (phi)); > + fprintf (dump_file, > + " changed to factor operation out from COND_EXPR.\n"); > + fprintf (dump_file, "New stmt with OPERATION that defines "); > + print_generic_expr (dump_file, result); > + fprintf (dump_file, ".\n"); > + } > + > + /* Remove the old operation(s) that has single use. */ > + gimple_stmt_iterator gsi_for_def; > + > + gsi_for_def = gsi_for_stmt (arg0_def_stmt); > + gsi_remove (&gsi_for_def, true); > + release_defs (arg0_def_stmt); > + gsi_for_def = gsi_for_stmt (arg1_def_stmt); > + gsi_remove (&gsi_for_def, true); > + release_defs (arg1_def_stmt); > + > + /* Update the arguments and try again. */ > + *arg0 = new_arg0; > + *arg1 = new_arg1; > + *res = new_res; > + repeated = true; > + goto again; > +} > + > /* Create the smallest nested conditional possible. On pre-order we record > which conditionals are live, and on post-order rewrite the chain by > removing > already active conditions. > @@ -2293,6 +2474,11 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator > *gsi, bool loop_versioned) > arg0 = gimple_phi_arg_def (phi, 0); > arg1 = gimple_phi_arg_def (phi, 1); > } > + > + /* Factor out operand if possible. This can only be done easily > + for PHI with 2 elements. */ > + factor_out_operators (&res, gsi, &arg0, &arg1, phi); > + > if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1, > &op0, &op1, false, &has_nop, > &nop_reduc)) > -- > 2.43.0 >