On Fri, 18 Oct 2019, Christophe Lyon wrote:

> On Wed, 16 Oct 2019 at 15:09, Richard Biener <rguent...@suse.de> wrote:
> >
> >
> > It happens we cannot have different typed data and index for
> > integer condition reductions right now, for whatever reason.
> > The following makes that work, even for double data and integer index.
> > There's hope this enables some relevant amount of extra vectorization.
> >
> > Actually this is fallout from simplifying vect_is_simple_reduction
> > down to SSA cycle detection and moving reduction validity / handling
> > checks to vectorizable_reduction (thus a single place).
> >
> > I've decided to take an intermediate step here as I enable more
> > vectorization.  Which also needed the vect_transform_stmt change.
> >
> > Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
> >
> > Richard.
> >
> >         * tree-vect-loop.c (vect_valid_reduction_input_p): Remove.
> >         (vect_is_simple_reduction): Delay checking to
> >         vectorizable_reduction and relax the checking.
> >         (vectorizable_reduction): Check we have a simple use.  Check
> >         for bogus condition reductions.
> >         * tree-vect-stmts.c (vect_transform_stmt): Make sure we
> >         are looking at the last stmt in a pattern sequence when
> >         filling in backedge PHI values.
> >
> >         * gcc.dg/vect/vect-cond-reduc-3.c: New testcase.
> >         * gcc.dg/vect/vect-cond-reduc-4.c: Likewise.
> >
> 
> Hi Richard,
> 
> The new test vect-cond-reduc-3.c fails on arm*linux-gnueabihf when
> configured --with-fpu neon-*:
> FAIL: gcc.dg/vect/vect-cond-reduc-3.c -flto -ffat-lto-objects
> scan-tree-dump-times vect "LOOP VECTORIZED" 2
> FAIL: gcc.dg/vect/vect-cond-reduc-3.c -flto -ffat-lto-objects
> scan-tree-dump-times vect "condition expression based on integer
> induction." 2
> FAIL: gcc.dg/vect/vect-cond-reduc-3.c scan-tree-dump-times vect "LOOP
> VECTORIZED" 2
> FAIL: gcc.dg/vect/vect-cond-reduc-3.c scan-tree-dump-times vect
> "condition expression based on integer induction." 2
> 
> vect_float is true in such cases, so is that a vectorization failure
> or is that expected on this target and the test should be disabled?

Hmm, maybe it needs vect_cond_mixed, arm doesnt have it but aarch64 does.

Richard.

> Thanks,
> 
> Christophe
> 
> 
> > diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c 
> > b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c
> > new file mode 100644
> > index 00000000000..a5b3849a8c3
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c
> > @@ -0,0 +1,45 @@
> > +/* { dg-require-effective-target vect_condition } */
> > +/* { dg-require-effective-target vect_float } */
> > +
> > +#include "tree-vect.h"
> > +
> > +extern void abort (void) __attribute__ ((noreturn));
> > +
> > +#define N 27
> > +
> > +/* Condition reduction with different types.  */
> > +
> > +int
> > +condition_reduction (float *a, float min_v)
> > +{
> > +  int last = 0;
> > +
> > +  for (int i = 0; i < N; i++)
> > +    if (a[i] < min_v)
> > +      last = i;
> > +
> > +  return last;
> > +}
> > +
> > +int
> > +main (void)
> > +{
> > +  float a[N] = {
> > +  11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
> > +  1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
> > +  21, 22, 23, 24, 25, 26, 27
> > +  };
> > +
> > +  check_vect ();
> > +
> > +  int ret = condition_reduction (a, 10);
> > +  if (ret != 18)
> > +    abort ();
> > +
> > +  return 0;
> > +}
> > +
> > +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
> > +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with 
> > FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
> > +/* { dg-final { scan-tree-dump-times "condition expression based on 
> > integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */
> > +
> > diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c 
> > b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c
> > new file mode 100644
> > index 00000000000..6b6d17fb93c
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c
> > @@ -0,0 +1,45 @@
> > +/* { dg-require-effective-target vect_condition } */
> > +/* { dg-require-effective-target vect_double } */
> > +
> > +#include "tree-vect.h"
> > +
> > +extern void abort (void) __attribute__ ((noreturn));
> > +
> > +#define N 27
> > +
> > +/* Condition reduction with different types.  */
> > +
> > +int
> > +condition_reduction (double *a, double min_v)
> > +{
> > +  int last = 0;
> > +
> > +  for (int i = 0; i < N; i++)
> > +    if (a[i] < min_v)
> > +      last = i;
> > +
> > +  return last;
> > +}
> > +
> > +int
> > +main (void)
> > +{
> > +  double a[N] = {
> > +  11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
> > +  1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
> > +  21, 22, 23, 24, 25, 26, 27
> > +  };
> > +
> > +  check_vect ();
> > +
> > +  int ret = condition_reduction (a, 10);
> > +  if (ret != 18)
> > +    abort ();
> > +
> > +  return 0;
> > +}
> > +
> > +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
> > +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with 
> > FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
> > +/* { dg-final { scan-tree-dump-times "condition expression based on 
> > integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */
> > +
> > diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> > index 455251070d0..0530d6643b4 100644
> > --- a/gcc/tree-vect-loop.c
> > +++ b/gcc/tree-vect-loop.c
> > @@ -2532,21 +2532,6 @@ report_vect_op (dump_flags_t msg_type, gimple *stmt, 
> > const char *msg)
> >    dump_printf_loc (msg_type, vect_location, "%s%G", msg, stmt);
> >  }
> >
> > -/* DEF_STMT_INFO occurs in a loop that contains a potential reduction
> > -   operation.  Return true if the results of DEF_STMT_INFO are something
> > -   that can be accumulated by such a reduction.  */
> > -
> > -static bool
> > -vect_valid_reduction_input_p (stmt_vec_info def_stmt_info)
> > -{
> > -  return (is_gimple_assign (def_stmt_info->stmt)
> > -         || is_gimple_call (def_stmt_info->stmt)
> > -         || STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_induction_def
> > -         || (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
> > -             && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def
> > -             && !is_loop_header_bb_p (gimple_bb (def_stmt_info->stmt))));
> > -}
> > -
> >  /* Return true if we need an in-order reduction for operation CODE
> >     on type TYPE.  NEED_WRAPPING_INTEGRAL_OVERFLOW is true if integer
> >     overflow must wrap.  */
> > @@ -2748,13 +2733,7 @@ vect_is_simple_reduction (loop_vec_info loop_info, 
> > stmt_vec_info phi_info,
> >                           bool *double_reduc)
> >  {
> >    gphi *phi = as_a <gphi *> (phi_info->stmt);
> > -  class loop *loop = (gimple_bb (phi))->loop_father;
> > -  class loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
> > -  bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
> >    gimple *phi_use_stmt = NULL;
> > -  enum tree_code orig_code, code;
> > -  tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
> > -  tree type;
> >    imm_use_iterator imm_iter;
> >    use_operand_p use_p;
> >
> > @@ -2768,6 +2747,7 @@ vect_is_simple_reduction (loop_vec_info loop_info, 
> > stmt_vec_info phi_info,
> >       can be constant.  See PR60382.  */
> >    if (has_zero_uses (phi_name))
> >      return NULL;
> > +  class loop *loop = (gimple_bb (phi))->loop_father;
> >    unsigned nphi_def_loop_uses = 0;
> >    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, phi_name)
> >      {
> > @@ -2802,6 +2782,8 @@ vect_is_simple_reduction (loop_vec_info loop_info, 
> > stmt_vec_info phi_info,
> >        || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt)))
> >      return NULL;
> >
> > +  bool nested_in_vect_loop
> > +    = flow_loop_nested_p (LOOP_VINFO_LOOP (loop_info), loop);
> >    unsigned nlatch_def_loop_uses = 0;
> >    auto_vec<gphi *, 3> lcphis;
> >    bool inner_loop_of_double_reduc = false;
> > @@ -2849,8 +2831,7 @@ vect_is_simple_reduction (loop_vec_info loop_info, 
> > stmt_vec_info phi_info,
> >       defined in the inner loop.  */
> >    if (gphi *def_stmt = dyn_cast <gphi *> (def_stmt_info->stmt))
> >      {
> > -      op1 = PHI_ARG_DEF (def_stmt, 0);
> > -
> > +      tree op1 = PHI_ARG_DEF (def_stmt, 0);
> >        if (gimple_phi_num_args (def_stmt) != 1
> >            || TREE_CODE (op1) != SSA_NAME)
> >          {
> > @@ -2890,7 +2871,7 @@ vect_is_simple_reduction (loop_vec_info loop_info, 
> > stmt_vec_info phi_info,
> >                          def_stmt_info->stmt);
> >        return NULL;
> >      }
> > -  code = orig_code = gimple_assign_rhs_code (def_stmt);
> > +  enum tree_code code = gimple_assign_rhs_code (def_stmt);
> >
> >    /* We can handle "res -= x[i]", which is non-associative by
> >       simply rewriting this into "res += -x[i]".  Avoid changing
> > @@ -2899,26 +2880,11 @@ vect_is_simple_reduction (loop_vec_info loop_info, 
> > stmt_vec_info phi_info,
> >    if (code == MINUS_EXPR && gimple_assign_rhs2 (def_stmt) != phi_name)
> >      code = PLUS_EXPR;
> >
> > +  tree op1, op2;
> >    if (code == COND_EXPR)
> >      {
> >        if (! nested_in_vect_loop)
> >         STMT_VINFO_REDUC_TYPE (phi_info) = COND_REDUCTION;
> > -
> > -      op3 = gimple_assign_rhs1 (def_stmt);
> > -      if (COMPARISON_CLASS_P (op3))
> > -        {
> > -          op4 = TREE_OPERAND (op3, 1);
> > -          op3 = TREE_OPERAND (op3, 0);
> > -        }
> > -      if (op3 == phi_name || op4 == phi_name)
> > -       {
> > -         if (dump_enabled_p ())
> > -           report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> > -                           "reduction: condition depends on previous"
> > -                           " iteration: ");
> > -         return NULL;
> > -       }
> > -
> >        op1 = gimple_assign_rhs2 (def_stmt);
> >        op2 = gimple_assign_rhs3 (def_stmt);
> >      }
> > @@ -2951,33 +2917,6 @@ vect_is_simple_reduction (loop_vec_info loop_info, 
> > stmt_vec_info phi_info,
> >        return NULL;
> >      }
> >
> > -  type = TREE_TYPE (gimple_assign_lhs (def_stmt));
> > -  if ((TREE_CODE (op1) == SSA_NAME
> > -       && !types_compatible_p (type,TREE_TYPE (op1)))
> > -      || (TREE_CODE (op2) == SSA_NAME
> > -          && !types_compatible_p (type, TREE_TYPE (op2)))
> > -      || (op3 && TREE_CODE (op3) == SSA_NAME
> > -          && !types_compatible_p (type, TREE_TYPE (op3)))
> > -      || (op4 && TREE_CODE (op4) == SSA_NAME
> > -          && !types_compatible_p (type, TREE_TYPE (op4))))
> > -    {
> > -      if (dump_enabled_p ())
> > -        {
> > -          dump_printf_loc (MSG_NOTE, vect_location,
> > -                          "reduction: multiple types: operation type: "
> > -                          "%T, operands types: %T,%T",
> > -                          type,  TREE_TYPE (op1), TREE_TYPE (op2));
> > -          if (op3)
> > -           dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op3));
> > -
> > -          if (op4)
> > -           dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op4));
> > -          dump_printf (MSG_NOTE, "\n");
> > -        }
> > -
> > -      return NULL;
> > -    }
> > -
> >    /* Check whether it's ok to change the order of the computation.
> >       Generally, when vectorizing a reduction we change the order of the
> >       computation.  This may change the behavior of the program in some
> > @@ -2985,6 +2924,7 @@ vect_is_simple_reduction (loop_vec_info loop_info, 
> > stmt_vec_info phi_info,
> >       vectorizing an outer-loop: the inner-loop is executed sequentially,
> >       and therefore vectorizing reductions in the inner-loop during
> >       outer-loop vectorization is safe.  */
> > +  tree type = TREE_TYPE (gimple_assign_lhs (def_stmt));
> >    if (STMT_VINFO_REDUC_TYPE (phi_info) == TREE_CODE_REDUCTION
> >        && needs_fold_left_reduction_p (type, code))
> >      STMT_VINFO_REDUC_TYPE (phi_info) = FOLD_LEFT_REDUCTION;
> > @@ -2993,38 +2933,19 @@ vect_is_simple_reduction (loop_vec_info loop_info, 
> > stmt_vec_info phi_info,
> >       1) integer arithmetic and no trapv
> >       2) floating point arithmetic, and special flags permit this 
> > optimization
> >       3) nested cycle (i.e., outer loop vectorization).  */
> > +
> > +  /* Check for the simple case that one def is the reduction def,
> > +     defined by the PHI node.  */
> >    stmt_vec_info def1_info = loop_info->lookup_def (op1);
> >    stmt_vec_info def2_info = loop_info->lookup_def (op2);
> > -  if (code != COND_EXPR && !def1_info && !def2_info)
> > -    {
> > -      if (dump_enabled_p ())
> > -       report_vect_op (MSG_NOTE, def_stmt, "reduction: no defs for 
> > operands: ");
> > -      return NULL;
> > -    }
> > -
> > -  /* Check that one def is the reduction def, defined by PHI,
> > -     the other def is either defined in the loop ("vect_internal_def"),
> > -     or it's an induction (defined by a loop-header phi-node).  */
> > -
> > -  if (def2_info
> > -      && def2_info->stmt == phi
> > -      && (code == COND_EXPR
> > -         || !def1_info
> > -         || !flow_bb_inside_loop_p (loop, gimple_bb (def1_info->stmt))
> > -         || vect_valid_reduction_input_p (def1_info)))
> > +  if (def2_info && def2_info->stmt == phi)
> >      {
> >        STMT_VINFO_REDUC_IDX (def_stmt_info) = 1 + (code == COND_EXPR ? 1 : 
> > 0);
> >        if (dump_enabled_p ())
> >         report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
> >        return def_stmt_info;
> >      }
> > -
> > -  if (def1_info
> > -      && def1_info->stmt == phi
> > -      && (code == COND_EXPR
> > -         || !def2_info
> > -         || !flow_bb_inside_loop_p (loop, gimple_bb (def2_info->stmt))
> > -         || vect_valid_reduction_input_p (def2_info)))
> > +  else if (def1_info && def1_info->stmt == phi)
> >      {
> >        STMT_VINFO_REDUC_IDX (def_stmt_info) = 0 + (code == COND_EXPR ? 1 : 
> > 0);
> >        if (dump_enabled_p ())
> > @@ -3032,7 +2953,8 @@ vect_is_simple_reduction (loop_vec_info loop_info, 
> > stmt_vec_info phi_info,
> >        return def_stmt_info;
> >      }
> >
> > -  /* Look for the expression computing latch_def from loop PHI result.  */
> > +  /* Look for the expression computing latch_def from then loop PHI result
> > +     in a way involving more than one stmt.  */
> >    auto_vec<std::pair<ssa_op_iter, use_operand_p> > path;
> >    if (check_reduction_path (vect_location, loop, phi, latch_def, code,
> >                             path))
> > @@ -5716,7 +5638,6 @@ vectorizable_reduction (stmt_vec_info stmt_info, 
> > slp_tree slp_node,
> >    enum vect_def_type dt, cond_reduc_dt = vect_unknown_def_type;
> >    stmt_vec_info cond_stmt_vinfo = NULL;
> >    tree scalar_type;
> > -  bool is_simple_use;
> >    int i;
> >    int ncopies;
> >    bool single_defuse_cycle = false;
> > @@ -5887,10 +5808,15 @@ vectorizable_reduction (stmt_vec_info stmt_info, 
> > slp_tree slp_node,
> >          continue;
> >
> >        stmt_vec_info def_stmt_info;
> > -      is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, &dts[i], 
> > &tem,
> > -                                         &def_stmt_info);
> > +      if (!vect_is_simple_use (ops[i], loop_vinfo, &dts[i], &tem,
> > +                              &def_stmt_info))
> > +       {
> > +         if (dump_enabled_p ())
> > +           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > +                            "use not simple.\n");
> > +         return false;
> > +       }
> >        dt = dts[i];
> > -      gcc_assert (is_simple_use);
> >        if (dt == vect_reduction_def
> >           && ops[i] == reduc_def)
> >         {
> > @@ -6000,6 +5926,15 @@ vectorizable_reduction (stmt_vec_info stmt_info, 
> > slp_tree slp_node,
> >           return false;
> >         }
> >
> > +      /* When the condition uses the reduction value in the condition, 
> > fail.  */
> > +      if (reduc_index == 0)
> > +       {
> > +         if (dump_enabled_p ())
> > +           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > +                            "condition depends on previous iteration\n");
> > +         return false;
> > +       }
> > +
> >        if (direct_internal_fn_supported_p (IFN_FOLD_EXTRACT_LAST,
> >                                           vectype_in, OPTIMIZE_FOR_SPEED))
> >         {
> > diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> > index ff9863f290f..e7255fb76bc 100644
> > --- a/gcc/tree-vect-stmts.c
> > +++ b/gcc/tree-vect-stmts.c
> > @@ -10901,6 +10901,7 @@ vect_transform_stmt (stmt_vec_info stmt_info, 
> > gimple_stmt_iterator *gsi,
> >    stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
> >    stmt_vec_info reduc_info;
> >    if (STMT_VINFO_REDUC_DEF (orig_stmt_info)
> > +      && vect_stmt_to_vectorize (orig_stmt_info) == stmt_info
> >        && (reduc_info = info_for_reduction (orig_stmt_info))
> >        && STMT_VINFO_REDUC_TYPE (reduc_info) != FOLD_LEFT_REDUCTION
> >        && STMT_VINFO_REDUC_TYPE (reduc_info) != EXTRACT_LAST_REDUCTION)
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Reply via email to