The following patch disables loop masking when an even/odd widening operation is used in a reduction, because the loop mask would then be applied to the wrong elements.
Bootstrap and regtest running on x86_64-unknown-linux-gnu. If a target implements both even/odd and hi/lo widening we might want to change supportable_widening_operation to prefer the hi/lo variant (when masking is supported?). PR tree-optimization/121049 * internal-fn.h (widening_evenodd_fn_p): Declare. * internal-fn.cc (widening_evenodd_fn_p): New function. * tree-vect-stmts.cc (vectorizable_conversion): When using an even/odd widening function disable loop masking. * gcc.dg/vect/pr121049.c: New testcase. --- gcc/internal-fn.cc | 27 +++++++++++++++++++++++++++ gcc/internal-fn.h | 1 + gcc/testsuite/gcc.dg/vect/pr121049.c | 25 +++++++++++++++++++++++++ gcc/tree-vect-stmts.cc | 15 +++++++++++++++ 4 files changed, 68 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/vect/pr121049.c diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index 044bdc22481..114f5a9da18 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -4548,6 +4548,33 @@ widening_fn_p (code_helper code) } } +/* Return true if this CODE describes an internal_fn that returns a vector with + elements twice as wide as the element size of the input vectors and operates + on even/odd parts of the input. */ + +bool +widening_evenodd_fn_p (code_helper code) +{ + if (!code.is_fn_code ()) + return false; + + if (!internal_fn_p ((combined_fn) code)) + return false; + + internal_fn fn = as_internal_fn ((combined_fn) code); + switch (fn) + { + #define DEF_INTERNAL_WIDENING_OPTAB_FN(NAME, F, S, SO, UO, T) \ + case IFN_##NAME##_EVEN: \ + case IFN_##NAME##_ODD: \ + return true; + #include "internal-fn.def" + + default: + return false; + } +} + /* Return true if IFN_SET_EDOM is supported. 
*/ bool diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h index afd4f8e64c7..02731ea03ae 100644 --- a/gcc/internal-fn.h +++ b/gcc/internal-fn.h @@ -219,6 +219,7 @@ extern bool commutative_ternary_fn_p (internal_fn); extern int first_commutative_argument (internal_fn); extern bool associative_binary_fn_p (internal_fn); extern bool widening_fn_p (code_helper); +extern bool widening_evenodd_fn_p (code_helper); extern bool set_edom_supported_p (void); diff --git a/gcc/testsuite/gcc.dg/vect/pr121049.c b/gcc/testsuite/gcc.dg/vect/pr121049.c new file mode 100644 index 00000000000..558c92ab884 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr121049.c @@ -0,0 +1,25 @@ +/* { dg-additional-options "--param vect-partial-vector-usage=1" } */ +/* { dg-additional-options "-march=x86-64-v4" { target avx512f_runtime } } */ + +#include "tree-vect.h" + +int mon_lengths[12] = { 1, 10, 100 }; + +__attribute__ ((noipa)) long +transtime (int mon) +{ + long value = 0; + for (int i = 0; i < mon; ++i) + value += mon_lengths[i] * 2l; + return value; +} + +int +main () +{ + check_vect (); + if (transtime (3) != 222) + __builtin_abort (); + return 0; +} + diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 1be29c3fd22..7a24384e899 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -5462,6 +5462,7 @@ vectorizable_conversion (vec_info *vinfo, vec<tree> vec_oprnds1 = vNULL; tree vop0; bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo); + loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); int multi_step_cvt = 0; vec<tree> interm_types = vNULL; tree intermediate_type, cvt_type = NULL_TREE; @@ -5804,6 +5805,20 @@ vectorizable_conversion (vec_info *vinfo, gcc_unreachable (); } + if (modifier == WIDEN + && loop_vinfo + && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) + && (code1 == VEC_WIDEN_MULT_EVEN_EXPR + || widening_evenodd_fn_p (code1))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "can't use a 
fully-masked loop because" + " widening operation on even/odd elements" + " mixes up lanes.\n"); + LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; + } + if (!vec_stmt) /* transformation not required. */ { if (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in) -- 2.43.0