The following disables loop masking when an even/odd widening
operation is used in a reduction.  Such an operation mixes up the
lanes, so the loop mask would be applied to the wrong elements.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

If a target implements both even/odd and hi/lo widening, we might
want to change supportable_widening_operation to prefer the hi/lo
variant (when masking is supported?).

        PR tree-optimization/121049
        * internal-fn.h (widening_evenodd_fn_p): Declare.
        * internal-fn.cc (widening_evenodd_fn_p): New function.
        * tree-vect-stmts.cc (vectorizable_conversion): When using
        an even/odd widening function disable loop masking.

        * gcc.dg/vect/pr121049.c: New testcase.
---
 gcc/internal-fn.cc                   | 27 +++++++++++++++++++++++++++
 gcc/internal-fn.h                    |  1 +
 gcc/testsuite/gcc.dg/vect/pr121049.c | 25 +++++++++++++++++++++++++
 gcc/tree-vect-stmts.cc               | 15 +++++++++++++++
 4 files changed, 68 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr121049.c

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 044bdc22481..114f5a9da18 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -4548,6 +4548,33 @@ widening_fn_p (code_helper code)
     }
 }
 
+/* Return true if this CODE describes an internal_fn that returns a vector with
+   elements twice as wide as the element size of the input vectors and operates
+   on even/odd parts of the input.  */
+
+bool
+widening_evenodd_fn_p (code_helper code)
+{
+  if (!code.is_fn_code ())
+    return false;
+
+  if (!internal_fn_p ((combined_fn) code))
+    return false;
+
+  internal_fn fn = as_internal_fn ((combined_fn) code);
+  switch (fn)
+    {
+    #define DEF_INTERNAL_WIDENING_OPTAB_FN(NAME, F, S, SO, UO, T) \
+    case IFN_##NAME##_EVEN:                                      \
+    case IFN_##NAME##_ODD:                                       \
+      return true;
+    #include "internal-fn.def"
+
+    default:
+      return false;
+    }
+}
+
 /* Return true if IFN_SET_EDOM is supported.  */
 
 bool
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index afd4f8e64c7..02731ea03ae 100644
--- a/gcc/internal-fn.h
+++ b/gcc/internal-fn.h
@@ -219,6 +219,7 @@ extern bool commutative_ternary_fn_p (internal_fn);
 extern int first_commutative_argument (internal_fn);
 extern bool associative_binary_fn_p (internal_fn);
 extern bool widening_fn_p (code_helper);
+extern bool widening_evenodd_fn_p (code_helper);
 
 extern bool set_edom_supported_p (void);
 
diff --git a/gcc/testsuite/gcc.dg/vect/pr121049.c b/gcc/testsuite/gcc.dg/vect/pr121049.c
new file mode 100644
index 00000000000..558c92ab884
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr121049.c
@@ -0,0 +1,25 @@
+/* { dg-additional-options "--param vect-partial-vector-usage=1" } */
+/* { dg-additional-options "-march=x86-64-v4" { target avx512f_runtime } } */
+
+#include "tree-vect.h"
+
+int mon_lengths[12] = { 1, 10, 100 };
+
+__attribute__ ((noipa)) long
+transtime (int mon)
+{
+  long value = 0;
+  for (int i = 0; i < mon; ++i)
+    value += mon_lengths[i] * 2l;
+  return value;
+}
+
+int
+main ()
+{
+  check_vect ();
+  if (transtime (3) != 222)
+    __builtin_abort ();
+  return 0;
+}
+
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 1be29c3fd22..7a24384e899 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -5462,6 +5462,7 @@ vectorizable_conversion (vec_info *vinfo,
   vec<tree> vec_oprnds1 = vNULL;
   tree vop0;
   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
+  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   int multi_step_cvt = 0;
   vec<tree> interm_types = vNULL;
   tree intermediate_type, cvt_type = NULL_TREE;
@@ -5804,6 +5805,20 @@ vectorizable_conversion (vec_info *vinfo,
       gcc_unreachable ();
     }
 
+  if (modifier == WIDEN
+      && loop_vinfo
+      && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
+      && (code1 == VEC_WIDEN_MULT_EVEN_EXPR
+         || widening_evenodd_fn_p (code1)))
+    {
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                        "can't use a fully-masked loop because"
+                        " widening operation on even/odd elements"
+                        " mixes up lanes.\n");
+      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
+    }
+
   if (!vec_stmt)               /* transformation not required.  */
     {
       if (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
-- 
2.43.0

Reply via email to