We failed to build the correct initialization vector. For VLA vectors and a non-uniform initialization vector this rejects vectorization for now.
v2 adds VLA support for uniform initializers. I've added a testcase for the case I thought we might get wrong, we get it correct. Re-bootstrap and regtest running on x86_64-unknown-linux-gnu. PR tree-optimization/121256 * tree-vect-loop.cc (vectorizable_recurr): Build a correct initialization vector for SLP_TREE_LANES > 1. * gcc.dg/vect/vect-recurr-pr121256.c: New testcase. * gcc.dg/vect/vect-recurr-pr121256-2.c: Likewise. --- .../gcc.dg/vect/vect-recurr-pr121256-2.c | 49 +++++++++++++++ .../gcc.dg/vect/vect-recurr-pr121256.c | 54 +++++++++++++++++ gcc/tree-vect-loop.cc | 60 ++++++++++++++++--- 3 files changed, 155 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/vect-recurr-pr121256-2.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-recurr-pr121256.c diff --git a/gcc/testsuite/gcc.dg/vect/vect-recurr-pr121256-2.c b/gcc/testsuite/gcc.dg/vect/vect-recurr-pr121256-2.c new file mode 100644 index 00000000000..7350fd9feba --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-recurr-pr121256-2.c @@ -0,0 +1,49 @@ +/* { dg-additional-options "-mavx2" { target avx2_runtime } } */ + +#include "tree-vect.h" + +#define B 0 +#define G 1 +#define R 2 + +int red = 153; +int green = 66; +int blue = 187; + +static void __attribute__((noipa)) +sub_left_prediction_bgr32(int *restrict dst, int *restrict src) +{ + for (int i = 0; i < 8; i++) { + int rt = src[i * 3 + R]; + int gt = src[i * 3 + G]; + int bt = src[i * 3 + B]; + + dst[i * 3 + R] = rt - red; + dst[i * 3 + G] = gt - green; + dst[i * 3 + B] = bt - blue; + + red = rt; + green = gt; + blue = bt; + } +} + +int main() +{ + int dst[8*3]; + int src[8*3] = { 160, 73, 194, 17, 33, 99, 0, 12, 283, 87, 73, 11, + 9, 7, 1, 23, 19, 13, 77, 233, 97, 78, 2, 5 }; + int dst2[8*3] = {-27, 7, 41, -143, -40, -95, -17, -21, 184, 87, 61, + -272, -78, -66, -10, 14, 12, 12, 54, 214, 84, 1, -231, -92}; + + check_vect (); + + sub_left_prediction_bgr32(dst, src); + +#pragma GCC novector + for (int i = 0; i < 8*3; ++i) + if (dst[i] != dst2[i]) + __builtin_abort(); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-recurr-pr121256.c b/gcc/testsuite/gcc.dg/vect/vect-recurr-pr121256.c new file mode 100644 index 00000000000..c895e94021d --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-recurr-pr121256.c @@ -0,0 +1,54 @@ +/* { dg-additional-options "-mavx2" { target avx2_runtime } } */ + +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include "tree-vect.h" + +#define B 0 +#define G 1 +#define R 2 +#define A 3 + +int red = 153; +int green = 66; +int blue = 187; +int alpha = 255; + +static void __attribute__((noipa)) +sub_left_prediction_bgr32(uint8_t *restrict dst, uint8_t *restrict src, int w) +{ + for (int i = 0; i < 8; i++) { + int rt = src[i * 4 + R]; + int gt = src[i * 4 + G]; + int bt = src[i * 4 + B]; + int at = src[i * 4 + A]; + + dst[i * 4 + R] = rt - red; + dst[i * 4 + G] = gt - green; + dst[i * 4 + B] = bt - blue; + dst[i * 4 + A] = at - alpha; + + red = rt; + green = gt; + blue = bt; + alpha = at; + } +} + +int main() +{ + check_vect (); + + uint8_t *dst = calloc(36, sizeof(uint8_t)); + uint8_t *src = calloc(36, sizeof(uint8_t)); + + src[R] = 160; + src[G] = 73; + src[B] = 194; + src[A] = 255; + + sub_left_prediction_bgr32(dst, src, 33); + if (dst[R] != 7 || dst[B] != 7 || dst[A] != 0) + __builtin_abort(); +} diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 15f23640292..9be1c79cd47 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -8971,6 +8971,33 @@ vectorizable_recurr (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, return false; } + /* We need to be able to build a { ..., a, b } init vector with + dist number of distinct trailing values. Always possible + when dist == 1 or when nunits is constant or when the initializations + are uniform. */ + tree uniform_initval = NULL_TREE; + edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); + for (stmt_vec_info s : SLP_TREE_SCALAR_STMTS (slp_node)) + { + gphi *phi = as_a <gphi *> (s->stmt); + if (! uniform_initval) + uniform_initval = PHI_ARG_DEF_FROM_EDGE (phi, pe); + else if (! operand_equal_p (uniform_initval, + PHI_ARG_DEF_FROM_EDGE (phi, pe))) + { + uniform_initval = NULL_TREE; + break; + } + } + if (!uniform_initval && !nunits.is_constant ()) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "cannot build initialization vector for " + "first order recurrence\n"); + return false; + } + /* First-order recurrence autovectorization needs to handle permutation with indices = [nunits-1, nunits, nunits+1, ...]. */ vec_perm_builder sel (nunits, 1, 3); @@ -9021,21 +9048,38 @@ vectorizable_recurr (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, return true; } - edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); - basic_block bb = gimple_bb (phi); - tree preheader = PHI_ARG_DEF_FROM_EDGE (phi, pe); - if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (preheader))) + tree vec_init; + if (! uniform_initval) { - gimple_seq stmts = NULL; - preheader = gimple_convert (&stmts, TREE_TYPE (vectype), preheader); - gsi_insert_seq_on_edge_immediate (pe, stmts); + vec<constructor_elt, va_gc> *v = NULL; + vec_alloc (v, nunits.to_constant ()); + for (unsigned i = 0; i < nunits.to_constant () - dist; ++i) + CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, + build_zero_cst (TREE_TYPE (vectype))); + for (stmt_vec_info s : SLP_TREE_SCALAR_STMTS (slp_node)) + { + gphi *phi = as_a <gphi *> (s->stmt); + tree preheader = PHI_ARG_DEF_FROM_EDGE (phi, pe); + if (!useless_type_conversion_p (TREE_TYPE (vectype), + TREE_TYPE (preheader))) + { + gimple_seq stmts = NULL; + preheader = gimple_convert (&stmts, + TREE_TYPE (vectype), preheader); + gsi_insert_seq_on_edge_immediate (pe, stmts); + } + CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, preheader); + } + vec_init = build_constructor (vectype, v); } - tree vec_init = build_vector_from_val (vectype, preheader); + else + vec_init = uniform_initval; vec_init = vect_init_vector (loop_vinfo, stmt_info, vec_init, vectype, NULL); /* Create the vectorized first-order PHI node. */ tree vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_recur_"); + basic_block bb = gimple_bb (phi); gphi *new_phi = create_phi_node (vec_dest, bb); add_phi_arg (new_phi, vec_init, pe, UNKNOWN_LOCATION); -- 2.43.0