We failed to build the correct initialization vector. Bootstrap and regtest running on x86_64-unknown-linux-gnu.
Note this restricts n-lanes to 1 for VLA vectors since I don't know how to build a proper initializer there. I think that RVV at least could do this by shifting in the individual lane PHI latch defs into a zero vector from the right (or from the left and then possibly reverse the vector). Not sure how that goes for SVE or how to express this as CTOR. I could possibly build a fixed size vector CTOR and repeat that - but I guess that would only work for a power-of-two number of lanes (the permute scheme likely doesn't work for 3 lanes and I think we miss some additional checks here). PR tree-optimization/121256 * tree-vect-loop.cc (vectorizable_recurr): Build a correct initialization vector for SLP_TREE_LANES > 1. * gcc.dg/vect/vect-recurr-pr121256.c: New testcase. --- .../gcc.dg/vect/vect-recurr-pr121256.c | 54 +++++++++++++++++++ gcc/tree-vect-loop.cc | 46 +++++++++++++--- 2 files changed, 92 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/vect-recurr-pr121256.c diff --git a/gcc/testsuite/gcc.dg/vect/vect-recurr-pr121256.c b/gcc/testsuite/gcc.dg/vect/vect-recurr-pr121256.c new file mode 100644 index 00000000000..c895e94021d --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-recurr-pr121256.c @@ -0,0 +1,54 @@ +/* { dg-additional-options "-mavx2" { target avx2_runtime } } */ + +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include "tree-vect.h" + +#define B 0 +#define G 1 +#define R 2 +#define A 3 + +int red = 153; +int green = 66; +int blue = 187; +int alpha = 255; + +static void __attribute__((noipa)) +sub_left_prediction_bgr32(uint8_t *restrict dst, uint8_t *restrict src, int w) +{ + for (int i = 0; i < 8; i++) { + int rt = src[i * 4 + R]; + int gt = src[i * 4 + G]; + int bt = src[i * 4 + B]; + int at = src[i * 4 + A]; + + dst[i * 4 + R] = rt - red; + dst[i * 4 + G] = gt - green; + dst[i * 4 + B] = bt - blue; + dst[i * 4 + A] = at - alpha; + + red = rt; + green = gt; + blue = bt; + alpha = at; + } +} + +int main() +{ + check_vect (); + + uint8_t *dst = calloc(36, sizeof(uint8_t)); + uint8_t *src = calloc(36, sizeof(uint8_t)); + + src[R] = 160; + src[G] = 73; + src[B] = 194; + src[A] = 255; + + sub_left_prediction_bgr32(dst, src, 33); + if (dst[R] != 7 || dst[B] != 7 || dst[A] != 0) + __builtin_abort(); +} diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 93607cbf247..56d6228572b 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -8967,6 +8967,18 @@ vectorizable_recurr (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, return false; } + /* We need to be able to build a { ..., a, b } init vector with + dist number of distinct trailing values. Always possible + when dist == 1 or when nunits is constant. */ + if (dist != 1 && !nunits.is_constant ()) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "cannot build initialization vector for " + "first order recurrence\n"); + return false; + } + /* First-order recurrence autovectorization needs to handle permutation with indices = [nunits-1, nunits, nunits+1, ...]. */ vec_perm_builder sel (nunits, 1, 3); @@ -9018,20 +9030,38 @@ vectorizable_recurr (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, } edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); - basic_block bb = gimple_bb (phi); - tree preheader = PHI_ARG_DEF_FROM_EDGE (phi, pe); - if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (preheader))) - { - gimple_seq stmts = NULL; - preheader = gimple_convert (&stmts, TREE_TYPE (vectype), preheader); - gsi_insert_seq_on_edge_immediate (pe, stmts); + tree vec_init; + if (dist > 1) + { + vec<constructor_elt, va_gc> *v = NULL; + vec_alloc (v, nunits.to_constant ()); + for (unsigned i = 0; i < nunits.to_constant () - dist; ++i) + CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, + build_zero_cst (TREE_TYPE (vectype))); + for (stmt_vec_info s : SLP_TREE_SCALAR_STMTS (slp_node)) + { + gphi *phi = as_a <gphi *> (s->stmt); + tree preheader = PHI_ARG_DEF_FROM_EDGE (phi, pe); + if (!useless_type_conversion_p (TREE_TYPE (vectype), + TREE_TYPE (preheader))) + { + gimple_seq stmts = NULL; + preheader = gimple_convert (&stmts, + TREE_TYPE (vectype), preheader); + gsi_insert_seq_on_edge_immediate (pe, stmts); + } + CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, preheader); + } + vec_init = build_constructor (vectype, v); } - tree vec_init = build_vector_from_val (vectype, preheader); + else + vec_init = PHI_ARG_DEF_FROM_EDGE (phi, pe); vec_init = vect_init_vector (loop_vinfo, stmt_info, vec_init, vectype, NULL); /* Create the vectorized first-order PHI node. */ tree vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_recur_"); + basic_block bb = gimple_bb (phi); gphi *new_phi = create_phi_node (vec_dest, bb); add_phi_arg (new_phi, vec_init, pe, UNKNOWN_LOCATION); -- 2.43.0