On Fri, 26 Jun 2020, Richard Biener wrote: > (sorry for the duplicate, forgot to copy the list) > > This teaches SLP analysis about vector-typed externals that are > fed into the SLP operations via lane-extracting BIT_FIELD_REFs. > It shows that there's currently no good representation for > vector code on the SLP side, so I went halfway and always represent > such vector external uses with an SLP permutation node > with a single external SLP child that has a non-standard > representation of no scalar defs but only a vector def. That > works best for shielding the rest of the vectorizer from it. > > I'm not sure it's actually worth the trouble and what real-world > cases benefit from this. In theory, vectorized unrolled code > > interfacing with scalar code might be one case, but there > we necessarily go through memory and there's no intermediate > > pass transforming that to registers [to make BB vectorization > cheaper]. > > > It's also not even close to ready for re-vectorizing vectorized > > code with a larger VF. > > > Any opinions?
I have now installed this. Richard. > Bootstrapped / tested on x86_64-unknown-linux-gnu. > > > Thanks, > Richard. > > 2020-06-26 Richard Biener <rguent...@suse.de> > > PR tree-optimization/95839 > * tree-vect-slp.c (vect_slp_tree_uniform_p): Pre-existing > vectors are not uniform. > (vect_build_slp_tree_1): Handle BIT_FIELD_REFs of > vector registers. > (vect_build_slp_tree_2): For groups of lane extracts > from a vector register generate a permute node > with a special child representing the pre-existing vector. > (vect_prologue_cost_for_slp): Pre-existing vectors cost nothing. > (vect_slp_analyze_node_operations): Use SLP_TREE_LANES. > (vectorizable_slp_permutation): Do not generate or cost identity > permutes. > (vect_schedule_slp_instance): Handle pre-existing vector > that are function arguments. > > * gcc.dg/vect/bb-slp-pr95839-2.c: New testcase. > --- > gcc/testsuite/gcc.dg/vect/bb-slp-pr95839-2.c | 20 ++++ > gcc/tree-vect-slp.c | 119 ++++++++++++++++--- > 2 files changed, 124 insertions(+), 15 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-pr95839-2.c > > diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr95839-2.c > b/gcc/testsuite/gcc.dg/vect/bb-slp-pr95839-2.c > new file mode 100644 > index 00000000000..49e75d8c95c > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr95839-2.c > @@ -0,0 +1,20 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target vect_double } */ > +/* { dg-additional-options "-w -Wno-psabi" } */ > + > +typedef double __attribute__((vector_size(16))) v2df; > + > +v2df f(v2df a, v2df b) > +{ > + return (v2df){a[0] + b[0], a[1] + b[1]}; > +} > + > +v2df g(v2df a, v2df b) > +{ > + return (v2df){a[0] + b[1], a[1] + b[0]}; > +} > + > +/* Verify we manage to vectorize this with using the original vectors > + and do not end up with any vector CTORs. 
*/ > +/* { dg-final { scan-tree-dump-times "basic block vectorized" 2 "slp2" } } */ > +/* { dg-final { scan-tree-dump-not "vect_cst" "slp2" } } */ > diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c > index b223956e3af..83ec382ee0d 100644 > --- a/gcc/tree-vect-slp.c > +++ b/gcc/tree-vect-slp.c > @@ -247,6 +247,10 @@ vect_slp_tree_uniform_p (slp_tree node) > gcc_assert (SLP_TREE_DEF_TYPE (node) == vect_constant_def > || SLP_TREE_DEF_TYPE (node) == vect_external_def); > > + /* Pre-exsting vectors. */ > + if (SLP_TREE_SCALAR_OPS (node).is_empty ()) > + return false; > + > unsigned i; > tree op, first = NULL_TREE; > FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op) > @@ -838,7 +842,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char > *swap, > else > { > rhs_code = gimple_assign_rhs_code (stmt); > - load_p = TREE_CODE_CLASS (rhs_code) == tcc_reference; > + load_p = gimple_vuse (stmt); > } > > /* Check the operation. */ > @@ -899,6 +903,22 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char > *swap, > need_same_oprnds = true; > first_op1 = gimple_assign_rhs2 (stmt); > } > + else if (!load_p > + && rhs_code == BIT_FIELD_REF) > + { > + tree vec = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0); > + if (TREE_CODE (vec) != SSA_NAME > + || !types_compatible_p (vectype, TREE_TYPE (vec))) > + { > + if (dump_enabled_p ()) > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > + "Build SLP failed: " > + "BIT_FIELD_REF not supported\n"); > + /* Fatal mismatch. 
*/ > + matches[0] = false; > + return false; > + } > + } > else if (call_stmt > && gimple_call_internal_p (call_stmt, IFN_DIV_POW2)) > { > @@ -957,6 +977,18 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char > *swap, > continue; > } > } > + if (!load_p > + && first_stmt_code == BIT_FIELD_REF > + && (TREE_OPERAND (gimple_assign_rhs1 (first_stmt_info->stmt), 0) > + != TREE_OPERAND (gimple_assign_rhs1 (stmt_info->stmt), 0))) > + { > + if (dump_enabled_p ()) > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > + "Build SLP failed: different BIT_FIELD_REF " > + "arguments in %G", stmt); > + /* Mismatch. */ > + continue; > + } > > if (!load_p && rhs_code == CALL_EXPR) > { > @@ -1026,7 +1058,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char > *swap, > && TREE_CODE_CLASS (rhs_code) != tcc_expression > && TREE_CODE_CLASS (rhs_code) != tcc_comparison > && rhs_code != VIEW_CONVERT_EXPR > - && rhs_code != CALL_EXPR) > + && rhs_code != CALL_EXPR > + && rhs_code != BIT_FIELD_REF) > { > if (dump_enabled_p ()) > dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > @@ -1287,6 +1320,45 @@ vect_build_slp_tree_2 (vec_info *vinfo, > return node; > } > } > + else if (gimple_assign_single_p (stmt_info->stmt) > + && !gimple_vuse (stmt_info->stmt) > + && gimple_assign_rhs_code (stmt_info->stmt) == BIT_FIELD_REF) > + { > + /* vect_build_slp_tree_2 determined all BIT_FIELD_REFs reference > + the same SSA name vector of a compatible type to vectype. 
*/ > + vec<std::pair<unsigned, unsigned> > lperm = vNULL; > + tree vec = TREE_OPERAND (gimple_assign_rhs1 (stmt_info->stmt), 0); > + stmt_vec_info estmt_info; > + FOR_EACH_VEC_ELT (stmts, i, estmt_info) > + { > + gassign *estmt = as_a <gassign *> (estmt_info->stmt); > + tree bfref = gimple_assign_rhs1 (estmt); > + HOST_WIDE_INT lane; > + if (!known_eq (bit_field_size (bfref), > + tree_to_poly_uint64 (TYPE_SIZE (TREE_TYPE (vectype)))) > + || !constant_multiple_p (bit_field_offset (bfref), > + bit_field_size (bfref), &lane)) > + { > + lperm.release (); > + return NULL; > + } > + lperm.safe_push (std::make_pair (0, (unsigned)lane)); > + } > + slp_tree vnode = vect_create_new_slp_node (vNULL); > + SLP_TREE_VECTYPE (vnode) = TREE_TYPE (vec); > + SLP_TREE_VEC_DEFS (vnode).safe_push (vec); > + /* We are always building a permutation node even if it is an identity > + permute to shield the rest of the vectorizer from the odd node > + representing an actual vector without any scalar ops. > + ??? We could hide it completely with making the permute node > + external? */ > + node = vect_create_new_slp_node (stmts, 1); > + SLP_TREE_CODE (node) = VEC_PERM_EXPR; > + SLP_TREE_LANE_PERMUTATION (node) = lperm; > + SLP_TREE_VECTYPE (node) = vectype; > + SLP_TREE_CHILDREN (node).quick_push (vnode); > + return node; > + } > > /* Get at the operands, verifying they are compatible. */ > vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, > group_size); > @@ -2744,6 +2816,10 @@ static void > vect_prologue_cost_for_slp (slp_tree node, > stmt_vector_for_cost *cost_vec) > { > + /* There's a special case of an existing vector, that costs nothing. */ > + if (SLP_TREE_SCALAR_OPS (node).length () == 0 > + && !SLP_TREE_VEC_DEFS (node).is_empty ()) > + return; > /* Without looking at the actual initializer a vector of > constants can be implemented as load from the constant pool. > When all elements are the same we can use a splat. 
*/ > @@ -2857,7 +2933,7 @@ vect_slp_analyze_node_operations (vec_info *vinfo, > slp_tree node, > && j == 1); > continue; > } > - unsigned group_size = SLP_TREE_SCALAR_OPS (child).length (); > + unsigned group_size = SLP_TREE_LANES (child); > poly_uint64 vf = 1; > if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo)) > vf = loop_vinfo->vectorization_factor; > @@ -4139,7 +4215,9 @@ vectorizable_slp_permutation (vec_info *vinfo, > gimple_stmt_iterator *gsi, > { > indices.new_vector (mask, second_vec.first == -1U ? 1 : 2, > const_nunits); > - if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices)) > + bool identity_p = indices.series_p (0, 1, 0, 1); > + if (!identity_p > + && !can_vec_perm_const_p (TYPE_MODE (vectype), indices)) > { > if (dump_enabled_p ()) > { > @@ -4157,11 +4235,10 @@ vectorizable_slp_permutation (vec_info *vinfo, > gimple_stmt_iterator *gsi, > return false; > } > > - nperms++; > + if (!identity_p) > + nperms++; > if (gsi) > { > - tree mask_vec = vect_gen_perm_mask_checked (vectype, indices); > - > if (second_vec.first == -1U) > second_vec = first_vec; > > @@ -4169,14 +4246,22 @@ vectorizable_slp_permutation (vec_info *vinfo, > gimple_stmt_iterator *gsi, > slp_tree first_node = SLP_TREE_CHILDREN (node)[first_vec.first]; > tree first_def > = vect_get_slp_vect_def (first_node, first_vec.second); > - slp_tree second_node = SLP_TREE_CHILDREN (node)[second_vec.first]; > - tree second_def > - = vect_get_slp_vect_def (second_node, second_vec.second); > + gassign *perm_stmt; > tree perm_dest = make_ssa_name (vectype); > - gassign *perm_stmt > - = gimple_build_assign (perm_dest, VEC_PERM_EXPR, > - first_def, second_def, > - mask_vec); > + if (!identity_p) > + { > + slp_tree second_node > + = SLP_TREE_CHILDREN (node)[second_vec.first]; > + tree second_def > + = vect_get_slp_vect_def (second_node, second_vec.second); > + tree mask_vec = vect_gen_perm_mask_checked (vectype, indices); > + perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR, > + 
first_def, second_def, > + mask_vec); > + } > + else > + /* We need a copy here in case the def was external. */ > + perm_stmt = gimple_build_assign (perm_dest, first_def); > vect_finish_stmt_generation (vinfo, NULL, perm_stmt, gsi); > /* Store the vector statement in NODE. */ > SLP_TREE_VEC_STMTS (node).quick_push (perm_stmt); > @@ -4300,13 +4385,17 @@ vect_schedule_slp_instance (vec_info *vinfo, > unsigned j; > tree vdef; > FOR_EACH_VEC_ELT (SLP_TREE_VEC_DEFS (child), j, vdef) > - if (TREE_CODE (vdef) == SSA_NAME) > + if (TREE_CODE (vdef) == SSA_NAME > + && !SSA_NAME_IS_DEFAULT_DEF (vdef)) > { > gimple *vstmt = SSA_NAME_DEF_STMT (vdef); > if (!last_stmt > || vect_stmt_dominates_stmt_p (last_stmt, vstmt)) > last_stmt = vstmt; > } > + /* This can happen when all children are pre-existing vectors. */ > + if (!last_stmt) > + last_stmt = vect_find_first_scalar_stmt_in_slp (node)->stmt; > } > if (is_a <gphi *> (last_stmt)) > si = gsi_after_labels (gimple_bb (last_stmt)); > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)