On Wed, Jun 15, 2016 at 10:52 AM, Richard Sandiford
wrote:
> This patch moves the fix for PR65518 to the code that checks whether
> load-and-permute operations are supported. If the group size is
> greater than the vectorisation factor, it would still be possible
> to fall back to elementwise loads (as for strided groups) rather
> than fail vectorisation entirely.
>
> Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install?
Ok.
Thanks,
Richard.
> Thanks,
> Richard
>
>
> gcc/
> * tree-vectorizer.h (vect_grouped_load_supported): Add a
> single_element_p parameter.
> * tree-vect-data-refs.c (vect_grouped_load_supported): Likewise.
> Check the PR65518 case here rather than in vectorizable_load.
> * tree-vect-loop.c (vect_analyze_loop_2): Update call accordingly.
> * tree-vect-stmts.c (vectorizable_load): Likewise.
>
> Index: gcc/tree-vectorizer.h
> ===
> --- gcc/tree-vectorizer.h
> +++ gcc/tree-vectorizer.h
> @@ -1069,7 +1069,7 @@ extern tree bump_vector_ptr (tree, gimple *,
> gimple_stmt_iterator *, gimple *,
> extern tree vect_create_destination_var (tree, tree);
> extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT);
> extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT);
> -extern bool vect_grouped_load_supported (tree, unsigned HOST_WIDE_INT);
> +extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT);
> extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT);
> extern void vect_permute_store_chain (vec<tree> ,unsigned int, gimple *,
> gimple_stmt_iterator *, vec<tree> *);
> Index: gcc/tree-vect-data-refs.c
> ===
> --- gcc/tree-vect-data-refs.c
> +++ gcc/tree-vect-data-refs.c
> @@ -5131,14 +5131,31 @@ vect_setup_realignment (gimple *stmt,
> gimple_stmt_iterator *gsi,
>
> /* Function vect_grouped_load_supported.
>
> - Returns TRUE if even and odd permutations are supported,
> - and FALSE otherwise. */
> + COUNT is the size of the load group (the number of statements plus the
> + number of gaps). SINGLE_ELEMENT_P is true if there is actually
> + only one statement, with a gap of COUNT - 1.
> +
> + Returns true if a suitable permute exists. */
>
> bool
> -vect_grouped_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
> +vect_grouped_load_supported (tree vectype, bool single_element_p,
> +unsigned HOST_WIDE_INT count)
> {
>machine_mode mode = TYPE_MODE (vectype);
>
> + /* If this is single-element interleaving with an element distance
> + that leaves unused vector loads around punt - we at least create
> + very sub-optimal code in that case (and blow up memory,
> + see PR65518). */
> + if (single_element_p && count > TYPE_VECTOR_SUBPARTS (vectype))
> +{
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +"single-element interleaving not supported "
> +"for not adjacent vector loads\n");
> + return false;
> +}
> +
>/* vect_permute_load_chain requires the group size to be equal to 3 or
> be a power of two. */
>if (count != 3 && exact_log2 (count) == -1)
> Index: gcc/tree-vect-loop.c
> ===
> --- gcc/tree-vect-loop.c
> +++ gcc/tree-vect-loop.c
> @@ -2148,10 +2148,12 @@ again:
> {
> vinfo = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
> vinfo = vinfo_for_stmt (STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo));
> + bool single_element_p = !STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo);
> size = STMT_VINFO_GROUP_SIZE (vinfo);
> vectype = STMT_VINFO_VECTYPE (vinfo);
> if (! vect_load_lanes_supported (vectype, size)
> - && ! vect_grouped_load_supported (vectype, size))
> + && ! vect_grouped_load_supported (vectype, single_element_p,
> + size))
> return false;
> }
> }
> Index: gcc/tree-vect-stmts.c
> ===
> --- gcc/tree-vect-stmts.c
> +++ gcc/tree-vect-stmts.c
> @@ -6298,31 +6298,20 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator
> *gsi, gimple **vec_stmt,
>
>first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
>group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
> + bool single_element_p = (first_stmt == stmt
> + && !GROUP_NEXT_ELEMENT (stmt_info));
>
>if (!slp && !STMT_VINFO_STRIDED_P (stmt_info))
> {
> if (vect_load_lanes_supported (vectype, group_size))
> load_lanes_p = true;
> - else if (!vect_grouped_load_supported (vectype, group_size))
> + else if (