This patch makes can_vec_perm_p and related functions take a vec<>, wrapped in the new typedefs vec_perm_indices and auto_vec_perm_indices. There are two reasons for doing this for SVE (the Arm Scalable Vector Extension):
(1) It means that the number of elements is bundled with the elements themselves, and is obviously constant. (2) It makes it easier to change the "unsigned char" element type to something wider. I'm happy to change the target hooks as a follow-on patch, if this is OK. Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu. OK to install? Richard 2017-09-14 Richard Sandiford <richard.sandif...@linaro.org> Alan Hayward <alan.hayw...@arm.com> David Sherwood <david.sherw...@arm.com> gcc/ * target.h (vec_perm_indices): New typedef. (auto_vec_perm_indices): Likewise. * optabs-query.h: Include target.h. (can_vec_perm_p): Take a vec_perm_indices *. * optabs-query.c (can_vec_perm_p): Likewise. (can_mult_highpart_p): Update accordingly. Use auto_vec_perm_indices. * tree-ssa-forwprop.c (simplify_vector_constructor): Likewise. * tree-vect-generic.c (lower_vec_perm): Likewise. * tree-vect-data-refs.c (vect_grouped_store_supported): Likewise. (vect_grouped_load_supported): Likewise. (vect_shift_permute_load_chain): Likewise. (vect_permute_store_chain): Use auto_vec_perm_indices. (vect_permute_load_chain): Likewise. * fold-const.c (fold_vec_perm): Take vec_perm_indices. (fold_ternary_loc): Update accordingly. Use auto_vec_perm_indices. Update uses of can_vec_perm_p. * tree-vect-loop.c (calc_vec_perm_mask_for_shift): Replace the mode with a number of elements. Take a vec_perm_indices *. (vect_create_epilog_for_reduction): Update accordingly. Use auto_vec_perm_indices. (have_whole_vector_shift): Likewise. Update call to can_vec_perm_p. * tree-vect-slp.c (vect_build_slp_tree_1): Likewise. (vect_transform_slp_perm_load): Likewise. (vect_schedule_slp_instance): Use auto_vec_perm_indices. * tree-vectorizer.h (vect_gen_perm_mask_any): Take a vec_perm_indices. (vect_gen_perm_mask_checked): Likewise. * tree-vect-stmts.c (vect_gen_perm_mask_any): Take a vec_perm_indices. (vect_gen_perm_mask_checked): Likewise. (vectorizable_mask_load_store): Use auto_vec_perm_indices. 
(vectorizable_store): Likewise. (vectorizable_load): Likewise. (perm_mask_for_reverse): Likewise. Update call to can_vec_perm_p. (vectorizable_bswap): Likewise. Index: gcc/target.h =================================================================== --- gcc/target.h 2017-09-11 17:10:58.656085547 +0100 +++ gcc/target.h 2017-09-14 11:25:32.162167193 +0100 @@ -191,6 +191,14 @@ enum vect_cost_model_location { vect_epilogue = 2 }; +/* The type to use for vector permutes with a constant permute vector. + Each entry is an index into the concatenated input vectors. */ +typedef vec<unsigned char> vec_perm_indices; + +/* Same, but can be used to construct local permute vectors that are + automatically freed. */ +typedef auto_vec<unsigned char, 32> auto_vec_perm_indices; + /* The target structure. This holds all the backend hooks. */ #define DEFHOOKPOD(NAME, DOC, TYPE, INIT) TYPE NAME; #define DEFHOOK(NAME, DOC, TYPE, PARAMS, INIT) TYPE (* NAME) PARAMS; Index: gcc/optabs-query.h =================================================================== --- gcc/optabs-query.h 2017-08-30 12:14:51.272396735 +0100 +++ gcc/optabs-query.h 2017-09-14 11:25:32.162167193 +0100 @@ -21,6 +21,7 @@ the Free Software Foundation; either ver #define GCC_OPTABS_QUERY_H #include "insn-opinit.h" +#include "target.h" /* Return the insn used to implement mode MODE of OP, or CODE_FOR_nothing if the target does not have such an insn. */ @@ -165,7 +166,7 @@ enum insn_code can_extend_p (machine_mod enum insn_code can_float_p (machine_mode, machine_mode, int); enum insn_code can_fix_p (machine_mode, machine_mode, int, bool *); bool can_conditionally_move_p (machine_mode mode); -bool can_vec_perm_p (machine_mode, bool, const unsigned char *); +bool can_vec_perm_p (machine_mode, bool, vec_perm_indices *); enum insn_code widening_optab_handler (optab, machine_mode, machine_mode); /* Find a widening optab even if it doesn't widen as much as we want. 
*/ #define find_widening_optab_handler(A,B,C,D) \ Index: gcc/optabs-query.c =================================================================== --- gcc/optabs-query.c 2017-09-05 20:57:40.745898121 +0100 +++ gcc/optabs-query.c 2017-09-14 11:25:32.162167193 +0100 @@ -353,8 +353,7 @@ can_conditionally_move_p (machine_mode m zeroes; this case is not dealt with here. */ bool -can_vec_perm_p (machine_mode mode, bool variable, - const unsigned char *sel) +can_vec_perm_p (machine_mode mode, bool variable, vec_perm_indices *sel) { machine_mode qimode; @@ -368,7 +367,7 @@ can_vec_perm_p (machine_mode mode, bool if (direct_optab_handler (vec_perm_const_optab, mode) != CODE_FOR_nothing && (sel == NULL || targetm.vectorize.vec_perm_const_ok == NULL - || targetm.vectorize.vec_perm_const_ok (mode, sel))) + || targetm.vectorize.vec_perm_const_ok (mode, &(*sel)[0]))) return true; } @@ -460,7 +459,6 @@ find_widening_optab_handler_and_mode (op can_mult_highpart_p (machine_mode mode, bool uns_p) { optab op; - unsigned char *sel; unsigned i, nunits; op = uns_p ? umul_highpart_optab : smul_highpart_optab; @@ -472,7 +470,6 @@ can_mult_highpart_p (machine_mode mode, return 0; nunits = GET_MODE_NUNITS (mode); - sel = XALLOCAVEC (unsigned char, nunits); op = uns_p ? vec_widen_umult_even_optab : vec_widen_smult_even_optab; if (optab_handler (op, mode) != CODE_FOR_nothing) @@ -480,9 +477,12 @@ can_mult_highpart_p (machine_mode mode, op = uns_p ? vec_widen_umult_odd_optab : vec_widen_smult_odd_optab; if (optab_handler (op, mode) != CODE_FOR_nothing) { + auto_vec_perm_indices sel (nunits); for (i = 0; i < nunits; ++i) - sel[i] = !BYTES_BIG_ENDIAN + (i & ~1) + ((i & 1) ? nunits : 0); - if (can_vec_perm_p (mode, false, sel)) + sel.quick_push (!BYTES_BIG_ENDIAN + + (i & ~1) + + ((i & 1) ? nunits : 0)); + if (can_vec_perm_p (mode, false, &sel)) return 2; } } @@ -493,9 +493,10 @@ can_mult_highpart_p (machine_mode mode, op = uns_p ? 
vec_widen_umult_lo_optab : vec_widen_smult_lo_optab; if (optab_handler (op, mode) != CODE_FOR_nothing) { + auto_vec_perm_indices sel (nunits); for (i = 0; i < nunits; ++i) - sel[i] = 2 * i + (BYTES_BIG_ENDIAN ? 0 : 1); - if (can_vec_perm_p (mode, false, sel)) + sel.quick_push (2 * i + (BYTES_BIG_ENDIAN ? 0 : 1)); + if (can_vec_perm_p (mode, false, &sel)) return 3; } } Index: gcc/tree-ssa-forwprop.c =================================================================== --- gcc/tree-ssa-forwprop.c 2017-09-14 11:24:42.667010577 +0100 +++ gcc/tree-ssa-forwprop.c 2017-09-14 11:25:32.163167193 +0100 @@ -1952,7 +1952,6 @@ simplify_vector_constructor (gimple_stmt unsigned elem_size, nelts, i; enum tree_code code, conv_code; constructor_elt *elt; - unsigned char *sel; bool maybe_ident; gcc_checking_assert (gimple_assign_rhs_code (stmt) == CONSTRUCTOR); @@ -1965,7 +1964,7 @@ simplify_vector_constructor (gimple_stmt elem_type = TREE_TYPE (type); elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type)); - sel = XALLOCAVEC (unsigned char, nelts); + auto_vec_perm_indices sel (nelts); orig = NULL; conv_code = ERROR_MARK; maybe_ident = true; @@ -2023,8 +2022,10 @@ simplify_vector_constructor (gimple_stmt } if (TREE_INT_CST_LOW (TREE_OPERAND (op1, 1)) != elem_size) return false; - sel[i] = TREE_INT_CST_LOW (TREE_OPERAND (op1, 2)) / elem_size; - if (sel[i] != i) maybe_ident = false; + unsigned int elt = TREE_INT_CST_LOW (TREE_OPERAND (op1, 2)) / elem_size; + if (elt != i) + maybe_ident = false; + sel.quick_push (elt); } if (i < nelts) return false; @@ -2053,7 +2054,7 @@ simplify_vector_constructor (gimple_stmt { tree mask_type; - if (!can_vec_perm_p (TYPE_MODE (type), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (type), false, &sel)) return false; mask_type = build_vector_type (build_nonstandard_integer_type (elem_size, 1), Index: gcc/tree-vect-generic.c =================================================================== --- gcc/tree-vect-generic.c 2017-09-14 11:24:42.667010577 +0100 +++ 
gcc/tree-vect-generic.c 2017-09-14 11:25:32.164167193 +0100 @@ -1300,13 +1300,13 @@ lower_vec_perm (gimple_stmt_iterator *gs if (TREE_CODE (mask) == VECTOR_CST) { - unsigned char *sel_int = XALLOCAVEC (unsigned char, elements); + auto_vec_perm_indices sel_int (elements); for (i = 0; i < elements; ++i) - sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i)) - & (2 * elements - 1)); + sel_int.quick_push (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i)) + & (2 * elements - 1)); - if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int)) + if (can_vec_perm_p (TYPE_MODE (vect_type), false, &sel_int)) { gimple_assign_set_rhs3 (stmt, mask); update_stmt (stmt); Index: gcc/tree-vect-data-refs.c =================================================================== --- gcc/tree-vect-data-refs.c 2017-08-30 12:10:14.677681466 +0100 +++ gcc/tree-vect-data-refs.c 2017-09-14 11:25:32.163167193 +0100 @@ -4547,7 +4547,8 @@ vect_grouped_store_supported (tree vecty if (VECTOR_MODE_P (mode)) { unsigned int i, nelt = GET_MODE_NUNITS (mode); - unsigned char *sel = XALLOCAVEC (unsigned char, nelt); + auto_vec_perm_indices sel (nelt); + sel.quick_grow (nelt); if (count == 3) { @@ -4568,7 +4569,7 @@ vect_grouped_store_supported (tree vecty if (3 * i + nelt2 < nelt) sel[3 * i + nelt2] = 0; } - if (!can_vec_perm_p (mode, false, sel)) + if (!can_vec_perm_p (mode, false, &sel)) { if (dump_enabled_p ()) dump_printf (MSG_MISSED_OPTIMIZATION, @@ -4585,7 +4586,7 @@ vect_grouped_store_supported (tree vecty if (3 * i + nelt2 < nelt) sel[3 * i + nelt2] = nelt + j2++; } - if (!can_vec_perm_p (mode, false, sel)) + if (!can_vec_perm_p (mode, false, &sel)) { if (dump_enabled_p ()) dump_printf (MSG_MISSED_OPTIMIZATION, @@ -4605,13 +4606,13 @@ vect_grouped_store_supported (tree vecty sel[i * 2] = i; sel[i * 2 + 1] = i + nelt; } - if (can_vec_perm_p (mode, false, sel)) - { - for (i = 0; i < nelt; i++) - sel[i] += nelt / 2; - if (can_vec_perm_p (mode, false, sel)) - return true; - } + if (can_vec_perm_p (mode, 
false, &sel)) + { + for (i = 0; i < nelt; i++) + sel[i] += nelt / 2; + if (can_vec_perm_p (mode, false, &sel)) + return true; + } } } @@ -4710,7 +4711,9 @@ vect_permute_store_chain (vec<tree> dr_c tree perm3_mask_low, perm3_mask_high; unsigned int i, n, log_length = exact_log2 (length); unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype); - unsigned char *sel = XALLOCAVEC (unsigned char, nelt); + + auto_vec_perm_indices sel (nelt); + sel.quick_grow (nelt); result_chain->quick_grow (length); memcpy (result_chain->address (), dr_chain.address (), @@ -5132,7 +5135,8 @@ vect_grouped_load_supported (tree vectyp if (VECTOR_MODE_P (mode)) { unsigned int i, j, nelt = GET_MODE_NUNITS (mode); - unsigned char *sel = XALLOCAVEC (unsigned char, nelt); + auto_vec_perm_indices sel (nelt); + sel.quick_grow (nelt); if (count == 3) { @@ -5144,7 +5148,7 @@ vect_grouped_load_supported (tree vectyp sel[i] = 3 * i + k; else sel[i] = 0; - if (!can_vec_perm_p (mode, false, sel)) + if (!can_vec_perm_p (mode, false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5157,7 +5161,7 @@ vect_grouped_load_supported (tree vectyp sel[i] = i; else sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++); - if (!can_vec_perm_p (mode, false, sel)) + if (!can_vec_perm_p (mode, false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5174,11 +5178,11 @@ vect_grouped_load_supported (tree vectyp gcc_assert (pow2p_hwi (count)); for (i = 0; i < nelt; i++) sel[i] = i * 2; - if (can_vec_perm_p (mode, false, sel)) + if (can_vec_perm_p (mode, false, &sel)) { for (i = 0; i < nelt; i++) sel[i] = i * 2 + 1; - if (can_vec_perm_p (mode, false, sel)) + if (can_vec_perm_p (mode, false, &sel)) return true; } } @@ -5292,7 +5296,9 @@ vect_permute_load_chain (vec<tree> dr_ch tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)); unsigned int i, j, log_length = exact_log2 (length); unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype); - unsigned 
char *sel = XALLOCAVEC (unsigned char, nelt); + + auto_vec_perm_indices sel (nelt); + sel.quick_grow (nelt); result_chain->quick_grow (length); memcpy (result_chain->address (), dr_chain.address (), @@ -5486,10 +5492,12 @@ vect_shift_permute_load_chain (vec<tree> tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)); unsigned int i; unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype); - unsigned char *sel = XALLOCAVEC (unsigned char, nelt); stmt_vec_info stmt_info = vinfo_for_stmt (stmt); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + auto_vec_perm_indices sel (nelt); + sel.quick_grow (nelt); + result_chain->quick_grow (length); memcpy (result_chain->address (), dr_chain.address (), length * sizeof (tree)); @@ -5501,7 +5509,7 @@ vect_shift_permute_load_chain (vec<tree> sel[i] = i * 2; for (i = 0; i < nelt / 2; ++i) sel[nelt / 2 + i] = i * 2 + 1; - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5515,7 +5523,7 @@ vect_shift_permute_load_chain (vec<tree> sel[i] = i * 2 + 1; for (i = 0; i < nelt / 2; ++i) sel[nelt / 2 + i] = i * 2; - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5529,7 +5537,7 @@ vect_shift_permute_load_chain (vec<tree> For vector length 8 it is {4 5 6 7 8 9 10 11}. 
*/ for (i = 0; i < nelt; i++) sel[i] = nelt / 2 + i; - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5544,7 +5552,7 @@ vect_shift_permute_load_chain (vec<tree> sel[i] = i; for (i = nelt / 2; i < nelt; i++) sel[i] = nelt + i; - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5607,7 +5615,7 @@ vect_shift_permute_load_chain (vec<tree> sel[i] = 3 * k + (l % 3); k++; } - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5621,7 +5629,7 @@ vect_shift_permute_load_chain (vec<tree> For vector length 8 it is {6 7 8 9 10 11 12 13}. */ for (i = 0; i < nelt; i++) sel[i] = 2 * (nelt / 3) + (nelt % 3) + i; - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5634,7 +5642,7 @@ vect_shift_permute_load_chain (vec<tree> For vector length 8 it is {5 6 7 8 9 10 11 12}. */ for (i = 0; i < nelt; i++) sel[i] = 2 * (nelt / 3) + 1 + i; - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5647,7 +5655,7 @@ vect_shift_permute_load_chain (vec<tree> For vector length 8 it is {3 4 5 6 7 8 9 10}. 
*/ for (i = 0; i < nelt; i++) sel[i] = (nelt / 3) + (nelt % 3) / 2 + i; - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5660,7 +5668,7 @@ vect_shift_permute_load_chain (vec<tree> For vector length 8 it is {5 6 7 8 9 10 11 12}. */ for (i = 0; i < nelt; i++) sel[i] = 2 * (nelt / 3) + (nelt % 3) / 2 + i; - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, Index: gcc/fold-const.c =================================================================== --- gcc/fold-const.c 2017-09-14 11:24:42.666088258 +0100 +++ gcc/fold-const.c 2017-09-14 11:25:32.162167193 +0100 @@ -8786,12 +8786,14 @@ vec_cst_ctor_to_array (tree arg, unsigne NULL_TREE otherwise. */ static tree -fold_vec_perm (tree type, tree arg0, tree arg1, const unsigned char *sel) +fold_vec_perm (tree type, tree arg0, tree arg1, vec_perm_indices sel) { - unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i; + unsigned int i; bool need_ctor = false; - gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)) == nelts + unsigned int nelts = sel.length (); + gcc_assert (TYPE_VECTOR_SUBPARTS (type) == nelts + && TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)) == nelts && TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)) == nelts); if (TREE_TYPE (TREE_TYPE (arg0)) != TREE_TYPE (type) || TREE_TYPE (TREE_TYPE (arg1)) != TREE_TYPE (type)) @@ -11312,15 +11314,15 @@ fold_ternary_loc (location_t loc, enum t || TREE_CODE (arg2) == CONSTRUCTOR)) { unsigned int nelts = VECTOR_CST_NELTS (arg0), i; - unsigned char *sel = XALLOCAVEC (unsigned char, nelts); gcc_assert (nelts == TYPE_VECTOR_SUBPARTS (type)); + auto_vec_perm_indices sel (nelts); for (i = 0; i < nelts; i++) { tree val = VECTOR_CST_ELT (arg0, i); if (integer_all_onesp (val)) - sel[i] = i; + sel.quick_push 
(i); else if (integer_zerop (val)) - sel[i] = nelts + i; + sel.quick_push (nelts + i); else /* Currently unreachable. */ return NULL_TREE; } @@ -11643,8 +11645,6 @@ fold_ternary_loc (location_t loc, enum t if (TREE_CODE (arg2) == VECTOR_CST) { unsigned int nelts = VECTOR_CST_NELTS (arg2), i, mask, mask2; - unsigned char *sel = XALLOCAVEC (unsigned char, 2 * nelts); - unsigned char *sel2 = sel + nelts; bool need_mask_canon = false; bool need_mask_canon2 = false; bool all_in_vec0 = true; @@ -11656,6 +11656,8 @@ fold_ternary_loc (location_t loc, enum t mask2 = 2 * nelts - 1; mask = single_arg ? (nelts - 1) : mask2; gcc_assert (nelts == TYPE_VECTOR_SUBPARTS (type)); + auto_vec_perm_indices sel (nelts); + auto_vec_perm_indices sel2 (nelts); for (i = 0; i < nelts; i++) { tree val = VECTOR_CST_ELT (arg2, i); @@ -11667,16 +11669,19 @@ fold_ternary_loc (location_t loc, enum t wide_int t = val; need_mask_canon |= wi::gtu_p (t, mask); need_mask_canon2 |= wi::gtu_p (t, mask2); - sel[i] = t.to_uhwi () & mask; - sel2[i] = t.to_uhwi () & mask2; + unsigned int elt = t.to_uhwi () & mask; + unsigned int elt2 = t.to_uhwi () & mask2; - if (sel[i] < nelts) + if (elt < nelts) all_in_vec1 = false; else all_in_vec0 = false; - if ((sel[i] & (nelts-1)) != i) + if ((elt & (nelts - 1)) != i) maybe_identity = false; + + sel.quick_push (elt); + sel2.quick_push (elt2); } if (maybe_identity) @@ -11714,8 +11719,8 @@ fold_ternary_loc (location_t loc, enum t argument permutation while still allowing an equivalent 2-argument version. 
*/ if (need_mask_canon && arg2 == op2 - && !can_vec_perm_p (TYPE_MODE (type), false, sel) - && can_vec_perm_p (TYPE_MODE (type), false, sel2)) + && !can_vec_perm_p (TYPE_MODE (type), false, &sel) + && can_vec_perm_p (TYPE_MODE (type), false, &sel2)) { need_mask_canon = need_mask_canon2; sel = sel2; Index: gcc/tree-vect-loop.c =================================================================== --- gcc/tree-vect-loop.c 2017-09-14 11:24:42.667932896 +0100 +++ gcc/tree-vect-loop.c 2017-09-14 11:25:32.164167193 +0100 @@ -3698,15 +3698,15 @@ vect_estimate_min_profitable_iters (loop } /* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET - vector elements (not bits) for a vector of mode MODE. */ + vector elements (not bits) for a vector with NELT elements. */ static void -calc_vec_perm_mask_for_shift (machine_mode mode, unsigned int offset, - unsigned char *sel) +calc_vec_perm_mask_for_shift (unsigned int offset, unsigned int nelt, + vec_perm_indices *sel) { - unsigned int i, nelt = GET_MODE_NUNITS (mode); + unsigned int i; for (i = 0; i < nelt; i++) - sel[i] = (i + offset) & (2*nelt - 1); + sel->quick_push ((i + offset) & (2 * nelt - 1)); } /* Checks whether the target supports whole-vector shifts for vectors of mode @@ -3722,12 +3722,13 @@ have_whole_vector_shift (machine_mode mo return false; unsigned int i, nelt = GET_MODE_NUNITS (mode); - unsigned char *sel = XALLOCAVEC (unsigned char, nelt); + auto_vec_perm_indices sel (nelt); for (i = nelt/2; i >= 1; i/=2) { - calc_vec_perm_mask_for_shift (mode, i, sel); - if (!can_vec_perm_p (mode, false, sel)) + sel.truncate (0); + calc_vec_perm_mask_for_shift (i, nelt, &sel); + if (!can_vec_perm_p (mode, false, &sel)) return false; } return true; @@ -5059,7 +5060,7 @@ vect_create_epilog_for_reduction (vec<tr if (reduce_with_shift && !slp_reduc) { int nelements = vec_size_in_bits / element_bitsize; - unsigned char *sel = XALLOCAVEC (unsigned char, nelements); + auto_vec_perm_indices sel (nelements); int 
elt_offset; @@ -5083,8 +5084,9 @@ vect_create_epilog_for_reduction (vec<tr elt_offset >= 1; elt_offset /= 2) { - calc_vec_perm_mask_for_shift (mode, elt_offset, sel); - tree mask = vect_gen_perm_mask_any (vectype, sel); + sel.truncate (0); + calc_vec_perm_mask_for_shift (elt_offset, nelements, &sel); + tree mask = vect_gen_perm_mask_any (vectype, sel); epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR, new_temp, zero_vec, mask); new_name = make_ssa_name (vec_dest, epilog_stmt); Index: gcc/tree-vect-slp.c =================================================================== --- gcc/tree-vect-slp.c 2017-09-14 11:24:42.667932896 +0100 +++ gcc/tree-vect-slp.c 2017-09-14 11:25:32.165167193 +0100 @@ -873,15 +873,16 @@ vect_build_slp_tree_1 (vec_info *vinfo, if (alt_stmt_code != ERROR_MARK && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference) { - unsigned char *sel - = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (vectype)); - for (i = 0; i < TYPE_VECTOR_SUBPARTS (vectype); ++i) + unsigned int count = TYPE_VECTOR_SUBPARTS (vectype); + auto_vec_perm_indices sel (count); + for (i = 0; i < count; ++i) { - sel[i] = i; + unsigned int elt = i; if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code) - sel[i] += TYPE_VECTOR_SUBPARTS (vectype); + elt += count; + sel.quick_push (elt); } - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) { for (i = 0; i < group_size; ++i) if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code) @@ -3486,7 +3487,6 @@ vect_transform_slp_perm_load (slp_tree n tree vectype = STMT_VINFO_VECTYPE (stmt_info); int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance); int mask_element; - unsigned char *mask; machine_mode mode; if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)) @@ -3502,7 +3502,8 @@ vect_transform_slp_perm_load (slp_tree n (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1); mask_type = get_vectype_for_scalar_type (mask_element_type); nunits = 
TYPE_VECTOR_SUBPARTS (vectype); - mask = XALLOCAVEC (unsigned char, nunits); + auto_vec_perm_indices mask (nunits); + mask.quick_grow (nunits); /* Initialize the vect stmts of NODE to properly insert the generated stmts later. */ @@ -3577,7 +3578,7 @@ vect_transform_slp_perm_load (slp_tree n if (index == nunits) { if (! noop_p - && ! can_vec_perm_p (mode, false, mask)) + && ! can_vec_perm_p (mode, false, &mask)) { if (dump_enabled_p ()) { @@ -3730,15 +3731,15 @@ vect_schedule_slp_instance (slp_tree nod enum tree_code code0 = gimple_assign_rhs_code (stmt); enum tree_code ocode = ERROR_MARK; gimple *ostmt; - unsigned char *mask = XALLOCAVEC (unsigned char, group_size); + auto_vec_perm_indices mask (group_size); FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt) if (gimple_assign_rhs_code (ostmt) != code0) { - mask[i] = 1; + mask.quick_push (1); ocode = gimple_assign_rhs_code (ostmt); } else - mask[i] = 0; + mask.quick_push (0); if (ocode != ERROR_MARK) { vec<gimple *> v0; Index: gcc/tree-vectorizer.h =================================================================== --- gcc/tree-vectorizer.h 2017-08-29 20:01:07.143372092 +0100 +++ gcc/tree-vectorizer.h 2017-09-14 11:25:32.166167193 +0100 @@ -1151,8 +1151,8 @@ extern void vect_get_load_cost (struct d extern void vect_get_store_cost (struct data_reference *, int, unsigned int *, stmt_vector_for_cost *); extern bool vect_supportable_shift (enum tree_code, tree); -extern tree vect_gen_perm_mask_any (tree, const unsigned char *); -extern tree vect_gen_perm_mask_checked (tree, const unsigned char *); +extern tree vect_gen_perm_mask_any (tree, vec_perm_indices); +extern tree vect_gen_perm_mask_checked (tree, vec_perm_indices); extern void optimize_mask_stores (struct loop*); /* In tree-vect-data-refs.c. 
*/ Index: gcc/tree-vect-stmts.c =================================================================== --- gcc/tree-vect-stmts.c 2017-09-14 11:24:42.668855214 +0100 +++ gcc/tree-vect-stmts.c 2017-09-14 11:25:32.166167193 +0100 @@ -1706,15 +1706,14 @@ compare_step_with_zero (gimple *stmt) perm_mask_for_reverse (tree vectype) { int i, nunits; - unsigned char *sel; nunits = TYPE_VECTOR_SUBPARTS (vectype); - sel = XALLOCAVEC (unsigned char, nunits); + auto_vec_perm_indices sel (nunits); for (i = 0; i < nunits; ++i) - sel[i] = nunits - 1 - i; + sel.quick_push (nunits - 1 - i); - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) return NULL_TREE; return vect_gen_perm_mask_checked (vectype, sel); } @@ -2171,19 +2170,20 @@ vectorizable_mask_load_store (gimple *st modifier = NONE; else if (nunits == gather_off_nunits / 2) { - unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits); modifier = WIDEN; + auto_vec_perm_indices sel (gather_off_nunits); for (i = 0; i < gather_off_nunits; ++i) - sel[i] = i | nunits; + sel.quick_push (i | nunits); perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel); } else if (nunits == gather_off_nunits * 2) { - unsigned char *sel = XALLOCAVEC (unsigned char, nunits); modifier = NARROW; + auto_vec_perm_indices sel (nunits); + sel.quick_grow (nunits); for (i = 0; i < nunits; ++i) sel[i] = i < gather_off_nunits ? i : i + nunits - gather_off_nunits; @@ -2481,14 +2481,14 @@ vectorizable_bswap (gimple *stmt, gimple return false; unsigned int num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype); - unsigned char *elts = XALLOCAVEC (unsigned char, num_bytes); - unsigned char *elt = elts; unsigned word_bytes = num_bytes / nunits; + + auto_vec_perm_indices elts (num_bytes); for (unsigned i = 0; i < nunits; ++i) for (unsigned j = 0; j < word_bytes; ++j) - *elt++ = (i + 1) * word_bytes - j - 1; + elts.quick_push ((i + 1) * word_bytes - j - 1); - if (! 
can_vec_perm_p (TYPE_MODE (char_vectype), false, elts)) + if (! can_vec_perm_p (TYPE_MODE (char_vectype), false, &elts)) return false; if (! vec_stmt) @@ -5803,22 +5803,22 @@ vectorizable_store (gimple *stmt, gimple modifier = NONE; else if (nunits == (unsigned int) scatter_off_nunits / 2) { - unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits); modifier = WIDEN; + auto_vec_perm_indices sel (scatter_off_nunits); for (i = 0; i < (unsigned int) scatter_off_nunits; ++i) - sel[i] = i | nunits; + sel.quick_push (i | nunits); perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel); gcc_assert (perm_mask != NULL_TREE); } else if (nunits == (unsigned int) scatter_off_nunits * 2) { - unsigned char *sel = XALLOCAVEC (unsigned char, nunits); modifier = NARROW; + auto_vec_perm_indices sel (nunits); for (i = 0; i < (unsigned int) nunits; ++i) - sel[i] = i | scatter_off_nunits; + sel.quick_push (i | scatter_off_nunits); perm_mask = vect_gen_perm_mask_checked (vectype, sel); gcc_assert (perm_mask != NULL_TREE); @@ -6503,19 +6503,19 @@ vectorizable_store (gimple *stmt, gimple vect_gen_perm_mask_checked. */ tree -vect_gen_perm_mask_any (tree vectype, const unsigned char *sel) +vect_gen_perm_mask_any (tree vectype, vec_perm_indices sel) { tree mask_elt_type, mask_type, mask_vec; - int i, nunits; - nunits = TYPE_VECTOR_SUBPARTS (vectype); + unsigned int nunits = sel.length (); + gcc_checking_assert (nunits == TYPE_VECTOR_SUBPARTS (vectype)); mask_elt_type = lang_hooks.types.type_for_mode (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1); mask_type = get_vectype_for_scalar_type (mask_elt_type); auto_vec<tree, 32> mask_elts (nunits); - for (i = 0; i < nunits; ++i) + for (unsigned int i = 0; i < nunits; ++i) mask_elts.quick_push (build_int_cst (mask_elt_type, sel[i])); mask_vec = build_vector (mask_type, mask_elts); @@ -6526,9 +6526,9 @@ vect_gen_perm_mask_any (tree vectype, co i.e. 
that the target supports the pattern _for arbitrary input vectors_. */ tree -vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel) +vect_gen_perm_mask_checked (tree vectype, vec_perm_indices sel) { - gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel)); + gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, &sel)); return vect_gen_perm_mask_any (vectype, sel); } @@ -6841,22 +6841,22 @@ vectorizable_load (gimple *stmt, gimple_ modifier = NONE; else if (nunits == gather_off_nunits / 2) { - unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits); modifier = WIDEN; + auto_vec_perm_indices sel (gather_off_nunits); for (i = 0; i < gather_off_nunits; ++i) - sel[i] = i | nunits; + sel.quick_push (i | nunits); perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel); } else if (nunits == gather_off_nunits * 2) { - unsigned char *sel = XALLOCAVEC (unsigned char, nunits); modifier = NARROW; + auto_vec_perm_indices sel (nunits); for (i = 0; i < nunits; ++i) - sel[i] = i < gather_off_nunits - ? i : i + nunits - gather_off_nunits; + sel.quick_push (i < gather_off_nunits + ? i : i + nunits - gather_off_nunits); perm_mask = vect_gen_perm_mask_checked (vectype, sel); ncopies *= 2;