https://gcc.gnu.org/g:e51fffdcbb53f10acffd197379585a586720a9b3
commit r16-5085-ge51fffdcbb53f10acffd197379585a586720a9b3
Author: Robin Dapp <[email protected]>
Date:   Tue Sep 9 11:41:51 2025 +0200

    vect: Relax gather/scatter detection by swapping offset sign.

    This patch adjusts vect_gather_scatter_fn_p to always check an offset
    type with swapped signedness (vs. the original offset argument).  If
    the target supports the gather/scatter with the new offset type as
    well as the conversion of the offset, we now emit an explicit offset
    conversion before the actual gather/scatter.

    The relaxation is only done for the IFN path of gather/scatter.  The
    general idea is roughly as follows:

    - vect_gather_scatter_fn_p builds a list of all offset vector types
      that the target supports for the current vectype.  Then it goes
      through that list, trying direct support first and sign-swapped
      offset types next, taking precision requirements into account.
      If successful, it sets supported_offset_vectype to the type that
      actually worked, while offset_vectype_out is the type that was
      requested.

    - vect_check_gather_scatter works as before but uses the relaxed
      vect_gather_scatter_fn_p.

    - get_load_store_type sets ls_data->supported_offset_vectype if the
      requested type wasn't supported but another one was.

    - check_load_store_for_partial_vectors uses the
      supported_offset_vectype in order to validate what
      get_load_store_type determined.

    - vectorizable_load/store emit and cost a conversion if
      ls_data->supported_offset_vectype is nonzero.

    The sign-swapped offset type is either of pointer size (if we started
    with a signed offset) or twice the size of the original offset (when
    that one was unsigned).  (Worked illustrations follow the diff
    below.)

    gcc/ChangeLog:

        * tree-vect-data-refs.cc (struct gather_scatter_config): New
        struct to hold gather/scatter configurations.
        (vect_gather_scatter_which_ifn): New function to determine which
        IFN to use.
        (vect_gather_scatter_get_configs): New function to enumerate all
        target-supported configs.
        (vect_gather_scatter_fn_p): Rework to use
        vect_gather_scatter_get_configs and try sign-swapped offset.
        (vect_check_gather_scatter): Use new supported offset vectype
        argument.
        * tree-vect-stmts.cc (check_load_store_for_partial_vectors):
        Ditto.
        (vect_truncate_gather_scatter_offset): Ditto.
        (vect_use_grouped_gather): Ditto.
        (get_load_store_type): Ditto.
        (vectorizable_store): Convert to sign-swapped offset type if
        needed.
        (vectorizable_load): Ditto.
        * tree-vectorizer.h (struct vect_load_store_data): Add
        supported_offset_vectype.
        (vect_gather_scatter_fn_p): Add argument.

Diff:
---
 gcc/tree-vect-data-refs.cc | 270 +++++++++++++++++++++++++++++++++------------
 gcc/tree-vect-stmts.cc     |  79 ++++++++++++-
 gcc/tree-vectorizer.h      |   6 +-
 3 files changed, 280 insertions(+), 75 deletions(-)

diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index c7941108887e..fb2450a30c45 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -4425,6 +4425,143 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
   return opt_result::success ();
 }
 
+/* Structure to hold information about a supported gather/scatter
+   configuration.  */
+struct gather_scatter_config
+{
+  internal_fn ifn;
+  tree offset_vectype;
+  vec<int> elsvals;
+};
+
+/* Determine which gather/scatter IFN is supported for the given parameters.
+   IFN_MASK_GATHER_LOAD, IFN_GATHER_LOAD, and IFN_MASK_LEN_GATHER_LOAD
+   are mutually exclusive, so we only need to find one.  Return the
+   supported IFN or IFN_LAST if none are supported.  */
+
+static internal_fn
+vect_gather_scatter_which_ifn (bool read_p, bool masked_p,
+                               tree vectype, tree memory_type,
+                               tree offset_vectype, int scale,
+                               vec<int> *elsvals)
+{
+  /* Work out which functions to try.  */
+  internal_fn ifn, alt_ifn, alt_ifn2;
+  if (read_p)
+    {
+      ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
+      alt_ifn = IFN_MASK_GATHER_LOAD;
+      alt_ifn2 = IFN_MASK_LEN_GATHER_LOAD;
+    }
+  else
+    {
+      ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
+      alt_ifn = IFN_MASK_SCATTER_STORE;
+      alt_ifn2 = IFN_MASK_LEN_SCATTER_STORE;
+    }
+
+  if (!offset_vectype)
+    return IFN_LAST;
+
+  if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
+                                              offset_vectype, scale, elsvals))
+    return ifn;
+  if (internal_gather_scatter_fn_supported_p (alt_ifn, vectype, memory_type,
+                                              offset_vectype, scale, elsvals))
+    return alt_ifn;
+  if (internal_gather_scatter_fn_supported_p (alt_ifn2, vectype, memory_type,
+                                              offset_vectype, scale, elsvals))
+    return alt_ifn2;
+
+  return IFN_LAST;
+}
+
+/* Collect all supported offset vector types for a gather load or scatter
+   store.  READ_P is true for loads and false for stores.  MASKED_P is true
+   if the load or store is conditional.  VECTYPE is the data vector type.
+   MEMORY_TYPE is the type of the memory elements being loaded or stored,
+   and OFFSET_TYPE is the type of the offset.
+   SCALE is the amount by which the offset should be multiplied.
+
+   Return a vector of all configurations the target supports (which can
+   be none).  */
+
+static auto_vec<gather_scatter_config>
+vect_gather_scatter_get_configs (vec_info *vinfo, bool read_p, bool masked_p,
+                                 tree vectype, tree memory_type,
+                                 tree offset_type, int scale)
+{
+  auto_vec<gather_scatter_config> configs;
+
+  auto_vec<tree, 8> offset_types_to_try;
+
+  /* Try all sizes from the offset type's precision up to POINTER_SIZE.  */
+  for (unsigned int bits = TYPE_PRECISION (offset_type);
+       bits <= POINTER_SIZE;
+       bits *= 2)
+    {
+      /* Signed variant.  */
+      offset_types_to_try.safe_push
+        (build_nonstandard_integer_type (bits, 0));
+      /* Unsigned variant.  */
+      offset_types_to_try.safe_push
+        (build_nonstandard_integer_type (bits, 1));
+    }
+
+  /* Once we find which IFN works for one offset type, we know that it
+     will work for other offset types as well.  Then we can perform
+     the checks for the remaining offset types with only that IFN.
+     However, we might need to try different offset types to find which
+     IFN is supported, since the check is offset-type-specific.  */
+  internal_fn ifn = IFN_LAST;
+
+  /* Try each offset type.  */
+  for (unsigned int i = 0; i < offset_types_to_try.length (); i++)
+    {
+      tree offset_type = offset_types_to_try[i];
+      tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type);
+      if (!offset_vectype)
+        continue;
+
+      vec<int> elsvals = vNULL;
+
+      /* If we haven't determined which IFN is supported yet, try all three
+         to find which one the target supports.  */
+      if (ifn == IFN_LAST)
+        {
+          ifn = vect_gather_scatter_which_ifn (read_p, masked_p,
+                                               vectype, memory_type,
+                                               offset_vectype, scale, &elsvals);
+          if (ifn != IFN_LAST)
+            {
+              /* Found which IFN is supported.  Save this configuration.  */
+              gather_scatter_config config;
+              config.ifn = ifn;
+              config.offset_vectype = offset_vectype;
+              config.elsvals = elsvals;
+              configs.safe_push (config);
+            }
+        }
+      else
+        {
+          /* We already know which IFN is supported, just check if this
+             offset type works with it.  */
+          if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
+                                                      offset_vectype, scale,
+                                                      &elsvals))
+            {
+              gather_scatter_config config;
+              config.ifn = ifn;
+              config.offset_vectype = offset_vectype;
+              config.elsvals = elsvals;
+              configs.safe_push (config);
+            }
+        }
+    }
+
+  return configs;
+}
+
 /* Check whether we can use an internal function for a gather load
    or scatter store.  READ_P is true for loads and false for stores.
    MASKED_P is true if the load or store is conditional.  MEMORY_TYPE is
@@ -4436,15 +4573,21 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
 
    Return true if the function is supported, storing the function id in
    *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.
+   If we support an offset vector type with different signedness than
+   OFFSET_TYPE store it in SUPPORTED_OFFSET_VECTYPE.
 
-   If we can use gather and store the possible else values in ELSVALS.  */
+   If we can use gather/scatter and ELSVALS is nonzero, store the possible
+   else values in ELSVALS.  */
 
 bool
 vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
                           tree vectype, tree memory_type, tree offset_type,
                           int scale, internal_fn *ifn_out,
-                          tree *offset_vectype_out, vec<int> *elsvals)
+                          tree *offset_vectype_out,
+                          tree *supported_offset_vectype,
+                          vec<int> *elsvals)
 {
+  *supported_offset_vectype = NULL_TREE;
   unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
   unsigned int element_bits = vector_element_bits (vectype);
   if (element_bits != memory_bits)
@@ -4452,80 +4595,64 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
        memory elements.  */
     return false;
 
-  /* Work out which function we need.  */
-  internal_fn ifn, alt_ifn, alt_ifn2;
-  if (read_p)
-    {
-      ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
-      alt_ifn = IFN_MASK_GATHER_LOAD;
-      /* When target supports MASK_LEN_GATHER_LOAD, we always
-         use MASK_LEN_GATHER_LOAD regardless whether len and
-         mask are valid or not.  */
-      alt_ifn2 = IFN_MASK_LEN_GATHER_LOAD;
-    }
-  else
-    {
-      ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
-      alt_ifn = IFN_MASK_SCATTER_STORE;
-      /* When target supports MASK_LEN_SCATTER_STORE, we always
-         use MASK_LEN_SCATTER_STORE regardless whether len and
-         mask are valid or not.  */
-      alt_ifn2 = IFN_MASK_LEN_SCATTER_STORE;
-    }
+  /* Get the original offset vector type for comparison.  */
+  tree offset_vectype = VECTOR_TYPE_P (offset_type)
+    ? offset_type : get_vectype_for_scalar_type (vinfo, offset_type);
 
-  for (;;)
-    {
-      tree offset_vectype;
-      if (VECTOR_TYPE_P (offset_type))
-        offset_vectype = offset_type;
-      else
-        {
-          offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type);
-          if (!offset_vectype)
-            return false;
-        }
+  offset_type = TREE_TYPE (offset_vectype);
 
-      /* Test whether the target supports this combination.  */
-      if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
-                                                  offset_vectype, scale,
-                                                  elsvals))
-        {
-          *ifn_out = ifn;
-          *offset_vectype_out = offset_vectype;
-          return true;
-        }
-      else if (!masked_p
-               && internal_gather_scatter_fn_supported_p (alt_ifn, vectype,
-                                                          memory_type,
-                                                          offset_vectype,
-                                                          scale, elsvals))
+  /* Get all supported configurations for this data vector type.  */
+  auto_vec<gather_scatter_config> configs
+    = vect_gather_scatter_get_configs (vinfo, read_p, masked_p, vectype,
+                                       memory_type, offset_type, scale);
+
+  if (configs.is_empty ())
+    return false;
+
+  /* First, try to find a configuration that matches our offset type
+     (no conversion needed).  */
+  for (unsigned int i = 0; i < configs.length (); i++)
+    {
+      if (TYPE_SIGN (configs[i].offset_vectype) == TYPE_SIGN (offset_vectype))
         {
-          *ifn_out = alt_ifn;
-          *offset_vectype_out = offset_vectype;
+          *ifn_out = configs[i].ifn;
+          *offset_vectype_out = configs[i].offset_vectype;
+          if (elsvals)
+            *elsvals = configs[i].elsvals;
           return true;
         }
-      else if (internal_gather_scatter_fn_supported_p (alt_ifn2, vectype,
-                                                       memory_type,
-                                                       offset_vectype, scale,
-                                                       elsvals))
+    }
+
+  /* No direct match.  This means we try to find a sign-swapped offset
+     vectype.  */
+  unsigned int offset_precision = TYPE_PRECISION (TREE_TYPE (offset_vectype));
+  unsigned int needed_precision
+    = TYPE_UNSIGNED (offset_vectype) ? offset_precision * 2 : POINTER_SIZE;
+  needed_precision = std::min (needed_precision, (unsigned) POINTER_SIZE);
+
+  enum tree_code tmp;
+  for (unsigned int i = 0; i < configs.length (); i++)
+    {
+      unsigned int precision
+        = TYPE_PRECISION (TREE_TYPE (configs[i].offset_vectype));
+      if (precision >= needed_precision
+          && (supportable_convert_operation (CONVERT_EXPR,
+                                             configs[i].offset_vectype,
+                                             offset_vectype, &tmp)
+              || (needed_precision == offset_precision
+                  && tree_nop_conversion_p (configs[i].offset_vectype,
+                                            offset_vectype))))
         {
-          *ifn_out = alt_ifn2;
+          *ifn_out = configs[i].ifn;
           *offset_vectype_out = offset_vectype;
+          *supported_offset_vectype = configs[i].offset_vectype;
+          if (elsvals)
+            *elsvals = configs[i].elsvals;
           return true;
         }
-
-      /* For fixed offset vector type we're done.  */
-      if (VECTOR_TYPE_P (offset_type))
-        return false;
-
-      if (TYPE_PRECISION (offset_type) >= POINTER_SIZE
-          && TYPE_PRECISION (offset_type) >= element_bits)
-        return false;
-
-      /* Try a larger offset vector type.  */
-      offset_type = build_nonstandard_integer_type
-        (TYPE_PRECISION (offset_type) * 2, TYPE_UNSIGNED (offset_type));
     }
+
+  return false;
 }
 
 /* STMT_INFO is a call to an internal gather load or scatter store function.
@@ -4678,6 +4805,7 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,
   base = fold_convert (sizetype, base);
   base = size_binop (PLUS_EXPR, base, size_int (pbytepos));
 
+  tree tmp_offset_vectype;
   /* OFF at this point may be either a SSA_NAME or some tree expression
      from get_inner_reference.  Try to peel off loop invariants from it
@@ -4752,12 +4880,14 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,
                                              signed_char_type_node,
                                              new_scale, &ifn,
                                              &offset_vectype,
+                                             &tmp_offset_vectype,
                                              elsvals)
               && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
                                             masked_p, vectype, memory_type,
                                             unsigned_char_type_node,
                                             new_scale, &ifn,
                                             &offset_vectype,
+                                            &tmp_offset_vectype,
                                             elsvals))
             break;
           scale = new_scale;
@@ -4781,7 +4911,9 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,
               && vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
                                            masked_p, vectype, memory_type,
                                            TREE_TYPE (off), scale, &ifn,
-                                           &offset_vectype, elsvals))
+                                           &offset_vectype,
+                                           &tmp_offset_vectype,
+                                           elsvals))
             break;
 
           if (TYPE_PRECISION (TREE_TYPE (op0))
@@ -4835,7 +4967,9 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,
     {
       if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
                                      vectype, memory_type, offtype, scale,
-                                     &ifn, &offset_vectype, elsvals))
+                                     &ifn, &offset_vectype,
+                                     &tmp_offset_vectype,
+                                     elsvals))
         ifn = IFN_LAST;
       decl = NULL_TREE;
     }
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 8692d440a7cb..da093d5021bc 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1505,6 +1505,14 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
                          : ls->strided_offset_vectype);
       tree memory_type = TREE_TYPE (DR_REF (STMT_VINFO_DR_INFO (repr)->dr));
       int scale = SLP_TREE_GS_SCALE (slp_node);
+
+      /* The following "supported" checks just verify what we established in
+         get_load_store_type and don't try different offset types.
+         Therefore, off_vectype must be a supported offset type.  In case
+         we chose a different one use this instead.  */
+      if (ls->supported_offset_vectype)
+        off_vectype = ls->supported_offset_vectype;
+
       if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
                                                   memory_type,
                                                   off_vectype, scale,
@@ -1697,10 +1705,11 @@ vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info, tree vectype,
       /* See whether the target supports the operation with an offset
          no narrower than OFFSET_TYPE.  */
       tree memory_type = TREE_TYPE (DR_REF (dr));
+      tree tmp_offset_vectype;
       if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
                                      vectype, memory_type, offset_type, scale,
                                      &gs_info->ifn, &gs_info->offset_vectype,
-                                     elsvals)
+                                     &tmp_offset_vectype, elsvals)
           || gs_info->ifn == IFN_LAST)
         continue;
@@ -1779,10 +1788,11 @@ vect_use_grouped_gather (dr_vec_info *dr_info, tree vectype,
      type must exist) so it is possible that even though a gather/scatter is
      not available we still have a strided load/store.  */
   bool ok = false;
+  tree tmp_vectype;
   if (vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
                                 *pun_vectype, TREE_TYPE (*pun_vectype),
                                 *pun_vectype, 1, &ifn,
-                                &offset_vectype, elsvals))
+                                &offset_vectype, &tmp_vectype, elsvals))
     ok = true;
   else if (internal_strided_fn_supported_p (strided_ifn, *pun_vectype,
                                             elsvals))
@@ -2080,6 +2090,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
   tree *ls_type = &ls->ls_type;
   bool *slp_perm = &ls->slp_perm;
   unsigned *n_perms = &ls->n_perms;
+  tree *supported_offset_vectype = &ls->supported_offset_vectype;
   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
   class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
@@ -2152,12 +2163,25 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
           tree memory_type = TREE_TYPE (DR_REF (first_dr_info->dr));
           tree tem;
           if (vect_gather_scatter_fn_p (loop_vinfo, vls_type == VLS_LOAD,
-                                        masked_p, vectype,
-                                        memory_type,
+                                        masked_p, vectype, memory_type,
                                         offset_vectype, scale,
                                         &ls->gs.ifn, &tem,
-                                        elsvals))
-            *memory_access_type = VMAT_GATHER_SCATTER_IFN;
+                                        supported_offset_vectype, elsvals))
+            {
+              if (dump_enabled_p ())
+                {
+                  dump_printf_loc (MSG_NOTE, vect_location,
+                                   "gather/scatter with required "
+                                   "offset type "
+                                   "%T and offset scale %d.\n",
+                                   offset_vectype, scale);
+                  if (*supported_offset_vectype)
+                    dump_printf_loc (MSG_NOTE, vect_location,
+                                     "  target supports offset type %T.\n",
+                                     *supported_offset_vectype);
+                }
+              *memory_access_type = VMAT_GATHER_SCATTER_IFN;
+            }
           else if (vls_type == VLS_LOAD
                    ? (targetm.vectorize.builtin_gather
                       && (ls->gs.decl
@@ -2421,6 +2445,19 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
                                              masked_p, &gs_info, elsvals,
                                              group_size, single_element_p))
             {
+              /* vect_use_strided_gather_scatters_p does not save the actually
+                 supported scale and offset type so do that here.
+                 We need it later in check_load_store_for_partial_vectors
+                 where we only check if the given internal function is supported
+                 (to choose whether to use the IFN, LEGACY, or EMULATED flavor
+                 of gather/scatter) and don't re-do the full analysis.  */
+              tree tmp;
+              gcc_assert (vect_gather_scatter_fn_p
+                          (loop_vinfo, vls_type == VLS_LOAD, masked_p, vectype,
+                           gs_info.memory_type, TREE_TYPE (gs_info.offset),
+                           gs_info.scale, &gs_info.ifn,
+                           &tmp, supported_offset_vectype, elsvals));
+
               SLP_TREE_GS_SCALE (slp_node) = gs_info.scale;
               SLP_TREE_GS_BASE (slp_node) = error_mark_node;
               ls->gs.ifn = gs_info.ifn;
@@ -8809,6 +8846,11 @@ vectorizable_store (vec_info *vinfo,
             {
               if (costing_p)
                 {
+                  if (ls.supported_offset_vectype)
+                    inside_cost
+                      += record_stmt_cost (cost_vec, 1, vector_stmt,
+                                           slp_node, 0, vect_body);
+
                   unsigned int cnunits = vect_nunits_for_cost (vectype);
                   inside_cost
                     += record_stmt_cost (cost_vec, cnunits, scalar_store,
@@ -8820,6 +8862,16 @@ vectorizable_store (vec_info *vinfo,
                 vec_offset = vec_offsets[j];
 
               tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
+              bool strided = !VECTOR_TYPE_P (TREE_TYPE (vec_offset));
+
+              /* Perform the offset conversion if necessary.  */
+              if (!strided && ls.supported_offset_vectype)
+                {
+                  gimple_seq stmts = NULL;
+                  vec_offset = gimple_convert
+                    (&stmts, ls.supported_offset_vectype, vec_offset);
+                  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+                }
 
               if (ls.gs.ifn == IFN_MASK_LEN_SCATTER_STORE)
                 {
@@ -10635,6 +10687,11 @@ vectorizable_load (vec_info *vinfo,
             {
               if (costing_p)
                 {
+                  if (ls.supported_offset_vectype)
+                    inside_cost
+                      += record_stmt_cost (cost_vec, 1, vector_stmt,
+                                           slp_node, 0, vect_body);
+
                   unsigned int cnunits = vect_nunits_for_cost (vectype);
                   inside_cost
                     = record_stmt_cost (cost_vec, cnunits, scalar_load,
@@ -10645,6 +10702,16 @@ vectorizable_load (vec_info *vinfo,
                 vec_offset = vec_offsets[i];
 
               tree zero = build_zero_cst (vectype);
               tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
+              bool strided = !VECTOR_TYPE_P (TREE_TYPE (vec_offset));
+
+              /* Perform the offset conversion if necessary.  */
+              if (!strided && ls.supported_offset_vectype)
+                {
+                  gimple_seq stmts = NULL;
+                  vec_offset = gimple_convert
+                    (&stmts, ls.supported_offset_vectype, vec_offset);
+                  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+                }
 
               if (ls.gs.ifn == IFN_MASK_LEN_GATHER_LOAD)
                 {
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 359c994139b2..b940a763a3c7 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -289,6 +289,10 @@ struct vect_load_store_data : vect_data {
   } gs;
   tree strided_offset_vectype; // VMAT_GATHER_SCATTER_IFN, originally strided
   tree ls_type; // VMAT_GATHER_SCATTER_IFN
+  /* This is set to a supported offset vector type if we don't support the
+     originally requested offset type.  In that case there will be an
+     additional offset conversion before the gather/scatter.  */
+  tree supported_offset_vectype; // VMAT_GATHER_SCATTER_IFN
   auto_vec<int> elsvals;
   /* True if the load requires a load permutation.  */
   bool slp_perm; // SLP_TREE_LOAD_PERMUTATION
@@ -2589,7 +2593,7 @@ extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec<int> *);
 extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info);
 extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree,
                                       tree, int, internal_fn *, tree *,
-                                      vec<int> * = nullptr);
+                                      tree *, vec<int> * = nullptr);
 extern bool vect_check_gather_scatter (stmt_vec_info, tree,
                                        loop_vec_info,
                                        gather_scatter_info *,
                                        vec<int> * = nullptr);

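Illustration 2: the precision rule for the sign-swapped candidate can be
sketched as a tiny standalone helper.  This is a hypothetical function
that merely mirrors the needed_precision computation in
vect_gather_scatter_fn_p; the worked values in the comment assume
POINTER_SIZE == 64.

    /* Minimum precision a sign-swapped offset type must have so that
       converting the original offset to it preserves the addressing:
       - an unsigned offset of precision P widens to a signed type of
         2 * P bits (the next size the enumeration tries), which can
         represent every unsigned P-bit value, e.g. unsigned 32-bit ->
         signed 64-bit;
       - a signed (possibly negative) offset needs a pointer-sized
         unsigned type, where the conversion wraps modulo
         2^POINTER_SIZE exactly like address arithmetic, e.g. signed
         32-bit -> unsigned 64-bit.  */
    static unsigned int
    needed_swapped_precision (unsigned int precision, bool unsigned_p,
                              unsigned int pointer_size)
    {
      unsigned int needed = unsigned_p ? precision * 2 : pointer_size;
      return needed < pointer_size ? needed : pointer_size;
    }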