On Fri, Jul 11, 2025 at 11:03 AM Robin Dapp <rdapp....@gmail.com> wrote: > > This patch adds simple misalignment checks for gather/scatter > operations. Previously, we assumed that those perform element accesses > internally so alignment does not matter. The RISC-V vector spec, however, > explicitly states that vector operations are allowed to fault on > element-misaligned accesses. Reasonable uarchs won't, but... > > For gather/scatter we have two paths in the vectorizer: > > (1) Regular analysis based on datarefs. Here we can also create > strided loads. > (2) Non-affine access where each gather index is relative to the > initial address. > > The assumption this patch works from is that once the alignment of the > first scalar is correct, all others will fall in line, as the index is > always a multiple of the first element's size. > > For (1) we have a dataref and can check it for alignment as in other > cases. For (2) this patch checks the object alignment of BASE and > compares it against the natural alignment of the current vectype's unit. > > The patch also adds a pointer argument to the gather/scatter IFNs that > contains the necessary alignment. Most of the patch is thus mechanical > in that it merely adjusts indices. > > I tested the riscv version with a custom qemu version that faults on > element-misaligned vector accesses. With this patch applied, there is > just a single fault left, which is due to PR120782 and which will be > addressed separately. > > Bootstrapped and regtested on x86 and aarch64. Regtested on > rv64gcv_zvl512b with and without unaligned vector support. > > gcc/ChangeLog: > > * internal-fn.cc (internal_fn_len_index): Adjust indices for new > alias_ptr param. > (internal_fn_else_index): Ditto. > (internal_fn_mask_index): Ditto. > (internal_fn_stored_value_index): Ditto. > (internal_fn_alias_ptr_index): New function. > (internal_fn_offset_index): Adjust indices for new alias_ptr param. > (internal_fn_scale_index): Ditto. > (internal_gather_scatter_fn_supported_p): Ditto. > * internal-fn.h (internal_fn_alias_ptr_index): Declare. > * optabs-query.cc (supports_vec_gather_load_p): Adjust indices for > new alias_ptr param. > * tree-vect-data-refs.cc (vect_check_gather_scatter): Add alias > pointer. > * tree-vect-patterns.cc (vect_recog_gather_scatter_pattern): Add > alias pointer. > * tree-vect-slp.cc (vect_get_operand_map): Adjust for alias > pointer. > * tree-vect-stmts.cc (vect_truncate_gather_scatter_offset): Add > alias pointer and misalignment handling. > (get_load_store_type): Move from here... > (get_group_load_store_type): ...to here. > (vectorizable_store): Add alias pointer. > (vectorizable_load): Ditto. > * tree-vectorizer.h (struct gather_scatter_info): Ditto.
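To make the index bumps below easier to follow, my reading of the new argument layout (an illustrative sketch with placeholder operand names, not a quote from the patch) is that the alias pointer becomes argument 1 of every gather/scatter IFN, so a masked gather in the IL would now look roughly like

  vect_res = .MASK_GATHER_LOAD (base, alias_ptr, offset, scale, zero, mask, else_val);

i.e. internal_fn_alias_ptr_index () returns 1 and the offset/scale/mask/else indices each shift up by one.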
> --- > gcc/internal-fn.cc | 43 ++++++-- > gcc/internal-fn.h | 1 + > gcc/optabs-query.cc | 6 +- > gcc/tree-vect-data-refs.cc | 7 ++ > gcc/tree-vect-patterns.cc | 17 +-- > gcc/tree-vect-slp.cc | 16 +-- > gcc/tree-vect-stmts.cc | 214 +++++++++++++++++++++++-------------- > gcc/tree-vectorizer.h | 4 + > 8 files changed, 198 insertions(+), 110 deletions(-) > > diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc > index 4a9dc26e836..6c0155e4c63 100644 > --- a/gcc/internal-fn.cc > +++ b/gcc/internal-fn.cc > @@ -4940,11 +4940,13 @@ internal_fn_len_index (internal_fn fn) > return 2; > > case IFN_MASK_LEN_SCATTER_STORE: > + return 6; > + > case IFN_MASK_LEN_STRIDED_LOAD: > return 5; > > case IFN_MASK_LEN_GATHER_LOAD: > - return 6; > + return 7; > > case IFN_COND_LEN_FMA: > case IFN_COND_LEN_FMS: > @@ -5048,7 +5050,7 @@ internal_fn_else_index (internal_fn fn) > > case IFN_MASK_GATHER_LOAD: > case IFN_MASK_LEN_GATHER_LOAD: > - return 5; > + return 6; > > default: > return -1; > @@ -5083,7 +5085,7 @@ internal_fn_mask_index (internal_fn fn) > case IFN_MASK_SCATTER_STORE: > case IFN_MASK_LEN_GATHER_LOAD: > case IFN_MASK_LEN_SCATTER_STORE: > - return 4; > + return 5; > > case IFN_VCOND_MASK: > case IFN_VCOND_MASK_LEN: > @@ -5108,10 +5110,11 @@ internal_fn_stored_value_index (internal_fn fn) > > case IFN_MASK_STORE: > case IFN_MASK_STORE_LANES: > + return 3; > case IFN_SCATTER_STORE: > case IFN_MASK_SCATTER_STORE: > case IFN_MASK_LEN_SCATTER_STORE: > - return 3; > + return 4; > > case IFN_LEN_STORE: > return 4; > @@ -5125,6 +5128,28 @@ internal_fn_stored_value_index (internal_fn fn) > } > } > > +/* If FN has an alias pointer return its index, otherwise return -1. */ > + > +int > +internal_fn_alias_ptr_index (internal_fn fn) > +{ > + switch (fn) > + { > + case IFN_MASK_LOAD: > + case IFN_MASK_LEN_LOAD: > + case IFN_GATHER_LOAD: > + case IFN_MASK_GATHER_LOAD: > + case IFN_MASK_LEN_GATHER_LOAD: > + case IFN_SCATTER_STORE: > + case IFN_MASK_SCATTER_STORE: > + case IFN_MASK_LEN_SCATTER_STORE: > + return 1; > + > + default: > + return -1; > + } > +} > + > /* If FN is a gather/scatter return the index of its offset argument, > otherwise return -1. */ > > @@ -5142,7 +5167,7 @@ internal_fn_offset_index (internal_fn fn) > case IFN_SCATTER_STORE: > case IFN_MASK_SCATTER_STORE: > case IFN_MASK_LEN_SCATTER_STORE: > - return 1; > + return 2; > > default: > return -1; > @@ -5166,7 +5191,7 @@ internal_fn_scale_index (internal_fn fn) > case IFN_SCATTER_STORE: > case IFN_MASK_SCATTER_STORE: > case IFN_MASK_LEN_SCATTER_STORE: > - return 2; > + return 3; > > default: > return -1; > @@ -5250,13 +5275,9 @@ internal_gather_scatter_fn_supported_p (internal_fn > ifn, tree vector_type, > && insn_operand_matches (icode, 2 + output_ops, GEN_INT (unsigned_p)) > && insn_operand_matches (icode, 3 + output_ops, GEN_INT (scale)); > > - /* For gather the optab's operand indices do not match the IFN's because > - the latter does not have the extension operand (operand 3). It is > - implicitly added during expansion so we use the IFN's else index + 1. 
> - */ > if (ok && elsvals) > get_supported_else_vals > - (icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD) + 1, *elsvals); > + (icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD), *elsvals); > > return ok; > } > diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h > index c5b533c0abd..d190d718240 100644 > --- a/gcc/internal-fn.h > +++ b/gcc/internal-fn.h > @@ -241,6 +241,7 @@ extern int internal_fn_else_index (internal_fn); > extern int internal_fn_stored_value_index (internal_fn); > extern int internal_fn_offset_index (internal_fn fn); > extern int internal_fn_scale_index (internal_fn fn); > +extern int internal_fn_alias_ptr_index (internal_fn fn); > extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree, > tree, tree, int, > vec<int> * = nullptr); > diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc > index f5ca98da818..5335d0d8401 100644 > --- a/gcc/optabs-query.cc > +++ b/gcc/optabs-query.cc > @@ -719,13 +719,9 @@ supports_vec_gather_load_p (machine_mode mode, vec<int> > *elsvals) > = (icode != CODE_FOR_nothing) ? 1 : -1; > } > > - /* For gather the optab's operand indices do not match the IFN's because > - the latter does not have the extension operand (operand 3). It is > - implicitly added during expansion so we use the IFN's else index + 1. > - */ > if (elsvals && icode != CODE_FOR_nothing) > get_supported_else_vals > - (icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD) + 1, *elsvals); > + (icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD), *elsvals); > > return this_fn_optabs->supports_vec_gather_load[mode] > 0; > } > diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc > index 019f0b6ca36..3414d518e17 100644 > --- a/gcc/tree-vect-data-refs.cc > +++ b/gcc/tree-vect-data-refs.cc > @@ -4539,6 +4539,8 @@ vect_describe_gather_scatter_call (stmt_vec_info > stmt_info, > info->ifn = gimple_call_internal_fn (call); > info->decl = NULL_TREE; > info->base = gimple_call_arg (call, 0); > + info->alias_ptr = gimple_call_arg > + (call, internal_fn_alias_ptr_index (info->ifn)); > info->offset = gimple_call_arg > (call, internal_fn_offset_index (info->ifn)); > info->offset_dt = vect_unknown_def_type; > @@ -4869,6 +4871,11 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, > loop_vec_info loop_vinfo, > info->ifn = ifn; > info->decl = decl; > info->base = base; > + > + info->alias_ptr = build_int_cst > + (reference_alias_ptr_type (DR_REF (dr)), > + get_object_alignment (DR_REF (dr))); > + > info->offset = off; > info->offset_dt = vect_unknown_def_type; > info->offset_vectype = offset_vectype; > diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc > index 0f6d6b77ea1..f0ddbf9660c 100644 > --- a/gcc/tree-vect-patterns.cc > +++ b/gcc/tree-vect-patterns.cc > @@ -6042,12 +6042,14 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo, > > tree vec_els > = vect_get_mask_load_else (elsval, TREE_TYPE (gs_vectype)); > - pattern_stmt = gimple_build_call_internal (gs_info.ifn, 6, base, > + pattern_stmt = gimple_build_call_internal (gs_info.ifn, 7, base, > + gs_info.alias_ptr, > offset, scale, zero, > mask, > vec_els); > } > else > - pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base, > + pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base, > + gs_info.alias_ptr, > offset, scale, zero); > tree lhs = gimple_get_lhs (stmt_info->stmt); > tree load_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); > @@ -6057,12 +6059,13 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo, > { > tree rhs = vect_get_store_rhs (stmt_info); 
> if (mask != NULL) > - pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, > - base, offset, scale, rhs, > - mask); > + pattern_stmt = gimple_build_call_internal (gs_info.ifn, 6, > + base, gs_info.alias_ptr, > + offset, scale, rhs, mask); > else > - pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, > - base, offset, scale, rhs); > + pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, > + base, gs_info.alias_ptr, > + offset, scale, rhs); > } > gimple_call_set_nothrow (pattern_stmt, true); > > diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc > index 0c95ed946bb..30dd886d305 100644 > --- a/gcc/tree-vect-slp.cc > +++ b/gcc/tree-vect-slp.cc > @@ -511,11 +511,11 @@ vect_def_types_match (enum vect_def_type dta, enum > vect_def_type dtb) > > static const int no_arg_map[] = { 0 }; > static const int arg0_map[] = { 1, 0 }; > -static const int arg1_map[] = { 1, 1 }; > +static const int arg2_map[] = { 1, 2 }; > static const int arg2_arg3_map[] = { 2, 2, 3 }; > -static const int arg1_arg3_map[] = { 2, 1, 3 }; > -static const int arg1_arg4_arg5_map[] = { 3, 1, 4, 5 }; > -static const int arg1_arg3_arg4_map[] = { 3, 1, 3, 4 }; > +static const int arg2_arg4_map[] = { 2, 2, 4 }; > +static const int arg2_arg5_arg6_map[] = { 3, 2, 5, 6 }; > +static const int arg2_arg4_arg5_map[] = { 3, 2, 4, 5 }; > static const int arg3_arg2_map[] = { 2, 3, 2 }; > static const int op1_op0_map[] = { 2, 1, 0 }; > static const int off_map[] = { 1, GATHER_SCATTER_OFFSET }; > @@ -570,18 +570,18 @@ vect_get_operand_map (const gimple *stmt, bool > gather_scatter_p = false, > return gather_scatter_p ? off_arg2_arg3_map : arg2_arg3_map; > > case IFN_GATHER_LOAD: > - return arg1_map; > + return arg2_map; > > case IFN_MASK_GATHER_LOAD: > case IFN_MASK_LEN_GATHER_LOAD: > - return arg1_arg4_arg5_map; > + return arg2_arg5_arg6_map; > > case IFN_SCATTER_STORE: > - return arg1_arg3_map; > + return arg2_arg4_map; > > case IFN_MASK_SCATTER_STORE: > case IFN_MASK_LEN_SCATTER_STORE: > - return arg1_arg3_arg4_map; > + return arg2_arg4_arg5_map; > > case IFN_MASK_STORE: > return gather_scatter_p ? off_arg3_arg2_map : arg3_arg2_map; > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc > index 57942f43c3b..9b524becb88 100644 > --- a/gcc/tree-vect-stmts.cc > +++ b/gcc/tree-vect-stmts.cc > @@ -1803,6 +1803,9 @@ vect_truncate_gather_scatter_offset (stmt_vec_info > stmt_info, > /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET, > but we don't need to store that here. */ > gs_info->base = NULL_TREE; > + gs_info->alias_ptr = build_int_cst > + (reference_alias_ptr_type (DR_REF (dr)), > + get_object_alignment (DR_REF (dr))); > gs_info->element_type = TREE_TYPE (vectype); > gs_info->offset = fold_convert (offset_type, step); > gs_info->offset_dt = vect_constant_def; > @@ -2106,7 +2109,7 @@ get_group_load_store_type (vec_info *vinfo, > stmt_vec_info stmt_info, > separated by the stride, until we have a complete vector. > Fall back to scalar accesses if that isn't possible. */ > *memory_access_type = VMAT_STRIDED_SLP; > - else > + else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info)) > { > int cmp = compare_step_with_zero (vinfo, stmt_info); > if (cmp < 0) > @@ -2349,19 +2352,71 @@ get_group_load_store_type (vec_info *vinfo, > stmt_vec_info stmt_info, > allows us to use contiguous accesses. 
*/ > if ((*memory_access_type == VMAT_ELEMENTWISE > || *memory_access_type == VMAT_STRIDED_SLP) > + && !STMT_VINFO_GATHER_SCATTER_P (stmt_info) > && single_element_p > && SLP_TREE_LANES (slp_node) == 1 > && loop_vinfo > && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo, > masked_p, gs_info, elsvals)) > *memory_access_type = VMAT_GATHER_SCATTER; > + else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) > + { > + *memory_access_type = VMAT_GATHER_SCATTER; > + if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info, > + elsvals)) > + gcc_unreachable (); > + /* When using internal functions, we rely on pattern recognition > + to convert the type of the offset to the type that the target > + requires, with the result being a call to an internal function. > + If that failed for some reason (e.g. because another pattern > + took priority), just handle cases in which the offset already > + has the right type. */ > + else if (GATHER_SCATTER_IFN_P (*gs_info) > + && !is_gimple_call (stmt_info->stmt) > + && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset), > + TREE_TYPE > (gs_info->offset_vectype))) > + { > + if (dump_enabled_p ()) > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > + "%s offset requires a conversion\n", > + vls_type == VLS_LOAD ? "gather" : "scatter"); > + return false; > + } > + else if (!vect_is_simple_use (gs_info->offset, vinfo, > + &gs_info->offset_dt, > + &gs_info->offset_vectype)) > + { > + if (dump_enabled_p ()) > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > + "%s index use not simple.\n", > + vls_type == VLS_LOAD ? "gather" : "scatter"); > + return false; > + } > + else if (GATHER_SCATTER_EMULATED_P (*gs_info)) > + { > + if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant () > + || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant > () > + || VECTOR_BOOLEAN_TYPE_P (gs_info->offset_vectype) > + || !constant_multiple_p (TYPE_VECTOR_SUBPARTS > + (gs_info->offset_vectype), > + TYPE_VECTOR_SUBPARTS (vectype))) > + { > + if (dump_enabled_p ()) > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > + "unsupported vector types for emulated " > + "gather.\n"); > + return false; > + } > + } > + } > > if (*memory_access_type == VMAT_CONTIGUOUS_DOWN > || *memory_access_type == VMAT_CONTIGUOUS_REVERSE) > *poffset = neg_ldst_offset; > > - if (*memory_access_type == VMAT_GATHER_SCATTER > - || *memory_access_type == VMAT_ELEMENTWISE > + if (*memory_access_type == VMAT_ELEMENTWISE > + || (*memory_access_type == VMAT_GATHER_SCATTER > + && GATHER_SCATTER_LEGACY_P (*gs_info)) > || *memory_access_type == VMAT_STRIDED_SLP > || *memory_access_type == VMAT_INVARIANT) > { > @@ -2370,10 +2425,48 @@ get_group_load_store_type (vec_info *vinfo, > stmt_vec_info stmt_info, > } > else > { > - *misalignment = dr_misalignment (first_dr_info, vectype, *poffset); > - *alignment_support_scheme > - = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype, > - *misalignment); > + /* Non dataref-based gather/scatter. */ > + if (*memory_access_type == VMAT_GATHER_SCATTER > + && !first_dr_info)
So this is the only part I think is odd - there is a dataref, it just has only DR_REF as relevant data. I would have expected we could adjust vect_supportable_dr_alignment to deal with the scatter/gather case. I'm OK with doing it the way you did it here, but seeing the /* For now assume all conditional loads/stores support unaligned access without any special code. */ if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt)) if (gimple_call_internal_p (stmt) && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD || gimple_call_internal_fn (stmt) == IFN_MASK_STORE)) return dr_unaligned_supported; snippet in that function, I fear there are more cases that might break on targets where at least element alignment is required? > + { > + /* Gather-scatter accesses normally perform only component accesses > + so alignment is irrelevant for them. Targets like riscv do care > + about scalar alignment in vector accesses, though, so check > scalar > + alignment here. We determined the alias pointer as well as the > + base alignment during pattern recognition and can re-use it here. > + > + As we do not have a dataref we only know the alignment of the > + base. For now don't try harder to determine misalignment and > + just assume it is unknown. We consider the type packed if its > + scalar alignment is lower than the natural alignment of a vector > + element's type. */ > + > + tree inner_vectype = TREE_TYPE (vectype); > + > + unsigned HOST_WIDE_INT scalar_align > + = tree_to_uhwi (gs_info->alias_ptr); > + unsigned HOST_WIDE_INT inner_vectype_sz > + = tree_to_uhwi (TYPE_SIZE (inner_vectype)); > + > + bool is_misaligned = scalar_align < inner_vectype_sz; > + bool is_packed = scalar_align > 1 && is_misaligned; > + > + *misalignment = DR_MISALIGNMENT_UNKNOWN; > + > + if (targetm.vectorize.support_vector_misalignment > + (TYPE_MODE (vectype), inner_vectype, *misalignment, is_packed, > + /*is_gather_scatter=*/ true)) > + *alignment_support_scheme = dr_unaligned_supported; > + else > + *alignment_support_scheme = dr_unaligned_unsupported; > + } > + else > + { > + *misalignment = dr_misalignment (first_dr_info, vectype, *poffset); > + *alignment_support_scheme > + = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype, > + *misalignment); > + } > } > > if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
"gather" : "scatter"); > - return false; > - } > - slp_tree offset_node = SLP_TREE_CHILDREN (slp_node)[0]; > - gs_info->offset_dt = SLP_TREE_DEF_TYPE (offset_node); > - gs_info->offset_vectype = SLP_TREE_VECTYPE (offset_node); > - if (gs_info->ifn == IFN_LAST && !gs_info->decl) > - { > - if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant () > - || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant > () > - || VECTOR_BOOLEAN_TYPE_P (gs_info->offset_vectype) > - || !constant_multiple_p (TYPE_VECTOR_SUBPARTS > - (gs_info->offset_vectype), > - TYPE_VECTOR_SUBPARTS (vectype))) > - { > - if (dump_enabled_p ()) > - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > - "unsupported vector types for emulated " > - "gather.\n"); > - return false; > - } > - } > - /* Gather-scatter accesses perform only component accesses, alignment > - is irrelevant for them. */ > - *alignment_support_scheme = dr_unaligned_supported; > - } > - else if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node, > - masked_p, > - vls_type, memory_access_type, poffset, > - alignment_support_scheme, > - misalignment, gs_info, lanes_ifn, > - elsvals)) > + if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node, > + masked_p, > + vls_type, memory_access_type, poffset, > + alignment_support_scheme, > + misalignment, gs_info, lanes_ifn, > + elsvals)) > return false; > > if ((*memory_access_type == VMAT_ELEMENTWISE > @@ -2528,17 +2575,18 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info > stmt_info, > "alignment. With non-contiguous memory > vectorization" > " could read out of bounds at %G ", > STMT_VINFO_STMT (stmt_info)); > - if (inbounds) > - LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true; > - else > - return false; > + if (inbounds) > + LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true; > + else > + return false; > } > > /* If this DR needs alignment for correctness, we must ensure the target > alignment is a constant power-of-two multiple of the amount read per > vector iteration or force masking. */ > if (dr_safe_speculative_read_required (stmt_info) > - && *alignment_support_scheme == dr_aligned) > + && (*alignment_support_scheme == dr_aligned > + && *memory_access_type != VMAT_GATHER_SCATTER)) > { > /* We can only peel for loops, of course. */ > gcc_checking_assert (loop_vinfo); > @@ -8442,7 +8490,6 @@ vectorizable_store (vec_info *vinfo, > > if (dump_enabled_p () > && memory_access_type != VMAT_ELEMENTWISE > - && memory_access_type != VMAT_GATHER_SCATTER > && memory_access_type != VMAT_STRIDED_SLP > && memory_access_type != VMAT_INVARIANT > && alignment_support_scheme != dr_aligned) > @@ -9143,24 +9190,31 @@ vectorizable_store (vec_info *vinfo, > { > if (VECTOR_TYPE_P (TREE_TYPE (vec_offset))) > call = gimple_build_call_internal ( > - IFN_MASK_LEN_SCATTER_STORE, 7, dataref_ptr, > + IFN_MASK_LEN_SCATTER_STORE, 8, dataref_ptr, > + gs_info.alias_ptr, > vec_offset, scale, vec_oprnd, final_mask, > final_len, > bias); > else > /* Non-vector offset indicates that prefer to take > MASK_LEN_STRIDED_STORE instead of the > - IFN_MASK_SCATTER_STORE with direct stride arg. */ > + IFN_MASK_SCATTER_STORE with direct stride arg. > + Similar to the gather case we have checked the > + alignment for a scatter already and assume > + that the strided store has the same requirements. 
*/ > call = gimple_build_call_internal ( > IFN_MASK_LEN_STRIDED_STORE, 6, dataref_ptr, > vec_offset, vec_oprnd, final_mask, final_len, > bias); > } > else if (final_mask) > call = gimple_build_call_internal > - (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, > + (IFN_MASK_SCATTER_STORE, 6, dataref_ptr, > + gs_info.alias_ptr, > vec_offset, scale, vec_oprnd, final_mask); > else > - call = gimple_build_call_internal (IFN_SCATTER_STORE, 4, > - dataref_ptr, vec_offset, > + call = gimple_build_call_internal (IFN_SCATTER_STORE, 5, > + dataref_ptr, > + gs_info.alias_ptr, > + vec_offset, > scale, vec_oprnd); > gimple_call_set_nothrow (call, true); > vect_finish_stmt_generation (vinfo, stmt_info, call, gsi); > @@ -10627,7 +10681,6 @@ vectorizable_load (vec_info *vinfo, > vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); > } > > - gcc_assert (alignment_support_scheme); > vec_loop_masks *loop_masks > = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) > ? &LOOP_VINFO_MASKS (loop_vinfo) > @@ -10647,10 +10700,12 @@ vectorizable_load (vec_info *vinfo, > > /* Targets with store-lane instructions must not require explicit > realignment. vect_supportable_dr_alignment always returns either > - dr_aligned or dr_unaligned_supported for masked operations. */ > + dr_aligned or dr_unaligned_supported for (non-length) masked > + operations. */ > gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES > && !mask > && !loop_masks) > + || memory_access_type == VMAT_GATHER_SCATTER > || alignment_support_scheme == dr_aligned > || alignment_support_scheme == dr_unaligned_supported); > > @@ -10995,8 +11050,6 @@ vectorizable_load (vec_info *vinfo, > > if (memory_access_type == VMAT_GATHER_SCATTER) > { > - gcc_assert (alignment_support_scheme == dr_aligned > - || alignment_support_scheme == dr_unaligned_supported); > gcc_assert (!grouped_load && !slp_perm); > > unsigned int inside_cost = 0, prologue_cost = 0; > @@ -11085,7 +11138,8 @@ vectorizable_load (vec_info *vinfo, > { > if (VECTOR_TYPE_P (TREE_TYPE (vec_offset))) > call = gimple_build_call_internal > (IFN_MASK_LEN_GATHER_LOAD, > - 8, dataref_ptr, > + 9, dataref_ptr, > + gs_info.alias_ptr, > vec_offset, scale, > zero, > final_mask, vec_els, > final_len, bias); > @@ -11100,13 +11154,15 @@ vectorizable_load (vec_info *vinfo, > } > else if (final_mask) > call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, > - 6, dataref_ptr, > + 7, dataref_ptr, > + gs_info.alias_ptr, > vec_offset, scale, > zero, final_mask, vec_els); > else > - call = gimple_build_call_internal (IFN_GATHER_LOAD, 4, > - dataref_ptr, vec_offset, > - scale, zero); > + call = gimple_build_call_internal (IFN_GATHER_LOAD, 5, > + dataref_ptr, > + gs_info.alias_ptr, > + vec_offset, scale, zero); > gimple_call_set_nothrow (call, true); > new_stmt = call; > data_ref = NULL_TREE; > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h > index 7b927491b1c..4511527647a 100644 > --- a/gcc/tree-vectorizer.h > +++ b/gcc/tree-vectorizer.h > @@ -1557,6 +1557,10 @@ struct gather_scatter_info { > /* The loop-invariant base value. */ > tree base; > > + /* The TBAA alias pointer whose value determines the alignment > + of the scalar accesses. */ > + tree alias_ptr; > + > /* The original scalar offset, which is a non-loop-invariant SSA_NAME. */ > tree offset; > > -- > 2.50.0 >