On Fri, Jul 11, 2025 at 11:03 AM Robin Dapp <rdapp....@gmail.com> wrote:
>
> This patch adds simple misalignment checks for gather/scatter
> operations.  Previously, we assumed that those perform element accesses
> internally so alignment does not matter.  The RISC-V vector spec,
> however, explicitly states that vector operations are allowed to fault
> on element-misaligned accesses.  Reasonable uarchs won't, but...
>
> For gather/scatter we have two paths in the vectorizer:
>
>  (1) Regular analysis based on datarefs.  Here we can also create
>      strided loads.
>  (2) Non-affine access where each gather index is relative to the
>      initial address.
>
> The assumption this patch works from is that once the alignment of the
> first scalar access is correct, all others will fall in line, as the
> index is always a multiple of the first element's size.
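> 
> As a concrete (made-up) example of the access shape this is about:
> every address such a gather touches is BASE plus a multiple of the
> element size away from the first access, so if the first scalar access
> is sufficiently aligned, all others are too:
> 
>   /* Hypothetical example; not taken from the patch or testsuite.
>      Each access is base + idx[i] * sizeof (float), i.e. offset from
>      the first element by a multiple of the element size.  */
>   float
>   gather_sum (const float *base, const int *idx, int n)
>   {
>     float sum = 0.0f;
>     for (int i = 0; i < n; i++)
>       sum += base[idx[i]];
>     return sum;
>   }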
>
> For (1) we have a dataref and can check it for alignment as in other
> cases.  For (2) this patch checks the object alignment of BASE and
> compares it against the natural alignment of the current vectype's unit.
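> 
> Condensed, the check for (2) amounts to the following (just a sketch of
> the hunk in get_group_load_store_type further down; both quantities are
> in bits):
> 
>   unsigned HOST_WIDE_INT scalar_align = tree_to_uhwi (gs_info->alias_ptr);
>   unsigned HOST_WIDE_INT elt_size
>     = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype)));
>   bool is_misaligned = scalar_align < elt_size;
>   bool is_packed = scalar_align > 1 && is_misaligned;
>   /* The misalignment itself stays unknown; the target decides whether
>      it can handle that for a gather/scatter.  */
>   if (targetm.vectorize.support_vector_misalignment
>       (TYPE_MODE (vectype), TREE_TYPE (vectype), DR_MISALIGNMENT_UNKNOWN,
>        is_packed, /*is_gather_scatter=*/ true))
>     *alignment_support_scheme = dr_unaligned_supported;
>   else
>     *alignment_support_scheme = dr_unaligned_unsupported;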
>
> The patch also adds a pointer argument to the gather/scatter IFNs that
> contains the necessary alignment.  Most of the patch is thus mechanical
> in that it merely adjusts indices.
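> 
> For consumers, retrieving the encoded alignment from such a call then
> looks roughly like this (a sketch along the lines of what
> vect_describe_gather_scatter_call does with the new
> internal_fn_alias_ptr_index helper):
> 
>   if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
>     if (gimple_call_internal_p (call))
>       {
>         internal_fn ifn = gimple_call_internal_fn (call);
>         int idx = internal_fn_alias_ptr_index (ifn);
>         if (idx != -1)
>           {
>             /* The alias pointer is an INTEGER_CST of pointer type whose
>                value is the known scalar alignment in bits.  */
>             tree alias_ptr = gimple_call_arg (call, idx);
>             unsigned HOST_WIDE_INT align = tree_to_uhwi (alias_ptr);
>             /* ALIGN can now be compared against the element size.  */
>             (void) align;
>           }
>       }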
>
> I tested the RISC-V version with a custom QEMU build that faults on
> element-misaligned vector accesses.  With this patch applied, there is
> just a single fault left, which is due to PR120782 and which will be
> addressed separately.
>
> Bootstrapped and regtested on x86 and aarch64.  Regtested on
> rv64gcv_zvl512b with and without unaligned vector support.
>
> gcc/ChangeLog:
>
>         * internal-fn.cc (internal_fn_len_index): Adjust indices for new
>         alias_ptr param.
>         (internal_fn_else_index): Ditto.
>         (internal_fn_mask_index): Ditto.
>         (internal_fn_stored_value_index): Ditto.
>         (internal_fn_alias_ptr_index): New function.
>         (internal_fn_offset_index): Ditto.
>         (internal_fn_scale_index): Ditto.
>         (internal_gather_scatter_fn_supported_p): Ditto.
>         * internal-fn.h (internal_fn_alias_ptr_index): Declare.
>         * optabs-query.cc (supports_vec_gather_load_p): Ditto.
>         * tree-vect-data-refs.cc (vect_check_gather_scatter): Add alias
>         pointer.
>         * tree-vect-patterns.cc (vect_recog_gather_scatter_pattern): Add
>         alias pointer.
>         * tree-vect-slp.cc (vect_get_operand_map): Adjust for alias
>         pointer.
>         * tree-vect-stmts.cc (vect_truncate_gather_scatter_offset): Add
>         alias pointer and misalignment handling.
>         (get_load_store_type): Move gather/scatter handling from here...
>         (get_group_load_store_type): ...to here.
>         (vectorizable_store): Add alias pointer.
>         (vectorizable_load): Ditto.
>         * tree-vectorizer.h (struct gather_scatter_info): Ditto.
> ---
>  gcc/internal-fn.cc         |  43 ++++++--
>  gcc/internal-fn.h          |   1 +
>  gcc/optabs-query.cc        |   6 +-
>  gcc/tree-vect-data-refs.cc |   7 ++
>  gcc/tree-vect-patterns.cc  |  17 +--
>  gcc/tree-vect-slp.cc       |  16 +--
>  gcc/tree-vect-stmts.cc     | 214 +++++++++++++++++++++++--------------
>  gcc/tree-vectorizer.h      |   4 +
>  8 files changed, 198 insertions(+), 110 deletions(-)
>
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index 4a9dc26e836..6c0155e4c63 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -4940,11 +4940,13 @@ internal_fn_len_index (internal_fn fn)
>        return 2;
>
>      case IFN_MASK_LEN_SCATTER_STORE:
> +      return 6;
> +
>      case IFN_MASK_LEN_STRIDED_LOAD:
>        return 5;
>
>      case IFN_MASK_LEN_GATHER_LOAD:
> -      return 6;
> +      return 7;
>
>      case IFN_COND_LEN_FMA:
>      case IFN_COND_LEN_FMS:
> @@ -5048,7 +5050,7 @@ internal_fn_else_index (internal_fn fn)
>
>      case IFN_MASK_GATHER_LOAD:
>      case IFN_MASK_LEN_GATHER_LOAD:
> -      return 5;
> +      return 6;
>
>      default:
>        return -1;
> @@ -5083,7 +5085,7 @@ internal_fn_mask_index (internal_fn fn)
>      case IFN_MASK_SCATTER_STORE:
>      case IFN_MASK_LEN_GATHER_LOAD:
>      case IFN_MASK_LEN_SCATTER_STORE:
> -      return 4;
> +      return 5;
>
>      case IFN_VCOND_MASK:
>      case IFN_VCOND_MASK_LEN:
> @@ -5108,10 +5110,11 @@ internal_fn_stored_value_index (internal_fn fn)
>
>      case IFN_MASK_STORE:
>      case IFN_MASK_STORE_LANES:
> +      return 3;
>      case IFN_SCATTER_STORE:
>      case IFN_MASK_SCATTER_STORE:
>      case IFN_MASK_LEN_SCATTER_STORE:
> -      return 3;
> +      return 4;
>
>      case IFN_LEN_STORE:
>        return 4;
> @@ -5125,6 +5128,28 @@ internal_fn_stored_value_index (internal_fn fn)
>      }
>  }
>
> +/* If FN has an alias pointer return its index, otherwise return -1.  */
> +
> +int
> +internal_fn_alias_ptr_index (internal_fn fn)
> +{
> +  switch (fn)
> +    {
> +    case IFN_MASK_LOAD:
> +    case IFN_MASK_LEN_LOAD:
> +    case IFN_GATHER_LOAD:
> +    case IFN_MASK_GATHER_LOAD:
> +    case IFN_MASK_LEN_GATHER_LOAD:
> +    case IFN_SCATTER_STORE:
> +    case IFN_MASK_SCATTER_STORE:
> +    case IFN_MASK_LEN_SCATTER_STORE:
> +      return 1;
> +
> +    default:
> +      return -1;
> +    }
> +}
> +
>  /* If FN is a gather/scatter return the index of its offset argument,
>     otherwise return -1.  */
>
> @@ -5142,7 +5167,7 @@ internal_fn_offset_index (internal_fn fn)
>      case IFN_SCATTER_STORE:
>      case IFN_MASK_SCATTER_STORE:
>      case IFN_MASK_LEN_SCATTER_STORE:
> -      return 1;
> +      return 2;
>
>      default:
>        return -1;
> @@ -5166,7 +5191,7 @@ internal_fn_scale_index (internal_fn fn)
>      case IFN_SCATTER_STORE:
>      case IFN_MASK_SCATTER_STORE:
>      case IFN_MASK_LEN_SCATTER_STORE:
> -      return 2;
> +      return 3;
>
>      default:
>        return -1;
> @@ -5250,13 +5275,9 @@ internal_gather_scatter_fn_supported_p (internal_fn ifn, tree vector_type,
>      && insn_operand_matches (icode, 2 + output_ops, GEN_INT (unsigned_p))
>      && insn_operand_matches (icode, 3 + output_ops, GEN_INT (scale));
>
> -  /* For gather the optab's operand indices do not match the IFN's because
> -     the latter does not have the extension operand (operand 3).  It is
> -     implicitly added during expansion so we use the IFN's else index + 1.
> -     */
>    if (ok && elsvals)
>      get_supported_else_vals
> -      (icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD) + 1, *elsvals);
> +      (icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD), *elsvals);
>
>    return ok;
>  }
> diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
> index c5b533c0abd..d190d718240 100644
> --- a/gcc/internal-fn.h
> +++ b/gcc/internal-fn.h
> @@ -241,6 +241,7 @@ extern int internal_fn_else_index (internal_fn);
>  extern int internal_fn_stored_value_index (internal_fn);
>  extern int internal_fn_offset_index (internal_fn fn);
>  extern int internal_fn_scale_index (internal_fn fn);
> +extern int internal_fn_alias_ptr_index (internal_fn fn);
>  extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree,
>                                                     tree, tree, int,
>                                                     vec<int> * = nullptr);
> diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
> index f5ca98da818..5335d0d8401 100644
> --- a/gcc/optabs-query.cc
> +++ b/gcc/optabs-query.cc
> @@ -719,13 +719,9 @@ supports_vec_gather_load_p (machine_mode mode, vec<int> *elsvals)
>         = (icode != CODE_FOR_nothing) ? 1 : -1;
>      }
>
> -  /* For gather the optab's operand indices do not match the IFN's because
> -     the latter does not have the extension operand (operand 3).  It is
> -     implicitly added during expansion so we use the IFN's else index + 1.
> -     */
>    if (elsvals && icode != CODE_FOR_nothing)
>      get_supported_else_vals
> -      (icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD) + 1, *elsvals);
> +      (icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD), *elsvals);
>
>    return this_fn_optabs->supports_vec_gather_load[mode] > 0;
>  }
> diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
> index 019f0b6ca36..3414d518e17 100644
> --- a/gcc/tree-vect-data-refs.cc
> +++ b/gcc/tree-vect-data-refs.cc
> @@ -4539,6 +4539,8 @@ vect_describe_gather_scatter_call (stmt_vec_info stmt_info,
>    info->ifn = gimple_call_internal_fn (call);
>    info->decl = NULL_TREE;
>    info->base = gimple_call_arg (call, 0);
> +  info->alias_ptr = gimple_call_arg
> +                    (call, internal_fn_alias_ptr_index (info->ifn));
>    info->offset = gimple_call_arg
>                   (call, internal_fn_offset_index (info->ifn));
>    info->offset_dt = vect_unknown_def_type;
> @@ -4869,6 +4871,11 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
>    info->ifn = ifn;
>    info->decl = decl;
>    info->base = base;
> +
> +  info->alias_ptr = build_int_cst
> +    (reference_alias_ptr_type (DR_REF (dr)),
> +     get_object_alignment (DR_REF (dr)));
> +
>    info->offset = off;
>    info->offset_dt = vect_unknown_def_type;
>    info->offset_vectype = offset_vectype;
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index 0f6d6b77ea1..f0ddbf9660c 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -6042,12 +6042,14 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
>
>           tree vec_els
>             = vect_get_mask_load_else (elsval, TREE_TYPE (gs_vectype));
> -         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 6, base,
> +         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 7, base,
> +                                                    gs_info.alias_ptr,
>                                                      offset, scale, zero, mask,
>                                                      vec_els);
>         }
>        else
> -       pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
> +       pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
> +                                                  gs_info.alias_ptr,
>                                                    offset, scale, zero);
>        tree lhs = gimple_get_lhs (stmt_info->stmt);
>        tree load_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
> @@ -6057,12 +6059,13 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
>      {
>        tree rhs = vect_get_store_rhs (stmt_info);
>        if (mask != NULL)
> -       pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
> -                                                  base, offset, scale, rhs,
> -                                                  mask);
> +       pattern_stmt = gimple_build_call_internal (gs_info.ifn, 6,
> +                                                  base, gs_info.alias_ptr,
> +                                                  offset, scale, rhs, mask);
>        else
> -       pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
> -                                                  base, offset, scale, rhs);
> +       pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
> +                                                  base, gs_info.alias_ptr,
> +                                                  offset, scale, rhs);
>      }
>    gimple_call_set_nothrow (pattern_stmt, true);
>
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index 0c95ed946bb..30dd886d305 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -511,11 +511,11 @@ vect_def_types_match (enum vect_def_type dta, enum vect_def_type dtb)
>
>  static const int no_arg_map[] = { 0 };
>  static const int arg0_map[] = { 1, 0 };
> -static const int arg1_map[] = { 1, 1 };
> +static const int arg2_map[] = { 1, 2 };
>  static const int arg2_arg3_map[] = { 2, 2, 3 };
> -static const int arg1_arg3_map[] = { 2, 1, 3 };
> -static const int arg1_arg4_arg5_map[] = { 3, 1, 4, 5 };
> -static const int arg1_arg3_arg4_map[] = { 3, 1, 3, 4 };
> +static const int arg2_arg4_map[] = { 2, 2, 4 };
> +static const int arg2_arg5_arg6_map[] = { 3, 2, 5, 6 };
> +static const int arg2_arg4_arg5_map[] = { 3, 2, 4, 5 };
>  static const int arg3_arg2_map[] = { 2, 3, 2 };
>  static const int op1_op0_map[] = { 2, 1, 0 };
>  static const int off_map[] = { 1, GATHER_SCATTER_OFFSET };
> @@ -570,18 +570,18 @@ vect_get_operand_map (const gimple *stmt, bool gather_scatter_p = false,
>             return gather_scatter_p ? off_arg2_arg3_map : arg2_arg3_map;
>
>           case IFN_GATHER_LOAD:
> -           return arg1_map;
> +           return arg2_map;
>
>           case IFN_MASK_GATHER_LOAD:
>           case IFN_MASK_LEN_GATHER_LOAD:
> -           return arg1_arg4_arg5_map;
> +           return arg2_arg5_arg6_map;
>
>           case IFN_SCATTER_STORE:
> -           return arg1_arg3_map;
> +           return arg2_arg4_map;
>
>           case IFN_MASK_SCATTER_STORE:
>           case IFN_MASK_LEN_SCATTER_STORE:
> -           return arg1_arg3_arg4_map;
> +           return arg2_arg4_arg5_map;
>
>           case IFN_MASK_STORE:
>             return gather_scatter_p ? off_arg3_arg2_map : arg3_arg2_map;
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 57942f43c3b..9b524becb88 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -1803,6 +1803,9 @@ vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
>        /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
>          but we don't need to store that here.  */
>        gs_info->base = NULL_TREE;
> +      gs_info->alias_ptr = build_int_cst
> +       (reference_alias_ptr_type (DR_REF (dr)),
> +        get_object_alignment (DR_REF (dr)));
>        gs_info->element_type = TREE_TYPE (vectype);
>        gs_info->offset = fold_convert (offset_type, step);
>        gs_info->offset_dt = vect_constant_def;
> @@ -2106,7 +2109,7 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
>         separated by the stride, until we have a complete vector.
>         Fall back to scalar accesses if that isn't possible.  */
>      *memory_access_type = VMAT_STRIDED_SLP;
> -  else
> +  else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
>      {
>        int cmp = compare_step_with_zero (vinfo, stmt_info);
>        if (cmp < 0)
> @@ -2349,19 +2352,71 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
>       allows us to use contiguous accesses.  */
>    if ((*memory_access_type == VMAT_ELEMENTWISE
>         || *memory_access_type == VMAT_STRIDED_SLP)
> +      && !STMT_VINFO_GATHER_SCATTER_P (stmt_info)
>        && single_element_p
>        && SLP_TREE_LANES (slp_node) == 1
>        && loop_vinfo
>        && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
>                                              masked_p, gs_info, elsvals))
>      *memory_access_type = VMAT_GATHER_SCATTER;
> +  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
> +    {
> +      *memory_access_type = VMAT_GATHER_SCATTER;
> +      if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info,
> +                                     elsvals))
> +       gcc_unreachable ();
> +      /* When using internal functions, we rely on pattern recognition
> +        to convert the type of the offset to the type that the target
> +        requires, with the result being a call to an internal function.
> +        If that failed for some reason (e.g. because another pattern
> +        took priority), just handle cases in which the offset already
> +        has the right type.  */
> +      else if (GATHER_SCATTER_IFN_P (*gs_info)
> +              && !is_gimple_call (stmt_info->stmt)
> +              && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
> +                                         TREE_TYPE (gs_info->offset_vectype)))
> +       {
> +         if (dump_enabled_p ())
> +           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                            "%s offset requires a conversion\n",
> +                            vls_type == VLS_LOAD ? "gather" : "scatter");
> +         return false;
> +       }
> +      else if (!vect_is_simple_use (gs_info->offset, vinfo,
> +                                   &gs_info->offset_dt,
> +                                   &gs_info->offset_vectype))
> +       {
> +         if (dump_enabled_p ())
> +           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                            "%s index use not simple.\n",
> +                            vls_type == VLS_LOAD ? "gather" : "scatter");
> +         return false;
> +       }
> +      else if (GATHER_SCATTER_EMULATED_P (*gs_info))
> +       {
> +         if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
> +             || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
> +             || VECTOR_BOOLEAN_TYPE_P (gs_info->offset_vectype)
> +             || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
> +                                        (gs_info->offset_vectype),
> +                                      TYPE_VECTOR_SUBPARTS (vectype)))
> +           {
> +             if (dump_enabled_p ())
> +               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                                "unsupported vector types for emulated "
> +                                "gather.\n");
> +             return false;
> +           }
> +       }
> +    }
>
>    if (*memory_access_type == VMAT_CONTIGUOUS_DOWN
>        || *memory_access_type == VMAT_CONTIGUOUS_REVERSE)
>      *poffset = neg_ldst_offset;
>
> -  if (*memory_access_type == VMAT_GATHER_SCATTER
> -      || *memory_access_type == VMAT_ELEMENTWISE
> +  if (*memory_access_type == VMAT_ELEMENTWISE
> +      || (*memory_access_type == VMAT_GATHER_SCATTER
> +         && GATHER_SCATTER_LEGACY_P (*gs_info))
>        || *memory_access_type == VMAT_STRIDED_SLP
>        || *memory_access_type == VMAT_INVARIANT)
>      {
> @@ -2370,10 +2425,48 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
>      }
>    else
>      {
> -      *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
> -      *alignment_support_scheme
> -       = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
> -                                        *misalignment);
> +      /* Non dataref-based gather/scatter.  */
> +      if (*memory_access_type == VMAT_GATHER_SCATTER
> +         && !first_dr_info)

So this is the only part I think is odd - there is a dataref, it just
has only DR_REF as relevant data.  I would have expected we could
adjust vect_supportable_dr_alignment to deal with the scatter/gather
case.  I'm OK with doing it the way you did here, but seeing the

  /* For now assume all conditional loads/stores support unaligned
     access without any special code.  */
  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    if (gimple_call_internal_p (stmt)
        && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
            || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
      return dr_unaligned_supported;

comment in that function, I fear there are more cases that might break
on targets where at least element alignment is required?

> +       {
> +         /* Gather-scatter accesses normally perform only component accesses
> +            so alignment is irrelevant for them.  Targets like riscv do care
> +            about scalar alignment in vector accesses, though, so check scalar
> +            alignment here.  We determined the alias pointer as well as the
> +            base alignment during pattern recognition and can re-use it here.
> +
> +            As we do not have a dataref we only know the alignment of the
> +            base.  For now don't try harder to determine misalignment and
> +            just assume it is unknown.  We consider the type packed if its
> +            scalar alignment is lower than the natural alignment of a vector
> +            element's type.  */
> +
> +         tree inner_vectype = TREE_TYPE (vectype);
> +
> +         unsigned HOST_WIDE_INT scalar_align
> +           = tree_to_uhwi (gs_info->alias_ptr);
> +         unsigned HOST_WIDE_INT inner_vectype_sz
> +           = tree_to_uhwi (TYPE_SIZE (inner_vectype));
> +
> +         bool is_misaligned = scalar_align < inner_vectype_sz;
> +         bool is_packed = scalar_align > 1 && is_misaligned;
> +
> +         *misalignment = DR_MISALIGNMENT_UNKNOWN;
> +
> +         if (targetm.vectorize.support_vector_misalignment
> +             (TYPE_MODE (vectype), inner_vectype, *misalignment, is_packed,
> +              /*is_gather_scatter=*/ true))
> +           *alignment_support_scheme = dr_unaligned_supported;
> +         else
> +           *alignment_support_scheme = dr_unaligned_unsupported;
> +       }
> +      else
> +       {
> +         *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
> +         *alignment_support_scheme
> +           = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
> +                                            *misalignment);
> +       }
>      }
>
>    if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
> @@ -2443,58 +2536,12 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
>    poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
>    *misalignment = DR_MISALIGNMENT_UNKNOWN;
>    *poffset = 0;
> -  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
> -    {
> -      *memory_access_type = VMAT_GATHER_SCATTER;
> -      if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info,
> -                                     elsvals))
> -       gcc_unreachable ();
> -      /* When using internal functions, we rely on pattern recognition
> -        to convert the type of the offset to the type that the target
> -        requires, with the result being a call to an internal function.
> -        If that failed for some reason (e.g. because another pattern
> -        took priority), just handle cases in which the offset already
> -        has the right type.  */
> -      else if (GATHER_SCATTER_IFN_P (*gs_info)
> -              && !is_gimple_call (stmt_info->stmt)
> -              && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
> -                                         TREE_TYPE (gs_info->offset_vectype)))
> -       {
> -         if (dump_enabled_p ())
> -           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -                            "%s offset requires a conversion\n",
> -                            vls_type == VLS_LOAD ? "gather" : "scatter");
> -         return false;
> -       }
> -      slp_tree offset_node = SLP_TREE_CHILDREN (slp_node)[0];
> -      gs_info->offset_dt = SLP_TREE_DEF_TYPE (offset_node);
> -      gs_info->offset_vectype = SLP_TREE_VECTYPE (offset_node);
> -      if (gs_info->ifn == IFN_LAST && !gs_info->decl)
> -       {
> -         if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
> -             || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
> -             || VECTOR_BOOLEAN_TYPE_P (gs_info->offset_vectype)
> -             || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
> -                                        (gs_info->offset_vectype),
> -                                      TYPE_VECTOR_SUBPARTS (vectype)))
> -           {
> -             if (dump_enabled_p ())
> -               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -                                "unsupported vector types for emulated "
> -                                "gather.\n");
> -             return false;
> -           }
> -       }
> -      /* Gather-scatter accesses perform only component accesses, alignment
> -        is irrelevant for them.  */
> -      *alignment_support_scheme = dr_unaligned_supported;
> -    }
> -  else if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
> -                                      masked_p,
> -                                      vls_type, memory_access_type, poffset,
> -                                      alignment_support_scheme,
> -                                      misalignment, gs_info, lanes_ifn,
> -                                      elsvals))
> +  if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
> +                                 masked_p,
> +                                 vls_type, memory_access_type, poffset,
> +                                 alignment_support_scheme,
> +                                 misalignment, gs_info, lanes_ifn,
> +                                 elsvals))
>      return false;
>
>    if ((*memory_access_type == VMAT_ELEMENTWISE
> @@ -2528,17 +2575,18 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
>                            "alignment. With non-contiguous memory vectorization"
>                            " could read out of bounds at %G ",
>                            STMT_VINFO_STMT (stmt_info));
> -       if (inbounds)
> -         LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
> -       else
> -         return false;
> +      if (inbounds)
> +       LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
> +      else
> +       return false;
>      }
>
>    /* If this DR needs alignment for correctness, we must ensure the target
>       alignment is a constant power-of-two multiple of the amount read per
>       vector iteration or force masking.  */
>    if (dr_safe_speculative_read_required (stmt_info)
> -      && *alignment_support_scheme == dr_aligned)
> +      && (*alignment_support_scheme == dr_aligned
> +         && *memory_access_type != VMAT_GATHER_SCATTER))
>      {
>        /* We can only peel for loops, of course.  */
>        gcc_checking_assert (loop_vinfo);
> @@ -8442,7 +8490,6 @@ vectorizable_store (vec_info *vinfo,
>
>        if (dump_enabled_p ()
>           && memory_access_type != VMAT_ELEMENTWISE
> -         && memory_access_type != VMAT_GATHER_SCATTER
>           && memory_access_type != VMAT_STRIDED_SLP
>           && memory_access_type != VMAT_INVARIANT
>           && alignment_support_scheme != dr_aligned)
> @@ -9143,24 +9190,31 @@ vectorizable_store (vec_info *vinfo,
>                 {
>                   if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
>                     call = gimple_build_call_internal (
> -                           IFN_MASK_LEN_SCATTER_STORE, 7, dataref_ptr,
> +                           IFN_MASK_LEN_SCATTER_STORE, 8, dataref_ptr,
> +                           gs_info.alias_ptr,
>                             vec_offset, scale, vec_oprnd, final_mask, final_len,
>                             bias);
>                   else
>                     /* Non-vector offset indicates that prefer to take
>                        MASK_LEN_STRIDED_STORE instead of the
> -                      IFN_MASK_SCATTER_STORE with direct stride arg.  */
> +                      IFN_MASK_SCATTER_STORE with direct stride arg.
> +                      IFN_MASK_SCATTER_STORE with direct stride arg.
> +                      Similar to the gather case, we have already checked
> +                      the alignment for a scatter and assume that the
> +                      strided store has the same requirements.  */
>                     call = gimple_build_call_internal (
>                             IFN_MASK_LEN_STRIDED_STORE, 6, dataref_ptr,
>                             vec_offset, vec_oprnd, final_mask, final_len, bias);
>                 }
>               else if (final_mask)
>                 call = gimple_build_call_internal
> -                            (IFN_MASK_SCATTER_STORE, 5, dataref_ptr,
> +                            (IFN_MASK_SCATTER_STORE, 6, dataref_ptr,
> +                             gs_info.alias_ptr,
>                               vec_offset, scale, vec_oprnd, final_mask);
>               else
> -               call = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
> -                                                  dataref_ptr, vec_offset,
> +               call = gimple_build_call_internal (IFN_SCATTER_STORE, 5,
> +                                                  dataref_ptr,
> +                                                  gs_info.alias_ptr,
> +                                                  vec_offset,
>                                                    scale, vec_oprnd);
>               gimple_call_set_nothrow (call, true);
>               vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
> @@ -10627,7 +10681,6 @@ vectorizable_load (vec_info *vinfo,
>        vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
>      }
>
> -  gcc_assert (alignment_support_scheme);
>    vec_loop_masks *loop_masks
>      = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
>         ? &LOOP_VINFO_MASKS (loop_vinfo)
> @@ -10647,10 +10700,12 @@ vectorizable_load (vec_info *vinfo,
>
>    /* Targets with store-lane instructions must not require explicit
>       realignment.  vect_supportable_dr_alignment always returns either
> -     dr_aligned or dr_unaligned_supported for masked operations.  */
> +     dr_aligned or dr_unaligned_supported for (non-length) masked
> +     operations.  */
>    gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
>                && !mask
>                && !loop_masks)
> +             || memory_access_type == VMAT_GATHER_SCATTER
>               || alignment_support_scheme == dr_aligned
>               || alignment_support_scheme == dr_unaligned_supported);
>
> @@ -10995,8 +11050,6 @@ vectorizable_load (vec_info *vinfo,
>
>    if (memory_access_type == VMAT_GATHER_SCATTER)
>      {
> -      gcc_assert (alignment_support_scheme == dr_aligned
> -                 || alignment_support_scheme == dr_unaligned_supported);
>        gcc_assert (!grouped_load && !slp_perm);
>
>        unsigned int inside_cost = 0, prologue_cost = 0;
> @@ -11085,7 +11138,8 @@ vectorizable_load (vec_info *vinfo,
>                 {
>                   if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
>                   call = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD,
> -                                                      8, dataref_ptr,
> +                                                      9, dataref_ptr,
> +                                                      gs_info.alias_ptr,
>                                                        vec_offset, scale, zero,
>                                                        final_mask, vec_els,
>                                                        final_len, bias);
> @@ -11100,13 +11154,15 @@ vectorizable_load (vec_info *vinfo,
>                 }
>               else if (final_mask)
>                 call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD,
> -                                                  6, dataref_ptr,
> +                                                  7, dataref_ptr,
> +                                                  gs_info.alias_ptr,
>                                                    vec_offset, scale,
>                                                    zero, final_mask, vec_els);
>               else
> -               call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
> -                                                  dataref_ptr, vec_offset,
> -                                                  scale, zero);
> +               call = gimple_build_call_internal (IFN_GATHER_LOAD, 5,
> +                                                  dataref_ptr,
> +                                                  gs_info.alias_ptr,
> +                                                  vec_offset, scale, zero);
>               gimple_call_set_nothrow (call, true);
>               new_stmt = call;
>               data_ref = NULL_TREE;
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 7b927491b1c..4511527647a 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -1557,6 +1557,10 @@ struct gather_scatter_info {
>    /* The loop-invariant base value.  */
>    tree base;
>
> +  /* The TBAA alias pointer, the value of which determines the alignment
> +     of the scalar accesses.  */
> +  tree alias_ptr;
> +
>    /* The original scalar offset, which is a non-loop-invariant SSA_NAME.  */
>    tree offset;
>
> --
> 2.50.0
>
