On Fri, 18 Oct 2024, Robin Dapp wrote:
> This patch adds an else operand to vectorized masked load calls.
> The current implementation adds else-value arguments to the respective
> target-querying functions that is used to supply the vectorizer with the
> proper else value.
>
> Right now, the only spot where a zero else value is actually enforced is
> tree-ifcvt. Loop masking and other instances of masked loads in the
> vectorizer itself do not use vec_cond_exprs.
>
> gcc/ChangeLog:
>
> * optabs-query.cc (supports_vec_convert_optab_p): Return icode.
> (get_supported_else_val): Return supported else value for
> optab's operand at index.
> (supports_vec_gather_load_p): Add else argument.
> (supports_vec_scatter_store_p): Ditto.
> * optabs-query.h (supports_vec_gather_load_p): Ditto.
> (get_supported_else_val): Ditto.
> * optabs-tree.cc (target_supports_mask_load_store_p): Ditto.
> (can_vec_mask_load_store_p): Ditto.
> (target_supports_len_load_store_p): Ditto.
> (get_len_load_store_mode): Ditto.
> * optabs-tree.h (target_supports_mask_load_store_p): Ditto.
> (can_vec_mask_load_store_p): Ditto.
> * tree-vect-data-refs.cc (vect_lanes_optab_supported_p): Ditto.
> (vect_gather_scatter_fn_p): Ditto.
> (vect_check_gather_scatter): Ditto.
> (vect_load_lanes_supported): Ditto.
> * tree-vect-patterns.cc (vect_recog_gather_scatter_pattern):
> Ditto.
> * tree-vect-slp.cc (vect_get_operand_map): Adjust indices for
> else operand.
> (vect_slp_analyze_node_operations): Skip undefined else operand.
> * tree-vect-stmts.cc (exist_non_indexing_operands_for_use_p):
> Add else operand handling.
> (vect_get_vec_defs_for_operand): Handle undefined else operand.
> (check_load_store_for_partial_vectors): Add else argument.
> (vect_truncate_gather_scatter_offset): Ditto.
> (vect_use_strided_gather_scatters_p): Ditto.
> (get_group_load_store_type): Ditto.
> (get_load_store_type): Ditto.
> (vect_get_mask_load_else): Ditto.
> (vect_get_else_val_from_tree): Ditto.
> (vect_build_one_gather_load_call): Add zero else operand.
> (vectorizable_load): Use else operand.
> * tree-vectorizer.h (vect_gather_scatter_fn_p): Add else
> argument.
> (vect_load_lanes_supported): Ditto.
> (vect_get_mask_load_else): Ditto.
> (vect_get_else_val_from_tree): Ditto.
> ---
> gcc/optabs-query.cc | 59 ++++++---
> gcc/optabs-query.h | 3 +-
> gcc/optabs-tree.cc | 62 ++++++---
> gcc/optabs-tree.h | 8 +-
> gcc/tree-vect-data-refs.cc | 77 +++++++----
> gcc/tree-vect-patterns.cc | 18 ++-
> gcc/tree-vect-slp.cc | 22 +++-
> gcc/tree-vect-stmts.cc | 257 +++++++++++++++++++++++++++++--------
> gcc/tree-vectorizer.h | 11 +-
> 9 files changed, 394 insertions(+), 123 deletions(-)
>
> diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
> index cc52bc0f5ea..347a1322479 100644
> --- a/gcc/optabs-query.cc
> +++ b/gcc/optabs-query.cc
> @@ -29,6 +29,9 @@ along with GCC; see the file COPYING3. If not see
> #include "rtl.h"
> #include "recog.h"
> #include "vec-perm-indices.h"
> +#include "internal-fn.h"
> +#include "memmodel.h"
> +#include "optabs.h"
>
> struct target_optabs default_target_optabs;
> struct target_optabs *this_fn_optabs = &default_target_optabs;
> @@ -672,34 +675,48 @@ lshift_cheap_p (bool speed_p)
> that mode, given that the second mode is always an integer vector.
> If MODE is VOIDmode, return true if OP supports any vector mode. */
>
> -static bool
> +static enum insn_code
> supports_vec_convert_optab_p (optab op, machine_mode mode)
The name is misleading now: the "_p" suffix suggests a boolean predicate, but the function returns an insn_code. supported_vec_convert_optab (...) maybe?
> {
> int start = mode == VOIDmode ? 0 : mode;
> int end = mode == VOIDmode ? MAX_MACHINE_MODE - 1 : mode;
> + enum insn_code icode = CODE_FOR_nothing;
> for (int i = start; i <= end; ++i)
> if (VECTOR_MODE_P ((machine_mode) i))
> for (int j = MIN_MODE_VECTOR_INT; j < MAX_MODE_VECTOR_INT; ++j)
> - if (convert_optab_handler (op, (machine_mode) i,
> - (machine_mode) j) != CODE_FOR_nothing)
> - return true;
> + {
> + if ((icode
> + = convert_optab_handler (op, (machine_mode) i,
> + (machine_mode) j)) != CODE_FOR_nothing)
> + return icode;
> + }
>
> - return false;
> + return icode;
> }
>
> /* If MODE is not VOIDmode, return true if vec_gather_load is available for
> that mode. If MODE is VOIDmode, return true if gather_load is available
> - for at least one vector mode. */
> + for at least one vector mode.
> + In that case, and if ELSVALS is nonzero, store the supported else values
> + into the vector it points to. */
>
> bool
> -supports_vec_gather_load_p (machine_mode mode)
> +supports_vec_gather_load_p (machine_mode mode, auto_vec<int> *elsvals)
> {
> - if (!this_fn_optabs->supports_vec_gather_load[mode])
> - this_fn_optabs->supports_vec_gather_load[mode]
> - = (supports_vec_convert_optab_p (gather_load_optab, mode)
> - || supports_vec_convert_optab_p (mask_gather_load_optab, mode)
> - || supports_vec_convert_optab_p (mask_len_gather_load_optab, mode)
> - ? 1 : -1);
> + enum insn_code icode = CODE_FOR_nothing;
> + if (!this_fn_optabs->supports_vec_gather_load[mode] || elsvals)
> + {
> + icode = supports_vec_convert_optab_p (gather_load_optab, mode);
> + if (icode == CODE_FOR_nothing)
> + icode = supports_vec_convert_optab_p (mask_gather_load_optab, mode);
> + if (icode == CODE_FOR_nothing)
> + icode = supports_vec_convert_optab_p (mask_len_gather_load_optab, mode);
> + this_fn_optabs->supports_vec_gather_load[mode]
> + = (icode != CODE_FOR_nothing) ? 1 : -1;
> + }
> +
> + if (elsvals && icode != CODE_FOR_nothing)
> + get_supported_else_vals (icode, MASK_LOAD_GATHER_ELSE_IDX, *elsvals);
>
> return this_fn_optabs->supports_vec_gather_load[mode] > 0;
> }
> @@ -711,12 +728,18 @@ supports_vec_gather_load_p (machine_mode mode)
> bool
> supports_vec_scatter_store_p (machine_mode mode)
> {
> + enum insn_code icode;
> if (!this_fn_optabs->supports_vec_scatter_store[mode])
> - this_fn_optabs->supports_vec_scatter_store[mode]
> - = (supports_vec_convert_optab_p (scatter_store_optab, mode)
> - || supports_vec_convert_optab_p (mask_scatter_store_optab, mode)
> - || supports_vec_convert_optab_p (mask_len_scatter_store_optab, mode)
> - ? 1 : -1);
> + {
> + icode = supports_vec_convert_optab_p (scatter_store_optab, mode);
> + if (icode == CODE_FOR_nothing)
> + icode = supports_vec_convert_optab_p (mask_scatter_store_optab, mode);
> + if (icode == CODE_FOR_nothing)
> + icode = supports_vec_convert_optab_p (mask_len_scatter_store_optab,
> + mode);
> + this_fn_optabs->supports_vec_scatter_store[mode]
> + = (icode != CODE_FOR_nothing) ? 1 : -1;
> + }
>
> return this_fn_optabs->supports_vec_scatter_store[mode] > 0;
> }
> diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h
> index 0cb2c21ba85..5e0f59ee4b9 100644
> --- a/gcc/optabs-query.h
> +++ b/gcc/optabs-query.h
> @@ -191,7 +191,8 @@ bool can_compare_and_swap_p (machine_mode, bool);
> bool can_atomic_exchange_p (machine_mode, bool);
> bool can_atomic_load_p (machine_mode);
> bool lshift_cheap_p (bool);
> -bool supports_vec_gather_load_p (machine_mode = E_VOIDmode);
> +bool supports_vec_gather_load_p (machine_mode = E_VOIDmode,
> + auto_vec<int> * = nullptr);
> bool supports_vec_scatter_store_p (machine_mode = E_VOIDmode);
> bool can_vec_extract (machine_mode, machine_mode);
>
> diff --git a/gcc/optabs-tree.cc b/gcc/optabs-tree.cc
> index b69a5bc3676..ebdb6051c14 100644
> --- a/gcc/optabs-tree.cc
> +++ b/gcc/optabs-tree.cc
> @@ -29,6 +29,7 @@ along with GCC; see the file COPYING3. If not see
> #include "optabs.h"
> #include "optabs-tree.h"
> #include "stor-layout.h"
> +#include "internal-fn.h"
>
> /* Return the optab used for computing the operation given by the tree code,
> CODE and the tree EXP. This function is not always usable (for example,
> it
> @@ -552,24 +553,38 @@ target_supports_op_p (tree type, enum tree_code code,
> or mask_len_{load,store}.
> This helper function checks whether target supports masked
> load/store and return corresponding IFN in the last argument
> - (IFN_MASK_{LOAD,STORE} or IFN_MASK_LEN_{LOAD,STORE}). */
> + (IFN_MASK_{LOAD,STORE} or IFN_MASK_LEN_{LOAD,STORE}).
> + If there is support and ELSVALS is nonzero add the possible else values
> + to the vector it points to. */
>
> -static bool
> +bool
> target_supports_mask_load_store_p (machine_mode mode, machine_mode mask_mode,
> - bool is_load, internal_fn *ifn)
> + bool is_load, internal_fn *ifn,
> + auto_vec<int> *elsvals)
> {
> optab op = is_load ? maskload_optab : maskstore_optab;
> optab len_op = is_load ? mask_len_load_optab : mask_len_store_optab;
> - if (convert_optab_handler (op, mode, mask_mode) != CODE_FOR_nothing)
> + enum insn_code icode;
> + if ((icode = convert_optab_handler (op, mode, mask_mode))
> + != CODE_FOR_nothing)
> {
> if (ifn)
> *ifn = is_load ? IFN_MASK_LOAD : IFN_MASK_STORE;
> + if (elsvals)
> + get_supported_else_vals (icode,
> + internal_fn_else_index (IFN_MASK_LOAD),
> + *elsvals);
> return true;
> }
> - else if (convert_optab_handler (len_op, mode, mask_mode) !=
> CODE_FOR_nothing)
> + else if ((icode = convert_optab_handler (len_op, mode, mask_mode))
> + != CODE_FOR_nothing)
> {
> if (ifn)
> *ifn = is_load ? IFN_MASK_LEN_LOAD : IFN_MASK_LEN_STORE;
> + if (elsvals)
> + get_supported_else_vals (icode,
> + internal_fn_else_index (IFN_MASK_LEN_LOAD),
> + *elsvals);
> return true;
> }
> return false;
> @@ -584,13 +599,15 @@ bool
> can_vec_mask_load_store_p (machine_mode mode,
> machine_mode mask_mode,
> bool is_load,
> - internal_fn *ifn)
> + internal_fn *ifn,
> + auto_vec<int> *elsvals)
> {
> machine_mode vmode;
>
> /* If mode is vector mode, check it directly. */
> if (VECTOR_MODE_P (mode))
> - return target_supports_mask_load_store_p (mode, mask_mode, is_load, ifn);
> + return target_supports_mask_load_store_p (mode, mask_mode, is_load, ifn,
> + elsvals);
>
> /* Otherwise, return true if there is some vector mode with
> the mask load/store supported. */
> @@ -604,7 +621,8 @@ can_vec_mask_load_store_p (machine_mode mode,
> vmode = targetm.vectorize.preferred_simd_mode (smode);
> if (VECTOR_MODE_P (vmode)
> && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode)
> - && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn))
> + && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn,
> + elsvals))
> return true;
>
> auto_vector_modes vector_modes;
> @@ -612,7 +630,8 @@ can_vec_mask_load_store_p (machine_mode mode,
> for (machine_mode base_mode : vector_modes)
> if (related_vector_mode (base_mode, smode).exists (&vmode)
> && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode)
> - && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn))
> + && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn,
> + elsvals))
> return true;
> return false;
> }
> @@ -622,11 +641,13 @@ can_vec_mask_load_store_p (machine_mode mode,
> or mask_len_{load,store}.
> This helper function checks whether target supports len
> load/store and return corresponding IFN in the last argument
> - (IFN_LEN_{LOAD,STORE} or IFN_MASK_LEN_{LOAD,STORE}). */
> + (IFN_LEN_{LOAD,STORE} or IFN_MASK_LEN_{LOAD,STORE}).
> + If there is support and ELSVALS is nonzero add the possible else values
> + to the vector it points to. */
>
> static bool
> target_supports_len_load_store_p (machine_mode mode, bool is_load,
> - internal_fn *ifn)
> + internal_fn *ifn, auto_vec<int> *elsvals)
> {
> optab op = is_load ? len_load_optab : len_store_optab;
> optab masked_op = is_load ? mask_len_load_optab : mask_len_store_optab;
> @@ -638,11 +659,17 @@ target_supports_len_load_store_p (machine_mode mode,
> bool is_load,
> return true;
> }
> machine_mode mask_mode;
> + enum insn_code icode;
> if (targetm.vectorize.get_mask_mode (mode).exists (&mask_mode)
> - && convert_optab_handler (masked_op, mode, mask_mode) !=
> CODE_FOR_nothing)
> + && ((icode = convert_optab_handler (masked_op, mode, mask_mode))
> + != CODE_FOR_nothing))
> {
> if (ifn)
> *ifn = is_load ? IFN_MASK_LEN_LOAD : IFN_MASK_LEN_STORE;
> + if (elsvals)
> + get_supported_else_vals (icode,
> + internal_fn_else_index (IFN_MASK_LEN_LOAD),
> + *elsvals);
> return true;
> }
> return false;
> @@ -656,22 +683,25 @@ target_supports_len_load_store_p (machine_mode mode,
> bool is_load,
> VnQI to wrap the other supportable same size vector modes.
> An additional output in the last argument which is the IFN pointer.
> We set IFN as LEN_{LOAD,STORE} or MASK_LEN_{LOAD,STORE} according
> - which optab is supported in the target. */
> + which optab is supported in the target.
> + If there is support and ELSVALS is nonzero add the possible else values
> + to the vector it points to. */
>
> opt_machine_mode
> -get_len_load_store_mode (machine_mode mode, bool is_load, internal_fn *ifn)
> +get_len_load_store_mode (machine_mode mode, bool is_load, internal_fn *ifn,
> + auto_vec<int> *elsvals)
> {
> gcc_assert (VECTOR_MODE_P (mode));
>
> /* Check if length in lanes supported for this mode directly. */
> - if (target_supports_len_load_store_p (mode, is_load, ifn))
> + if (target_supports_len_load_store_p (mode, is_load, ifn, elsvals))
> return mode;
>
> /* Check if length in bytes supported for same vector size VnQI. */
> machine_mode vmode;
> poly_uint64 nunits = GET_MODE_SIZE (mode);
> if (related_vector_mode (mode, QImode, nunits).exists (&vmode)
> - && target_supports_len_load_store_p (vmode, is_load, ifn))
> + && target_supports_len_load_store_p (vmode, is_load, ifn, elsvals))
> return vmode;
>
> return opt_machine_mode ();
> diff --git a/gcc/optabs-tree.h b/gcc/optabs-tree.h
> index f2b49991462..390954bf998 100644
> --- a/gcc/optabs-tree.h
> +++ b/gcc/optabs-tree.h
> @@ -47,9 +47,13 @@ bool expand_vec_cond_expr_p (tree, tree, enum tree_code);
> void init_tree_optimization_optabs (tree);
> bool target_supports_op_p (tree, enum tree_code,
> enum optab_subtype = optab_default);
> +bool target_supports_mask_load_store_p (machine_mode, machine_mode,
> + bool, internal_fn *, auto_vec<int> *);
> bool can_vec_mask_load_store_p (machine_mode, machine_mode, bool,
> - internal_fn * = nullptr);
> + internal_fn * = nullptr,
> + auto_vec<int> * = nullptr);
> opt_machine_mode get_len_load_store_mode (machine_mode, bool,
> - internal_fn * = nullptr);
> + internal_fn * = nullptr,
> + auto_vec<int> * = nullptr);
>
> #endif
> diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
> index 202af7a8952..d9f608dd2c0 100644
> --- a/gcc/tree-vect-data-refs.cc
> +++ b/gcc/tree-vect-data-refs.cc
> @@ -55,13 +55,18 @@ along with GCC; see the file COPYING3. If not see
> #include "vec-perm-indices.h"
> #include "internal-fn.h"
> #include "gimple-fold.h"
> +#include "optabs-query.h"
>
> /* Return true if load- or store-lanes optab OPTAB is implemented for
> - COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
> + COUNT vectors of type VECTYPE. NAME is the name of OPTAB.
> +
> + If it is implemented and ELSVALS is nonzero add the possible else values
> + to the vector it points to. */
>
> static bool
> vect_lanes_optab_supported_p (const char *name, convert_optab optab,
> - tree vectype, unsigned HOST_WIDE_INT count)
> + tree vectype, unsigned HOST_WIDE_INT count,
> + auto_vec<int> *elsvals = nullptr)
> {
> machine_mode mode, array_mode;
> bool limit_p;
> @@ -81,7 +86,9 @@ vect_lanes_optab_supported_p (const char *name,
> convert_optab optab,
> }
> }
>
> - if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
> + enum insn_code icode;
> + if ((icode = convert_optab_handler (optab, array_mode, mode))
> + == CODE_FOR_nothing)
> {
> if (dump_enabled_p ())
> dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> @@ -92,8 +99,13 @@ vect_lanes_optab_supported_p (const char *name,
> convert_optab optab,
>
> if (dump_enabled_p ())
> dump_printf_loc (MSG_NOTE, vect_location,
> - "can use %s<%s><%s>\n", name, GET_MODE_NAME
> (array_mode),
> - GET_MODE_NAME (mode));
> + "can use %s<%s><%s>\n", name, GET_MODE_NAME (array_mode),
> + GET_MODE_NAME (mode));
> +
> + if (elsvals)
> + get_supported_else_vals (icode,
> + internal_fn_else_index (IFN_MASK_LEN_LOAD_LANES),
> + *elsvals);
>
> return true;
> }
> @@ -4177,13 +4189,15 @@ vect_prune_runtime_alias_test_list (loop_vec_info
> loop_vinfo)
> be multiplied *after* it has been converted to address width.
>
> Return true if the function is supported, storing the function id in
> - *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT. */
> + *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.
> +
> + If we can use gather and add the possible else values to ELSVALS. */
>
> bool
> vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
> tree vectype, tree memory_type, tree offset_type,
> int scale, internal_fn *ifn_out,
> - tree *offset_vectype_out)
> + tree *offset_vectype_out, auto_vec<int> *elsvals)
Please do not use auto_vec<int> * here either; use vec<int> * in this case (and in the other changed interfaces as well).
> {
> unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
> unsigned int element_bits = vector_element_bits (vectype);
> @@ -4221,7 +4235,8 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p,
> bool masked_p,
>
> /* Test whether the target supports this combination. */
> if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
> - offset_vectype, scale))
> + offset_vectype, scale,
> + elsvals))
> {
> *ifn_out = ifn;
> *offset_vectype_out = offset_vectype;
> @@ -4231,7 +4246,7 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p,
> bool masked_p,
> && internal_gather_scatter_fn_supported_p (alt_ifn, vectype,
> memory_type,
> offset_vectype,
> - scale))
> + scale, elsvals))
> {
> *ifn_out = alt_ifn;
> *offset_vectype_out = offset_vectype;
> @@ -4239,7 +4254,8 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p,
> bool masked_p,
> }
> else if (internal_gather_scatter_fn_supported_p (alt_ifn2, vectype,
> memory_type,
> - offset_vectype, scale))
> + offset_vectype, scale,
> + elsvals))
> {
> *ifn_out = alt_ifn2;
> *offset_vectype_out = offset_vectype;
> @@ -4278,11 +4294,13 @@ vect_describe_gather_scatter_call (stmt_vec_info
> stmt_info,
> }
>
> /* Return true if a non-affine read or write in STMT_INFO is suitable for a
> - gather load or scatter store. Describe the operation in *INFO if so. */
> + gather load or scatter store. Describe the operation in *INFO if so.
> + If it is suitable and ELSVALS is nonzero add the supported else values
> + to the vector it points to. */
>
> bool
> vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
> - gather_scatter_info *info)
> + gather_scatter_info *info, auto_vec<int> *elsvals)
> {
> HOST_WIDE_INT scale = 1;
> poly_int64 pbitpos, pbitsize;
> @@ -4306,6 +4324,16 @@ vect_check_gather_scatter (stmt_vec_info stmt_info,
> loop_vec_info loop_vinfo,
> ifn = gimple_call_internal_fn (call);
> if (internal_gather_scatter_fn_p (ifn))
> {
> + /* Extract the else value from a masked-load call. This is
> + necessary when we created a gather_scatter pattern from a
> + maskload. It is a bit cumbersome to basically create the
> + same else value three times but it's probably acceptable until
> + tree-ifcvt goes away. */
> + if (internal_fn_mask_index (ifn) >= 0 && elsvals)
> + {
> + tree els = gimple_call_arg (call, internal_fn_else_index (ifn));
> + elsvals->safe_push (vect_get_else_val_from_tree (els));
> + }
> vect_describe_gather_scatter_call (stmt_info, info);
> return true;
> }
> @@ -4315,7 +4343,8 @@ vect_check_gather_scatter (stmt_vec_info stmt_info,
> loop_vec_info loop_vinfo,
> /* True if we should aim to use internal functions rather than
> built-in functions. */
> bool use_ifn_p = (DR_IS_READ (dr)
> - ? supports_vec_gather_load_p (TYPE_MODE (vectype))
> + ? supports_vec_gather_load_p (TYPE_MODE (vectype),
> + elsvals)
> : supports_vec_scatter_store_p (TYPE_MODE (vectype)));
>
> base = DR_REF (dr);
> @@ -4472,12 +4501,14 @@ vect_check_gather_scatter (stmt_vec_info stmt_info,
> loop_vec_info loop_vinfo,
> masked_p, vectype, memory_type,
> signed_char_type_node,
> new_scale, &ifn,
> - &offset_vectype)
> + &offset_vectype,
> + elsvals)
> && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
> masked_p, vectype, memory_type,
> unsigned_char_type_node,
> new_scale, &ifn,
> - &offset_vectype))
> + &offset_vectype,
> + elsvals))
> break;
> scale = new_scale;
> off = op0;
> @@ -4500,7 +4531,7 @@ vect_check_gather_scatter (stmt_vec_info stmt_info,
> loop_vec_info loop_vinfo,
> && vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
> masked_p, vectype, memory_type,
> TREE_TYPE (off), scale, &ifn,
> - &offset_vectype))
> + &offset_vectype, elsvals))
> break;
>
> if (TYPE_PRECISION (TREE_TYPE (op0))
> @@ -4554,7 +4585,7 @@ vect_check_gather_scatter (stmt_vec_info stmt_info,
> loop_vec_info loop_vinfo,
> {
> if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
> vectype, memory_type, offtype, scale,
> - &ifn, &offset_vectype))
> + &ifn, &offset_vectype, elsvals))
> ifn = IFN_LAST;
> decl = NULL_TREE;
> }
> @@ -6391,27 +6422,29 @@ vect_grouped_load_supported (tree vectype, bool
> single_element_p,
> }
>
> /* Return FN if vec_{masked_,mask_len_}load_lanes is available for COUNT
> vectors
> - of type VECTYPE. MASKED_P says whether the masked form is needed. */
> + of type VECTYPE. MASKED_P says whether the masked form is needed.
> + If it is available and ELSVALS is nonzero add the possible else values
> + to the vector it points to. */
>
> internal_fn
> vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
> - bool masked_p)
> + bool masked_p, auto_vec<int> *elsvals)
> {
> if (vect_lanes_optab_supported_p ("vec_mask_len_load_lanes",
> vec_mask_len_load_lanes_optab, vectype,
> - count))
> + count, elsvals))
> return IFN_MASK_LEN_LOAD_LANES;
> else if (masked_p)
> {
> if (vect_lanes_optab_supported_p ("vec_mask_load_lanes",
> vec_mask_load_lanes_optab, vectype,
> - count))
> + count, elsvals))
> return IFN_MASK_LOAD_LANES;
> }
> else
> {
> if (vect_lanes_optab_supported_p ("vec_load_lanes",
> vec_load_lanes_optab,
> - vectype, count))
> + vectype, count, elsvals))
> return IFN_LOAD_LANES;
> }
> return IFN_LAST;
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index 746f100a084..184d150f96d 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -6630,7 +6630,8 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
> /* Make sure that the target supports an appropriate internal
> function for the gather/scatter operation. */
> gather_scatter_info gs_info;
> - if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
> + auto_vec<int> elsvals;
> + if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info, &elsvals)
> || gs_info.ifn == IFN_LAST)
> return NULL;
>
> @@ -6653,20 +6654,27 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
> tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
> gs_info.offset, stmt_info);
>
> + tree vec_els = NULL_TREE;
> /* Build the new pattern statement. */
> tree scale = size_int (gs_info.scale);
> gcall *pattern_stmt;
> + tree load_lhs;
> if (DR_IS_READ (dr))
> {
> tree zero = build_zero_cst (gs_info.element_type);
> if (mask != NULL)
> - pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
> - offset, scale, zero, mask);
> + {
> + int elsval = *elsvals.begin ();
> + vec_els = vect_get_mask_load_else (elsval, TREE_TYPE (gs_vectype));
> + pattern_stmt = gimple_build_call_internal (gs_info.ifn, 6, base,
> + offset, scale, zero, mask,
> + vec_els);
> + }
> else
> pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
> offset, scale, zero);
> - tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
> - gimple_call_set_lhs (pattern_stmt, load_lhs);
> + load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
> + gimple_set_lhs (pattern_stmt, load_lhs);
> }
> else
> {
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index 8727246c27a..d161f28d62c 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -511,13 +511,13 @@ static const int cond_expr_maps[3][5] = {
> static const int no_arg_map[] = { 0 };
> static const int arg0_map[] = { 1, 0 };
> static const int arg1_map[] = { 1, 1 };
> -static const int arg2_map[] = { 1, 2 };
> -static const int arg1_arg4_map[] = { 2, 1, 4 };
> +static const int arg2_arg3_map[] = { 2, 2, 3 };
> +static const int arg1_arg4_arg5_map[] = { 3, 1, 4, 5 };
> static const int arg3_arg2_map[] = { 2, 3, 2 };
> static const int op1_op0_map[] = { 2, 1, 0 };
> static const int off_map[] = { 1, -3 };
> static const int off_op0_map[] = { 2, -3, 0 };
> -static const int off_arg2_map[] = { 2, -3, 2 };
> +static const int off_arg2_arg3_map[] = { 3, -3, 2, 3 };
> static const int off_arg3_arg2_map[] = { 3, -3, 3, 2 };
> static const int mask_call_maps[6][7] = {
> { 1, 1, },
> @@ -564,14 +564,14 @@ vect_get_operand_map (const gimple *stmt, bool
> gather_scatter_p = false,
> switch (gimple_call_internal_fn (call))
> {
> case IFN_MASK_LOAD:
> - return gather_scatter_p ? off_arg2_map : arg2_map;
> + return gather_scatter_p ? off_arg2_arg3_map : arg2_arg3_map;
>
> case IFN_GATHER_LOAD:
> return arg1_map;
>
> case IFN_MASK_GATHER_LOAD:
> case IFN_MASK_LEN_GATHER_LOAD:
> - return arg1_arg4_map;
> + return arg1_arg4_arg5_map;
>
> case IFN_MASK_STORE:
> return gather_scatter_p ? off_arg3_arg2_map : arg3_arg2_map;
> @@ -7775,6 +7775,18 @@ vect_slp_analyze_node_operations (vec_info *vinfo,
> slp_tree node,
> tree vector_type = SLP_TREE_VECTYPE (child);
> if (!vector_type)
> {
> + /* Masked loads can have an undefined (default SSA definition)
> + else operand. We do not need to cost it. */
> + vec<tree> ops = SLP_TREE_SCALAR_OPS (child);
> + if ((STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node))
> + == load_vec_info_type)
> + && ((ops.length () &&
> + TREE_CODE (ops[0]) == SSA_NAME
> + && SSA_NAME_IS_DEFAULT_DEF (ops[0])
> + && VAR_P (SSA_NAME_VAR (ops[0])))
> + || SLP_TREE_DEF_TYPE (child) == vect_constant_def))
> + continue;
> +
> /* For shifts with a scalar argument we don't need
> to cost or code-generate anything.
> ??? Represent this more explicitely. */
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 9b14b96cb5a..74a437735a5 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -58,6 +58,7 @@ along with GCC; see the file COPYING3. If not see
> #include "regs.h"
> #include "attribs.h"
> #include "optabs-libfuncs.h"
> +#include "tree-dfa.h"
>
> /* For lang_hooks.types.type_for_mode. */
> #include "langhooks.h"
> @@ -469,6 +470,10 @@ exist_non_indexing_operands_for_use_p (tree use,
> stmt_vec_info stmt_info)
> if (mask_index >= 0
> && use == gimple_call_arg (call, mask_index))
> return true;
> + int els_index = internal_fn_else_index (ifn);
> + if (els_index >= 0
> + && use == gimple_call_arg (call, els_index))
> + return true;
> int stored_value_index = internal_fn_stored_value_index (ifn);
> if (stored_value_index >= 0
> && use == gimple_call_arg (call, stored_value_index))
> @@ -1280,7 +1285,17 @@ vect_get_vec_defs_for_operand (vec_info *vinfo,
> stmt_vec_info stmt_vinfo,
> vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
>
> gcc_assert (vector_type);
> - tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
> + /* A masked load can have a default SSA definition as else operand.
> + We should "vectorize" this instead of creating a duplicate from the
> + scalar default. */
> + tree vop;
> + if (TREE_CODE (op) == SSA_NAME
> + && SSA_NAME_IS_DEFAULT_DEF (op)
> + && VAR_P (SSA_NAME_VAR (op)))
> + vop = get_or_create_ssa_default_def (cfun,
> + create_tmp_var (vector_type));
> + else
> + vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
> while (ncopies--)
> vec_oprnds->quick_push (vop);
> }
> @@ -1492,7 +1507,10 @@ static tree permute_vec_elements (vec_info *, tree,
> tree, tree, stmt_vec_info,
>
> Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
> vectors is not supported, otherwise record the required rgroup control
> - types. */
> + types.
> +
> + If partial vectors can be used and ELSVALS is nonzero the supported
> + else values will be added to the vector ELSVALS points to. */
>
> static void
> check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
> @@ -1502,7 +1520,8 @@ check_load_store_for_partial_vectors (loop_vec_info
> loop_vinfo, tree vectype,
> vect_memory_access_type
> memory_access_type,
> gather_scatter_info *gs_info,
> - tree scalar_mask)
> + tree scalar_mask,
> + auto_vec<int> *elsvals = nullptr)
> {
> /* Invariant loads need no special support. */
> if (memory_access_type == VMAT_INVARIANT)
> @@ -1518,7 +1537,8 @@ check_load_store_for_partial_vectors (loop_vec_info
> loop_vinfo, tree vectype,
> if (slp_node)
> nvectors /= group_size;
> internal_fn ifn
> - = (is_load ? vect_load_lanes_supported (vectype, group_size, true)
> + = (is_load ? vect_load_lanes_supported (vectype, group_size, true,
> + elsvals)
> : vect_store_lanes_supported (vectype, group_size, true));
> if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
> vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
> @@ -1548,12 +1568,14 @@ check_load_store_for_partial_vectors (loop_vec_info
> loop_vinfo, tree vectype,
> if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
> gs_info->memory_type,
> gs_info->offset_vectype,
> - gs_info->scale))
> + gs_info->scale,
> + elsvals))
> vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
> else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
> gs_info->memory_type,
> gs_info->offset_vectype,
> - gs_info->scale))
> + gs_info->scale,
> + elsvals))
> vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
> scalar_mask);
> else
> @@ -1607,7 +1629,8 @@ check_load_store_for_partial_vectors (loop_vec_info
> loop_vinfo, tree vectype,
> machine_mode mask_mode;
> machine_mode vmode;
> bool using_partial_vectors_p = false;
> - if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
> + if (get_len_load_store_mode
> + (vecmode, is_load, nullptr, elsvals).exists (&vmode))
> {
> nvectors = group_memory_nvectors (group_size * vf, nunits);
> unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE
> (vecmode);
> @@ -1615,7 +1638,8 @@ check_load_store_for_partial_vectors (loop_vec_info
> loop_vinfo, tree vectype,
> using_partial_vectors_p = true;
> }
> else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
> - && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
> + && can_vec_mask_load_store_p (vecmode, mask_mode, is_load, NULL,
> + elsvals))
> {
> nvectors = group_memory_nvectors (group_size * vf, nunits);
> vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
> scalar_mask);
> @@ -1672,12 +1696,16 @@ prepare_vec_mask (loop_vec_info loop_vinfo, tree
> mask_type, tree loop_mask,
> without loss of precision, where X is STMT_INFO's DR_STEP.
>
> Return true if this is possible, describing the gather load or scatter
> - store in GS_INFO. MASKED_P is true if the load or store is conditional.
> */
> + store in GS_INFO. MASKED_P is true if the load or store is conditional.
> +
> + If we can use gather/scatter and ELSVALS is nonzero the supported
> + else values will be added to the vector ELSVALS points to. */
>
> static bool
> vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
> loop_vec_info loop_vinfo, bool masked_p,
> - gather_scatter_info *gs_info)
> + gather_scatter_info *gs_info,
> + auto_vec<int> *elsvals)
> {
> dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
> data_reference *dr = dr_info->dr;
> @@ -1734,7 +1762,8 @@ vect_truncate_gather_scatter_offset (stmt_vec_info
> stmt_info,
> tree memory_type = TREE_TYPE (DR_REF (dr));
> if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
> vectype, memory_type, offset_type, scale,
> - &gs_info->ifn, &gs_info->offset_vectype)
> + &gs_info->ifn, &gs_info->offset_vectype,
> + elsvals)
> || gs_info->ifn == IFN_LAST)
> continue;
>
> @@ -1762,17 +1791,21 @@ vect_truncate_gather_scatter_offset (stmt_vec_info
> stmt_info,
> vectorize STMT_INFO, which is a grouped or strided load or store.
> MASKED_P is true if load or store is conditional. When returning
> true, fill in GS_INFO with the information required to perform the
> - operation. */
> + operation.
> +
> + If we can use gather/scatter and ELSVALS is nonzero the supported
> + else values will be added to the vector ELSVALS points to. */
>
> static bool
> vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
> loop_vec_info loop_vinfo, bool masked_p,
> - gather_scatter_info *gs_info)
> + gather_scatter_info *gs_info,
> + auto_vec<int> *elsvals)
> {
> - if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
> + if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info, elsvals)
> || gs_info->ifn == IFN_LAST)
> return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
> - masked_p, gs_info);
> + masked_p, gs_info, elsvals);
>
> tree old_offset_type = TREE_TYPE (gs_info->offset);
> tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
> @@ -1985,7 +2018,8 @@ get_group_load_store_type (vec_info *vinfo,
> stmt_vec_info stmt_info,
> dr_alignment_support *alignment_support_scheme,
> int *misalignment,
> gather_scatter_info *gs_info,
> - internal_fn *lanes_ifn)
> + internal_fn *lanes_ifn,
> + auto_vec<int> *elsvals)
> {
> loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
> class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
> @@ -2074,7 +2108,8 @@ get_group_load_store_type (vec_info *vinfo,
> stmt_vec_info stmt_info,
> else if (slp_node->ldst_lanes
> && (*lanes_ifn
> = (vls_type == VLS_LOAD
> - ? vect_load_lanes_supported (vectype, group_size,
> masked_p)
> + ? vect_load_lanes_supported (vectype, group_size,
> + masked_p, elsvals)
> : vect_store_lanes_supported (vectype, group_size,
> masked_p))) !=
> IFN_LAST)
> *memory_access_type = VMAT_LOAD_STORE_LANES;
> @@ -2242,7 +2277,8 @@ get_group_load_store_type (vec_info *vinfo,
> stmt_vec_info stmt_info,
> /* Otherwise try using LOAD/STORE_LANES. */
> *lanes_ifn
> = vls_type == VLS_LOAD
> - ? vect_load_lanes_supported (vectype, group_size, masked_p)
> + ? vect_load_lanes_supported (vectype, group_size, masked_p,
> + elsvals)
> : vect_store_lanes_supported (vectype, group_size,
> masked_p);
> if (*lanes_ifn != IFN_LAST)
> @@ -2276,7 +2312,7 @@ get_group_load_store_type (vec_info *vinfo,
> stmt_vec_info stmt_info,
> && single_element_p
> && loop_vinfo
> && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
> - masked_p, gs_info))
> + masked_p, gs_info, elsvals))
> *memory_access_type = VMAT_GATHER_SCATTER;
>
> if (*memory_access_type == VMAT_GATHER_SCATTER
> @@ -2338,7 +2374,10 @@ get_group_load_store_type (vec_info *vinfo,
> stmt_vec_info stmt_info,
> SLP says whether we're performing SLP rather than loop vectorization.
> MASKED_P is true if the statement is conditional on a vectorized mask.
> VECTYPE is the vector type that the vectorized statements will use.
> - NCOPIES is the number of vector statements that will be needed. */
> + NCOPIES is the number of vector statements that will be needed.
> +
> + If ELSVALS is nonzero the supported else values will be added to the
> + vector ELSVALS points to. */
>
> static bool
> get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
> @@ -2350,7 +2389,8 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info
> stmt_info,
> dr_alignment_support *alignment_support_scheme,
> int *misalignment,
> gather_scatter_info *gs_info,
> - internal_fn *lanes_ifn)
> + internal_fn *lanes_ifn,
> + auto_vec<int> *elsvals = nullptr)
> {
> loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
> poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
> @@ -2359,7 +2399,8 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info
> stmt_info,
> if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
> {
> *memory_access_type = VMAT_GATHER_SCATTER;
> - if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
> + if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info,
> + elsvals))
> gcc_unreachable ();
> /* When using internal functions, we rely on pattern recognition
> to convert the type of the offset to the type that the target
> @@ -2413,7 +2454,8 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info
> stmt_info,
> masked_p,
> vls_type, memory_access_type, poffset,
> alignment_support_scheme,
> - misalignment, gs_info, lanes_ifn))
> + misalignment, gs_info, lanes_ifn,
> + elsvals))
> return false;
> }
> else if (STMT_VINFO_STRIDED_P (stmt_info))
> @@ -2421,7 +2463,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info
> stmt_info,
> gcc_assert (!slp_node);
> if (loop_vinfo
> && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
> - masked_p, gs_info))
> + masked_p, gs_info, elsvals))
> *memory_access_type = VMAT_GATHER_SCATTER;
> else
> *memory_access_type = VMAT_ELEMENTWISE;
> @@ -2689,6 +2731,53 @@ vect_build_zero_merge_argument (vec_info *vinfo,
> return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
> }
>
> +/* Return the else value the constant ELSVAL represents, as a tree of
> +   type TYPE.  ELSVAL must be one of the MASK_LOAD_ELSE_* constants
> +   (undefined, all-ones, or zero). */
> +
> +tree
> +vect_get_mask_load_else (int elsval, tree type)
> +{
> + tree els;
> + if (elsval == MASK_LOAD_ELSE_UNDEFINED)
> + {
> + tree tmp = create_tmp_var (type);
> + /* No need to warn about anything. */
> + TREE_NO_WARNING (tmp) = 1;
> + els = get_or_create_ssa_default_def (cfun, tmp);
> + }
> + else if (elsval == MASK_LOAD_ELSE_M1)
> + els = build_minus_one_cst (type);
> + else if (elsval == MASK_LOAD_ELSE_ZERO)
> + els = build_zero_cst (type);
> + else
> + __builtin_unreachable ();
> +
> + return els;
> +}
> +
> +/* Return the MASK_LOAD_ELSE_* constant that the tree else operand ELS
> +   represents.  This performs the inverse of vect_get_mask_load_else.
> +   Refer to vect_check_gather_scatter for its usage rationale. */
> +
> +int
> +vect_get_else_val_from_tree (tree els)
> +{
> + if (TREE_CODE (els) == SSA_NAME
> + && SSA_NAME_IS_DEFAULT_DEF (els)
> + && TREE_CODE (SSA_NAME_VAR (els)) == VAR_DECL)
> + return MASK_LOAD_ELSE_UNDEFINED;
> + else
> + {
> + if (zerop (els))
> + return MASK_LOAD_ELSE_ZERO;
> + else if (integer_minus_onep (els))
> + return MASK_LOAD_ELSE_M1;
> + else
> + __builtin_unreachable ();
> + }
> +}
> +
> /* Build a gather load call while vectorizing STMT_INFO. Insert new
> instructions before GSI and add them to VEC_STMT. GS_INFO describes
> the gather load operation. If the load is conditional, MASK is the
> @@ -2770,8 +2859,14 @@ vect_build_one_gather_load_call (vec_info *vinfo,
> stmt_vec_info stmt_info,
> }
>
> tree scale = build_int_cst (scaletype, gs_info->scale);
> - gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
> - mask_op, scale);
> + gimple *new_stmt;
> +
> + if (!mask)
> + new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
> + mask_op, scale);
> + else
> + new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
> + mask_op, scale);
>
> if (!useless_type_conversion_p (vectype, rettype))
> {
> @@ -9967,6 +10062,7 @@ vectorizable_load (vec_info *vinfo,
> gather_scatter_info gs_info;
> tree ref_type;
> enum vect_def_type mask_dt = vect_unknown_def_type;
> + enum vect_def_type els_dt = vect_unknown_def_type;
>
> if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
> return false;
> @@ -9979,8 +10075,12 @@ vectorizable_load (vec_info *vinfo,
> return false;
>
> tree mask = NULL_TREE, mask_vectype = NULL_TREE;
> + tree els = NULL_TREE; tree els_vectype = NULL_TREE;
> +
> int mask_index = -1;
> + int els_index = -1;
> slp_tree slp_op = NULL;
> + slp_tree els_op = NULL;
> if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
> {
> scalar_dest = gimple_assign_lhs (assign);
> @@ -10020,6 +10120,15 @@ vectorizable_load (vec_info *vinfo,
> && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
> &mask, &slp_op, &mask_dt, &mask_vectype))
> return false;
> +
> + els_index = internal_fn_else_index (ifn);
> + if (els_index >= 0 && slp_node)
> + els_index = vect_slp_child_index_for_operand
> + (call, els_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
> + if (els_index >= 0
> + && !vect_is_simple_use (vinfo, stmt_info, slp_node, els_index,
> + &els, &els_op, &els_dt, &els_vectype))
> + return false;
> }
>
> tree vectype = STMT_VINFO_VECTYPE (stmt_info);
> @@ -10122,10 +10231,11 @@ vectorizable_load (vec_info *vinfo,
> int misalignment;
> poly_int64 poffset;
> internal_fn lanes_ifn;
> + auto_vec<int> elsvals;
> if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask,
> VLS_LOAD,
> ncopies, &memory_access_type, &poffset,
> &alignment_support_scheme, &misalignment, &gs_info,
> - &lanes_ifn))
> + &lanes_ifn, &elsvals))
> return false;
>
> /* ??? The following checks should really be part of
> @@ -10191,7 +10301,8 @@ vectorizable_load (vec_info *vinfo,
> machine_mode vec_mode = TYPE_MODE (vectype);
> if (!VECTOR_MODE_P (vec_mode)
> || !can_vec_mask_load_store_p (vec_mode,
> - TYPE_MODE (mask_vectype), true))
> + TYPE_MODE (mask_vectype),
> + true, NULL, &elsvals))
> return false;
> }
> else if (memory_access_type != VMAT_LOAD_STORE_LANES
> @@ -10260,6 +10371,16 @@ vectorizable_load (vec_info *vinfo,
>
> STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
> }
> + else
> + {
> + /* Here just get the else values. */
> + if (loop_vinfo
> + && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
> + check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
> + VLS_LOAD, group_size,
> + memory_access_type, &gs_info,
> + mask, &elsvals);
> + }
>
> if (!slp)
> gcc_assert (memory_access_type
> @@ -10930,6 +11051,7 @@ vectorizable_load (vec_info *vinfo,
> }
>
> tree vec_mask = NULL_TREE;
> + tree vec_els = NULL_TREE;
> if (memory_access_type == VMAT_LOAD_STORE_LANES)
> {
> gcc_assert (alignment_support_scheme == dr_aligned
> @@ -11020,6 +11142,11 @@ vectorizable_load (vec_info *vinfo,
> }
> }
>
> + if (final_mask)
> + vec_els = vect_get_mask_load_else
> + (elsvals.contains (MASK_LOAD_ELSE_ZERO)
> + ? MASK_LOAD_ELSE_ZERO : *elsvals.begin (), vectype);
> +
> gcall *call;
> if (final_len && final_mask)
> {
> @@ -11028,9 +11155,10 @@ vectorizable_load (vec_info *vinfo,
> VEC_MASK, LEN, BIAS). */
> unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
> tree alias_ptr = build_int_cst (ref_type, align);
> - call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 5,
> + call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 6,
> dataref_ptr, alias_ptr,
> - final_mask, final_len, bias);
> + final_mask, vec_els,
> + final_len, bias);
> }
> else if (final_mask)
> {
> @@ -11039,9 +11167,9 @@ vectorizable_load (vec_info *vinfo,
> VEC_MASK). */
> unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
> tree alias_ptr = build_int_cst (ref_type, align);
> - call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
> + call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 4,
> dataref_ptr, alias_ptr,
> - final_mask);
> + final_mask, vec_els);
> }
> else
> {
> @@ -11190,17 +11318,29 @@ vectorizable_load (vec_info *vinfo,
> }
> }
>
> + if (final_mask)
> + vec_els = vect_get_mask_load_else
> + (elsvals.contains (MASK_LOAD_ELSE_ZERO)
> + ? MASK_LOAD_ELSE_ZERO : *elsvals.begin (), vectype);
> +
> gcall *call;
> if (final_len && final_mask)
> - call
> - = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD, 7,
> - dataref_ptr, vec_offset,
> - scale, zero, final_mask,
> - final_len, bias);
> + {
> + call
> + = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD,
> + 8, dataref_ptr,
> + vec_offset, scale, zero,
> + final_mask, vec_els,
> + final_len, bias);
> + }
> else if (final_mask)
> - call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5,
> - dataref_ptr, vec_offset,
> - scale, zero, final_mask);
> + {
> + call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD,
> + 6, dataref_ptr,
> + vec_offset, scale,
> + zero, final_mask,
> + vec_els);
> + }
why add these unneeded braces?
> else
> call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
> dataref_ptr, vec_offset,
> @@ -11514,6 +11654,7 @@ vectorizable_load (vec_info *vinfo,
> tree final_mask = NULL_TREE;
> tree final_len = NULL_TREE;
> tree bias = NULL_TREE;
> +
> if (!costing_p)
> {
> if (mask)
> @@ -11566,7 +11707,8 @@ vectorizable_load (vec_info *vinfo,
> if (loop_lens)
> {
> opt_machine_mode new_ovmode
> - = get_len_load_store_mode (vmode, true, &partial_ifn);
> + = get_len_load_store_mode (vmode, true, &partial_ifn,
> + &elsvals);
> new_vmode = new_ovmode.require ();
> unsigned factor
> = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
> @@ -11578,7 +11720,7 @@ vectorizable_load (vec_info *vinfo,
> {
> if (!can_vec_mask_load_store_p (
> vmode, TYPE_MODE (TREE_TYPE (final_mask)), true,
> - &partial_ifn))
> + &partial_ifn, &elsvals))
> gcc_unreachable ();
> }
>
> @@ -11606,19 +11748,28 @@ vectorizable_load (vec_info *vinfo,
> bias = build_int_cst (intQI_type_node, biasval);
> }
>
> + tree vec_els;
> +
> if (final_len)
> {
> tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
> gcall *call;
> if (partial_ifn == IFN_MASK_LEN_LOAD)
> - call = gimple_build_call_internal (IFN_MASK_LEN_LOAD, 5,
> - dataref_ptr, ptr,
> - final_mask, final_len,
> - bias);
> + {
> + vec_els = vect_get_mask_load_else
> + (elsvals.contains (MASK_LOAD_ELSE_ZERO)
> + ? MASK_LOAD_ELSE_ZERO : *elsvals.begin (), vectype);
> + call = gimple_build_call_internal (IFN_MASK_LEN_LOAD,
> + 6, dataref_ptr, ptr,
> + final_mask, vec_els,
> + final_len, bias);
> + }
> else
> - call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
> - dataref_ptr, ptr,
> - final_len, bias);
> + {
> + call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
> + dataref_ptr, ptr,
> + final_len, bias);
> + }
Likewise.
Otherwise looks OK to me.
Richard.
> gimple_call_set_nothrow (call, true);
> new_stmt = call;
> data_ref = NULL_TREE;
> @@ -11641,9 +11792,13 @@ vectorizable_load (vec_info *vinfo,
> else if (final_mask)
> {
> tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
> - gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
> + vec_els = vect_get_mask_load_else
> + (elsvals.contains (MASK_LOAD_ELSE_ZERO)
> + ? MASK_LOAD_ELSE_ZERO : *elsvals.begin (), vectype);
> + gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 4,
> dataref_ptr, ptr,
> - final_mask);
> + final_mask,
> + vec_els);
> gimple_call_set_nothrow (call, true);
> new_stmt = call;
> data_ref = NULL_TREE;
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index b7f2708fec0..0b20c36a7fe 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -2439,9 +2439,11 @@ extern bool vect_slp_analyze_instance_alignment
> (vec_info *, slp_instance);
> extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec<int> *);
> extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info);
> extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree,
> - tree, int, internal_fn *, tree *);
> + tree, int, internal_fn *, tree *,
> + auto_vec<int> * = nullptr);
> extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info,
> - gather_scatter_info *);
> + gather_scatter_info *,
> + auto_vec<int> * = nullptr);
> extern opt_result vect_find_stmt_data_reference (loop_p, gimple *,
> vec<data_reference_p> *,
> vec<int> *, int);
> @@ -2459,7 +2461,8 @@ extern tree vect_create_destination_var (tree, tree);
> extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT);
> extern internal_fn vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT,
> bool);
> extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT);
> -extern internal_fn vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT,
> bool);
> +extern internal_fn vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT,
> + bool, auto_vec<int> * = nullptr);
> extern void vect_permute_store_chain (vec_info *, vec<tree> &,
> unsigned int, stmt_vec_info,
> gimple_stmt_iterator *, vec<tree> *);
> @@ -2605,6 +2608,8 @@ extern int vect_slp_child_index_for_operand (const
> gimple *, int op, bool);
>
> extern tree prepare_vec_mask (loop_vec_info, tree, tree, tree,
> gimple_stmt_iterator *);
> +extern tree vect_get_mask_load_else (int, tree);
> +extern int vect_get_else_val_from_tree (tree els);
>
> /* In tree-vect-patterns.cc. */
> extern void
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)