On Fri, 18 Oct 2024, Robin Dapp wrote:
> When predicating a load we implicitly assume that the else value is
> zero. This matters in case the loaded value is padded (like e.g.
> a Bool) and we must ensure that the padding bytes are zero on targets
> that don't implicitly zero inactive elements.
>
> In order to formalize this this patch queries the target for
> its supported else operand and uses that for the maskload call.
> Subsequently, if the else operand is nonzero, a cond_expr enforcing
> a zero else value is emitted.
>
> gcc/ChangeLog:
>
> * tree-if-conv.cc (predicate_load_or_store): Enforce zero else
> value for padded types.
> (predicate_statements): Use sequence instead of statement.
> ---
> gcc/tree-if-conv.cc | 112 +++++++++++++++++++++++++++++++++++++-------
> 1 file changed, 94 insertions(+), 18 deletions(-)
>
> diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
> index 90c754a4814..9623426e1e1 100644
> --- a/gcc/tree-if-conv.cc
> +++ b/gcc/tree-if-conv.cc
> @@ -2531,12 +2531,15 @@ mask_exists (int size, const vec<int> &vec)
>
> /* Helper function for predicate_statements. STMT is a memory read or
> write and it needs to be predicated by MASK. Return a statement
> - that does so. */
> + that does so. SSA_NAMES is the set of SSA names defined earlier in
> + STMT's block. */
>
> -static gimple *
> -predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask)
> +static gimple_seq
> +predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask,
> + hash_set<tree_ssa_name_hash> *ssa_names)
> {
> - gcall *new_stmt;
> + gimple_seq stmts = NULL;
> + gcall *call_stmt;
>
> tree lhs = gimple_assign_lhs (stmt);
> tree rhs = gimple_assign_rhs1 (stmt);
> @@ -2552,21 +2555,88 @@ predicate_load_or_store (gimple_stmt_iterator *gsi,
> gassign *stmt, tree mask)
> ref);
> if (TREE_CODE (lhs) == SSA_NAME)
> {
> - new_stmt
> - = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
> - ptr, mask);
> - gimple_call_set_lhs (new_stmt, lhs);
> - gimple_set_vuse (new_stmt, gimple_vuse (stmt));
> + /* Get the preferred vector mode and its corresponding mask for the
> + masked load. We need this to query the target's supported else
> + operands. */
> + machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
> + scalar_mode smode = as_a <scalar_mode> (mode);
> +
> + machine_mode vmode = targetm.vectorize.preferred_simd_mode (smode);
> + machine_mode mask_mode
> + = targetm.vectorize.get_mask_mode (vmode).require ();
> +
> + auto_vec<int> elsvals;
> + internal_fn ifn;
> + bool have_masked_load
> + = target_supports_mask_load_store_p (vmode, mask_mode, true, &ifn,
> + &elsvals);
> +
> + /* We might need to explicitly zero inactive elements if there are
> + padding bits in the type that might leak otherwise.
> + Refer to PR115336. */
> + bool need_zero
> + = TYPE_PRECISION (TREE_TYPE (lhs)) < GET_MODE_PRECISION (smode);
> +
> + int elsval;
> + bool implicit_zero = false;
> + if (have_masked_load)
> + {
> + gcc_assert (elsvals.length ());
> +
> + /* But not if the target already provide implicit zeroing of inactive
> + elements. */
> + implicit_zero = elsvals.contains (MASK_LOAD_ELSE_ZERO);
> +
> + /* For now, just use the first else value if zero is unsupported. */
> + elsval = implicit_zero ? MASK_LOAD_ELSE_ZERO : *elsvals.begin ();
> + }
> + else
> + {
> + /* We cannot vectorize this either way so just use a zero even
> + if it is unsupported. */
> + elsval = MASK_LOAD_ELSE_ZERO;
> + }
> +
> + tree els = vect_get_mask_load_else (elsval, TREE_TYPE (lhs));
> +
> + call_stmt
> + = gimple_build_call_internal (IFN_MASK_LOAD, 4, addr,
> + ptr, mask, els);
> +
So what happens if the vectorizer chooses a vector type that does
not support MASK_LOAD_ELSE_ZERO but the .MASK_LOAD we create here
uses it?
Since we need to cope with this situation I think the "fix" is to
make if-conversion simply always use MASK_LOAD_ELSE_ZERO and not
emit the COND_EXPR and for the vectorizer to properly handle
this by either not vectorizing (boo!) or by adding a VEC_COND_EXPR
(yay!). I think GCN is the only target that would need this
handling?
Richard.
> + /* Build the load call and, if the else value is nonzero,
> + a COND_EXPR that enforces it. */
> + tree loadlhs;
> + if (!need_zero || implicit_zero)
> + gimple_call_set_lhs (call_stmt, gimple_get_lhs (stmt));
> + else
> + {
> + loadlhs = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "_ifc_");
> + ssa_names->add (loadlhs);
> + gimple_call_set_lhs (call_stmt, loadlhs);
> + }
> + gimple_set_vuse (call_stmt, gimple_vuse (stmt));
> + gimple_seq_add_stmt (&stmts, call_stmt);
> +
> + if (need_zero && !implicit_zero)
> + {
> + tree cond_rhs
> + = fold_build_cond_expr (TREE_TYPE (loadlhs), mask, loadlhs,
> + build_zero_cst (TREE_TYPE (loadlhs)));
> + gassign *cond_stmt
> + = gimple_build_assign (gimple_get_lhs (stmt), cond_rhs);
> + gimple_seq_add_stmt (&stmts, cond_stmt);
> + }
> }
> else
> {
> - new_stmt
> + call_stmt
> = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
> mask, rhs);
> - gimple_move_vops (new_stmt, stmt);
> + gimple_move_vops (call_stmt, stmt);
> + gimple_seq_add_stmt (&stmts, call_stmt);
> }
> - gimple_call_set_nothrow (new_stmt, true);
> - return new_stmt;
> + gimple_call_set_nothrow (call_stmt, true);
> + return stmts;
> }
>
> /* STMT uses OP_LHS. Check whether it is equivalent to:
> @@ -2836,7 +2906,6 @@ predicate_statements (loop_p loop)
> {
> tree lhs = gimple_assign_lhs (stmt);
> tree mask;
> - gimple *new_stmt;
> gimple_seq stmts = NULL;
> machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
> /* We checked before setting GF_PLF_2 that an equivalent
> @@ -2870,11 +2939,18 @@ predicate_statements (loop_p loop)
> vect_masks.safe_push (mask);
> }
> if (gimple_assign_single_p (stmt))
> - new_stmt = predicate_load_or_store (&gsi, stmt, mask);
> - else
> - new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
> + {
> + gimple_seq call_seq
> + = predicate_load_or_store (&gsi, stmt, mask, &ssa_names);
>
> - gsi_replace (&gsi, new_stmt, true);
> + gsi_replace_with_seq (&gsi, call_seq, true);
> + }
> + else
> + {
> + gimple *new_stmt;
> + new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
> + gsi_replace (&gsi, new_stmt, true);
> + }
> }
> else if (((lhs = gimple_assign_lhs (stmt)), true)
> && (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)