On Sun, 5 Nov 2023, Richard Sandiford wrote:

> Robin Dapp <rdapp....@gmail.com> writes:
> >> Ah, OK.  IMO it's better to keep the optab operands the same as the IFN
> >> operands, even if that makes things inconsistent with vcond_mask.
> >> vcond_mask isn't really a good example to follow, since the operand
> >> order is not only inconsistent with the IFN, it's also inconsistent
> >> with the natural if_then_else order.
> >
> > v4 attached with that changed,  match.pd patterns interleaved as well
> > as scratch-handling added and VLS modes removed.  Lehua has since pushed
> > another patch that extends gimple_match_op to 6/7 operands already so
> > that could be removed as well making the patch even smaller now.
> >
> > Testsuite on riscv looks good (apart from the mentioned cond_widen...),
> > still running on aarch64 and x86.  OK if those pass?
> >
> > Regards
> >  Robin
> >
> > Subject: [PATCH v4] internal-fn: Add VCOND_MASK_LEN.
> >
> > In order to prevent simplification of a COND_OP with degenerate mask
> > (CONSTM1_RTX) into just an OP in the presence of length masking this
> > patch introduces a length-masked analog to VEC_COND_EXPR:
> > IFN_VCOND_MASK_LEN.
> >
> > It also adds new match patterns that allow the combination of
> > unconditional unary, binary and ternary operations with the
> > VCOND_MASK_LEN into a conditional operation if the target supports it.
> >
> > gcc/ChangeLog:
> >
> >     PR tree-optimization/111760
> >
> >     * config/riscv/autovec.md (vcond_mask_len_<mode><vm>): Add
> >     expander.
> >     * config/riscv/riscv-protos.h (enum insn_type): Add.
> >     * config/riscv/riscv-v.cc (needs_fp_rounding): Add !pred_mov.
> >     * doc/md.texi: Add vcond_mask_len.
> >     * gimple-match-exports.cc (maybe_resimplify_conditional_op):
> >     Create VCOND_MASK_LEN when length masking.
> >     * gimple-match.h (gimple_match_op::gimple_match_op): Always
> >     initialize len and bias.
> >     * internal-fn.cc (vec_cond_mask_len_direct): Add.
> >     (direct_vec_cond_mask_len_optab_supported_p): Add.
> >     (internal_fn_len_index): Add VCOND_MASK_LEN.
> >     (internal_fn_mask_index): Ditto.
> >     * internal-fn.def (VCOND_MASK_LEN): New internal function.
> >     * match.pd: Combine unconditional unary, binary and ternary
> >     operations into the respective COND_LEN operations.
> >     * optabs.def (OPTAB_D): Add vcond_mask_len optab.
> >
> > gcc/testsuite/ChangeLog:
> >
> >     * gcc.dg/vect/vect-cond-arith-2.c: No vect cost model for
> >     riscv_v.
> > ---
> >  gcc/config/riscv/autovec.md                   | 26 ++++++++++
> >  gcc/config/riscv/riscv-protos.h               |  3 ++
> >  gcc/config/riscv/riscv-v.cc                   |  3 +-
> >  gcc/doc/md.texi                               |  9 ++++
> >  gcc/gimple-match-exports.cc                   | 13 +++--
> >  gcc/gimple-match.h                            |  6 ++-
> >  gcc/internal-fn.cc                            |  5 ++
> >  gcc/internal-fn.def                           |  2 +
> >  gcc/match.pd                                  | 51 +++++++++++++++++++
> >  gcc/optabs.def                                |  1 +
> >  gcc/testsuite/gcc.dg/vect/vect-cond-arith-2.c |  1 +
> >  11 files changed, 114 insertions(+), 6 deletions(-)
> >
> > diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
> > index cc4c9596bbf..0a5e4ccb54e 100644
> > --- a/gcc/config/riscv/autovec.md
> > +++ b/gcc/config/riscv/autovec.md
> > @@ -565,6 +565,32 @@ (define_insn_and_split "vcond_mask_<mode><vm>"
> >    [(set_attr "type" "vector")]
> >  )
> >  
> > +(define_expand "vcond_mask_len_<mode>"
> > +  [(match_operand:V 0 "register_operand")
> > +    (match_operand:<VM> 1 "nonmemory_operand")
> > +    (match_operand:V 2 "nonmemory_operand")
> > +    (match_operand:V 3 "autovec_else_operand")
> > +    (match_operand 4 "autovec_length_operand")
> > +    (match_operand 5 "const_0_operand")]
> > +  "TARGET_VECTOR"
> > +  {
> > +    if (satisfies_constraint_Wc1 (operands[1]))
> > +      riscv_vector::expand_cond_len_unop (code_for_pred_mov (<MODE>mode),
> > +                                     operands);
> > +    else
> > +      {
> > +   /* The order of then and else is opposite to pred_merge.  */
> > +   rtx ops[] = {operands[0], operands[3], operands[3], operands[2],
> > +                operands[1]};
> > +   riscv_vector::emit_nonvlmax_insn (code_for_pred_merge (<MODE>mode),
> > +                                     riscv_vector::MERGE_OP_TU,
> > +                                     ops, operands[4]);
> > +      }
> > +    DONE;
> > +  }
> > +  [(set_attr "type" "vector")]
> > +)
> > +
> >  ;; 
> > -------------------------------------------------------------------------
> >  ;; ---- [BOOL] Select based on masks
> >  ;; 
> > -------------------------------------------------------------------------
> > diff --git a/gcc/config/riscv/riscv-protos.h 
> > b/gcc/config/riscv/riscv-protos.h
> > index a1be731c28e..0d0ee5effea 100644
> > --- a/gcc/config/riscv/riscv-protos.h
> > +++ b/gcc/config/riscv/riscv-protos.h
> > @@ -359,6 +359,9 @@ enum insn_type : unsigned int
> >    /* For vmerge, no mask operand, no mask policy operand.  */
> >    MERGE_OP = __NORMAL_OP_TA2 | TERNARY_OP_P,
> >  
> > +  /* For vmerge with TU policy.  */
> > +  MERGE_OP_TU = HAS_DEST_P | HAS_MERGE_P | TERNARY_OP_P | TU_POLICY_P,
> > +
> >    /* For vm<compare>, no tail policy operand.  */
> >    COMPARE_OP = __NORMAL_OP_MA | TERNARY_OP_P,
> >    COMPARE_OP_MU = __MASK_OP_MU | TERNARY_OP_P,
> > diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> > index b489ce08775..d2dde1897c4 100644
> > --- a/gcc/config/riscv/riscv-v.cc
> > +++ b/gcc/config/riscv/riscv-v.cc
> > @@ -3214,7 +3214,8 @@ needs_fp_rounding (unsigned icode, machine_mode mode)
> >      && icode != maybe_code_for_pred_widen (FLOAT, mode)
> >      && icode != maybe_code_for_pred_widen (UNSIGNED_FLOAT, mode)
> >      /* vfsgnj */
> > -    && icode != maybe_code_for_pred (UNSPEC_VCOPYSIGN, mode);
> > +    && icode != maybe_code_for_pred (UNSPEC_VCOPYSIGN, mode)
> > +    && icode != maybe_code_for_pred_mov (mode);
> >  }
> >  
> >  /* Subroutine to expand COND_LEN_* patterns.  */
> > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> > index fab2513105a..10f971749bc 100644
> > --- a/gcc/doc/md.texi
> > +++ b/gcc/doc/md.texi
> > @@ -5306,6 +5306,15 @@ no need to define this instruction pattern if the 
> > others are supported.
> >  Similar to @code{vcond@var{m}@var{n}} but operand 3 holds a pre-computed
> >  result of vector comparison.
> >  
> > +@cindex @code{vcond_mask_len_@var{m}@var{n}} instruction pattern
> > +@item @samp{vcond_mask_len_@var{m}@var{n}}
> > +Similar to @code{vcond_mask_@var{m}@var{n}} but operand 4 holds a variable
> > +or constant length and operand 5 holds a bias.  If the
> > +element index < operand 4 + operand 5 the respective element of the result 
> > is
> > +computed as in @code{vcond_mask_@var{m}@var{n}}.  For element indices >=
> > +operand 4 + operand 5 the computation is performed as if the respective 
> > mask
> > +element were zero.
> > +
> 
> There is no computation here, it's just a selection between two values.
> We should also mention the different operand order.  How about:
> 
> ----------------
> Set each element of operand 0 to the corresponding element of operand 2
> or operand 3.  Choose operand 2 if both the element index is less than
> operand 4 plus operand 5 and the corresponding element of operand 1
> is nonzero:
> 
> @smallexample
> for (i = 0; i < GET_MODE_NUNITS (@var{m}); i++)
>   op0[i] = i < op4 + op5 && op1[i] ? op2[i] : op3[i];
> @end smallexample
> 
> Operands 0, 2 and 3 have mode @var{m}.  Operand 1 has mode @var{n}.
> Operands 4 and 5 have a target-dependent scalar integer mode.
> ----------------
> 
> OK for the non-match.pd target-independent parts with that change.

OK for the rest as well.

Richard.

> Thanks,
> Richard
> 
> >  @cindex @code{maskload@var{m}@var{n}} instruction pattern
> >  @item @samp{maskload@var{m}@var{n}}
> >  Perform a masked load of vector from memory operand 1 of mode @var{m}
> > diff --git a/gcc/gimple-match-exports.cc b/gcc/gimple-match-exports.cc
> > index b36027b0bad..d6dac08cc2b 100644
> > --- a/gcc/gimple-match-exports.cc
> > +++ b/gcc/gimple-match-exports.cc
> > @@ -307,9 +307,16 @@ maybe_resimplify_conditional_op (gimple_seq *seq, 
> > gimple_match_op *res_op,
> >        && VECTOR_TYPE_P (res_op->type)
> >        && gimple_simplified_result_is_gimple_val (res_op))
> >      {
> > -      new_op.set_op (VEC_COND_EXPR, res_op->type,
> > -                res_op->cond.cond, res_op->ops[0],
> > -                res_op->cond.else_value);
> > +      tree len = res_op->cond.len;
> > +      if (!len)
> > +   new_op.set_op (VEC_COND_EXPR, res_op->type,
> > +                  res_op->cond.cond, res_op->ops[0],
> > +                  res_op->cond.else_value);
> > +      else
> > +   new_op.set_op (IFN_VCOND_MASK_LEN, res_op->type,
> > +                  res_op->cond.cond, res_op->ops[0],
> > +                  res_op->cond.else_value,
> > +                  res_op->cond.len, res_op->cond.bias);
> >        *res_op = new_op;
> >        return gimple_resimplify3 (seq, res_op, valueize);
> >      }
> > diff --git a/gcc/gimple-match.h b/gcc/gimple-match.h
> > index 9892c142285..63a9f029589 100644
> > --- a/gcc/gimple-match.h
> > +++ b/gcc/gimple-match.h
> > @@ -32,7 +32,8 @@ public:
> >    enum uncond { UNCOND };
> >  
> >    /* Build an unconditional op.  */
> > -  gimple_match_cond (uncond) : cond (NULL_TREE), else_value (NULL_TREE) {}
> > +  gimple_match_cond (uncond) : cond (NULL_TREE), else_value (NULL_TREE), 
> > len
> > +                          (NULL_TREE), bias (NULL_TREE) {}
> >    gimple_match_cond (tree, tree);
> >    gimple_match_cond (tree, tree, tree, tree);
> >  
> > @@ -56,7 +57,8 @@ public:
> >  
> >  inline
> >  gimple_match_cond::gimple_match_cond (tree cond_in, tree else_value_in)
> > -  : cond (cond_in), else_value (else_value_in)
> > +  : cond (cond_in), else_value (else_value_in), len (NULL_TREE),
> > +    bias (NULL_TREE)
> >  {
> >  }
> >  
> > diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> > index c7d3564faef..5a998e794ad 100644
> > --- a/gcc/internal-fn.cc
> > +++ b/gcc/internal-fn.cc
> > @@ -170,6 +170,7 @@ init_internal_fns ()
> >  #define store_lanes_direct { 0, 0, false }
> >  #define mask_store_lanes_direct { 0, 0, false }
> >  #define vec_cond_mask_direct { 1, 0, false }
> > +#define vec_cond_mask_len_direct { 1, 1, false }
> >  #define vec_cond_direct { 2, 0, false }
> >  #define scatter_store_direct { 3, 1, false }
> >  #define len_store_direct { 3, 3, false }
> > @@ -4690,6 +4691,7 @@ internal_fn_len_index (internal_fn fn)
> >      case IFN_MASK_LEN_STORE:
> >      case IFN_MASK_LEN_LOAD_LANES:
> >      case IFN_MASK_LEN_STORE_LANES:
> > +    case IFN_VCOND_MASK_LEN:
> >        return 3;
> >  
> >      default:
> > @@ -4782,6 +4784,9 @@ internal_fn_mask_index (internal_fn fn)
> >      case IFN_MASK_LEN_SCATTER_STORE:
> >        return 4;
> >  
> > +    case IFN_VCOND_MASK_LEN:
> > +      return 0;
> > +
> >      default:
> >        return (conditional_internal_fn_code (fn) != ERROR_MARK
> >           || get_unconditional_internal_fn (fn) != IFN_LAST ? 0 : -1);
> > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> > index a2023ab9c3d..7f0e3759615 100644
> > --- a/gcc/internal-fn.def
> > +++ b/gcc/internal-fn.def
> > @@ -221,6 +221,8 @@ DEF_INTERNAL_OPTAB_FN (VCONDU, ECF_CONST | ECF_NOTHROW, 
> > vcondu, vec_cond)
> >  DEF_INTERNAL_OPTAB_FN (VCONDEQ, ECF_CONST | ECF_NOTHROW, vcondeq, vec_cond)
> >  DEF_INTERNAL_OPTAB_FN (VCOND_MASK, ECF_CONST | ECF_NOTHROW,
> >                    vcond_mask, vec_cond_mask)
> > +DEF_INTERNAL_OPTAB_FN (VCOND_MASK_LEN, ECF_CONST | ECF_NOTHROW,
> > +                  vcond_mask_len, cond_len_unary)
> >  
> >  DEF_INTERNAL_OPTAB_FN (VEC_SET, ECF_CONST | ECF_NOTHROW, vec_set, vec_set)
> >  DEF_INTERNAL_OPTAB_FN (VEC_EXTRACT, ECF_CONST | ECF_NOTHROW,
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 424bbd02233..dbc811b2b38 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -87,6 +87,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >    negate bit_not)
> >  (define_operator_list COND_UNARY
> >    IFN_COND_NEG IFN_COND_NOT)
> > +(define_operator_list COND_LEN_UNARY
> > +  IFN_COND_LEN_NEG IFN_COND_LEN_NOT)
> >  
> >  /* Binary operations and their associated IFN_COND_* function.  */
> >  (define_operator_list UNCOND_BINARY
> > @@ -8961,6 +8963,21 @@ and,
> >          && is_truth_type_for (op_type, TREE_TYPE (@0)))
> >       (cond_op (bit_not @0) @2 @1)))))
> >  
> > +(for uncond_op (UNCOND_UNARY)
> > +     cond_op (COND_LEN_UNARY)
> > + (simplify
> > +  (IFN_VCOND_MASK_LEN @0 (view_convert? (uncond_op@3 @1)) @2 @4 @5)
> > +   (with { tree op_type = TREE_TYPE (@3); }
> > +    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), 
> > op_type)
> > +        && is_truth_type_for (op_type, TREE_TYPE (@0)))
> > +     (cond_op @0 @1 @2 @4 @5))))
> > + (simplify
> > +  (IFN_VCOND_MASK_LEN @0 @1 (view_convert? (uncond_op@3 @2)) @4 @5)
> > +   (with { tree op_type = TREE_TYPE (@3); }
> > +    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), 
> > op_type)
> > +        && is_truth_type_for (op_type, TREE_TYPE (@0)))
> > +     (cond_op (bit_not @0) @2 @1 @4 @5)))))
> > +
> >  /* `(a ? -1 : 0) ^ b` can be converted into a conditional not.  */
> >  (simplify
> >   (bit_xor:c (vec_cond @0 uniform_integer_cst_p@1 uniform_integer_cst_p@2) 
> > @3)
> > @@ -9007,6 +9024,23 @@ and,
> >     && single_use (@4))
> >      (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type 
> > @1)))))))
> >  
> > +(for uncond_op (UNCOND_BINARY)
> > +     cond_op (COND_LEN_BINARY)
> > + (simplify
> > +  (IFN_VCOND_MASK_LEN @0 (view_convert? (uncond_op@4 @1 @2)) @3 @5 @6)
> > +  (with { tree op_type = TREE_TYPE (@4); }
> > +   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), 
> > op_type)
> > +   && is_truth_type_for (op_type, TREE_TYPE (@0))
> > +   && single_use (@4))
> > +    (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3) @5 @6)))))
> > + (simplify
> > +  (IFN_VCOND_MASK_LEN @0 @1 (view_convert? (uncond_op@4 @2 @3)) @5 @6)
> > +  (with { tree op_type = TREE_TYPE (@4); }
> > +   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), 
> > op_type)
> > +   && is_truth_type_for (op_type, TREE_TYPE (@0))
> > +   && single_use (@4))
> > +    (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1) @5 
> > @6))))))
> > +
> >  /* Same for ternary operations.  */
> >  (for uncond_op (UNCOND_TERNARY)
> >       cond_op (COND_TERNARY)
> > @@ -9025,6 +9059,23 @@ and,
> >     && single_use (@5))
> >      (view_convert (cond_op (bit_not @0) @2 @3 @4
> >               (view_convert:op_type @1)))))))
> > +
> > +(for uncond_op (UNCOND_TERNARY)
> > +     cond_op (COND_LEN_TERNARY)
> > + (simplify
> > +  (IFN_VCOND_MASK_LEN @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4 @6 @7)
> > +  (with { tree op_type = TREE_TYPE (@5); }
> > +   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), 
> > op_type)
> > +   && is_truth_type_for (op_type, TREE_TYPE (@0))
> > +   && single_use (@5))
> > +    (view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4) @6 @7)))))
> > + (simplify
> > +  (IFN_VCOND_MASK_LEN @0 @1 (view_convert? (uncond_op@5 @2 @3 @4 @6 @7)))
> > +  (with { tree op_type = TREE_TYPE (@5); }
> > +   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), 
> > op_type)
> > +   && is_truth_type_for (op_type, TREE_TYPE (@0))
> > +   && single_use (@5))
> > +    (view_convert (cond_op (bit_not @0) @2 @3 @4 (view_convert:op_type @1) 
> > @6 @7))))))
> >  #endif
> >  
> >  /* Detect cases in which a VEC_COND_EXPR effectively replaces the
> > diff --git a/gcc/optabs.def b/gcc/optabs.def
> > index 2ccbe4197b7..8d5ceeb8710 100644
> > --- a/gcc/optabs.def
> > +++ b/gcc/optabs.def
> > @@ -282,6 +282,7 @@ OPTAB_D (cond_len_fnma_optab, "cond_len_fnma$a")
> >  OPTAB_D (cond_len_fnms_optab, "cond_len_fnms$a")
> >  OPTAB_D (cond_len_neg_optab, "cond_len_neg$a")
> >  OPTAB_D (cond_len_one_cmpl_optab, "cond_len_one_cmpl$a")
> > +OPTAB_D (vcond_mask_len_optab, "vcond_mask_len_$a")
> >  OPTAB_D (cmov_optab, "cmov$a6")
> >  OPTAB_D (cstore_optab, "cstore$a4")
> >  OPTAB_D (ctrap_optab, "ctrap$a4")
> > diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-arith-2.c 
> > b/gcc/testsuite/gcc.dg/vect/vect-cond-arith-2.c
> > index 7e165977e2b..7b3d73acb88 100644
> > --- a/gcc/testsuite/gcc.dg/vect/vect-cond-arith-2.c
> > +++ b/gcc/testsuite/gcc.dg/vect/vect-cond-arith-2.c
> > @@ -1,5 +1,6 @@
> >  /* { dg-do compile } */
> >  /* { dg-additional-options "-fgimple -fdump-tree-optimized -ffast-math" } 
> > */
> > +/* { dg-additional-options "-fno-vect-cost-model" { target { riscv_v } } } 
> > */
> >  
> >  double __GIMPLE (ssa, startwith("loop"))
> >  neg_xi (double *x)
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to