float.

Hu, Lin1 Mon, 24 Jun 2024 19:00:36 -0700

> -----Original Message-----
> From: Tamar Christina <tamar.christ...@arm.com>
> Sent: Monday, June 24, 2024 10:12 PM
> To: Richard Biener <rguent...@suse.de>; Hu, Lin1 <lin1...@intel.com>
> Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao <hongtao....@intel.com>;
> ubiz...@gmail.com
> Subject: RE: [PATCH 1/3 v3] vect: generate suitable convert insn for int -> 
> int,
> float -> float and int <-> float.
> 
> > -----Original Message-----
> > From: Richard Biener <rguent...@suse.de>
> > Sent: Monday, June 24, 2024 1:34 PM
> > To: Hu, Lin1 <lin1...@intel.com>
> > Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao <hongtao....@intel.com>;
> > ubiz...@gmail.com
> > Subject: RE: [PATCH 1/3 v3] vect: generate suitable convert insn for
> > int -> int, float
> > -> float and int <-> float.
> >
> > On Thu, 20 Jun 2024, Hu, Lin1 wrote:
> >
> > > > >    else if (ret_elt_bits > arg_elt_bits)
> > > > >      modifier = WIDEN;
> > > > >
> > > > > +  if (supportable_convert_operation (code, ret_type, arg_type, 
> > > > > &code1))
> > > > > +    {
> > > > > +      g = gimple_build_assign (lhs, code1, arg);
> > > > > +      gsi_replace (gsi, g, false);
> > > > > +      return;
> > > > > +    }
> > > >
> > > > Given the API change I suggest below it might make sense to have
> > > > supportable_indirect_convert_operation do the above and represent
> > > > it as
> > single-
> > > > step conversion?
> > > >
> > >
> > > OK, if you want to supportable_indirect_convert_operation can do
> > > something like supportable_convert_operation, I'll give it a try.
> > > This functionality is really the part that this function can cover.
> > > But this would require some changes not only the API change, because
> > > supportable_indirect_convert_operation originally only supported
> > > Float
> > > -> Int or Int ->Float.
> >
> > I think I'd like to see a single API to handle direct and
> > (multi-)indirect-level converts that operate on vectors with all the
> > same number of lanes.
> >
> > > >
> > > > > +  code_helper code2 = ERROR_MARK, code3 = ERROR_MARK;
> > > > > +  int multi_step_cvt = 0;
> > > > > +  vec<tree> interm_types = vNULL;
> > > > > +  if (supportable_indirect_convert_operation (NULL,
> > > > > +                                           code,
> > > > > +                                           ret_type, arg_type,
> > > > > +                                           &code2, &code3,
> > > > > +                                           &multi_step_cvt,
> > > > > +                                           &interm_types, arg))
> > > > > +    {
> > > > > +      new_rhs = make_ssa_name (interm_types[0]);
> > > > > +      g = gimple_build_assign (new_rhs, (tree_code) code3, arg);
> > > > > +      gsi_insert_before (gsi, g, GSI_SAME_STMT);
> > > > > +      g = gimple_build_assign (lhs, (tree_code) code2, new_rhs);
> > > > > +      gsi_replace (gsi, g, false);
> > > > > +      return;
> > > > > +    }
> > > > > +
> > > > >    if (modifier == NONE && (code == FIX_TRUNC_EXPR || code ==
> > > > FLOAT_EXPR))
> > > > >      {
> > > > > -      if (supportable_convert_operation (code, ret_type, arg_type,
> &code1))
> > > > > -     {
> > > > > -       g = gimple_build_assign (lhs, code1, arg);
> > > > > -       gsi_replace (gsi, g, false);
> > > > > -       return;
> > > > > -     }
> > > > >        /* Can't use get_compute_type here, as
> supportable_convert_operation
> > > > >        doesn't necessarily use an optab and needs two arguments.  */
> > > > >        tree vec_compute_type
> > > > > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> > > > > index 05a169ecb2d..0aa608202ca 100644
> > > > > --- a/gcc/tree-vect-stmts.cc
> > > > > +++ b/gcc/tree-vect-stmts.cc
> > > > > @@ -5175,7 +5175,7 @@ vectorizable_conversion (vec_info *vinfo,
> > > > >    tree scalar_dest;
> > > > >    tree op0, op1 = NULL_TREE;
> > > > >    loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
> > > > > -  tree_code tc1, tc2;
> > > > > +  tree_code tc1;
> > > > >    code_helper code, code1, code2;
> > > > >    code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
> > > > >    tree new_temp;
> > > > > @@ -5384,92 +5384,17 @@ vectorizable_conversion (vec_info *vinfo,
> > > > >       break;
> > > > >        }
> > > > >
> > > > > -      /* For conversions between float and integer types try whether
> > > > > -      we can use intermediate signed integer types to support the
> > > > > -      conversion.  */
> > > > > -      if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
> > > > > -       && (code == FLOAT_EXPR ||
> > > > > -           (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
> > > > > -     {
> > > > > -       bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE
> > > > (lhs_mode);
> > > > > -       bool float_expr_p = code == FLOAT_EXPR;
> > > > > -       unsigned short target_size;
> > > > > -       scalar_mode intermediate_mode;
> > > > > -       if (demotion)
> > > > > -         {
> > > > > -           intermediate_mode = lhs_mode;
> > > > > -           target_size = GET_MODE_SIZE (rhs_mode);
> > > > > -         }
> > > > > -       else
> > > > > -         {
> > > > > -           target_size = GET_MODE_SIZE (lhs_mode);
> > > > > -           if (!int_mode_for_size
> > > > > -               (GET_MODE_BITSIZE (rhs_mode), 0).exists
> > > > (&intermediate_mode))
> > > > > -             goto unsupported;
> > > > > -         }
> > > > > -       code1 = float_expr_p ? code : NOP_EXPR;
> > > > > -       codecvt1 = float_expr_p ? NOP_EXPR : code;
> > > > > -       opt_scalar_mode mode_iter;
> > > > > -       FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
> > > > > -         {
> > > > > -           intermediate_mode = mode_iter.require ();
> > > > > -
> > > > > -           if (GET_MODE_SIZE (intermediate_mode) > target_size)
> > > > > -             break;
> > > > > -
> > > > > -           scalar_mode cvt_mode;
> > > > > -           if (!int_mode_for_size
> > > > > -               (GET_MODE_BITSIZE (intermediate_mode), 0).exists
> > > > (&cvt_mode))
> > > > > -             break;
> > > > > -
> > > > > -           cvt_type = build_nonstandard_integer_type
> > > > > -             (GET_MODE_BITSIZE (cvt_mode), 0);
> > > > > -
> > > > > -           /* Check if the intermediate type can hold OP0's range.
> > > > > -              When converting from float to integer this is not 
> > > > > necessary
> > > > > -              because values that do not fit the (smaller) target 
> > > > > type are
> > > > > -              unspecified anyway.  */
> > > > > -           if (demotion && float_expr_p)
> > > > > -             {
> > > > > -               wide_int op_min_value, op_max_value;
> > > > > -               if (!vect_get_range_info (op0, &op_min_value,
> > > > &op_max_value))
> > > > > -                 break;
> > > > > -
> > > > > -               if (cvt_type == NULL_TREE
> > > > > -                   || (wi::min_precision (op_max_value, SIGNED)
> > > > > -                       > TYPE_PRECISION (cvt_type))
> > > > > -                   || (wi::min_precision (op_min_value, SIGNED)
> > > > > -                       > TYPE_PRECISION (cvt_type)))
> > > > > -                 continue;
> > > > > -             }
> > > > > -
> > > > > -           cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, 
> > > > > slp_node);
> > > > > -           /* This should only happened for SLP as long as loop 
> > > > > vectorizer
> > > > > -              only supports same-sized vector.  */
> > > > > -           if (cvt_type == NULL_TREE
> > > > > -               || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), 
> > > > > nunits_in)
> > > > > -               || !supportable_convert_operation ((tree_code) code1,
> > > > > -                                                  vectype_out,
> > > > > -                                                  cvt_type, &tc1)
> > > > > -               || !supportable_convert_operation ((tree_code) 
> > > > > codecvt1,
> > > > > -                                                  cvt_type,
> > > > > -                                                  vectype_in, &tc2))
> > > > > -             continue;
> > > > > -
> > > > > -           found_mode = true;
> > > > > -           break;
> > > > > -         }
> > > > > +      if (supportable_indirect_convert_operation (vinfo,
> > > > > +                                               code,
> > > > > +                                               vectype_out,
> > > > > +                                               vectype_in,
> > > > > +                                               &code1,
> > > > > +                                               &codecvt1,
> > > > > +                                               &multi_step_cvt,
> > > > > +                                               &interm_types,
> > > > > +                                               op0,slp_node))
> > > > > +     break;
> > > > >
> > > > > -       if (found_mode)
> > > > > -         {
> > > > > -           multi_step_cvt++;
> > > > > -           interm_types.safe_push (cvt_type);
> > > > > -           cvt_type = NULL_TREE;
> > > > > -           code1 = tc1;
> > > > > -           codecvt1 = tc2;
> > > > > -           break;
> > > > > -         }
> > > > > -     }
> > > > >        /* FALLTHRU */
> > > > >      unsupported:
> > > > >        if (dump_enabled_p ())
> > > > > @@ -14626,6 +14551,153 @@ supportable_narrowing_operation
> > > > (code_helper code,
> > > > >    return false;
> > > > >  }
> > > > >
> > > > > +/* Function supportable_indirect_convert_operation
> > > > > +
> > > > > +   Check whether an operation represented by the code CODE is two
> > > > > +   convert operations that are supported by the target platform in
> > > > > +   vector form (i.e., when operating on arguments of type VECTYPE_IN
> > > > > +   producing a result of type VECTYPE_OUT).
> > > > > +
> > > > > +   Convert operations we currently support directly are
> > > > > + FIX_TRUNC and
> > FLOAT.
> > > > > +   This function checks if these operations are supported
> > > > > +   by the target platform directly (via vector tree-codes).
> > > > > +
> > > > > +   Output:
> > > > > +   - CODE1 is the code of a vector operation to be used when
> > > > > +   converting the operation in the first step, if available.
> > > > > +   - CODE2 is the code of a vector operation to be used when
> > > > > +   converting the operation in the second step, if available.
> > > > > +   - MULTI_STEP_CVT determines the number of required
> > > > > + intermediate steps
> > > > in
> > > > > +   case of multi-step conversion (like int->short->char - in that 
> > > > > case
> > > > > +   MULTI_STEP_CVT will be 1). In the function, it should be 1.
> > > > > +   - INTERM_TYPES contains the intermediate type required to perform
> the
> > > > > +   convert operation (short in the above example).   */
> > > > > +bool
> > > > > +supportable_indirect_convert_operation (vec_info *vinfo,
> > > > > +                                     code_helper code,
> > > > > +                                     tree vectype_out,
> > > > > +                                     tree vectype_in,
> > > > > +                                     code_helper *code1,
> > > > > +                                     code_helper *code2,
> > > > > +                                     int *multi_step_cvt,
> > > > > +                                     vec<tree> *interm_types,
> > > >
> > > > This API is somewhat awkward, as we're inventing a new one I guess
> > > > we can do better.  I think we want
> > > >
> > > >       vec<std::pair<tree, tree_code> > *converts,
> > > >
> > > > covering all code1, code2, multi_step_cvt and interm_types with
> > > > the
> > conversion
> > > > sequence being
> > > >
> > > >       converts[0].first tem0 = converts[0].second op0;
> > > >       converts[1].first tem1 = converts[1].second tem;
> > > >
> > >
> > > That's great, this really makes the function work better.
> > >
> > > >
> > > > ... while converts.length () determines the length of the chain,
> > > > one being a
> > direct
> > > > conversion where then converts[0].first is vectype_out.  That
> > > > would allow double -> char to go double -> float -> int -> short -> 
> > > > char for
> example.
> > > >
> > >
> > > I'm trying to determine the requirements, do you want this function
> > > to support multiple conversions (the current implementation just
> > > does a two-step conversion, like double -> char, which becomes
> > > double -> int -> char). Actually we should be able to do all
> > > conversions in two steps, if we have some suitable instructions. I
> > > can't think of a scenario where multiple conversions are needed yet.
> > > Could you give me some examples? Of course, I could tweak this
> > > feature in advance if it is for future consideration.
> >
> > I think the API should support multi-level, not only two levels.  The
> > implementation doesn't need to cover that case unless we run into such
> > a requirement.  Usually vector ISAs allow 2x integer
> > widening/shortening but not 4x, so a VnDImode -> VnQImode conversion
> > would need to go via VnSImode and VnHImode (of course some targets
> > might "help" the vectorizer by providing a VnDImode -> VnQImode
> > pattern that does the intermediate conversions behind the vectorizers
> > back).  But yes, the original motivation for the vectorizer code was
> > that float<->int conversions are limited.
> >
> 
> I have a similar patch in this area but instead looking at unsigned int <-> 
> double
> conversion.   I would want to avoid complicating this area too much so it 
> would
> be good if the API doesn't care about sign either and allows the target to 
> choose
> the operation mode?
> 
> My current patch has a backend target hook that asks if the current widening 
> is
> Preferred as multilevel or single level.  For single level I just generate
> VEC_PERM_EXPRs with a zero register.
> 
> Just wanted to bring it up in case we can have a coherent story around this
> conversions.
>


This is the current API.
14579 bool
14580 supportable_indirect_convert_operation (code_helper code,
14581                                         tree vectype_out,
14582                                         tree vectype_in,
14583                                         vec<std::pair<tree, tree_code> > 
*converts,
14584                                         tree op0)


This API doesn't care about sign, whether double to char or unsigned char will 
be converted to int first in my opinion.

About the backend target hook, I guess you use targetm.* to determine the 
target to choose the operation mode, if yes, I think the API is ok for now, you 
can use targetm to determine the operation mode and output through converts. 
What do you think?

BRs,
Lin 

>
> >
> > >
> > > >
> > > > > +                                     tree op0,
> > > > > +                                     slp_tree slp_node)
> > > >
> > > > I would like to avoid passing VINFO and SLP_NODE here, see below.
> > > > The same is true for OP0 where the existing use is wrong for SLP
> > > > already, but I guess that can stay for now (I opened PR115538 about the
> wrong-code issue).
> > > >
> > >
> > > Thanks, I have removed them.
> > >
> > > >
> > > > > +{
> > > > > +  bool found_mode = false;
> > > > > +  scalar_mode lhs_mode = GET_MODE_INNER (TYPE_MODE
> > > > > +(vectype_out));
> > > > > +  scalar_mode rhs_mode = GET_MODE_INNER (TYPE_MODE
> > > > > +(vectype_in));
> > > > > +  opt_scalar_mode mode_iter;
> > > > > +  tree_code tc1, tc2;
> > > > > +
> > > > > +  tree cvt_type = NULL_TREE;
> > > > > +  poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype_in);
> > > > > +
> > > > > +  (*multi_step_cvt) = 0;
> > > > > +  /* For conversions between float and integer types try whether
> > > > > +     we can use intermediate signed integer types to support the
> > > > > +     conversion.  */
> > > > > +  if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
> > > > > +      && (code == FLOAT_EXPR
> > > > > +       || (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
> > > > > +    {
> > > > > +      bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE
> > > > (lhs_mode);
> > > > > +      bool float_expr_p = code == FLOAT_EXPR;
> > > > > +      unsigned short target_size;
> > > > > +      scalar_mode intermediate_mode;
> > > > > +      if (demotion)
> > > > > +     {
> > > > > +       intermediate_mode = lhs_mode;
> > > > > +       target_size = GET_MODE_SIZE (rhs_mode);
> > > > > +     }
> > > > > +      else
> > > > > +     {
> > > > > +       target_size = GET_MODE_SIZE (lhs_mode);
> > > > > +       if (!int_mode_for_size
> > > > > +           (GET_MODE_BITSIZE (rhs_mode), 0).exists
> (&intermediate_mode))
> > > > > +         return false;
> > > > > +     }
> > > > > +      *code1 = float_expr_p ? code : NOP_EXPR;
> > > > > +      *code2 = float_expr_p ? NOP_EXPR : code;
> > > > > +      opt_scalar_mode mode_iter;
> > > > > +      FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
> > > > > +     {
> > > > > +       intermediate_mode = mode_iter.require ();
> > > > > +
> > > > > +       if (GET_MODE_SIZE (intermediate_mode) > target_size)
> > > > > +         break;
> > > > > +
> > > > > +       scalar_mode cvt_mode;
> > > > > +       if (!int_mode_for_size
> > > > > +           (GET_MODE_BITSIZE (intermediate_mode), 0).exists
> (&cvt_mode))
> > > > > +         break;
> > > > > +
> > > > > +       cvt_type = build_nonstandard_integer_type
> > > > > +         (GET_MODE_BITSIZE (cvt_mode), 0);
> > > > > +
> > > > > +       /* Check if the intermediate type can hold OP0's range.
> > > > > +          When converting from float to integer this is not necessary
> > > > > +          because values that do not fit the (smaller) target type 
> > > > > are
> > > > > +          unspecified anyway.  */
> > > > > +       if (demotion && float_expr_p)
> > > > > +         {
> > > > > +           wide_int op_min_value, op_max_value;
> > > > > +           /* For vector form, it looks like op0 doesn't have
> RANGE_INFO.
> > > > > +              In the future, if it is supported, changes may need to 
> > > > > be
> made
> > > > > +              to this part, such as checking the RANGE of each
> element
> > > > > +              in the vector.  */
> > > > > +           if (!SSA_NAME_RANGE_INFO (op0)
> > > > > +               || !vect_get_range_info (op0, &op_min_value,
> > > > &op_max_value))
> > > > > +             break;
> > > > > +
> > > > > +           if (cvt_type == NULL_TREE
> > > > > +               || (wi::min_precision (op_max_value, SIGNED)
> > > > > +                   > TYPE_PRECISION (cvt_type))
> > > > > +               || (wi::min_precision (op_min_value, SIGNED)
> > > > > +                   > TYPE_PRECISION (cvt_type)))
> > > > > +             continue;
> > > > > +         }
> > > > > +
> > > > > +       if (vinfo != NULL && slp_node != NULL)
> > > > > +         cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type,
> slp_node);
> > > > > +       else
> > > > > +         {
> > > > > +           bool uns = TYPE_UNSIGNED (TREE_TYPE (vectype_out))
> > > > > +                      || TYPE_UNSIGNED (TREE_TYPE (vectype_in));
> > > > > +           cvt_type = build_nonstandard_integer_type
> > > > > +             (GET_MODE_BITSIZE (cvt_mode), uns);
> > > > > +           cvt_type = build_vector_type (cvt_type, nelts);
> > > > > +         }
> > > >
> > > > So this would then become
> > > >
> > > >           cvt_type = get_related_vectype_for_scalar_type
> > > > (TYPE_MODE (vectype_in), cvt_type, TYPE_VECTOR_SUBPARTS
> > > > (vectype_in));
> > > >
> > > > > +       /* This should only happened for SLP as long as loop 
> > > > > vectorizer
> > > > > +          only supports same-sized vector.  */
> > > > > +       if (cvt_type == NULL_TREE
> > > > > +           || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nelts)
> > > > > +           || !supportable_convert_operation ((tree_code) *code1,
> > > > > +                                              vectype_out,
> > > > > +                                              cvt_type, &tc1)
> > > > > +           || !supportable_convert_operation ((tree_code) *code2,
> > > > > +                                              cvt_type,
> > > > > +                                              vectype_in, &tc2))
> > > > > +         continue;
> > > > > +
> > > > > +       found_mode = true;
> > > > > +       break;
> > > > > +     }
> > > > > +
> > > > > +      if (found_mode)
> > > > > +     {
> > > > > +       (*multi_step_cvt)++;
> > > > > +       interm_types->safe_push (cvt_type);
> > > > > +       cvt_type = NULL_TREE;
> > > > > +       *code1 = tc1;
> > > > > +       *code2 = tc2;
> > > > > +       return true;
> > > > > +     }
> > > > > +    }
> > > > > +  interm_types->release ();
> > > >
> > > > Hmm, ownership of interm_types is somewhat unclear here - the
> > > > caller should release it, or is the situation that the caller is 
> > > > confused by
> stray elements in it?
> > In
> > > > that case I'd suggest to instead do interm_types->truncate (0).
> > > >
> > >
> > > It's my fault, I just imitate
> > > supportable_narrowing/widening_operation,
> > > I think for this function, interm_types->release() is not needed.

RE: [PATCH 1/3 v3] vect: generate suitable convert insn for int -> int, float -> float and int <-> float.

Reply via email to