Hi Mike,

Thanks for fixing this, some comments are inlined below.

on 2022/11/2 10:42, Michael Meissner wrote:
> This patch fixes the issue that GCC cannot build when the default long double
> is IEEE 128-bit.  It fails in building libgcc, specifically when it is trying
> to buld the __mulkc3 function in libgcc.  It is failing in 
> gimple-range-fold.cc
> during the evrp pass.  Ultimately it is failing because the code declared the
> type to use TFmode but it used F128 functions (i.e. KFmode).
> 
>       typedef float TFtype __attribute__((mode (TF)));
>       typedef __complex float TCtype __attribute__((mode (TC)));
> 
>       TCtype
>       __mulkc3_sw (TFtype a, TFtype b, TFtype c, TFtype d)
>       {
>         TFtype ac, bd, ad, bc, x, y;
>         TCtype res;
> 
>         ac = a * c;
>         bd = b * d;
>         ad = a * d;
>         bc = b * c;
> 
>         x = ac - bd;
>         y = ad + bc;
> 
>         if (__builtin_isnan (x) && __builtin_isnan (y))
>           {
>             _Bool recalc = 0;
>             if (__builtin_isinf (a) || __builtin_isinf (b))
>               {
> 
>                 a = __builtin_copysignf128 (__builtin_isinf (a) ? 1 : 0, a);
>                 b = __builtin_copysignf128 (__builtin_isinf (b) ? 1 : 0, b);
>                 if (__builtin_isnan (c))
>                   c = __builtin_copysignf128 (0, c);
>                 if (__builtin_isnan (d))
>                   d = __builtin_copysignf128 (0, d);
>                 recalc = 1;
>               }
>             if (__builtin_isinf (c) || __builtin_isinf (d))
>               {
> 
>                 c = __builtin_copysignf128 (__builtin_isinf (c) ? 1 : 0, c);
>                 d = __builtin_copysignf128 (__builtin_isinf (d) ? 1 : 0, d);
>                 if (__builtin_isnan (a))
>                   a = __builtin_copysignf128 (0, a);
>                 if (__builtin_isnan (b))
>                   b = __builtin_copysignf128 (0, b);
>                 recalc = 1;
>               }
>             if (!recalc
>                 && (__builtin_isinf (ac) || __builtin_isinf (bd)
>                     || __builtin_isinf (ad) || __builtin_isinf (bc)))
>               {
> 
>                 if (__builtin_isnan (a))
>                   a = __builtin_copysignf128 (0, a);
>                 if (__builtin_isnan (b))
>                   b = __builtin_copysignf128 (0, b);
>                 if (__builtin_isnan (c))
>                   c = __builtin_copysignf128 (0, c);
>                 if (__builtin_isnan (d))
>                   d = __builtin_copysignf128 (0, d);
>                 recalc = 1;
>               }
>             if (recalc)
>               {
>                 x = __builtin_inff128 () * (a * c - b * d);
>                 y = __builtin_inff128 () * (a * d + b * c);
>               }
>           }
> 
>         __real__ res = x;
>         __imag__ res = y;
>         return res;
>       }
> 

One further reduced test case can be:

typedef float TFtype __attribute__((mode (TF)));

TFtype test (TFtype t)
{
  return __builtin_copysignf128 (1.0q, t);
}

Since this reduced test case is quite small, maybe it's good to make it as one
test case associated with this patch.

> Currently GCC uses the long double type node for __float128 if long double is
> IEEE 128-bit.  It did not use the node for _Float128.
> 
> Originally this was noticed if you call the nansq function to make a signaling
> NaN (nansq is mapped to nansf128).  Because the type node for _Float128 is
> different from __float128, the machine independent code converts signaling 
> NaNs
> to quiet NaNs if the types are not compatible.  The following tests used to
> fail when run on a system where long double is IEEE 128-bit:
> 
>       gcc.dg/torture/float128-nan.c
>       gcc.target/powerpc/nan128-1.c
> 
> This patch makes both __float128 and _Float128 use the same type node.
> 
> One side effect of not using the long double type node for __float128 is that 
> we
> must only use KFmode for _Float128/__float128.  The libstdc++ library won't
> build if we use TFmode for _Float128 and __float128 when long double is IEEE
> 128-bit.
> 

Sorry that I didn't get the point of the latter sentence, this proposed patch
uses KFmode for _Float128 and __float128, do you mean that would be fine for
libstdc++ library building since we don't use TFmode for them?

> Another minor side effect is that the f128 round to odd fused multiply-add
> function will not merge negatition with the FMA operation when the type is 
> long
> double.  If the type is __float128 or _Float128, then it will continue to do 
> the
> optimization.  The round to odd functions are defined in terms of __float128
> arguments.  For example:
> 
>       long double
>       do_fms (long double a, long double b, long double c)
>       {
>           return __builtin_fmaf128_round_to_odd (a, b, -c);
>       }
> 
> will generate (assuming -mabi=ieeelongdouble):
> 
>       xsnegqp 4,4
>       xsmaddqpo 4,2,3
>       xxlor 34,36,36
> 
> while:
> 
>       __float128
>       do_fms (__float128 a, __float128 b, __float128 c)
>       {
>           return __builtin_fmaf128_round_to_odd (a, b, -c);
>       }
> 
> will generate:
> 
>       xsmsubqpo 4,2,3
>       xxlor 34,36,36
> 

It sounds like a tiny "regression", maybe we should open a PR to track it?

> I tested all 3 patchs for PR target/107299 on:
> 
>     1)        LE Power10 using --with-cpu=power10 
> --with-long-double-format=ieee
>     2)        LE Power10 using --with-cpu=power10 
> --with-long-double-format=ibm
>     3)        LE Power9  using --with-cpu=power9  
> --with-long-double-format=ibm
>     4)        BE Power8  using --with-cpu=power8  
> --with-long-double-format=ibm
> 
> Once all 3 patches have been applied, we can once again build GCC when long
> double is IEEE 128-bit.  There were no other regressions with these patches.
> Can I check these patches into the trunk?
> 
> 2022-11-01   Michael Meissner  <meiss...@linux.ibm.com>
> 
> gcc/
> 
>       PR target/107299
>       * config/rs6000/rs6000-builtin.cc (rs6000_init_builtins): Always use the
>       _Float128 type for __float128.
>       (rs6000_expand_builtin): Only change a KFmode built-in to TFmode, if the
>       built-in passes or returns TFmode.  If the predicate failed because the
>       modes were different, use convert_move to load up the value instead of
>       copy_to_mode_reg.
>       * config/rs6000/rs6000.cc (rs6000_translate_mode_attribute): Don't
>       translate IEEE 128-bit floating point modes to explicit IEEE 128-bit
>       modes (KFmode or KCmode), even if long double is IEEE 128-bit.

You meant to say "Translate .." not "Don't .."?

>       (rs6000_libgcc_floating_mode_supported_p): Support KFmode all of the
>       time if we support IEEE 128-bit floating point.
>       (rs6000_floatn_mode): _Float128 and _Float128x always uses KFmode.
> 
> gcc/testsuite/
> 
>       PR target/107299
>       * gcc.target/powerpc/float128-hw12.c: New test.
>       * gcc.target/powerpc/float128-hw13.c: Likewise.
>       * gcc.target/powerpc/float128-hw4.c: Update insns.
> ---
>  gcc/config/rs6000/rs6000-builtin.cc           | 237 ++++++++++--------
>  gcc/config/rs6000/rs6000.cc                   |  31 ++-
>  .../gcc.target/powerpc/float128-hw12.c        | 137 ++++++++++
>  .../gcc.target/powerpc/float128-hw13.c        | 137 ++++++++++
>  .../gcc.target/powerpc/float128-hw4.c         |  10 +-
>  5 files changed, 431 insertions(+), 121 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/float128-hw12.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/float128-hw13.c
> 
> diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
> b/gcc/config/rs6000/rs6000-builtin.cc
> index 90ab39dc258..e5298f45363 100644
> --- a/gcc/config/rs6000/rs6000-builtin.cc
> +++ b/gcc/config/rs6000/rs6000-builtin.cc
> @@ -730,25 +730,28 @@ rs6000_init_builtins (void)
>  
>    if (TARGET_FLOAT128_TYPE)
>      {
> -      if (TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128)
> -     ieee128_float_type_node = long_double_type_node;
> -      else
> +      /* In the past we used long_double_type_node when long double was IEEE
> +      128-bit.  However, this means that the _Float128 type
> +      (i.e. float128_type_node) is a different type from __float128
> +      (i.e. ieee128_float_type_nonde).  This leads to some corner cases,
                                  ~~~~ node
> +      such as processing signaling NaNs with the nansf128 built-in function
> +      (which returns a _Float128 value) and assign it to a long double or
> +      __float128 value.  The two explicit IEEE 128-bit types should always
> +      use the same internal type.
> +
> +      For C we only need to register the __ieee128 name for it.  For C++, we
> +      create a distinct type which will mangle differently (u9__ieee128)
> +      vs. _Float128 (DF128_) and behave backwards compatibly.  */
> +      if (float128t_type_node == NULL_TREE)
>       {
> -       /* For C we only need to register the __ieee128 name for
> -          it.  For C++, we create a distinct type which will mangle
> -          differently (u9__ieee128) vs. _Float128 (DF128_) and behave
> -          backwards compatibly.  */
> -       if (float128t_type_node == NULL_TREE)
> -         {
> -           float128t_type_node = make_node (REAL_TYPE);
> -           TYPE_PRECISION (float128t_type_node)
> -             = TYPE_PRECISION (float128_type_node);
> -           layout_type (float128t_type_node);
> -           SET_TYPE_MODE (float128t_type_node,
> -                          TYPE_MODE (float128_type_node));
> -         }
> -       ieee128_float_type_node = float128t_type_node;
> +       float128t_type_node = make_node (REAL_TYPE);
> +       TYPE_PRECISION (float128t_type_node)
> +         = TYPE_PRECISION (float128_type_node);
> +       layout_type (float128t_type_node);
> +       SET_TYPE_MODE (float128t_type_node,
> +                      TYPE_MODE (float128_type_node));
>       }
> +      ieee128_float_type_node = float128t_type_node;
>        t = build_qualified_type (ieee128_float_type_node, TYPE_QUAL_CONST);
>        lang_hooks.types.register_builtin_type (ieee128_float_type_node,
>                                             "__ieee128");
> @@ -3265,13 +3268,13 @@ htm_expand_builtin (bifdata *bifaddr, 
> rs6000_gen_builtins fcode,
>  
>  /* Expand an expression EXP that calls a built-in function,
>     with result going to TARGET if that's convenient
> -   (and in mode MODE if that's convenient).
> +   (and in mode RETURN_MODE if that's convenient).
>     SUBTARGET may be used as the target for computing one of EXP's operands.
>     IGNORE is nonzero if the value is to be ignored.
>     Use the new builtin infrastructure.  */
>  rtx
>  rs6000_expand_builtin (tree exp, rtx target, rtx /* subtarget */,
> -                    machine_mode /* mode */, int ignore)
> +                    machine_mode return_mode, int ignore)
>  {
>    tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
>    enum rs6000_gen_builtins fcode
> @@ -3287,78 +3290,99 @@ rs6000_expand_builtin (tree exp, rtx target, rtx /* 
> subtarget */,
>    size_t uns_fcode = (size_t)fcode;
>    enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
>  
> -  /* TODO: The following commentary and code is inherited from the original
> -     builtin processing code.  The commentary is a bit confusing, with the
> -     intent being that KFmode is always IEEE-128, IFmode is always IBM
> -     double-double, and TFmode is the current long double.  The code is
> -     confusing in that it converts from KFmode to TFmode pattern names,
> -     when the other direction is more intuitive.  Try to address this.  */
> -
> -  /* We have two different modes (KFmode, TFmode) that are the IEEE
> -     128-bit floating point type, depending on whether long double is the
> -     IBM extended double (KFmode) or long double is IEEE 128-bit (TFmode).
> -     It is simpler if we only define one variant of the built-in function,
> -     and switch the code when defining it, rather than defining two built-
> -     ins and using the overload table in rs6000-c.cc to switch between the
> -     two.  If we don't have the proper assembler, don't do this switch
> -     because CODE_FOR_*kf* and CODE_FOR_*tf* will be CODE_FOR_nothing.  */
> -  if (FLOAT128_IEEE_P (TFmode))
> -    switch (icode)
> -      {
> -      case CODE_FOR_sqrtkf2_odd:
> -     icode = CODE_FOR_sqrttf2_odd;
> -     break;
> -      case CODE_FOR_trunckfdf2_odd:
> -     icode = CODE_FOR_trunctfdf2_odd;
> -     break;
> -      case CODE_FOR_addkf3_odd:
> -     icode = CODE_FOR_addtf3_odd;
> -     break;
> -      case CODE_FOR_subkf3_odd:
> -     icode = CODE_FOR_subtf3_odd;
> -     break;
> -      case CODE_FOR_mulkf3_odd:
> -     icode = CODE_FOR_multf3_odd;
> -     break;
> -      case CODE_FOR_divkf3_odd:
> -     icode = CODE_FOR_divtf3_odd;
> -     break;
> -      case CODE_FOR_fmakf4_odd:
> -     icode = CODE_FOR_fmatf4_odd;
> -     break;
> -      case CODE_FOR_xsxexpqp_kf:
> -     icode = CODE_FOR_xsxexpqp_tf;
> -     break;
> -      case CODE_FOR_xsxsigqp_kf:
> -     icode = CODE_FOR_xsxsigqp_tf;
> -     break;
> -      case CODE_FOR_xststdcnegqp_kf:
> -     icode = CODE_FOR_xststdcnegqp_tf;
> -     break;
> -      case CODE_FOR_xsiexpqp_kf:
> -     icode = CODE_FOR_xsiexpqp_tf;
> -     break;
> -      case CODE_FOR_xsiexpqpf_kf:
> -     icode = CODE_FOR_xsiexpqpf_tf;
> -     break;
> -      case CODE_FOR_xststdcqp_kf:
> -     icode = CODE_FOR_xststdcqp_tf;
> -     break;
> -      case CODE_FOR_xscmpexpqp_eq_kf:
> -     icode = CODE_FOR_xscmpexpqp_eq_tf;
> -     break;
> -      case CODE_FOR_xscmpexpqp_lt_kf:
> -     icode = CODE_FOR_xscmpexpqp_lt_tf;
> -     break;
> -      case CODE_FOR_xscmpexpqp_gt_kf:
> -     icode = CODE_FOR_xscmpexpqp_gt_tf;
> -     break;
> -      case CODE_FOR_xscmpexpqp_unordered_kf:
> -     icode = CODE_FOR_xscmpexpqp_unordered_tf;
> -     break;
> -      default:
> -     break;
> -      }
> +  /* For 128-bit long double, we may need both the KFmode built-in functions
> +     and IFmode built-in functions to the equivalent TFmode built-in 
> function,
> +     if either a TFmode result is expected or any of the arguments use
> +     TFmode.  */
> +  if (TARGET_LONG_DOUBLE_128)
> +    {
> +      bool uses_tf_mode = return_mode == TFmode;
> +      if (!uses_tf_mode)
> +     {
> +       call_expr_arg_iterator iter;
> +       tree arg;
> +       FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
> +         {
> +           if (arg != error_mark_node
> +               && TYPE_MODE (TREE_TYPE (arg)) == TFmode)
> +             {
> +               uses_tf_mode = true;
> +               break;
> +             }
> +         }
> +     }
> +
> +      /* Convert KFmode built-in functions to TFmode when long double is IEEE
> +      128-bit.  */
> +      if (uses_tf_mode && FLOAT128_IEEE_P (TFmode))
> +     switch (icode)
> +       {
> +       case CODE_FOR_sqrtkf2_odd:
> +         icode = CODE_FOR_sqrttf2_odd;
> +         break;
> +       case CODE_FOR_trunckfdf2_odd:
> +         icode = CODE_FOR_trunctfdf2_odd;
> +         break;
> +       case CODE_FOR_addkf3_odd:
> +         icode = CODE_FOR_addtf3_odd;
> +         break;
> +       case CODE_FOR_subkf3_odd:
> +         icode = CODE_FOR_subtf3_odd;
> +         break;
> +       case CODE_FOR_mulkf3_odd:
> +         icode = CODE_FOR_multf3_odd;
> +         break;
> +       case CODE_FOR_divkf3_odd:
> +         icode = CODE_FOR_divtf3_odd;
> +         break;
> +       case CODE_FOR_fmakf4_odd:
> +         icode = CODE_FOR_fmatf4_odd;
> +         break;
> +       case CODE_FOR_xsxexpqp_kf:
> +         icode = CODE_FOR_xsxexpqp_tf;
> +         break;
> +       case CODE_FOR_xsxsigqp_kf:
> +         icode = CODE_FOR_xsxsigqp_tf;
> +         break;
> +       case CODE_FOR_xststdcnegqp_kf:
> +         icode = CODE_FOR_xststdcnegqp_tf;
> +         break;
> +       case CODE_FOR_xsiexpqp_kf:
> +         icode = CODE_FOR_xsiexpqp_tf;
> +         break;
> +       case CODE_FOR_xsiexpqpf_kf:
> +         icode = CODE_FOR_xsiexpqpf_tf;
> +         break;
> +       case CODE_FOR_xststdcqp_kf:
> +         icode = CODE_FOR_xststdcqp_tf;
> +         break;
> +       case CODE_FOR_xscmpexpqp_eq_kf:
> +         icode = CODE_FOR_xscmpexpqp_eq_tf;
> +         break;
> +       case CODE_FOR_xscmpexpqp_lt_kf:
> +         icode = CODE_FOR_xscmpexpqp_lt_tf;
> +         break;
> +       case CODE_FOR_xscmpexpqp_gt_kf:
> +         icode = CODE_FOR_xscmpexpqp_gt_tf;
> +         break;
> +       case CODE_FOR_xscmpexpqp_unordered_kf:
> +         icode = CODE_FOR_xscmpexpqp_unordered_tf;
> +         break;
> +       default:
> +         break;
> +       }
> +
> +      /* Convert IFmode built-in functions to TFmode when long double is IBM
> +      128-bit.  */
> +      else if (uses_tf_mode && FLOAT128_IBM_P (TFmode))
> +     {
> +       if (icode == CODE_FOR_packif)
> +         icode = CODE_FOR_packtf;
> +
> +       else if (icode == CODE_FOR_unpackif)
> +         icode = CODE_FOR_unpacktf;
> +     }
> +    }
>  
>    /* In case of "#pragma target" changes, we initialize all builtins
>       but check for actual availability now, during expand time.  For
> @@ -3481,18 +3505,6 @@ rs6000_expand_builtin (tree exp, rtx target, rtx /* 
> subtarget */,
>  
>    if (bif_is_ibm128 (*bifaddr) && TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
>      {
> -      if (fcode == RS6000_BIF_PACK_IF)
> -     {
> -       icode = CODE_FOR_packtf;
> -       fcode = RS6000_BIF_PACK_TF;
> -       uns_fcode = (size_t) fcode;
> -     }
> -      else if (fcode == RS6000_BIF_UNPACK_IF)
> -     {
> -       icode = CODE_FOR_unpacktf;
> -       fcode = RS6000_BIF_UNPACK_TF;
> -       uns_fcode = (size_t) fcode;
> -     }
>      }

This remaining "if" hunk is useless.  The others looks good to me.

BR,
Kewen

Reply via email to