On Tue, 2021-05-18 at 16:28 -0400, Michael Meissner wrote:
> [PATCH 2/2] Add IEEE 128-bit fp conditional move on PowerPC.
> 

Hi,


> This patch adds the support for power10 IEEE 128-bit floating point 
> conditional
> move and for automatically generating min/max.
> 
> In this patch, I simplified things compared to previous patches.  Instead of
> allowing any four of the modes to be used for the conditional move comparison
> and the move itself could use different modes, I restricted the conditional
> move to just the same mode.  I.e. you can do:

ok.

> 
>         _Float128 a, b, c, d, e, r;
> 
>         r = (a == b) ? c : d;
> 
> But you can't do:
> 
>         _Float128 c, d, r;
>         double a, b;
> 
>         r = (a == b) ? c : d;
> 
> or:
> 
>         _Float128 a, b;
>         double c, d, r;
> 
>         r = (a == b) ? c : d;
> 
> This eliminates a lot of the complexity of the code, because you don't have to
> worry about the sizes being different, and the IEEE 128-bit types being
> restricted to Altivec registers, while the SF/DF modes can use any VSX
> register.
> 
> I did not modify the existing support that allowed conditional moves where
> SFmode operands are compared and DFmode operands are moved (and vice versa).
> 
> I modified the test cases that I added to reflect this change.  I have also
> fixed the test for not equal to use '!=' instead of '=='.
> 
> I have done bootstrap builds with this patch on the following 3 systems:
>     1)        power9 running LE Linux using --with-cpu=power9
>     2)        power8 running BE Linux using --with-cpu=power8, testing both
>       32/64-bit.
>     3)        power10 prototype running LE Linux using --with-cpu=power10.
> 
> There were no regressions to the tests, and the new test added passed.  Can I
> check these patches into trunk branch for GCC 12?
> 
> I would like to check these patches into GCC 11 after a cooling off period, 
> but
> I can also not do the backport if desired.
> 
> gcc/
> 2021-05-18 Michael Meissner  <meiss...@linux.ibm.com>
> 
>         * config/rs6000/rs6000.c (rs6000_maybe_emit_fp_cmove): Add IEEE
>       128-bit floating point conditional move support.
>       (have_compare_and_set_mask): Add IEEE 128-bit floating point
>       types.
>       * config/rs6000/rs6000.md (mov<mode>cc, IEEE128 iterator): New insn.
>       (mov<mode>cc_p10, IEEE128 iterator): New insn.
>       (mov<mode>cc_invert_p10, IEEE128 iterator): New insn.
>       (fpmask<mode>, IEEE128 iterator): New insn.
>       (xxsel<mode>, IEEE128 iterator): New insn.
> 
> gcc/testsuite/
> 2021-05-18  Michael Meissner  <meiss...@linux.ibm.com>
> 
>         * gcc.target/powerpc/float128-cmove.c: New test.
>         * gcc.target/powerpc/float128-minmax-3.c: New test.

ok


> ---
>  gcc/config/rs6000/rs6000.c                    |  38 ++++++-
>  gcc/config/rs6000/rs6000.md                   | 106 ++++++++++++++++++
>  .../gcc.target/powerpc/float128-cmove.c       |  58 ++++++++++
>  .../gcc.target/powerpc/float128-minmax-3.c    |  15 +++
>  4 files changed, 215 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/float128-cmove.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/float128-minmax-3.c
> 
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index fdaf12aeda0..ef1ebaaee05 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -15706,8 +15706,8 @@ rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, 
> rtx op_false,
>    return 1;
>  }
> 
> -/* Possibly emit the xsmaxcdp and xsmincdp instructions to emit a maximum or
> -   minimum with "C" semantics.
> +/* Possibly emit the xsmaxc{dp,qp} and xsminc{dp,qp} instructions to emit a
> +   maximum or minimum with "C" semantics.
> 
>     Unless you use -ffast-math, you can't use these instructions to replace
>     conditions that implicitly reverse the condition because the comparison
> @@ -15783,6 +15783,7 @@ rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx 
> true_cond, rtx false_cond)
>    enum rtx_code code = GET_CODE (op);
>    rtx op0 = XEXP (op, 0);
>    rtx op1 = XEXP (op, 1);
> +  machine_mode compare_mode = GET_MODE (op0);
>    machine_mode result_mode = GET_MODE (dest);
>    rtx compare_rtx;
>    rtx cmove_rtx;
> @@ -15791,6 +15792,35 @@ rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx 
> true_cond, rtx false_cond)
>    if (!can_create_pseudo_p ())
>      return 0;
> 
> +  /* We allow the comparison to be either SFmode/DFmode and the true/false
> +     condition to be either SFmode/DFmode.  I.e. we allow:
> +
> +     float a, b;
> +     double c, d, r;
> +
> +     r = (a == b) ? c : d;
> +
> +    and:
> +
> +     double a, b;
> +     float c, d, r;
> +
> +     r = (a == b) ? c : d;


This new comment does not seem to align with the comments in the
description, which statee "But you can't do ..." 


> +
> +    but we don't allow intermixing the IEEE 128-bit floating point types with
> +    the 32/64-bit scalar types.
> +
> +    It gets too messy where SFmode/DFmode can use any register and 
> TFmode/KFmode
> +    can only use Altivec registers.  In addtion, we would need to do a 
> XXPERMDI
> +    if we compare SFmode/DFmode and move TFmode/KFmode.  */
> +
> +  if (compare_mode == result_mode
> +      || (compare_mode == SFmode && result_mode == DFmode)
> +      || (compare_mode == DFmode && result_mode == SFmode))
> +    ;
> +  else
> +    return false;

Interesting if/else block.  May want to reverse the logic. I defer if
this way is notably simpler than inverting it.


> +
>    switch (code)
>      {
>      case EQ:
> @@ -15843,6 +15873,10 @@ have_compare_and_set_mask (machine_mode mode)
>      case E_DFmode:
>        return TARGET_P9_MINMAX;
> 
> +    case E_KFmode:
> +    case E_TFmode:
> +      return TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode);
> +
>      default:
>        break;
>      }

ok

> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 3a1bc1f8547..0c76338c734 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -5431,6 +5431,112 @@ (define_insn "*xxsel<mode>"
>    "xxsel %x0,%x4,%x3,%x1"
>    [(set_attr "type" "vecmove")])
> 
> +;; Support for ISA 3.1 IEEE 128-bit conditional move.  The mode used in the
> +;; comparison must be the same as used in the conditional move.
> +(define_expand "mov<mode>cc"
> +   [(set (match_operand:IEEE128 0 "gpc_reg_operand")
> +      (if_then_else:IEEE128 (match_operand 1 "comparison_operator")
> +                            (match_operand:IEEE128 2 "gpc_reg_operand")
> +                            (match_operand:IEEE128 3 "gpc_reg_operand")))]
> +  "TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
> +{
> +  if (rs6000_emit_cmove (operands[0], operands[1], operands[2], operands[3]))
> +    DONE;
> +  else
> +    FAIL;
> +})
> +
> +(define_insn_and_split "*mov<mode>cc_p10"
> +  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=&v,v")
> +     (if_then_else:IEEE128
> +      (match_operator:CCFP 1 "fpmask_comparison_operator"
> +             [(match_operand:IEEE128 2 "altivec_register_operand" "v,v")
> +              (match_operand:IEEE128 3 "altivec_register_operand" "v,v")])
> +      (match_operand:IEEE128 4 "altivec_register_operand" "v,v")
> +      (match_operand:IEEE128 5 "altivec_register_operand" "v,v")))
> +   (clobber (match_scratch:V2DI 6 "=0,&v"))]
> +  "TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
> +  "#"
> +  "&& 1"
> +  [(set (match_dup 6)
> +     (if_then_else:V2DI (match_dup 1)
> +                        (match_dup 7)
> +                        (match_dup 8)))
> +   (set (match_dup 0)
> +     (if_then_else:IEEE128 (ne (match_dup 6)
> +                               (match_dup 8))
> +                           (match_dup 4)
> +                           (match_dup 5)))]
> +{
> +  if (GET_CODE (operands[6]) == SCRATCH)
> +    operands[6] = gen_reg_rtx (V2DImode);
> +
> +  operands[7] = CONSTM1_RTX (V2DImode);
> +  operands[8] = CONST0_RTX (V2DImode);
> +}
> + [(set_attr "length" "8")
> +  (set_attr "type" "vecperm")])
> +
> +;; Handle inverting the fpmask comparisons.
> +(define_insn_and_split "*mov<mode>cc_invert_p10"
> +  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=&v,v")
> +     (if_then_else:IEEE128
> +      (match_operator:CCFP 1 "invert_fpmask_comparison_operator"
> +             [(match_operand:IEEE128 2 "altivec_register_operand" "v,v")
> +              (match_operand:IEEE128 3 "altivec_register_operand" "v,v")])
> +      (match_operand:IEEE128 4 "altivec_register_operand" "v,v")
> +      (match_operand:IEEE128 5 "altivec_register_operand" "v,v")))
> +   (clobber (match_scratch:V2DI 6 "=0,&v"))]
> +  "TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
> +  "#"
> +  "&& 1"
> +  [(set (match_dup 6)
> +     (if_then_else:V2DI (match_dup 9)
> +                        (match_dup 7)
> +                        (match_dup 8)))
> +   (set (match_dup 0)
> +     (if_then_else:IEEE128 (ne (match_dup 6)
> +                               (match_dup 8))
> +                           (match_dup 5)
> +                           (match_dup 4)))]
> +{
> +  rtx op1 = operands[1];
> +  enum rtx_code cond = reverse_condition_maybe_unordered (GET_CODE (op1));
> +
> +  if (GET_CODE (operands[6]) == SCRATCH)
> +    operands[6] = gen_reg_rtx (V2DImode);
> +
> +  operands[7] = CONSTM1_RTX (V2DImode);
> +  operands[8] = CONST0_RTX (V2DImode);
> +
> +  operands[9] = gen_rtx_fmt_ee (cond, CCFPmode, operands[2], operands[3]);
> +}
> + [(set_attr "length" "8")
> +  (set_attr "type" "vecperm")])
> +
> +(define_insn "*fpmask<mode>"
> +  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
> +     (if_then_else:V2DI
> +      (match_operator:CCFP 1 "fpmask_comparison_operator"
> +             [(match_operand:IEEE128 2 "altivec_register_operand" "v")
> +              (match_operand:IEEE128 3 "altivec_register_operand" "v")])
> +      (match_operand:V2DI 4 "all_ones_constant" "")
> +      (match_operand:V2DI 5 "zero_constant" "")))]
> +  "TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
> +  "xscmp%V1qp %0,%2,%3"
> +  [(set_attr "type" "fpcompare")])
> +
> +(define_insn "*xxsel<mode>"
> +  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
> +     (if_then_else:IEEE128
> +      (ne (match_operand:V2DI 1 "altivec_register_operand" "v")
> +          (match_operand:V2DI 2 "zero_constant" ""))
> +      (match_operand:IEEE128 3 "altivec_register_operand" "v")
> +      (match_operand:IEEE128 4 "altivec_register_operand" "v")))]
> +  "TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
> +  "xxsel %x0,%x4,%x3,%x1"
> +  [(set_attr "type" "vecmove")])
> +

skimmed this part, nothing jumped out at me.

>  
>  ;; Conversions to and from floating-point.
> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/float128-cmove.c 
> b/gcc/testsuite/gcc.target/powerpc/float128-cmove.c
> new file mode 100644
> index 00000000000..2fae8dc23bc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/float128-cmove.c
> @@ -0,0 +1,58 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target ppc_float128_hw } */
> +/* { dg-require-effective-target power10_ok } */
> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
> +
> +#ifndef TYPE
> +#ifdef __LONG_DOUBLE_IEEE128__
> +#define TYPE long double
> +
> +#else
> +#define TYPE _Float128
> +#endif
> +#endif
> +
> +/* Verify that the ISA 3.1 (power10) IEEE 128-bit conditional move 
> instructions
> +   are generated.  */
> +
> +TYPE
> +eq (TYPE a, TYPE b, TYPE c, TYPE d)
> +{
> +  return (a == b) ? c : d;
> +}
> +
> +TYPE
> +ne (TYPE a, TYPE b, TYPE c, TYPE d)
> +{
> +  return (a != b) ? c : d;
> +}
> +
> +TYPE
> +lt (TYPE a, TYPE b, TYPE c, TYPE d)
> +{
> +  return (a < b) ? c : d;
> +}
> +
> +TYPE
> +le (TYPE a, TYPE b, TYPE c, TYPE d)
> +{
> +  return (a <= b) ? c : d;
> +}
> +
> +TYPE
> +gt (TYPE a, TYPE b, TYPE c, TYPE d)
> +{
> +  return (a > b) ? c : d;
> +}
> +
> +TYPE
> +ge (TYPE a, TYPE b, TYPE c, TYPE d)
> +{
> +  return (a >= b) ? c : d;
> +}
> +
> +/* { dg-final { scan-assembler-times {\mxscmpeqqp\M} 2 } } */
> +/* { dg-final { scan-assembler-times {\mxscmpgeqp\M} 2 } } */
> +/* { dg-final { scan-assembler-times {\mxscmpgtqp\M} 2 } } */
> +/* { dg-final { scan-assembler-times {\mxxsel\M}     6 } } */
> +/* { dg-final { scan-assembler-not   {\mxscmpuqp\M}    } } */

ok.

> diff --git a/gcc/testsuite/gcc.target/powerpc/float128-minmax-3.c 
> b/gcc/testsuite/gcc.target/powerpc/float128-minmax-3.c
> new file mode 100644
> index 00000000000..6f7627c0f2a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/float128-minmax-3.c
> @@ -0,0 +1,15 @@
> +/* { dg-require-effective-target ppc_float128_hw } */
> +/* { dg-require-effective-target power10_ok } */
> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
> +
> +#ifndef TYPE
> +#define TYPE _Float128
> +#endif
> +
> +/* Test that the fminf128/fmaxf128 functions generate if/then/else and not a
> +   call.  */

s/"if/then/else"/"minmax"/  ?

> +TYPE f128_min (TYPE a, TYPE b) { return (a < b) ? a : b; }
> +TYPE f128_max (TYPE a, TYPE b) { return (b > a) ? b : a; }
> +
> +/* { dg-final { scan-assembler {\mxsmaxcqp\M} } } */
> +/* { dg-final { scan-assembler {\mxsmincqp\M} } } */


ok
thanks
-Will

> -- 
> 2.31.1
> 

Reply via email to