On Tue, 2021-05-18 at 16:28 -0400, Michael Meissner wrote: > [PATCH 2/2] Add IEEE 128-bit fp conditional move on PowerPC. >
Hi, > This patch adds the support for power10 IEEE 128-bit floating point > conditional > move and for automatically generating min/max. > > In this patch, I simplified things compared to previous patches. Instead of > allowing any four of the modes to be used for the conditional move comparison > and the move itself could use different modes, I restricted the conditional > move to just the same mode. I.e. you can do: ok. > > _Float128 a, b, c, d, e, r; > > r = (a == b) ? c : d; > > But you can't do: > > _Float128 c, d, r; > double a, b; > > r = (a == b) ? c : d; > > or: > > _Float128 a, b; > double c, d, r; > > r = (a == b) ? c : d; > > This eliminates a lot of the complexity of the code, because you don't have to > worry about the sizes being different, and the IEEE 128-bit types being > restricted to Altivec registers, while the SF/DF modes can use any VSX > register. > > I did not modify the existing support that allowed conditional moves where > SFmode operands are compared and DFmode operands are moved (and vice versa). > > I modified the test cases that I added to reflect this change. I have also > fixed the test for not equal to use '!=' instead of '=='. > > I have done bootstrap builds with this patch on the following 3 systems: > 1) power9 running LE Linux using --with-cpu=power9 > 2) power8 running BE Linux using --with-cpu=power8, testing both > 32/64-bit. > 3) power10 prototype running LE Linux using --with-cpu=power10. > > There were no regressions to the tests, and the new test added passed. Can I > check these patches into trunk branch for GCC 12? > > I would like to check these patches into GCC 11 after a cooling off period, > but > I can also not do the backport if desired. > > gcc/ > 2021-05-18 Michael Meissner <meiss...@linux.ibm.com> > > * config/rs6000/rs6000.c (rs6000_maybe_emit_fp_cmove): Add IEEE > 128-bit floating point conditional move support. > (have_compare_and_set_mask): Add IEEE 128-bit floating point > types. > * config/rs6000/rs6000.md (mov<mode>cc, IEEE128 iterator): New insn. > (mov<mode>cc_p10, IEEE128 iterator): New insn. > (mov<mode>cc_invert_p10, IEEE128 iterator): New insn. > (fpmask<mode>, IEEE128 iterator): New insn. > (xxsel<mode>, IEEE128 iterator): New insn. > > gcc/testsuite/ > 2021-05-18 Michael Meissner <meiss...@linux.ibm.com> > > * gcc.target/powerpc/float128-cmove.c: New test. > * gcc.target/powerpc/float128-minmax-3.c: New test. ok > --- > gcc/config/rs6000/rs6000.c | 38 ++++++- > gcc/config/rs6000/rs6000.md | 106 ++++++++++++++++++ > .../gcc.target/powerpc/float128-cmove.c | 58 ++++++++++ > .../gcc.target/powerpc/float128-minmax-3.c | 15 +++ > 4 files changed, 215 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/powerpc/float128-cmove.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/float128-minmax-3.c > > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c > index fdaf12aeda0..ef1ebaaee05 100644 > --- a/gcc/config/rs6000/rs6000.c > +++ b/gcc/config/rs6000/rs6000.c > @@ -15706,8 +15706,8 @@ rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, > rtx op_false, > return 1; > } > > -/* Possibly emit the xsmaxcdp and xsmincdp instructions to emit a maximum or > - minimum with "C" semantics. > +/* Possibly emit the xsmaxc{dp,qp} and xsminc{dp,qp} instructions to emit a > + maximum or minimum with "C" semantics. > > Unless you use -ffast-math, you can't use these instructions to replace > conditions that implicitly reverse the condition because the comparison > @@ -15783,6 +15783,7 @@ rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx > true_cond, rtx false_cond) > enum rtx_code code = GET_CODE (op); > rtx op0 = XEXP (op, 0); > rtx op1 = XEXP (op, 1); > + machine_mode compare_mode = GET_MODE (op0); > machine_mode result_mode = GET_MODE (dest); > rtx compare_rtx; > rtx cmove_rtx; > @@ -15791,6 +15792,35 @@ rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx > true_cond, rtx false_cond) > if (!can_create_pseudo_p ()) > return 0; > > + /* We allow the comparison to be either SFmode/DFmode and the true/false > + condition to be either SFmode/DFmode. I.e. we allow: > + > + float a, b; > + double c, d, r; > + > + r = (a == b) ? c : d; > + > + and: > + > + double a, b; > + float c, d, r; > + > + r = (a == b) ? c : d; This new comment does not seem to align with the comments in the description, which statee "But you can't do ..." > + > + but we don't allow intermixing the IEEE 128-bit floating point types with > + the 32/64-bit scalar types. > + > + It gets too messy where SFmode/DFmode can use any register and > TFmode/KFmode > + can only use Altivec registers. In addtion, we would need to do a > XXPERMDI > + if we compare SFmode/DFmode and move TFmode/KFmode. */ > + > + if (compare_mode == result_mode > + || (compare_mode == SFmode && result_mode == DFmode) > + || (compare_mode == DFmode && result_mode == SFmode)) > + ; > + else > + return false; Interesting if/else block. May want to reverse the logic. I defer if this way is notably simpler than inverting it. > + > switch (code) > { > case EQ: > @@ -15843,6 +15873,10 @@ have_compare_and_set_mask (machine_mode mode) > case E_DFmode: > return TARGET_P9_MINMAX; > > + case E_KFmode: > + case E_TFmode: > + return TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode); > + > default: > break; > } ok > diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md > index 3a1bc1f8547..0c76338c734 100644 > --- a/gcc/config/rs6000/rs6000.md > +++ b/gcc/config/rs6000/rs6000.md > @@ -5431,6 +5431,112 @@ (define_insn "*xxsel<mode>" > "xxsel %x0,%x4,%x3,%x1" > [(set_attr "type" "vecmove")]) > > +;; Support for ISA 3.1 IEEE 128-bit conditional move. The mode used in the > +;; comparison must be the same as used in the conditional move. > +(define_expand "mov<mode>cc" > + [(set (match_operand:IEEE128 0 "gpc_reg_operand") > + (if_then_else:IEEE128 (match_operand 1 "comparison_operator") > + (match_operand:IEEE128 2 "gpc_reg_operand") > + (match_operand:IEEE128 3 "gpc_reg_operand")))] > + "TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" > +{ > + if (rs6000_emit_cmove (operands[0], operands[1], operands[2], operands[3])) > + DONE; > + else > + FAIL; > +}) > + > +(define_insn_and_split "*mov<mode>cc_p10" > + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=&v,v") > + (if_then_else:IEEE128 > + (match_operator:CCFP 1 "fpmask_comparison_operator" > + [(match_operand:IEEE128 2 "altivec_register_operand" "v,v") > + (match_operand:IEEE128 3 "altivec_register_operand" "v,v")]) > + (match_operand:IEEE128 4 "altivec_register_operand" "v,v") > + (match_operand:IEEE128 5 "altivec_register_operand" "v,v"))) > + (clobber (match_scratch:V2DI 6 "=0,&v"))] > + "TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" > + "#" > + "&& 1" > + [(set (match_dup 6) > + (if_then_else:V2DI (match_dup 1) > + (match_dup 7) > + (match_dup 8))) > + (set (match_dup 0) > + (if_then_else:IEEE128 (ne (match_dup 6) > + (match_dup 8)) > + (match_dup 4) > + (match_dup 5)))] > +{ > + if (GET_CODE (operands[6]) == SCRATCH) > + operands[6] = gen_reg_rtx (V2DImode); > + > + operands[7] = CONSTM1_RTX (V2DImode); > + operands[8] = CONST0_RTX (V2DImode); > +} > + [(set_attr "length" "8") > + (set_attr "type" "vecperm")]) > + > +;; Handle inverting the fpmask comparisons. > +(define_insn_and_split "*mov<mode>cc_invert_p10" > + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=&v,v") > + (if_then_else:IEEE128 > + (match_operator:CCFP 1 "invert_fpmask_comparison_operator" > + [(match_operand:IEEE128 2 "altivec_register_operand" "v,v") > + (match_operand:IEEE128 3 "altivec_register_operand" "v,v")]) > + (match_operand:IEEE128 4 "altivec_register_operand" "v,v") > + (match_operand:IEEE128 5 "altivec_register_operand" "v,v"))) > + (clobber (match_scratch:V2DI 6 "=0,&v"))] > + "TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" > + "#" > + "&& 1" > + [(set (match_dup 6) > + (if_then_else:V2DI (match_dup 9) > + (match_dup 7) > + (match_dup 8))) > + (set (match_dup 0) > + (if_then_else:IEEE128 (ne (match_dup 6) > + (match_dup 8)) > + (match_dup 5) > + (match_dup 4)))] > +{ > + rtx op1 = operands[1]; > + enum rtx_code cond = reverse_condition_maybe_unordered (GET_CODE (op1)); > + > + if (GET_CODE (operands[6]) == SCRATCH) > + operands[6] = gen_reg_rtx (V2DImode); > + > + operands[7] = CONSTM1_RTX (V2DImode); > + operands[8] = CONST0_RTX (V2DImode); > + > + operands[9] = gen_rtx_fmt_ee (cond, CCFPmode, operands[2], operands[3]); > +} > + [(set_attr "length" "8") > + (set_attr "type" "vecperm")]) > + > +(define_insn "*fpmask<mode>" > + [(set (match_operand:V2DI 0 "altivec_register_operand" "=v") > + (if_then_else:V2DI > + (match_operator:CCFP 1 "fpmask_comparison_operator" > + [(match_operand:IEEE128 2 "altivec_register_operand" "v") > + (match_operand:IEEE128 3 "altivec_register_operand" "v")]) > + (match_operand:V2DI 4 "all_ones_constant" "") > + (match_operand:V2DI 5 "zero_constant" "")))] > + "TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" > + "xscmp%V1qp %0,%2,%3" > + [(set_attr "type" "fpcompare")]) > + > +(define_insn "*xxsel<mode>" > + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") > + (if_then_else:IEEE128 > + (ne (match_operand:V2DI 1 "altivec_register_operand" "v") > + (match_operand:V2DI 2 "zero_constant" "")) > + (match_operand:IEEE128 3 "altivec_register_operand" "v") > + (match_operand:IEEE128 4 "altivec_register_operand" "v")))] > + "TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" > + "xxsel %x0,%x4,%x3,%x1" > + [(set_attr "type" "vecmove")]) > + skimmed this part, nothing jumped out at me. > > ;; Conversions to and from floating-point. > > diff --git a/gcc/testsuite/gcc.target/powerpc/float128-cmove.c > b/gcc/testsuite/gcc.target/powerpc/float128-cmove.c > new file mode 100644 > index 00000000000..2fae8dc23bc > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/float128-cmove.c > @@ -0,0 +1,58 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target ppc_float128_hw } */ > +/* { dg-require-effective-target power10_ok } */ > +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ > + > +#ifndef TYPE > +#ifdef __LONG_DOUBLE_IEEE128__ > +#define TYPE long double > + > +#else > +#define TYPE _Float128 > +#endif > +#endif > + > +/* Verify that the ISA 3.1 (power10) IEEE 128-bit conditional move > instructions > + are generated. */ > + > +TYPE > +eq (TYPE a, TYPE b, TYPE c, TYPE d) > +{ > + return (a == b) ? c : d; > +} > + > +TYPE > +ne (TYPE a, TYPE b, TYPE c, TYPE d) > +{ > + return (a != b) ? c : d; > +} > + > +TYPE > +lt (TYPE a, TYPE b, TYPE c, TYPE d) > +{ > + return (a < b) ? c : d; > +} > + > +TYPE > +le (TYPE a, TYPE b, TYPE c, TYPE d) > +{ > + return (a <= b) ? c : d; > +} > + > +TYPE > +gt (TYPE a, TYPE b, TYPE c, TYPE d) > +{ > + return (a > b) ? c : d; > +} > + > +TYPE > +ge (TYPE a, TYPE b, TYPE c, TYPE d) > +{ > + return (a >= b) ? c : d; > +} > + > +/* { dg-final { scan-assembler-times {\mxscmpeqqp\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mxscmpgeqp\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mxscmpgtqp\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mxxsel\M} 6 } } */ > +/* { dg-final { scan-assembler-not {\mxscmpuqp\M} } } */ ok. > diff --git a/gcc/testsuite/gcc.target/powerpc/float128-minmax-3.c > b/gcc/testsuite/gcc.target/powerpc/float128-minmax-3.c > new file mode 100644 > index 00000000000..6f7627c0f2a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/float128-minmax-3.c > @@ -0,0 +1,15 @@ > +/* { dg-require-effective-target ppc_float128_hw } */ > +/* { dg-require-effective-target power10_ok } */ > +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ > + > +#ifndef TYPE > +#define TYPE _Float128 > +#endif > + > +/* Test that the fminf128/fmaxf128 functions generate if/then/else and not a > + call. */ s/"if/then/else"/"minmax"/ ? > +TYPE f128_min (TYPE a, TYPE b) { return (a < b) ? a : b; } > +TYPE f128_max (TYPE a, TYPE b) { return (b > a) ? b : a; } > + > +/* { dg-final { scan-assembler {\mxsmaxcqp\M} } } */ > +/* { dg-final { scan-assembler {\mxsmincqp\M} } } */ ok thanks -Will > -- > 2.31.1 >