Re: [PATCH ver 4] rs6000, add overloaded DFP quantize support

Kewen.Lin via Gcc-patches Tue, 29 Aug 2023 01:54:53 -0700

Hi Carl,

on 2023/8/29 04:00, Carl Love wrote:
> 
> GCC maintainers:
> 
> Version 4, additional define_insn name fix.  Change Log fix for the
> UNSPEC_DQUAN.  Retested patch on Power 10 LE.
> 
> Version 3, fixed the built-in instance names.  Missed removing the "n"
> from the name.  Added the tighter constraints on the predicates for the
> define_insn.  Updated the wording for the built-ins in the
> documentation file.  Changed the test file name again.  Updated the
> ChangeLog file, added the PR target line.  Retested the patch on Power
> 10LE and Power 8 and Power 9.
> 
> Version 2, renamed the built-in instances.  Changed the name of the
> overloaded built-in.  Added the missing documentation for the new
> built-ins.  Fixed typos.  Changed name of the test.  Updated the
> effective target for the test.  Retested the patch on Power 10LE and
> Power 8 and Power 9.
> 
> The following patch adds four built-ins for the decimal floating point
> (DFP) quantize instructions on rs6000.  The built-ins are for 64-bit
> and 128-bit DFP operands.
> 
> The patch also adds a test case for the new builtins.
> 
> The Patch has been tested on Power 10LE and Power 9 LE/BE.
> 
> Please let me know if the patch is acceptable for mainline.  Thanks.
> 
>                  Carl Love
> 
> 
> ----------------------------------------
> rs6000, add overloaded DFP quantize support
> 
> Add decimal floating point (DFP) quantize built-ins for both 64-bit DFP
> and 128-bit DFP operands.  In each case, there is an immediate version and a
> variable version of the built-in.  The RM value is a 2-bit constant int
> which specifies the rounding mode to use.  For the immediate versions of
> the built-in, the TE field is a 5-bit constant that specifies the value of
> the ideal exponent for the result.  The built-in specifications are:
> 
>   _Decimal64 __builtin_dfp_quantize (_Decimal64, _Decimal64,
>                                    const int RM)
>   _Decimal64 __builtin_dfp_quantize (const int TE, _Decimal64,
>                                    const int RM)
>   _Decimal128 __builtin_dfp_quantize (_Decimal128, _Decimal128,
>                                     const int RM)
>   _Decimal128 __builtin_dfp_quantize (const int TE, _Decimal128,
>                                     const int RM)
> 
> A testcase is added for the new built-in definitions.
> 
> gcc/ChangeLog:
>       * config/rs6000/dfp.md (UNSPEC_DQUAN): New unspec.
>       (dfp_dqua_<mode>, dfp_dquai_<mode>): New define_insn.
>       * config/rs6000/rs6000-builtins.def (__builtin_dfp_dqua,
>       __builtin_dfp_dquai, __builtin_dfp_dquaq, __builtin_dfp_dquaqi):
>       New built-in definitions.
>       * config/rs6000/rs6000-overload.def (__builtin_dfp_quantize): New
>       overloaded definition.
>       * doc/extend.texi: Add documentation for __builtin_dfp_quantize.
> 
> gcc/testsuite/
>       * gcc.target/powerpc/pr93448.c: New test case.
> 
>       PR target/93448
> ---
>  gcc/config/rs6000/dfp.md                   |  25 ++-
>  gcc/config/rs6000/rs6000-builtins.def      |  15 ++
>  gcc/config/rs6000/rs6000-overload.def      |  10 ++
>  gcc/doc/extend.texi                        |  17 ++
>  gcc/testsuite/gcc.target/powerpc/pr93448.c | 200 +++++++++++++++++++++
>  5 files changed, 266 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr93448.c
> 
> diff --git a/gcc/config/rs6000/dfp.md b/gcc/config/rs6000/dfp.md
> index 5ed8a73ac51..bf4a227b0eb 100644
> --- a/gcc/config/rs6000/dfp.md
> +++ b/gcc/config/rs6000/dfp.md
> @@ -271,7 +271,8 @@ (define_c_enum "unspec"
>     UNSPEC_DIEX
>     UNSPEC_DSCLI
>     UNSPEC_DTSTSFI
> -   UNSPEC_DSCRI])
> +   UNSPEC_DSCRI
> +   UNSPEC_DQUAN])
>  
>  (define_code_iterator DFP_TEST [eq lt gt unordered])
>  
> @@ -395,3 +396,25 @@ (define_insn "dfp_dscri_<mode>"
>    "dscri<q> %0,%1,%2"
>    [(set_attr "type" "dfp")
>     (set_attr "size" "<bits>")])
> +
> +(define_insn "dfp_dqua_<mode>"
> +  [(set (match_operand:DDTD 0 "gpc_reg_operand" "=d")
> +        (unspec:DDTD [(match_operand:DDTD 1 "gpc_reg_operand" "d")
> +                   (match_operand:DDTD 2 "gpc_reg_operand" "d")
> +                   (match_operand:SI 3 "const_0_to_3_operand" "n")]
> +                     UNSPEC_DQUAN))]
> +  "TARGET_DFP"
> +  "dqua<q> %0,%1,%2,%3"
> +  [(set_attr "type" "dfp")
> +   (set_attr "size" "<bits>")])
> +
> +(define_insn "dfp_dquai_<mode>"
> +  [(set (match_operand:DDTD 0 "gpc_reg_operand" "=d")
> +        (unspec:DDTD [(match_operand:SI 1 "s5bit_cint_operand" "n")
> +                   (match_operand:DDTD 2 "gpc_reg_operand" "d")
> +                   (match_operand:SI 3 "const_0_to_3_operand" "n")]
> +                     UNSPEC_DQUAN))]
> +  "TARGET_DFP"
> +  "dquai<q> %1,%0,%2,%3"
> +  [(set_attr "type" "dfp")
> +   (set_attr "size" "<bits>")])
> diff --git a/gcc/config/rs6000/rs6000-builtins.def 
> b/gcc/config/rs6000/rs6000-builtins.def
> index 8a294d6c934..ce40600e803 100644
> --- a/gcc/config/rs6000/rs6000-builtins.def
> +++ b/gcc/config/rs6000/rs6000-builtins.def
> @@ -2983,6 +2983,21 @@
>    const unsigned long long __builtin_unpack_dec128 (_Decimal128, const 
> int<1>);
>      UNPACK_TD unpacktd {}
>  
> +  const _Decimal64 __builtin_dfp_dqua (_Decimal64, _Decimal64, \
> +                                    const int<2>);
> +    DFPQUAN_64 dfp_dqua_dd {}
> +
> +  const _Decimal64 __builtin_dfp_dquai (const int<5>, _Decimal64, \
> +                                     const int<2>);
> +    DFPQUAN_64i dfp_dquai_dd {}
> +
> +  const _Decimal128 __builtin_dfp_dquaq (_Decimal128, _Decimal128, \
> +                                      const int<2>);
> +    DFPQUAN_128 dfp_dqua_td {}
> +
> +  const _Decimal128 __builtin_dfp_dquaqi (const int<5>, _Decimal128, \
> +                                       const int<2>);
> +    DFPQUAN_128i dfp_dquai_td {}
>  
>  [crypto]
>    const vull __builtin_crypto_vcipher (vull, vull);
> diff --git a/gcc/config/rs6000/rs6000-overload.def 
> b/gcc/config/rs6000/rs6000-overload.def
> index b83946f5ad8..38d92fcf1f0 100644
> --- a/gcc/config/rs6000/rs6000-overload.def
> +++ b/gcc/config/rs6000/rs6000-overload.def
> @@ -195,6 +195,16 @@
>    unsigned long long __builtin_cmpb (unsigned long long, unsigned long long);
>      CMPB
>  
> +[DFPQUAN, dfp_quantize, __builtin_dfp_quantize]
> +  _Decimal64 __builtin_dfp_quantize (_Decimal64, _Decimal64, const int);
> +    DFPQUAN_64
> +  _Decimal64 __builtin_dfp_quantize (const int, _Decimal64, const int);
> +    DFPQUAN_64i
> +  _Decimal128 __builtin_dfp_quantize (_Decimal128, _Decimal128, const int);
> +    DFPQUAN_128
> +  _Decimal128 __builtin_dfp_quantize (const int, _Decimal128, const int);
> +    DFPQUAN_128i
> +
>  [VEC_ABS, vec_abs, __builtin_vec_abs]
>    vsc __builtin_vec_abs (vsc);
>      ABS_V16QI
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index 73a997276cb..8d7a1116cd9 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -18566,6 +18566,23 @@ The builtin uses the ISA 3.0 instruction 
> @code{mffscdrn} if available.
>  Otherwise the builtin reads the FPSCR, masks the current decimal rounding
>  mode bits out and OR's in the new value.
>  
> +_Decimal64 __builtin_dfp_quantize (_Decimal64, _Decimal64, const int);
> +_Decimal64 __builtin_dfp_quantize (const int, _Decimal64, const int);
> +_Decimal128 __builtin_dfp_quantize (_Decimal128, _Decimal128, const int);
> +_Decimal128 __builtin_dfp_quantize (const int, _Decimal128, const int);
> +
> +The @code{__builtin_dfp_quantize} built-in converts and rounds the second
> +argument to the form with the exponent as specified by the first
> +argument based on the rounding mode specified by the third argument.
> +If the first argument is a decimal floating point value, its exponent is used
> +for converting and rounding of the second argument.  If the first argument 
> is a
> +5-bit constant integer value, then the value specifies the exponent to be 
> used
> +when rounding and converting the second argument.  The third argument is a
> +two bit constant integer that specifies the rounding mode.  The possible 
> modes
> +are: 00 Round to nearest, ties to even; 01 Round toward 0; 10 Round to 
> nearest,
> +ties away from 0; 11 Round according to DRN where DRN is the Decimal Floating
> +point rounding mode field of the FPSCR.
> +
>  @end smallexample
>  
>  The following functions require @option{-mhard-float},
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr93448.c 
> b/gcc/testsuite/gcc.target/powerpc/pr93448.c
> new file mode 100644
> index 00000000000..f9c388585d7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr93448.c
> @@ -0,0 +1,200 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target  dfp_hw} */
> +/* { dg-require-effective-target  has_arch_pwr6} */


Sorry, I didn't catch this in the previous reviews.
"dfp_hw" and "has_arch_pwr6" are missing the expected
space before the closing "}".  Without that space the
checks are ineffective and this test case can fail.  So they should be:

/* { dg-require-effective-target dfp_hw } */
/* { dg-require-effective-target has_arch_pwr6 } */

Okay for trunk with this fixed, thanks!

BR,
Kewen

> +/* { dg-options "-mhard-float -O2 -save-temps" } */
> +
> +/* Test the decimal floating point quantize built-ins.  */
> +
> +#define DEBUG 0
> +
> +#ifdef DEBUG
> +#include <stdio.h>
> +#endif
> +#include <float.h>
> +
> +void abort (void);
> +
> +int main()
> +{
> +#define IMM2  2
> +#define IMM3  3
> +#define IMM4  4
> +
> +  _Decimal64 srcA_dfp64, srcB_dfp64;
> +  _Decimal64 result_dfp64;
> +  _Decimal64 expected_result_dfp64;
> +  _Decimal128 srcA_dfp128, srcB_dfp128;
> +  _Decimal128 result_dfp128;
> +  _Decimal128 expected_result_dfp128;
> +
> +  /* Third argument of quantize built-ins is the rounding mode value (RMC).
> +     
> +     RMC    Rounding Mode
> +     00     Round to nearest, ties to even
> +     01     Round toward 0
> +     10     Round to nearest, ties away from 0
> +     11     Round according to DRN      */
> +
> +
> +  /* Tests for quantize with 64-bit DFP variable.  */
> +  srcA_dfp64 = 100.0df;
> +  srcB_dfp64 = 300.456789df;
> +  expected_result_dfp64 = 300.5df;
> +
> +  result_dfp64 = __builtin_dfp_quantize (srcA_dfp64, srcB_dfp64, 0x0);
> +
> +  if (result_dfp64 != expected_result_dfp64)
> +#if DEBUG
> +    printf("DFP 64-bit quantize of variable, RMC = 0 result does not match 
> expected result\n");
> +#else
> +    abort();
> +#endif
> +
> +  srcA_dfp64 = 100.00df;
> +  srcB_dfp64 = 300.456789df;
> +  expected_result_dfp64 = 300.45df;
> +
> +  result_dfp64 = __builtin_dfp_quantize (srcA_dfp64, srcB_dfp64, 0x1);
> +
> +  if (result_dfp64 != expected_result_dfp64)
> +#if DEBUG
> +    printf("DFP 64-bit quantize of variable, RMC = 1 result does not match 
> expected result\n");
> +#else
> +    abort();
> +#endif
> +
> +  srcA_dfp64 = 100.001df;
> +  srcB_dfp64 = 3001.456789df;
> +  expected_result_dfp64 = 3001.457df;
> +
> +  result_dfp64 = __builtin_dfp_quantize (srcA_dfp64, srcB_dfp64, 0x2);
> +
> +  if (result_dfp64 != expected_result_dfp64)
> +#if DEBUG
> +    printf("DFP 64-bit quantize of variable, RMC = 2 result does not match 
> expected result\n");
> +#else
> +    abort();
> +#endif
> +
> +  /* Tests for 64-bit quantize with immediate value.  */
> +
> +  srcB_dfp64 = 10.4567df;
> +  expected_result_dfp64 = 000.0df;
> +
> +  result_dfp64 = __builtin_dfp_quantize (IMM2, srcB_dfp64, 0x0);
> +
> +  if (result_dfp64 != expected_result_dfp64)
> +#if DEBUG
> +    printf("DFP 64-bit quantize immediate, RMC = 0 result does not match 
> expected result\n");
> +#else
> +    abort();
> +#endif
> +
> +  srcB_dfp64 = 104567.891df;
> +  expected_result_dfp64 = 100000.0df;
> +
> +  result_dfp64 = __builtin_dfp_quantize (IMM4, srcB_dfp64, 0x1);
> +
> +  if (result_dfp64 != expected_result_dfp64)
> +#if DEBUG
> +    printf("DFP 64-bit quantize immediate, RMC = 1 result does not match 
> expected result\n");
> +#else
> +    abort();
> +#endif
> +
> +  srcB_dfp64 = 109876.54321df;
> +  expected_result_dfp64 = 109900.0df;
> +
> +  result_dfp64 = __builtin_dfp_quantize (IMM2, srcB_dfp64, 0x2);
> +
> +  if (result_dfp64 != expected_result_dfp64)
> +#if DEBUG
> +    printf("DFP 64-bit quantize immediate, RMC = 2 result does not match 
> expected result\n");
> +#else
> +    abort();
> +#endif
> +
> +  /* Tests for quantize 128-bit DFP variable.  */
> +  srcA_dfp128 = 0.018df;
> +  srcB_dfp128 = 50000.18345df;
> +  expected_result_dfp128 = 50000.180df;
> +
> +  result_dfp128 = __builtin_dfp_quantize (srcA_dfp128, srcB_dfp128, 0x0);
> +  
> +  if (result_dfp128 != expected_result_dfp128)
> +#if DEBUG
> +    printf("DFP 128-bit quantize variable, RMC = 0 result does not match 
> expected result\n");
> +#else
> +    abort();
> +#endif
> +
> +  srcA_dfp128 = 8.01df;
> +  srcB_dfp128 = 50000.18345df;
> +  expected_result_dfp128 = 50000.18df;
> +
> +  result_dfp128 = __builtin_dfp_quantize (srcA_dfp128, srcB_dfp128, 0x1);
> +  
> +  if (result_dfp128 != expected_result_dfp128)
> +#if DEBUG
> +    printf("DFP 128-bit quantize variable, RMC = 1 result does not match 
> expected result\n");
> +#else
> +    abort();
> +#endif
> +
> +  srcA_dfp128 = 0.1234df;
> +  srcB_dfp128 = 50000.18346789df;
> +  expected_result_dfp128 = 50000.1800df;
> +
> +  result_dfp128 = __builtin_dfp_quantize (srcA_dfp128, srcB_dfp128, 0x2);
> +  
> +  if (result_dfp128 != expected_result_dfp128)
> +#if DEBUG
> +    printf("DFP 128-bit quantize variable, RMC = 2 result does not match 
> expected result\n");
> +#else
> +    abort();
> +#endif
> +
> +  /* Tests for 128-bit quantize with immediate value.  */
> +  srcB_dfp128 = 1234.18345df;
> +  expected_result_dfp128 = 1200.0df;
> +
> +  result_dfp128 = __builtin_dfp_quantize (IMM2, srcB_dfp128, 0x0);
> +
> +  if (result_dfp128 != expected_result_dfp128)
> +#if DEBUG
> +    printf("DFP 128-bit quantize immediate, RMC = 0 result does not match 
> expected result\n");
> +#else
> +    abort();
> +#endif
> +
> +  srcB_dfp128 = 123456.18345df;
> +  expected_result_dfp128 = 120000.0df;
> +
> +  result_dfp128 = __builtin_dfp_quantize (IMM4, srcB_dfp128, 0x1);
> +
> +  if (result_dfp128 != expected_result_dfp128)
> +#if DEBUG
> +    printf("DFP 128-bit quantize immediate, RMC = 1 result does not match 
> expected result\n");
> +#else
> +    abort();
> +#endif
> +
> +  srcB_dfp128 = 12361834.5df;
> +  expected_result_dfp128 = 12362000.0df;
> +
> +  result_dfp128 = __builtin_dfp_quantize (IMM3, srcB_dfp128, 0x2);
> +
> +  if (result_dfp128 != expected_result_dfp128)
> +#if DEBUG
> +    printf("DFP 128-bit quantize immediate, RMC = 2 result does not match 
> expected result\n");
> +#else
> +    abort();
> +#endif
> +
> +    return 0;
> +}
> +
> +/* { dg-final { scan-assembler-times {\mdqua\M}   3 } } */
> +/* { dg-final { scan-assembler-times {\mdquai\M}  3 } } */
> +/* { dg-final { scan-assembler-times {\mdquaq\M}  3 } } */
> +/* { dg-final { scan-assembler-times {\mdquaiq\M} 3 } } */


BR,
Kewen

Re: [PATCH ver 4] rs6000, add overloaded DFP quantize support

Reply via email to