Kewen, GCC maintainers: Version 2: I have addressed the various comments from Kewen. I had issues with adding an additional overloaded version of scalar_insert_exp with vector arguments. The overload infrastructure didn't work with a mix of scalar and vector arguments. I did rename __builtin_insertf128_exp to __builtin_vsx_scalar_insert_exp_vqp to make it similar to the existing builtin. I also wasn't able to get the suggested merge of xsxexpqp_f128_<mode> with xsxexpqp_<mode> to work, so I left the two simpler definitions.
The patch adds three new builtins to extract the significand and exponent of an IEEE float 128-bit value where the builtin argument is a vector. Additionally, a builtin to insert the exponent into an IEEE float 128-bit vector argument is added. These builtins were requested since there is no clean and optimal way to transfer between a vector and a scalar IEEE 128-bit value. The patch has been tested on Power 10 with no regressions. Please let me know if the patch is acceptable or not. Thanks. Carl --------------------------------------- rs6000: Add builtins for IEEE 128-bit floating point values Add support for the following builtins: __vector unsigned long long int __builtin_scalar_extract_exp_to_vec (__ieee128); __vector unsigned __int128 __builtin_scalar_extract_sig_to_vec (__ieee128); __ieee128 __builtin_vsx_scalar_insert_exp_vqp (__vector unsigned __int128, __vector unsigned long long); These builtins were requested since there is no clean and performant way to transfer between a vector type and the ieee128 scalar, despite the fact that both reside in vector registers. Also a union transfer does not work correctly on most GCC versions. gcc/ * config/rs6000/rs6000-builtins.def (__builtin_extractf128_exp, __builtin_extractf128_sig, __builtin_insertf128_exp): Add new builtin definitions. * config/rs6000.md (extractf128_exp_<mode>, insertf128_exp_<mode>, extractf128_sig_<mode>): Add define_expand for new builtins. (xsxexpqp_f128_<mode>, xsxsigqp_f128_<mode>, siexpqpf_f128_<mode>): Add define_insn for new builtins. * doc/extend.texi (__builtin_extractf128_exp, __builtin_extractf128_sig, __builtin_insertf128_exp): Add documentation for new builtins. gcc/testsuite/ * gcc.target/powerpc/bfp/extract-exp-ieee128.c: New test case. * gcc.target/powerpc/bfp/extract-sig-ieee128.c: New test case. * gcc.target/powerpc/bfp/insert-exp-ieee128.c: New test case. 
--- gcc/config/rs6000/rs6000-builtins.def | 9 +++ gcc/config/rs6000/rs6000-overload.def | 2 + gcc/config/rs6000/vsx.md | 31 +++++++++- gcc/doc/extend.texi | 10 ++++ .../powerpc/bfp/extract-exp-ieee128.c | 50 ++++++++++++++++ .../powerpc/bfp/extract-sig-ieee128.c | 57 ++++++++++++++++++ .../powerpc/bfp/insert-exp-ieee128.c | 58 +++++++++++++++++++ 7 files changed, 216 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index 638d0bc72ca..92f22481687 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -2901,6 +2901,12 @@ fpmath double __builtin_truncf128_round_to_odd (_Float128); TRUNCF128_ODD trunckfdf2_odd {} + vull __builtin_scalar_extract_exp_to_vec (_Float128); + EEXPKF xsxexpqp_f128_kf {} + + vuq __builtin_scalar_extract_sig_to_vec (_Float128); + ESIGKF xsxsigqp_f128_kf {} + const signed long long __builtin_vsx_scalar_extract_expq (_Float128); VSEEQP xsxexpqp_kf {} @@ -2915,6 +2921,9 @@ unsigned long long); VSIEQPF xsiexpqpf_kf {} + const _Float128 __builtin_vsx_scalar_insert_exp_vqp (vuq, vull); + VSIEDP_VULL xsiexpqpf_f128_kf {} + const signed int __builtin_vsx_scalar_test_data_class_qp (_Float128, \ const int<7>); VSTDCQP xststdcqp_kf {} diff --git a/gcc/config/rs6000/rs6000-overload.def b/gcc/config/rs6000/rs6000-overload.def index c582490c084..102ead9f80b 100644 --- a/gcc/config/rs6000/rs6000-overload.def +++ b/gcc/config/rs6000/rs6000-overload.def @@ -4515,6 +4515,8 @@ VSIEQP _Float128 __builtin_vec_scalar_insert_exp (_Float128, unsigned long long); VSIEQPF + _Float128 __builtin_vsx_scalar_insert_exp_vqp (vuq, vull); + VSIEDP_VULL [VEC_VSTDC, scalar_test_data_class, __builtin_vec_scalar_test_data_class] unsigned 
int __builtin_vec_scalar_test_data_class (float, const int); diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 7d845df5c2d..0f6df4bbcf5 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -369,7 +369,7 @@ UNSPEC_XXSPLTI32DX UNSPEC_XXBLEND UNSPEC_XXPERMX - ]) +]) (define_int_iterator XVCVBF16 [UNSPEC_VSX_XVCVSPBF16 UNSPEC_VSX_XVCVBF16SPN]) @@ -5016,6 +5016,15 @@ "xsxexpqp %0,%1" [(set_attr "type" "vecmove")]) +;; VSX Scalar to Vector Extract Exponent IEEE 128-bit floating point format +(define_insn "xsxexpqp_f128_<mode>" + [(set (match_operand:V2DI 0 "altivec_register_operand" "=v") + (unspec:V2DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")] + UNSPEC_VSX_SXEXPDP))] + "TARGET_P9_VECTOR" + "xsxexpqp %0,%1" + [(set_attr "type" "vecmove")]) + ;; VSX Scalar Extract Exponent Double-Precision (define_insn "xsxexpdp" [(set (match_operand:DI 0 "register_operand" "=r") @@ -5034,6 +5043,15 @@ "xsxsigqp %0,%1" [(set_attr "type" "vecmove")]) +;; VSX Scalar to Vector Extract Significand IEEE 128-bit floating point format +(define_insn "xsxsigqp_f128_<mode>" + [(set (match_operand:V1TI 0 "altivec_register_operand" "=v") + (unspec:V1TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")] + UNSPEC_VSX_SXSIG))] + "TARGET_P9_VECTOR" + "xsxsigqp %0,%1" + [(set_attr "type" "vecmove")]) + ;; VSX Scalar Extract Significand Double-Precision (define_insn "xsxsigdp" [(set (match_operand:DI 0 "register_operand" "=r") @@ -5054,6 +5072,17 @@ "xsiexpqp %0,%1,%2" [(set_attr "type" "vecmove")]) +;; VSX Insert Exponent IEEE 128-bit Floating point format +(define_insn "xsiexpqpf_f128_<mode>" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (unspec:IEEE128 + [(match_operand:V1TI 1 "altivec_register_operand" "v") + (match_operand:V2DI 2 "altivec_register_operand" "v")] + UNSPEC_VSX_SIEXPQP))] + "TARGET_P9_VECTOR" + "xsiexpqp %0,%1,%2" + [(set_attr "type" "vecmove")]) + ;; VSX Scalar Insert Exponent Quad-Precision 
(define_insn "xsiexpqp_<mode>" [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index e426a2eb7d8..1d3372d322d 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -19724,6 +19724,10 @@ double scalar_insert_exp (double significand, unsigned long long int exponent); ieee_128 scalar_insert_exp (unsigned __int128 significand, unsigned long long int exponent); ieee_128 scalar_insert_exp (ieee_128 significand, unsigned long long int exponent); +vector unsigned long long __builtin_scalar_extract_exp_to_vec (ieee_128); +vector unsigned __int128 __builtin_scalar_extract_sig_to_vec (ieee_128); +vector ieee_128 __builtin_scalar_insert_exp_vqp (vector unsigned __int128, + vector unsigned long long); int scalar_cmp_exp_gt (double arg1, double arg2); int scalar_cmp_exp_lt (double arg1, double arg2); @@ -19777,6 +19781,12 @@ The significand and exponent components of the result are composed of the least significant 15 bits of the @code{exponent} argument and the least significant 112 bits of the @code{significand} argument respectively. +The @code{__builtin_scalar_extract_exp_to_vec}, +@code{__builtin_scalar_extract_sig_to_vec} and +@code{__builtin_scalar_insert_exp_vqp} are similar to +@code{scalar_extract_exp}, @code{scalar_extract_sig} and +@code{scalar_insert_exp} except they operate on vector arguments. 
+ The @code{scalar_cmp_exp_gt}, @code{scalar_cmp_exp_lt}, @code{scalar_cmp_exp_eq}, and @code{scalar_cmp_exp_unordered} built-in functions return a non-zero value if @code{arg1} is greater than, less diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c b/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c new file mode 100644 index 00000000000..39981f0a274 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c @@ -0,0 +1,50 @@ +/* { dg-do run { target { powerpc*-*-* } } } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-require-effective-target p9vector_hw } */ +/* { dg-options "-mdejagnu-cpu=power9" } */ + +#include <altivec.h> +#include <stdlib.h> + +#if DEBUG +#include <stdio.h> +#endif + +vector unsigned long long int +get_exponents (__ieee128 *p) +{ + __ieee128 source = *p; + + return __builtin_scalar_extract_exp_to_vec (source); +} + +int +main () +{ + vector unsigned long long int result, exp_result; + union conv128_t + { + __ieee128 val_ieee128; + __int128 val_int128; + } source; + + exp_result[0] = 0x0ULL; + exp_result[1] = 0x1234ULL; + source.val_int128 = 0x923456789ABCDEF0ULL; + source.val_int128 = (source.val_int128 << 64) | 0x123456789ABCDEFULL; + + result = get_exponents (&source.val_ieee128); + + if ((result[0] != exp_result[0]) || (result[1] != exp_result[1])) +#if DEBUG + { + printf("result[0] = 0x%llx; exp_result[0] = 0x%llx\n", + result[0], exp_result[0]); + printf("result[1] = 0x%llx; exp_result[1] = 0x%llx\n", + result[1], exp_result[1]); + } +#else + abort(); +#endif + return 0; +} diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c b/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c new file mode 100644 index 00000000000..f7b3aedb832 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c @@ -0,0 +1,57 @@ +/* { dg-do run { target { powerpc*-*-* } } } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-require-effective-target 
p9vector_hw } */ +/* { dg-options "-mdejagnu-cpu=power9" } */ + +#include <altivec.h> +#include <stdlib.h> + +#if DEBUG +#include <stdio.h> +#endif + +vector unsigned __int128 +get_significand (__ieee128 *p) +{ + __ieee128 source = *p; + + return __builtin_scalar_extract_sig_to_vec(source); +} + +int +main () +{ + #define NOT_ZERO_OR_DENORMAL 0x1000000000000 + + union conv128_t + { + __ieee128 val_ieee128; + unsigned long long int val_ull[2]; + unsigned __int128 val_uint128; + __vector unsigned __int128 val_vuint128; + } source, result, exp_result; + + /* Result is not zero or denormal. */ + exp_result.val_ull[1] = 0x00056789ABCDEF0ULL | NOT_ZERO_OR_DENORMAL; + exp_result.val_ull[0] = 0x123456789ABCDEFULL; + source.val_uint128 = 0x923456789ABCDEF0ULL; + source.val_uint128 = (source.val_uint128 << 64) | 0x123456789ABCDEFULL; + + /* Note, bits[0:14] are set to 0, bit[15] is 0 if the input was zero or + Denormal, 1 otherwise. */ + result.val_vuint128 = get_significand (&source.val_ieee128); + + if ((result.val_ull[0] != exp_result.val_ull[0]) + || (result.val_ull[1] != exp_result.val_ull[1])) +#if DEBUG + { + printf("result[0] = 0x%llx; exp_result[0] = 0x%llx\n", + result.val_ull[0], exp_result.val_ull[0]); + printf("result[1] = 0x%llx; exp_result[1] = 0x%llx\n", + result.val_ull[1], exp_result.val_ull[1]); + } +#else + abort(); +#endif + return 0; +} diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c b/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c new file mode 100644 index 00000000000..8ec1006f8b5 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c @@ -0,0 +1,58 @@ +/* { dg-do run { target { powerpc*-*-* } } } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-require-effective-target p9vector_hw } */ +/* { dg-options "-mdejagnu-cpu=power9" } */ + +#include <altivec.h> +#include <stdlib.h> + +#ifdef DEBUG +#include <stdio.h> +#endif + +__ieee128 +insert_exponent (__vector unsigned __int128 
*significand_p, + __vector unsigned long long int *exponent_p) +{ + __vector unsigned __int128 significand = *significand_p; + __vector unsigned long long int exponent = *exponent_p; + + return __builtin_vsx_scalar_insert_exp_vqp (significand, exponent); +} + +int +main () +{ + union conv128_t + { + __ieee128 val_ieee128; + __vector unsigned __int128 val_vint128; + __vector unsigned long long int val_vull; + } result, exp_result, significand; + + __vector unsigned long long int exponent; + + significand.val_vull[0] = 0xFEDCBA9876543210ULL; + significand.val_vull[1] = 0x7FFF12345678ABCDULL; /* positive value */ + + exponent[0] = 0x5678; + exponent[1] = 0x1234; + + exp_result.val_vull[0] = 0xFEDCBA9876543210ULL; + exp_result.val_vull[1] = 0x123412345678ABCDULL; + + result.val_ieee128 = insert_exponent(&significand.val_vint128, &exponent); + + if (result.val_ieee128 != exp_result.val_ieee128) +#ifdef DEBUG + { + printf("result.val_vull[0] = 0x%llx, exp_result.val_vull[0] = 0x%llx\n", + result.val_vull[0], exp_result.val_vull[0]); + printf("result.val_vull[1] = 0x%llx, exp_result.val_vull[1] = 0x%llx\n", + result.val_vull[1], exp_result.val_vull[1]); + } +#else + abort (); +#endif + +} -- 2.37.2