Hello! The attached patch introduces the __builtin_signbitq built-in function, so the compiler will be able to use the SSE4.1 PTEST instruction to determine the sign bit of a __float128 value.
The patch introduces the complete infrastructure, including a fallback to the __signbittf2 libgcc function for non-SSE4.1 targets. I have changed libquadmath to use __builtin_signbitq, and there were numerous places where the call to signbitq + test + conditional jump was reduced to, e.g.: e0d8: 66 0f 38 17 35 4f a6 ptest 0x1a64f(%rip),%xmm6 # 28730 <_fini+0x24> e0df: 01 00 e0e1: 74 19 je e0fc <__quadmath_kernel_sincosq+0x24c> 2016-06-13 Uros Bizjak <ubiz...@gmail.com> * config/i386/i386-builtin-types.def (INT_FTYPE_FLOAT128): New function type. * config/i386/i386.c (enum ix86_builtins) [IX86_BUILTIN_SIGNBITQ]: New. (ix86_init_builtins): Add __builtin_signbitq function. (ix86_expand_args_builtin): Handle INT_FTYPE_FLOAT128. (ix86_expand_builtin): Handle IX86_BUILTIN_SIGNBITQ. * config/i386/i386.md (signbittf2): New expander. * config/i386/sse.md (ptesttf2): New insn pattern. * doc/extend.texi (x86 Built-in Functions): Document __builtin_signbitq. libgcc/ChangeLog: 2016-06-13 Uros Bizjak <ubiz...@gmail.com> * config.host (i[34567]86-*-* | x86_64-*-*): Always include i386/${host_address}/t-softfp in tmake_file. * config/i386/32/t-softfp: Update comment for __builtin_copysignq. * config/i386/32/tf-signs.c: Add __signbittf2 fallback function. * config/i386/64/t-softfp: New file. * config/i386/64/tf-signs.c: Ditto. * config/i386/libgcc-bsd.ver: Add __signbittf2. * config/i386/libgcc-glibc.ver: Ditto. * config/i386/libgcc-sol2.ver: Ditto. testsuite/ChangeLog: 2016-06-13 Uros Bizjak <ubiz...@gmail.com> * gcc.target/i386/float128-3.c: New test. * gcc.target/i386/quad-sse4.c: Ditto. * gcc.target/i386/quad-sse.c: Use -msse instead of -msse2. Update scan strings. The patch was bootstrapped and regression tested on x86_64-linux-gnu {,-m32} with compilers configured both with and without "--with-arch=corei7 --with-cpu=corei7". The functionality was also tested with a libquadmath library amended to use __builtin_signbitq, where both ptest insn generation and the fallback to the __signbittf2 support function were exercised. 
Committed to mainline SVN. Uros.
Index: gcc/config/i386/i386-builtin-types.def =================================================================== --- gcc/config/i386/i386-builtin-types.def (revision 237380) +++ gcc/config/i386/i386-builtin-types.def (working copy) @@ -202,6 +202,7 @@ DEF_FUNCTION_TYPE (INT, V8QI) DEF_FUNCTION_TYPE (INT, V8SF) DEF_FUNCTION_TYPE (INT, V32QI) DEF_FUNCTION_TYPE (INT, PCCHAR) +DEF_FUNCTION_TYPE (INT, FLOAT128) DEF_FUNCTION_TYPE (INT64, INT64) DEF_FUNCTION_TYPE (INT64, V2DF) DEF_FUNCTION_TYPE (INT64, V4SF) Index: gcc/config/i386/i386.c =================================================================== --- gcc/config/i386/i386.c (revision 237380) +++ gcc/config/i386/i386.c (working copy) @@ -32722,6 +32722,7 @@ enum ix86_builtins IX86_BUILTIN_NANSQ, IX86_BUILTIN_FABSQ, IX86_BUILTIN_COPYSIGNQ, + IX86_BUILTIN_SIGNBITQ, /* Vectorizer support builtins. */ IX86_BUILTIN_CPYSGNPS, @@ -33983,6 +33984,8 @@ static const struct builtin_description bdesc_args { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI }, { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_signbittf2, 0, IX86_BUILTIN_SIGNBITQ, UNKNOWN, (int) INT_FTYPE_FLOAT128 }, + /* SSE4.1 */ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT }, { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT }, @@ -38299,6 +38302,13 @@ ix86_init_builtins (void) TREE_READONLY (decl) = 1; ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl; + ftype = ix86_get_builtin_func_type (INT_FTYPE_FLOAT128); + decl = add_builtin_function ("__builtin_signbitq", ftype, + IX86_BUILTIN_SIGNBITQ, BUILT_IN_MD, + "__signbittf2", NULL_TREE); + TREE_READONLY (decl) = 1; + 
ix86_builtins[(int) IX86_BUILTIN_SIGNBITQ] = decl; + ix86_init_tm_builtins (); ix86_init_mmx_sse_builtins (); ix86_init_mpx_builtins (); @@ -39128,6 +39138,7 @@ ix86_expand_args_builtin (const struct builtin_des case INT_FTYPE_V4SF: case INT_FTYPE_V2DF: case INT_FTYPE_V32QI: + case INT_FTYPE_FLOAT128: case V16QI_FTYPE_V16QI: case V8SI_FTYPE_V8SF: case V8SI_FTYPE_V4SI: @@ -42638,17 +42649,27 @@ rdseed_step: i < ARRAY_SIZE (bdesc_args); i++, d++) if (d->code == fcode) - switch (fcode) - { - case IX86_BUILTIN_FABSQ: - case IX86_BUILTIN_COPYSIGNQ: - if (!TARGET_SSE) - /* Emit a normal call if SSE isn't available. */ - return expand_call (exp, target, ignore); - default: - return ix86_expand_args_builtin (d, exp, target); - } + { + switch (fcode) + { + case IX86_BUILTIN_FABSQ: + case IX86_BUILTIN_COPYSIGNQ: + if (!TARGET_SSE) + /* Emit a normal call if SSE isn't available. */ + return expand_call (exp, target, ignore); + break; + case IX86_BUILTIN_SIGNBITQ: + if (!TARGET_SSE4_1) + /* Emit a normal call if SSE4_1 isn't available. 
*/ + return expand_call (exp, target, ignore); + break; + default: + break; + } + return ix86_expand_args_builtin (d, exp, target); + } + for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) if (d->code == fcode) return ix86_expand_sse_comi (d, exp, target); Index: gcc/config/i386/i386.md =================================================================== --- gcc/config/i386/i386.md (revision 237382) +++ gcc/config/i386/i386.md (working copy) @@ -16198,6 +16198,22 @@ DONE; }) +(define_expand "signbittf2" + [(use (match_operand:SI 0 "register_operand")) + (use (match_operand:TF 1 "register_operand"))] + "TARGET_SSE4_1" +{ + rtx mask = ix86_build_signbit_mask (TFmode, 0, 0); + rtx scratch = gen_reg_rtx (QImode); + + emit_insn (gen_ptesttf2 (operands[1], mask)); + ix86_expand_setcc (scratch, NE, + gen_rtx_REG (CCZmode, FLAGS_REG), const0_rtx); + + emit_insn (gen_zero_extendqisi2 (operands[0], scratch)); + DONE; +}) + (define_expand "signbitxf2" [(use (match_operand:SI 0 "register_operand")) (use (match_operand:XF 1 "register_operand"))] Index: gcc/config/i386/sse.md =================================================================== --- gcc/config/i386/sse.md (revision 237380) +++ gcc/config/i386/sse.md (working copy) @@ -15212,6 +15212,19 @@ (const_string "*"))) (set_attr "mode" "<sseinsnmode>")]) +(define_insn "ptesttf2" + [(set (reg:CC FLAGS_REG) + (unspec:CC [(match_operand:TF 0 "register_operand" "Yr, *x, x") + (match_operand:TF 1 "vector_operand" "YrBm, *xBm, xm")] + UNSPEC_PTEST))] + "TARGET_SSE4_1" + "%vptest\t{%1, %0|%0, %1}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssecomi") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,orig,vex") + (set_attr "mode" "TI")]) + (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>" [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x") (unspec:VF_128_256 Index: gcc/doc/extend.texi =================================================================== --- 
gcc/doc/extend.texi (revision 237380) +++ gcc/doc/extend.texi (working copy) @@ -18455,6 +18455,7 @@ of them implement the function that is part of the @smallexample __float128 __builtin_fabsq (__float128) __float128 __builtin_copysignq (__float128, __float128) +int __builtin_signbitq (__float128) @end smallexample The following built-in functions are always available. Index: gcc/testsuite/gcc.target/i386/float128-3.c =================================================================== --- gcc/testsuite/gcc.target/i386/float128-3.c (nonexistent) +++ gcc/testsuite/gcc.target/i386/float128-3.c (working copy) @@ -0,0 +1,21 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -msse4.1" } */ +/* { dg-require-effective-target sse4 } */ + +#include "sse4_1-check.h" + +extern void abort (void); + +static void +sse4_1_test (void) +{ + static volatile __float128 a; + + a = -1.2q; + if (!__builtin_signbitq (a)) + abort (); + + a = 1.2q; + if (__builtin_signbitq (a)) + abort (); +} Index: gcc/testsuite/gcc.target/i386/quad-sse.c =================================================================== --- gcc/testsuite/gcc.target/i386/quad-sse.c (revision 237380) +++ gcc/testsuite/gcc.target/i386/quad-sse.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -msse2" } */ +/* { dg-options "-O2 -msse" } */ __float128 x, y; @@ -18,4 +18,4 @@ __float128 test_3(void) return __builtin_copysignq (x, y); } -/* { dg-final { scan-assembler-not "call.*(neg|fabs|copysign)" } } */ +/* { dg-final { scan-assembler-not "neg|fabs|copysign" } } */ Index: gcc/testsuite/gcc.target/i386/quad-sse4.c =================================================================== --- gcc/testsuite/gcc.target/i386/quad-sse4.c (nonexistent) +++ gcc/testsuite/gcc.target/i386/quad-sse4.c (working copy) @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse4.1" } */ + +__float128 x; + +int __test_1(void) +{ + return __builtin_signbitq (x); +} + +/* { dg-final { scan-assembler-not "signbit" } 
} */ Index: libgcc/config/i386/32/t-softfp =================================================================== --- libgcc/config/i386/32/t-softfp (revision 237380) +++ libgcc/config/i386/32/t-softfp (working copy) @@ -1,5 +1,6 @@ # Omit TImode functions softfp_int_modes := si di -# Provide fallbacks for __builtin_copysignq and __builtin_fabsq. +# Provide fallbacks for __builtin_copysignq, __builtin_fabsq +# and __builtin_signbitq. LIB2ADD += $(srcdir)/config/i386/32/tf-signs.c Index: libgcc/config/i386/32/tf-signs.c =================================================================== --- libgcc/config/i386/32/tf-signs.c (revision 237380) +++ libgcc/config/i386/32/tf-signs.c (working copy) @@ -37,6 +37,7 @@ union _FP_UNION_Q __float128 __copysigntf3 (__float128, __float128); __float128 __fabstf2 (__float128); +int __signbittf2 (__float128); __float128 __copysigntf3 (__float128 a, __float128 b) @@ -60,3 +61,13 @@ __fabstf2 (__float128 a) return A.flt; } + +int +__signbittf2 (__float128 a) +{ + union _FP_UNION_Q A; + + A.flt = a; + + return A.bits.sign; +} Index: libgcc/config/i386/64/t-softfp =================================================================== --- libgcc/config/i386/64/t-softfp (nonexistent) +++ libgcc/config/i386/64/t-softfp (working copy) @@ -0,0 +1,2 @@ +# Provide fallbacks for __builtin_signbitq +LIB2ADD += $(srcdir)/config/i386/64/tf-signs.c Index: libgcc/config/i386/64/tf-signs.c =================================================================== --- libgcc/config/i386/64/tf-signs.c (nonexistent) +++ libgcc/config/i386/64/tf-signs.c (working copy) @@ -0,0 +1,46 @@ +/* Copyright (C) 2016 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. 
+ +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +union _FP_UNION_Q +{ + __float128 flt; + struct + { + unsigned long long frac0 : 64; + unsigned long long frac1 : 48; + unsigned exp : 15; + unsigned sign : 1; + } bits __attribute__((packed)); +}; + +int __signbittf3 (__float128); + +int +__signbittf2 (__float128 a) +{ + union _FP_UNION_Q A; + + A.flt = a; + + return A.bits.sign; +} Index: libgcc/config/i386/libgcc-glibc.ver =================================================================== --- libgcc/config/i386/libgcc-glibc.ver (revision 237380) +++ libgcc/config/i386/libgcc-glibc.ver (working copy) @@ -152,6 +152,10 @@ GCC_4.8.0 { __cpu_model __cpu_indicator_init } + +GCC_7.0.0 { + __signbittf2 +} %else GCC_4.4.0 { __addtf3 @@ -193,4 +197,8 @@ GCC_4.8.0 { __cpu_model __cpu_indicator_init } + +GCC_7.0.0 { + __signbittf2 +} %endif Index: libgcc/config.host =================================================================== --- libgcc/config.host (revision 237380) +++ libgcc/config.host (working copy) @@ -1361,9 +1361,7 @@ i[34567]86-*-darwin* | x86_64-*-darwin* | \ i[34567]86-*-freebsd* | x86_64-*-freebsd* | \ i[34567]86-*-openbsd* | x86_64-*-openbsd*) tmake_file="${tmake_file} t-softfp-tf" - if test "${host_address}" = 32; then - tmake_file="${tmake_file} i386/${host_address}/t-softfp" - fi + tmake_file="${tmake_file} i386/${host_address}/t-softfp" 
tmake_file="${tmake_file} i386/t-softfp t-softfp" ;; esac