Hello!

Attached patch intriduces __builtin_signbitq built-in function, so the
compiler will be able to use SSE4.1 PTEST instruction to determine
sign bit of __float128 value.

The patch introduces complete infrastructure, including fallback to
__signbittf2 libgcc function for non-SSE4.1 targets.

I have changed libquadmath to use __builtin_signbitq, and there were
numerous places, where the call to signbitq + test + conditional jump
reduced to e.g.:

    e0d8:    66 0f 38 17 35 4f a6     ptest  0x1a64f(%rip),%xmm6
 # 28730 <_fini+0x24>
    e0df:    01 00
    e0e1:    74 19                    je     e0fc
<__quadmath_kernel_sincosq+0x24c>

2016-06-13  Uros Bizjak  <ubiz...@gmail.com>

    * config/i386/i386-builtin-types.def (INT_FTYPE_FLOAT128):
    New function type.
    * config/i386/i386.c (enum ix86_builtins) [IX86_BUILTIN_SIGNBITQ]: New.
    (ix86_init_builtins): Add __builtin_signbitq function.
    (ix86_expand_args_builtin): Handle INT_FTYPE_FLOAT128.
    (ix86_expand_builtin): Handle IX86_BUILTIN_SIGNBITQ.
    * config/i386/i386.md (signbittf2): New expander.
    * config/i386/sse.md (ptesttf2): New insn pattern.
    * doc/extend.texi (x86 Built-in Functions): Document
    __builtin_signbitq.

libgcc/ChangeLog:

2016-06-13  Uros Bizjak  <ubiz...@gmail.com>

    * config.host (i[34567]86-*-* | x86_64-*-*): Always include
    i386/${host_address}/t-softfp in tmake_file.
    * config/i386/32/t-softfp: Update comment for __builtin_copysignq.
    * config/i386/32/tf-signs.c: Add __signbittf2 fallback function.
    * config/i386/64/t-softfp: New file.
    * config/i386/64/tf-signs.c: Ditto.
    * config/i386/libgcc-bsd.ver: Add __signbittf2.
    * config/i386/libgcc-glibc.ver: Ditto.
    * config/i386/libgcc-sol2.ver: Ditto.

testsuite/ChangeLog:

2016-06-13  Uros Bizjak  <ubiz...@gmail.com>

    * gcc.target/i386/float128-3.c: New test.
    * gcc.target/i386/quad-sse4.c: Ditto.
    * gcc.target/i386/quad-sse.c: Use -msse instead of -msse2.
    Update scan strings.

Patch was bootstrapped and regression tested on x86_64-linux-gnu
{,-m32} with and without "--with-arch=corei7 --with-cpu=corei7"
configured compiler. The functionality was also tested by
__builtin_signbitq amended libquadmath library, where ptest insn
generation and a fallback to __signbittf2 support function were
exercised.

Committed to mainline SVN.

Uros.
Index: gcc/config/i386/i386-builtin-types.def
===================================================================
--- gcc/config/i386/i386-builtin-types.def      (revision 237380)
+++ gcc/config/i386/i386-builtin-types.def      (working copy)
@@ -202,6 +202,7 @@ DEF_FUNCTION_TYPE (INT, V8QI)
 DEF_FUNCTION_TYPE (INT, V8SF)
 DEF_FUNCTION_TYPE (INT, V32QI)
 DEF_FUNCTION_TYPE (INT, PCCHAR)
+DEF_FUNCTION_TYPE (INT, FLOAT128)
 DEF_FUNCTION_TYPE (INT64, INT64)
 DEF_FUNCTION_TYPE (INT64, V2DF)
 DEF_FUNCTION_TYPE (INT64, V4SF)
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c      (revision 237380)
+++ gcc/config/i386/i386.c      (working copy)
@@ -32722,6 +32722,7 @@ enum ix86_builtins
   IX86_BUILTIN_NANSQ,
   IX86_BUILTIN_FABSQ,
   IX86_BUILTIN_COPYSIGNQ,
+  IX86_BUILTIN_SIGNBITQ,
 
   /* Vectorizer support builtins.  */
   IX86_BUILTIN_CPYSGNPS,
@@ -33983,6 +33984,8 @@ static const struct builtin_description bdesc_args
   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 
"__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) 
V2DI_FTYPE_V4SI_V4SI },
   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", 
IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
 
+  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_signbittf2, 0, IX86_BUILTIN_SIGNBITQ, 
UNKNOWN, (int) INT_FTYPE_FLOAT128 },
+
   /* SSE4.1 */
   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", 
IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", 
IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
@@ -38299,6 +38302,13 @@ ix86_init_builtins (void)
   TREE_READONLY (decl) = 1;
   ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
 
+  ftype = ix86_get_builtin_func_type (INT_FTYPE_FLOAT128);
+  decl = add_builtin_function ("__builtin_signbitq", ftype,
+                              IX86_BUILTIN_SIGNBITQ, BUILT_IN_MD,
+                              "__signbittf2", NULL_TREE);
+  TREE_READONLY (decl) = 1;
+  ix86_builtins[(int) IX86_BUILTIN_SIGNBITQ] = decl;
+
   ix86_init_tm_builtins ();
   ix86_init_mmx_sse_builtins ();
   ix86_init_mpx_builtins ();
@@ -39128,6 +39138,7 @@ ix86_expand_args_builtin (const struct builtin_des
     case INT_FTYPE_V4SF:
     case INT_FTYPE_V2DF:
     case INT_FTYPE_V32QI:
+    case INT_FTYPE_FLOAT128:
     case V16QI_FTYPE_V16QI:
     case V8SI_FTYPE_V8SF:
     case V8SI_FTYPE_V4SI:
@@ -42638,17 +42649,27 @@ rdseed_step:
        i < ARRAY_SIZE (bdesc_args);
        i++, d++)
     if (d->code == fcode)
-      switch (fcode)
-       {
-       case IX86_BUILTIN_FABSQ:
-       case IX86_BUILTIN_COPYSIGNQ:
-         if (!TARGET_SSE)
-           /* Emit a normal call if SSE isn't available.  */
-           return expand_call (exp, target, ignore);
-       default:
-         return ix86_expand_args_builtin (d, exp, target);
-       }
+      {
+       switch (fcode)
+         {
+         case IX86_BUILTIN_FABSQ:
+         case IX86_BUILTIN_COPYSIGNQ:
+           if (!TARGET_SSE)
+             /* Emit a normal call if SSE isn't available.  */
+             return expand_call (exp, target, ignore);
+           break;
+         case IX86_BUILTIN_SIGNBITQ:
+           if (!TARGET_SSE4_1)
+             /* Emit a normal call if SSE4_1 isn't available.  */
+             return expand_call (exp, target, ignore);
+           break;
+         default:
+           break;
+         }
 
+       return ix86_expand_args_builtin (d, exp, target);
+      }
+
   for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
     if (d->code == fcode)
       return ix86_expand_sse_comi (d, exp, target);
Index: gcc/config/i386/i386.md
===================================================================
--- gcc/config/i386/i386.md     (revision 237382)
+++ gcc/config/i386/i386.md     (working copy)
@@ -16198,6 +16198,22 @@
   DONE;
 })
 
+(define_expand "signbittf2"
+  [(use (match_operand:SI 0 "register_operand"))
+   (use (match_operand:TF 1 "register_operand"))]
+  "TARGET_SSE4_1"
+{
+  rtx mask = ix86_build_signbit_mask (TFmode, 0, 0);
+  rtx scratch = gen_reg_rtx (QImode);
+
+  emit_insn (gen_ptesttf2 (operands[1], mask));
+  ix86_expand_setcc (scratch, NE,
+                    gen_rtx_REG (CCZmode, FLAGS_REG), const0_rtx);
+
+  emit_insn (gen_zero_extendqisi2 (operands[0], scratch));
+  DONE;
+})
+
 (define_expand "signbitxf2"
   [(use (match_operand:SI 0 "register_operand"))
    (use (match_operand:XF 1 "register_operand"))]
Index: gcc/config/i386/sse.md
===================================================================
--- gcc/config/i386/sse.md      (revision 237380)
+++ gcc/config/i386/sse.md      (working copy)
@@ -15212,6 +15212,19 @@
      (const_string "*")))
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn "ptesttf2"
+  [(set (reg:CC FLAGS_REG)
+       (unspec:CC [(match_operand:TF 0 "register_operand" "Yr, *x, x")
+                   (match_operand:TF 1 "vector_operand" "YrBm, *xBm, xm")]
+                  UNSPEC_PTEST))]
+  "TARGET_SSE4_1"
+  "%vptest\t{%1, %0|%0, %1}"
+  [(set_attr "isa" "noavx,noavx,avx")
+   (set_attr "type" "ssecomi")
+   (set_attr "prefix_extra" "1")
+   (set_attr "prefix" "orig,orig,vex")
+   (set_attr "mode" "TI")])
+
 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
   [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
        (unspec:VF_128_256
Index: gcc/doc/extend.texi
===================================================================
--- gcc/doc/extend.texi (revision 237380)
+++ gcc/doc/extend.texi (working copy)
@@ -18455,6 +18455,7 @@ of them implement the function that is part of the
 @smallexample
 __float128 __builtin_fabsq (__float128)
 __float128 __builtin_copysignq (__float128, __float128)
+int __builtin_signbitq (__float128)
 @end smallexample
 
 The following built-in functions are always available.
Index: gcc/testsuite/gcc.target/i386/float128-3.c
===================================================================
--- gcc/testsuite/gcc.target/i386/float128-3.c  (nonexistent)
+++ gcc/testsuite/gcc.target/i386/float128-3.c  (working copy)
@@ -0,0 +1,21 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse4.1" } */
+/* { dg-require-effective-target sse4 } */
+
+#include "sse4_1-check.h"
+
+extern void abort (void);
+
+static void
+sse4_1_test (void)
+{
+  static volatile __float128 a;
+
+  a = -1.2q;
+  if (!__builtin_signbitq (a))
+    abort ();
+
+  a = 1.2q;
+  if (__builtin_signbitq (a))
+    abort ();
+}
Index: gcc/testsuite/gcc.target/i386/quad-sse.c
===================================================================
--- gcc/testsuite/gcc.target/i386/quad-sse.c    (revision 237380)
+++ gcc/testsuite/gcc.target/i386/quad-sse.c    (working copy)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -msse2" } */
+/* { dg-options "-O2 -msse" } */
 
 __float128 x, y;
 
@@ -18,4 +18,4 @@ __float128 test_3(void)
   return __builtin_copysignq (x, y);
 }
 
-/* { dg-final { scan-assembler-not "call.*(neg|fabs|copysign)" } } */
+/* { dg-final { scan-assembler-not "neg|fabs|copysign" } } */
Index: gcc/testsuite/gcc.target/i386/quad-sse4.c
===================================================================
--- gcc/testsuite/gcc.target/i386/quad-sse4.c   (nonexistent)
+++ gcc/testsuite/gcc.target/i386/quad-sse4.c   (working copy)
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4.1" } */
+
+__float128 x;
+
+int __test_1(void)
+{
+  return __builtin_signbitq (x);
+}
+
+/* { dg-final { scan-assembler-not "signbit" } } */
Index: libgcc/config/i386/32/t-softfp
===================================================================
--- libgcc/config/i386/32/t-softfp      (revision 237380)
+++ libgcc/config/i386/32/t-softfp      (working copy)
@@ -1,5 +1,6 @@
 # Omit TImode functions
 softfp_int_modes := si di
 
-# Provide fallbacks for __builtin_copysignq and __builtin_fabsq.
+# Provide fallbacks for __builtin_copysignq, __builtin_fabsq
+# and __builtin_signbitq.
 LIB2ADD += $(srcdir)/config/i386/32/tf-signs.c
Index: libgcc/config/i386/32/tf-signs.c
===================================================================
--- libgcc/config/i386/32/tf-signs.c    (revision 237380)
+++ libgcc/config/i386/32/tf-signs.c    (working copy)
@@ -37,6 +37,7 @@ union _FP_UNION_Q
 
 __float128 __copysigntf3 (__float128, __float128);
 __float128 __fabstf2 (__float128);
+int __signbittf2 (__float128);
 
 __float128
 __copysigntf3 (__float128 a, __float128 b)
@@ -60,3 +61,13 @@ __fabstf2 (__float128 a)
 
   return A.flt;
 }
+
+int
+__signbittf2 (__float128 a)
+{
+  union _FP_UNION_Q A;
+
+  A.flt = a;
+
+  return A.bits.sign;
+}
Index: libgcc/config/i386/64/t-softfp
===================================================================
--- libgcc/config/i386/64/t-softfp      (nonexistent)
+++ libgcc/config/i386/64/t-softfp      (working copy)
@@ -0,0 +1,2 @@
+# Provide fallbacks for __builtin_signbitq
+LIB2ADD += $(srcdir)/config/i386/64/tf-signs.c
Index: libgcc/config/i386/64/tf-signs.c
===================================================================
--- libgcc/config/i386/64/tf-signs.c    (nonexistent)
+++ libgcc/config/i386/64/tf-signs.c    (working copy)
@@ -0,0 +1,46 @@
+/* Copyright (C) 2016 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+union _FP_UNION_Q
+{
+   __float128 flt;
+   struct 
+   {
+      unsigned long long frac0 : 64;
+      unsigned long long frac1 : 48;
+      unsigned exp : 15;
+      unsigned sign : 1;
+   } bits __attribute__((packed));
+};
+
+int __signbittf3 (__float128);
+
+int
+__signbittf2 (__float128 a)
+{
+  union _FP_UNION_Q A;
+
+  A.flt = a;
+
+  return A.bits.sign;
+}
Index: libgcc/config/i386/libgcc-glibc.ver
===================================================================
--- libgcc/config/i386/libgcc-glibc.ver (revision 237380)
+++ libgcc/config/i386/libgcc-glibc.ver (working copy)
@@ -152,6 +152,10 @@ GCC_4.8.0 {
   __cpu_model
   __cpu_indicator_init
 }
+
+GCC_7.0.0 {
+  __signbittf2
+}
 %else
 GCC_4.4.0 {
   __addtf3
@@ -193,4 +197,8 @@ GCC_4.8.0 {
   __cpu_model
   __cpu_indicator_init
 }
+
+GCC_7.0.0 {
+  __signbittf2
+}
 %endif
Index: libgcc/config.host
===================================================================
--- libgcc/config.host  (revision 237380)
+++ libgcc/config.host  (working copy)
@@ -1361,9 +1361,7 @@ i[34567]86-*-darwin* | x86_64-*-darwin* | \
   i[34567]86-*-freebsd* | x86_64-*-freebsd* | \
   i[34567]86-*-openbsd* | x86_64-*-openbsd*)
        tmake_file="${tmake_file} t-softfp-tf"
-       if test "${host_address}" = 32; then
-               tmake_file="${tmake_file} i386/${host_address}/t-softfp"
-       fi
+       tmake_file="${tmake_file} i386/${host_address}/t-softfp"
        tmake_file="${tmake_file} i386/t-softfp t-softfp"
        ;;
 esac

Reply via email to