GCC 6.0 has introduced a new feature that allows inline asm to return flag registers (see https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#FlagOutputOperands). There are a couple dozen intrinsics in intrin-impl.h that would benefit from using this.

There is a define that signals if the gcc feature is available, so backward compatibility is straight-forward. I have updated intrin-impl.h and run it thru my test suite. It looks good.

The patch is attached, but I'm no expert with git, so I'm not sure the format is correct. If it would be easier, I can just attach the .h file.

For people who want to know the nitty gritty:

Before this feature, you had to write code like this:

   asm("bt $0, %1 ; setc %0" : "=q" (a) : "r" (value) : "cc");

This would generate code like this:

        bt $0, %ebx
        setc %al <--------- Convert flags to byte
        testb   %al, %al <------ Convert byte back to flags
        jne     .L5
        leaq    .LC1(%rip), %rcx
        call    printf

Using @cc, this code

   asm("bt $0, %1" : "=@ccc" (a) : "r" (value) );

produces this output:

        bt $0, %ebx
        jc      .L5 <--------- Use the flags directly
        leaq    .LC1(%rip), %rcx
        call    printf

dw
commit 527ef2be12fefe658cdf9a43fdc69601d005057c
Author: David Wohlferd <d...@limegreensocks.com>
Date:   Tue Nov 17 13:13:30 2015 -0800

    Support gcc 6 flag constraints
    
    Signed-off-by: David Wohlferd <d...@limegreensocks.com>

diff --git a/mingw-w64-headers/include/psdk_inc/intrin-impl.h b/mingw-w64-headers/include/psdk_inc/intrin-impl.h
index 7b2882e..9645e24 100644
--- a/mingw-w64-headers/include/psdk_inc/intrin-impl.h
+++ b/mingw-w64-headers/include/psdk_inc/intrin-impl.h
@@ -66,6 +66,20 @@ __INTRINSICS_USEINLINE
 #ifndef _INTRIN_MAC_
 #define _INTRIN_MAC_
 
+/* GCC v6 added support for outputting flags.  This allows better code to be
+   produced for a number of intrinsics. */
+#ifndef __GCC_ASM_FLAG_OUTPUTS__
+#define __FLAGCONSTRAINT "=qm"
+#define __FLAGSET "\n\tsetc %[old]"
+#define __FLAGCLOBBER1 , "cc"
+#define __FLAGCLOBBER2 "cc"
+#else
+#define __FLAGCONSTRAINT "=@ccc"
+#define __FLAGSET
+#define __FLAGCLOBBER1
+#define __FLAGCLOBBER2
+#endif
+
 /* This macro is used by __stosb, __stosw, __stosd, __stosq */
 
 /* Parameters: (FunctionName, DataType, Operator)
@@ -109,9 +123,9 @@ __INTRINSICS_USEINLINE
 { \
    unsigned char old; \
    __asm__ __volatile__ (z \
-      : [old] "=qm" (old), [Base] "+m" (*Base) \
+      : [old] __FLAGCONSTRAINT (old), [Base] "+m" (*Base) \
       : [Offset] a "r" (Offset) \
-      : "memory", "cc"); \
+      : "memory" __FLAGCLOBBER1); \
    return old; \
 }
 #elif defined(__arm__) || defined(_ARM_)
@@ -193,6 +207,9 @@ Parameters: (FunctionName, DataType, Segment)
    DataType: unsigned __LONG32 or unsigned __int64
    Statement: BSF or BSR */
 
+/* GCC v6 added support for outputting flags.  This allows better code to be
+   produced for a number of intrinsics. */
+#ifndef __GCC_ASM_FLAG_OUTPUTS__
 #define __buildbitscan(x, y, z) unsigned char x(unsigned __LONG32 *Index, y Mask) \
 { \
    y n; \
@@ -203,6 +220,18 @@ Parameters: (FunctionName, DataType, Segment)
    *Index = n; \
    return Mask!=0; \
 }
+#else
+#define __buildbitscan(x, y, z) unsigned char x(unsigned __LONG32 *Index, y Mask) \
+{ \
+   y n; \
+   unsigned char old; \
+   __asm__ (z \
+      : "=@ccnz" (old), [Index] "=r" (n) \
+      : [Mask] "r" (Mask)); \
+   *Index = n; \
+   return old; \
+}
+#endif
 
 /* This macro is used by _bittest & _bittest64
 
@@ -216,10 +245,10 @@ Parameters: (FunctionName, DataType, OffsetConstraint)
 #define __buildbittest(x, y, z, a) unsigned char x(const y *Base, y Offset) \
 { \
    unsigned char old; \
-   __asm__ ("bt{" z " %[Offset],%[Base] | %[Base],%[Offset]} ; setc %[old]" \
-      : [old] "=rm" (old) \
+   __asm__ ("bt{" z " %[Offset],%[Base] | %[Base],%[Offset]}" __FLAGSET \
+      : [old] __FLAGCONSTRAINT (old) \
       : [Offset] a "r" (Offset), [Base] "rm" (*Base) \
-      : "cc"); \
+      : __FLAGCLOBBER2); \
    return old; \
 }
 
@@ -236,10 +265,10 @@ Parameters: (FunctionName, DataType, Statement, OffsetConstraint)
 #define __buildbittestand(x, y, z, a, b) unsigned char x(y *Base, y Offset) \
 { \
    unsigned char old; \
-   __asm__ (z "{" b " %[Offset],%[Base] | %[Base],%[Offset]} ; setc %[old]" \
-      : [old] "=r" (old), [Base] "+rm" (*Base) \
+   __asm__ (z "{" b " %[Offset],%[Base] | %[Base],%[Offset]}" __FLAGSET \
+      : [old] __FLAGCONSTRAINT (old), [Base] "+rm" (*Base) \
       : [Offset] a "r" (Offset) \
-      : "cc"); \
+      : __FLAGCLOBBER2); \
    return old; \
 }
 
@@ -556,42 +585,42 @@ __buildstos(__stosq, unsigned __int64, "q|q")
 #if __INTRINSIC_PROLOG(_interlockedbittestandset64)
 __MINGW_EXTENSION unsigned char _interlockedbittestandset64(__int64 *a, __int64 b);
 __INTRINSICS_USEINLINE 
-__buildbittesti(_interlockedbittestandset64, __int64, "lock bts{q %[Offset],%[Base] | %[Base],%[Offset]} ; setc %[old]", "J", __int64)
+__buildbittesti(_interlockedbittestandset64, __int64, "lock bts{q %[Offset],%[Base] | %[Base],%[Offset]}" __FLAGSET, "J", __int64)
 #define __INTRINSIC_DEFINED__interlockedbittestandset64
 #endif /* __INTRINSIC_PROLOG */
 
 #if __INTRINSIC_PROLOG(_interlockedbittestandreset64)
 __MINGW_EXTENSION unsigned char _interlockedbittestandreset64(__int64 *a, __int64 b);
 __INTRINSICS_USEINLINE 
-__buildbittesti(_interlockedbittestandreset64, __int64, "lock btr{q %[Offset],%[Base] | %[Base],%[Offset]} ; setc %[old]", "J", __int64)
+__buildbittesti(_interlockedbittestandreset64, __int64, "lock btr{q %[Offset],%[Base] | %[Base],%[Offset]}" __FLAGSET, "J", __int64)
 #define __INTRINSIC_DEFINED__interlockedbittestandreset64
 #endif /* __INTRINSIC_PROLOG */
 
 #if __INTRINSIC_PROLOG(_interlockedbittestandcomplement64)
 __MINGW_EXTENSION unsigned char _interlockedbittestandcomplement64(__int64 *a, __int64 b);
 __INTRINSICS_USEINLINE 
-__buildbittesti(_interlockedbittestandcomplement64, __int64, "lock btc{q %[Offset],%[Base] | %[Base],%[Offset]} ; setc %[old]", "J", __int64)
+__buildbittesti(_interlockedbittestandcomplement64, __int64, "lock btc{q %[Offset],%[Base] | %[Base],%[Offset]}" __FLAGSET, "J", __int64)
 #define __INTRINSIC_DEFINED__interlockedbittestandcomplement64
 #endif /* __INTRINSIC_PROLOG */
 
 #if __INTRINSIC_PROLOG(InterlockedBitTestAndSet64)
 __MINGW_EXTENSION unsigned char InterlockedBitTestAndSet64(volatile __int64 *a, __int64 b);
 __INTRINSICS_USEINLINE 
-__buildbittesti(InterlockedBitTestAndSet64, volatile __int64, "lock bts{q %[Offset],%[Base] | %[Base],%[Offset]} ; setc %[old]", "J", __int64)
+__buildbittesti(InterlockedBitTestAndSet64, volatile __int64, "lock bts{q %[Offset],%[Base] | %[Base],%[Offset]}" __FLAGSET, "J", __int64)
 #define __INTRINSIC_DEFINED_InterlockedBitTestAndSet64
 #endif /* __INTRINSIC_PROLOG */
 
 #if __INTRINSIC_PROLOG(InterlockedBitTestAndReset64)
 __MINGW_EXTENSION unsigned char InterlockedBitTestAndReset64(volatile __int64 *a, __int64 b);
 __INTRINSICS_USEINLINE 
-__buildbittesti(InterlockedBitTestAndReset64, volatile __int64, "lock btr{q %[Offset],%[Base] | %[Base],%[Offset]} ; setc %[old]", "J", __int64)
+__buildbittesti(InterlockedBitTestAndReset64, volatile __int64, "lock btr{q %[Offset],%[Base] | %[Base],%[Offset]}" __FLAGSET, "J", __int64)
 #define __INTRINSIC_DEFINED_InterlockedBitTestAndReset64
 #endif /* __INTRINSIC_PROLOG */
 
 #if __INTRINSIC_PROLOG(InterlockedBitTestAndComplement64)
 __MINGW_EXTENSION unsigned char InterlockedBitTestAndComplement64(volatile __int64 *a, __int64 b);
 __INTRINSICS_USEINLINE 
-__buildbittesti(InterlockedBitTestAndComplement64, volatile __int64, "lock btc{q %[Offset],%[Base] | %[Base],%[Offset]} ; setc %[old]", "J", __int64)
+__buildbittesti(InterlockedBitTestAndComplement64, volatile __int64, "lock btc{q %[Offset],%[Base] | %[Base],%[Offset]}" __FLAGSET, "J", __int64)
 #define __INTRINSIC_DEFINED_InterlockedBitTestAndComplement64
 #endif /* __INTRINSIC_PROLOG */
 
@@ -1125,42 +1154,42 @@ __buildstos(__stosd, unsigned __LONG32, "l|d")
 #if __INTRINSIC_PROLOG(_interlockedbittestandset)
 unsigned char _interlockedbittestandset(__LONG32 *a, __LONG32 b);
 __INTRINSICS_USEINLINE
-__buildbittesti(_interlockedbittestandset, __LONG32, "lock bts{l %[Offset],%[Base] | %[Base],%[Offset]} ; setc %[old]", "I", __LONG32)
+__buildbittesti(_interlockedbittestandset, __LONG32, "lock bts{l %[Offset],%[Base] | %[Base],%[Offset]}" __FLAGSET, "I", __LONG32)
 #define __INTRINSIC_DEFINED__interlockedbittestandset
 #endif /* __INTRINSIC_PROLOG */
 
 #if __INTRINSIC_PROLOG(_interlockedbittestandreset)
 unsigned char _interlockedbittestandreset(__LONG32 *a, __LONG32 b);
 __INTRINSICS_USEINLINE
-__buildbittesti(_interlockedbittestandreset, __LONG32, "lock btr{l %[Offset],%[Base] | %[Base],%[Offset]} ; setc %[old]", "I", __LONG32)
+__buildbittesti(_interlockedbittestandreset, __LONG32, "lock btr{l %[Offset],%[Base] | %[Base],%[Offset]}" __FLAGSET, "I", __LONG32)
 #define __INTRINSIC_DEFINED__interlockedbittestandreset
 #endif /* __INTRINSIC_PROLOG */
 
 #if __INTRINSIC_PROLOG(_interlockedbittestandcomplement)
 unsigned char _interlockedbittestandcomplement(__LONG32 *a, __LONG32 b);
 __INTRINSICS_USEINLINE
-__buildbittesti(_interlockedbittestandcomplement, __LONG32, "lock btc{l %[Offset],%[Base] | %[Base],%[Offset]} ; setc %[old]", "I", __LONG32)
+__buildbittesti(_interlockedbittestandcomplement, __LONG32, "lock btc{l %[Offset],%[Base] | %[Base],%[Offset]}" __FLAGSET, "I", __LONG32)
 #define __INTRINSIC_DEFINED__interlockedbittestandcomplement
 #endif /* __INTRINSIC_PROLOG */
 
 #if __INTRINSIC_PROLOG(InterlockedBitTestAndSet)
 unsigned char InterlockedBitTestAndSet(volatile __LONG32 *a, __LONG32 b);
 __INTRINSICS_USEINLINE
-__buildbittesti(InterlockedBitTestAndSet, volatile __LONG32, "lock bts{l %[Offset],%[Base] | %[Base],%[Offset]} ; setc %[old]", "I", __LONG32)
+__buildbittesti(InterlockedBitTestAndSet, volatile __LONG32, "lock bts{l %[Offset],%[Base] | %[Base],%[Offset]}" __FLAGSET, "I", __LONG32)
 #define __INTRINSIC_DEFINED_InterlockedBitTestAndSet
 #endif /* __INTRINSIC_PROLOG */
 
 #if __INTRINSIC_PROLOG(InterlockedBitTestAndReset)
 unsigned char InterlockedBitTestAndReset(volatile __LONG32 *a, __LONG32 b);
 __INTRINSICS_USEINLINE
-__buildbittesti(InterlockedBitTestAndReset, volatile __LONG32, "lock btr{l %[Offset],%[Base] | %[Base],%[Offset]} ; setc %[old]", "I", __LONG32)
+__buildbittesti(InterlockedBitTestAndReset, volatile __LONG32, "lock btr{l %[Offset],%[Base] | %[Base],%[Offset]}" __FLAGSET, "I", __LONG32)
 #define __INTRINSIC_DEFINED_InterlockedBitTestAndReset
 #endif /* __INTRINSIC_PROLOG */
 
 #if __INTRINSIC_PROLOG(InterlockedBitTestAndComplement)
 unsigned char InterlockedBitTestAndComplement(volatile __LONG32 *a, __LONG32 b);
 __INTRINSICS_USEINLINE
-__buildbittesti(InterlockedBitTestAndComplement, volatile __LONG32, "lock btc{l %[Offset],%[Base] | %[Base],%[Offset]} ; setc %[old]", "I", __LONG32)
+__buildbittesti(InterlockedBitTestAndComplement, volatile __LONG32, "lock btc{l %[Offset],%[Base] | %[Base],%[Offset]}" __FLAGSET, "I", __LONG32)
 #define __INTRINSIC_DEFINED_InterlockedBitTestAndComplement
 #endif /* __INTRINSIC_PROLOG */
 
@@ -1479,5 +1508,9 @@ __build_writecr(__writecr8, unsigned __LONG32, "8")
 #undef __INTRINSIC_PROLOG
 #undef __INTRINSIC_EPILOG
 #undef __INTRINSICS_USEINLINE
+#undef __FLAGCONSTRAINT
+#undef __FLAGSET
+#undef __FLAGCLOBBER1
+#undef __FLAGCLOBBER2
 
 #endif /* __MINGW_INTRIN_INLINE */
------------------------------------------------------------------------------
_______________________________________________
Mingw-w64-public mailing list
Mingw-w64-public@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/mingw-w64-public

Reply via email to