I've bootstrapped and reg-tested {,-m64} with this on x86_64-pc-linux-gnu, but I'm waiting for a reference test set to finish to compare them. I've verified that we're getting HAVE_AS_IX86_AVX in auto-host.h and I've also built and tested without it to double-verify that the AVX code is correct. Now that I think of it, I didn't run the tests with -mno-omit-frame-pointer (to simulate Solaris), but since the tests pass as-is, I presume that it understands the 'f?' part of my regex. (A separate set of stubs is used when rbp is the frame pointer, and these have 'f' appended to their names.)
OK to commit once I get a clean compare? gcc: configure.ac: Add Check for HAVE_AS_IX86_AVX config.in: Regenerate. configure: Likewise. gcc/testsuite: gcc.target/i386/pr82196-1.c: Simplify so that it doesn't break on Solaris or with -mno-omit-frame-pointer. gcc.target/i386/pr82196-2.c: Likewise. libgcc: config/i386/i386-asm.h (SSE_SAVE, SSE_RESTORE): Sniff HAVE_AS_IX86_AVX and directly emit raw .byte code when assembler doesn't support avx, correct out-of-date comments. Thanks, Daniel Signed-off-by: Daniel Santos <daniel.san...@pobox.com> --- gcc/config.in | 6 +++++ gcc/configure | 32 ++++++++++++++++++++++ gcc/configure.ac | 6 +++++ gcc/testsuite/gcc.target/i386/pr82196-1.c | 5 ++-- gcc/testsuite/gcc.target/i386/pr82196-2.c | 5 ++-- libgcc/config/i386/i386-asm.h | 44 ++++++++++++++++++++++++++----- 6 files changed, 85 insertions(+), 13 deletions(-) diff --git a/gcc/config.in b/gcc/config.in index 89d7108e8db..df2e518baa6 100644 --- a/gcc/config.in +++ b/gcc/config.in @@ -406,6 +406,12 @@ #endif +/* Define if your assembler supports avx extensions. */ +#ifndef USED_FOR_TARGET +#undef HAVE_AS_IX86_AVX +#endif + + /* Define if your assembler supports the Sun syntax for cmov. */ #ifndef USED_FOR_TARGET #undef HAVE_AS_IX86_CMOV_SUN_SYNTAX diff --git a/gcc/configure b/gcc/configure index 13f97cd3663..e982b86c25c 100755 --- a/gcc/configure +++ b/gcc/configure @@ -25881,6 +25881,38 @@ if test $gcc_cv_as_ix86_swap = yes; then $as_echo "#define HAVE_AS_IX86_SWAP 1" >>confdefs.h +fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for avx extensions" >&5 +$as_echo_n "checking assembler for avx extensions... 
" >&6; } +if test "${gcc_cv_as_ix86_avx+set}" = set; then : + $as_echo_n "(cached) " >&6 +else + gcc_cv_as_ix86_avx=no + if test x$gcc_cv_as != x; then + $as_echo 'vzeroupper' > conftest.s + if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; } + then + gcc_cv_as_ix86_avx=yes + else + echo "configure: failed program was" >&5 + cat conftest.s >&5 + fi + rm -f conftest.o conftest.s + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_ix86_avx" >&5 +$as_echo "$gcc_cv_as_ix86_avx" >&6; } +if test $gcc_cv_as_ix86_avx = yes; then + +$as_echo "#define HAVE_AS_IX86_AVX 1" >>confdefs.h + fi diff --git a/gcc/configure.ac b/gcc/configure.ac index 82711389281..a05f2ca10b2 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -4171,6 +4171,12 @@ foo: nop [AC_DEFINE(HAVE_AS_IX86_SWAP, 1, [Define if your assembler supports the swap suffix.])]) + gcc_GAS_CHECK_FEATURE([avx extensions], + gcc_cv_as_ix86_avx,,, + [vzeroupper],, + [AC_DEFINE(HAVE_AS_IX86_AVX, 1, + [Define if your assembler supports avx extensions.])]) + gcc_GAS_CHECK_FEATURE([different section symbol subtraction], gcc_cv_as_ix86_diff_sect_delta,,, [.section .rodata diff --git a/gcc/testsuite/gcc.target/i386/pr82196-1.c b/gcc/testsuite/gcc.target/i386/pr82196-1.c index ef858328f00..541d975480d 100644 --- a/gcc/testsuite/gcc.target/i386/pr82196-1.c +++ b/gcc/testsuite/gcc.target/i386/pr82196-1.c @@ -1,7 +1,7 @@ /* { dg-do compile { target lp64 } } */ /* { dg-options "-msse -mcall-ms2sysv-xlogues -O2" } */ -/* { dg-final { scan-assembler "call.*__sse_savms64_18" } } */ -/* { dg-final { scan-assembler "jmp.*__sse_resms64x_18" } } */ +/* { dg-final { scan-assembler "call.*__sse_savms64f?_12" } } */ +/* { dg-final { scan-assembler "jmp.*__sse_resms64f?x_12" } } */ void 
__attribute__((sysv_abi)) a() { } @@ -9,6 +9,5 @@ void __attribute__((sysv_abi)) a() { static void __attribute__((sysv_abi)) (*volatile a_noinfo)() = a; void __attribute__((ms_abi)) b() { - __asm__ __volatile__ ("" :::"rbx", "rbp", "r12", "r13", "r14", "r15"); a_noinfo (); } diff --git a/gcc/testsuite/gcc.target/i386/pr82196-2.c b/gcc/testsuite/gcc.target/i386/pr82196-2.c index 8fe58411d5e..7166d068bc1 100644 --- a/gcc/testsuite/gcc.target/i386/pr82196-2.c +++ b/gcc/testsuite/gcc.target/i386/pr82196-2.c @@ -1,7 +1,7 @@ /* { dg-do compile { target lp64 } } */ /* { dg-options "-mavx -mcall-ms2sysv-xlogues -O2" } */ -/* { dg-final { scan-assembler "call.*__avx_savms64_18" } } */ -/* { dg-final { scan-assembler "jmp.*__avx_resms64x_18" } } */ +/* { dg-final { scan-assembler "call.*__avx_savms64f?_12" } } */ +/* { dg-final { scan-assembler "jmp.*__avx_resms64f?x_12" } } */ void __attribute__((sysv_abi)) a() { } @@ -9,6 +9,5 @@ void __attribute__((sysv_abi)) a() { static void __attribute__((sysv_abi)) (*volatile a_noinfo)() = a; void __attribute__((ms_abi)) b() { - __asm__ __volatile__ ("" :::"rbx", "rbp", "r12", "r13", "r14", "r15"); a_noinfo (); } diff --git a/libgcc/config/i386/i386-asm.h b/libgcc/config/i386/i386-asm.h index 424e0f72aac..19ee113fbfa 100644 --- a/libgcc/config/i386/i386-asm.h +++ b/libgcc/config/i386/i386-asm.h @@ -69,13 +69,15 @@ ASMNAME(fn): #ifdef MS2SYSV_STUB_AVX # define MS2SYSV_STUB_PREFIX __avx_ -# define MOVAPS vmovaps +# ifdef HAVE_AS_IX86_AVX +# define MOVAPS vmovaps +# endif #elif defined(MS2SYSV_STUB_SSE) # define MS2SYSV_STUB_PREFIX __sse_ # define MOVAPS movaps #endif -#if defined (MS2SYSV_STUB_PREFIX) && defined (MOVAPS) +#if defined (MS2SYSV_STUB_PREFIX) # define MS2SYSV_STUB_BEGIN(base_name) \ HIDDEN_FUNC(PASTE2(MS2SYSV_STUB_PREFIX, base_name)) @@ -83,8 +85,10 @@ ASMNAME(fn): # define MS2SYSV_STUB_END(base_name) \ FUNC_END(PASTE2(MS2SYSV_STUB_PREFIX, base_name)) -/* Save SSE registers 6-15. off is the offset of rax to get to xmm6. 
*/ -# define SSE_SAVE \ +/* If expanding for sse or avx and we have assembler support. */ +# ifdef MOVAPS +/* Save SSE registers 6-15 using rax as the base address. */ +# define SSE_SAVE \ MOVAPS %xmm15,-0x30(%rax); \ MOVAPS %xmm14,-0x20(%rax); \ MOVAPS %xmm13,-0x10(%rax); \ @@ -96,8 +100,8 @@ ASMNAME(fn): MOVAPS %xmm7, 0x50(%rax); \ MOVAPS %xmm6, 0x60(%rax) -/* Restore SSE registers 6-15. off is the offset of rsi to get to xmm6. */ -# define SSE_RESTORE \ +/* Restore SSE registers 6-15 using rsi as the base address. */ +# define SSE_RESTORE \ MOVAPS -0x30(%rsi), %xmm15; \ MOVAPS -0x20(%rsi), %xmm14; \ MOVAPS -0x10(%rsi), %xmm13; \ @@ -108,6 +112,32 @@ ASMNAME(fn): MOVAPS 0x40(%rsi), %xmm8 ; \ MOVAPS 0x50(%rsi), %xmm7 ; \ MOVAPS 0x60(%rsi), %xmm6 - +# else /* MOVAPS */ +/* If the assembler doesn't support AVX then directly emit machine code + for the instructions above directly. */ +# define BYTE .byte +# define SSE_SAVE \ + BYTE 0xc5, 0x78, 0x29, 0x78, 0xd0; /* vmovaps %xmm15,-0x30(%rax) */ \ + BYTE 0xc5, 0x78, 0x29, 0x70, 0xe0; /* vmovaps %xmm14,-0x20(%rax) */ \ + BYTE 0xc5, 0x78, 0x29, 0x68, 0xf0; /* vmovaps %xmm13,-0x10(%rax) */ \ + BYTE 0xc5, 0x78, 0x29, 0x20; /* vmovaps %xmm12, (%rax) */ \ + BYTE 0xc5, 0x78, 0x29, 0x58, 0x10; /* vmovaps %xmm11, 0x10(%rax) */ \ + BYTE 0xc5, 0x78, 0x29, 0x50, 0x20; /* vmovaps %xmm10, 0x20(%rax) */ \ + BYTE 0xc5, 0x78, 0x29, 0x48, 0x30; /* vmovaps %xmm9, 0x30(%rax) */ \ + BYTE 0xc5, 0x78, 0x29, 0x40, 0x40; /* vmovaps %xmm8, 0x40(%rax) */ \ + BYTE 0xc5, 0xf8, 0x29, 0x78, 0x50; /* vmovaps %xmm7, 0x50(%rax) */ \ + BYTE 0xc5, 0xf8, 0x29, 0x70, 0x60; /* vmovaps %xmm6, 0x60(%rax) */ +# define SSE_RESTORE \ + BYTE 0xc5, 0x78, 0x28, 0x7e, 0xd0; /* vmovaps -0x30(%rsi),%xmm15 */ \ + BYTE 0xc5, 0x78, 0x28, 0x76, 0xe0; /* vmovaps -0x20(%rsi),%xmm14 */ \ + BYTE 0xc5, 0x78, 0x28, 0x6e, 0xf0; /* vmovaps -0x10(%rsi),%xmm13 */ \ + BYTE 0xc5, 0x78, 0x28, 0x26; /* vmovaps (%rsi),%xmm12 */ \ + BYTE 0xc5, 0x78, 0x28, 0x5e, 0x10; /* vmovaps 
0x10(%rsi),%xmm11 */ \ + BYTE 0xc5, 0x78, 0x28, 0x56, 0x20; /* vmovaps 0x20(%rsi),%xmm10 */ \ + BYTE 0xc5, 0x78, 0x28, 0x4e, 0x30; /* vmovaps 0x30(%rsi),%xmm9 */ \ + BYTE 0xc5, 0x78, 0x28, 0x46, 0x40; /* vmovaps 0x40(%rsi),%xmm8 */ \ + BYTE 0xc5, 0xf8, 0x28, 0x7e, 0x50; /* vmovaps 0x50(%rsi),%xmm7 */ \ + BYTE 0xc5, 0xf8, 0x28, 0x76, 0x60; /* vmovaps 0x60(%rsi),%xmm6 */ +# endif /* MOVAPS */ #endif /* defined (MS2SYSV_STUB_ISA) && defined (MOVAPS) */ #endif /* I386_ASM_H */ -- 2.14.1