On Mon, Jan 27, 2020 at 11:04 PM Uros Bizjak <ubiz...@gmail.com> wrote: > > On Mon, Jan 27, 2020 at 11:17 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > > > On Mon, Jan 27, 2020 at 12:26 PM Uros Bizjak <ubiz...@gmail.com> wrote: > > > > > > On Mon, Jan 27, 2020 at 7:23 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > > > > > > > movaps/movups is one byte shorter than movdqa/movdqu. But it isn't the > > > > case for AVX nor AVX512. We should disable TARGET_SSE_TYPELESS_STORES > > > > for TARGET_AVX. > > > > > > > > gcc/ > > > > > > > > PR target/91461 > > > > * config/i386/i386.h (TARGET_SSE_TYPELESS_STORES): Disable for > > > > TARGET_AVX. > > > > * config/i386/i386.md (*movoi_internal_avx): Remove > > > > TARGET_SSE_TYPELESS_STORES check. > > > > > > > > gcc/testsuite/ > > > > > > > > PR target/91461 > > > > * gcc.target/i386/pr91461-1.c: New test. > > > > * gcc.target/i386/pr91461-2.c: Likewise. > > > > * gcc.target/i386/pr91461-3.c: Likewise. > > > > * gcc.target/i386/pr91461-4.c: Likewise. > > > > * gcc.target/i386/pr91461-5.c: Likewise. 
> > > > --- > > > > gcc/config/i386/i386.h | 4 +- > > > > gcc/config/i386/i386.md | 4 +- > > > > gcc/testsuite/gcc.target/i386/pr91461-1.c | 66 ++++++++++++++++++++ > > > > gcc/testsuite/gcc.target/i386/pr91461-2.c | 19 ++++++ > > > > gcc/testsuite/gcc.target/i386/pr91461-3.c | 76 +++++++++++++++++++++++ > > > > gcc/testsuite/gcc.target/i386/pr91461-4.c | 21 +++++++ > > > > gcc/testsuite/gcc.target/i386/pr91461-5.c | 17 +++++ > > > > 7 files changed, 203 insertions(+), 4 deletions(-) > > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr91461-1.c > > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr91461-2.c > > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr91461-3.c > > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr91461-4.c > > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr91461-5.c > > > > > > > > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > > > > index 943e9a5c783..c134b04c5c4 100644 > > > > --- a/gcc/config/i386/i386.h > > > > +++ b/gcc/config/i386/i386.h > > > > @@ -516,8 +516,10 @@ extern unsigned char > > > > ix86_tune_features[X86_TUNE_LAST]; > > > > #define TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL \ > > > > ix86_tune_features[X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL] > > > > #define TARGET_SSE_SPLIT_REGS > > > > ix86_tune_features[X86_TUNE_SSE_SPLIT_REGS] > > > > +/* NB: movaps/movups is one byte shorter than movdqa/movdqu. But it > > > > + isn't the case for AVX nor AVX512. */ > > > > #define TARGET_SSE_TYPELESS_STORES \ > > > > - ix86_tune_features[X86_TUNE_SSE_TYPELESS_STORES] > > > > + (!TARGET_AVX && > > > > ix86_tune_features[X86_TUNE_SSE_TYPELESS_STORES]) > > > > > > This is wrong place to disable the feature. > > > > Like this? > > No. > > There is a mode attribute in i386.md/sse.md for relevant patterns. > Please adapt calculation of mode attributes instead. >
Like this? -- H.J.
From 1ba0c9ce5f764b8faa8c66b70e676af187a57415 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" <hjl.to...@gmail.com> Date: Mon, 27 Jan 2020 09:35:11 -0800 Subject: [PATCH] i386: Disable TARGET_SSE_TYPELESS_STORES for TARGET_AVX movaps/movups is one byte shorter than movdqa/movdqu. But it isn't the case for AVX nor AVX512. We should disable TARGET_SSE_TYPELESS_STORES for TARGET_AVX. gcc/ PR target/91461 * config/i386/i386.md (*movoi_internal_avx): Remove TARGET_SSE_TYPELESS_STORES check. (*movti_internal): Disable TARGET_SSE_TYPELESS_STORES for TARGET_AVX. (*movtf_internal): Likewise. * config/i386/sse.md (mov<mode>_internal): Likewise. gcc/testsuite/ PR target/91461 * gcc.target/i386/pr91461-1.c: New test. * gcc.target/i386/pr91461-2.c: Likewise. * gcc.target/i386/pr91461-3.c: Likewise. * gcc.target/i386/pr91461-4.c: Likewise. * gcc.target/i386/pr91461-5.c: Likewise. --- gcc/config/i386/i386.md | 8 +-- gcc/config/i386/sse.md | 2 +- gcc/testsuite/gcc.target/i386/pr91461-1.c | 66 ++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr91461-2.c | 19 ++++++ gcc/testsuite/gcc.target/i386/pr91461-3.c | 76 +++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr91461-4.c | 21 +++++++ gcc/testsuite/gcc.target/i386/pr91461-5.c | 17 +++++ 7 files changed, 203 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr91461-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr91461-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr91461-3.c create mode 100644 gcc/testsuite/gcc.target/i386/pr91461-4.c create mode 100644 gcc/testsuite/gcc.target/i386/pr91461-5.c diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index a125ab350bb..62aaf40a4af 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1980,9 +1980,7 @@ (define_insn "*movoi_internal_avx" (and (eq_attr "alternative" "1") (match_test "TARGET_AVX512VL")) (const_string "XI") - (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") - (and (eq_attr "alternative" "3") - (match_test 
"TARGET_SSE_TYPELESS_STORES"))) + (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") (const_string "V8SF") ] (const_string "OI")))]) @@ -2062,7 +2060,7 @@ (define_insn "*movti_internal" (ior (not (match_test "TARGET_SSE2")) (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") (and (eq_attr "alternative" "5") - (match_test "TARGET_SSE_TYPELESS_STORES")))) + (match_test "!TARGET_AVX && TARGET_SSE_TYPELESS_STORES")))) (const_string "V4SF") (match_test "TARGET_AVX") (const_string "TI") @@ -3327,7 +3325,7 @@ (define_insn "*movtf_internal" (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") (const_string "V4SF") (and (eq_attr "alternative" "2") - (match_test "TARGET_SSE_TYPELESS_STORES")) + (match_test "!TARGET_AVX && TARGET_SSE_TYPELESS_STORES")) (const_string "V4SF") (match_test "TARGET_AVX") (const_string "TI") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 04a8c5e56b9..98d644ab317 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1119,7 +1119,7 @@ (define_insn "mov<mode>_internal" (and (match_test "<MODE_SIZE> == 16") (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") (and (eq_attr "alternative" "3") - (match_test "TARGET_SSE_TYPELESS_STORES")))) + (match_test "!TARGET_AVX && TARGET_SSE_TYPELESS_STORES")))) (const_string "<ssePSmode>") (match_test "TARGET_AVX") (const_string "<sseinsnmode>") diff --git a/gcc/testsuite/gcc.target/i386/pr91461-1.c b/gcc/testsuite/gcc.target/i386/pr91461-1.c new file mode 100644 index 00000000000..0c94b8e2b76 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr91461-1.c @@ -0,0 +1,66 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx" } */ +/* { dg-final { scan-assembler "\tvmovdqa\t" } } */ +/* { dg-final { scan-assembler "\tvmovdqu\t" } } */ +/* { dg-final { scan-assembler "\tvmovapd\t" } } */ +/* { dg-final { scan-assembler "\tvmovupd\t" } } */ +/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */ +/* { dg-final { scan-assembler-not "\tvmovups\t" } } */ + +#include 
<immintrin.h> + +void +foo1 (__m128i *p, __m128i x) +{ + *p = x; +} + +void +foo2 (__m128d *p, __m128d x) +{ + *p = x; +} + +void +foo3 (__float128 *p, __float128 x) +{ + *p = x; +} + +void +foo4 (__m128i_u *p, __m128i x) +{ + *p = x; +} + +void +foo5 (__m128d_u *p, __m128d x) +{ + *p = x; +} + +typedef __float128 __float128_u __attribute__ ((__aligned__ (1))); + +void +foo6 (__float128_u *p, __float128 x) +{ + *p = x; +} + +#ifdef __x86_64__ +typedef __int128 __int128_u __attribute__ ((__aligned__ (1))); + +extern __int128 int128; + +void +foo7 (__int128 *p) +{ + *p = int128; +} + +void +foo8 (__int128_u *p) +{ + *p = int128; +} +#endif diff --git a/gcc/testsuite/gcc.target/i386/pr91461-2.c b/gcc/testsuite/gcc.target/i386/pr91461-2.c new file mode 100644 index 00000000000..921cfaf9780 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr91461-2.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx" } */ +/* { dg-final { scan-assembler "\tvmovdqa\t" } } */ +/* { dg-final { scan-assembler "\tvmovapd\t" } } */ +/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */ + +#include <immintrin.h> + +void +foo1 (__m256i *p, __m256i x) +{ + *p = x; +} + +void +foo2 (__m256d *p, __m256d x) +{ + *p = x; +} diff --git a/gcc/testsuite/gcc.target/i386/pr91461-3.c b/gcc/testsuite/gcc.target/i386/pr91461-3.c new file mode 100644 index 00000000000..c67a48063bf --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr91461-3.c @@ -0,0 +1,76 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -mavx512f -mavx512vl" } */ +/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */ +/* { dg-final { scan-assembler-not "\tvmovups\t" } } */ + +#include <immintrin.h> + +void +foo1 (__m128i *p, __m128i a) +{ + register __m128i x __asm ("xmm16") = a; + asm volatile ("" : "+v" (x)); + *p = x; +} + +void +foo2 (__m128d *p, __m128d a) +{ + register __m128d x __asm ("xmm16") = a; + asm volatile ("" : "+v" (x)); + *p = x; +} + +void +foo3 (__float128 *p, __float128 a) +{ + register __float128 x __asm ("xmm16") = a; + asm volatile ("" : "+v" (x)); + *p = x; +} + +void +foo4 (__m128i_u *p, __m128i a) +{ + register __m128i x __asm ("xmm16") = a; + asm volatile ("" : "+v" (x)); + *p = x; +} + +void +foo5 (__m128d_u *p, __m128d a) +{ + register __m128d x __asm ("xmm16") = a; + asm volatile ("" : "+v" (x)); + *p = x; +} + +typedef __float128 __float128_u __attribute__ ((__aligned__ (1))); + +void +foo6 (__float128_u *p, __float128 a) +{ + register __float128 x __asm ("xmm16") = a; + asm volatile ("" : "+v" (x)); + *p = x; +} + +typedef __int128 __int128_u __attribute__ ((__aligned__ (1))); + +extern __int128 int128; + +void +foo7 (__int128 *p) +{ + register __int128 x __asm ("xmm16") = int128; + asm volatile ("" : "+v" (x)); + *p = x; +} + +void +foo8 (__int128_u *p) +{ + register __int128 x __asm ("xmm16") = int128; + asm volatile ("" : "+v" (x)); + *p = x; +} diff --git a/gcc/testsuite/gcc.target/i386/pr91461-4.c b/gcc/testsuite/gcc.target/i386/pr91461-4.c new file mode 100644 index 00000000000..69df590de3a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr91461-4.c @@ -0,0 +1,21 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -mavx512f -mavx512vl" } */ +/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */ + +#include <immintrin.h> + +void +foo1 (__m256i *p, __m256i a) +{ + register __m256i x __asm ("xmm16") = a; + asm volatile ("" : "+v" (x)); + *p = x; +} + +void +foo2 (__m256d *p, __m256d a) +{ + register __m256d x __asm ("xmm16") = a; + asm volatile ("" : "+v" (x)); + *p = x; +} diff --git a/gcc/testsuite/gcc.target/i386/pr91461-5.c b/gcc/testsuite/gcc.target/i386/pr91461-5.c new file mode 100644 index 00000000000..974263042f3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr91461-5.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512f" } */ +/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */ + +#include <immintrin.h> + +void +foo1 (__m512i *p, __m512i x) +{ + *p = x; +} + +void +foo2 (__m512d *p, __m512d x) +{ + *p = x; +} -- 2.24.1