vpbroadcastd/vpbroadcastq are available only under TARGET_AVX2, but the vec_dup{v4di,v8si} patterns are available under AVX with a memory operand. As a result, LRA/reload will generate a spill and reload if we put the constant in a register.
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. Ready to push to trunk. gcc/ChangeLog: PR target/112992 * config/i386/i386-expand.cc (ix86_convert_const_wide_int_to_broadcast): Don't convert to broadcast for vec_dup{v4di,v8si} when TARGET_AVX2 is not available. (ix86_broadcast_from_constant): Allow broadcast for V4DI/V8SI when !TARGET_AVX2 since it will be forced to memory later. (ix86_expand_vector_move): Force constant to mem for vec_dup{v8si,v4di} when TARGET_AVX2 is not available. gcc/testsuite/ChangeLog: * gcc.target/i386/pr100865-7a.c: Adjust testcase. * gcc.target/i386/pr100865-7c.c: Ditto. * gcc.target/i386/pr112992.c: New test. --- gcc/config/i386/i386-expand.cc | 48 +++++++++++++-------- gcc/testsuite/gcc.target/i386/pr100865-7a.c | 3 +- gcc/testsuite/gcc.target/i386/pr100865-7c.c | 3 +- gcc/testsuite/gcc.target/i386/pr112992.c | 30 +++++++++++++ 4 files changed, 62 insertions(+), 22 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr112992.c diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index a53d69d5400..fad4f34f905 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -297,6 +297,12 @@ ix86_convert_const_wide_int_to_broadcast (machine_mode mode, rtx op) if (!TARGET_INTER_UNIT_MOVES_TO_VEC) return nullptr; + unsigned int msize = GET_MODE_SIZE (mode); + + /* Only optimized for vpbroadcast[bwsd]/vbroadcastss with xmm/ymm/zmm. */ + if (msize != 16 && msize != 32 && msize != 64) + return nullptr; + /* Convert CONST_WIDE_INT to a non-standard SSE constant integer broadcast only if vector broadcast is available. */ if (!TARGET_AVX @@ -309,18 +315,23 @@ ix86_convert_const_wide_int_to_broadcast (machine_mode mode, rtx op) HOST_WIDE_INT val = CONST_WIDE_INT_ELT (op, 0); HOST_WIDE_INT val_broadcast; scalar_int_mode broadcast_mode; - if (TARGET_AVX2 + /* vpbroadcastb zmm requires TARGET_AVX512BW. */ + if ((msize == 64 ? 
TARGET_AVX512BW : TARGET_AVX2) && ix86_broadcast (val, GET_MODE_BITSIZE (QImode), val_broadcast)) broadcast_mode = QImode; - else if (TARGET_AVX2 + else if ((msize == 64 ? TARGET_AVX512BW : TARGET_AVX2) && ix86_broadcast (val, GET_MODE_BITSIZE (HImode), val_broadcast)) broadcast_mode = HImode; - else if (ix86_broadcast (val, GET_MODE_BITSIZE (SImode), + /* vbroadcasts[sd] only support memory operand w/o AVX2. + When msize == 16, pshufs is used for vec_duplicate. + when msize == 64, vpbroadcastd is used, and TARGET_AVX512F must be existed. */ + else if ((msize != 32 || TARGET_AVX2) + && ix86_broadcast (val, GET_MODE_BITSIZE (SImode), val_broadcast)) broadcast_mode = SImode; - else if (TARGET_64BIT + else if (TARGET_64BIT && (msize != 32 || TARGET_AVX2) && ix86_broadcast (val, GET_MODE_BITSIZE (DImode), val_broadcast)) broadcast_mode = DImode; @@ -596,23 +607,17 @@ ix86_broadcast_from_constant (machine_mode mode, rtx op) && INTEGRAL_MODE_P (mode)) return nullptr; + unsigned int msize = GET_MODE_SIZE (mode); + unsigned int inner_size = GET_MODE_SIZE (GET_MODE_INNER ((mode))); + /* Convert CONST_VECTOR to a non-standard SSE constant integer broadcast only if vector broadcast is available. */ - if (!(TARGET_AVX2 - || (TARGET_AVX - && (GET_MODE_INNER (mode) == SImode - || GET_MODE_INNER (mode) == DImode)) - || FLOAT_MODE_P (mode)) - || standard_sse_constant_p (op, mode)) + if (standard_sse_constant_p (op, mode)) return nullptr; - /* Don't broadcast from a 64-bit integer constant in 32-bit mode. - We can still put 64-bit integer constant in memory when - avx512 embed broadcast is available. */ - if (GET_MODE_INNER (mode) == DImode && !TARGET_64BIT - && (!TARGET_AVX512F - || (GET_MODE_SIZE (mode) == 64 && !TARGET_EVEX512) - || (GET_MODE_SIZE (mode) < 64 && !TARGET_AVX512VL))) + /* vpbroadcast[b,w] is available under TARGET_AVX2. + or TARGET_AVX512BW for zmm. */ + if (inner_size < 4 && !(msize == 64 ? 
TARGET_AVX512BW : TARGET_AVX2)) return nullptr; if (GET_MODE_INNER (mode) == TImode) @@ -710,7 +715,14 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[]) constant or scalar mem. */ op1 = gen_reg_rtx (mode); if (FLOAT_MODE_P (mode) - || (!TARGET_64BIT && GET_MODE_INNER (mode) == DImode)) + || (!TARGET_64BIT && GET_MODE_INNER (mode) == DImode) + /* vbroadcastss/vbroadcastsd only supports memory operand + w/o AVX2, force them into memory to avoid spill to + memory. */ + || (GET_MODE_SIZE (mode) == 32 + && (GET_MODE_INNER (mode) == DImode + || GET_MODE_INNER (mode) == SImode) + && !TARGET_AVX2)) first = force_const_mem (GET_MODE_INNER (mode), first); bool ok = ix86_expand_vector_init_duplicate (false, mode, op1, first); diff --git a/gcc/testsuite/gcc.target/i386/pr100865-7a.c b/gcc/testsuite/gcc.target/i386/pr100865-7a.c index f6f2be91120..7de7d4a3ce3 100644 --- a/gcc/testsuite/gcc.target/i386/pr100865-7a.c +++ b/gcc/testsuite/gcc.target/i386/pr100865-7a.c @@ -11,7 +11,6 @@ foo (void) array[i] = -45; } -/* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+\[^\n\]*, %ymm\[0-9\]+" 1 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+\[^\n\]*, %ymm\[0-9\]+" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 16 } } */ -/* { dg-final { scan-assembler-not "vpbroadcastq" { target ia32 } } } */ /* { dg-final { scan-assembler-not "vmovdqa" { target { ! ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr100865-7c.c b/gcc/testsuite/gcc.target/i386/pr100865-7c.c index 4d50bb7e2f6..edbfd5b09ed 100644 --- a/gcc/testsuite/gcc.target/i386/pr100865-7c.c +++ b/gcc/testsuite/gcc.target/i386/pr100865-7c.c @@ -11,7 +11,6 @@ foo (void) array[i] = -45; } -/* { dg-final { scan-assembler-times "vbroadcastsd" 1 { target { ! 
ia32 } } } } */ +/* { dg-final { scan-assembler-times "vbroadcastsd" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 16 } } */ -/* { dg-final { scan-assembler-not "vbroadcastsd" { target ia32 } } } */ /* { dg-final { scan-assembler-not "vmovdqa" { target { ! ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr112992.c b/gcc/testsuite/gcc.target/i386/pr112992.c new file mode 100644 index 00000000000..743e64dccba --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr112992.c @@ -0,0 +1,30 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-mavx -mno-avx2 -O2 " } */ +/* { dg-final { scan-assembler-not {(?n)(%rsp)} } } */ + +typedef unsigned long long v4di __attribute((vector_size(32))); +typedef unsigned int v8si __attribute((vector_size(32))); +typedef unsigned short v16hi __attribute((vector_size(32))); +typedef unsigned char v32qi __attribute((vector_size(32))); + +#define MASK 0x01010101 +#define MASKL 0x0101010101010101ULL +#define MASKS 0x0101 + +v4di fooq() { + return (v4di){MASKL,MASKL,MASKL,MASKL}; +} + +v8si food() { + return (v8si){MASK,MASK,MASK,MASK,MASK,MASK,MASK,MASK}; +} + +v16hi foow() { + return (v16hi){MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,MASKS, + MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,MASKS}; +} + +v32qi foob() { + return (v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; +} -- 2.31.1