On Wed, Feb 26, 2020 at 4:24 PM Jeff Law <l...@redhat.com> wrote:
>
> On Wed, 2020-02-26 at 16:02 -0800, H.J. Lu wrote:
> > On Wed, Feb 26, 2020 at 2:42 PM Jeff Law <l...@redhat.com> wrote:
> > > On Sat, 2020-02-15 at 07:26 -0800, H.J. Lu wrote:
> > > > On x86, when AVX and AVX512 are enabled, vector move instructions can
> > > > be encoded with either 2-byte/3-byte VEX (AVX) or 4-byte EVEX (AVX512):
> > > >
> > > >    0:	c5 f9 6f d1          	vmovdqa %xmm1,%xmm2
> > > >    4:	62 f1 fd 08 6f d1    	vmovdqa64 %xmm1,%xmm2
> > > >
> > > > We prefer VEX encoding over EVEX since VEX is shorter.  Also AVX512F
> > > > only supports 512-bit vector moves.  AVX512F + AVX512VL supports
> > > > 128-bit and 256-bit vector moves.  Mode attributes on x86 vector move
> > > > patterns indicate target preferences of vector move encoding.  For
> > > > vector register to vector register move, we can use 512-bit vector
> > > > move instructions to move 128-bit/256-bit vector if AVX512VL isn't
> > > > available.  With AVX512F and AVX512VL, we should use VEX encoding for
> > > > 128-bit/256-bit vector moves if upper 16 vector registers aren't
> > > > used.  This patch adds a function, ix86_output_ssemov, to generate
> > > > vector moves:
> > > >
> > > > 1. If zmm registers are used, use EVEX encoding.
> > > > 2. If xmm16-xmm31/ymm16-ymm31 registers aren't used, SSE or VEX
> > > >    encoding will be generated.
> > > > 3. If xmm16-xmm31/ymm16-ymm31 registers are used:
> > > >    a. With AVX512VL, AVX512VL vector moves will be generated.
> > > >    b. Without AVX512VL, xmm16-xmm31/ymm16-ymm31 register to register
> > > >       move will be done with zmm register move.
> > > >
> > > [ ... ]
> > >
> > > > +/* Return the opcode of the TYPE_SSEMOV instruction.  To move from
> > > > +   or to xmm16-xmm31/ymm16-ymm31 registers, we either require
> > > > +   TARGET_AVX512VL or it is a register to register move which can
> > > > +   be done with zmm register move. */
> > > > +
> > > > +static const char *
> > > > +ix86_get_ssemov (rtx *operands, unsigned size,
> > > > +                 enum attr_mode insn_mode, machine_mode mode)
> > > > +{
> > > > +  char buf[128];
> > > > +  bool misaligned_p = (misaligned_operand (operands[0], mode)
> > > > +                       || misaligned_operand (operands[1], mode));
> > > > +  bool evex_reg_p = (EXT_REX_SSE_REG_P (operands[0])
> > > > +                     || EXT_REX_SSE_REG_P (operands[1]));
> > > > +  machine_mode scalar_mode;
> > > > +
> > > > +  else if (SCALAR_INT_MODE_P (scalar_mode))
> > > > +    {
> > > > +      switch (scalar_mode)
> > > > +        {
> > > > +        case E_QImode:
> > > > +          if (size == 64)
> > > > +            opcode = (misaligned_p
> > > > +                      ? (TARGET_AVX512BW
> > > > +                         ? "vmovdqu8"
> > > > +                         : "vmovdqu64")
> > > > +                      : "vmovdqa64");
> > > > +          else if (evex_reg_p)
> > > > +            {
> > > > +              if (TARGET_AVX512VL)
> > > > +                opcode = (misaligned_p
> > > > +                          ? (TARGET_AVX512BW
> > > > +                             ? "vmovdqu8"
> > > > +                             : "vmovdqu64")
> > > > +                          : "vmovdqa64");
> > > > +            }
> > > > +          else
> > > > +            opcode = (misaligned_p
> > > > +                      ? (TARGET_AVX512BW
> > > > +                         ? "vmovdqu8"
> > > > +                         : "%vmovdqu")
> > > > +                      : "%vmovdqa");
> > > > +          break;
> > > > +        case E_HImode:
> > > > +          if (size == 64)
> > > > +            opcode = (misaligned_p
> > > > +                      ? (TARGET_AVX512BW
> > > > +                         ? "vmovdqu16"
> > > > +                         : "vmovdqu64")
> > > > +                      : "vmovdqa64");
> > > > +          else if (evex_reg_p)
> > > > +            {
> > > > +              if (TARGET_AVX512VL)
> > > > +                opcode = (misaligned_p
> > > > +                          ? (TARGET_AVX512BW
> > > > +                             ? "vmovdqu16"
> > > > +                             : "vmovdqu64")
> > > > +                          : "vmovdqa64");
> > > > +            }
> > > > +          else
> > > > +            opcode = (misaligned_p
> > > > +                      ? (TARGET_AVX512BW
> > > > +                         ? "vmovdqu16"
> > > > +                         : "%vmovdqu")
> > > > +                      : "%vmovdqa");
> > > > +          break;
> > > > +        case E_SImode:
> > > > +          if (size == 64)
> > > > +            opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
> > > > +          else if (evex_reg_p)
> > > > +            {
> > > > +              if (TARGET_AVX512VL)
> > > > +                opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
> > > > +            }
> > > > +          else
> > > > +            opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
> > > > +          break;
> > > > +        case E_DImode:
> > > > +        case E_TImode:
> > > > +        case E_OImode:
> > > > +          if (size == 64)
> > > > +            opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
> > > > +          else if (evex_reg_p)
> > > > +            {
> > > > +              if (TARGET_AVX512VL)
> > > > +                opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
> > > > +            }
> > > > +          else
> > > > +            opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
> > > > +          break;
> > > > +        case E_XImode:
> > > > +          opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
> > > > +          break;
> > > > +        default:
> > > > +          gcc_unreachable ();
> > > > +        }
> > > > +    }
> > > > +  else
> > > > +    gcc_unreachable ();
> > > > +
> > > > +  if (!opcode)
> > > > +    {
> > > > +      /* NB: We get here only because we move xmm16-xmm31/ymm16-ymm31
> > > > +         registers without AVX512VL by using zmm register move. */
> > > So the overall flow control in here is rather convoluted.  I hate the way
> > > you don't set OPCODE above and then do it down here.  I would suggest
> > > breaking the !opcode bits into its own little function.  Then above in
> > > those places where you do
> > >
> > >   if (TARGET_AVX512VL)
> > >     opcode = <whatever>;
> > >
> > > Instead change those to something like
> > >
> > >   if (TARGET_AVX512VL)
> > >     opcode = <whatever>;
> > >   else
> > >     opcode = new_function (...)
> > >
> > > That way opcode is set on every path through the major if-else in this
> > > function.
> > >
> > > Second when I suggested you break the patch up on a per-pattern basis, I
> > > probably should have also said that I would start with the minimal
> > > support in ix86_get_ssemov and ix86_output_ssemov to support the pattern
> > > you just converted.  That way the mapping from current code to new code
> > > is more obvious.
> >
> > I will do these.  On x86, different instructions can move vector
> > registers.  They all do the same thing.  But some are preferred over
> > others, depending on tuning options.
> I know.
>
> > > As it stands the breaking into separate patches didn't really help much
> > > because we still have all the complexity in ix86_get_ssemov and
> > > ix86_output_ssemov in patch #1 and that's the code I'm most worried
> > > about verifying we get right, particularly at this stage.  I literally
> > > can't take any patch and map from the old code to the new code without
> > > having to understand all of patch #1.
> >
> > The old code is very convoluted and wrong in some cases.  I am trying to
> > clean it up.  I will update my patches based on your feedback.
> Thanks.  I was going to try and break those two functions down on my own, but
> you're more likely to get it right than I am :-)
>
How about this?  If it looks OK, I will post the whole patch set.

Thanks.

-- 
H.J.
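(Not part of the patch: a hypothetical snippet illustrating rules 2 and 3b
from the commit message below, assuming -O2 -mavx512f -mno-avx512vl on
x86-64.  The assembly named in the comments is my reading of the rules,
not verified compiler output.)

#include <immintrin.h>

/* Rule 3b: without AVX512VL, a copy out of an EVEX-only register has no
   VEX form and no 128-bit EVEX form, so it should be emitted as a zmm
   register move, e.g. vmovdqa64 %zmm16, %zmm0.  */
__m128i
through_xmm16 (__m128i x)
{
  register __m128i t __asm__ ("xmm16") = x;
  __asm__ ("" : "+v" (t));
  return t;
}

/* Rule 2: only xmm0-xmm15 are involved, so the move should stay
   SSE/VEX-encoded, e.g. vmovdqa %xmm1, %xmm0.  */
__m128i
through_xmm1 (__m128i x)
{
  register __m128i t __asm__ ("xmm1") = x;
  __asm__ ("" : "+x" (t));
  return t;
}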
From 3964b63d5ef086fa7466992f703bc1ec6de085dc Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.to...@gmail.com>
Date: Tue, 12 Feb 2019 13:25:41 -0800
Subject: [PATCH 01/10] i386: Properly encode vector registers in vector move

On x86, when AVX and AVX512 are enabled, vector move instructions can
be encoded with either 2-byte/3-byte VEX (AVX) or 4-byte EVEX (AVX512):

   0:	c5 f9 6f d1          	vmovdqa %xmm1,%xmm2
   4:	62 f1 fd 08 6f d1    	vmovdqa64 %xmm1,%xmm2

We prefer VEX encoding over EVEX since VEX is shorter.  Also AVX512F
only supports 512-bit vector moves.  AVX512F + AVX512VL supports 128-bit
and 256-bit vector moves.  Mode attributes on x86 vector move patterns
indicate target preferences of vector move encoding.  For vector
register to vector register move, we can use 512-bit vector move
instructions to move 128-bit/256-bit vector if AVX512VL isn't
available.  With AVX512F and AVX512VL, we should use VEX encoding for
128-bit/256-bit vector moves if upper 16 vector registers aren't used.
This patch adds a function, ix86_output_ssemov, to generate vector
moves:

1. If zmm registers are used, use EVEX encoding.
2. If xmm16-xmm31/ymm16-ymm31 registers aren't used, SSE or VEX encoding
   will be generated.
3. If xmm16-xmm31/ymm16-ymm31 registers are used:
   a. With AVX512VL, AVX512VL vector moves will be generated.
   b. Without AVX512VL, xmm16-xmm31/ymm16-ymm31 register to register
      move will be done with zmm register move.

Tested on AVX2 and AVX512 with and without --with-arch=native.

gcc/

	PR target/89229
	PR target/89346
	* config/i386/i386-protos.h (ix86_output_ssemov): New prototype.
	* config/i386/i386.c (ix86_get_ssemov): New function.
	(ix86_output_ssemov): Likewise.
	* config/i386/sse.md (VMOVE:mov<mode>_internal): Call
	ix86_output_ssemov for TYPE_SSEMOV.  Remove TARGET_AVX512VL
	check.

gcc/testsuite/

	PR target/89229
	PR target/89346
	* gcc.target/i386/avx512vl-vmovdqa64-1.c: Updated.
	* gcc.target/i386/pr89346.c: New test.
---
 gcc/config/i386/i386-protos.h                 |   2 +
 gcc/config/i386/i386.c                        | 203 ++++++++++++++++++
 gcc/config/i386/sse.md                        |  98 +--------
 .../gcc.target/i386/avx512vl-vmovdqa64-1.c    |   7 +-
 gcc/testsuite/gcc.target/i386/pr89346.c       |  15 ++
 5 files changed, 225 insertions(+), 100 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89346.c

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 266381ca5a6..39fcaa0ad5f 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -38,6 +38,8 @@ extern void ix86_expand_split_stack_prologue (void);
 extern void ix86_output_addr_vec_elt (FILE *, int);
 extern void ix86_output_addr_diff_elt (FILE *, int, int);
 
+extern const char *ix86_output_ssemov (rtx_insn *, rtx *);
+
 extern enum calling_abi ix86_cfun_abi (void);
 extern enum calling_abi ix86_function_type_abi (const_tree);
 
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index dac7a3fc5fd..4602149e10c 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -4915,6 +4915,209 @@ ix86_pre_reload_split (void)
 	  && !(cfun->curr_properties & PROP_rtl_split_insns));
 }
 
+/* Return the opcode of the TYPE_SSEMOV instruction.  To move from
+   or to xmm16-xmm31/ymm16-ymm31 registers, we either require
+   TARGET_AVX512VL or it is a register to register move which can
+   be done with zmm register move.  */
+
+static const char *
+ix86_get_ssemov (rtx *operands, unsigned size,
+                 enum attr_mode insn_mode, machine_mode mode)
+{
+  char buf[128];
+  bool misaligned_p = (misaligned_operand (operands[0], mode)
+                       || misaligned_operand (operands[1], mode));
+  bool evex_reg_p = (size == 64
+                     || EXT_REX_SSE_REG_P (operands[0])
+                     || EXT_REX_SSE_REG_P (operands[1]));
+  machine_mode scalar_mode;
+
+  const char *opcode = NULL;
+  enum
+    {
+      opcode_int,
+      opcode_float,
+      opcode_double
+    } type = opcode_int;
+
+  switch (insn_mode)
+    {
+    case MODE_V16SF:
+    case MODE_V8SF:
+    case MODE_V4SF:
+      scalar_mode = E_SFmode;
+      type = opcode_float;
+      break;
+    case MODE_V8DF:
+    case MODE_V4DF:
+    case MODE_V2DF:
+      scalar_mode = E_DFmode;
+      type = opcode_double;
+      break;
+    case MODE_XI:
+    case MODE_OI:
+    case MODE_TI:
+      scalar_mode = GET_MODE_INNER (mode);
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
+     we can only use zmm register move without memory operand.  */
+  if (evex_reg_p
+      && !TARGET_AVX512VL
+      && GET_MODE_SIZE (mode) < 64)
+    {
+      if (memory_operand (operands[0], mode)
+          || memory_operand (operands[1], mode))
+        gcc_unreachable ();
+      size = 64;
+      switch (type)
+        {
+        case opcode_int:
+          opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
+          break;
+        case opcode_float:
+          opcode = misaligned_p ? "vmovups" : "vmovaps";
+          break;
+        case opcode_double:
+          opcode = misaligned_p ? "vmovupd" : "vmovapd";
+          break;
+        }
+    }
+  else if (SCALAR_FLOAT_MODE_P (scalar_mode))
+    {
+      switch (scalar_mode)
+        {
+        case E_SFmode:
+          opcode = misaligned_p ? "%vmovups" : "%vmovaps";
+          break;
+        case E_DFmode:
+          opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
+          break;
+        case E_TFmode:
+          if (evex_reg_p)
+            opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
+          else
+            opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
+          break;
+        default:
+          gcc_unreachable ();
+        }
+    }
+  else if (SCALAR_INT_MODE_P (scalar_mode))
+    {
+      switch (scalar_mode)
+        {
+        case E_QImode:
+          if (evex_reg_p)
+            opcode = (misaligned_p
+                      ? (TARGET_AVX512BW
+                         ? "vmovdqu8"
+                         : "vmovdqu64")
+                      : "vmovdqa64");
+          else
+            opcode = (misaligned_p
+                      ? (TARGET_AVX512BW
+                         ? "vmovdqu8"
+                         : "%vmovdqu")
+                      : "%vmovdqa");
+          break;
+        case E_HImode:
+          if (evex_reg_p)
+            opcode = (misaligned_p
+                      ? (TARGET_AVX512BW
+                         ? "vmovdqu16"
+                         : "vmovdqu64")
+                      : "vmovdqa64");
+          else
+            opcode = (misaligned_p
+                      ? (TARGET_AVX512BW
+                         ? "vmovdqu16"
+                         : "%vmovdqu")
+                      : "%vmovdqa");
+          break;
+        case E_SImode:
+          if (evex_reg_p)
+            opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
+          else
+            opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
+          break;
+        case E_DImode:
+        case E_TImode:
+        case E_OImode:
+          if (evex_reg_p)
+            opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
+          else
+            opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
+          break;
+        case E_XImode:
+          opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
+          break;
+        default:
+          gcc_unreachable ();
+        }
+    }
+  else
+    gcc_unreachable ();
+
+  switch (size)
+    {
+    case 64:
+      snprintf (buf, sizeof (buf), "%s\t{%%g1, %%g0|%%g0, %%g1}",
+                opcode);
+      break;
+    case 32:
+      snprintf (buf, sizeof (buf), "%s\t{%%t1, %%t0|%%t0, %%t1}",
+                opcode);
+      break;
+    case 16:
+      snprintf (buf, sizeof (buf), "%s\t{%%x1, %%x0|%%x0, %%x1}",
+                opcode);
+      break;
+    default:
+      gcc_unreachable ();
+    }
+  output_asm_insn (buf, operands);
+  return "";
+}
+
+/* Return the template of the TYPE_SSEMOV instruction to move
+   operands[1] into operands[0].  */
+
+const char *
+ix86_output_ssemov (rtx_insn *insn, rtx *operands)
+{
+  machine_mode mode = GET_MODE (operands[0]);
+  if (get_attr_type (insn) != TYPE_SSEMOV
+      || mode != GET_MODE (operands[1]))
+    gcc_unreachable ();
+
+  enum attr_mode insn_mode = get_attr_mode (insn);
+
+  switch (insn_mode)
+    {
+    case MODE_XI:
+    case MODE_V8DF:
+    case MODE_V16SF:
+      return ix86_get_ssemov (operands, 64, insn_mode, mode);
+
+    case MODE_OI:
+    case MODE_V4DF:
+    case MODE_V8SF:
+      return ix86_get_ssemov (operands, 32, insn_mode, mode);
+
+    case MODE_TI:
+    case MODE_V2DF:
+    case MODE_V4SF:
+      return ix86_get_ssemov (operands, 16, insn_mode, mode);
+
+    default:
+      gcc_unreachable ();
+    }
+}
+
 /* Returns true if OP contains a symbol reference */
 
 bool
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ee1f138d1af..8f5902292c6 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1013,98 +1013,7 @@ (define_insn "mov<mode>_internal"
       return standard_sse_constant_opcode (insn, operands);
 
     case TYPE_SSEMOV:
-      /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
-         in avx512f, so we need to use workarounds, to access sse registers
-         16-31, which are evex-only.  In avx512vl we don't need workarounds.  */
-      if (TARGET_AVX512F && <MODE_SIZE> < 64 && !TARGET_AVX512VL
-          && (EXT_REX_SSE_REG_P (operands[0])
-              || EXT_REX_SSE_REG_P (operands[1])))
-        {
-          if (memory_operand (operands[0], <MODE>mode))
-            {
-              if (<MODE_SIZE> == 32)
-                return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
-              else if (<MODE_SIZE> == 16)
-                return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
-              else
-                gcc_unreachable ();
-            }
-          else if (memory_operand (operands[1], <MODE>mode))
-            {
-              if (<MODE_SIZE> == 32)
-                return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
-              else if (<MODE_SIZE> == 16)
-                return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
-              else
-                gcc_unreachable ();
-            }
-          else
-            /* Reg -> reg move is always aligned.  Just use wider move.  */
-            switch (get_attr_mode (insn))
-              {
-              case MODE_V8SF:
-              case MODE_V4SF:
-                return "vmovaps\t{%g1, %g0|%g0, %g1}";
-              case MODE_V4DF:
-              case MODE_V2DF:
-                return "vmovapd\t{%g1, %g0|%g0, %g1}";
-              case MODE_OI:
-              case MODE_TI:
-                return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
-              default:
-                gcc_unreachable ();
-              }
-        }
-
-      switch (get_attr_mode (insn))
-        {
-        case MODE_V16SF:
-        case MODE_V8SF:
-        case MODE_V4SF:
-          if (misaligned_operand (operands[0], <MODE>mode)
-              || misaligned_operand (operands[1], <MODE>mode))
-            return "%vmovups\t{%1, %0|%0, %1}";
-          else
-            return "%vmovaps\t{%1, %0|%0, %1}";
-
-        case MODE_V8DF:
-        case MODE_V4DF:
-        case MODE_V2DF:
-          if (misaligned_operand (operands[0], <MODE>mode)
-              || misaligned_operand (operands[1], <MODE>mode))
-            return "%vmovupd\t{%1, %0|%0, %1}";
-          else
-            return "%vmovapd\t{%1, %0|%0, %1}";
-
-        case MODE_OI:
-        case MODE_TI:
-          if (misaligned_operand (operands[0], <MODE>mode)
-              || misaligned_operand (operands[1], <MODE>mode))
-            return TARGET_AVX512VL
-                   && (<MODE>mode == V4SImode
-                       || <MODE>mode == V2DImode
-                       || <MODE>mode == V8SImode
-                       || <MODE>mode == V4DImode
-                       || TARGET_AVX512BW)
-                   ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
-                   : "%vmovdqu\t{%1, %0|%0, %1}";
-          else
-            return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
-                                   : "%vmovdqa\t{%1, %0|%0, %1}";
-        case MODE_XI:
-          if (misaligned_operand (operands[0], <MODE>mode)
-              || misaligned_operand (operands[1], <MODE>mode))
-            return (<MODE>mode == V16SImode
                    || <MODE>mode == V8DImode
-                    || TARGET_AVX512BW)
-                   ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
-                   : "vmovdqu64\t{%1, %0|%0, %1}";
-          else
-            return "vmovdqa64\t{%1, %0|%0, %1}";
-
-        default:
-          gcc_unreachable ();
-        }
+      return ix86_output_ssemov (insn, operands);
 
     default:
       gcc_unreachable ();
@@ -1113,10 +1022,7 @@ (define_insn "mov<mode>_internal"
   [(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
    (set_attr "prefix" "maybe_vex")
    (set (attr "mode")
-        (cond [(and (eq_attr "alternative" "1")
-                    (match_test "TARGET_AVX512VL"))
-                 (const_string "<sseinsnmode>")
-               (match_test "TARGET_AVX")
+        (cond [(match_test "TARGET_AVX")
                  (const_string "<sseinsnmode>")
                (ior (not (match_test "TARGET_SSE2"))
                     (match_test "optimize_function_for_size_p (cfun)"))
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c
index 14fe4b84544..db4d9d14875 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c
@@ -4,14 +4,13 @@
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\\(\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */
-/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\\(\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */
+/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\\(\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */
+/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\\(\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\nxy\]*\\(.{5,6}(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */
-/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\nxy\]*\\((?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\nxy\]*\\(.{5,6}(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 
diff --git a/gcc/testsuite/gcc.target/i386/pr89346.c b/gcc/testsuite/gcc.target/i386/pr89346.c
new file mode 100644
index 00000000000..cdc9accf521
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89346.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+#include <immintrin.h>
+
+long long *p;
+volatile __m256i y;
+
+void
+foo (void)
+{
+  _mm256_store_epi64 (p, y);
+}
+
+/* { dg-final { scan-assembler-not "vmovdqa64" } } */
-- 
2.24.1