On Mon, Oct 15, 2012 at 6:39 PM, Ulrich Weigand <uweig...@de.ibm.com> wrote:
>> > I was wondering if the i386 port maintainers could have a look at this
>> > pattern.  Shouldn't we really have two patterns, one to *load* an
>> > unaligned value and one to *store* an unaligned value, and not permit
>> > that memory access to get reloaded?
>>
>> Please find attached a fairly mechanical patch that splits the
>> move_unaligned pattern into load_unaligned and store_unaligned
>> patterns.  We have had some problems with this pattern, and we finally
>> have a reason to make unaligned moves more robust.
>>
>> I will wait for confirmation that the attached patch avoids the
>> failure you are seeing with your reload patch.
>
> Yes, this patch does in fact fix the failure I was seeing with the
> reload patch.  (A full regression test shows a couple of extra fails:
>
> FAIL: gcc.target/i386/avx256-unaligned-load-1.c scan-assembler sse_movups/1
> FAIL: gcc.target/i386/avx256-unaligned-load-3.c scan-assembler sse2_movupd/1
> FAIL: gcc.target/i386/avx256-unaligned-load-4.c scan-assembler avx_movups256/1
> FAIL: gcc.target/i386/avx256-unaligned-store-4.c scan-assembler avx_movups256/2
>
> But I guess these tests simply need to be updated for the new pattern
> names.)

2012-10-15  Uros Bizjak  <ubiz...@gmail.com>

	* config/i386/sse.md (UNSPEC_MOVU): Remove.
	(UNSPEC_LOADU): New.
	(UNSPEC_STOREU): Ditto.
	(<sse>_movu<ssemodesuffix><avxsizesuffix>): Split to ...
	(<sse>_loadu<ssemodesuffix><avxsizesuffix>): ... this and ...
	(<sse>_storeu<ssemodesuffix><avxsizesuffix>): ... this.
	(<sse2>_movdqu<avxsizesuffix>): Split to ...
	(<sse2>_loaddqu<avxsizesuffix>): ... this and ...
	(<sse2>_storedqu<avxsizesuffix>): ... this.
	(*sse4_2_pcmpestr_unaligned): Update.
	(*sse4_2_pcmpistr_unaligned): Ditto.
	* config/i386/i386.c (ix86_avx256_split_vector_move_misalign): Use
	gen_avx_load{dqu,ups,upd}256 to load from unaligned memory and
	gen_avx_store{dqu,ups,upd}256 to store to unaligned memory.
	(ix86_expand_vector_move_misalign): Use gen_sse_loadups or
	gen_sse2_load{dqu,upd} to load from unaligned memory and
	gen_sse_storeups or gen_sse2_store{dqu,upd} to store to
	unaligned memory.
	(struct builtin_description bdesc_spec) <IX86_BUILTIN_LOADUPS>:
	Use CODE_FOR_sse_loadups.
	<IX86_BUILTIN_LOADUPD>: Use CODE_FOR_sse2_loadupd.
	<IX86_BUILTIN_LOADDQU>: Use CODE_FOR_sse2_loaddqu.
	<IX86_BUILTIN_STOREUPS>: Use CODE_FOR_sse_storeups.
	<IX86_BUILTIN_STOREUPD>: Use CODE_FOR_sse2_storeupd.
	<IX86_BUILTIN_STOREDQU>: Use CODE_FOR_sse2_storedqu.
	<IX86_BUILTIN_LOADUPS256>: Use CODE_FOR_avx_loadups256.
	<IX86_BUILTIN_LOADUPD256>: Use CODE_FOR_avx_loadupd256.
	<IX86_BUILTIN_LOADDQU256>: Use CODE_FOR_avx_loaddqu256.
	<IX86_BUILTIN_STOREUPS256>: Use CODE_FOR_avx_storeups256.
	<IX86_BUILTIN_STOREUPD256>: Use CODE_FOR_avx_storeupd256.
	<IX86_BUILTIN_STOREDQU256>: Use CODE_FOR_avx_storedqu256.

testsuite/ChangeLog:

2012-10-15  Uros Bizjak  <ubiz...@gmail.com>

	* gcc.target/i386/avx256-unaligned-load-1.c: Update asm scan patterns.
	* gcc.target/i386/avx256-unaligned-load-2.c: Ditto.
	* gcc.target/i386/avx256-unaligned-load-3.c: Ditto.
	* gcc.target/i386/avx256-unaligned-load-4.c: Ditto.
	* gcc.target/i386/avx256-unaligned-store-1.c: Ditto.
	* gcc.target/i386/avx256-unaligned-store-2.c: Ditto.
	* gcc.target/i386/avx256-unaligned-store-3.c: Ditto.
	* gcc.target/i386/avx256-unaligned-store-4.c: Ditto.

The patch was bootstrapped and regression tested on x86_64-pc-linux-gnu
{,-m32} and committed to mainline.  A similar patch will be committed to
the 4.7 branch.

Uros.
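Just to illustrate the effect of the split for anyone reading along -- the
snippet below is only an example of mine, it is not part of the patch or of
the testsuite.  Compiled with -O2 -mavx, the unaligned 256-bit accesses in a
loop like this are expanded through the new load-only and store-only
patterns (avx_loadups256 and avx_storeups256, or the vinsertf128 /
vextractf128 split sequence when -mavx256-split-unaligned-load/-store is in
effect), and because each pattern now fixes its memory side at expansion
time, reload can no longer turn one of these unspecs into an invalid
memory-to-memory form:

/* Illustrative example only -- not part of the committed patch.  */

#include <immintrin.h>

void
scale_unaligned (float *dst, const float *src, int n)
{
  int i;

  /* Neither pointer is known to be 32-byte aligned, so both the load
     and the store must use unaligned vector accesses.  */
  for (i = 0; i + 8 <= n; i += 8)
    {
      __m256 v = _mm256_loadu_ps (src + i);      /* unaligned load  */
      v = _mm256_mul_ps (v, _mm256_set1_ps (2.0f));
      _mm256_storeu_ps (dst + i, v);             /* unaligned store */
    }
}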
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 192420)
+++ config/i386/i386.c	(working copy)
@@ -16059,7 +16059,8 @@ ix86_avx256_split_vector_move_misalign (rtx op0, r
 {
   rtx m;
   rtx (*extract) (rtx, rtx, rtx);
-  rtx (*move_unaligned) (rtx, rtx);
+  rtx (*load_unaligned) (rtx, rtx);
+  rtx (*store_unaligned) (rtx, rtx);
   enum machine_mode mode;
 
   switch (GET_MODE (op0))
@@ -16068,39 +16069,52 @@ ix86_avx256_split_vector_move_misalign (rtx op0, r
       gcc_unreachable ();
     case V32QImode:
       extract = gen_avx_vextractf128v32qi;
-      move_unaligned = gen_avx_movdqu256;
+      load_unaligned = gen_avx_loaddqu256;
+      store_unaligned = gen_avx_storedqu256;
       mode = V16QImode;
       break;
     case V8SFmode:
       extract = gen_avx_vextractf128v8sf;
-      move_unaligned = gen_avx_movups256;
+      load_unaligned = gen_avx_loadups256;
+      store_unaligned = gen_avx_storeups256;
       mode = V4SFmode;
       break;
     case V4DFmode:
       extract = gen_avx_vextractf128v4df;
-      move_unaligned = gen_avx_movupd256;
+      load_unaligned = gen_avx_loadupd256;
+      store_unaligned = gen_avx_storeupd256;
       mode = V2DFmode;
       break;
     }
 
-  if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
+  if (MEM_P (op1))
     {
-      rtx r = gen_reg_rtx (mode);
-      m = adjust_address (op1, mode, 0);
-      emit_move_insn (r, m);
-      m = adjust_address (op1, mode, 16);
-      r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
-      emit_move_insn (op0, r);
+      if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
+	{
+	  rtx r = gen_reg_rtx (mode);
+	  m = adjust_address (op1, mode, 0);
+	  emit_move_insn (r, m);
+	  m = adjust_address (op1, mode, 16);
+	  r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
+	  emit_move_insn (op0, r);
+	}
+      else
+	emit_insn (load_unaligned (op0, op1));
     }
-  else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
+  else if (MEM_P (op0))
     {
-      m = adjust_address (op0, mode, 0);
-      emit_insn (extract (m, op1, const0_rtx));
-      m = adjust_address (op0, mode, 16);
-      emit_insn (extract (m, op1, const1_rtx));
+      if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
+	{
+	  m = adjust_address (op0, mode, 0);
+	  emit_insn (extract (m, op1, const0_rtx));
+	  m = adjust_address (op0, mode, 16);
+	  emit_insn (extract (m, op1, const1_rtx));
+	}
+      else
+	emit_insn (store_unaligned (op0, op1));
     }
   else
-    emit_insn (move_unaligned (op0, op1));
+    gcc_unreachable ();
 }
 
 /* Implement the movmisalign patterns for SSE.  Non-SSE modes go
@@ -16195,7 +16209,7 @@ ix86_expand_vector_move_misalign (enum machine_mod
 	  op0 = gen_lowpart (V16QImode, op0);
 	  op1 = gen_lowpart (V16QImode, op1);
 	  /* We will eventually emit movups based on insn attributes.  */
-	  emit_insn (gen_sse2_movdqu (op0, op1));
+	  emit_insn (gen_sse2_loaddqu (op0, op1));
 	}
       else if (TARGET_SSE2 && mode == V2DFmode)
 	{
@@ -16207,7 +16221,7 @@ ix86_expand_vector_move_misalign (enum machine_mod
	      || optimize_function_for_size_p (cfun))
 	    {
 	      /* We will eventually emit movups based on insn attributes.  */
-	      emit_insn (gen_sse2_movupd (op0, op1));
+	      emit_insn (gen_sse2_loadupd (op0, op1));
 	      return;
 	    }
 
@@ -16245,7 +16259,7 @@ ix86_expand_vector_move_misalign (enum machine_mod
 	    {
 	      op0 = gen_lowpart (V4SFmode, op0);
 	      op1 = gen_lowpart (V4SFmode, op1);
-	      emit_insn (gen_sse_movups (op0, op1));
+	      emit_insn (gen_sse_loadups (op0, op1));
 	      return;
 	    }
 
@@ -16270,7 +16284,7 @@ ix86_expand_vector_move_misalign (enum machine_mod
 	  op0 = gen_lowpart (V16QImode, op0);
 	  op1 = gen_lowpart (V16QImode, op1);
 	  /* We will eventually emit movups based on insn attributes.  */
-	  emit_insn (gen_sse2_movdqu (op0, op1));
+	  emit_insn (gen_sse2_storedqu (op0, op1));
 	}
       else if (TARGET_SSE2 && mode == V2DFmode)
 	{
@@ -16279,7 +16293,7 @@ ix86_expand_vector_move_misalign (enum machine_mod
	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
	      || optimize_function_for_size_p (cfun))
 	    /* We will eventually emit movups based on insn attributes.  */
-	    emit_insn (gen_sse2_movupd (op0, op1));
+	    emit_insn (gen_sse2_storeupd (op0, op1));
 	  else
 	    {
 	      m = adjust_address (op0, DFmode, 0);
@@ -16299,7 +16313,7 @@ ix86_expand_vector_move_misalign (enum machine_mod
	      || optimize_function_for_size_p (cfun))
 	    {
 	      op0 = gen_lowpart (V4SFmode, op0);
-	      emit_insn (gen_sse_movups (op0, op1));
+	      emit_insn (gen_sse_storeups (op0, op1));
 	    }
 	  else
 	    {
@@ -26765,9 +26779,9 @@ static const struct builtin_description bdesc_spec
   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
 
   /* SSE */
-  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
+  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
-  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
+  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
 
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
@@ -26781,14 +26795,14 @@ static const struct builtin_description bdesc_spec
   /* SSE2 */
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
   { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
 
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
@@ -26813,12 +26827,12 @@ static const struct builtin_description bdesc_spec
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
 
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
 
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md	(revision 192420)
+++ config/i386/sse.md	(working copy)
@@ -21,7 +21,8 @@
 (define_c_enum "unspec" [
   ;; SSE
   UNSPEC_MOVNT
-  UNSPEC_MOVU
+  UNSPEC_LOADU
+  UNSPEC_STOREU
 
   ;; SSE3
   UNSPEC_LDDQU
@@ -586,12 +587,12 @@
   DONE;
 })
 
-(define_insn "<sse>_movu<ssemodesuffix><avxsizesuffix>"
-  [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
+(define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix>"
+  [(set (match_operand:VF 0 "register_operand" "=x")
	(unspec:VF
-	  [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
-	  UNSPEC_MOVU))]
-  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+	  [(match_operand:VF 1 "memory_operand" "m")]
+	  UNSPEC_LOADU))]
+  "TARGET_SSE"
 {
   switch (get_attr_mode (insn))
     {
@@ -618,11 +619,12 @@
	  ]
	  (const_string "<MODE>")))])
 
-(define_insn "<sse2>_movdqu<avxsizesuffix>"
-  [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
-	(unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
-		    UNSPEC_MOVU))]
-  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+(define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
+  [(set (match_operand:VF 0 "memory_operand" "=m")
+	(unspec:VF
+	  [(match_operand:VF 1 "register_operand" "x")]
+	  UNSPEC_STOREU))]
+  "TARGET_SSE"
 {
   switch (get_attr_mode (insn))
     {
@@ -630,6 +632,37 @@
     case MODE_V4SF:
       return "%vmovups\t{%1, %0|%0, %1}";
     default:
+      return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
+    }
+}
+  [(set_attr "type" "ssemov")
+   (set_attr "movu" "1")
+   (set_attr "prefix" "maybe_vex")
+   (set (attr "mode")
+	(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+		 (const_string "<ssePSmode>")
+	       (and (eq_attr "alternative" "1")
+		    (match_test "TARGET_SSE_TYPELESS_STORES"))
+		 (const_string "<ssePSmode>")
+	       (match_test "TARGET_AVX")
+		 (const_string "<MODE>")
+	       (match_test "optimize_function_for_size_p (cfun)")
+		 (const_string "V4SF")
+	      ]
+	      (const_string "<MODE>")))])
+
+(define_insn "<sse2>_loaddqu<avxsizesuffix>"
+  [(set (match_operand:VI1 0 "register_operand" "=x")
+	(unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
+		    UNSPEC_LOADU))]
+  "TARGET_SSE2"
+{
+  switch (get_attr_mode (insn))
+    {
+    case MODE_V8SF:
+    case MODE_V4SF:
+      return "%vmovups\t{%1, %0|%0, %1}";
+    default:
       return "%vmovdqu\t{%1, %0|%0, %1}";
     }
 }
@@ -654,6 +687,42 @@
	  ]
	  (const_string "<sseinsnmode>")))])
 
+(define_insn "<sse2>_storedqu<avxsizesuffix>"
+  [(set (match_operand:VI1 0 "memory_operand" "=m")
+	(unspec:VI1 [(match_operand:VI1 1 "register_operand" "x")]
+		    UNSPEC_STOREU))]
+  "TARGET_SSE2"
+{
+  switch (get_attr_mode (insn))
+    {
+    case MODE_V8SF:
+    case MODE_V4SF:
+      return "%vmovups\t{%1, %0|%0, %1}";
+    default:
+      return "%vmovdqu\t{%1, %0|%0, %1}";
+    }
+}
+  [(set_attr "type" "ssemov")
+   (set_attr "movu" "1")
+   (set (attr "prefix_data16")
+     (if_then_else
+       (match_test "TARGET_AVX")
+     (const_string "*")
+     (const_string "1")))
+   (set_attr "prefix" "maybe_vex")
+   (set (attr "mode")
+	(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+		 (const_string "<ssePSmode>")
+	       (and (eq_attr "alternative" "1")
+		    (match_test "TARGET_SSE_TYPELESS_STORES"))
+		 (const_string "<ssePSmode>")
+	       (match_test "TARGET_AVX")
+		 (const_string "<sseinsnmode>")
+	       (match_test "optimize_function_for_size_p (cfun)")
+		 (const_string "V4SF")
+	      ]
+	      (const_string "<sseinsnmode>")))])
+
 (define_insn "<sse3>_lddqu<avxsizesuffix>"
   [(set (match_operand:VI1 0 "register_operand" "=x")
	(unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
@@ -9307,7 +9376,7 @@
	    (match_operand:SI 3 "register_operand" "a")
	    (unspec:V16QI
	      [(match_operand:V16QI 4 "memory_operand" "m")]
-	      UNSPEC_MOVU)
+	      UNSPEC_LOADU)
	    (match_operand:SI 5 "register_operand" "d")
	    (match_operand:SI 6 "const_0_to_255_operand" "n")]
	   UNSPEC_PCMPESTR))
@@ -9315,7 +9384,7 @@
	  (unspec:V16QI
	    [(match_dup 2)
	     (match_dup 3)
-	     (unspec:V16QI [(match_dup 4)] UNSPEC_MOVU)
+	     (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
	     (match_dup 5)
	     (match_dup 6)]
	    UNSPEC_PCMPESTR))
@@ -9323,7 +9392,7 @@
	  (unspec:CC
	    [(match_dup 2)
	     (match_dup 3)
-	     (unspec:V16QI [(match_dup 4)] UNSPEC_MOVU)
+	     (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
	     (match_dup 5)
	     (match_dup 6)]
	    UNSPEC_PCMPESTR))]
@@ -9498,19 +9567,19 @@
	   [(match_operand:V16QI 2 "register_operand" "x")
	    (unspec:V16QI
	      [(match_operand:V16QI 3 "memory_operand" "m")]
-	      UNSPEC_MOVU)
+	      UNSPEC_LOADU)
	    (match_operand:SI 4 "const_0_to_255_operand" "n")]
	   UNSPEC_PCMPISTR))
    (set (match_operand:V16QI 1 "register_operand" "=Yz")
	(unspec:V16QI
	  [(match_dup 2)
-	   (unspec:V16QI [(match_dup 3)] UNSPEC_MOVU)
+	   (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
	   (match_dup 4)]
	  UNSPEC_PCMPISTR))
    (set (reg:CC FLAGS_REG)
	(unspec:CC
	  [(match_dup 2)
-	   (unspec:V16QI [(match_dup 3)] UNSPEC_MOVU)
+	   (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
	   (match_dup 4)]
	  UNSPEC_PCMPISTR))]
   "TARGET_SSE4_2
Index: testsuite/gcc.target/i386/avx256-unaligned-load-1.c
===================================================================
--- testsuite/gcc.target/i386/avx256-unaligned-load-1.c	(revision 192420)
+++ testsuite/gcc.target/i386/avx256-unaligned-load-1.c	(working copy)
@@ -14,6 +14,6 @@ avx_test (void)
     c[i] = a[i] * b[i+3];
 }
 
-/* { dg-final { scan-assembler-not "avx_movups256/1" } } */
-/* { dg-final { scan-assembler "sse_movups/1" } } */
+/* { dg-final { scan-assembler-not "avx_loadups256" } } */
+/* { dg-final { scan-assembler "sse_loadups" } } */
 /* { dg-final { scan-assembler "vinsertf128" } } */
Index: testsuite/gcc.target/i386/avx256-unaligned-load-2.c
===================================================================
--- testsuite/gcc.target/i386/avx256-unaligned-load-2.c	(revision 192420)
+++ testsuite/gcc.target/i386/avx256-unaligned-load-2.c	(working copy)
@@ -24,6 +24,6 @@ avx_test (void)
     }
 }
 
-/* { dg-final { scan-assembler-not "avx_movdqu256/1" } } */
-/* { dg-final { scan-assembler "sse2_movdqu/1" } } */
+/* { dg-final { scan-assembler-not "avx_loaddqu256" } } */
+/* { dg-final { scan-assembler "sse2_loaddqu" } } */
 /* { dg-final { scan-assembler "vinsert.128" } } */
Index: testsuite/gcc.target/i386/avx256-unaligned-load-3.c
===================================================================
--- testsuite/gcc.target/i386/avx256-unaligned-load-3.c	(revision 192420)
+++ testsuite/gcc.target/i386/avx256-unaligned-load-3.c	(working copy)
@@ -14,6 +14,6 @@ avx_test (void)
     c[i] = a[i] * b[i+3];
 }
 
-/* { dg-final { scan-assembler-not "avx_movupd256/1" } } */
-/* { dg-final { scan-assembler "sse2_movupd/1" } } */
+/* { dg-final { scan-assembler-not "avx_loadupd256" } } */
+/* { dg-final { scan-assembler "sse2_loadupd" } } */
 /* { dg-final { scan-assembler "vinsertf128" } } */
Index: testsuite/gcc.target/i386/avx256-unaligned-load-4.c
===================================================================
--- testsuite/gcc.target/i386/avx256-unaligned-load-4.c	(revision 192420)
+++ testsuite/gcc.target/i386/avx256-unaligned-load-4.c	(working copy)
@@ -14,6 +14,6 @@ avx_test (void)
     b[i] = a[i+3] * 2;
 }
 
-/* { dg-final { scan-assembler "avx_movups256/1" } } */
-/* { dg-final { scan-assembler-not "avx_movups/1" } } */
+/* { dg-final { scan-assembler "avx_loadups256" } } */
+/* { dg-final { scan-assembler-not "sse_loadups" } } */
 /* { dg-final { scan-assembler-not "vinsertf128" } } */
Index: testsuite/gcc.target/i386/avx256-unaligned-store-1.c
===================================================================
--- testsuite/gcc.target/i386/avx256-unaligned-store-1.c	(revision 192420)
+++ testsuite/gcc.target/i386/avx256-unaligned-store-1.c	(working copy)
@@ -17,6 +17,6 @@ avx_test (void)
     d[i] = c[i] * 20.0;
 }
 
-/* { dg-final { scan-assembler-not "avx_movups256/2" } } */
+/* { dg-final { scan-assembler-not "avx_storeups256" } } */
 /* { dg-final { scan-assembler "vmovups.*\\*movv4sf_internal/3" } } */
 /* { dg-final { scan-assembler "vextractf128" } } */
Index: testsuite/gcc.target/i386/avx256-unaligned-store-2.c
===================================================================
--- testsuite/gcc.target/i386/avx256-unaligned-store-2.c	(revision 192420)
+++ testsuite/gcc.target/i386/avx256-unaligned-store-2.c	(working copy)
@@ -24,6 +24,6 @@ avx_test (void)
     }
 }
 
-/* { dg-final { scan-assembler-not "avx_movdqu256/2" } } */
+/* { dg-final { scan-assembler-not "avx_storedqu256" } } */
 /* { dg-final { scan-assembler "vmovdqu.*\\*movv16qi_internal/3" } } */
 /* { dg-final { scan-assembler "vextract.128" } } */
Index: testsuite/gcc.target/i386/avx256-unaligned-store-3.c
===================================================================
--- testsuite/gcc.target/i386/avx256-unaligned-store-3.c	(revision 192420)
+++ testsuite/gcc.target/i386/avx256-unaligned-store-3.c	(working copy)
@@ -17,6 +17,6 @@ avx_test (void)
     d[i] = c[i] * 20.0;
 }
 
-/* { dg-final { scan-assembler-not "avx_movupd256/2" } } */
+/* { dg-final { scan-assembler-not "avx_storeupd256" } } */
 /* { dg-final { scan-assembler "vmovupd.*\\*movv2df_internal/3" } } */
 /* { dg-final { scan-assembler "vextractf128" } } */
Index: testsuite/gcc.target/i386/avx256-unaligned-store-4.c
===================================================================
--- testsuite/gcc.target/i386/avx256-unaligned-store-4.c	(revision 192420)
+++ testsuite/gcc.target/i386/avx256-unaligned-store-4.c	(working copy)
@@ -14,7 +14,7 @@ avx_test (void)
     b[i+3] = a[i] * c[i];
 }
 
-/* { dg-final { scan-assembler "avx_movups256/2" } } */
-/* { dg-final { scan-assembler-not "avx_movups/2" } } */
+/* { dg-final { scan-assembler "avx_storeups256" } } */
+/* { dg-final { scan-assembler-not "sse_storeups" } } */
 /* { dg-final { scan-assembler-not "\\*avx_movv4sf_internal/3" } } */
 /* { dg-final { scan-assembler-not "vextractf128" } } */