Support TImode CONST_WIDE_INT stores generated by piecewise stores. The performance impact needs to be verified before enabling TImode CONST_INT stores for __int128.
Tested on x86-64. OK for trunk? H.J. --- gcc/ * config/i386/i386.c (timode_scalar_to_vector_candidate_p): Allow TImode CONST_WIDE_INT store. (timode_scalar_chain::convert_insn): Handle CONST_WIDE_INT store. gcc/testsuite/ * gcc.target/i386/pieces-strcpy-1.c: New test. * gcc.target/i386/pieces-strcpy-2.c: Likewise. --- gcc/config/i386/i386.c | 23 ++++++++++++++++++++--- gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c | 15 +++++++++++++++ gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c | 15 +++++++++++++++ 3 files changed, 50 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 93eaab1..d086ede 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -2862,9 +2862,12 @@ timode_scalar_to_vector_candidate_p (rtx_insn *insn) if (MEM_P (dst)) { - /* Check for store. Only support store from register or standard - SSE constants. Memory must be aligned or unaligned store is - optimal. */ + /* Check for store. Memory must be aligned or unaligned store + is optimal. Only support store from register, standard SSE + constant or CONST_WIDE_INT generated from piecewise store. + + ??? Verify performance impact before enabling CONST_INT for + __int128 store. */ if (misaligned_operand (dst, TImode) && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL) return false; @@ -2875,6 +2878,7 @@ timode_scalar_to_vector_candidate_p (rtx_insn *insn) return false; case REG: + case CONST_WIDE_INT: return true; case CONST_INT: @@ -3868,6 +3872,19 @@ timode_scalar_chain::convert_insn (rtx_insn *insn) PUT_MODE (src, V1TImode); break; + case CONST_WIDE_INT: + if (NONDEBUG_INSN_P (insn)) + { + /* Since there are no instructions to store 128-bit constant, + temporary register usage is required. 
*/ + rtx tmp = gen_reg_rtx (V1TImode); + src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src)); + src = validize_mem (force_const_mem (V1TImode, src)); + emit_conversion_insns (gen_rtx_SET (dst, tmp), insn); + dst = tmp; + } + break; + case CONST_INT: switch (standard_sse_constant_p (src, TImode)) { diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c new file mode 100644 index 0000000..64b7329 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c @@ -0,0 +1,15 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */ + +extern char *strcpy (char *, const char *); + +void +foo (char *s) +{ + strcpy (s, + "1234567890abcdef123456abcdef5678123456abcdef567abcdef678" + "1234567"); +} + +/* { dg-final { scan-assembler-times "movdqa\[ \\t\]+\[^\n\]*%xmm" 4 } } */ +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c new file mode 100644 index 0000000..7421255 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c @@ -0,0 +1,15 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ + +extern char *strcpy (char *, const char *); + +void +foo (char *s) +{ + strcpy (s, + "1234567890abcdef123456abcdef5678123456abcdef567abcdef678" + "1234567"); +} + +/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\[^\n\]*%xmm" 4 } } */ +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */ -- 2.7.4