Support TImode CONST_WIDE_INT stores generated from piecewise stores.
The performance impact still needs to be verified before enabling
TImode CONST_INT stores for __int128.

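For reference, the kind of source that produces such a piecewise TImode
constant store is a copy of a constant string of at least 16 bytes that
the middle end expands by pieces, as in the new tests below.  A minimal
sketch (the function name and the expected code generation are
illustrative assumptions, not verified output):

  /* With -O2 -msse2 on x86-64, a copy like this is expected to be
     expanded by pieces into 128-bit constant stores, which the STV
     pass can now turn into movdqa loads from .rodata followed by
     movups stores.  Whether by-pieces expansion actually triggers
     depends on the tuning parameters in effect.  */
  extern char *strcpy (char *, const char *);

  void
  bar (char *s)
  {
    strcpy (s, "0123456789abcdef0123456789abcdef"); /* 33 bytes with NUL.  */
  }
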
Tested on x86-64.  OK for trunk?

H.J.
---
gcc/

        * config/i386/i386.c (timode_scalar_to_vector_candidate_p): Allow
        TImode CONST_WIDE_INT store.
        (timode_scalar_chain::convert_insn): Handle CONST_WIDE_INT store.

gcc/testsuite/

        * gcc.target/i386/pieces-strcpy-1.c: New test.
        * gcc.target/i386/pieces-strcpy-2.c: Likewise.
---
 gcc/config/i386/i386.c                          | 23 ++++++++++++++++++++---
 gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c | 15 +++++++++++++++
 gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c | 15 +++++++++++++++
 3 files changed, 50 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 93eaab1..d086ede 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2862,9 +2862,12 @@ timode_scalar_to_vector_candidate_p (rtx_insn *insn)
 
   if (MEM_P (dst))
     {
-      /* Check for store.  Only support store from register or standard
-        SSE constants.  Memory must be aligned or unaligned store is
-        optimal.  */
+      /* Check for store.  Memory must be aligned or the unaligned
+        store must be optimal.  Only support stores from a register,
+        a standard SSE constant or a CONST_WIDE_INT from a piecewise store.
+
+        ??? Verify the performance impact before enabling CONST_INT
+        stores for __int128.  */
       if (misaligned_operand (dst, TImode)
          && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
        return false;
@@ -2875,6 +2878,7 @@ timode_scalar_to_vector_candidate_p (rtx_insn *insn)
          return false;
 
        case REG:
+       case CONST_WIDE_INT:
          return true;
 
        case CONST_INT:
@@ -3868,6 +3872,19 @@ timode_scalar_chain::convert_insn (rtx_insn *insn)
       PUT_MODE (src, V1TImode);
       break;
 
+    case CONST_WIDE_INT:
+      if (NONDEBUG_INSN_P (insn))
+       {
+         /* Since there is no instruction to store a 128-bit constant,
+            a temporary register is required.  */
+         rtx tmp = gen_reg_rtx (V1TImode);
+         src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src));
+         src = validize_mem (force_const_mem (V1TImode, src));
+         emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
+         dst = tmp;
+       }
+      break;
+
     case CONST_INT:
       switch (standard_sse_constant_p (src, TImode))
        {
diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c
new file mode 100644
index 0000000..64b7329
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *strcpy (char *, const char *);
+
+void
+foo (char *s)
+{
+  strcpy (s,
+         "1234567890abcdef123456abcdef5678123456abcdef567abcdef678"
+         "1234567");
+}
+
+/* { dg-final { scan-assembler-times "movdqa\[ \\t\]+\[^\n\]*%xmm" 4 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c
new file mode 100644
index 0000000..7421255
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+
+extern char *strcpy (char *, const char *);
+
+void
+foo (char *s)
+{
+  strcpy (s,
+         "1234567890abcdef123456abcdef5678123456abcdef567abcdef678"
+         "1234567");
+}
+
+/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\[^\n\]*%xmm" 4 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
-- 
2.7.4