If a single instruction can store or move the whole block of memory, use a
vector instruction and don't align the destination.

gcc/

        PR target/121934
        * config/i386/i386-expand.cc (ix86_expand_set_or_cpymem): If a
        single instruction can store or move the whole block of memory,
        use vector instruction and don't align destination.

gcc/testsuite/

        PR target/121934
        * gcc.target/i386/pr121934-1a.c: New test.
        * gcc.target/i386/pr121934-1b.c: Likewise.
        * gcc.target/i386/pr121934-2a.c: Likewise.
        * gcc.target/i386/pr121934-2b.c: Likewise.
        * gcc.target/i386/pr121934-3a.c: Likewise.
        * gcc.target/i386/pr121934-3b.c: Likewise.
        * gcc.target/i386/pr121934-4a.c: Likewise.
        * gcc.target/i386/pr121934-4b.c: Likewise.
        * gcc.target/i386/pr121934-5a.c: Likewise.
        * gcc.target/i386/pr121934-5b.c: Likewise.

Signed-off-by: H.J. Lu <hjl.to...@gmail.com>
---
 gcc/config/i386/i386-expand.cc              | 62 +++++++++++++--------
 gcc/testsuite/gcc.target/i386/pr121934-1a.c | 22 ++++++++
 gcc/testsuite/gcc.target/i386/pr121934-1b.c |  7 +++
 gcc/testsuite/gcc.target/i386/pr121934-2a.c | 23 ++++++++
 gcc/testsuite/gcc.target/i386/pr121934-2b.c |  7 +++
 gcc/testsuite/gcc.target/i386/pr121934-3a.c | 23 ++++++++
 gcc/testsuite/gcc.target/i386/pr121934-3b.c |  7 +++
 gcc/testsuite/gcc.target/i386/pr121934-4a.c | 23 ++++++++
 gcc/testsuite/gcc.target/i386/pr121934-4b.c |  7 +++
 gcc/testsuite/gcc.target/i386/pr121934-5a.c | 23 ++++++++
 gcc/testsuite/gcc.target/i386/pr121934-5b.c |  7 +++
 11 files changed, 187 insertions(+), 24 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121934-1a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121934-1b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121934-2a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121934-2b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121934-3a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121934-3b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121934-4a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121934-4b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121934-5a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121934-5b.c

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index dc26b3452cb..b0b9e6da946 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -9552,9 +9552,20 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
   if (!issetmem)
     srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
 
+  bool aligned_dstmem = false;
+  unsigned int nunits = issetmem ? STORE_MAX_PIECES : MOVE_MAX;
+  bool single_insn_p = count && count <= nunits;
+  if (single_insn_p)
+    {
+      /* If it can be done with a single instruction, use vector
+        instruction and don't align destination.  */
+      alg = vector_loop;
+      noalign = true;
+      dynamic_check = -1;
+    }
+
   unroll_factor = 1;
   move_mode = word_mode;
-  int nunits;
   switch (alg)
     {
     case libcall:
@@ -9576,7 +9587,6 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
       need_zero_guard = true;
       unroll_factor = 4;
       /* Get the vector mode to move STORE_MAX_PIECES/MOVE_MAX bytes.  */
-      nunits = issetmem ? STORE_MAX_PIECES : MOVE_MAX;
       nunits /= GET_MODE_SIZE (word_mode);
       if (nunits > 1)
        {
@@ -9629,28 +9639,32 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
     }
   gcc_assert (desired_align >= 1 && align >= 1);
 
-  /* Misaligned move sequences handle both prologue and epilogue at once.
-     Default code generation results in a smaller code for large alignments
-     and also avoids redundant job when sizes are known precisely.  */
-  misaligned_prologue_used
-    = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
-       && MAX (desired_align, epilogue_size_needed) <= 32
-       && desired_align <= epilogue_size_needed
-       && ((desired_align > align && !align_bytes)
-          || (!count && epilogue_size_needed > 1)));
-
-  /* Destination is aligned after the misaligned prologue.  */
-  bool aligned_dstmem = misaligned_prologue_used;
-
-  if (noalign && !misaligned_prologue_used)
-    {
-      /* Also use misaligned prologue if alignment isn't needed and
-        destination isn't aligned.   Since alignment isn't needed,
-        the destination after prologue won't be aligned.  */
-      aligned_dstmem = (GET_MODE_ALIGNMENT (move_mode)
-                       <= MEM_ALIGN (dst));
-      if (!aligned_dstmem)
-       misaligned_prologue_used = true;
+  if (!single_insn_p)
+    {
+      /* Misaligned move sequences handle both prologue and epilogue
+        at once.  Default code generation results in a smaller code
+        for large alignments and also avoids redundant job when sizes
+        are known precisely.  */
+      misaligned_prologue_used
+       = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
+          && MAX (desired_align, epilogue_size_needed) <= 32
+          && desired_align <= epilogue_size_needed
+          && ((desired_align > align && !align_bytes)
+              || (!count && epilogue_size_needed > 1)));
+
+      /* Destination is aligned after the misaligned prologue.  */
+      aligned_dstmem = misaligned_prologue_used;
+
+      if (noalign && !misaligned_prologue_used)
+       {
+         /* Also use misaligned prologue if alignment isn't needed and
+            destination isn't aligned.   Since alignment isn't needed,
+            the destination after prologue won't be aligned.  */
+         aligned_dstmem = (GET_MODE_ALIGNMENT (move_mode)
+                           <= MEM_ALIGN (dst));
+         if (!aligned_dstmem)
+           misaligned_prologue_used = true;
+       }
     }
 
   /* Do the cheap promotion to allow better CSE across the
diff --git a/gcc/testsuite/gcc.target/i386/pr121934-1a.c b/gcc/testsuite/gcc.target/i386/pr121934-1a.c
new file mode 100644
index 00000000000..6b6881367db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121934-1a.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-dominator-opts -fno-tree-vrp -fno-tree-ccp -fno-tree-forwprop -fno-tree-pre -fno-tree-fre" } */
+
+extern int f();
+int a, b, c, d[3];
+void g() {
+  int h;
+  if (f()) {
+    if (b)
+    i:
+      c > 0;
+    a = 0;
+    for (h = 0; h < 3; h++) {
+      if (a != 1)
+        __builtin_printf("0\n");
+      d[h] = -1;
+    }
+    goto i;
+  }
+}
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121934-1b.c b/gcc/testsuite/gcc.target/i386/pr121934-1b.c
new file mode 100644
index 00000000000..47381ec3476
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121934-1b.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-dominator-opts -fno-tree-vrp -fno-tree-ccp -fno-tree-forwprop -fno-tree-pre -fno-tree-fre -mmemset-strategy=rep_byte:8192:align,libcall:-1:noalign" } */
+
+#include "pr121934-1a.c"
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
+/* { dg-final { scan-assembler-not "movb\[ \\t\]+\\\$-1" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121934-2a.c b/gcc/testsuite/gcc.target/i386/pr121934-2a.c
new file mode 100644
index 00000000000..49def11aa4e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121934-2a.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-dominator-opts -fno-tree-vrp -fno-tree-ccp -fno-tree-forwprop -fno-tree-pre -fno-tree-fre" } */
+
+extern int f();
+int a, b, c;
+long long int d[3];
+void g() {
+  int h;
+  if (f()) {
+    if (b)
+    i:
+      c > 0;
+    a = 0;
+    for (h = 0; h < 3; h++) {
+      if (a != 1)
+        __builtin_printf("0\n");
+      d[h] = (long long int) -1;
+    }
+    goto i;
+  }
+}
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121934-2b.c b/gcc/testsuite/gcc.target/i386/pr121934-2b.c
new file mode 100644
index 00000000000..1c634dfe420
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121934-2b.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-dominator-opts -fno-tree-vrp -fno-tree-ccp -fno-tree-forwprop -fno-tree-pre -fno-tree-fre -mmemset-strategy=rep_byte:8192:align,libcall:-1:noalign" } */
+
+#include "pr121934-2a.c"
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
+/* { dg-final { scan-assembler-not "movb\[ \\t\]+\\\$-1" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121934-3a.c b/gcc/testsuite/gcc.target/i386/pr121934-3a.c
new file mode 100644
index 00000000000..0c04b69c0d4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121934-3a.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-dominator-opts -fno-tree-vrp -fno-tree-ccp -fno-tree-forwprop -fno-tree-pre -fno-tree-fre -msse2" } */
+
+extern int f();
+int a, b, c;
+_BitInt(128) d[3];
+void g() {
+  int h;
+  if (f()) {
+    if (b)
+    i:
+      c > 0;
+    a = 0;
+    for (h = 0; h < 3; h++) {
+      if (a != 1)
+        __builtin_printf("0\n");
+      d[h] = (_BitInt(128)) -1;
+    }
+    goto i;
+  }
+}
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121934-3b.c b/gcc/testsuite/gcc.target/i386/pr121934-3b.c
new file mode 100644
index 00000000000..ff4b0831cea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121934-3b.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-dominator-opts -fno-tree-vrp -fno-tree-ccp -fno-tree-forwprop -fno-tree-pre -fno-tree-fre -msse2 -mmemset-strategy=rep_byte:8192:align,libcall:-1:noalign" } */
+
+#include "pr121934-3a.c"
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
+/* { dg-final { scan-assembler-not "movb\[ \\t\]+\\\$-1" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121934-4a.c b/gcc/testsuite/gcc.target/i386/pr121934-4a.c
new file mode 100644
index 00000000000..5aa3e069cff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121934-4a.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-dominator-opts -fno-tree-vrp -fno-tree-ccp -fno-tree-forwprop -fno-tree-pre -fno-tree-fre -mno-avx512f -mavx -mprefer-vector-width=256" } */
+
+extern int f();
+int a, b, c;
+_BitInt(256) d[3];
+void g() {
+  int h;
+  if (f()) {
+    if (b)
+    i:
+      c > 0;
+    a = 0;
+    for (h = 0; h < 3; h++) {
+      if (a != 1)
+        __builtin_printf("0\n");
+      d[h] = (_BitInt(256)) -1;
+    }
+    goto i;
+  }
+}
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121934-4b.c b/gcc/testsuite/gcc.target/i386/pr121934-4b.c
new file mode 100644
index 00000000000..5f8241dcad5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121934-4b.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-dominator-opts -fno-tree-vrp -fno-tree-ccp -fno-tree-forwprop -fno-tree-pre -fno-tree-fre -mno-avx512f -mavx -mprefer-vector-width=256 -mmemset-strategy=rep_byte:8192:align,libcall:-1:noalign" } */
+
+#include "pr121934-4a.c"
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
+/* { dg-final { scan-assembler-not "movb\[ \\t\]+\\\$-1" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121934-5a.c b/gcc/testsuite/gcc.target/i386/pr121934-5a.c
new file mode 100644
index 00000000000..10be0dd4343
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121934-5a.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-dominator-opts -fno-tree-vrp -fno-tree-ccp -fno-tree-forwprop -fno-tree-pre -fno-tree-fre -mavx512f -mprefer-vector-width=512" } */
+
+extern int f();
+int a, b, c;
+_BitInt(512) d[3];
+void g() {
+  int h;
+  if (f()) {
+    if (b)
+    i:
+      c > 0;
+    a = 0;
+    for (h = 0; h < 3; h++) {
+      if (a != 1)
+        __builtin_printf("0\n");
+      d[h] = (_BitInt(512)) -1;
+    }
+    goto i;
+  }
+}
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121934-5b.c b/gcc/testsuite/gcc.target/i386/pr121934-5b.c
new file mode 100644
index 00000000000..6a45a8a7a8b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121934-5b.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-dominator-opts -fno-tree-vrp -fno-tree-ccp -fno-tree-forwprop -fno-tree-pre -fno-tree-fre -mavx512f -mprefer-vector-width=512 -mmemset-strategy=rep_byte:8192:align,libcall:-1:noalign" } */
+
+#include "pr121934-5a.c"
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
+/* { dg-final { scan-assembler-not "movb\[ \\t\]+\\\$-1" } } */
-- 
2.51.0

Reply via email to