Add TARGET_PREFER_OVERLAP_OP_BY_PIECES for Alder Lake and Intel core
processors with AVX2 to enable -foverlap-op-by-pieces by default.

gcc/

        PR middle-end/90773
        * config/i386/i386-options.c (ix86_option_override_internal):
        Enable -foverlap-op-by-pieces by default for
        TARGET_PREFER_OVERLAP_OP_BY_PIECES.
        * config/i386/i386.h (TARGET_PREFER_OVERLAP_OP_BY_PIECES): New.
        * config/i386/x86-tune.def (X86_TUNE_PREFER_OVERLAP_OP_BY_PIECES):
        New.

gcc/testsuite/

        PR middle-end/90773
        * gcc.target/i386/pr90773-12.c: New test.
        * gcc.target/i386/pr90773-13.c: Likewise.
---
 gcc/config/i386/i386-options.c             |  3 +++
 gcc/config/i386/i386.h                     |  2 ++
 gcc/config/i386/x86-tune.def               |  6 ++++++
 gcc/testsuite/gcc.target/i386/pr90773-12.c | 11 +++++++++++
 gcc/testsuite/gcc.target/i386/pr90773-13.c | 11 +++++++++++
 5 files changed, 33 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-12.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-13.c

diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c
index 2a12228d195..5949d2d5597 100644
--- a/gcc/config/i386/i386-options.c
+++ b/gcc/config/i386/i386-options.c
@@ -2821,6 +2821,9 @@ ix86_option_override_internal (bool main_args_p,
   if (ix86_indirect_branch != indirect_branch_keep)
     SET_OPTION_IF_UNSET (opts, opts_set, flag_jump_tables, 0);
 
+  if (TARGET_PREFER_OVERLAP_OP_BY_PIECES)
+    SET_OPTION_IF_UNSET (opts, opts_set, flag_overlap_op_by_pieces, 1);
+
   return true;
 }
 
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 96b46bac238..cf24fecaddc 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -304,6 +304,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
 #define TARGET_SINGLE_STRINGOP ix86_tune_features[X86_TUNE_SINGLE_STRINGOP]
 #define TARGET_PREFER_KNOWN_REP_MOVSB_STOSB \
   ix86_tune_features[X86_TUNE_PREFER_KNOWN_REP_MOVSB_STOSB]
+#define TARGET_PREFER_OVERLAP_OP_BY_PIECES \
+  ix86_tune_features[X86_TUNE_PREFER_OVERLAP_OP_BY_PIECES]
 #define TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES \
        ix86_tune_features[X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES]
 #define TARGET_QIMODE_MATH     ix86_tune_features[X86_TUNE_QIMODE_MATH]
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index eb057a67750..848c1b53ad4 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -275,6 +275,12 @@ DEF_TUNE (X86_TUNE_PREFER_KNOWN_REP_MOVSB_STOSB,
          "prefer_known_rep_movsb_stosb",
          m_SKYLAKE | m_ALDERLAKE | m_CORE_AVX512)
 
+/* X86_TUNE_PREFER_OVERLAP_OP_BY_PIECES: Enable -foverlap-op-by-pieces
+   by default.  */
+DEF_TUNE (X86_TUNE_PREFER_OVERLAP_OP_BY_PIECES,
+         "prefer_overlap_op_by_pieces",
+         m_CORE_AVX2)
+
 /* X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES: Enable generation of
    compact prologues and epilogues by issuing a misaligned moves.  This
    requires target to handle misaligned moves and partial memory stalls
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-12.c b/gcc/testsuite/gcc.target/i386/pr90773-12.c
new file mode 100644
index 00000000000..e45840a5b8d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90773-12.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=skylake" } */
+
+void
+foo (char *dst, char *src)
+{
+  __builtin_memcpy (dst, src, 255);
+}
+
+/* { dg-final { scan-assembler-times "movdqu\[\\t \]+\[0-9\]*\\(%\[\^,\]+\\)," 16 } } */
+/* { dg-final { scan-assembler-not "mov\[bwlq\]" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-13.c b/gcc/testsuite/gcc.target/i386/pr90773-13.c
new file mode 100644
index 00000000000..4d5ae8d1086
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90773-13.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=skylake" } */
+
+void
+foo (char *dst)
+{
+  __builtin_memset (dst, 0, 255);
+}
+
+/* { dg-final { scan-assembler-times "movups\[\\t \]+%xmm\[0-9\]+, \[0-9\]*\\(%\[\^,\]+\\)" 16 } } */
+/* { dg-final { scan-assembler-not "mov\[bwlq\]" } } */
-- 
2.30.2

Reply via email to