Add TARGET_PREFER_OVERLAP_OP_BY_PIECES for Alder Lake and Intel core processors with AVX2 to enable -foverlap-op-by-pieces by default.
gcc/ PR middle-end/90773 * config/i386/i386-options.c (ix86_option_override_internal): Enable -foverlap-op-by-pieces by default for TARGET_PREFER_OVERLAP_OP_BY_PIECES. * config/i386/i386.h (TARGET_PREFER_OVERLAP_OP_BY_PIECES): New. * config/i386/x86-tune.def (X86_TUNE_PREFER_OVERLAP_OP_BY_PIECES): New. gcc/testsuite/ PR middle-end/90773 * gcc.target/i386/pr90773-12.c: New test. * gcc.target/i386/pr90773-13.c: Likewise. --- gcc/config/i386/i386-options.c | 3 +++ gcc/config/i386/i386.h | 2 ++ gcc/config/i386/x86-tune.def | 6 ++++++ gcc/testsuite/gcc.target/i386/pr90773-12.c | 11 +++++++++++ gcc/testsuite/gcc.target/i386/pr90773-13.c | 11 +++++++++++ 5 files changed, 33 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-12.c create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-13.c diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c index 2a12228d195..5949d2d5597 100644 --- a/gcc/config/i386/i386-options.c +++ b/gcc/config/i386/i386-options.c @@ -2821,6 +2821,9 @@ ix86_option_override_internal (bool main_args_p, if (ix86_indirect_branch != indirect_branch_keep) SET_OPTION_IF_UNSET (opts, opts_set, flag_jump_tables, 0); + if (TARGET_PREFER_OVERLAP_OP_BY_PIECES) + SET_OPTION_IF_UNSET (opts, opts_set, flag_overlap_op_by_pieces, 1); + return true; } diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 96b46bac238..cf24fecaddc 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -304,6 +304,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; #define TARGET_SINGLE_STRINGOP ix86_tune_features[X86_TUNE_SINGLE_STRINGOP] #define TARGET_PREFER_KNOWN_REP_MOVSB_STOSB \ ix86_tune_features[X86_TUNE_PREFER_KNOWN_REP_MOVSB_STOSB] +#define TARGET_PREFER_OVERLAP_OP_BY_PIECES \ + ix86_tune_features[X86_TUNE_PREFER_OVERLAP_OP_BY_PIECES] #define TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES \ ix86_tune_features[X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES] #define TARGET_QIMODE_MATH 
ix86_tune_features[X86_TUNE_QIMODE_MATH] diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index eb057a67750..848c1b53ad4 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -275,6 +275,12 @@ DEF_TUNE (X86_TUNE_PREFER_KNOWN_REP_MOVSB_STOSB, "prefer_known_rep_movsb_stosb", m_SKYLAKE | m_ALDERLAKE | m_CORE_AVX512) +/* X86_TUNE_PREFER_OVERLAP_OP_BY_PIECES: Enable -foverlap-op-by-pieces + by default. */ +DEF_TUNE (X86_TUNE_PREFER_OVERLAP_OP_BY_PIECES, + "prefer_overlap_op_by_pieces", + m_CORE_AVX2) + /* X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES: Enable generation of compact prologues and epilogues by issuing a misaligned moves. This requires target to handle misaligned moves and partial memory stalls diff --git a/gcc/testsuite/gcc.target/i386/pr90773-12.c b/gcc/testsuite/gcc.target/i386/pr90773-12.c new file mode 100644 index 00000000000..e45840a5b8d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr90773-12.c @@ -0,0 +1,11 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mno-avx -msse2 -mtune=skylake" } */ + +void +foo (char *dst, char *src) +{ + __builtin_memcpy (dst, src, 255); +} + +/* { dg-final { scan-assembler-times "movdqu\[\\t \]+\[0-9\]*\\(%\[\^,\]+\\)," 16 } } */ +/* { dg-final { scan-assembler-not "mov\[bwlq\]" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr90773-13.c b/gcc/testsuite/gcc.target/i386/pr90773-13.c new file mode 100644 index 00000000000..4d5ae8d1086 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr90773-13.c @@ -0,0 +1,11 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mno-avx -msse2 -mtune=skylake" } */ + +void +foo (char *dst) +{ + __builtin_memset (dst, 0, 255); +} + +/* { dg-final { scan-assembler-times "movups\[\\t \]+%xmm\[0-9\]+, \[0-9\]*\\(%\[\^,\]+\\)" 16 } } */ +/* { dg-final { scan-assembler-not "mov\[bwlq\]" } } */ -- 2.30.2