The -mgeneral-regs-only option generates code that uses only the general-purpose registers. It prevents the compiler from using vector registers. But GCC may still generate calls to the memcpy, memmove, memset and memcmp library functions. In the GNU C library, these library functions are implemented with vector registers, which makes the -mgeneral-regs-only option less effective. The new -mavoid-libcall option expands memcpy, memmove and memset into REP MOVSB and REP STOSB sequences. This option can be further enhanced with a cmpmem pattern to expand memcmp into a REP CMPSB sequence in the future.
Tested on Linux/x86 and Linux/x86-64. OK for master? Thanks. H.J. --- gcc/ PR target/95134 * config/i386/i386-expand.c (alg_usable_p): Return false for libcall with -mavoid-libcall. (decide_alg): Avoid libcall and use rep_prefix_1_byte instead of libcall with -mavoid-libcall. * config/i386/i386.opt: Add -mavoid-libcall. * doc/invoke.texi: Document -mavoid-libcall. gcc/testsuite/ PR target/95134 * gcc.target/i386/pr95134-1.c: New test. * gcc.target/i386/pr95134-2.c: Likewise. * gcc.target/i386/pr95134-3.c: Likewise. * gcc.target/i386/pr95134-4.c: Likewise. --- gcc/config/i386/i386-expand.c | 15 ++++++++++----- gcc/config/i386/i386.opt | 6 +++++- gcc/doc/invoke.texi | 10 +++++++++- gcc/testsuite/gcc.target/i386/pr95134-1.c | 18 ++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr95134-2.c | 18 ++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr95134-3.c | 18 ++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr95134-4.c | 11 +++++++++++ 7 files changed, 89 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr95134-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr95134-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr95134-3.c create mode 100644 gcc/testsuite/gcc.target/i386/pr95134-4.c diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 26531585c5f..b38463bf88c 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -6816,7 +6816,7 @@ alg_usable_p (enum stringop_alg alg, bool memset, bool have_as) || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG])) return false; } - return true; + return !flag_avoid_libcall || alg != libcall; } /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */ @@ -6889,7 +6889,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, setup. 
*/ else if (expected_size != -1 && expected_size < 4) return loop_1_byte; - else if (expected_size != -1) + else if (expected_size != -1 && !flag_avoid_libcall) { enum stringop_alg alg = libcall; bool alg_noalign = false; @@ -6934,6 +6934,9 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, } } } + + enum stringop_alg alg; + /* When asked to inline the call anyway, try to pick meaningful choice. We look for maximal size of block that is faster to copy by hand and take blocks of at most of that size guessing that average size will @@ -6945,7 +6948,6 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, && (algs->unknown_size == libcall || !alg_usable_p (algs->unknown_size, memset, have_as))) { - enum stringop_alg alg; HOST_WIDE_INT new_expected_size = (max > 0 ? max : 4096) / 2; /* If there aren't any usable algorithms or if recursing already, @@ -6967,8 +6969,11 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, gcc_assert (alg != libcall); return alg; } - return (alg_usable_p (algs->unknown_size, memset, have_as) - ? algs->unknown_size : libcall); + alg = (alg_usable_p (algs->unknown_size, memset, have_as) + ? algs->unknown_size : libcall); + if (flag_avoid_libcall && alg == libcall) + alg = rep_prefix_1_byte; + return alg; } /* Decide on alignment. We know that the operand is already aligned to ALIGN diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index c9f7195d423..23b401bd424 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -1114,4 +1114,8 @@ Support SERIALIZE built-in functions and code generation. mtsxldtrk Target Report Mask(ISA2_TSXLDTRK) Var(ix86_isa_flags2) Save -Support TSXLDTRK built-in functions and code generation. \ No newline at end of file +Support TSXLDTRK built-in functions and code generation. + +mavoid-libcall +Target Report Var(flag_avoid_libcall) Init(0) +Avoid generation of libcall. 
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 850aeac033d..0d2d70419d5 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -1364,7 +1364,7 @@ See RS/6000 and PowerPC Options. -mstack-protector-guard-reg=@var{reg} @gol -mstack-protector-guard-offset=@var{offset} @gol -mstack-protector-guard-symbol=@var{symbol} @gol --mgeneral-regs-only -mcall-ms2sysv-xlogues @gol +-mgeneral-regs-only -mavoid-libcall -mcall-ms2sysv-xlogues @gol -mindirect-branch=@var{choice} -mfunction-return=@var{choice} @gol -mindirect-branch-register} @@ -30115,6 +30115,14 @@ Generate code that uses only the general-purpose registers. This prevents the compiler from using floating-point, vector, mask and bound registers. +@item -mavoid-libcall +@opindex mavoid-libcall +Avoid generation of calls to @code{memcpy}, @code{memmove} and +@code{memset} library functions. It can be used together with the +option @option{-mgeneral-regs-only} to avoid implicit vector register +usage in @code{memcpy}, @code{memmove} and @code{memset} library +functions. + @item -mindirect-branch=@var{choice} @opindex mindirect-branch Convert indirect call and jump with @var{choice}. 
The default is diff --git a/gcc/testsuite/gcc.target/i386/pr95134-1.c b/gcc/testsuite/gcc.target/i386/pr95134-1.c new file mode 100644 index 00000000000..8ffa680559d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr95134-1.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mgeneral-regs-only -mavoid-libcall -mtune=skylake" } */ + +struct foo +{ + char array[513]; +}; + +extern struct foo x; + +int +func (void) +{ + __builtin_memset (&x, 0, sizeof (x)); + return 0; +} + +/* { dg-final { scan-assembler-not "call\[\\t \]*_?memset" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr95134-2.c b/gcc/testsuite/gcc.target/i386/pr95134-2.c new file mode 100644 index 00000000000..7c6c42a736d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr95134-2.c @@ -0,0 +1,18 @@ +/* { dg-do compile { target ia32 } } */ +/* { dg-options "-O2 -mgeneral-regs-only -mavoid-libcall -mtune=pentium" } */ + +struct foo +{ + char array[257]; +}; + +extern struct foo x; + +int +func (struct foo i) +{ + x = i; + return 0; +} + +/* { dg-final { scan-assembler-not "call\[\\t \]*_?memcpy" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr95134-3.c b/gcc/testsuite/gcc.target/i386/pr95134-3.c new file mode 100644 index 00000000000..4e4428cd0ae --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr95134-3.c @@ -0,0 +1,18 @@ +/* { dg-do compile { target ia32 } } */ +/* { dg-options "-O2 -mgeneral-regs-only -mavoid-libcall -mtune=pentium" } */ + +struct foo +{ + char array[257]; +}; + +extern struct foo x; + +int +func (struct foo i) +{ + __builtin_memcpy (&x, &i, sizeof (x)); + return 0; +} + +/* { dg-final { scan-assembler-not "call\[\\t \]*_?memcpy" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr95134-4.c b/gcc/testsuite/gcc.target/i386/pr95134-4.c new file mode 100644 index 00000000000..d1bd8fbf4c1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr95134-4.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mgeneral-regs-only -mavoid-libcall" } */ + +int +func 
(void *d, void *s, unsigned int l) +{ + __builtin_memcpy (d, s, l); + return 0; +} + +/* { dg-final { scan-assembler-not "call\[\\t \]*_?memcpy" } } */ -- 2.26.2