When profiling is enabled with shrink wrapping, the mcount call may not be placed at the function entry, immediately after

    pushq %rbp
    movq %rsp,%rbp

As a result, the profile data may be skewed, which makes PGO less effective.

Add --enable-x86-64-mfentry to enable -mfentry by default, so that __fentry__ is used instead of mcount. __fentry__ was added to glibc in 2010 by:

commit d22e4cc9397ed41534c9422d0b0ffef8c77bfa53
Author: Andi Kleen <a...@linux.intel.com>
Date: Sat Aug 7 21:24:05 2010 -0700

    x86: Add support for frame pointer less mcount

Unlike mcount, the __fentry__ call is placed before the prologue, so that -pg can be used with -fshrink-wrap-separate enabled at -O1. This option is 64-bit only because __fentry__ doesn't support PIC in 32-bit mode.

Also warn about -pg without -mfentry when shrink wrapping is enabled. The warning is disabled for PIC in 32-bit mode.

gcc/

PR target/120881
* config.in: Regenerated.
* configure: Likewise.
* configure.ac: Add --enable-x86-64-mfentry.
* config/i386/i386-options.cc (ix86_option_override_internal): Enable __fentry__ in 64-bit mode if ENABLE_X86_64_MFENTRY is set to 1. Warn about -pg without -mfentry when shrink wrapping is enabled.
* doc/install.texi: Document --enable-x86-64-mfentry.

gcc/testsuite/

PR target/120881
* gcc.target/i386/pr120881-1a.c: New test.
* gcc.target/i386/pr120881-1b.c: Likewise.
* gcc.target/i386/pr120881-1c.c: Likewise.
* gcc.target/i386/pr120881-1d.c: Likewise.
* gcc.target/i386/pr120881-2a.c: Likewise.
* gcc.target/i386/pr120881-2b.c: Likewise.
* lib/target-supports.exp (check_effective_target_fentry): New.

OK for master?

Thanks.

--
H.J.
From 99d64e9355dfbb8e07582c62589530e6ad0257db Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.to...@gmail.com>
Date: Wed, 2 Jul 2025 08:58:23 +0800
Subject: [PATCH] x86-64: Add --enable-x86-64-mfentry

When profiling is enabled with shrink wrapping, the mcount call may not be placed at the function entry, immediately after

    pushq %rbp
    movq %rsp,%rbp

As a result, the profile data may be skewed, which makes PGO less effective.

Add --enable-x86-64-mfentry to enable -mfentry by default, so that __fentry__ is used instead of mcount. __fentry__ was added to glibc in 2010 by:

commit d22e4cc9397ed41534c9422d0b0ffef8c77bfa53
Author: Andi Kleen <a...@linux.intel.com>
Date: Sat Aug 7 21:24:05 2010 -0700

    x86: Add support for frame pointer less mcount

Unlike mcount, the __fentry__ call is placed before the prologue, so that -pg can be used with -fshrink-wrap-separate enabled at -O1. This option is 64-bit only because __fentry__ doesn't support PIC in 32-bit mode.

Also warn about -pg without -mfentry when shrink wrapping is enabled. The warning is disabled for PIC in 32-bit mode.

gcc/

PR target/120881
* config.in: Regenerated.
* configure: Likewise.
* configure.ac: Add --enable-x86-64-mfentry.
* config/i386/i386-options.cc (ix86_option_override_internal): Enable __fentry__ in 64-bit mode if ENABLE_X86_64_MFENTRY is set to 1. Warn about -pg without -mfentry when shrink wrapping is enabled.
* doc/install.texi: Document --enable-x86-64-mfentry.

gcc/testsuite/

PR target/120881
* gcc.target/i386/pr120881-1a.c: New test.
* gcc.target/i386/pr120881-1b.c: Likewise.
* gcc.target/i386/pr120881-1c.c: Likewise.
* gcc.target/i386/pr120881-1d.c: Likewise.
* gcc.target/i386/pr120881-2a.c: Likewise.
* gcc.target/i386/pr120881-2b.c: Likewise.
* lib/target-supports.exp (check_effective_target_fentry): New.

Signed-off-by: H.J.
Lu <hjl.to...@gmail.com> --- gcc/config.in | 6 +++ gcc/config/i386/i386-options.cc | 11 ++++- gcc/configure | 46 +++++++++++++++++++- gcc/configure.ac | 35 +++++++++++++++ gcc/doc/install.texi | 11 +++++ gcc/testsuite/gcc.target/i386/pr120881-1a.c | 4 ++ gcc/testsuite/gcc.target/i386/pr120881-1b.c | 4 ++ gcc/testsuite/gcc.target/i386/pr120881-1c.c | 3 ++ gcc/testsuite/gcc.target/i386/pr120881-1d.c | 3 ++ gcc/testsuite/gcc.target/i386/pr120881-2a.c | 21 +++++++++ gcc/testsuite/gcc.target/i386/pr120881-2b.c | 6 +++ gcc/testsuite/lib/target-supports.exp | 48 +++++++++++++++++++++ 12 files changed, 195 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr120881-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr120881-1b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr120881-1c.c create mode 100644 gcc/testsuite/gcc.target/i386/pr120881-1d.c create mode 100644 gcc/testsuite/gcc.target/i386/pr120881-2a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr120881-2b.c diff --git a/gcc/config.in b/gcc/config.in index ab62c1566cb..353d1bc9407 100644 --- a/gcc/config.in +++ b/gcc/config.in @@ -318,6 +318,12 @@ #endif +/* Define to enable -mfentry by default on x86-64. */ +#ifndef USED_FOR_TARGET +#undef ENABLE_X86_64_MFENTRY +#endif + + /* Define to the name of a file containing a list of extra machine modes for this architecture. */ #ifndef USED_FOR_TARGET diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index 09cb1337f94..53658496efd 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -2839,7 +2839,9 @@ ix86_option_override_internal (bool main_args_p, /* Set the default value for -mfentry. 
*/ if (!opts_set->x_flag_fentry) - opts->x_flag_fentry = TARGET_SEH; + opts->x_flag_fentry = (TARGET_SEH + || (TARGET_64BIT_P (opts->x_ix86_isa_flags) + && ENABLE_X86_64_MFENTRY)); else { if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic @@ -2850,6 +2852,13 @@ ix86_option_override_internal (bool main_args_p, sorry ("%<-mno-fentry%> isn%'t compatible with SEH"); } + if (!opts->x_flag_fentry + && (TARGET_64BIT_P (opts->x_ix86_isa_flags) || !opts->x_flag_pic) + && opts->x_flag_shrink_wrap + && opts->x_profile_flag) + warning (0, "%<-pg%> without %<-mfentry%> may be unreliable with " + "shrink wrapping"); + if (TARGET_SEH && TARGET_CALL_MS2SYSV_XLOGUES) sorry ("%<-mcall-ms2sysv-xlogues%> isn%'t currently supported with SEH"); diff --git a/gcc/configure b/gcc/configure index f056cfe9677..5dce9dfc60b 100755 --- a/gcc/configure +++ b/gcc/configure @@ -1064,6 +1064,7 @@ enable_versioned_jit enable_default_pie enable_cet enable_s390_excess_float_precision +enable_x86_64_mfentry ' ac_precious_vars='build_alias host_alias @@ -1842,6 +1843,7 @@ Optional Features: --enable-s390-excess-float-precision on s390 targets, evaluate float with double precision when in standards-conforming mode + --enable-x86-64-mfentry enable -mfentry by default on x86-64 targets Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] @@ -21520,7 +21522,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 21523 "configure" +#line 21525 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -21626,7 +21628,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 21629 "configure" +#line 21631 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -35022,6 +35024,46 @@ $as_echo "#define ENABLE_S390_EXCESS_FLOAT_PRECISION 1" >>confdefs.h ;; esac +# On x86-64, when profiling is enabled with shrink wrapping, the mcount +# call may not be placed 
at the function entry after +# pushq %rbp +# movq %rsp,%rbp +# As the result, the profile data may be skewed which makes PGO less +# effective: +# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120881 +# Enable -mfentry by default on x86-64 to put the profiling counter call +# before the prologue. +# Check whether --enable-x86-64-mfentry was given. +if test "${enable_x86_64_mfentry+set}" = set; then : + enableval=$enable_x86_64_mfentry; case "${enableval}" in + yes | no | auto) + enable_x86_64_mfentry=$enableval + ;; + *) + as_fn_error $? "'$enable_x86_64_mfentry' is an invalid value for --enable-x86-64-mfentry. Valid choices are 'yes', 'no' and 'auto'." "$LINENO" 5 + ;; + esac +else + enable_x86_64_mfentry=auto +fi + + +if test x"$enable_x86_64_mfentry" = xauto; then + case "${target}" in + i?86-*-linux* | x86_64-*-linux*) + # Enable -mfentry by default on Linux. + enable_x86_64_mfentry=yes + ;; + esac +fi + +gif=`if test x$enable_x86_64_mfentry = xyes; then echo 1; else echo 0; fi` + +cat >>confdefs.h <<_ACEOF +#define ENABLE_X86_64_MFENTRY $gif +_ACEOF + + # Check if the linker supports '-z now' ld_now_support=no { $as_echo "$as_me:${as_lineno-$LINENO}: checking linker -z now option" >&5 diff --git a/gcc/configure.ac b/gcc/configure.ac index 58bf63f8be9..e1fb1d33682 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -7972,6 +7972,41 @@ standards-compatible mode on s390 targets.]) ;; esac +# On x86-64, when profiling is enabled with shrink wrapping, the mcount +# call may not be placed at the function entry after +# pushq %rbp +# movq %rsp,%rbp +# As the result, the profile data may be skewed which makes PGO less +# effective: +# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120881 +# Enable -mfentry by default on x86-64 to put the profiling counter call +# before the prologue. 
+AC_ARG_ENABLE(x86-64-mfentry, + [AS_HELP_STRING([--enable-x86-64-mfentry], + [enable -mfentry by default on x86-64 targets])], + [case "${enableval}" in + yes | no | auto) + enable_x86_64_mfentry=$enableval + ;; + *) + AC_MSG_ERROR(['$enable_x86_64_mfentry' is an invalid value for --enable-x86-64-mfentry. Valid choices are 'yes', 'no' and 'auto'.]) + ;; + esac], + [enable_x86_64_mfentry=auto]) + +if test x"$enable_x86_64_mfentry" = xauto; then + case "${target}" in + i?86-*-linux* | x86_64-*-linux*) + # Enable -mfentry by default on Linux. + enable_x86_64_mfentry=yes + ;; + esac +fi + +gif=`if test x$enable_x86_64_mfentry = xyes; then echo 1; else echo 0; fi` +AC_DEFINE_UNQUOTED(ENABLE_X86_64_MFENTRY, $gif, +[Define to enable -mfentry by default on x86-64.]) + # Check if the linker supports '-z now' ld_now_support=no AC_MSG_CHECKING(linker -z now option) diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi index 80ee2cd6eba..09ea87aa812 100644 --- a/gcc/doc/install.texi +++ b/gcc/doc/install.texi @@ -2667,6 +2667,17 @@ target binutils supports @code{Intel CET} instructions and disabled otherwise. In this case, the target libraries are configured to get additional @option{-fcf-protection} option. +@item --enable-x86-64-mfentry +@itemx --disable-x86-64-mfentry +Enable @option {-mfentry} by default on x86-64 to put the profiling +counter call, @code{__fentry__}, before the prologue so that @option{-pg} +can be used with @option{-fshrink-wrap} which is enabled at @option{-O1}. +This configure option is 64-bit only because @code{__fentry__} doesn't +support PIC in 32-bit mode. + +@option{--enable-x86-64-mfentry=auto} is default. @option{-mfentry} is +enabled on Linux/x86-64 by default. + @item --with-riscv-attribute=@samp{yes}, @samp{no} or @samp{default} Generate RISC-V attribute by default, in order to record extra build information in object. 
diff --git a/gcc/testsuite/gcc.target/i386/pr120881-1a.c b/gcc/testsuite/gcc.target/i386/pr120881-1a.c new file mode 100644 index 00000000000..3d9ac0e9e86 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr120881-1a.c @@ -0,0 +1,4 @@ +/* { dg-do compile { target fpic } } */ +/* { dg-require-profiling "-pg" } */ +/* { dg-options "-O2 -pg -mno-fentry -fno-pic" } */ +/* { dg-message "'-pg' without '-mfentry' may be unreliable with shrink wrapping" "" { target *-*-* } 0 } */ diff --git a/gcc/testsuite/gcc.target/i386/pr120881-1b.c b/gcc/testsuite/gcc.target/i386/pr120881-1b.c new file mode 100644 index 00000000000..082640726b1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr120881-1b.c @@ -0,0 +1,4 @@ +/* { dg-do compile { target { fpic && { ! ia32 } } } } */ +/* { dg-require-profiling "-pg" } */ +/* { dg-options "-O2 -pg -mno-fentry -fpic" } */ +/* { dg-message "'-pg' without '-mfentry' may be unreliable with shrink wrapping" "" { target *-*-* } 0 } */ diff --git a/gcc/testsuite/gcc.target/i386/pr120881-1c.c b/gcc/testsuite/gcc.target/i386/pr120881-1c.c new file mode 100644 index 00000000000..c21979f8eb1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr120881-1c.c @@ -0,0 +1,3 @@ +/* { dg-do compile { target { fpic && ia32 } } } */ +/* { dg-require-profiling "-pg" } */ +/* { dg-options "-O2 -pg -mno-fentry -fpic" } */ diff --git a/gcc/testsuite/gcc.target/i386/pr120881-1d.c b/gcc/testsuite/gcc.target/i386/pr120881-1d.c new file mode 100644 index 00000000000..f74af23ff5c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr120881-1d.c @@ -0,0 +1,3 @@ +/* { dg-do compile { target { fpic && ia32 } } } */ +/* { dg-require-profiling "-pg" } */ +/* { dg-options "-O2 -pg -mno-fentry -fno-shrink-wrap -fno-pic" } */ diff --git a/gcc/testsuite/gcc.target/i386/pr120881-2a.c b/gcc/testsuite/gcc.target/i386/pr120881-2a.c new file mode 100644 index 00000000000..c7be72ace57 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr120881-2a.c @@ -0,0 +1,21 @@ +/* { dg-do compile { 
target fentry } } */ +/* { dg-options "-O2 -pg" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ +/* { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } {^(?:1|\t?\.)} } } */ + +/* +**f2: +**.LFB[0-9]+: +** .cfi_startproc +** call __fentry__ +**... +*/ + +extern void f1 (void); + +void +f2 (int count) +{ + for (int i = 0; i < count; ++i) + f1 (); +} diff --git a/gcc/testsuite/gcc.target/i386/pr120881-2b.c b/gcc/testsuite/gcc.target/i386/pr120881-2b.c new file mode 100644 index 00000000000..43a12f00774 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr120881-2b.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-rtl-pro_and_epilogue -march=x86-64" } */ +/* { dg-final { scan-rtl-dump "Now spread 1 times" "pro_and_epilogue" } } */ + +#include "pr120881-2a.c" + diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 25ceeea95cb..c0474f519b7 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -14509,3 +14509,51 @@ proc check_effective_target_foldable_pi_based_trigonometry { } { } }] } +# +# Return 1 if the x86-64 target enables -mfentry by default, 0 +# otherwise. Cache the result. + +proc check_effective_target_fentry { } { + global tool + global GCC_UNDER_TEST + + if { ![check_effective_target_x86] } { + return 0 + } + + # Need auto-host.h to check linker support. + if { ![file exists ../../auto-host.h ] } { + return 0 + } + + return [check_cached_effective_target fentry { + # Set up and compile to see if ENABLE_X86_64_MFENTRY is + # non-zero. Include the current process ID in the file + # names to prevent conflicts with invocations for multiple + # testsuites. + + set src pie[pid].c + set obj pie[pid].o + + set f [open $src "w"] + puts $f "#include \"../../auto-host.h\"" + puts $f "#if ENABLE_X86_64_MFENTRY == 0 || !defined __x86_64__" + puts $f "# error -mfentry isn't enabled by default." 
+ puts $f "#endif" + close $f + + verbose "check_effective_target_fentry compiling testfile $src" 2 + set lines [${tool}_target_compile $src $obj object ""] + + file delete $src + file delete $obj + + if [string match "" $lines] then { + verbose "check_effective_target_fentry testfile compilation passed" 2 + return 1 + } else { + verbose "check_effective_target_fentry testfile compilation failed" 2 + return 0 + } + }] +} -- 2.50.0