commit: 915801955493b731f4996c28a247c177da85b6dc Author: Mike Pagano <mpagano <AT> gentoo <DOT> org> AuthorDate: Mon Jun 29 17:41:23 2020 +0000 Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org> CommitDate: Mon Jun 29 17:41:23 2020 +0000 URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=91580195
Kernel patch enables gcc = v10.1+ optimizations for additional CPUs. Signed-off-by: Mike Pagano <mpagano <AT> gentoo.org> 0000_README | 4 + 5013_enable-cpu-optimizations-for-gcc10.patch | 671 ++++++++++++++++++++++++++ 2 files changed, 675 insertions(+) diff --git a/0000_README b/0000_README index 3fde3ae..d1af7f9 100644 --- a/0000_README +++ b/0000_README @@ -598,3 +598,7 @@ Desc: Kernel patch for >= gccv8 enables kernel >= v4.13 optimizations for addi Patch: 5012_enable-cpu-optimizations-for-gcc91.patch From: https://github.com/graysky2/kernel_gcc_patch/ Desc: Kernel patch enables gcc >= v9.1 optimizations for additional CPUs. + +Patch: 5013_enable-cpu-optimizations-for-gcc10.patch +From: https://github.com/graysky2/kernel_gcc_patch/ +Desc: Kernel patch enables gcc = v10.1+ optimizations for additional CPUs. diff --git a/5013_enable-cpu-optimizations-for-gcc10.patch b/5013_enable-cpu-optimizations-for-gcc10.patch new file mode 100644 index 0000000..2a1e27d --- /dev/null +++ b/5013_enable-cpu-optimizations-for-gcc10.patch @@ -0,0 +1,671 @@ +WARNING +This patch works with gcc versions 10.1+ and with kernel versions 4.19-5.4 and should +NOT be applied when compiling on older versions of gcc due to key name changes +of the march flags introduced with the version 4.9 release of gcc.[1] + +Use the older version of this patch hosted on the same github for older +versions of gcc. + +FEATURES +This patch adds additional CPU options to the Linux kernel accessible under: + Processor type and features ---> + Processor family ---> + +The expanded microarchitectures include: +* AMD Improved K8-family +* AMD K10-family +* AMD Family 10h (Barcelona) +* AMD Family 14h (Bobcat) +* AMD Family 16h (Jaguar) +* AMD Family 15h (Bulldozer) +* AMD Family 15h (Piledriver) +* AMD Family 15h (Steamroller) +* AMD Family 15h (Excavator) +* AMD Family 17h (Zen) +* AMD Family 17h (Zen 2) +* Intel Silvermont low-power processors +* Intel Goldmont low-power processors (Apollo Lake and Denverton) +* Intel Goldmont Plus low-power processors (Gemini Lake) +* Intel 1st Gen Core i3/i5/i7 (Nehalem) +* Intel 1.5 Gen Core i3/i5/i7 (Westmere) +* Intel 2nd Gen Core i3/i5/i7 (Sandybridge) +* Intel 3rd Gen Core i3/i5/i7 (Ivybridge) +* Intel 4th Gen Core i3/i5/i7 (Haswell) +* Intel 5th Gen Core i3/i5/i7 (Broadwell) +* Intel 6th Gen Core i3/i5/i7 (Skylake) +* Intel 6th Gen Core i7/i9 (Skylake X) +* Intel 8th Gen Core i3/i5/i7 (Cannon Lake) +* Intel 10th Gen Core i7/i9 (Ice Lake) +* Intel Xeon (Cascade Lake) +* Intel Xeon (Cooper Lake) +* Intel 3rd Gen 10nm++ i3/i5/i7/i9-family (Tiger Lake) + +It also offers to compile passing the 'native' option which, "selects the CPU +to generate code for at compilation time by determining the processor type of +the compiling machine. Using -march=native enables all instruction subsets +supported by the local machine and will produce code optimized for the local +machine under the constraints of the selected instruction set."[2] + +Do NOT try using the 'native' option on AMD Piledriver, Steamroller, or +Excavator CPUs (-march=bdver{2,3,4} flag). The build will error out due the +kernel's objtool issue with these.[3a,b] + +MINOR NOTES +This patch also changes 'atom' to 'bonnell' in accordance with the gcc v4.9 +changes. Note that upstream is using the deprecated 'match=atom' flags when I +believe it should use the newer 'march=bonnell' flag for atom processors.[4] + +It is not recommended to compile on Atom-CPUs with the 'native' option.[5] The +recommendation is to use the 'atom' option instead. + +BENEFITS +Small but real speed increases are measurable using a make endpoint comparing +a generic kernel to one built with one of the respective microarchs. + +See the following experimental evidence supporting this statement: +https://github.com/graysky2/kernel_gcc_patch + +REQUIREMENTS +linux version 4.19-lts and 5.4-lts +gcc version >=10.1 + +ACKNOWLEDGMENTS +This patch builds on the seminal work by Jeroen.[6] + +REFERENCES +1. https://gcc.gnu.org/gcc-4.9/changes.html +2. https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html +3a. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95671#c11 +3b. https://github.com/graysky2/kernel_gcc_patch/issues/55 +4. https://bugzilla.kernel.org/show_bug.cgi?id=77461 +5. https://github.com/graysky2/kernel_gcc_patch/issues/15 +6. http://www.linuxforge.net/docs/linux/linux-gcc.php + +--- a/arch/x86/include/asm/module.h 2020-06-10 15:35:02.000000000 -0400 ++++ b/arch/x86/include/asm/module.h 2020-06-15 10:31:48.627486708 -0400 +@@ -25,6 +25,40 @@ struct mod_arch_specific { + #define MODULE_PROC_FAMILY "586MMX " + #elif defined CONFIG_MCORE2 + #define MODULE_PROC_FAMILY "CORE2 " ++#elif defined CONFIG_MNATIVE ++#define MODULE_PROC_FAMILY "NATIVE " ++#elif defined CONFIG_MNEHALEM ++#define MODULE_PROC_FAMILY "NEHALEM " ++#elif defined CONFIG_MWESTMERE ++#define MODULE_PROC_FAMILY "WESTMERE " ++#elif defined CONFIG_MSILVERMONT ++#define MODULE_PROC_FAMILY "SILVERMONT " ++#elif defined CONFIG_MGOLDMONT ++#define MODULE_PROC_FAMILY "GOLDMONT " ++#elif defined CONFIG_MGOLDMONTPLUS ++#define MODULE_PROC_FAMILY "GOLDMONTPLUS " ++#elif defined CONFIG_MSANDYBRIDGE ++#define MODULE_PROC_FAMILY "SANDYBRIDGE " ++#elif defined CONFIG_MIVYBRIDGE ++#define MODULE_PROC_FAMILY "IVYBRIDGE " ++#elif defined CONFIG_MHASWELL ++#define MODULE_PROC_FAMILY "HASWELL " ++#elif defined CONFIG_MBROADWELL ++#define MODULE_PROC_FAMILY "BROADWELL " ++#elif defined CONFIG_MSKYLAKE ++#define MODULE_PROC_FAMILY "SKYLAKE " ++#elif defined CONFIG_MSKYLAKEX ++#define MODULE_PROC_FAMILY "SKYLAKEX " ++#elif defined CONFIG_MCANNONLAKE ++#define MODULE_PROC_FAMILY "CANNONLAKE " ++#elif defined CONFIG_MICELAKE ++#define MODULE_PROC_FAMILY "ICELAKE " ++#elif defined CONFIG_MCASCADELAKE ++#define MODULE_PROC_FAMILY "CASCADELAKE " ++#elif defined CONFIG_MCOOPERLAKE ++#define MODULE_PROC_FAMILY "COOPERLAKE " ++#elif defined CONFIG_MTIGERLAKE ++#define MODULE_PROC_FAMILY "TIGERLAKE " + #elif defined CONFIG_MATOM + #define MODULE_PROC_FAMILY "ATOM " + #elif defined CONFIG_M686 +@@ -43,6 +77,28 @@ struct mod_arch_specific { + #define MODULE_PROC_FAMILY "K7 " + #elif defined CONFIG_MK8 + #define MODULE_PROC_FAMILY "K8 " ++#elif defined CONFIG_MK8SSE3 ++#define MODULE_PROC_FAMILY "K8SSE3 " ++#elif defined CONFIG_MK10 ++#define MODULE_PROC_FAMILY "K10 " ++#elif defined CONFIG_MBARCELONA ++#define MODULE_PROC_FAMILY "BARCELONA " ++#elif defined CONFIG_MBOBCAT ++#define MODULE_PROC_FAMILY "BOBCAT " ++#elif defined CONFIG_MBULLDOZER ++#define MODULE_PROC_FAMILY "BULLDOZER " ++#elif defined CONFIG_MPILEDRIVER ++#define MODULE_PROC_FAMILY "PILEDRIVER " ++#elif defined CONFIG_MSTEAMROLLER ++#define MODULE_PROC_FAMILY "STEAMROLLER " ++#elif defined CONFIG_MJAGUAR ++#define MODULE_PROC_FAMILY "JAGUAR " ++#elif defined CONFIG_MEXCAVATOR ++#define MODULE_PROC_FAMILY "EXCAVATOR " ++#elif defined CONFIG_MZEN ++#define MODULE_PROC_FAMILY "ZEN " ++#elif defined CONFIG_MZEN2 ++#define MODULE_PROC_FAMILY "ZEN2 " + #elif defined CONFIG_MELAN + #define MODULE_PROC_FAMILY "ELAN " + #elif defined CONFIG_MCRUSOE +--- a/arch/x86/Kconfig.cpu 2020-06-10 15:35:02.000000000 -0400 ++++ b/arch/x86/Kconfig.cpu 2020-06-15 10:31:48.627486708 -0400 +@@ -116,6 +116,7 @@ config MPENTIUMM + config MPENTIUM4 + bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/older Xeon" + depends on X86_32 ++ select X86_P6_NOP + ---help--- + Select this for Intel Pentium 4 chips. This includes the + Pentium 4, Pentium D, P4-based Celeron and Xeon, and +@@ -148,9 +149,8 @@ config MPENTIUM4 + -Paxville + -Dempsey + +- + config MK6 +- bool "K6/K6-II/K6-III" ++ bool "AMD K6/K6-II/K6-III" + depends on X86_32 + ---help--- + Select this for an AMD K6-family processor. Enables use of +@@ -158,7 +158,7 @@ config MK6 + flags to GCC. + + config MK7 +- bool "Athlon/Duron/K7" ++ bool "AMD Athlon/Duron/K7" + depends on X86_32 + ---help--- + Select this for an AMD Athlon K7-family processor. Enables use of +@@ -166,12 +166,90 @@ config MK7 + flags to GCC. + + config MK8 +- bool "Opteron/Athlon64/Hammer/K8" ++ bool "AMD Opteron/Athlon64/Hammer/K8" + ---help--- + Select this for an AMD Opteron or Athlon64 Hammer-family processor. + Enables use of some extended instructions, and passes appropriate + optimization flags to GCC. + ++config MK8SSE3 ++ bool "AMD Opteron/Athlon64/Hammer/K8 with SSE3" ++ ---help--- ++ Select this for improved AMD Opteron or Athlon64 Hammer-family processors. ++ Enables use of some extended instructions, and passes appropriate ++ optimization flags to GCC. ++ ++config MK10 ++ bool "AMD 61xx/7x50/PhenomX3/X4/II/K10" ++ ---help--- ++ Select this for an AMD 61xx Eight-Core Magny-Cours, Athlon X2 7x50, ++ Phenom X3/X4/II, Athlon II X2/X3/X4, or Turion II-family processor. ++ Enables use of some extended instructions, and passes appropriate ++ optimization flags to GCC. ++ ++config MBARCELONA ++ bool "AMD Barcelona" ++ ---help--- ++ Select this for AMD Family 10h Barcelona processors. ++ ++ Enables -march=barcelona ++ ++config MBOBCAT ++ bool "AMD Bobcat" ++ ---help--- ++ Select this for AMD Family 14h Bobcat processors. ++ ++ Enables -march=btver1 ++ ++config MJAGUAR ++ bool "AMD Jaguar" ++ ---help--- ++ Select this for AMD Family 16h Jaguar processors. ++ ++ Enables -march=btver2 ++ ++config MBULLDOZER ++ bool "AMD Bulldozer" ++ ---help--- ++ Select this for AMD Family 15h Bulldozer processors. ++ ++ Enables -march=bdver1 ++ ++config MPILEDRIVER ++ bool "AMD Piledriver" ++ ---help--- ++ Select this for AMD Family 15h Piledriver processors. ++ ++ Enables -march=bdver2 ++ ++config MSTEAMROLLER ++ bool "AMD Steamroller" ++ ---help--- ++ Select this for AMD Family 15h Steamroller processors. ++ ++ Enables -march=bdver3 ++ ++config MEXCAVATOR ++ bool "AMD Excavator" ++ ---help--- ++ Select this for AMD Family 15h Excavator processors. ++ ++ Enables -march=bdver4 ++ ++config MZEN ++ bool "AMD Zen" ++ ---help--- ++ Select this for AMD Family 17h Zen processors. ++ ++ Enables -march=znver1 ++ ++config MZEN2 ++ bool "AMD Zen 2" ++ ---help--- ++ Select this for AMD Family 17h Zen 2 processors. ++ ++ Enables -march=znver2 ++ + config MCRUSOE + bool "Crusoe" + depends on X86_32 +@@ -253,6 +331,7 @@ config MVIAC7 + + config MPSC + bool "Intel P4 / older Netburst based Xeon" ++ select X86_P6_NOP + depends on X86_64 + ---help--- + Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey +@@ -262,8 +341,19 @@ config MPSC + using the cpu family field + in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one. + ++config MATOM ++ bool "Intel Atom" ++ select X86_P6_NOP ++ ---help--- ++ ++ Select this for the Intel Atom platform. Intel Atom CPUs have an ++ in-order pipelining architecture and thus can benefit from ++ accordingly optimized code. Use a recent GCC with specific Atom ++ support in order to fully benefit from selecting this option. ++ + config MCORE2 +- bool "Core 2/newer Xeon" ++ bool "Intel Core 2" ++ select X86_P6_NOP + ---help--- + + Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and +@@ -271,14 +361,151 @@ config MCORE2 + family in /proc/cpuinfo. Newer ones have 6 and older ones 15 + (not a typo) + +-config MATOM +- bool "Intel Atom" ++ Enables -march=core2 ++ ++config MNEHALEM ++ bool "Intel Nehalem" ++ select X86_P6_NOP + ---help--- + +- Select this for the Intel Atom platform. Intel Atom CPUs have an +- in-order pipelining architecture and thus can benefit from +- accordingly optimized code. Use a recent GCC with specific Atom +- support in order to fully benefit from selecting this option. ++ Select this for 1st Gen Core processors in the Nehalem family. ++ ++ Enables -march=nehalem ++ ++config MWESTMERE ++ bool "Intel Westmere" ++ select X86_P6_NOP ++ ---help--- ++ ++ Select this for the Intel Westmere formerly Nehalem-C family. ++ ++ Enables -march=westmere ++ ++config MSILVERMONT ++ bool "Intel Silvermont" ++ select X86_P6_NOP ++ ---help--- ++ ++ Select this for the Intel Silvermont platform. ++ ++ Enables -march=silvermont ++ ++config MGOLDMONT ++ bool "Intel Goldmont" ++ select X86_P6_NOP ++ ---help--- ++ ++ Select this for the Intel Goldmont platform including Apollo Lake and Denverton. ++ ++ Enables -march=goldmont ++ ++config MGOLDMONTPLUS ++ bool "Intel Goldmont Plus" ++ select X86_P6_NOP ++ ---help--- ++ ++ Select this for the Intel Goldmont Plus platform including Gemini Lake. ++ ++ Enables -march=goldmont-plus ++ ++config MSANDYBRIDGE ++ bool "Intel Sandy Bridge" ++ select X86_P6_NOP ++ ---help--- ++ ++ Select this for 2nd Gen Core processors in the Sandy Bridge family. ++ ++ Enables -march=sandybridge ++ ++config MIVYBRIDGE ++ bool "Intel Ivy Bridge" ++ select X86_P6_NOP ++ ---help--- ++ ++ Select this for 3rd Gen Core processors in the Ivy Bridge family. ++ ++ Enables -march=ivybridge ++ ++config MHASWELL ++ bool "Intel Haswell" ++ select X86_P6_NOP ++ ---help--- ++ ++ Select this for 4th Gen Core processors in the Haswell family. ++ ++ Enables -march=haswell ++ ++config MBROADWELL ++ bool "Intel Broadwell" ++ select X86_P6_NOP ++ ---help--- ++ ++ Select this for 5th Gen Core processors in the Broadwell family. ++ ++ Enables -march=broadwell ++ ++config MSKYLAKE ++ bool "Intel Skylake" ++ select X86_P6_NOP ++ ---help--- ++ ++ Select this for 6th Gen Core processors in the Skylake family. ++ ++ Enables -march=skylake ++ ++config MSKYLAKEX ++ bool "Intel Skylake X" ++ select X86_P6_NOP ++ ---help--- ++ ++ Select this for 6th Gen Core processors in the Skylake X family. ++ ++ Enables -march=skylake-avx512 ++ ++config MCANNONLAKE ++ bool "Intel Cannon Lake" ++ select X86_P6_NOP ++ ---help--- ++ ++ Select this for 8th Gen Core processors ++ ++ Enables -march=cannonlake ++ ++config MICELAKE ++ bool "Intel Ice Lake" ++ select X86_P6_NOP ++ ---help--- ++ ++ Select this for 10th Gen Core processors in the Ice Lake family. ++ ++ Enables -march=icelake-client ++ ++config MCASCADELAKE ++ bool "Intel Cascade Lake" ++ select X86_P6_NOP ++ ---help--- ++ ++ Select this for Xeon processors in the Cascade Lake family. ++ ++ Enables -march=cascadelake ++ ++config MCOOPERLAKE ++ bool "Intel Cooper Lake" ++ select X86_P6_NOP ++ ---help--- ++ ++ Select this for Xeon processors in the Cooper Lake family. ++ ++ Enables -march=cooperlake ++ ++config MTIGERLAKE ++ bool "Intel Tiger Lake" ++ select X86_P6_NOP ++ ---help--- ++ ++ Select this for third-generation 10 nm process processors in the Tiger Lake family. ++ ++ Enables -march=tigerlake + + config GENERIC_CPU + bool "Generic-x86-64" +@@ -287,6 +514,19 @@ config GENERIC_CPU + Generic x86-64 CPU. + Run equally well on all x86-64 CPUs. + ++config MNATIVE ++ bool "Native optimizations autodetected by GCC" ++ ---help--- ++ ++ GCC 4.2 and above support -march=native, which automatically detects ++ the optimum settings to use based on your processor. -march=native ++ also detects and applies additional settings beyond -march specific ++ to your CPU, (eg. -msse4). Unless you have a specific reason not to ++ (e.g. distcc cross-compiling), you should probably be using ++ -march=native rather than anything listed below. ++ ++ Enables -march=native ++ + endchoice + + config X86_GENERIC +@@ -311,7 +551,7 @@ config X86_INTERNODE_CACHE_SHIFT + config X86_L1_CACHE_SHIFT + int + default "7" if MPENTIUM4 || MPSC +- default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU ++ default "6" if MK7 || MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MJAGUAR || MPENTIUMM || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MNATIVE || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU + default "4" if MELAN || M486 || MGEODEGX1 + default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX + +@@ -329,35 +569,36 @@ config X86_ALIGNMENT_16 + + config X86_INTEL_USERCOPY + def_bool y +- depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 ++ depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK8SSE3 || MK7 || MEFFICEON || MCORE2 || MK10 || MBARCELONA || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MNATIVE + + config X86_USE_PPRO_CHECKSUM + def_bool y +- depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM ++ depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MK10 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MK8SSE3 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MATOM || MNATIVE + + config X86_USE_3DNOW + def_bool y + depends on (MCYRIXIII || MK7 || MGEODE_LX) && !UML + +-# +-# P6_NOPs are a relatively minor optimization that require a family >= +-# 6 processor, except that it is broken on certain VIA chips. +-# Furthermore, AMD chips prefer a totally different sequence of NOPs +-# (which work on all CPUs). In addition, it looks like Virtual PC +-# does not understand them. +-# +-# As a result, disallow these if we're not compiling for X86_64 (these +-# NOPs do work on all x86-64 capable chips); the list of processors in +-# the right-hand clause are the cores that benefit from this optimization. +-# + config X86_P6_NOP +- def_bool y +- depends on X86_64 +- depends on (MCORE2 || MPENTIUM4 || MPSC) ++ default n ++ bool "Support for P6_NOPs on Intel chips" ++ depends on (MCORE2 || MPENTIUM4 || MPSC || MATOM || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MNATIVE) ++ ---help--- ++ P6_NOPs are a relatively minor optimization that require a family >= ++ 6 processor, except that it is broken on certain VIA chips. ++ Furthermore, AMD chips prefer a totally different sequence of NOPs ++ (which work on all CPUs). In addition, it looks like Virtual PC ++ does not understand them. ++ ++ As a result, disallow these if we're not compiling for X86_64 (these ++ NOPs do work on all x86-64 capable chips); the list of processors in ++ the right-hand clause are the cores that benefit from this optimization. ++ ++ Say Y if you have Intel CPU newer than Pentium Pro, N otherwise. + + config X86_TSC + def_bool y +- depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64 ++ depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MK8SSE3 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MNATIVE || MATOM) || X86_64 + + config X86_CMPXCHG64 + def_bool y +@@ -367,7 +608,7 @@ config X86_CMPXCHG64 + # generates cmov. + config X86_CMOV + def_bool y +- depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX) ++ depends on (MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MJAGUAR || MK7 || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MNATIVE || MATOM || MGEODE_LX) + + config X86_MINIMUM_CPU_FAMILY + int +--- a/arch/x86/Makefile 2020-06-10 15:35:02.000000000 -0400 ++++ b/arch/x86/Makefile 2020-06-15 10:32:39.508648036 -0400 +@@ -119,13 +119,60 @@ else + KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup) + + # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu) ++ cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native) + cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) ++ cflags-$(CONFIG_MK8SSE3) += $(call cc-option,-march=k8-sse3,-mtune=k8) ++ cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10) ++ cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona) ++ cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1) ++ cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2) ++ cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1) ++ cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2) ++ cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-mno-tbm) ++ cflags-$(CONFIG_MSTEAMROLLER) += $(call cc-option,-march=bdver3) ++ cflags-$(CONFIG_MSTEAMROLLER) += $(call cc-option,-mno-tbm) ++ cflags-$(CONFIG_MEXCAVATOR) += $(call cc-option,-march=bdver4) ++ cflags-$(CONFIG_MEXCAVATOR) += $(call cc-option,-mno-tbm) ++ cflags-$(CONFIG_MZEN) += $(call cc-option,-march=znver1) ++ cflags-$(CONFIG_MZEN2) += $(call cc-option,-march=znver2) + cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) + + cflags-$(CONFIG_MCORE2) += \ +- $(call cc-option,-march=core2,$(call cc-option,-mtune=generic)) +- cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \ +- $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) ++ $(call cc-option,-march=core2,$(call cc-option,-mtune=core2)) ++ cflags-$(CONFIG_MNEHALEM) += \ ++ $(call cc-option,-march=nehalem,$(call cc-option,-mtune=nehalem)) ++ cflags-$(CONFIG_MWESTMERE) += \ ++ $(call cc-option,-march=westmere,$(call cc-option,-mtune=westmere)) ++ cflags-$(CONFIG_MSILVERMONT) += \ ++ $(call cc-option,-march=silvermont,$(call cc-option,-mtune=silvermont)) ++ cflags-$(CONFIG_MGOLDMONT) += \ ++ $(call cc-option,-march=goldmont,$(call cc-option,-mtune=goldmont)) ++ cflags-$(CONFIG_MGOLDMONTPLUS) += \ ++ $(call cc-option,-march=goldmont-plus,$(call cc-option,-mtune=goldmont-plus)) ++ cflags-$(CONFIG_MSANDYBRIDGE) += \ ++ $(call cc-option,-march=sandybridge,$(call cc-option,-mtune=sandybridge)) ++ cflags-$(CONFIG_MIVYBRIDGE) += \ ++ $(call cc-option,-march=ivybridge,$(call cc-option,-mtune=ivybridge)) ++ cflags-$(CONFIG_MHASWELL) += \ ++ $(call cc-option,-march=haswell,$(call cc-option,-mtune=haswell)) ++ cflags-$(CONFIG_MBROADWELL) += \ ++ $(call cc-option,-march=broadwell,$(call cc-option,-mtune=broadwell)) ++ cflags-$(CONFIG_MSKYLAKE) += \ ++ $(call cc-option,-march=skylake,$(call cc-option,-mtune=skylake)) ++ cflags-$(CONFIG_MSKYLAKEX) += \ ++ $(call cc-option,-march=skylake-avx512,$(call cc-option,-mtune=skylake-avx512)) ++ cflags-$(CONFIG_MCANNONLAKE) += \ ++ $(call cc-option,-march=cannonlake,$(call cc-option,-mtune=cannonlake)) ++ cflags-$(CONFIG_MICELAKE) += \ ++ $(call cc-option,-march=icelake-client,$(call cc-option,-mtune=icelake-client)) ++ cflags-$(CONFIG_MCASCADELAKE) += \ ++ $(call cc-option,-march=cascadelake,$(call cc-option,-mtune=cascadelake)) ++ cflags-$(CONFIG_MCOOPERLAKE) += \ ++ $(call cc-option,-march=cooperlake,$(call cc-option,-mtune=cooperlake)) ++ cflags-$(CONFIG_MTIGERLAKE) += \ ++ $(call cc-option,-march=tigerlake,$(call cc-option,-mtune=tigerlake)) ++ cflags-$(CONFIG_MATOM) += $(call cc-option,-march=bonnell) \ ++ $(call cc-option,-mtune=bonnell,$(call cc-option,-mtune=generic)) + cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic) + KBUILD_CFLAGS += $(cflags-y) + +--- a/arch/x86/Makefile_32.cpu 2020-06-10 15:35:02.000000000 -0400 ++++ b/arch/x86/Makefile_32.cpu 2020-06-15 10:31:48.627486708 -0400 +@@ -23,7 +23,19 @@ cflags-$(CONFIG_MK6) += -march=k6 + # Please note, that patches that add -march=athlon-xp and friends are pointless. + # They make zero difference whatsosever to performance at this time. + cflags-$(CONFIG_MK7) += -march=athlon ++cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native) + cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon) ++cflags-$(CONFIG_MK8SSE3) += $(call cc-option,-march=k8-sse3,-march=athlon) ++cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10,-march=athlon) ++cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona,-march=athlon) ++cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1,-march=athlon) ++cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2,-march=athlon) ++cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1,-march=athlon) ++cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2,-march=athlon) ++cflags-$(CONFIG_MSTEAMROLLER) += $(call cc-option,-march=bdver3,-march=athlon) ++cflags-$(CONFIG_MEXCAVATOR) += $(call cc-option,-march=bdver4,-march=athlon) ++cflags-$(CONFIG_MZEN) += $(call cc-option,-march=znver1,-march=athlon) ++cflags-$(CONFIG_MZEN2) += $(call cc-option,-march=znver2,-march=athlon) + cflags-$(CONFIG_MCRUSOE) += -march=i686 -falign-functions=0 -falign-jumps=0 -falign-loops=0 + cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) -falign-functions=0 -falign-jumps=0 -falign-loops=0 + cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586) +@@ -32,8 +44,24 @@ cflags-$(CONFIG_MCYRIXIII) += $(call cc- + cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) + cflags-$(CONFIG_MVIAC7) += -march=i686 + cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2) +-cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \ +- $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) ++cflags-$(CONFIG_MNEHALEM) += -march=i686 $(call tune,nehalem) ++cflags-$(CONFIG_MWESTMERE) += -march=i686 $(call tune,westmere) ++cflags-$(CONFIG_MSILVERMONT) += -march=i686 $(call tune,silvermont) ++cflags-$(CONFIG_MGOLDMONT) += -march=i686 $(call tune,goldmont) ++cflags-$(CONFIG_MGOLDMONTPLUS) += -march=i686 $(call tune,goldmont-plus) ++cflags-$(CONFIG_MSANDYBRIDGE) += -march=i686 $(call tune,sandybridge) ++cflags-$(CONFIG_MIVYBRIDGE) += -march=i686 $(call tune,ivybridge) ++cflags-$(CONFIG_MHASWELL) += -march=i686 $(call tune,haswell) ++cflags-$(CONFIG_MBROADWELL) += -march=i686 $(call tune,broadwell) ++cflags-$(CONFIG_MSKYLAKE) += -march=i686 $(call tune,skylake) ++cflags-$(CONFIG_MSKYLAKEX) += -march=i686 $(call tune,skylake-avx512) ++cflags-$(CONFIG_MCANNONLAKE) += -march=i686 $(call tune,cannonlake) ++cflags-$(CONFIG_MICELAKE) += -march=i686 $(call tune,icelake-client) ++cflags-$(CONFIG_MCASCADELAKE) += -march=i686 $(call tune,cascadelake) ++cflags-$(CONFIG_MCOOPERLAKE) += -march=i686 $(call tune,cooperlake) ++cflags-$(CONFIG_MTIGERLAKE) += -march=i686 $(call tune,tigerlake) ++cflags-$(CONFIG_MATOM) += $(call cc-option,-march=bonnell,$(call cc-option,-march=core2,-march=i686)) \ ++ $(call cc-option,-mtune=bonnell,$(call cc-option,-mtune=generic)) + + # AMD Elan support + cflags-$(CONFIG_MELAN) += -march=i486