----- Original Message ----- > From: H. Peter Anvin <h...@zytor.com> > Sent: Saturday, December 14, 2013 6:41 PM > Subject: Re: Fw: [PATCH] expand micro-optimizations in kernel to newer model > CPUs
> > Please submit in the email form requested by the > Documentation/SubmittingPatches email; in particular we need the > Signed-off-by: statements. > > > -hpa > From: John Audia <da_audioph...@yahoo.com> Signed-off-by: John Audia <da_audioph...@yahoo.com> This patch has been tested on and known to work with kernel versions from 3.2 up to the latest git version (pulled on 12/14/2013). This patch will expand the number of microarchitectures to include new processors including: AMD K10-family, AMD Family 10h (Barcelona), AMD Family 14h (Bobcat), AMD Family 15h (Bulldozer), AMD Family 15h (Piledriver), AMD Family 16h (Jaguar), Intel 1st Gen Core i3/i5/i7 (Nehalem), Intel 2nd Gen Core i3/i5/i7 (Sandybridge), Intel 3rd Gen Core i3/i5/i7 (Ivybridge), and Intel 4th Gen Core i3/i5/i7 (Haswell). It also offers the compiler the 'native' flag. Small but real speed increases are measurable using a make endpoint comparing a generic kernel to one built with one of the respective microarchs. See the following experimental evidence of this statement: https://github.com/graysky2/kernel_gcc_patch --- diff -uprN a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h --- a/arch/x86/include/asm/module.h2013-11-03 18:41:51.000000000 -0500 +++ b/arch/x86/include/asm/module.h2013-12-15 06:21:24.351122516 -0500 @@ -15,6 +15,16 @@ #define MODULE_PROC_FAMILY "586MMX " #elif defined CONFIG_MCORE2 #define MODULE_PROC_FAMILY "CORE2 " +#elif defined CONFIG_MNATIVE +#define MODULE_PROC_FAMILY "NATIVE " +#elif defined CONFIG_MCOREI7 +#define MODULE_PROC_FAMILY "COREI7 " +#elif defined CONFIG_MCOREI7AVX +#define MODULE_PROC_FAMILY "COREI7AVX " +#elif defined CONFIG_MCOREAVXI +#define MODULE_PROC_FAMILY "COREAVXI " +#elif defined CONFIG_MCOREAVX2 +#define MODULE_PROC_FAMILY "COREAVX2 " #elif defined CONFIG_MATOM #define MODULE_PROC_FAMILY "ATOM " #elif defined CONFIG_M686 @@ -33,6 +43,18 @@ #define MODULE_PROC_FAMILY "K7 " #elif defined CONFIG_MK8 #define MODULE_PROC_FAMILY "K8 " +#elif 
defined CONFIG_MK10 +#define MODULE_PROC_FAMILY "K10 " +#elif defined CONFIG_MBARCELONA +#define MODULE_PROC_FAMILY "BARCELONA " +#elif defined CONFIG_MBOBCAT +#define MODULE_PROC_FAMILY "BOBCAT " +#elif defined CONFIG_MBULLDOZER +#define MODULE_PROC_FAMILY "BULLDOZER " +#elif defined CONFIG_MPILEDRIVER +#define MODULE_PROC_FAMILY "PILEDRIVER " +#elif defined CONFIG_MJAGUAR +#define MODULE_PROC_FAMILY "JAGUAR " #elif defined CONFIG_MELAN #define MODULE_PROC_FAMILY "ELAN " #elif defined CONFIG_MCRUSOE diff -uprN a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu --- a/arch/x86/Kconfig.cpu2013-11-03 18:41:51.000000000 -0500 +++ b/arch/x86/Kconfig.cpu2013-12-15 06:21:24.351122516 -0500 @@ -139,7 +139,7 @@ config MPENTIUM4 config MK6 -bool "K6/K6-II/K6-III" +bool "AMD K6/K6-II/K6-III" depends on X86_32 ---help--- Select this for an AMD K6-family processor. Enables use of @@ -147,7 +147,7 @@ config MK6 flags to GCC. config MK7 -bool "Athlon/Duron/K7" +bool "AMD Athlon/Duron/K7" depends on X86_32 ---help--- Select this for an AMD Athlon K7-family processor. Enables use of @@ -155,12 +155,55 @@ config MK7 flags to GCC. config MK8 -bool "Opteron/Athlon64/Hammer/K8" +bool "AMD Opteron/Athlon64/Hammer/K8" ---help--- Select this for an AMD Opteron or Athlon64 Hammer-family processor. Enables use of some extended instructions, and passes appropriate optimization flags to GCC. +config MK10 +bool "AMD 61xx/7x50/PhenomX3/X4/II/K10" +---help--- + Select this for an AMD 61xx Eight-Core Magny-Cours, Athlon X2 7x50, +Phenom X3/X4/II, Athlon II X2/X3/X4, or Turion II-family processor. + Enables use of some extended instructions, and passes appropriate + optimization flags to GCC. + +config MBARCELONA +bool "AMD Barcelona" +---help--- + Select this for AMD Barcelona and newer processors. + + Enables -march=barcelona + +config MBOBCAT +bool "AMD Bobcat" +---help--- + Select this for AMD Bobcat processors. 
+ + Enables -march=btver1 + +config MBULLDOZER +bool "AMD Bulldozer" +---help--- + Select this for AMD Bulldozer processors. + + Enables -march=bdver1 + +config MPILEDRIVER +bool "AMD Piledriver" +---help--- + Select this for AMD Piledriver processors. + + Enables -march=bdver2 + +config MJAGUAR +bool "AMD Jaguar" +---help--- + Select this for AMD Jaguar processors. + + Enables -march=btver2 + config MCRUSOE bool "Crusoe" depends on X86_32 @@ -251,8 +294,17 @@ config MPSC using the cpu family field in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one. +config MATOM +bool "Intel Atom" +---help--- + + Select this for the Intel Atom platform. Intel Atom CPUs have an + in-order pipelining architecture and thus can benefit from + accordingly optimized code. Use a recent GCC with specific Atom + support in order to fully benefit from selecting this option. + config MCORE2 -bool "Core 2/newer Xeon" +bool "Intel Core 2" ---help--- Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and @@ -260,14 +312,40 @@ config MCORE2 family in /proc/cpuinfo. Newer ones have 6 and older ones 15 (not a typo) -config MATOM -bool "Intel Atom" + Enables -march=core2 + +config MCOREI7 +bool "Intel Core i7" ---help--- - Select this for the Intel Atom platform. Intel Atom CPUs have an - in-order pipelining architecture and thus can benefit from - accordingly optimized code. Use a recent GCC with specific Atom - support in order to fully benefit from selecting this option. + Select this for the Intel Nehalem platform. Intel Nehalem processors + include Core i3, i5, i7, Xeon: 34xx, 35xx, 55xx, 56xx, 75xx processors. + + Enables -march=corei7 + +config MCOREI7AVX +bool "Intel Core 2nd Gen AVX" +---help--- + + Select this for 2nd Gen Core processors including Sandy Bridge. + + Enables -march=corei7-avx + +config MCOREAVXI +bool "Intel Core 3rd Gen AVX" +---help--- + + Select this for 3rd Gen Core processors including Ivy Bridge. 
+ + Enables -march=core-avx-i + +config MCOREAVX2 +bool "Intel Core AVX2" +---help--- + + Select this for AVX2 enabled processors including Haswell. + + Enables -march=core-avx2 config GENERIC_CPU bool "Generic-x86-64" @@ -276,6 +354,19 @@ config GENERIC_CPU Generic x86-64 CPU. Run equally well on all x86-64 CPUs. +config MNATIVE + bool "Native optimizations autodetected by GCC" + ---help--- + + GCC 4.2 and above support -march=native, which automatically detects + the optimum settings to use based on your processor. -march=native + also detects and applies additional settings beyond -march specific + to your CPU, (eg. -msse4). Unless you have a specific reason not to + (e.g. distcc cross-compiling), you should probably be using + -march=native rather than anything listed below. + + Enables -march=native + endchoice config X86_GENERIC @@ -300,7 +391,7 @@ config X86_INTERNODE_CACHE_SHIFT config X86_L1_CACHE_SHIFT int default "7" if MPENTIUM4 || MPSC -default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU +default "6" if MK7 || MK8 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MPENTIUMM || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MATOM || MVIAC7 || X86_GENERIC || MNATIVE || GENERIC_CPU default "4" if MELAN || M486 || MGEODEGX1 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX @@ -331,11 +422,11 @@ config X86_ALIGNMENT_16 config X86_INTEL_USERCOPY def_bool y -depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 +depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || MNATIVE || X86_GENERIC || MK8 || MK7 || MK10 || MBARCELONA || MEFFICEON || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 config X86_USE_PPRO_CHECKSUM def_bool y -depends on MWINCHIP3D 
|| MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM +depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MK10 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MATOM || MNATIVE config X86_USE_3DNOW def_bool y @@ -363,17 +454,17 @@ config X86_P6_NOP config X86_TSC def_bool y -depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) && !X86_NUMAQ) || X86_64 +depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MATOM) && !X86_NUMAQ) || X86_64 || MNATIVE config X86_CMPXCHG64 def_bool y -depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM +depends on X86_PAE || X86_64 || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM || MNATIVE # this should be set for all -march=.. options where the compiler # generates cmov. 
config X86_CMOV def_bool y -depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX) +depends on (MK8 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MK7 || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MNATIVE || MATOM || MGEODE_LX) config X86_MINIMUM_CPU_FAMILY int diff -uprN a/arch/x86/Makefile b/arch/x86/Makefile --- a/arch/x86/Makefile2013-11-03 18:41:51.000000000 -0500 +++ b/arch/x86/Makefile2013-12-15 06:21:24.354455723 -0500 @@ -61,11 +61,26 @@ else KBUILD_CFLAGS += $(call cc-option,-mno-sse -mpreferred-stack-boundary=3) # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu) + cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native) cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) + cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10) + cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona) + cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1) + cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1) + cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2) + cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2) cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) cflags-$(CONFIG_MCORE2) += \ - $(call cc-option,-march=core2,$(call cc-option,-mtune=generic)) + $(call cc-option,-march=core2,$(call cc-option,-mtune=core2)) + cflags-$(CONFIG_MCOREI7) += \ + $(call cc-option,-march=corei7,$(call cc-option,-mtune=corei7)) + cflags-$(CONFIG_MCOREI7AVX) += \ + $(call cc-option,-march=corei7-avx,$(call cc-option,-mtune=corei7-avx)) + cflags-$(CONFIG_MCOREAVXI) += \ + $(call cc-option,-march=core-avx-i,$(call cc-option,-mtune=core-avx-i)) + cflags-$(CONFIG_MCOREAVX2) += \ + $(call cc-option,-march=core-avx2,$(call 
cc-option,-mtune=core-avx2)) cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \ $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic) diff -uprN a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu --- a/arch/x86/Makefile_32.cpu2013-11-03 18:41:51.000000000 -0500 +++ b/arch/x86/Makefile_32.cpu2013-12-15 06:21:24.354455723 -0500 @@ -23,7 +23,14 @@ cflags-$(CONFIG_MK6)+= -march=k6 # Please note, that patches that add -march=athlon-xp and friends are pointless. # They make zero difference whatsosever to performance at this time. cflags-$(CONFIG_MK7)+= -march=athlon +cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native) cflags-$(CONFIG_MK8)+= $(call cc-option,-march=k8,-march=athlon) +cflags-$(CONFIG_MK10)+= $(call cc-option,-march=amdfam10,-march=athlon) +cflags-$(CONFIG_MBARCELONA)+= $(call cc-option,-march=barcelona,-march=athlon) +cflags-$(CONFIG_MBOBCAT)+= $(call cc-option,-march=btver1,-march=athlon) +cflags-$(CONFIG_MBULLDOZER)+= $(call cc-option,-march=bdver1,-march=athlon) +cflags-$(CONFIG_MPILEDRIVER)+= $(call cc-option,-march=bdver2,-march=athlon) +cflags-$(CONFIG_MJAGUAR)+= $(call cc-option,-march=btver2,-march=athlon) cflags-$(CONFIG_MCRUSOE)+= -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 cflags-$(CONFIG_MEFFICEON)+= -march=i686 $(call tune,pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 cflags-$(CONFIG_MWINCHIPC6)+= $(call cc-option,-march=winchip-c6,-march=i586) @@ -32,6 +39,10 @@ cflags-$(CONFIG_MCYRIXIII)+= $(call cc- cflags-$(CONFIG_MVIAC3_2)+= $(call cc-option,-march=c3-2,-march=i686) cflags-$(CONFIG_MVIAC7)+= -march=i686 cflags-$(CONFIG_MCORE2)+= -march=i686 $(call tune,core2) +cflags-$(CONFIG_MCOREI7)+= -march=i686 $(call tune,corei7) +cflags-$(CONFIG_MCOREI7AVX)+= -march=i686 $(call tune,corei7-avx) +cflags-$(CONFIG_MCOREAVXI)+= -march=i686 $(call tune,core-avx-i) +cflags-$(CONFIG_MCOREAVX2)+= -march=i686 $(call 
tune,core-avx2) cflags-$(CONFIG_MATOM)+= $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \ $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/