On Sun, Feb 22, 2015 at 09:18:40AM +0100, Ingo Molnar wrote: > So am I interpreting the older and your latest numbers > correctly in stating that the cost observation has flipped > around 180 degrees: the first measurement showed eager FPU > to be a win, but now that we can do more precise > measurements, eager FPU has actually slowed down the kernel > build by ~0.5%?
Well, I wouldn't take the latest numbers too seriously - that was a single run without --repeat. > That's not good, and kernel builds are just a random load > that isn't even that FPU or context switch heavy - there > will certainly be other loads that would be hurt even more. That is my fear. > So just before we base wide reaching decisions based on any > of these measurements, would you mind help us increase our > confidence in the numbers some more: > > - It might make sense to do a 'perf stat --null --repeat' > measurement as well [without any -e arguments], to make > sure the rich PMU stats you are gathering are not > interfering? > > With 'perf stat --null --repeat' perf acts essenially > as a /usr/bin/time replacement, but can measure down to > microseconds and will calculate noise/sttdev properly. Cool, let me do that. > - Perhaps also double check the debug switch: is it > really properly switching FPU handling mode? I've changed the use_eager_fpu() test to do: static __always_inline __pure bool use_eager_fpu(void) { return boot_cpu_has(X86_FEATURE_EAGER_FPU); } and I'm clearing/setting eager FPU with setup_force_cpu_cap/setup_clear_cpu_cap, see full diff below. > - Do you have enough RAM that there's essentially no IO > in the system worth speaking of? Do you have enough RAM > to copy a whole kernel tree to /tmp/linux/ and do the > measurement there, on ramfs? /proc/meminfo says "MemTotal: 4011860 kB" which is probably not enough. But I could find one somewhere :-) --- arch/x86/include/asm/fpu-internal.h | 6 +++- arch/x86/kernel/xsave.c | 57 ++++++++++++++++++++++++++++++++++++- 2 files changed, 61 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index e97622f57722..c8a161d02056 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -38,6 +38,8 @@ int ia32_setup_frame(int sig, struct ksignal *ksig, # define ia32_setup_rt_frame __setup_rt_frame #endif + +extern unsigned long fpu_saved; extern unsigned int mxcsr_feature_mask; extern void fpu_init(void); extern void eager_fpu_init(void); @@ -87,7 +89,7 @@ static inline int is_x32_frame(void) static __always_inline __pure bool use_eager_fpu(void) { - return static_cpu_has_safe(X86_FEATURE_EAGER_FPU); + return boot_cpu_has(X86_FEATURE_EAGER_FPU); } static __always_inline __pure bool use_xsaveopt(void) @@ -242,6 +244,8 @@ static inline void fpu_fxsave(struct fpu *fpu) */ static inline int fpu_save_init(struct fpu *fpu) { + fpu_saved++; + if (use_xsave()) { fpu_xsave(fpu); diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 0de1fae2bdf0..943af0adacff 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -14,6 +14,8 @@ #include <asm/sigframe.h> #include <asm/xcr.h> +#include <linux/debugfs.h> + /* * Supported feature mask by the CPU and the kernel. */ @@ -638,7 +640,7 @@ static void __init xstate_enable_boot_cpu(void) setup_init_fpu_buf(); /* Auto enable eagerfpu for xsaveopt */ - if (cpu_has_xsaveopt && eagerfpu != DISABLE) + if (eagerfpu != DISABLE) eagerfpu = ENABLE; if (pcntxt_mask & XSTATE_EAGER) { @@ -739,3 +741,56 @@ void *get_xsave_addr(struct xsave_struct *xsave, int xstate) return (void *)xsave + xstate_comp_offsets[feature]; } EXPORT_SYMBOL_GPL(get_xsave_addr); + +unsigned long fpu_saved; + +static void my_clts(void *arg) +{ + asm volatile("clts"); +} + +static int eager_get(void *data, u64 *val) +{ + *val = fpu_saved; + + return 0; +} + +static int eager_set(void *data, u64 val) +{ + preempt_disable(); + if (val) { + on_each_cpu(my_clts, NULL, 1); + setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); + } else { + setup_clear_cpu_cap(X86_FEATURE_EAGER_FPU); + stts(); + } + preempt_enable(); + + fpu_saved = 0; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(eager_fops, eager_get, eager_set, "%llu\n"); + +static int __init setup_eagerfpu_knob(void) +{ + static struct dentry *d_eager, *f_eager; + + d_eager = debugfs_create_dir("fpu", NULL); + if (!d_eager) { + pr_err("Error creating fpu debugfs dir\n"); + return -ENOMEM; + } + + f_eager = debugfs_create_file("eager", 0644, d_eager, NULL, &eager_fops); + if (!f_eager) { + pr_err("Error creating fpu debugfs node\n"); + return -ENOMEM; + } + + return 0; +} +late_initcall(setup_eagerfpu_knob); -- 2.2.0.33.gc18b867 -- Regards/Gruss, Boris. ECO tip #101: Trim your mails when you reply. -- -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/