From: Stefani Seibold <stef...@seibold.net> This small patch adds support for the functions vdso_gettimeofday(), vdso_clock_gettime() and vdso_time() to the VDSO for x86 32-bit kernels.
The reason for doing this was to get a fast, reliable time stamp. Many developers use the TSC to get a fast time stamp without knowing the pitfalls. The VDSO time functions are a fast and reliable way, because the kernel knows the best time source and the P- and C-state of the CPU. The helper library to use the VDSO functions can be downloaded at http://seibold.net/vdso.c The library is very small, only 228 lines of code. Compile it with gcc -Wall -O3 -fpic vdso.c -lrt -shared -o libvdso.so and use it with LD_PRELOAD=<path>/libvdso.so This kind of helper must be integrated into glibc; for x86 64-bit and PowerPC it is already there. Some benchmark results (all measurements are in nanoseconds): Intel(R) Celeron(TM) CPU 400MHz Average time kernel call: gettimeofday(): 1039 clock_gettime(): 1578 time(): 526 Average time VDSO call: gettimeofday(): 378 clock_gettime(): 303 time(): 60 Celeron(R) Dual-Core CPU T3100 1.90GHz Average time kernel call: gettimeofday(): 209 clock_gettime(): 406 time(): 135 Average time VDSO call: gettimeofday(): 51 clock_gettime(): 43 time(): 10 So you can see a performance increase of between 4 and 13 times, depending on the CPU and the function. The patch is against kernel 3.7. Please apply if you like it. 
Changelog: 25.11.2012 - first release and proof of concept for linux 3.4 11.12.2012 - Port to linux 3.7 and code cleanup 12.12.2012 - fixes suggested by Andy Lutomirski - fixes suggested by John Stultz - use call VDSO32_vsyscall instead of int 80 - code cleanup Signed-off-by: Stefani Seibold <stef...@seibold.net> --- arch/x86/Kconfig | 4 +- arch/x86/include/asm/clocksource.h | 4 -- arch/x86/include/asm/fixmap.h | 3 +- arch/x86/include/asm/vgtod.h | 1 + arch/x86/include/asm/vvar.h | 7 +++ arch/x86/kernel/Makefile | 1 + arch/x86/kernel/hpet.c | 9 ++-- arch/x86/kernel/setup.c | 2 + arch/x86/kernel/tsc.c | 2 - arch/x86/kernel/vmlinux.lds.S | 4 -- arch/x86/kernel/vsyscall_64.c | 49 ------------------ arch/x86/kernel/vsyscall_gtod.c | 93 +++++++++++++++++++++++++++++++++++ arch/x86/vdso/Makefile | 1 + arch/x86/vdso/vclock_gettime.c | 25 +++++++++- arch/x86/vdso/vdso32/vclock_gettime.c | 7 +++ arch/x86/vdso/vdso32/vdso32.lds.S | 5 ++ 16 files changed, 151 insertions(+), 66 deletions(-) create mode 100644 arch/x86/kernel/vsyscall_gtod.c create mode 100644 arch/x86/vdso/vdso32/vclock_gettime.c diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 46c3bff..b8c2c74 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -100,9 +100,9 @@ config X86 select GENERIC_CMOS_UPDATE select CLOCKSOURCE_WATCHDOG select GENERIC_CLOCKEVENTS - select ARCH_CLOCKSOURCE_DATA if X86_64 + select ARCH_CLOCKSOURCE_DATA select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC) - select GENERIC_TIME_VSYSCALL if X86_64 + select GENERIC_TIME_VSYSCALL select KTIME_SCALAR if X86_32 select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h index 0bdbbb3..67d68b9 100644 --- a/arch/x86/include/asm/clocksource.h +++ b/arch/x86/include/asm/clocksource.h @@ -3,8 +3,6 @@ #ifndef _ASM_X86_CLOCKSOURCE_H #define _ASM_X86_CLOCKSOURCE_H -#ifdef CONFIG_X86_64 - #define VCLOCK_NONE 0 /* No vDSO clock 
available. */ #define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */ #define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */ @@ -13,6 +11,4 @@ struct arch_clocksource_data { int vclock_mode; }; -#endif /* CONFIG_X86_64 */ - #endif /* _ASM_X86_CLOCKSOURCE_H */ diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 4da3c0c..b26e9e0 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -78,9 +78,10 @@ enum fixed_addresses { VSYSCALL_LAST_PAGE, VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1, +#endif VVAR_PAGE, VSYSCALL_HPET, -#endif + FIX_DBGP_BASE, FIX_EARLYCON_MEM_BASE, #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 46e24d3..eb87b53 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -27,4 +27,5 @@ struct vsyscall_gtod_data { }; extern struct vsyscall_gtod_data vsyscall_gtod_data; +extern void map_vgtod(void); #endif /* _ASM_X86_VGTOD_H */ diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h index de656ac..6f71098 100644 --- a/arch/x86/include/asm/vvar.h +++ b/arch/x86/include/asm/vvar.h @@ -17,7 +17,11 @@ */ /* Base address of vvars. This is not ABI. 
*/ +#ifdef CONFIG_X86_64 #define VVAR_ADDRESS (-10*1024*1024 - 4096) +#else +#define VVAR_ADDRESS 0xffffd000 +#endif #if defined(__VVAR_KERNEL_LDS) @@ -46,5 +50,8 @@ DECLARE_VVAR(0, volatile unsigned long, jiffies) DECLARE_VVAR(16, int, vgetcpu_mode) DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data) +#ifdef CONFIG_X86_32 +DECLARE_VVAR(512, const void __iomem *, vsyscall_hpet) +#endif #undef DECLARE_VVAR diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 91ce48f..298a0b1 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -26,6 +26,7 @@ obj-y += probe_roms.o obj-$(CONFIG_X86_32) += i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-y += syscall_$(BITS).o +obj-y += vsyscall_gtod.o obj-$(CONFIG_X86_64) += vsyscall_64.o obj-$(CONFIG_X86_64) += vsyscall_emu_64.o obj-y += bootflag.o e820.o diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 1460a5d..38887ca 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -69,13 +69,18 @@ static inline void hpet_writel(unsigned int d, unsigned int a) #ifdef CONFIG_X86_64 #include <asm/pgtable.h> +#else +#include <asm/vvar.h> + +DEFINE_VVAR(const void __iomem *, vsyscall_hpet); #endif static inline void hpet_set_mapping(void) { hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); -#ifdef CONFIG_X86_64 __set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VVAR_NOCACHE); +#ifdef CONFIG_X86_32 + vsyscall_hpet = (const void __iomem *)fix_to_virt(VSYSCALL_HPET); #endif } @@ -752,9 +757,7 @@ static struct clocksource clocksource_hpet = { .mask = HPET_MASK, .flags = CLOCK_SOURCE_IS_CONTINUOUS, .resume = hpet_resume_counter, -#ifdef CONFIG_X86_64 .archdata = { .vclock_mode = VCLOCK_HPET }, -#endif }; static int hpet_clocksource_register(void) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ca45696..c2f6bbb 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -114,6 +114,7 @@ #include 
<asm/mce.h> #include <asm/alternative.h> #include <asm/prom.h> +#include <asm/vgtod.h> /* * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. @@ -997,6 +998,7 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_X86_64 map_vsyscall(); #endif + map_vgtod(); generic_apic_probe(); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index cfa5d4f..078cc9a 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -772,9 +772,7 @@ static struct clocksource clocksource_tsc = { .mask = CLOCKSOURCE_MASK(64), .flags = CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_MUST_VERIFY, -#ifdef CONFIG_X86_64 .archdata = { .vclock_mode = VCLOCK_TSC }, -#endif }; void mark_tsc_unstable(char *reason) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 22a1530..31a0cdd 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -151,8 +151,6 @@ SECTIONS _edata = .; } :data -#ifdef CONFIG_X86_64 - . = ALIGN(PAGE_SIZE); __vvar_page = .; @@ -173,8 +171,6 @@ SECTIONS . = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE); -#endif /* CONFIG_X86_64 */ - /* Init code and data - will be freed after init */ . 
= ALIGN(PAGE_SIZE); .init.begin : AT(ADDR(.init.begin) - LOAD_OFFSET) { diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 3a3e8c9..dfc9727 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -54,7 +54,6 @@ #include "vsyscall_trace.h" DEFINE_VVAR(int, vgetcpu_mode); -DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data); static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE; @@ -77,48 +76,6 @@ static int __init vsyscall_setup(char *str) } early_param("vsyscall", vsyscall_setup); -void update_vsyscall_tz(void) -{ - vsyscall_gtod_data.sys_tz = sys_tz; -} - -void update_vsyscall(struct timekeeper *tk) -{ - struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data; - - write_seqcount_begin(&vdata->seq); - - /* copy vsyscall data */ - vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode; - vdata->clock.cycle_last = tk->clock->cycle_last; - vdata->clock.mask = tk->clock->mask; - vdata->clock.mult = tk->mult; - vdata->clock.shift = tk->shift; - - vdata->wall_time_sec = tk->xtime_sec; - vdata->wall_time_snsec = tk->xtime_nsec; - - vdata->monotonic_time_sec = tk->xtime_sec - + tk->wall_to_monotonic.tv_sec; - vdata->monotonic_time_snsec = tk->xtime_nsec - + (tk->wall_to_monotonic.tv_nsec - << tk->shift); - while (vdata->monotonic_time_snsec >= - (((u64)NSEC_PER_SEC) << tk->shift)) { - vdata->monotonic_time_snsec -= - ((u64)NSEC_PER_SEC) << tk->shift; - vdata->monotonic_time_sec++; - } - - vdata->wall_time_coarse.tv_sec = tk->xtime_sec; - vdata->wall_time_coarse.tv_nsec = (long)(tk->xtime_nsec >> tk->shift); - - vdata->monotonic_time_coarse = timespec_add(vdata->wall_time_coarse, - tk->wall_to_monotonic); - - write_seqcount_end(&vdata->seq); -} - static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, const char *message) { @@ -366,8 +323,6 @@ void __init map_vsyscall(void) { extern char __vsyscall_page; unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); - extern char 
__vvar_page; - unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page); __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_vsyscall, vsyscall_mode == NATIVE @@ -375,10 +330,6 @@ void __init map_vsyscall(void) : PAGE_KERNEL_VVAR); BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_FIRST_PAGE) != (unsigned long)VSYSCALL_START); - - __set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR); - BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) != - (unsigned long)VVAR_ADDRESS); } static int __init vsyscall_init(void) diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c new file mode 100644 index 0000000..9b96488 --- /dev/null +++ b/arch/x86/kernel/vsyscall_gtod.c @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2001 Andrea Arcangeli <and...@suse.de> SuSE + * Copyright 2003 Andi Kleen, SuSE Labs. + * + * Modified for x86 32 bit architecture by + * Stefani Seibold <stef...@seibold.net> + * + * Thanks to h...@transmeta.com for some useful hint. + * Special thanks to Ingo Molnar for his early experience with + * a different vsyscall implementation for Linux/IA32 and for the name. 
+ * + */ + +#include <linux/time.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/timer.h> +#include <linux/seqlock.h> +#include <linux/jiffies.h> +#include <linux/sysctl.h> +#include <linux/topology.h> +#include <linux/timekeeper_internal.h> +#include <linux/ratelimit.h> + +#include <asm/vsyscall.h> +#include <asm/pgtable.h> +#include <asm/page.h> +#include <asm/unistd.h> +#include <asm/fixmap.h> +#include <asm/errno.h> +#include <asm/io.h> +#include <asm/segment.h> +#include <asm/desc.h> +#include <asm/topology.h> +#include <asm/vgtod.h> +#include <asm/traps.h> + +DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data); + +void update_vsyscall_tz(void) +{ + vsyscall_gtod_data.sys_tz = sys_tz; +} + +void update_vsyscall(struct timekeeper *tk) +{ + struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data; + + write_seqcount_begin(&vdata->seq); + + /* copy vsyscall data */ + vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode; + vdata->clock.cycle_last = tk->clock->cycle_last; + vdata->clock.mask = tk->clock->mask; + vdata->clock.mult = tk->mult; + vdata->clock.shift = tk->shift; + + vdata->wall_time_sec = tk->xtime_sec; + vdata->wall_time_snsec = tk->xtime_nsec; + + vdata->monotonic_time_sec = tk->xtime_sec + + tk->wall_to_monotonic.tv_sec; + vdata->monotonic_time_snsec = tk->xtime_nsec + + (tk->wall_to_monotonic.tv_nsec + << tk->shift); + while (vdata->monotonic_time_snsec >= + (((u64)NSEC_PER_SEC) << tk->shift)) { + vdata->monotonic_time_snsec -= + ((u64)NSEC_PER_SEC) << tk->shift; + vdata->monotonic_time_sec++; + } + + vdata->wall_time_coarse.tv_sec = tk->xtime_sec; + vdata->wall_time_coarse.tv_nsec = (long)(tk->xtime_nsec >> tk->shift); + + vdata->monotonic_time_coarse = timespec_add(vdata->wall_time_coarse, + tk->wall_to_monotonic); + + write_seqcount_end(&vdata->seq); +} + +void __init map_vgtod(void) +{ + extern char __vvar_page; + unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page); + + __set_fixmap(VVAR_PAGE, 
physaddr_vvar_page, PAGE_KERNEL_VVAR); +#ifdef CONFIG_X86_64 + BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) != + (unsigned long)VVAR_ADDRESS); +#endif +} + diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index fd14be1..959221b 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -147,6 +147,7 @@ $(vdso32-images:%=$(obj)/%.dbg): asflags-$(CONFIG_X86_64) += -m32 $(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \ $(obj)/vdso32/vdso32.lds \ + $(obj)/vdso32/vclock_gettime.o \ $(obj)/vdso32/note.o \ $(obj)/vdso32/%.o $(call if_changed,vdso) diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 4df6c37..3490e1c 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -57,6 +57,7 @@ notrace static cycle_t vread_tsc(void) return last; } +#ifdef CONFIG_X86_64 static notrace cycle_t vread_hpet(void) { return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0); @@ -78,11 +79,33 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); return ret; } +#else +static notrace cycle_t vread_hpet(void) +{ + return readl(VVAR(vsyscall_hpet) + HPET_COUNTER); +} +notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) +{ + long ret; + asm("call VDSO32_vsyscall" : "=a" (ret) : + "a" (__NR_clock_gettime), "b" (clock), "c" (ts) : "memory"); + return ret; +} + +notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) +{ + long ret; + + asm("call VDSO32_vsyscall" : "=a" (ret) : + "a" (__NR_gettimeofday), "b" (tv), "c" (tz) : "memory"); + return ret; +} +#endif notrace static inline u64 vgetsns(void) { - long v; + u64 v; cycles_t cycles; if (gtod->clock.vclock_mode == VCLOCK_TSC) cycles = vread_tsc(); diff --git a/arch/x86/vdso/vdso32/vclock_gettime.c b/arch/x86/vdso/vdso32/vclock_gettime.c new file mode 100644 index 0000000..c9a1909 --- /dev/null +++ 
b/arch/x86/vdso/vdso32/vclock_gettime.c @@ -0,0 +1,7 @@ +/* + * since vgtod layout differs between X86_64 and x86_32, it is not possible to + * provide a 32 bit vclock with a 64 bit kernel + */ +#ifdef CONFIG_X86_32 +#include "../vclock_gettime.c" +#endif diff --git a/arch/x86/vdso/vdso32/vdso32.lds.S b/arch/x86/vdso/vdso32/vdso32.lds.S index 976124b..197d50f 100644 --- a/arch/x86/vdso/vdso32/vdso32.lds.S +++ b/arch/x86/vdso/vdso32/vdso32.lds.S @@ -24,6 +24,11 @@ VERSION __kernel_vsyscall; __kernel_sigreturn; __kernel_rt_sigreturn; +#ifdef CONFIG_X86_32 + __vdso_clock_gettime; + __vdso_gettimeofday; + __vdso_time; +#endif local: *; }; } -- 1.8.0 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/