This patch adds VDSO time support for the IA32 emulation layer. Due to the nature of the kernel headers and the LP64 compiler, where the sizes of a long and a pointer differ from those of a 32-bit compiler, some type hacking is necessary for optimal performance.
The vsyscall_gtod_data structure must be rearranged to serve 32- and 64-bit code access at the same time: - The seqcount_t was replaced by an unsigned, which makes vsyscall_gtod_data independent of kernel configuration and internal functions. - All kernel internal structures are replaced by fixed-size elements which work for both 32- and 64-bit access. - The inner struct clock was removed to pack the whole struct. The "unsigned seq" is handled by functions derived from seqcount_t. Signed-off-by: Stefani Seibold <stef...@seibold.net> --- arch/x86/include/asm/vgtod.h | 71 +++++++++++++++++++++------ arch/x86/include/asm/vvar.h | 5 ++ arch/x86/kernel/vsyscall_gtod.c | 34 ++++++++----- arch/x86/vdso/vclock_gettime.c | 91 +++++++++++++++++++---------------- arch/x86/vdso/vdso32/vclock_gettime.c | 21 ++++++++ 5 files changed, 155 insertions(+), 67 deletions(-) diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 46e24d3..3c3366c 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -1,30 +1,73 @@ #ifndef _ASM_X86_VGTOD_H #define _ASM_X86_VGTOD_H -#include <asm/vsyscall.h> +#include <linux/compiler.h> #include <linux/clocksource.h> +#ifdef BUILD_VDSO32_64 +typedef u64 gtod_long_t; +#else +typedef unsigned long gtod_long_t; +#endif +/* + * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time + * so be carefull by modifying this structure. 
+ */ struct vsyscall_gtod_data { - seqcount_t seq; + unsigned seq; - struct { /* extract of a clocksource struct */ - int vclock_mode; - cycle_t cycle_last; - cycle_t mask; - u32 mult; - u32 shift; - } clock; + int vclock_mode; + cycle_t cycle_last; + cycle_t mask; + u32 mult; + u32 shift; /* open coded 'struct timespec' */ - time_t wall_time_sec; u64 wall_time_snsec; + gtod_long_t wall_time_sec; + gtod_long_t monotonic_time_sec; u64 monotonic_time_snsec; - time_t monotonic_time_sec; + gtod_long_t wall_time_coarse_sec; + gtod_long_t wall_time_coarse_nsec; + gtod_long_t monotonic_time_coarse_sec; + gtod_long_t monotonic_time_coarse_nsec; - struct timezone sys_tz; - struct timespec wall_time_coarse; - struct timespec monotonic_time_coarse; + int tz_minuteswest; + int tz_dsttime; }; extern struct vsyscall_gtod_data vsyscall_gtod_data; +static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s) +{ + unsigned ret; + +repeat: + ret = ACCESS_ONCE(s->seq); + if (unlikely(ret & 1)) { + cpu_relax(); + goto repeat; + } + smp_rmb(); + return ret; +} + +static inline int gtod_read_retry(const struct vsyscall_gtod_data *s, + unsigned start) +{ + smp_rmb(); + return unlikely(s->seq != start); +} + +static inline void gtod_write_begin(struct vsyscall_gtod_data *s) +{ + ++s->seq; + smp_wmb(); +} + +static inline void gtod_write_end(struct vsyscall_gtod_data *s) +{ + smp_wmb(); + ++s->seq; +} + #endif /* _ASM_X86_VGTOD_H */ diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h index 52c79ff..081d909 100644 --- a/arch/x86/include/asm/vvar.h +++ b/arch/x86/include/asm/vvar.h @@ -16,6 +16,9 @@ * you mess up, the linker will catch it.) 
*/ +#ifndef _ASM_X86_VVAR_H +#define _ASM_X86_VVAR_H + #if defined(__VVAR_KERNEL_LDS) /* The kernel linker script defines its own magic to put vvars in the @@ -64,3 +67,5 @@ DECLARE_VVAR(16, int, vgetcpu_mode) DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data) #undef DECLARE_VVAR + +#endif diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c index b5a943d..f9c6e56 100644 --- a/arch/x86/kernel/vsyscall_gtod.c +++ b/arch/x86/kernel/vsyscall_gtod.c @@ -4,6 +4,7 @@ * * Modified for x86 32 bit architecture by * Stefani Seibold <stef...@seibold.net> + * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany * * Thanks to h...@transmeta.com for some useful hint. * Special thanks to Ingo Molnar for his early experience with @@ -13,26 +14,28 @@ #include <linux/timekeeper_internal.h> #include <asm/vgtod.h> +#include <asm/vvar.h> DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data); void update_vsyscall_tz(void) { - vsyscall_gtod_data.sys_tz = sys_tz; + vsyscall_gtod_data.tz_minuteswest = sys_tz.tz_minuteswest; + vsyscall_gtod_data.tz_dsttime = sys_tz.tz_dsttime; } void update_vsyscall(struct timekeeper *tk) { struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data; - write_seqcount_begin(&vdata->seq); + gtod_write_begin(vdata); /* copy vsyscall data */ - vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode; - vdata->clock.cycle_last = tk->clock->cycle_last; - vdata->clock.mask = tk->clock->mask; - vdata->clock.mult = tk->mult; - vdata->clock.shift = tk->shift; + vdata->vclock_mode = tk->clock->archdata.vclock_mode; + vdata->cycle_last = tk->clock->cycle_last; + vdata->mask = tk->clock->mask; + vdata->mult = tk->mult; + vdata->shift = tk->shift; vdata->wall_time_sec = tk->xtime_sec; vdata->wall_time_snsec = tk->xtime_nsec; @@ -49,11 +52,18 @@ void update_vsyscall(struct timekeeper *tk) vdata->monotonic_time_sec++; } - vdata->wall_time_coarse.tv_sec = tk->xtime_sec; - vdata->wall_time_coarse.tv_nsec = (long)(tk->xtime_nsec 
>> tk->shift); + vdata->wall_time_coarse_sec = tk->xtime_sec; + vdata->wall_time_coarse_nsec = (long)(tk->xtime_nsec >> tk->shift); - vdata->monotonic_time_coarse = timespec_add(vdata->wall_time_coarse, - tk->wall_to_monotonic); + vdata->monotonic_time_coarse_sec = + vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec; + vdata->monotonic_time_coarse_nsec = + vdata->wall_time_coarse_nsec + tk->wall_to_monotonic.tv_nsec; - write_seqcount_end(&vdata->seq); + while (vdata->monotonic_time_coarse_nsec >= NSEC_PER_SEC) { + vdata->monotonic_time_coarse_nsec -= NSEC_PER_SEC; + vdata->monotonic_time_coarse_sec++; + } + + gtod_write_end(vdata); } diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 90bb5e8..16d6861 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -14,16 +14,14 @@ /* Disable profiling for userspace code: */ #define DISABLE_BRANCH_PROFILING -#include <linux/kernel.h> #include <uapi/linux/time.h> -#include <linux/string.h> -#include <asm/vsyscall.h> -#include <asm/fixmap.h> #include <asm/vgtod.h> #include <asm/hpet.h> +#include <asm/vvar.h> #include <asm/unistd.h> -#include <asm/io.h> -#include <asm/pvclock.h> +#include <asm/msr.h> +#include <linux/math64.h> +#include <linux/time.h> #define gtod (&VVAR(vsyscall_gtod_data)) @@ -31,11 +29,23 @@ extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts); extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); extern time_t __vdso_time(time_t *t); +#ifdef CONFIG_HPET_TIMER +static inline u32 read_hpet_counter(const volatile void *addr) +{ + return *(const volatile u32 *) (addr + HPET_COUNTER); +} +#endif + #ifndef BUILD_VDSO32 +#include <linux/kernel.h> +#include <asm/vsyscall.h> +#include <asm/fixmap.h> +#include <asm/pvclock.h> + static notrace cycle_t vread_hpet(void) { - return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + HPET_COUNTER); + return read_hpet_counter((const void 
*)fix_to_virt(VSYSCALL_HPET)); } notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) @@ -116,7 +126,7 @@ static notrace cycle_t vread_pvclock(int *mode) *mode = VCLOCK_NONE; /* refer to tsc.c read_tsc() comment for rationale */ - last = gtod->clock.cycle_last; + last = gtod->cycle_last; if (likely(ret >= last)) return ret; @@ -133,7 +143,7 @@ extern u8 hpet_page #ifdef CONFIG_HPET_TIMER static notrace cycle_t vread_hpet(void) { - return readl((const void __iomem *)(&hpet_page + HPET_COUNTER)); + return read_hpet_counter((const void *)(&hpet_page)); } #endif @@ -193,7 +203,7 @@ notrace static cycle_t vread_tsc(void) rdtsc_barrier(); ret = (cycle_t)__native_read_tsc(); - last = gtod->clock.cycle_last; + last = gtod->cycle_last; if (likely(ret >= last)) return ret; @@ -214,20 +224,21 @@ notrace static inline u64 vgetsns(int *mode) { u64 v; cycles_t cycles; - if (gtod->clock.vclock_mode == VCLOCK_TSC) + + if (gtod->vclock_mode == VCLOCK_TSC) cycles = vread_tsc(); #ifdef CONFIG_HPET_TIMER - else if (gtod->clock.vclock_mode == VCLOCK_HPET) + else if (gtod->vclock_mode == VCLOCK_HPET) cycles = vread_hpet(); #endif #ifdef CONFIG_PARAVIRT_CLOCK - else if (gtod->clock.vclock_mode == VCLOCK_PVCLOCK) + else if (gtod->vclock_mode == VCLOCK_PVCLOCK) cycles = vread_pvclock(mode); #endif else return 0; - v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask; - return v * gtod->clock.mult; + v = (cycles - gtod->cycle_last) & gtod->mask; + return v * gtod->mult; } /* Code size doesn't matter (vdso is 4k anyway) and this is faster. 
*/ @@ -237,17 +248,18 @@ notrace static int __always_inline do_realtime(struct timespec *ts) u64 ns; int mode; - ts->tv_nsec = 0; do { - seq = raw_read_seqcount_begin(&gtod->seq); - mode = gtod->clock.vclock_mode; + seq = gtod_read_begin(gtod); + mode = gtod->vclock_mode; ts->tv_sec = gtod->wall_time_sec; ns = gtod->wall_time_snsec; ns += vgetsns(&mode); - ns >>= gtod->clock.shift; - } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); + ns >>= gtod->shift; + } while (unlikely(gtod_read_retry(gtod, seq))); + + ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); + ts->tv_nsec = ns; - timespec_add_ns(ts, ns); return mode; } @@ -257,16 +269,17 @@ notrace static int __always_inline do_monotonic(struct timespec *ts) u64 ns; int mode; - ts->tv_nsec = 0; do { - seq = raw_read_seqcount_begin(&gtod->seq); - mode = gtod->clock.vclock_mode; + seq = gtod_read_begin(gtod); + mode = gtod->vclock_mode; ts->tv_sec = gtod->monotonic_time_sec; ns = gtod->monotonic_time_snsec; ns += vgetsns(&mode); - ns >>= gtod->clock.shift; - } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); - timespec_add_ns(ts, ns); + ns >>= gtod->shift; + } while (unlikely(gtod_read_retry(gtod, seq))); + + ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); + ts->tv_nsec = ns; return mode; } @@ -275,20 +288,20 @@ notrace static void do_realtime_coarse(struct timespec *ts) { unsigned long seq; do { - seq = raw_read_seqcount_begin(&gtod->seq); - ts->tv_sec = gtod->wall_time_coarse.tv_sec; - ts->tv_nsec = gtod->wall_time_coarse.tv_nsec; - } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); + seq = gtod_read_begin(gtod); + ts->tv_sec = gtod->wall_time_coarse_sec; + ts->tv_nsec = gtod->wall_time_coarse_nsec; + } while (unlikely(gtod_read_retry(gtod, seq))); } notrace static void do_monotonic_coarse(struct timespec *ts) { unsigned long seq; do { - seq = raw_read_seqcount_begin(&gtod->seq); - ts->tv_sec = gtod->monotonic_time_coarse.tv_sec; - ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec; - } while 
(unlikely(read_seqcount_retry(&gtod->seq, seq))); + seq = gtod_read_begin(gtod); + ts->tv_sec = gtod->monotonic_time_coarse_sec; + ts->tv_nsec = gtod->monotonic_time_coarse_nsec; + } while (unlikely(gtod_read_retry(gtod, seq))); } notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) @@ -322,17 +335,13 @@ int clock_gettime(clockid_t, struct timespec *) notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) { if (likely(tv != NULL)) { - BUILD_BUG_ON(offsetof(struct timeval, tv_usec) != - offsetof(struct timespec, tv_nsec) || - sizeof(*tv) != sizeof(struct timespec)); if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE)) return vdso_fallback_gtod(tv, tz); tv->tv_usec /= 1000; } if (unlikely(tz != NULL)) { - /* Avoid memcpy. Some old compilers fail to inline it */ - tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest; - tz->tz_dsttime = gtod->sys_tz.tz_dsttime; + tz->tz_minuteswest = gtod->tz_minuteswest; + tz->tz_dsttime = gtod->tz_dsttime; } return 0; diff --git a/arch/x86/vdso/vdso32/vclock_gettime.c b/arch/x86/vdso/vdso32/vclock_gettime.c index ca65e42..175cc72 100644 --- a/arch/x86/vdso/vdso32/vclock_gettime.c +++ b/arch/x86/vdso/vdso32/vclock_gettime.c @@ -6,4 +6,25 @@ #undef CONFIG_X86_PPRO_FENCE +#ifdef CONFIG_X86_64 + +/* + * in case of a 32 bit VDSO for a 64 bit kernel fake a 32 bit kernel + * configuration + */ +#undef CONFIG_64BIT +#undef CONFIG_X86_64 +#undef CONFIG_ILLEGAL_POINTER_VALUE +#undef CONFIG_SPARSEMEM_VMEMMAP +#undef CONFIG_NR_CPUS + +#define CONFIG_X86_32 1 +#define CONFIG_PAGE_OFFSET 0 +#define CONFIG_ILLEGAL_POINTER_VALUE 0 +#define CONFIG_NR_CPUS 1 + +#define BUILD_VDSO32_64 + +#endif + #include "../vclock_gettime.c" -- 1.9.0 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/