This patch add the VDSO time support for the IA32 Emulation Layer.

Due the nature of the kernel headers and the LP64 compiler where the
size of a long and a pointer differs against a 32 bit compiler, there
is some type hacking necessary for optimal performance.

The vsyscall_gtod_data struture must be a rearranged to serve 32- and
64-bit code access at the same time:

- The seqcount_t was replaced by an unsigned, this makes the
  vsyscall_gtod_data intedepend of kernel configuration and internal functions.
- All kernel internal structures are replaced by fix size elements
  which works for 32- and 64-bit access
- The inner struct clock was removed to pack the whole struct.

The "unsigned seq" would be handled by functions derivated from seqcount_t.

Signed-off-by: Stefani Seibold <stef...@seibold.net>
---
 arch/x86/include/asm/vgtod.h          | 71 +++++++++++++++++++++------
 arch/x86/include/asm/vvar.h           |  5 ++
 arch/x86/kernel/vsyscall_gtod.c       | 34 ++++++++-----
 arch/x86/vdso/vclock_gettime.c        | 91 +++++++++++++++++++----------------
 arch/x86/vdso/vdso32/vclock_gettime.c | 21 ++++++++
 5 files changed, 155 insertions(+), 67 deletions(-)

diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 46e24d3..3c3366c 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -1,30 +1,73 @@
 #ifndef _ASM_X86_VGTOD_H
 #define _ASM_X86_VGTOD_H
 
-#include <asm/vsyscall.h>
+#include <linux/compiler.h>
 #include <linux/clocksource.h>
 
+#ifdef BUILD_VDSO32_64
+typedef u64 gtod_long_t;
+#else
+typedef unsigned long gtod_long_t;
+#endif
+/*
+ * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time
+ * so be carefull by modifying this structure.
+ */
 struct vsyscall_gtod_data {
-       seqcount_t      seq;
+       unsigned seq;
 
-       struct { /* extract of a clocksource struct */
-               int vclock_mode;
-               cycle_t cycle_last;
-               cycle_t mask;
-               u32     mult;
-               u32     shift;
-       } clock;
+       int vclock_mode;
+       cycle_t cycle_last;
+       cycle_t mask;
+       u32     mult;
+       u32     shift;
 
        /* open coded 'struct timespec' */
-       time_t          wall_time_sec;
        u64             wall_time_snsec;
+       gtod_long_t     wall_time_sec;
+       gtod_long_t     monotonic_time_sec;
        u64             monotonic_time_snsec;
-       time_t          monotonic_time_sec;
+       gtod_long_t     wall_time_coarse_sec;
+       gtod_long_t     wall_time_coarse_nsec;
+       gtod_long_t     monotonic_time_coarse_sec;
+       gtod_long_t     monotonic_time_coarse_nsec;
 
-       struct timezone sys_tz;
-       struct timespec wall_time_coarse;
-       struct timespec monotonic_time_coarse;
+       int             tz_minuteswest;
+       int             tz_dsttime;
 };
 extern struct vsyscall_gtod_data vsyscall_gtod_data;
 
+static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s)
+{
+       unsigned ret;
+
+repeat:
+       ret = ACCESS_ONCE(s->seq);
+       if (unlikely(ret & 1)) {
+               cpu_relax();
+               goto repeat;
+       }
+       smp_rmb();
+       return ret;
+}
+
+static inline int gtod_read_retry(const struct vsyscall_gtod_data *s,
+                                       unsigned start)
+{
+       smp_rmb();
+       return unlikely(s->seq != start);
+}
+
+static inline void gtod_write_begin(struct vsyscall_gtod_data *s)
+{
+       ++s->seq;
+       smp_wmb();
+}
+
+static inline void gtod_write_end(struct vsyscall_gtod_data *s)
+{
+       smp_wmb();
+       ++s->seq;
+}
+
 #endif /* _ASM_X86_VGTOD_H */
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index 52c79ff..081d909 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -16,6 +16,9 @@
  * you mess up, the linker will catch it.)
  */
 
+#ifndef _ASM_X86_VVAR_H
+#define _ASM_X86_VVAR_H
+
 #if defined(__VVAR_KERNEL_LDS)
 
 /* The kernel linker script defines its own magic to put vvars in the
@@ -64,3 +67,5 @@ DECLARE_VVAR(16, int, vgetcpu_mode)
 DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
 
 #undef DECLARE_VVAR
+
+#endif
diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c
index b5a943d..f9c6e56 100644
--- a/arch/x86/kernel/vsyscall_gtod.c
+++ b/arch/x86/kernel/vsyscall_gtod.c
@@ -4,6 +4,7 @@
  *
  *  Modified for x86 32 bit architecture by
  *  Stefani Seibold <stef...@seibold.net>
+ *  sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
  *
  *  Thanks to h...@transmeta.com for some useful hint.
  *  Special thanks to Ingo Molnar for his early experience with
@@ -13,26 +14,28 @@
 
 #include <linux/timekeeper_internal.h>
 #include <asm/vgtod.h>
+#include <asm/vvar.h>
 
 DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
 
 void update_vsyscall_tz(void)
 {
-       vsyscall_gtod_data.sys_tz = sys_tz;
+       vsyscall_gtod_data.tz_minuteswest = sys_tz.tz_minuteswest;
+       vsyscall_gtod_data.tz_dsttime = sys_tz.tz_dsttime;
 }
 
 void update_vsyscall(struct timekeeper *tk)
 {
        struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
 
-       write_seqcount_begin(&vdata->seq);
+       gtod_write_begin(vdata);
 
        /* copy vsyscall data */
-       vdata->clock.vclock_mode        = tk->clock->archdata.vclock_mode;
-       vdata->clock.cycle_last         = tk->clock->cycle_last;
-       vdata->clock.mask               = tk->clock->mask;
-       vdata->clock.mult               = tk->mult;
-       vdata->clock.shift              = tk->shift;
+       vdata->vclock_mode      = tk->clock->archdata.vclock_mode;
+       vdata->cycle_last       = tk->clock->cycle_last;
+       vdata->mask             = tk->clock->mask;
+       vdata->mult             = tk->mult;
+       vdata->shift            = tk->shift;
 
        vdata->wall_time_sec            = tk->xtime_sec;
        vdata->wall_time_snsec          = tk->xtime_nsec;
@@ -49,11 +52,18 @@ void update_vsyscall(struct timekeeper *tk)
                vdata->monotonic_time_sec++;
        }
 
-       vdata->wall_time_coarse.tv_sec  = tk->xtime_sec;
-       vdata->wall_time_coarse.tv_nsec = (long)(tk->xtime_nsec >> tk->shift);
+       vdata->wall_time_coarse_sec     = tk->xtime_sec;
+       vdata->wall_time_coarse_nsec    = (long)(tk->xtime_nsec >> tk->shift);
 
-       vdata->monotonic_time_coarse    = timespec_add(vdata->wall_time_coarse,
-                                                       tk->wall_to_monotonic);
+       vdata->monotonic_time_coarse_sec =
+               vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
+       vdata->monotonic_time_coarse_nsec =
+               vdata->wall_time_coarse_nsec + tk->wall_to_monotonic.tv_nsec;
 
-       write_seqcount_end(&vdata->seq);
+       while (vdata->monotonic_time_coarse_nsec >= NSEC_PER_SEC) {
+               vdata->monotonic_time_coarse_nsec -= NSEC_PER_SEC;
+               vdata->monotonic_time_coarse_sec++;
+       }
+
+       gtod_write_end(vdata);
 }
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 90bb5e8..16d6861 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -14,16 +14,14 @@
 /* Disable profiling for userspace code: */
 #define DISABLE_BRANCH_PROFILING
 
-#include <linux/kernel.h>
 #include <uapi/linux/time.h>
-#include <linux/string.h>
-#include <asm/vsyscall.h>
-#include <asm/fixmap.h>
 #include <asm/vgtod.h>
 #include <asm/hpet.h>
+#include <asm/vvar.h>
 #include <asm/unistd.h>
-#include <asm/io.h>
-#include <asm/pvclock.h>
+#include <asm/msr.h>
+#include <linux/math64.h>
+#include <linux/time.h>
 
 #define gtod (&VVAR(vsyscall_gtod_data))
 
@@ -31,11 +29,23 @@ extern int __vdso_clock_gettime(clockid_t clock, struct 
timespec *ts);
 extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
 extern time_t __vdso_time(time_t *t);
 
+#ifdef CONFIG_HPET_TIMER
+static inline u32 read_hpet_counter(const volatile void *addr)
+{
+       return *(const volatile u32 *) (addr + HPET_COUNTER);
+}
+#endif
+
 #ifndef BUILD_VDSO32
 
+#include <linux/kernel.h>
+#include <asm/vsyscall.h>
+#include <asm/fixmap.h>
+#include <asm/pvclock.h>
+
 static notrace cycle_t vread_hpet(void)
 {
-       return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 
HPET_COUNTER);
+       return read_hpet_counter((const void *)fix_to_virt(VSYSCALL_HPET));
 }
 
 notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
@@ -116,7 +126,7 @@ static notrace cycle_t vread_pvclock(int *mode)
                *mode = VCLOCK_NONE;
 
        /* refer to tsc.c read_tsc() comment for rationale */
-       last = gtod->clock.cycle_last;
+       last = gtod->cycle_last;
 
        if (likely(ret >= last))
                return ret;
@@ -133,7 +143,7 @@ extern u8 hpet_page
 #ifdef CONFIG_HPET_TIMER
 static notrace cycle_t vread_hpet(void)
 {
-       return readl((const void __iomem *)(&hpet_page + HPET_COUNTER));
+       return read_hpet_counter((const void *)(&hpet_page));
 }
 #endif
 
@@ -193,7 +203,7 @@ notrace static cycle_t vread_tsc(void)
        rdtsc_barrier();
        ret = (cycle_t)__native_read_tsc();
 
-       last = gtod->clock.cycle_last;
+       last = gtod->cycle_last;
 
        if (likely(ret >= last))
                return ret;
@@ -214,20 +224,21 @@ notrace static inline u64 vgetsns(int *mode)
 {
        u64 v;
        cycles_t cycles;
-       if (gtod->clock.vclock_mode == VCLOCK_TSC)
+
+       if (gtod->vclock_mode == VCLOCK_TSC)
                cycles = vread_tsc();
 #ifdef CONFIG_HPET_TIMER
-       else if (gtod->clock.vclock_mode == VCLOCK_HPET)
+       else if (gtod->vclock_mode == VCLOCK_HPET)
                cycles = vread_hpet();
 #endif
 #ifdef CONFIG_PARAVIRT_CLOCK
-       else if (gtod->clock.vclock_mode == VCLOCK_PVCLOCK)
+       else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
                cycles = vread_pvclock(mode);
 #endif
        else
                return 0;
-       v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask;
-       return v * gtod->clock.mult;
+       v = (cycles - gtod->cycle_last) & gtod->mask;
+       return v * gtod->mult;
 }
 
 /* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
@@ -237,17 +248,18 @@ notrace static int __always_inline do_realtime(struct 
timespec *ts)
        u64 ns;
        int mode;
 
-       ts->tv_nsec = 0;
        do {
-               seq = raw_read_seqcount_begin(&gtod->seq);
-               mode = gtod->clock.vclock_mode;
+               seq = gtod_read_begin(gtod);
+               mode = gtod->vclock_mode;
                ts->tv_sec = gtod->wall_time_sec;
                ns = gtod->wall_time_snsec;
                ns += vgetsns(&mode);
-               ns >>= gtod->clock.shift;
-       } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
+               ns >>= gtod->shift;
+       } while (unlikely(gtod_read_retry(gtod, seq)));
+
+       ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+       ts->tv_nsec = ns;
 
-       timespec_add_ns(ts, ns);
        return mode;
 }
 
@@ -257,16 +269,17 @@ notrace static int __always_inline do_monotonic(struct 
timespec *ts)
        u64 ns;
        int mode;
 
-       ts->tv_nsec = 0;
        do {
-               seq = raw_read_seqcount_begin(&gtod->seq);
-               mode = gtod->clock.vclock_mode;
+               seq = gtod_read_begin(gtod);
+               mode = gtod->vclock_mode;
                ts->tv_sec = gtod->monotonic_time_sec;
                ns = gtod->monotonic_time_snsec;
                ns += vgetsns(&mode);
-               ns >>= gtod->clock.shift;
-       } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
-       timespec_add_ns(ts, ns);
+               ns >>= gtod->shift;
+       } while (unlikely(gtod_read_retry(gtod, seq)));
+
+       ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+       ts->tv_nsec = ns;
 
        return mode;
 }
@@ -275,20 +288,20 @@ notrace static void do_realtime_coarse(struct timespec 
*ts)
 {
        unsigned long seq;
        do {
-               seq = raw_read_seqcount_begin(&gtod->seq);
-               ts->tv_sec = gtod->wall_time_coarse.tv_sec;
-               ts->tv_nsec = gtod->wall_time_coarse.tv_nsec;
-       } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
+               seq = gtod_read_begin(gtod);
+               ts->tv_sec = gtod->wall_time_coarse_sec;
+               ts->tv_nsec = gtod->wall_time_coarse_nsec;
+       } while (unlikely(gtod_read_retry(gtod, seq)));
 }
 
 notrace static void do_monotonic_coarse(struct timespec *ts)
 {
        unsigned long seq;
        do {
-               seq = raw_read_seqcount_begin(&gtod->seq);
-               ts->tv_sec = gtod->monotonic_time_coarse.tv_sec;
-               ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec;
-       } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
+               seq = gtod_read_begin(gtod);
+               ts->tv_sec = gtod->monotonic_time_coarse_sec;
+               ts->tv_nsec = gtod->monotonic_time_coarse_nsec;
+       } while (unlikely(gtod_read_retry(gtod, seq)));
 }
 
 notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
@@ -322,17 +335,13 @@ int clock_gettime(clockid_t, struct timespec *)
 notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
 {
        if (likely(tv != NULL)) {
-               BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
-                            offsetof(struct timespec, tv_nsec) ||
-                            sizeof(*tv) != sizeof(struct timespec));
                if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE))
                        return vdso_fallback_gtod(tv, tz);
                tv->tv_usec /= 1000;
        }
        if (unlikely(tz != NULL)) {
-               /* Avoid memcpy. Some old compilers fail to inline it */
-               tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest;
-               tz->tz_dsttime = gtod->sys_tz.tz_dsttime;
+               tz->tz_minuteswest = gtod->tz_minuteswest;
+               tz->tz_dsttime = gtod->tz_dsttime;
        }
 
        return 0;
diff --git a/arch/x86/vdso/vdso32/vclock_gettime.c 
b/arch/x86/vdso/vdso32/vclock_gettime.c
index ca65e42..175cc72 100644
--- a/arch/x86/vdso/vdso32/vclock_gettime.c
+++ b/arch/x86/vdso/vdso32/vclock_gettime.c
@@ -6,4 +6,25 @@
 
 #undef CONFIG_X86_PPRO_FENCE
 
+#ifdef CONFIG_X86_64
+
+/*
+ * in case of a 32 bit VDSO for a 64 bit kernel fake a 32 bit kernel
+ * configuration
+ */
+#undef CONFIG_64BIT
+#undef CONFIG_X86_64
+#undef CONFIG_ILLEGAL_POINTER_VALUE
+#undef CONFIG_SPARSEMEM_VMEMMAP
+#undef CONFIG_NR_CPUS
+
+#define CONFIG_X86_32 1
+#define CONFIG_PAGE_OFFSET 0
+#define CONFIG_ILLEGAL_POINTER_VALUE 0
+#define CONFIG_NR_CPUS 1
+
+#define BUILD_VDSO32_64
+
+#endif
+
 #include "../vclock_gettime.c"
-- 
1.9.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to