Make use of the whole Master Timer infrastructure in gettimeofday, 
monotonic_clock, etc.

Also make the vsyscall version of gettimeofday use guess_mt() when
possible.
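
For reviewers, the arithmetic relies on two steps: guess_mt()
interpolates the per-CPU TSC onto the master timer, and
do_gettimeoffset() scales the master-timer delta since the last tick
into nanoseconds. A minimal userspace sketch of those two formulas
follows (only the formulas mirror the patch; the struct layout, scale
value and sample numbers are illustrative, not the kernel's):

  #include <stdint.h>
  #include <stdio.h>

  #define TSC_SLOPE_SCALE 32  /* assumed fixed-point shift */

  struct cpu_mt {
          uint64_t tsc_last;   /* TSC value at the last calibration */
          uint64_t tsc_slope;  /* MT ticks per TSC cycle, <<32 fixed point */
          uint64_t mt_base;    /* master timer value at tsc_last */
  };

  /* linear TSC -> master-timer interpolation, as in guess_mt() */
  static uint64_t guess_mt(const struct cpu_mt *c, uint64_t tsc)
  {
          return (((tsc - c->tsc_last) * c->tsc_slope) >> TSC_SLOPE_SCALE)
                  + c->mt_base;
  }

  /* nanoseconds since the last tick, as in do_gettimeoffset() */
  static int64_t mt_offset_ns(uint64_t mt, uint64_t mt_wall, uint64_t mt_q)
  {
          return ((int64_t)(mt - mt_wall) * (int64_t)mt_q) >> 32;
  }

  int main(void)
  {
          struct cpu_mt c = { .tsc_last = 1000000,
                              .tsc_slope = 1ull << 31,  /* 0.5 MT/cycle */
                              .mt_base = 500000 };
          uint64_t mt = guess_mt(&c, 1002000);  /* 2000 cycles later */
          /* mt_q: ns per MT tick, <<32; assume 1000 ns per MT tick */
          printf("offset: %lld ns\n",
                 (long long)mt_offset_ns(mt, 500000, 1000ull << 32));
          return 0;
  }

With these numbers the sketch prints 1000000 ns: 2000 cycles at half an
MT tick per cycle, scaled by 1000 ns per MT tick.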

Signed-off-by: Jiri Bohac <[EMAIL PROTECTED]>
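
---

A note on the max(offset, ns_drift) clamp in do_gettimeofday(): when NTP
slows the clock, do_timer() advances xtime by less than one full tick's
worth of nanoseconds while the master timer keeps its pace, so a reader
sampling just before the tick could see a later time than a reader
sampling just after it. Clamping the post-tick offset to at least
ns_drift (the shortfall recorded in main_timer_handler()) closes that
window. A toy userspace model, with invented numbers:

  #include <stdint.h>
  #include <stdio.h>

  #define NSEC_PER_TICK 1000000   /* 1 kHz tick, example only */

  static int64_t max64(int64_t a, int64_t b) { return a > b ? a : b; }

  int main(void)
  {
          int64_t xtime_step = 999000;  /* NTP-slowed xtime increment */
          int64_t ns_drift = NSEC_PER_TICK - xtime_step;  /* 1000 ns */
          int64_t before = 999500;  /* offset just before the tick */
          int64_t after  = 200;     /* offset just after the tick */

          printf("pre-tick reading:     %lld\n", (long long)before);
          printf("post-tick, no clamp:  %lld\n",
                 (long long)(xtime_step + after));
          printf("post-tick, clamped:   %lld\n",
                 (long long)(xtime_step + max64(after, ns_drift)));
          return 0;
  }

Without the clamp the post-tick reading (999200) falls behind the
pre-tick one (999500); with it, the reading is pulled up to 1000000 and
time stays monotonic.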

Index: linux-2.6.20-rc5/arch/x86_64/kernel/time.c
===================================================================
--- linux-2.6.20-rc5.orig/arch/x86_64/kernel/time.c
+++ linux-2.6.20-rc5/arch/x86_64/kernel/time.c
@@ -341,27 +341,48 @@ inline u64 mt_to_nsec(u64 mt)
 }
 
 /*
- * do_gettimeoffset() returns microseconds since last timer interrupt was
+ * do_gettimeoffset() returns nanoseconds since last timer interrupt was
  * triggered by hardware. A memory read of HPET is slower than a register read
  * of TSC, but much more reliable. It's also synchronized to the timer
  * interrupt. Note that do_gettimeoffset() may return more than hpet_tick, if a
  * timer interrupt has happened already, but vxtime.trigger wasn't updated yet.
  * This is not a problem, because jiffies hasn't updated either. They are bound
  * together by xtime_lock.
+ *
+ * If used_mt is not null, it will be filled with the master timer value
+ * used for the calculation
  */
 
-static inline unsigned int do_gettimeoffset_tsc(void)
+static inline s64 do_gettimeoffset(u64 *used_mt)
 {
-       unsigned long t;
-       unsigned long x;
-       t = get_cycles_sync();
-       if (t < vxtime.last_tsc) 
-               t = vxtime.last_tsc; /* hack */
-       x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> US_SCALE;
-       return x;
-}
+       int cpu = 0;
+       u64 tsc = 0, mt;
+       switch (vxtime.mode) {
+
+               case VXTIME_TSC:
+                       rdtscll(tsc);
+                       break;
+
+               case VXTIME_TSCP:
+                       rdtscpll(tsc, cpu);
+                       cpu &= 0xfff;
+                       break;
 
-unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc;
+               case VXTIME_TSCS:
+               case VXTIME_TSCM:
+                       preempt_disable();
+                       cpu = smp_processor_id();
+                       rdtscll(tsc);
+                       preempt_enable();
+                       break;
+       }
+
+       mt = guess_mt(tsc, cpu);
+       if (used_mt)
+               *used_mt = mt;
+
+       return (((s64)(mt - vxtime.mt_wall)) * (s64)vxtime.mt_q) >> 32;
+}
 
 /*
  * This version of gettimeofday() has microsecond resolution and better than
@@ -372,28 +393,32 @@ unsigned int (*do_gettimeoffset)(void) =
 void do_gettimeofday(struct timeval *tv)
 {
        unsigned long seq;
-       unsigned int sec, usec;
+       unsigned int sec;
+       int nsec;
+       u64 mt;
 
        do {
                seq = read_seqbegin(&xtime_lock);
 
                sec = xtime.tv_sec;
-               usec = xtime.tv_nsec / NSEC_PER_USEC;
+               nsec = xtime.tv_nsec;
 
-               /* i386 does some correction here to keep the clock 
-                  monotonous even when ntpd is fixing drift.
-                  But they didn't work for me, there is a non monotonic
-                  clock anyways with ntp.
-                  I dropped all corrections now until a real solution can
-                  be found. Note when you fix it here you need to do the same
-                  in arch/x86_64/kernel/vsyscall.c and export all needed
-                  variables in vmlinux.lds. -AK */ 
-               usec += do_gettimeoffset();
+               nsec += max(do_gettimeoffset(&mt), vxtime.ns_drift);
 
        } while (read_seqretry(&xtime_lock, seq));
 
-       tv->tv_sec = sec + usec / USEC_PER_SEC;
-       tv->tv_usec = usec % USEC_PER_SEC;
+       /* this must be done outside the seqlock loop. Until the loop has finished,
+          the mt may be completely wrong, calculated from inconsistent data */
+       update_monotonic_mt(mt);
+
+       sec += nsec / NSEC_PER_SEC;
+       nsec %= NSEC_PER_SEC;
+       if (nsec < 0) {
+               --sec;
+               nsec += NSEC_PER_SEC;
+       }
+       tv->tv_sec = sec;
+       tv->tv_usec = nsec / NSEC_PER_USEC;
 }
 
 EXPORT_SYMBOL(do_gettimeofday);
@@ -408,13 +433,13 @@ int do_settimeofday(struct timespec *tv)
 {
        time_t wtm_sec, sec = tv->tv_sec;
        long wtm_nsec, nsec = tv->tv_nsec;
+       unsigned long flags;
 
        if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
                return -EINVAL;
+       write_seqlock_irqsave(&xtime_lock, flags);
 
-       write_seqlock_irq(&xtime_lock);
-
-       nsec -= do_gettimeoffset() * NSEC_PER_USEC;
+       nsec -= do_gettimeoffset(NULL);
 
        wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
        wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
@@ -424,7 +449,7 @@ int do_settimeofday(struct timespec *tv)
 
        ntp_clear();
 
-       write_sequnlock_irq(&xtime_lock);
+       write_sequnlock_irqrestore(&xtime_lock, flags);
        clock_was_set();
        return 0;
 }
@@ -519,27 +544,32 @@ static void set_rtc_mmss(unsigned long n
        spin_unlock(&rtc_lock);
 }
 
-
 /* monotonic_clock(): returns # of nanoseconds passed since time_init()
  *             Note: This function is required to return accurate
  *             time even in the absence of multiple timer ticks.
  */
-static inline unsigned long long cycles_2_ns(unsigned long long cyc);
 unsigned long long monotonic_clock(void)
 {
-       unsigned long seq;
-       u32 last_offset, this_offset, offset;
-       unsigned long long base;
+       int cpu;
+       unsigned long flags;
+       u64 t;
 
-               do {
-                       seq = read_seqbegin(&xtime_lock);
+       /* any code that modifies the per-CPU variables used in guess_mt
+          will always run on this CPU, so we don't need to lock the xtime_lock
+          here. If we did, it would create a deadlock on debug printks (and
+          possibly elsewhere) called from other critical sections protected by
+          the lock */
 
-                       last_offset = vxtime.last_tsc;
-                       base = monotonic_base;
-               } while (read_seqretry(&xtime_lock, seq));
-               this_offset = get_cycles_sync();
-               offset = cycles_2_ns(this_offset - last_offset);
-       return base + offset;
+       local_irq_save(flags);
+
+       cpu = smp_processor_id();
+       rdtscll(t);
+       t = guess_mt(t, cpu);
+       update_monotonic_mt(t);
+
+       local_irq_restore(flags);
+
+       return mt_to_nsec(t);
 }
 EXPORT_SYMBOL(monotonic_clock);
 
@@ -573,62 +603,54 @@ static noinline void handle_lost_ticks(i
 void main_timer_handler(void)
 {
        static unsigned long rtc_update = 0;
-       unsigned long tsc;
-       int delay = 0, offset = 0, lost = 0;
-
-/*
- * Here we are in the timer irq handler. We have irqs locally disabled (so we
- * don't need spin_lock_irqsave()) but we don't know if the timer_bh is running
- * on the other CPU, so we need a lock. We also need to lock the vsyscall
- * variables, because both do_timer() and us change them -arca+vojtech
- */
-
-       write_seqlock(&xtime_lock);
+       unsigned long flags;
+       u64 mt;
+       int ticks, i;
+       u64 xtime_nsecs, mt_ticks;
 
-       if (vxtime.hpet_address)
-               offset = hpet_readl(HPET_COUNTER);
+       write_seqlock_irqsave(&xtime_lock, flags);
 
-       if (hpet_use_timer) {
-               /* if we're using the hpet timer functionality,
-                * we can more accurately know the counter value
-                * when the timer interrupt occured.
-                */
-               offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
-               delay = hpet_readl(HPET_COUNTER) - offset;
+       mt = update_master_timer64();
+       ticks = (mt - vxtime.mt_wall + mt_per_tick / 2) / mt_per_tick;
+       mt_ticks = ticks * mt_per_tick;
+
+       if (ticks > 1) {
+               handle_lost_ticks(ticks - 1);
+               jiffies += ticks - 1;
        }
 
-       tsc = get_cycles_sync();
-
-               offset = (((tsc - vxtime.last_tsc) *
-                          vxtime.tsc_quot) >> US_SCALE) - USEC_PER_TICK;
 
-               if (offset < 0)
-                       offset = 0;
+/*
+ * Do the timer stuff.
+ * NTP will cause the actual increment of xtime to be slightly different from
+ * NSEC_PER_TICK, so we set vxtime.ns_drift to the difference. This will be used
+ * by do_gettimeofday() to make sure the time stays monotonic.
+ */
 
-               if (offset > USEC_PER_TICK) {
-                       lost = offset / USEC_PER_TICK;
-                       offset %= USEC_PER_TICK;
+       xtime_nsecs = xtime.tv_sec * NSEC_PER_SEC + xtime.tv_nsec;
+       for (i = 0; i < ticks; ++i)
+               do_timer(1);
+       xtime_nsecs = xtime.tv_sec * NSEC_PER_SEC + xtime.tv_nsec - xtime_nsecs;
 
-               monotonic_base += cycles_2_ns(tsc - vxtime.last_tsc);
+       vxtime.ns_drift = (mt_ticks * mtq >> 32) - xtime_nsecs;
+       vxtime.mt_wall += mt_ticks;
 
-               vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot;
+/*
+ * If we have an externally synchronized Linux clock, then update CMOS clock
+ * accordingly every ~11 minutes. set_rtc_mmss() will be called in the jiffy
+ * closest to exactly 500 ms before the next second. If the update fails, we
+ * don't care, as it'll be updated on the next turn, and the problem (time way
+ * off) isn't likely to go away much sooner anyway.
+ */
 
-               if ((((tsc - vxtime.last_tsc) *
-                     vxtime.tsc_quot) >> US_SCALE) < offset)
-                       vxtime.last_tsc = tsc -
-                               (((long) offset << US_SCALE) / vxtime.tsc_quot) - 1;
+       if (ntp_synced() && xtime.tv_sec > rtc_update &&
+               abs(xtime.tv_nsec - 500000000) <= tick_nsec / 2) {
+               set_rtc_mmss(xtime.tv_sec);
+               rtc_update = xtime.tv_sec + 660;
        }
 
-       if (lost > 0)
-               handle_lost_ticks(lost);
-       else
-               lost = 0;
-
-/*
- * Do the timer stuff.
- */
+       write_sequnlock_irqrestore(&xtime_lock, flags);
 
-       do_timer(lost + 1);
 #ifndef CONFIG_SMP
        update_process_times(user_mode(get_irq_regs()));
 #endif
@@ -642,21 +664,6 @@ void main_timer_handler(void)
        if (!using_apic_timer)
                smp_local_timer_interrupt();
 
-/*
- * If we have an externally synchronized Linux clock, then update CMOS clock
- * accordingly every ~11 minutes. set_rtc_mmss() will be called in the jiffy
- * closest to exactly 500 ms before the next second. If the update fails, we
- * don't care, as it'll be updated on the next turn, and the problem (time way
- * off) isn't likely to go away much sooner anyway.
- */
-
-       if (ntp_synced() && xtime.tv_sec > rtc_update &&
-               abs(xtime.tv_nsec - 500000000) <= tick_nsec / 2) {
-               set_rtc_mmss(xtime.tv_sec);
-               rtc_update = xtime.tv_sec + 660;
-       }
- 
-       write_sequnlock(&xtime_lock);
 }
 
 static irqreturn_t timer_interrupt(int irq, void *dev_id)
@@ -669,24 +676,9 @@ static irqreturn_t timer_interrupt(int i
        return IRQ_HANDLED;
 }
 
-static unsigned int cyc2ns_scale __read_mostly;
-
-static inline void set_cyc2ns_scale(unsigned long cpu_khz)
-{
-       cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz;
-}
-
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
-       return (cyc * cyc2ns_scale) >> NS_SCALE;
-}
-
 unsigned long long sched_clock(void)
 {
-       unsigned long a = 0;
-
-       rdtscll(a);
-       return cycles_2_ns(a);
+       return monotonic_clock();
 }
 
 static unsigned long get_cmos_time(void)
Index: linux-2.6.20-rc5/arch/x86_64/kernel/vsyscall.c
===================================================================
--- linux-2.6.20-rc5.orig/arch/x86_64/kernel/vsyscall.c
+++ linux-2.6.20-rc5/arch/x86_64/kernel/vsyscall.c
@@ -61,24 +61,35 @@ static __always_inline void timeval_norm
        }
 }
 
-static __always_inline void do_vgettimeofday(struct timeval * tv)
+static __always_inline u64 __guess_mt(u64 tsc, int cpu)
 {
-       long sequence, t;
-       unsigned long sec, usec;
+       return (((tsc - __vxtime.cpu[cpu].tsc_last) * __vxtime.cpu[cpu].tsc_slope)
+                       >> TSC_SLOPE_SCALE) + __vxtime.cpu[cpu].mt_base;
+}
+
+#define USEC_PER_TICK (USEC_PER_SEC / HZ)
+static __always_inline s64 __do_gettimeoffset(u64 tsc, int cpu)
+{
+       return (((s64)(__guess_mt(tsc, cpu) - __vxtime.mt_wall)) * (s64)__vxtime.mt_q) >> 32;
+}
+
+static __always_inline void do_vgettimeofday(struct timeval * tv, u64 tsc, int cpu)
+{
+       unsigned int sec;
+       s64 nsec;
 
-       do {
-               sequence = read_seqbegin(&__xtime_lock);
-               
-               sec = __xtime.tv_sec;
-               usec = __xtime.tv_nsec / 1000;
-
-                       usec += ((readl((void __iomem *)
-                                  fix_to_virt(VSYSCALL_HPET) + 0xf0) -
-                                 __vxtime.last) * __vxtime.quot) >> 32;
-       } while (read_seqretry(&__xtime_lock, sequence));
+       sec = __xtime.tv_sec;
+       nsec = __xtime.tv_nsec;
+       nsec += max(__do_gettimeoffset(tsc, cpu), __vxtime.ns_drift);
 
-       tv->tv_sec = sec + usec / 1000000;
-       tv->tv_usec = usec % 1000000;
+       sec += nsec / NSEC_PER_SEC;
+       nsec %= NSEC_PER_SEC;
+       if (nsec < 0) {
+               --sec;
+               nsec += NSEC_PER_SEC;
+       }
+       tv->tv_sec = sec;
+       tv->tv_usec = nsec / NSEC_PER_USEC;
 }
 
 /* RED-PEN may want to readd seq locking, but then the variable should be write-once. */
@@ -107,10 +118,39 @@ static __always_inline long time_syscall
 
 int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
 {
-       if (!__sysctl_vsyscall)
+       int cpu = 0;
+       u64 tsc;
+       unsigned long seq;
+       int do_syscall = !__sysctl_vsyscall;
+
+       if (tv && !do_syscall)
+               switch (__vxtime.mode) {
+                       case VXTIME_TSC:
+                       case VXTIME_TSCP:
+                               do {
+                                       seq = read_seqbegin(&__xtime_lock);
+
+                                       if (__vxtime.mode == VXTIME_TSC)
+                                               rdtscll(tsc);
+                                       else {
+                                               rdtscpll(tsc, cpu);
+                                               cpu &= 0xfff;
+                                       }
+
+                                       if (unlikely(__vxtime.cpu[cpu].tsc_invalid))
+                                               do_syscall = 1;
+                                       else
+                                               do_vgettimeofday(tv, tsc, cpu);
+
+                               } while (read_seqretry(&__xtime_lock, seq));
+                               break;
+                       default:
+                               do_syscall = 1;
+               }
+
+       if (do_syscall)
                return gettimeofday(tv,tz);
-       if (tv)
-               do_vgettimeofday(tv);
+
        if (tz)
                do_get_tz(tz);
        return 0;
