On Friday 18 March 2011 01:19 am, Bruce Evans wrote:
> On Thu, 17 Mar 2011, Jung-uk Kim wrote:
> > On Thursday 17 March 2011 03:57 pm, Peter Jeremy wrote:
> >> On 2011-Mar-16 16:34:04 -0400, Jung-uk Kim <j...@freebsd.org> wrote:
> >>> On Wednesday 16 March 2011 01:45 pm, Roman Divacky wrote:
> >>>> if we drop i486 I think it makes sense to require something
> >>>> that has at least SSE2, thus we can have the same expectations
> >>>> as on amd64.
> >>
> >> I think it's still a bit early for that - especially the SSE2
> >> requirement.
> >>
> >>> This is a proof-of-concept patch for sys/x86/isa/clock.c:
> >>>
> >>> http://people.freebsd.org/~jkim/clock.diff
> >>>
> >>> You see the complexity, just because I wanted to load 64-bit
> >>> value atomically... :-(
> >>
> >> An alternative approach is to have _fetch_frequency() be
> >>   uint64_t (*_fetch_frequency)(uint64_t *);
> >> if I386 and I486 are defined (otherwise it's just the #define
> >> (*(p))), then initialise it to either atomic_fetch_quad_i386 or
> >> atomic_fetch_quad_i586 as part of the CPU detection process.
> >> This is the way bcopy() is/was handled on Pentium.
> >>
> >> Another approach would be to have cmpxchg8b instructions
> >> (followed by a suitably large NOP) always inlined in the code
> >> and, if one traps, patch the code to call a function that
> >> emulates it.
> >
> > I think the former makes more sense for atomic read/write because
> > we don't need complete cmpxchg8b support, just a kind of movq
> > support, actually.
>
> Both require a function call.  With a function call, patching
> becomes much easier since there is only 1 place to patch, so
> patching is almost as easy as changing a function pointer (might
> need an instruction queue flush and/or prevention of the function
> being called before or while it is being patched).
>
> Patching the code also makes it easier to null out the lock prefix
> in the !SMP case when it is presumably not needed.  The function
> call to a function without a lock prefix will then be faster than
> inline code with a lock prefix.  With a function pointer, you start
> getting combinatorial explosion in the number of separate functions
> needed (1 without cmpxchg8b or a lock prefix (for i486), 1 with
> cmpxchg8b without a lock prefix (for !SMP i586+), and 1 with both
> > (for SMP i586+)).

I already implemented the function pointer thing last night.  You can 
see the current work-in-progress patch here:

http://people.freebsd.org/~jkim/tsc_cleanup.diff

Also, it's attached here as well.  I haven't noticed any problems so 
far, but I am sure you will find some. ;-)
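
To make the review easier, here is a minimal userland sketch of the 
dispatch scheme (hypothetical names and values; the kernel patch below 
does the same one-time switch in cpu_probe_cx8(), with the real 
inline-asm implementations):

/*
 * Pick the 64-bit load implementation once at startup, keyed on the
 * CPUID CX8 feature bit, the way cpu_probe_cx8() switches the kernel
 * function pointers in the patch below.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t
load_64_i386(uint64_t *p)
{

	/* The kernel version brackets two movl's with pushfl/cli/popfl. */
	return (*p);
}

static uint64_t
load_64_i586(uint64_t *p)
{

	/*
	 * CMPXCHG8B with old == new is an atomic 64-bit read; the
	 * compiler builtin generates it on i386.
	 */
	return (__sync_val_compare_and_swap(p, 0, 0));
}

static uint64_t (*load_64)(uint64_t *) = load_64_i386;

static int
cpu_has_cx8(void)
{
	uint32_t ax = 1, bx, cx, dx;

	__asm __volatile("cpuid"
	    : "+a" (ax), "=b" (bx), "=c" (cx), "=d" (dx));
	return ((dx & 0x100) != 0);	/* CPUID_CX8 is bit 8 of %edx. */
}

int
main(void)
{
	uint64_t freq = 2394000000ULL;	/* stand-in for tsc_freq */

	if (cpu_has_cx8())
		load_64 = load_64_i586;
	printf("%ju\n", (uintmax_t)load_64(&freq));
	return (0);
}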

Please note the patch includes the get_cyclecount() to cpu_ticks() 
conversion to give you a complete picture.
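
One hunk worth a close look is the merged _delay() loop in 
sys/x86/isa/clock.c: it accumulates elapsed ticks with masked 
wrap-around handling, so a timecounter rollover mid-wait doesn't break 
the delay.  A standalone rendering of just that arithmetic, with a 
hypothetical 16-bit counter and made-up samples:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* Hypothetical 16-bit counter that wraps once during the wait. */
	uint32_t mask = 0xffff;
	uint32_t samples[] = { 0xfffa, 0x0004, 0x0010 };
	uint32_t last = 0xfff0, u;
	uint64_t now = 0;
	size_t i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		u = samples[i] & mask;
		if (u < last)			/* counter wrapped */
			now += mask - last + u + 1;
		else
			now += u - last;
		last = u;
	}
	printf("%ju ticks elapsed\n", (uintmax_t)now);	/* prints 32 */
	return (0);
}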

Jung-uk Kim
Index: sys/kern/kern_ktr.c
===================================================================
--- sys/kern/kern_ktr.c (revision 219741)
+++ sys/kern/kern_ktr.c (working copy)
@@ -73,7 +73,7 @@ __FBSDID("$FreeBSD$");
 #endif
 
 #ifndef KTR_TIME
-#define        KTR_TIME        get_cyclecount()
+#define        KTR_TIME        cpu_ticks()
 #endif
 
 #ifndef KTR_CPU
Index: sys/kern/init_main.c
===================================================================
--- sys/kern/init_main.c        (revision 219741)
+++ sys/kern/init_main.c        (working copy)
@@ -560,7 +560,7 @@ SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST,
 static void
 proc0_post(void *dummy __unused)
 {
-       struct timespec ts;
+       struct bintime bt;
        struct proc *p;
        struct rusage ru;
        struct thread *td;
@@ -590,8 +590,8 @@ proc0_post(void *dummy __unused)
        /*
         * Give the ``random'' number generator a thump.
         */
-       nanotime(&ts);
-       srandom(ts.tv_sec ^ ts.tv_nsec);
+       bintime(&bt);
+       srandom(bt.sec ^ bt.frac);
 }
 SYSINIT(p0post, SI_SUB_INTRINSIC_POST, SI_ORDER_FIRST, proc0_post, NULL);
 
@@ -601,10 +601,10 @@ random_init(void *dummy __unused)
 
        /*
         * After CPU has been started we have some randomness on most
-        * platforms via get_cyclecount().  For platforms that don't
-        * we will reseed random(9) in proc0_post() as well.
+        * platforms via cpu_ticks().  For platforms that don't we will
+        * reseed random(9) in proc0_post() as well.
         */
-       srandom(get_cyclecount());
+       srandom(cpu_ticks());
 }
 SYSINIT(random, SI_SUB_RANDOM, SI_ORDER_FIRST, random_init, NULL);
 
Index: sys/netinet/sctp_os_bsd.h
===================================================================
--- sys/netinet/sctp_os_bsd.h   (revision 219741)
+++ sys/netinet/sctp_os_bsd.h   (working copy)
@@ -129,7 +129,7 @@ MALLOC_DECLARE(SCTP_M_MCORE);
 
 #if defined(SCTP_LOCAL_TRACE_BUF)
 
-#define SCTP_GET_CYCLECOUNT get_cyclecount()
+#define SCTP_GET_CYCLECOUNT cpu_ticks()
 #define SCTP_CTR6 sctp_log_trace
 
 #else
Index: sys/dev/acpica/acpi_cpu.c
===================================================================
--- sys/dev/acpica/acpi_cpu.c   (revision 219741)
+++ sys/dev/acpica/acpi_cpu.c   (working copy)
@@ -516,7 +516,7 @@ acpi_cpu_read_ivar(device_t dev, device_t child, i
 #if defined(__amd64__) || defined(__i386__)
     case CPU_IVAR_NOMINAL_MHZ:
        if (tsc_is_invariant) {
-           *result = (uintptr_t)(tsc_freq / 1000000);
+           *result = (uintptr_t)(GET_TSC_FREQ() / 1000000);
            break;
        }
        /* FALLTHROUGH */
Index: sys/dev/de/if_devar.h
===================================================================
--- sys/dev/de/if_devar.h       (revision 219741)
+++ sys/dev/de/if_devar.h       (working copy)
@@ -903,7 +903,7 @@ typedef u_long tulip_cycle_t;
 static __inline tulip_cycle_t
 TULIP_PERFREAD(void)
 {
-       return (get_cyclecount());
+       return (cpu_ticks());
 }
 
 #define        TULIP_PERFDIFF(s, f)    ((f) - (s))
Index: sys/dev/random/randomdev_soft.c
===================================================================
--- sys/dev/random/randomdev_soft.c     (revision 219741)
+++ sys/dev/random/randomdev_soft.c     (working copy)
@@ -353,8 +353,8 @@ random_yarrow_write(void *buf, int count)
                chunk = HARVESTSIZE;
                if (i + chunk >= count)
                        chunk = (u_int)(count - i);
-               random_harvest_internal(get_cyclecount(), (char *)buf + i,
-                   chunk, 0, 0, RANDOM_WRITE);
+               random_harvest_internal(cpu_ticks(), (char *)buf + i, chunk,
+                   0, 0, RANDOM_WRITE);
        }
 }
 
Index: sys/dev/random/harvest.c
===================================================================
--- sys/dev/random/harvest.c    (revision 219741)
+++ sys/dev/random/harvest.c    (working copy)
@@ -78,17 +78,16 @@ random_yarrow_deinit_harvester(void)
  * Implemented as in indirect call to allow non-inclusion of
  * the entropy device.
  *
- * XXXRW: get_cyclecount() is cheap on most modern hardware, where cycle
- * counters are built in, but on older hardware it will do a real time clock
- * read which can be quite expensive.
+ * XXXRW: cpu_ticks() is cheap on most modern hardware, where cycle counters
+ * are built in, but on older hardware it will do a real time clock read
+ * which can be quite expensive.
  */
 void
 random_harvest(void *entropy, u_int count, u_int bits, u_int frac,
     enum esource origin)
 {
        if (reap_func)
-               (*reap_func)(get_cyclecount(), entropy, count, bits, frac,
-                   origin);
+               (*reap_func)(cpu_ticks(), entropy, count, bits, frac, origin);
 }
 
 /* Userland-visible version of read_random */
Index: sys/compat/linprocfs/linprocfs.c
===================================================================
--- sys/compat/linprocfs/linprocfs.c    (revision 219741)
+++ sys/compat/linprocfs/linprocfs.c    (working copy)
@@ -221,6 +221,7 @@ linprocfs_docpuinfo(PFS_FILL_ARGS)
 {
        int hw_model[2];
        char model[128];
+       uint64_t freq;
        size_t size;
        int class, fqmhz, fqkhz;
        int i;
@@ -303,9 +304,10 @@ linprocfs_docpuinfo(PFS_FILL_ARGS)
                if (cpu_feature & (1 << i))
                        sbuf_printf(sb, " %s", flags[i]);
        sbuf_cat(sb, "\n");
-       if (class >= 5) {
-               fqmhz = (tsc_freq + 4999) / 1000000;
-               fqkhz = ((tsc_freq + 4999) / 10000) % 100;
+       freq = GET_TSC_FREQ();
+       if (class >= 5 && freq != 0) {
+               fqmhz = (freq + 4999) / 1000000;
+               fqkhz = ((freq + 4999) / 10000) % 100;
                sbuf_printf(sb,
                    "cpu MHz\t\t: %d.%02d\n"
                    "bogomips\t: %d.%02d\n",
Index: sys/pc98/pc98/machdep.c
===================================================================
--- sys/pc98/pc98/machdep.c     (revision 219741)
+++ sys/pc98/pc98/machdep.c     (working copy)
@@ -1072,16 +1072,17 @@ int
 cpu_est_clockrate(int cpu_id, uint64_t *rate)
 {
        register_t reg;
-       uint64_t tsc1, tsc2;
+       uint64_t freq, tsc1, tsc2;
 
        if (pcpu_find(cpu_id) == NULL || rate == NULL)
                return (EINVAL);
        if ((cpu_feature & CPUID_TSC) == 0)
                return (EOPNOTSUPP);
+       freq = GET_TSC_FREQ();
 
        /* If we're booting, trust the rate calibrated moments ago. */
-       if (cold && tsc_freq != 0) {
-               *rate = tsc_freq;
+       if (cold && freq != 0) {
+               *rate = freq;
                return (0);
        }
 
@@ -1109,17 +1110,7 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate)
        }
 #endif
 
-       tsc2 -= tsc1;
-       if (tsc_freq != 0) {
-               *rate = tsc2 * 1000;
-               return (0);
-       }
-
-       /*
-        * Subtract 0.5% of the total.  Empirical testing has shown that
-        * overhead in DELAY() works out to approximately this value.
-        */
-       *rate = tsc2 * 1000 - tsc2 * 5;
+       *rate = (tsc2 - tsc1) * 1000;
        return (0);
 }
 
Index: sys/x86/cpufreq/est.c
===================================================================
--- sys/x86/cpufreq/est.c       (revision 219741)
+++ sys/x86/cpufreq/est.c       (working copy)
@@ -1215,7 +1215,7 @@ est_msr_info(device_t dev, uint64_t msr, freq_info
                return (EOPNOTSUPP);
 
        /* Figure out the bus clock. */
-       freq = tsc_freq / 1000000;
+       freq = GET_TSC_FREQ() / 1000000;
        id = msr >> 32;
        bus = freq / (id >> 8);
        device_printf(dev, "Guessed bus clock (high) of %d MHz\n", bus);
Index: sys/x86/x86/tsc.c
===================================================================
--- sys/x86/x86/tsc.c   (revision 219741)
+++ sys/x86/x86/tsc.c   (working copy)
@@ -245,14 +245,16 @@ tsc_freq_changing(void *arg, const struct cf_level
 static void
 tsc_freq_changed(void *arg, const struct cf_level *level, int status)
 {
+       uint64_t freq;
 
        /* If there was an error during the transition, don't do anything. */
        if (tsc_disabled || status != 0)
                return;
 
        /* Total setting for this level gives the new frequency in MHz. */
-       tsc_freq = (uint64_t)level->total_set.freq * 1000000;
-       tsc_timecounter.tc_frequency = tsc_freq;
+       freq = (uint64_t)level->total_set.freq * 1000000;
+       SET_TSC_FREQ(freq);
+       atomic_store_64(&tsc_timecounter.tc_frequency, freq);
 }
 
 static int
@@ -261,13 +263,13 @@ sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS)
        int error;
        uint64_t freq;
 
-       if (tsc_timecounter.tc_frequency == 0)
+       freq = GET_TSC_FREQ();
+       if (freq == 0)
                return (EOPNOTSUPP);
-       freq = tsc_freq;
        error = sysctl_handle_64(oidp, &freq, 0, req);
        if (error == 0 && req->newptr != NULL) {
-               tsc_freq = freq;
-               tsc_timecounter.tc_frequency = tsc_freq;
+               SET_TSC_FREQ(freq);
+               atomic_store_64(&tsc_timecounter.tc_frequency, freq);
        }
        return (error);
 }
Index: sys/x86/isa/clock.c
===================================================================
--- sys/x86/isa/clock.c (revision 219741)
+++ sys/x86/isa/clock.c (working copy)
@@ -245,40 +245,43 @@ getit(void)
        return ((high << 8) | low);
 }
 
-static __inline void
-delay_tsc(int n)
+static __inline int
+_delay(int n)
 {
-       uint64_t start, end, now;
-
-       sched_pin();
-       start = rdtsc();
-       end = start + (tsc_freq * n) / 1000000;
-       do {
-               cpu_spinwait();
-               now = rdtsc();
-       } while (now < end || (now > start && end < start));
-       sched_unpin();
-}
-
-static __inline void
-delay_timecounter(struct timecounter *tc, int n)
-{
-       uint64_t end, now;
+       struct timecounter *tc;
+       uint64_t end, freq, now;
        u_int last, mask, u;
+       int use_tsc;
 
-       mask = tc->tc_counter_mask;
-       last = tc->tc_get_timecount(tc) & mask;
-       end = tc->tc_frequency * n / 1000000;
+       tc = timecounter;
+       freq = GET_TSC_FREQ();
+       use_tsc = tsc_is_invariant && freq != 0;
+       if (use_tsc) {
+               mask = ~0u;
+               sched_pin();
+               last = rdtsc();
+       } else {
+               if (tc->tc_quality <= 0)
+                       return (0);
+               freq = atomic_load_64(&tc->tc_frequency);
+               mask = tc->tc_counter_mask;
+               last = tc->tc_get_timecount(tc);
+       }
+       last &= mask;
+       end = freq * n / 1000000;
        now = 0;
        do {
                cpu_spinwait();
-               u = tc->tc_get_timecount(tc) & mask;
+               u = (use_tsc ? rdtsc() : tc->tc_get_timecount(tc)) & mask;
                if (u < last)
                        now += mask - last + u + 1;
                else
                        now += u - last;
                last = u;
        } while (now < end);
+       if (use_tsc)
+               sched_unpin();
+       return (1);
 }
 
 /*
@@ -289,7 +292,6 @@ getit(void)
 void
 DELAY(int n)
 {
-       struct timecounter *tc;
        int delta, prev_tick, tick, ticks_left;
 
 #ifdef DELAYDEBUG
@@ -298,15 +300,8 @@ DELAY(int n)
        static int state = 0;
 #endif
 
-       if (tsc_freq != 0) {
-               delay_tsc(n);
+       if (_delay(n))
                return;
-       }
-       tc = timecounter;
-       if (tc->tc_quality > 0) {
-               delay_timecounter(tc, n);
-               return;
-       }
 #ifdef DELAYDEBUG
        if (state == 0) {
                state = 1;
Index: sys/i386/include/clock.h
===================================================================
--- sys/i386/include/clock.h    (revision 219741)
+++ sys/i386/include/clock.h    (working copy)
@@ -20,6 +20,9 @@ extern int    i8254_max_count;
 extern uint64_t        tsc_freq;
 extern int     tsc_is_invariant;
 
+#define        GET_TSC_FREQ()  atomic_load_64(&tsc_freq)
+#define        SET_TSC_FREQ(f) atomic_store_64(&tsc_freq, (f))
+
 void   i8254_init(void);
 
 /*
Index: sys/i386/include/atomic.h
===================================================================
--- sys/i386/include/atomic.h   (revision 219741)
+++ sys/i386/include/atomic.h   (working copy)
@@ -120,6 +120,76 @@ atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p,
 }                                                      \
 struct __hack
 
+#if defined(_KERNEL) && !defined(WANT_FUNCTIONS)
+
+/* I486 does not support SMP or CMPXCHG8B. */
+static __inline uint64_t
+atomic_load_64_i386(uint64_t *p)
+{
+       uint64_t v;
+
+       __asm __volatile(
+       "       pushfl ;                "
+       "       cli ;                   "
+       "       movl (%1),%%eax ;       "
+       "       movl 4(%1),%%edx ;      "
+       "       popfl"
+       : "=A" (v)                      /* 0 */
+       : "c" (p));                     /* 1 */
+       return (v);
+}
+
+static __inline void
+atomic_store_64_i386(uint64_t *p, uint64_t v)
+{
+
+       __asm __volatile(
+       "       pushfl ;                "
+       "       cli ;                   "
+       "       movl %%eax,(%0) ;       "
+       "       movl %%edx,4(%0) ;      "
+       "       popfl"
+       :
+       : "r" (p),                      /* 0 */
+         "A" (v)                       /* 1 */
+       : "memory");
+}
+
+/* For Pentium and above, use CMPXCHG8B to emulate MOVQ. */
+static __inline uint64_t
+atomic_load_64_i586(uint64_t *p)
+{
+       uint64_t v;
+
+       __asm __volatile(
+       "       movl %%ebx,%%eax ;      "
+       "       movl %%ecx,%%edx ;      "
+       "       " MPLOCKED "            "
+       "       cmpxchg8b (%1)"
+       : "=A" (v)                      /* 0 */
+       : "c" (p)                       /* 1 */
+       : "cc");
+       return (v);
+}
+
+static __inline void
+atomic_store_64_i586(uint64_t *p, uint64_t v)
+{
+
+       __asm __volatile(
+       "       movl %%eax,%%ebx ;      "
+       "       movl %%edx,%%ecx ;      "
+       "1:                             "
+       "       cmpxchg8b (%0) ;        "
+       "       jne 1b"
+       :
+       : "r" (p),                      /* 0 */
+         "A" (v)                       /* 1 */
+       : "ebx", "ecx", "memory", "cc");
+}
+
+#endif /* _KERNEL && !WANT_FUNCTIONS */
+
 /*
  * Atomic compare and set, used by the mutex functions
  *
@@ -292,6 +362,11 @@ ATOMIC_STORE_LOAD(long,    "cmpxchgl %0,%1",  "xchgl
 
 #ifndef WANT_FUNCTIONS
 
+#ifdef _KERNEL
+extern uint64_t (*atomic_load_64)(uint64_t *);
+extern void (*atomic_store_64)(uint64_t *, uint64_t);
+#endif
+
 static __inline int
 atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src)
 {
Index: sys/i386/include/cpu.h
===================================================================
--- sys/i386/include/cpu.h      (revision 219741)
+++ sys/i386/include/cpu.h      (working copy)
@@ -39,7 +39,6 @@
 /*
  * Definitions unique to i386 cpu support.
  */
-#include <machine/cputypes.h>
 #include <machine/psl.h>
 #include <machine/frame.h>
 #include <machine/segments.h>
@@ -70,13 +69,8 @@ void swi_vm(void *);
 static __inline uint64_t
 get_cyclecount(void)
 {
-       struct bintime bt;
 
-       if (cpu_class == CPUCLASS_486) {
-               binuptime(&bt);
-               return ((uint64_t)bt.sec << 56 | bt.frac >> 8);
-       }
-       return (rdtsc());
+       return (cpu_ticks());
 }
 
 #endif
Index: sys/i386/i386/legacy.c
===================================================================
--- sys/i386/i386/legacy.c      (revision 219741)
+++ sys/i386/i386/legacy.c      (working copy)
@@ -342,7 +342,7 @@ cpu_read_ivar(device_t dev, device_t child, int in
                break;
        case CPU_IVAR_NOMINAL_MHZ:
                if (tsc_is_invariant) {
-                       *result = (uintptr_t)(tsc_freq / 1000000);
+                       *result = (uintptr_t)(GET_TSC_FREQ() / 1000000);
                        break;
                }
                /* FALLTHROUGH */
Index: sys/i386/i386/perfmon.c
===================================================================
--- sys/i386/i386/perfmon.c     (revision 219741)
+++ sys/i386/i386/perfmon.c     (working copy)
@@ -336,6 +336,7 @@ perfmon_ioctl(struct cdev *dev, u_long cmd, caddr_
        struct pmc *pmc;
        struct pmc_data *pmcd;
        struct pmc_tstamp *pmct;
+       uint64_t freq;
        int *ip;
        int rv;
 
@@ -386,13 +387,14 @@ perfmon_ioctl(struct cdev *dev, u_long cmd, caddr_
                break;
 
        case PMIOTSTAMP:
-               if (!tsc_freq) {
+               freq = GET_TSC_FREQ();
+               if (freq == 0) {
                        rv = ENOTTY;
                        break;
                }
                pmct = (struct pmc_tstamp *)param;
                /* XXX interface loses precision. */
-               pmct->pmct_rate = tsc_freq / 1000000;
+               pmct->pmct_rate = freq / 1000000;
                pmct->pmct_value = rdtsc();
                rv = 0;
                break;
Index: sys/i386/i386/machdep.c
===================================================================
--- sys/i386/i386/machdep.c     (revision 219741)
+++ sys/i386/i386/machdep.c     (working copy)
@@ -1137,20 +1137,21 @@ int
 cpu_est_clockrate(int cpu_id, uint64_t *rate)
 {
        register_t reg;
-       uint64_t tsc1, tsc2;
+       uint64_t freq, tsc1, tsc2;
 
        if (pcpu_find(cpu_id) == NULL || rate == NULL)
                return (EINVAL);
        if ((cpu_feature & CPUID_TSC) == 0)
                return (EOPNOTSUPP);
+       freq = GET_TSC_FREQ();
 
        /* If TSC is P-state invariant, DELAY(9) based logic fails. */
-       if (tsc_is_invariant && tsc_freq != 0)
+       if (tsc_is_invariant && freq != 0)
                return (EOPNOTSUPP);
 
        /* If we're booting, trust the rate calibrated moments ago. */
-       if (cold && tsc_freq != 0) {
-               *rate = tsc_freq;
+       if (cold && freq != 0) {
+               *rate = freq;
                return (0);
        }
 
@@ -1178,17 +1179,7 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate)
        }
 #endif
 
-       tsc2 -= tsc1;
-       if (tsc_freq != 0) {
-               *rate = tsc2 * 1000;
-               return (0);
-       }
-
-       /*
-        * Subtract 0.5% of the total.  Empirical testing has shown that
-        * overhead in DELAY() works out to approximately this value.
-        */
-       *rate = tsc2 * 1000 - tsc2 * 5;
+       *rate = (tsc2 - tsc1) * 1000;
        return (0);
 }
 
@@ -1419,6 +1410,19 @@ cpu_idle_wakeup(int cpu)
        return (1);
 }
 
+uint64_t (*atomic_load_64)(uint64_t *) = atomic_load_64_i386;
+void (*atomic_store_64)(uint64_t *, uint64_t) = atomic_store_64_i386;
+
+static void
+cpu_probe_cx8(void)
+{
+
+       if ((cpu_feature & CPUID_CX8) != 0) {
+               atomic_load_64 = atomic_load_64_i586;
+               atomic_store_64 = atomic_store_64_i586;
+       }
+}
+
 /*
  * Ordered by speed/power consumption.
  */
@@ -2730,6 +2734,7 @@ init386(first)
        thread0.td_pcb->pcb_gsd = PCPU_GET(fsgs_gdt)[1];
 
        cpu_probe_amdc1e();
+       cpu_probe_cx8();
 }
 
 #else
@@ -3006,6 +3011,7 @@ init386(first)
        thread0.td_frame = &proc0_tf;
 
        cpu_probe_amdc1e();
+       cpu_probe_cx8();
 }
 #endif
 
Index: sys/contrib/altq/altq/altq_subr.c
===================================================================
--- sys/contrib/altq/altq/altq_subr.c   (revision 219741)
+++ sys/contrib/altq/altq/altq_subr.c   (working copy)
@@ -929,7 +929,7 @@ init_machclk_setup(void)
 #if defined(__amd64__) || defined(__i386__)
        /* check if TSC is available */
 #ifdef __FreeBSD__
-       if ((cpu_feature & CPUID_TSC) == 0 || tsc_freq == 0)
+       if ((cpu_feature & CPUID_TSC) == 0 || GET_TSC_FREQ() == 0)
 #else
        if ((cpu_feature & CPUID_TSC) == 0)
 #endif
@@ -964,7 +964,7 @@ init_machclk(void)
         */
 #if defined(__amd64__) || defined(__i386__)
 #ifdef __FreeBSD__
-       machclk_freq = tsc_freq;
+       machclk_freq = GET_TSC_FREQ();
 #elif defined(__NetBSD__)
        machclk_freq = (u_int32_t)cpu_tsc_freq;
 #elif defined(__OpenBSD__) && (defined(I586_CPU) || defined(I686_CPU))
Index: sys/cddl/dev/dtrace/i386/dtrace_subr.c
===================================================================
--- sys/cddl/dev/dtrace/i386/dtrace_subr.c      (revision 219741)
+++ sys/cddl/dev/dtrace/i386/dtrace_subr.c      (working copy)
@@ -403,7 +403,7 @@ dtrace_gethrtime_init(void *arg)
         * Otherwise tick->time conversion will be inaccurate, but
         * will preserve monotonic property of TSC.
         */
-       tsc_f = tsc_freq;
+       tsc_f = GET_TSC_FREQ();
 
        /*
         * The following line checks that nsec_scale calculated below
Index: sys/cddl/dev/dtrace/amd64/dtrace_subr.c
===================================================================
--- sys/cddl/dev/dtrace/amd64/dtrace_subr.c     (revision 219741)
+++ sys/cddl/dev/dtrace/amd64/dtrace_subr.c     (working copy)
@@ -403,7 +403,7 @@ dtrace_gethrtime_init(void *arg)
         * Otherwise tick->time conversion will be inaccurate, but
         * will preserve monotonic property of TSC.
         */
-       tsc_f = tsc_freq;
+       tsc_f = GET_TSC_FREQ();
 
        /*
         * The following line checks that nsec_scale calculated below
Index: sys/amd64/include/clock.h
===================================================================
--- sys/amd64/include/clock.h   (revision 219741)
+++ sys/amd64/include/clock.h   (working copy)
@@ -20,6 +20,9 @@ extern int    i8254_max_count;
 extern uint64_t        tsc_freq;
 extern int     tsc_is_invariant;
 
+#define        GET_TSC_FREQ()  atomic_load_64(&tsc_freq)
+#define        SET_TSC_FREQ(f) atomic_store_64(&tsc_freq, (f))
+
 void   i8254_init(void);
 
 /*
Index: sys/amd64/include/atomic.h
===================================================================
--- sys/amd64/include/atomic.h  (revision 219741)
+++ sys/amd64/include/atomic.h  (working copy)
@@ -303,6 +303,11 @@ ATOMIC_STORE_LOAD(long,    "cmpxchgq %0,%1",  "xchgq
 
 #ifndef WANT_FUNCTIONS
 
+#ifdef _KERNEL
+#define        atomic_load_64(p)       (*(p))
+#define        atomic_store_64(p, v)   do { *(p) = (v); } while (0)
+#endif
+
 /* Read the current value and store a zero in the destination. */
 #ifdef __GNUCLIKE_ASM
 
Index: sys/amd64/amd64/legacy.c
===================================================================
--- sys/amd64/amd64/legacy.c    (revision 219741)
+++ sys/amd64/amd64/legacy.c    (working copy)
@@ -321,7 +321,7 @@ cpu_read_ivar(device_t dev, device_t child, int in
                break;
        case CPU_IVAR_NOMINAL_MHZ:
                if (tsc_is_invariant) {
-                       *result = (uintptr_t)(tsc_freq / 1000000);
+                       *result = (uintptr_t)(GET_TSC_FREQ() / 1000000);
                        break;
                }
                /* FALLTHROUGH */
Index: sys/amd64/amd64/machdep.c
===================================================================
--- sys/amd64/amd64/machdep.c   (revision 219741)
+++ sys/amd64/amd64/machdep.c   (working copy)
@@ -541,18 +541,19 @@ int
 cpu_est_clockrate(int cpu_id, uint64_t *rate)
 {
        register_t reg;
-       uint64_t tsc1, tsc2;
+       uint64_t freq, tsc1, tsc2;
 
        if (pcpu_find(cpu_id) == NULL || rate == NULL)
                return (EINVAL);
+       freq = GET_TSC_FREQ();
 
        /* If TSC is P-state invariant, DELAY(9) based logic fails. */
-       if (tsc_is_invariant && tsc_freq != 0)
+       if (tsc_is_invariant && freq != 0)
                return (EOPNOTSUPP);
 
        /* If we're booting, trust the rate calibrated moments ago. */
-       if (cold && tsc_freq != 0) {
-               *rate = tsc_freq;
+       if (cold && freq != 0) {
+               *rate = freq;
                return (0);
        }
 
@@ -580,17 +581,7 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate)
        }
 #endif
 
-       tsc2 -= tsc1;
-       if (tsc_freq != 0) {
-               *rate = tsc2 * 1000;
-               return (0);
-       }
-
-       /*
-        * Subtract 0.5% of the total.  Empirical testing has shown that
-        * overhead in DELAY() works out to approximately this value.
-        */
-       *rate = tsc2 * 1000 - tsc2 * 5;
+       *rate = (tsc2 - tsc1) * 1000;
        return (0);
 }
 