Re: TSC synchronization on MP machines
> Date: Fri, 9 Aug 2019 14:35:34 +0300 > From: Paul Irofti > > > > I changed cpu_serializing_counter() with tsc_get_timecount() as they > > > were the same function now that msr is gone. Hope that is not too gross. > > > > That doesn't work as tsc_get_timecount() returns a 32-bit integer. > > > > I can not just rdtsc because that means changing the drift max > > > > I don't understand that comment. Whether you add the previous skew or > > not makes no difference when calculating the new skew since you're > > adding the same value to both TSC counts when calculating the > > difference. The measured skew shouldn't change. And when calculating > > the drift you simply subtract the new skew from the old skew. So > > nothing changes. Or am I missing something? > > > > Nope. Double checked with my fears about the drift and, as stated on the > channel, it was a false alarm. :) go for it! > Index: arch/amd64/amd64/cpu.c > === > RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v > retrieving revision 1.137 > diff -u -p -u -p -r1.137 cpu.c > --- arch/amd64/amd64/cpu.c28 May 2019 18:17:01 - 1.137 > +++ arch/amd64/amd64/cpu.c9 Aug 2019 11:33:41 - > @@ -754,6 +754,10 @@ cpu_init(struct cpu_info *ci) > cr4 = rcr4(); > lcr4(cr4 & ~CR4_PGE); > lcr4(cr4); > + > + /* Synchronize TSC */ > + if (cold && !CPU_IS_PRIMARY(ci)) > + tsc_sync_ap(ci); > #endif > } > > @@ -808,6 +812,7 @@ void > cpu_start_secondary(struct cpu_info *ci) > { > int i; > + u_long s; > > ci->ci_flags |= CPUF_AP; > > @@ -828,6 +833,18 @@ cpu_start_secondary(struct cpu_info *ci) > printf("dropping into debugger; continue from here to resume > boot\n"); > db_enter(); > #endif > + } else { > + /* > + * Synchronize time stamp counters. Invalidate cache and > + * synchronize twice (in tsc_sync_bp) to minimize possible > + * cache effects. Disable interrupts to try and rule out any > + * external interference. 
> + */ > + s = intr_disable(); > + wbinvd(); > + tsc_sync_bp(ci); > + intr_restore(s); > + printf("TSC skew=%lld\n", (long long)ci->ci_tsc_skew); > } > > if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) { > @@ -852,6 +869,8 @@ void > cpu_boot_secondary(struct cpu_info *ci) > { > int i; > + int64_t drift; > + u_long s; > > atomic_setbits_int(&ci->ci_flags, CPUF_GO); > > @@ -864,6 +883,17 @@ cpu_boot_secondary(struct cpu_info *ci) > printf("dropping into debugger; continue from here to resume > boot\n"); > db_enter(); > #endif > + } else if (cold) { > + /* Synchronize TSC again, check for drift. */ > + drift = ci->ci_tsc_skew; > + s = intr_disable(); > + wbinvd(); > + tsc_sync_bp(ci); > + intr_restore(s); > + drift -= ci->ci_tsc_skew; > + printf("TSC skew=%lld drift=%lld\n", > + (long long)ci->ci_tsc_skew, (long long)drift); > + tsc_sync_drift(drift); > } > } > > @@ -888,7 +918,14 @@ cpu_hatch(void *v) > panic("%s: already running!?", ci->ci_dev->dv_xname); > #endif > > + /* > + * Synchronize the TSC for the first time. Note that interrupts are > + * off at this point. > + */ > + wbinvd(); > ci->ci_flags |= CPUF_PRESENT; > + ci->ci_tsc_skew = 0;/* reset on resume */ > + tsc_sync_ap(ci); > > lapic_enable(); > lapic_startclock(); > Index: arch/amd64/amd64/tsc.c > === > RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v > retrieving revision 1.11 > diff -u -p -u -p -r1.11 tsc.c > --- arch/amd64/amd64/tsc.c6 Jun 2019 19:43:35 - 1.11 > +++ arch/amd64/amd64/tsc.c9 Aug 2019 11:33:41 - > @@ -1,8 +1,10 @@ > /* $OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $ */ > /* > + * Copyright (c) 2008 The NetBSD Foundation, Inc. 
> * Copyright (c) 2016,2017 Reyk Floeter > * Copyright (c) 2017 Adam Steen > * Copyright (c) 2017 Mike Belopuhov > + * Copyright (c) 2019 Paul Irofti > * > * Permission to use, copy, modify, and distribute this software for any > * purpose with or without fee is hereby granted, provided that the above > @@ -20,6 +22,7 @@ > #include > #include > #include > +#include > > #include > #include > @@ -33,6 +36,12 @@ inttsc_recalibrate; > uint64_t tsc_frequency; > int tsc_is_invariant; > > +#define TSC_DRIFT_MAX 250 > +int64_t tsc_drift_observed; > + > +volatile int64_t tsc_sync_val; > +volatile struct cpu_info *tsc_sync_cpu; > + > uint tsc_get_timecount(struct tim
Re: TSC synchronization on MP machines
> > I changed cpu_serializing_counter() with tsc_get_timecount() as they > > were the same function now that msr is gone. Hope that is not too gross. > > That doesn't work as tsc_get_timecount() returns a 32-bit integer. > > I can not just rdtsc because that means changing the drift max > > I don't understand that comment. Whether you add the previous skew or > not makes no difference when calculating the new skew since you're > adding the same value to both TSC counts when calculating the > difference. The measured skew shouldn't change. And when calculating > the drift you simply subtract the new skew from the old skew. So > nothing changes. Or am I missing something? > Nope. Double checked with my fears about the drift and, as stated on the channel, it was a false alarm. :) Index: arch/amd64/amd64/cpu.c === RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v retrieving revision 1.137 diff -u -p -u -p -r1.137 cpu.c --- arch/amd64/amd64/cpu.c 28 May 2019 18:17:01 - 1.137 +++ arch/amd64/amd64/cpu.c 9 Aug 2019 11:33:41 - @@ -754,6 +754,10 @@ cpu_init(struct cpu_info *ci) cr4 = rcr4(); lcr4(cr4 & ~CR4_PGE); lcr4(cr4); + + /* Synchronize TSC */ + if (cold && !CPU_IS_PRIMARY(ci)) + tsc_sync_ap(ci); #endif } @@ -808,6 +812,7 @@ void cpu_start_secondary(struct cpu_info *ci) { int i; + u_long s; ci->ci_flags |= CPUF_AP; @@ -828,6 +833,18 @@ cpu_start_secondary(struct cpu_info *ci) printf("dropping into debugger; continue from here to resume boot\n"); db_enter(); #endif + } else { + /* +* Synchronize time stamp counters. Invalidate cache and +* synchronize twice (in tsc_sync_bp) to minimize possible +* cache effects. Disable interrupts to try and rule out any +* external interference. 
+*/ + s = intr_disable(); + wbinvd(); + tsc_sync_bp(ci); + intr_restore(s); + printf("TSC skew=%lld\n", (long long)ci->ci_tsc_skew); } if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) { @@ -852,6 +869,8 @@ void cpu_boot_secondary(struct cpu_info *ci) { int i; + int64_t drift; + u_long s; atomic_setbits_int(&ci->ci_flags, CPUF_GO); @@ -864,6 +883,17 @@ cpu_boot_secondary(struct cpu_info *ci) printf("dropping into debugger; continue from here to resume boot\n"); db_enter(); #endif + } else if (cold) { + /* Synchronize TSC again, check for drift. */ + drift = ci->ci_tsc_skew; + s = intr_disable(); + wbinvd(); + tsc_sync_bp(ci); + intr_restore(s); + drift -= ci->ci_tsc_skew; + printf("TSC skew=%lld drift=%lld\n", + (long long)ci->ci_tsc_skew, (long long)drift); + tsc_sync_drift(drift); } } @@ -888,7 +918,14 @@ cpu_hatch(void *v) panic("%s: already running!?", ci->ci_dev->dv_xname); #endif + /* +* Synchronize the TSC for the first time. Note that interrupts are +* off at this point. +*/ + wbinvd(); ci->ci_flags |= CPUF_PRESENT; + ci->ci_tsc_skew = 0;/* reset on resume */ + tsc_sync_ap(ci); lapic_enable(); lapic_startclock(); Index: arch/amd64/amd64/tsc.c === RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v retrieving revision 1.11 diff -u -p -u -p -r1.11 tsc.c --- arch/amd64/amd64/tsc.c 6 Jun 2019 19:43:35 - 1.11 +++ arch/amd64/amd64/tsc.c 9 Aug 2019 11:33:41 - @@ -1,8 +1,10 @@ /* $OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $ */ /* + * Copyright (c) 2008 The NetBSD Foundation, Inc. 
* Copyright (c) 2016,2017 Reyk Floeter * Copyright (c) 2017 Adam Steen * Copyright (c) 2017 Mike Belopuhov + * Copyright (c) 2019 Paul Irofti * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -20,6 +22,7 @@ #include <sys/param.h> #include <sys/systm.h> #include <sys/timetc.h> +#include <sys/atomic.h> #include <machine/cpu.h> #include <machine/cpufunc.h> @@ -33,6 +36,12 @@ int tsc_recalibrate; uint64_t tsc_frequency; inttsc_is_invariant; +#defineTSC_DRIFT_MAX 250 +int64_ttsc_drift_observed; + +volatile int64_t tsc_sync_val; +volatile struct cpu_info *tsc_sync_cpu; + uint tsc_get_timecount(struct timecounter *tc); struct timecounter tsc_timecounter = { @@ -172,10 +181,8 @@ calibrate_tsc_freq(void) return; tsc_frequency = freq; tsc_timecounter.tc_frequency = freq; -#ifndef MULTIPROCESSOR
Re: TSC synchronization on MP machines
> Date: Fri, 9 Aug 2019 10:21:14 +0300 > From: Paul Irofti > > On Wed, Aug 07, 2019 at 02:55:54PM +0200, Mark Kettenis wrote: > > > Date: Tue, 6 Aug 2019 23:29:30 +0300 > > > From: Paul Irofti > > > > > > Hi, > > > > > > Here is a fourth diff addressing all the issues so far, that have been > > > mainly pointed out by kettenis@, thanks! > > > > > > Changes: > > > - stop resetting the observed drift as it does not affect tsc > > > re-initialization on resume, thus removing all changes from > > > acpi_machdep.c > > > - fix comment and put a temporary pretty printf of resume > > > - rename cpu_cc_skew to ci_tsc_skew > > > - remove unfinished code using MSR_TSC for synchronization (to > > > be added later on together with the missing IA32_TSC_ADJUST > > > wrmsr commands) > > > > > > All other technical issues were discussed and settled in private and > > > require no change to the former diff. > > > > > > > > > For testing you can also use the regress test after booting with tsc as > > > default clock and waiting for an hour or so to let the clocks go wild: > > > > > > # cd /usr/src/regress/sys/kern/clock_gettime > > > # make regress > > > > > > There is another test program flying around the mailing lists I guess, > > > but I could not locate it now so if someone is kind enough to reply with > > > the code, that would be lovely! > > > > > > Paul > > > > Hi Paul, > > > > Still some small questions/issues now that the MSR thing has been > > cleared up. > > > > With those issues fixed, this is ok kettenis@ > > Hi Mark, > > I have addressed all your comments in the diff below. > > I changed cpu_serializing_counter() with tsc_get_timecount() as they > were the same function now that msr is gone. Hope that is not too gross. That doesn't work as tsc_get_timecount() returns a 32-bit integer. I can not just rdtsc because that means changing the drift max I don't understand that comment. 
Whether you add the previous skew or not makes no difference when calculating the new skew since you're adding the same value to both TSC counts when calculating the difference. The measured skew shouldn't change. And when calculating the drift you simply subtract the new skew from the old skew. So nothing changes. Or am I missing something? > Index: arch/amd64/amd64/cpu.c > === > RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v > retrieving revision 1.137 > diff -u -p -u -p -r1.137 cpu.c > --- arch/amd64/amd64/cpu.c28 May 2019 18:17:01 - 1.137 > +++ arch/amd64/amd64/cpu.c9 Aug 2019 07:16:40 - > @@ -754,6 +754,10 @@ cpu_init(struct cpu_info *ci) > cr4 = rcr4(); > lcr4(cr4 & ~CR4_PGE); > lcr4(cr4); > + > + /* Synchronize TSC */ > + if (cold && !CPU_IS_PRIMARY(ci)) > + tsc_sync_ap(ci); > #endif > } > > @@ -808,6 +812,7 @@ void > cpu_start_secondary(struct cpu_info *ci) > { > int i; > + u_long s; > > ci->ci_flags |= CPUF_AP; > > @@ -828,6 +833,18 @@ cpu_start_secondary(struct cpu_info *ci) > printf("dropping into debugger; continue from here to resume > boot\n"); > db_enter(); > #endif > + } else { > + /* > + * Synchronize time stamp counters. Invalidate cache and > + * synchronize twice (in tsc_sync_bp) to minimize possible > + * cache effects. Disable interrupts to try and rule out any > + * external interference. > + */ > + s = intr_disable(); > + wbinvd(); > + tsc_sync_bp(ci); > + intr_restore(s); > + printf("TSC skew=%lld\n", (long long)ci->ci_tsc_skew); > } > > if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) { > @@ -852,6 +869,8 @@ void > cpu_boot_secondary(struct cpu_info *ci) > { > int i; > + int64_t drift; > + u_long s; > > atomic_setbits_int(&ci->ci_flags, CPUF_GO); > > @@ -864,6 +883,17 @@ cpu_boot_secondary(struct cpu_info *ci) > printf("dropping into debugger; continue from here to resume > boot\n"); > db_enter(); > #endif > + } else if (cold) { > + /* Synchronize TSC again, check for drift. 
*/ > + drift = ci->ci_tsc_skew; > + s = intr_disable(); > + wbinvd(); > + tsc_sync_bp(ci); > + intr_restore(s); > + drift -= ci->ci_tsc_skew; > + printf("TSC skew=%lld drift=%lld\n", > + (long long)ci->ci_tsc_skew, (long long)drift); > + tsc_sync_drift(drift); > } > } > > @@ -888,7 +918,14 @@ cpu_hatch(void *v) > panic("%s: already running!?", ci->ci_dev->dv_xname); > #endif > > + /* > + * Synchronize the TSC for the first time. Note that interrupts are > + * off at this point. > + */ > + wbinvd(); > ci->ci_flags |= CPUF_PRESENT
Re: TSC synchronization on MP machines
On Wed, Aug 07, 2019 at 02:55:54PM +0200, Mark Kettenis wrote: > > Date: Tue, 6 Aug 2019 23:29:30 +0300 > > From: Paul Irofti > > > > Hi, > > > > Here is a fourth diff addressing all the issues so far, that have been > > mainly pointed out by kettenis@, thanks! > > > > Changes: > > - stop resetting the observed drift as it does not affect tsc > > re-initialization on resume, thus removing all changes from > > acpi_machdep.c > > - fix comment and put a temporary pretty printf of resume > > - rename cpu_cc_skew to ci_tsc_skew > > - remove unfinished code using MSR_TSC for synchronization (to > > be added later on together with the missing IA32_TSC_ADJUST > > wrmsr commands) > > > > All other technical issues were discussed and settled in private and > > require no change to the former diff. > > > > > > For testing you can also use the regress test after booting with tsc as > > default clock and waiting for an hour or so to let the clocks go wild: > > > > # cd /usr/src/regress/sys/kern/clock_gettime > > # make regress > > > > There is another test program flying around the mailing lists I guess, > > but I could not locate it now so if someone is kind enough to reply with > > the code, that would be lovely! > > > > Paul > > Hi Paul, > > Still some small questions/issues now that the MSR thing has been > cleared up. > > With those issues fixed, this is ok kettenis@ Hi Mark, I have addressed all your comments in the diff below. I changed cpu_serializing_counter() with tsc_get_timecount() as they were the same function now that msr is gone. Hope that is not too gross. Thank you for another review. 
Paul Index: arch/amd64/amd64/cpu.c === RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v retrieving revision 1.137 diff -u -p -u -p -r1.137 cpu.c --- arch/amd64/amd64/cpu.c 28 May 2019 18:17:01 - 1.137 +++ arch/amd64/amd64/cpu.c 9 Aug 2019 07:16:40 - @@ -754,6 +754,10 @@ cpu_init(struct cpu_info *ci) cr4 = rcr4(); lcr4(cr4 & ~CR4_PGE); lcr4(cr4); + + /* Synchronize TSC */ + if (cold && !CPU_IS_PRIMARY(ci)) + tsc_sync_ap(ci); #endif } @@ -808,6 +812,7 @@ void cpu_start_secondary(struct cpu_info *ci) { int i; + u_long s; ci->ci_flags |= CPUF_AP; @@ -828,6 +833,18 @@ cpu_start_secondary(struct cpu_info *ci) printf("dropping into debugger; continue from here to resume boot\n"); db_enter(); #endif + } else { + /* +* Synchronize time stamp counters. Invalidate cache and +* synchronize twice (in tsc_sync_bp) to minimize possible +* cache effects. Disable interrupts to try and rule out any +* external interference. +*/ + s = intr_disable(); + wbinvd(); + tsc_sync_bp(ci); + intr_restore(s); + printf("TSC skew=%lld\n", (long long)ci->ci_tsc_skew); } if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) { @@ -852,6 +869,8 @@ void cpu_boot_secondary(struct cpu_info *ci) { int i; + int64_t drift; + u_long s; atomic_setbits_int(&ci->ci_flags, CPUF_GO); @@ -864,6 +883,17 @@ cpu_boot_secondary(struct cpu_info *ci) printf("dropping into debugger; continue from here to resume boot\n"); db_enter(); #endif + } else if (cold) { + /* Synchronize TSC again, check for drift. */ + drift = ci->ci_tsc_skew; + s = intr_disable(); + wbinvd(); + tsc_sync_bp(ci); + intr_restore(s); + drift -= ci->ci_tsc_skew; + printf("TSC skew=%lld drift=%lld\n", + (long long)ci->ci_tsc_skew, (long long)drift); + tsc_sync_drift(drift); } } @@ -888,7 +918,14 @@ cpu_hatch(void *v) panic("%s: already running!?", ci->ci_dev->dv_xname); #endif + /* +* Synchronize the TSC for the first time. Note that interrupts are +* off at this point. 
+*/ + wbinvd(); ci->ci_flags |= CPUF_PRESENT; + ci->ci_tsc_skew = 0;/* reset on resume */ + tsc_sync_ap(ci); lapic_enable(); lapic_startclock(); Index: arch/amd64/amd64/tsc.c === RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v retrieving revision 1.11 diff -u -p -u -p -r1.11 tsc.c --- arch/amd64/amd64/tsc.c 6 Jun 2019 19:43:35 - 1.11 +++ arch/amd64/amd64/tsc.c 9 Aug 2019 07:16:41 - @@ -1,8 +1,10 @@ /* $OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $ */ /* + * Copyright (c) 2008 The NetBSD Foundation, Inc. * Copyright (c) 2016,2017 Reyk Floeter * Copyright (c) 2017 Adam Steen
Re: TSC synchronization on MP machines
On 6.8.2019. 22:29, Paul Irofti wrote: > Hi, > > Here is a fourth diff addressing all the issues so far, that have been > mainly pointed out by kettenis@, thanks! > > Changes: > - stop resetting the observed drift as it does not affect tsc > re-initialization on resume, thus removing all changes from > acpi_machdep.c > - fix comment and put a temporary pretty printf of resume > - rename cpu_cc_skew to ci_tsc_skew > - remove unfinished code using MSR_TSC for synchronization (to > be added later on together with the missing IA32_TSC_ADJUST > wrmsr commands) > > All other technical issues were discussed and settled in private and > require no change to the former diff. > > > For testing you can also use the regress test after booting with tsc as > default clock and waiting for an hour or so to let the clocks go wild: > > # cd /usr/src/regress/sys/kern/clock_gettime > # make regress > > There is another test program flying around the mailing lists I guess, > but I could not locate it now so if someone is kind enough to reply with > the code, that would be lovely! > > Paul Hi, I applied this diff on Dell R6415 with AMD EPYC 7551P with 32/64 cores, run regress and test program that tb@ pointed out .. and everything seem right .. 
r6415# sysctl kern.timecounter.hardware kern.timecounter.hardware=tsc r6415# dmesg | grep tsc_timecounter_init tsc_timecounter_init: TSC skew=0 observed drift=0 tsc_timecounter_init: TSC skew=-260 observed drift=0 tsc_timecounter_init: TSC skew=-240 observed drift=0 tsc_timecounter_init: TSC skew=-470 observed drift=0 tsc_timecounter_init: TSC skew=120 observed drift=0 tsc_timecounter_init: TSC skew=-480 observed drift=0 tsc_timecounter_init: TSC skew=-520 observed drift=0 tsc_timecounter_init: TSC skew=-440 observed drift=0 tsc_timecounter_init: TSC skew=-10 observed drift=0 tsc_timecounter_init: TSC skew=-460 observed drift=0 tsc_timecounter_init: TSC skew=-490 observed drift=0 tsc_timecounter_init: TSC skew=-440 observed drift=0 tsc_timecounter_init: TSC skew=110 observed drift=0 tsc_timecounter_init: TSC skew=-470 observed drift=0 tsc_timecounter_init: TSC skew=-490 observed drift=0 tsc_timecounter_init: TSC skew=-440 observed drift=0 tsc_timecounter_init: TSC skew=-30 observed drift=0 tsc_timecounter_init: TSC skew=-470 observed drift=0 tsc_timecounter_init: TSC skew=-490 observed drift=0 tsc_timecounter_init: TSC skew=-430 observed drift=0 tsc_timecounter_init: TSC skew=110 observed drift=0 tsc_timecounter_init: TSC skew=-480 observed drift=0 tsc_timecounter_init: TSC skew=-470 observed drift=0 tsc_timecounter_init: TSC skew=-450 observed drift=0 tsc_timecounter_init: TSC skew=20 observed drift=0 tsc_timecounter_init: TSC skew=-470 observed drift=0 tsc_timecounter_init: TSC skew=-470 observed drift=0 tsc_timecounter_init: TSC skew=-450 observed drift=0 tsc_timecounter_init: TSC skew=110 observed drift=0 tsc_timecounter_init: TSC skew=-460 observed drift=0 tsc_timecounter_init: TSC skew=-480 observed drift=0 tsc_timecounter_init: TSC skew=-440 observed drift=0 tsc_timecounter_init: TSC skew=-10 observed drift=0 tsc_timecounter_init: TSC skew=-460 observed drift=0 tsc_timecounter_init: TSC skew=-490 observed drift=0 tsc_timecounter_init: TSC skew=-450 observed 
drift=0 tsc_timecounter_init: TSC skew=130 observed drift=0 tsc_timecounter_init: TSC skew=-460 observed drift=0 tsc_timecounter_init: TSC skew=-480 observed drift=0 tsc_timecounter_init: TSC skew=-430 observed drift=0 tsc_timecounter_init: TSC skew=70 observed drift=0 tsc_timecounter_init: TSC skew=-480 observed drift=0 tsc_timecounter_init: TSC skew=-510 observed drift=0 tsc_timecounter_init: TSC skew=-430 observed drift=0 tsc_timecounter_init: TSC skew=140 observed drift=0 tsc_timecounter_init: TSC skew=-460 observed drift=0 tsc_timecounter_init: TSC skew=-490 observed drift=0 tsc_timecounter_init: TSC skew=-430 observed drift=0 tsc_timecounter_init: TSC skew=20 observed drift=0 tsc_timecounter_init: TSC skew=-490 observed drift=0 tsc_timecounter_init: TSC skew=-500 observed drift=0 tsc_timecounter_init: TSC skew=-430 observed drift=0 tsc_timecounter_init: TSC skew=-380 observed drift=0 tsc_timecounter_init: TSC skew=-460 observed drift=0 tsc_timecounter_init: TSC skew=-490 observed drift=0 tsc_timecounter_init: TSC skew=-470 observed drift=0 tsc_timecounter_init: TSC skew=0 observed drift=0 tsc_timecounter_init: TSC skew=-460 observed drift=0 tsc_timecounter_init: TSC skew=-480 observed drift=0 tsc_timecounter_init: TSC skew=-440 observed drift=0 tsc_timecounter_init: TSC skew=130 observed drift=0 tsc_timecounter_init: TSC skew=-450 observed drift=0 tsc_timecounter_init: TSC skew=-510 observed drift=0 tsc_timecounter_init: TSC skew=-480 observed drift=0 r6415# dmesg | grep tsc_timecounter_init | wc -l 64 OpenBSD 6.5-current (GENERIC.MP) #4: Wed Aug 7 13:45:08 CEST 2019 hrv...@r6415.lala:/sys/arch/amd64/compile/GENERIC.MP real mem = 27452
Re: TSC synchronization on MP machines
> Date: Tue, 6 Aug 2019 23:29:30 +0300 > From: Paul Irofti > > Hi, > > Here is a fourth diff addressing all the issues so far, that have been > mainly pointed out by kettenis@, thanks! > > Changes: > - stop resetting the observed drift as it does not affect tsc > re-initialization on resume, thus removing all changes from > acpi_machdep.c > - fix comment and put a temporary pretty printf of resume > - rename cpu_cc_skew to ci_tsc_skew > - remove unfinished code using MSR_TSC for synchronization (to > be added later on together with the missing IA32_TSC_ADJUST > wrmsr commands) > > All other technical issues were discussed and settled in private and > require no change to the former diff. > > > For testing you can also use the regress test after booting with tsc as > default clock and waiting for an hour or so to let the clocks go wild: > > # cd /usr/src/regress/sys/kern/clock_gettime > # make regress > > There is another test program flying around the mailing lists I guess, > but I could not locate it now so if someone is kind enough to reply with > the code, that would be lovely! > > Paul Hi Paul, Still some small questions/issues now that the MSR thing has been cleared up. 
With those issues fixed, this is ok kettenis@ > Index: arch/amd64/amd64/cpu.c > === > RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v > retrieving revision 1.137 > diff -u -p -u -p -r1.137 cpu.c > --- arch/amd64/amd64/cpu.c28 May 2019 18:17:01 - 1.137 > +++ arch/amd64/amd64/cpu.c6 Aug 2019 20:19:27 - > @@ -754,6 +754,10 @@ cpu_init(struct cpu_info *ci) > cr4 = rcr4(); > lcr4(cr4 & ~CR4_PGE); > lcr4(cr4); > + > + /* Synchronize TSC */ > + if (cold && !CPU_IS_PRIMARY(ci)) > + tsc_sync_ap(ci); > #endif > } > > @@ -808,6 +812,7 @@ void > cpu_start_secondary(struct cpu_info *ci) > { > int i; > + u_long s; > > ci->ci_flags |= CPUF_AP; > > @@ -828,6 +833,18 @@ cpu_start_secondary(struct cpu_info *ci) > printf("dropping into debugger; continue from here to resume > boot\n"); > db_enter(); > #endif > + } else { > + /* > + * Synchronize time stamp counters. Invalidate cache and > + * synchronize twice (in tsc_sync_bp) to minimize possible > + * cache effects. Disable interrupts to try and rule out any > + * external interference. > + */ > + s = intr_disable(); > + wbinvd(); > + tsc_sync_bp(ci); > + intr_restore(s); > + printf("TSC skew=%lld\n", (long long)ci->ci_tsc_skew); > } > > if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) { > @@ -852,6 +869,8 @@ void > cpu_boot_secondary(struct cpu_info *ci) > { > int i; > + int64_t drift; > + u_long s; > > atomic_setbits_int(&ci->ci_flags, CPUF_GO); > > @@ -864,6 +883,17 @@ cpu_boot_secondary(struct cpu_info *ci) > printf("dropping into debugger; continue from here to resume > boot\n"); > db_enter(); > #endif > + } else if (cold) { > + /* Synchronize TSC again, check for drift. 
*/ > + drift = ci->ci_tsc_skew; > + s = intr_disable(); > + wbinvd(); > + tsc_sync_bp(ci); > + intr_restore(s); > + drift -= ci->ci_tsc_skew; > + printf("TSC skew=%lld drift=%lld\n", > + (long long)ci->ci_tsc_skew, (long long)drift); > + tsc_sync_drift(drift); > } > } > > @@ -888,7 +918,14 @@ cpu_hatch(void *v) > panic("%s: already running!?", ci->ci_dev->dv_xname); > #endif > > + /* > + * Synchronize the TSC for the first time. Note that interrupts are > + * off at this point. > + */ > + wbinvd(); > ci->ci_flags |= CPUF_PRESENT; > + ci->ci_tsc_skew = 0;/* reset on resume */ > + tsc_sync_ap(ci); > > lapic_enable(); > lapic_startclock(); > Index: arch/amd64/amd64/tsc.c > === > RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v > retrieving revision 1.11 > diff -u -p -u -p -r1.11 tsc.c > --- arch/amd64/amd64/tsc.c6 Jun 2019 19:43:35 - 1.11 > +++ arch/amd64/amd64/tsc.c6 Aug 2019 20:19:27 - > @@ -1,8 +1,10 @@ > /* $OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $ */ > /* > + * Copyright (c) 2008 The NetBSD Foundation, Inc. > * Copyright (c) 2016,2017 Reyk Floeter > * Copyright (c) 2017 Adam Steen > * Copyright (c) 2017 Mike Belopuhov > + * Copyright (c) 2019 Paul Irofti > * > * Permission to use, copy, modify, and distribute this software for any > * purpose with or without fee is hereby granted, provided that the above > @@ -20,6 +22,7 @@ > #include > #include >
Re: TSC synchronization on MP machines
On Tue, Aug 06, 2019 at 11:29:30PM +0300, Paul Irofti wrote: > Hi, > > Here is a fourth diff addressing all the issues so far, that have been > mainly pointed out by kettenis@, thanks! > > Changes: > - stop resetting the observed drift as it does not affect tsc > re-initialization on resume, thus removing all changes from > acpi_machdep.c > - fix comment and put a temporary pretty printf of resume > - rename cpu_cc_skew to ci_tsc_skew > - remove unfinished code using MSR_TSC for synchronization (to > be added later on together with the missing IA32_TSC_ADJUST > wrmsr commands) > > All other technical issues were discussed and settled in private and > require no change to the former diff. > > > For testing you can also use the regress test after booting with tsc as > default clock and waiting for an hour or so to let the clocks go wild: > > # cd /usr/src/regress/sys/kern/clock_gettime > # make regress > > There is another test program flying around the mailing lists I guess, > but I could not locate it now so if someone is kind enough to reply with > the code, that would be lovely! > Works fine on my AMD Phenom(tm) II X6 1055T system. I remember some TSC issues with this box but running the test code posted by tb@ never triggered even during a make build. -- :wq Claudio
Re: TSC synchronization on MP machines
/*
 * Version of https://marc.info/?l=openbsd-tech&m=155978126517159&w=2
 * without non-breaking spaces, thus appeasing -Wunicode-whitespace.
 */

#include <sys/time.h>

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

int
main(void)
{
	int r;
	struct timespec tp1, tp2, tout;

	tout.tv_sec = 0;
	tout.tv_nsec = 10;

	for (;;) {
		r = clock_gettime(CLOCK_MONOTONIC, &tp1);
		if (r == -1) {
			perror("clock_gettime");
			exit(-1);
		}

		nanosleep(&tout, NULL);

		r = clock_gettime(CLOCK_MONOTONIC, &tp2);
		if (r == -1) {
			perror("clock_gettime");
			exit(-1);
		}

		// tp1 should never be larger than tp2
		r = timespeccmp(&tp1, &tp2, >);
		if (r == 1) {
			printf("timespeccmp failed\n");
			printf("tp1 s:%lld n:%ld\n", tp1.tv_sec, tp1.tv_nsec);
			printf("tp2 s:%lld n:%ld\n", tp2.tv_sec, tp2.tv_nsec);
			exit(-1);
		}
	}

	return 0;
}
Re: TSC synchronization on MP machines
> There is another test program flying around the mailing lists I guess, > but I could not locate it now so if someone is kind enough to reply with > the code, that would be lovely! I think this is the program you're looking for: https://marc.info/?l=openbsd-tech&m=155978126517159&w=2
Re: TSC synchronization on MP machines
Hi, Here is a fourth diff addressing all the issues so far, that have been mainly pointed out by kettenis@, thanks! Changes: - stop resetting the observed drift as it does not affect tsc re-initialization on resume, thus removing all changes from acpi_machdep.c - fix comment and put a temporary pretty printf of resume - rename cpu_cc_skew to ci_tsc_skew - remove unfinished code using MSR_TSC for synchronization (to be added later on together with the missing IA32_TSC_ADJUST wrmsr commands) All other technical issues were discussed and settled in private and require no change to the former diff. For testing you can also use the regress test after booting with tsc as default clock and waiting for an hour or so to let the clocks go wild: # cd /usr/src/regress/sys/kern/clock_gettime # make regress There is another test program flying around the mailing lists I guess, but I could not locate it now so if someone is kind enough to reply with the code, that would be lovely! Paul Index: arch/amd64/amd64/cpu.c === RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v retrieving revision 1.137 diff -u -p -u -p -r1.137 cpu.c --- arch/amd64/amd64/cpu.c 28 May 2019 18:17:01 - 1.137 +++ arch/amd64/amd64/cpu.c 6 Aug 2019 20:19:27 - @@ -754,6 +754,10 @@ cpu_init(struct cpu_info *ci) cr4 = rcr4(); lcr4(cr4 & ~CR4_PGE); lcr4(cr4); + + /* Synchronize TSC */ + if (cold && !CPU_IS_PRIMARY(ci)) + tsc_sync_ap(ci); #endif } @@ -808,6 +812,7 @@ void cpu_start_secondary(struct cpu_info *ci) { int i; + u_long s; ci->ci_flags |= CPUF_AP; @@ -828,6 +833,18 @@ cpu_start_secondary(struct cpu_info *ci) printf("dropping into debugger; continue from here to resume boot\n"); db_enter(); #endif + } else { + /* +* Synchronize time stamp counters. Invalidate cache and +* synchronize twice (in tsc_sync_bp) to minimize possible +* cache effects. Disable interrupts to try and rule out any +* external interference. 
+*/ + s = intr_disable(); + wbinvd(); + tsc_sync_bp(ci); + intr_restore(s); + printf("TSC skew=%lld\n", (long long)ci->ci_tsc_skew); } if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) { @@ -852,6 +869,8 @@ void cpu_boot_secondary(struct cpu_info *ci) { int i; + int64_t drift; + u_long s; atomic_setbits_int(&ci->ci_flags, CPUF_GO); @@ -864,6 +883,17 @@ cpu_boot_secondary(struct cpu_info *ci) printf("dropping into debugger; continue from here to resume boot\n"); db_enter(); #endif + } else if (cold) { + /* Synchronize TSC again, check for drift. */ + drift = ci->ci_tsc_skew; + s = intr_disable(); + wbinvd(); + tsc_sync_bp(ci); + intr_restore(s); + drift -= ci->ci_tsc_skew; + printf("TSC skew=%lld drift=%lld\n", + (long long)ci->ci_tsc_skew, (long long)drift); + tsc_sync_drift(drift); } } @@ -888,7 +918,14 @@ cpu_hatch(void *v) panic("%s: already running!?", ci->ci_dev->dv_xname); #endif + /* +* Synchronize the TSC for the first time. Note that interrupts are +* off at this point. +*/ + wbinvd(); ci->ci_flags |= CPUF_PRESENT; + ci->ci_tsc_skew = 0;/* reset on resume */ + tsc_sync_ap(ci); lapic_enable(); lapic_startclock(); Index: arch/amd64/amd64/tsc.c === RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v retrieving revision 1.11 diff -u -p -u -p -r1.11 tsc.c --- arch/amd64/amd64/tsc.c 6 Jun 2019 19:43:35 - 1.11 +++ arch/amd64/amd64/tsc.c 6 Aug 2019 20:19:27 - @@ -1,8 +1,10 @@ /* $OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $ */ /* + * Copyright (c) 2008 The NetBSD Foundation, Inc. 
* Copyright (c) 2016,2017 Reyk Floeter * Copyright (c) 2017 Adam Steen * Copyright (c) 2017 Mike Belopuhov + * Copyright (c) 2019 Paul Irofti * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -20,6 +22,7 @@ #include #include #include +#include #include #include @@ -33,6 +36,12 @@ int tsc_recalibrate; uint64_t tsc_frequency; inttsc_is_invariant; +int64_ttsc_drift_max = 250;/* max cycles */ +int64_ttsc_drift_observed; + +volatile int64_t tsc_sync_val; +volatile struct cpu_info *tsc_sync_cpu; + u
Re: TSC synchronization on MP machines
On Tue, Aug 06, 2019 at 12:38:51AM +0200, Mark Kettenis wrote: > > Date: Mon, 5 Aug 2019 16:58:27 +0300 > > From: Paul Irofti > > > > On Fri, Aug 02, 2019 at 01:29:37PM +0300, Paul Irofti wrote: > > > On Mon, Jul 01, 2019 at 10:32:51AM +0200, Mark Kettenis wrote: > > > > > Date: Thu, 27 Jun 2019 15:08:00 +0300 > > > > > From: Paul Irofti > > > > > > > > > > Hi, > > > > > > > > > > Here is an initial diff, adapted from NetBSD, that synchronizes TSC > > > > > clocks across cores. > > > > > > > > > > CPU0 is the reference clock and all others are skewed. During CPU > > > > > initialization the clocks synchronize by keeping a registry of each > > > > > CPU > > > > > clock skewness and adapting the TSC read routine accordingly. > > > > > > > > > > I choose this implementation over what FreeBSD is doing (which is just > > > > > copying Linux really), because it is clean and elegant. > > > > > > > > > > I would love to hear reports from machines that were broken by this. > > > > > Mine, which never exhibited the problem in the first place, run just > > > > > fine with the following diff. In fact I am writting this message on > > > > > one > > > > > such machine. > > > > > > > > > > Also constructive comments are more than welcomed! > > > > > > > > > > Notes: > > > > > > > > > > - cpu_counter_serializing() could probably have a better name > > > > > (tsc _read for example) > > > > > - the PAUSE instruction is probably not needed > > > > > - acpi(4) suspend and resume bits are left out on purpose, but should > > > > > be trivial to add once the current diff settles > > > > > > > > > > Paul Irofti > > > > > > > > I don't think we want to introduce a header file. > > > > > > > > The code suffers from some NetBSD-isms, so that'll need to be fixed. > > > > I pointed some of them out below. > > > > > > > > Also, how accurate is your skew detection? What skew is detected on a > > > > machine that (supposedly) has the TSCs in sync? 
The result will be > > > > that you actually slightly desync the counters on different CPUs. > > > > > > > > I think Linux uses the TSC_ADJUST MSR and compares its value across > > > > cores. If the skew is small and the TSC_ADJUST values are the same > > > > across cores it skips the TSC adjustments. > > > > > > Hi, > > > > > > Here is an updated diff with a few bugs eliminated from the previous and > > > with most of the concerns I got in private and from Mark fixed. > > > > > > I will do the TSC_ADJUST_MSR dance in another iteration if the current > > > incarnation turns out to be correct for machines suffering from TSCs not > > > in sync. > > > > > > The thing I am mostly worried about now is in the following sum > > > > > > uint > > > tsc_get_timecount(struct timecounter *tc) > > > { > > > return rdtsc() + curcpu()->cpu_cc_skew; > > > } > > > > > > can one term be executed on one CPU and the other on another? Is there a > > > way to protect this from happening other than locking? > > > > > > I see NetBSD is checking for a change in the number of context switches > > > of the current process. > > > > > > My plan is to have a fix in the tree before 6.6 is released, so I would > > > love to hear your thoughts and reports on this. > > > > > > Thanks, > > > Paul > > > > Hi, > > > > Here is a third version of the TSC diff that also take into > > consideration the suspend-resume path which was ignored by the previous > > thus rendering resume broken. > > Hmm, wat is this diff supposed to do upon suspend-resume? I'm fairly > certain that you'll need to recalibrate the skew in the resume path. > But it doesn't seem to do that. Or at least it doesn't print any > messages. I agree with kettenis. You definitely will need to re-calculate and apply the skews on resume. -ml > > Further comments/questions/requests below... 
> > > Anyway, here is what gets reported on my x1c3: > > cpu0: Intel(R) Core(TM) i7-5600U CPU @ 2.60GHz, 2494.66 MHz, 06-3d-04 > > tsc_timecounter_init: TSC skew=0 observed drift=0 > tsc_timecounter_init: TSC skew=-344 observed drift=0 > tsc_timecounter_init: TSC skew=-8 observed drift=0 > tsc_timecounter_init: TSC skew=-26 observed drift=0 > > > Index: arch/amd64/amd64/acpi_machdep.c > > === > > RCS file: /cvs/src/sys/arch/amd64/amd64/acpi_machdep.c,v > > retrieving revision 1.86 > > diff -u -p -u -p -r1.86 acpi_machdep.c > > --- arch/amd64/amd64/acpi_machdep.c 23 Oct 2018 17:51:32 - 1.86 > > +++ arch/amd64/amd64/acpi_machdep.c 5 Aug 2019 13:54:33 - > > @@ -60,6 +60,8 @@ extern paddr_t tramp_pdirpa; > > > > extern int acpi_savecpu(void) __returns_twice; > > > > +extern int64_t tsc_drift_observed; > > + > > #define ACPI_BIOS_RSDP_WINDOW_BASE0xe > > #define ACPI_BIOS_RSDP_WINDOW_SIZE0x2 > > > > @@ -481,6 +483,8 @@ acpi_resume_cpu(struct acpi_softc *sc) > > { > > fpuinit(&cpu_info_primary); > > > > + cpu_info_primar
Re: TSC synchronization on MP machines
> Date: Mon, 5 Aug 2019 16:58:27 +0300 > From: Paul Irofti > > On Fri, Aug 02, 2019 at 01:29:37PM +0300, Paul Irofti wrote: > > On Mon, Jul 01, 2019 at 10:32:51AM +0200, Mark Kettenis wrote: > > > > Date: Thu, 27 Jun 2019 15:08:00 +0300 > > > > From: Paul Irofti > > > > > > > > Hi, > > > > > > > > Here is an initial diff, adapted from NetBSD, that synchronizes TSC > > > > clocks across cores. > > > > > > > > CPU0 is the reference clock and all others are skewed. During CPU > > > > initialization the clocks synchronize by keeping a registry of each CPU > > > > clock skewness and adapting the TSC read routine accordingly. > > > > > > > > I choose this implementation over what FreeBSD is doing (which is just > > > > copying Linux really), because it is clean and elegant. > > > > > > > > I would love to hear reports from machines that were broken by this. > > > > Mine, which never exhibited the problem in the first place, run just > > > > fine with the following diff. In fact I am writting this message on one > > > > such machine. > > > > > > > > Also constructive comments are more than welcomed! > > > > > > > > Notes: > > > > > > > > - cpu_counter_serializing() could probably have a better name > > > > (tsc _read for example) > > > > - the PAUSE instruction is probably not needed > > > > - acpi(4) suspend and resume bits are left out on purpose, but should > > > > be trivial to add once the current diff settles > > > > > > > > Paul Irofti > > > > > > I don't think we want to introduce a header file. > > > > > > The code suffers from some NetBSD-isms, so that'll need to be fixed. > > > I pointed some of them out below. > > > > > > Also, how accurate is your skew detection? What skew is detected on a > > > machine that (supposedly) has the TSCs in sync? The result will be > > > that you actually slightly desync the counters on different CPUs. > > > > > > I think Linux uses the TSC_ADJUST MSR and compares its value across > > > cores. 
If the skew is small and the TSC_ADJUST values are the same > > > across cores it skips the TSC adjustments. > > > > Hi, > > > > Here is an updated diff with a few bugs eliminated from the previous and > > with most of the concerns I got in private and from Mark fixed. > > > > I will do the TSC_ADJUST_MSR dance in another iteration if the current > > incarnation turns out to be correct for machines suffering from TSCs not > > in sync. > > > > The thing I am mostly worried about now is in the following sum > > > > uint > > tsc_get_timecount(struct timecounter *tc) > > { > > return rdtsc() + curcpu()->cpu_cc_skew; > > } > > > > can one term be executed on one CPU and the other on another? Is there a > > way to protect this from happening other than locking? > > > > I see NetBSD is checking for a change in the number of context switches > > of the current process. > > > > My plan is to have a fix in the tree before 6.6 is released, so I would > > love to hear your thoughts and reports on this. > > > > Thanks, > > Paul > > Hi, > > Here is a third version of the TSC diff that also take into > consideration the suspend-resume path which was ignored by the previous > thus rendering resume broken. Also... > Index: arch/amd64/amd64/tsc.c > === > RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v > retrieving revision 1.11 > diff -u -p -u -p -r1.11 tsc.c > --- arch/amd64/amd64/tsc.c6 Jun 2019 19:43:35 - 1.11 > +++ arch/amd64/amd64/tsc.c5 Aug 2019 13:54:34 - > @@ -1,8 +1,10 @@ > /* $OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $ */ > /* > + * Copyright (c) 2008 The NetBSD Foundation, Inc. 
> * Copyright (c) 2016,2017 Reyk Floeter > * Copyright (c) 2017 Adam Steen > * Copyright (c) 2017 Mike Belopuhov > + * Copyright (c) 2019 Paul Irofti > * > * Permission to use, copy, modify, and distribute this software for any > * purpose with or without fee is hereby granted, provided that the above > @@ -20,6 +22,7 @@ > #include > #include > #include > +#include > > #include > #include > @@ -33,6 +36,13 @@ inttsc_recalibrate; > uint64_t tsc_frequency; > int tsc_is_invariant; > > +int64_t tsc_drift_max = 250;/* max cycles */ > +int64_t tsc_drift_observed; > +bool tsc_good; > + > +volatile int64_t tsc_sync_val; > +volatile struct cpu_info *tsc_sync_cpu; > + > uint tsc_get_timecount(struct timecounter *tc); > > struct timecounter tsc_timecounter = { > @@ -172,10 +182,8 @@ calibrate_tsc_freq(void) > return; > tsc_frequency = freq; > tsc_timecounter.tc_frequency = freq; > -#ifndef MULTIPROCESSOR > if (tsc_is_invariant) > tsc_timecounter.tc_quality = 2000; > -#endif > } > > void > @@ -194,26 +202,25 @@ cpu_recalibrate_tsc(struct timecounter * > uint > tsc_get_timecount(struct timecounter *tc) > { > - return rdtsc(
Re: TSC synchronization on MP machines
> Date: Mon, 5 Aug 2019 16:58:27 +0300 > From: Paul Irofti > > On Fri, Aug 02, 2019 at 01:29:37PM +0300, Paul Irofti wrote: > > On Mon, Jul 01, 2019 at 10:32:51AM +0200, Mark Kettenis wrote: > > > > Date: Thu, 27 Jun 2019 15:08:00 +0300 > > > > From: Paul Irofti > > > > > > > > Hi, > > > > > > > > Here is an initial diff, adapted from NetBSD, that synchronizes TSC > > > > clocks across cores. > > > > > > > > CPU0 is the reference clock and all others are skewed. During CPU > > > > initialization the clocks synchronize by keeping a registry of each CPU > > > > clock skewness and adapting the TSC read routine accordingly. > > > > > > > > I choose this implementation over what FreeBSD is doing (which is just > > > > copying Linux really), because it is clean and elegant. > > > > > > > > I would love to hear reports from machines that were broken by this. > > > > Mine, which never exhibited the problem in the first place, run just > > > > fine with the following diff. In fact I am writting this message on one > > > > such machine. > > > > > > > > Also constructive comments are more than welcomed! > > > > > > > > Notes: > > > > > > > > - cpu_counter_serializing() could probably have a better name > > > > (tsc _read for example) > > > > - the PAUSE instruction is probably not needed > > > > - acpi(4) suspend and resume bits are left out on purpose, but should > > > > be trivial to add once the current diff settles > > > > > > > > Paul Irofti > > > > > > I don't think we want to introduce a header file. > > > > > > The code suffers from some NetBSD-isms, so that'll need to be fixed. > > > I pointed some of them out below. > > > > > > Also, how accurate is your skew detection? What skew is detected on a > > > machine that (supposedly) has the TSCs in sync? The result will be > > > that you actually slightly desync the counters on different CPUs. > > > > > > I think Linux uses the TSC_ADJUST MSR and compares its value across > > > cores. 
If the skew is small and the TSC_ADJUST values are the same > > > across cores it skips the TSC adjustments. > > Hi, > > Here is an updated diff with a few bugs eliminated from the previous and > with most of the concerns I got in private and from Mark fixed. > > I will do the TSC_ADJUST_MSR dance in another iteration if the current > incarnation turns out to be correct for machines suffering from TSCs not > in sync. > > The thing I am mostly worried about now is in the following sum > > uint > tsc_get_timecount(struct timecounter *tc) > { > return rdtsc() + curcpu()->cpu_cc_skew; > } > > can one term be executed on one CPU and the other on another? Is there a > way to protect this from happening other than locking? > > I see NetBSD is checking for a change in the number of context switches > of the current process. > > My plan is to have a fix in the tree before 6.6 is released, so I would > love to hear your thoughts and reports on this. > > Thanks, > Paul > > Hi, > > Here is a third version of the TSC diff that also takes into > consideration the suspend-resume path which was ignored by the previous, > thus rendering resume broken. Hmm, what is this diff supposed to do upon suspend-resume? I'm fairly certain that you'll need to recalibrate the skew in the resume path. But it doesn't seem to do that. Or at least it doesn't print any messages. Further comments/questions/requests below... 
Anyway, here is what gets reported on my x1c3: cpu0: Intel(R) Core(TM) i7-5600U CPU @ 2.60GHz, 2494.66 MHz, 06-3d-04 tsc_timecounter_init: TSC skew=0 observed drift=0 tsc_timecounter_init: TSC skew=-344 observed drift=0 tsc_timecounter_init: TSC skew=-8 observed drift=0 tsc_timecounter_init: TSC skew=-26 observed drift=0 > Index: arch/amd64/amd64/acpi_machdep.c > === > RCS file: /cvs/src/sys/arch/amd64/amd64/acpi_machdep.c,v > retrieving revision 1.86 > diff -u -p -u -p -r1.86 acpi_machdep.c > --- arch/amd64/amd64/acpi_machdep.c 23 Oct 2018 17:51:32 - 1.86 > +++ arch/amd64/amd64/acpi_machdep.c 5 Aug 2019 13:54:33 - > @@ -60,6 +60,8 @@ extern paddr_t tramp_pdirpa; > > extern int acpi_savecpu(void) __returns_twice; > > +extern int64_t tsc_drift_observed; > + > #define ACPI_BIOS_RSDP_WINDOW_BASE0xe > #define ACPI_BIOS_RSDP_WINDOW_SIZE0x2 > > @@ -481,6 +483,8 @@ acpi_resume_cpu(struct acpi_softc *sc) > { > fpuinit(&cpu_info_primary); > > + cpu_info_primary.cpu_cc_skew = 0; /* futile */ So you'll drop it from the final version? > + tsc_drift_observed = 0; /* reset tsc drift on resume */ What is the point of this? Do you think that after a suspend/resume cycle the TSCs are suddenly not drifting anymore? > cpu_init(&cpu_info_primary); > cpu_ucode_apply(&cpu_info_primary); > > Index: arch/amd64/amd64/cpu.c > =
Re: TSC synchronization on MP machines
On Mon, Aug 05, 2019 at 04:58:27PM +0300, Paul Irofti wrote: > Hi, > > Here is a third version of the TSC diff that also take into > consideration the suspend-resume path which was ignored by the previous > thus rendering resume broken. > > Have a go at it. Reports are welcome. So far I only got ONE report from > a machine with broken TSC :( > > Paul > > > Index: arch/amd64/amd64/acpi_machdep.c > === > RCS file: /cvs/src/sys/arch/amd64/amd64/acpi_machdep.c,v > retrieving revision 1.86 > diff -u -p -u -p -r1.86 acpi_machdep.c > --- arch/amd64/amd64/acpi_machdep.c 23 Oct 2018 17:51:32 - 1.86 > +++ arch/amd64/amd64/acpi_machdep.c 5 Aug 2019 13:54:33 - > @@ -60,6 +60,8 @@ extern paddr_t tramp_pdirpa; > > extern int acpi_savecpu(void) __returns_twice; > > +extern int64_t tsc_drift_observed; > + > #define ACPI_BIOS_RSDP_WINDOW_BASE0xe > #define ACPI_BIOS_RSDP_WINDOW_SIZE0x2 > > @@ -481,6 +483,8 @@ acpi_resume_cpu(struct acpi_softc *sc) > { > fpuinit(&cpu_info_primary); > > + cpu_info_primary.cpu_cc_skew = 0; /* futile */ > + tsc_drift_observed = 0; /* reset tsc drift on resume */ > cpu_init(&cpu_info_primary); > cpu_ucode_apply(&cpu_info_primary); > > Index: arch/amd64/amd64/cpu.c > === > RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v > retrieving revision 1.137 > diff -u -p -u -p -r1.137 cpu.c > --- arch/amd64/amd64/cpu.c28 May 2019 18:17:01 - 1.137 > +++ arch/amd64/amd64/cpu.c5 Aug 2019 13:54:34 - > @@ -754,6 +754,10 @@ cpu_init(struct cpu_info *ci) > cr4 = rcr4(); > lcr4(cr4 & ~CR4_PGE); > lcr4(cr4); > + > + /* Synchronize TSC */ > + if (cold && !CPU_IS_PRIMARY(ci)) > + tsc_sync_ap(ci); > #endif > } > > @@ -808,6 +812,7 @@ void > cpu_start_secondary(struct cpu_info *ci) > { > int i; > + u_long s; > > ci->ci_flags |= CPUF_AP; > > @@ -828,6 +833,17 @@ cpu_start_secondary(struct cpu_info *ci) > printf("dropping into debugger; continue from here to resume > boot\n"); > db_enter(); > #endif > + } else { > + /* > + * Synchronize time stamp counters. 
Invalidate cache and do > + * twice (in tsc_sync_bp) to minimize possible cache effects. > + * Disable interrupts to try and rule out any external > + * interference. > + */ > + s = intr_disable(); > + wbinvd(); > + tsc_sync_bp(ci); > + intr_restore(s); > } > > if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) { > @@ -852,6 +868,8 @@ void > cpu_boot_secondary(struct cpu_info *ci) > { > int i; > + int64_t drift; > + u_long s; > > atomic_setbits_int(&ci->ci_flags, CPUF_GO); > > @@ -864,6 +882,17 @@ cpu_boot_secondary(struct cpu_info *ci) > printf("dropping into debugger; continue from here to resume > boot\n"); > db_enter(); > #endif > + } else if (cold) { > + /* Synchronize TSC again, check for drift. */ > + drift = ci->cpu_cc_skew; > + s = intr_disable(); > + wbinvd(); > + tsc_sync_bp(ci); > + intr_restore(s); > + drift -= ci->cpu_cc_skew; > + printf("TSC skew=%lld drift=%lld\n", > + (long long)ci->cpu_cc_skew, (long long)drift); > + tsc_sync_drift(drift); > } > } > > @@ -888,7 +917,14 @@ cpu_hatch(void *v) > panic("%s: already running!?", ci->ci_dev->dv_xname); > #endif > > + /* > + * Synchronize the TSC for the first time. Note that interrupts are > + * off at this point. > + */ > + wbinvd(); > ci->ci_flags |= CPUF_PRESENT; > + ci->cpu_cc_skew = 0;/* reset on resume */ > + tsc_sync_ap(ci); > > lapic_enable(); > lapic_startclock(); > Index: arch/amd64/amd64/tsc.c > === > RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v > retrieving revision 1.11 > diff -u -p -u -p -r1.11 tsc.c > --- arch/amd64/amd64/tsc.c6 Jun 2019 19:43:35 - 1.11 > +++ arch/amd64/amd64/tsc.c5 Aug 2019 13:54:34 - > @@ -1,8 +1,10 @@ > /* $OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $ */ > /* > + * Copyright (c) 2008 The NetBSD Foundation, Inc. 
> * Copyright (c) 2016,2017 Reyk Floeter > * Copyright (c) 2017 Adam Steen > * Copyright (c) 2017 Mike Belopuhov > + * Copyright (c) 2019 Paul Irofti > * > * Permission to use, copy, modify, and distribute this software for any > * purpose with or without fee is hereby granted, provided that the above > @@ -20,6 +22,7 @@ > #include > #include > #include > +#include > > #include > #include > @@ -
Re: TSC synchronization on MP machines
On Fri, Aug 02, 2019 at 01:29:37PM +0300, Paul Irofti wrote: > On Mon, Jul 01, 2019 at 10:32:51AM +0200, Mark Kettenis wrote: > > > Date: Thu, 27 Jun 2019 15:08:00 +0300 > > > From: Paul Irofti > > > > > > Hi, > > > > > > Here is an initial diff, adapted from NetBSD, that synchronizes TSC > > > clocks across cores. > > > > > > CPU0 is the reference clock and all others are skewed. During CPU > > > initialization the clocks synchronize by keeping a registry of each CPU > > > clock skewness and adapting the TSC read routine accordingly. > > > > > > I choose this implementation over what FreeBSD is doing (which is just > > > copying Linux really), because it is clean and elegant. > > > > > > I would love to hear reports from machines that were broken by this. > > > Mine, which never exhibited the problem in the first place, run just > > > fine with the following diff. In fact I am writting this message on one > > > such machine. > > > > > > Also constructive comments are more than welcomed! > > > > > > Notes: > > > > > > - cpu_counter_serializing() could probably have a better name > > > (tsc _read for example) > > > - the PAUSE instruction is probably not needed > > > - acpi(4) suspend and resume bits are left out on purpose, but should > > > be trivial to add once the current diff settles > > > > > > Paul Irofti > > > > I don't think we want to introduce a header file. > > > > The code suffers from some NetBSD-isms, so that'll need to be fixed. > > I pointed some of them out below. > > > > Also, how accurate is your skew detection? What skew is detected on a > > machine that (supposedly) has the TSCs in sync? The result will be > > that you actually slightly desync the counters on different CPUs. > > > > I think Linux uses the TSC_ADJUST MSR and compares its value across > > cores. If the skew is small and the TSC_ADJUST values are the same > > across cores it skips the TSC adjustments. 
> > Hi, > > Here is an updated diff with a few bugs eliminated from the previous and > with most of the concerns I got in private and from Mark fixed. > > I will do the TSC_ADJUST_MSR dance in another iteration if the current > incarnation turns out to be correct for machines suffering from TSCs not > in sync. > > The thing I am mostly worried about now is in the following sum > > uint > tsc_get_timecount(struct timecounter *tc) > { > return rdtsc() + curcpu()->cpu_cc_skew; > } > > can one term be executed on one CPU and the other on another? Is there a > way to protect this from happening other than locking? > > I see NetBSD is checking for a change in the number of context switches > of the current process. > > My plan is to have a fix in the tree before 6.6 is released, so I would > love to hear your thoughts and reports on this. > > Thanks, > Paul Hi, Here is a third version of the TSC diff that also takes into consideration the suspend-resume path which was ignored by the previous, thus rendering resume broken. Have a go at it. Reports are welcome. 
So far I only got ONE report from a machine with broken TSC :( Paul Index: arch/amd64/amd64/acpi_machdep.c === RCS file: /cvs/src/sys/arch/amd64/amd64/acpi_machdep.c,v retrieving revision 1.86 diff -u -p -u -p -r1.86 acpi_machdep.c --- arch/amd64/amd64/acpi_machdep.c 23 Oct 2018 17:51:32 - 1.86 +++ arch/amd64/amd64/acpi_machdep.c 5 Aug 2019 13:54:33 - @@ -60,6 +60,8 @@ extern paddr_t tramp_pdirpa; extern int acpi_savecpu(void) __returns_twice; +extern int64_t tsc_drift_observed; + #define ACPI_BIOS_RSDP_WINDOW_BASE0xe #define ACPI_BIOS_RSDP_WINDOW_SIZE0x2 @@ -481,6 +483,8 @@ acpi_resume_cpu(struct acpi_softc *sc) { fpuinit(&cpu_info_primary); + cpu_info_primary.cpu_cc_skew = 0; /* futile */ + tsc_drift_observed = 0; /* reset tsc drift on resume */ cpu_init(&cpu_info_primary); cpu_ucode_apply(&cpu_info_primary); Index: arch/amd64/amd64/cpu.c === RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v retrieving revision 1.137 diff -u -p -u -p -r1.137 cpu.c --- arch/amd64/amd64/cpu.c 28 May 2019 18:17:01 - 1.137 +++ arch/amd64/amd64/cpu.c 5 Aug 2019 13:54:34 - @@ -754,6 +754,10 @@ cpu_init(struct cpu_info *ci) cr4 = rcr4(); lcr4(cr4 & ~CR4_PGE); lcr4(cr4); + + /* Synchronize TSC */ + if (cold && !CPU_IS_PRIMARY(ci)) + tsc_sync_ap(ci); #endif } @@ -808,6 +812,7 @@ void cpu_start_secondary(struct cpu_info *ci) { int i; + u_long s; ci->ci_flags |= CPUF_AP; @@ -828,6 +833,17 @@ cpu_start_secondary(struct cpu_info *ci) printf("dropping into debugger; continue from here to resume boot\n"); db_enter(); #endif + } else { + /* +* Synchron
Re: TSC synchronization on MP machines
On Fri, Aug 02, 2019 at 01:29:37PM +0300, Paul Irofti wrote: > Hi, > > Here is an updated diff with a few bugs eliminated from the previous and > with most of the concerns I got in private and from Mark fixed. > > I will do the TSC_ADJUST_MSR dance in another iteration if the current > incarnation turns out to be correct for machines suffering from TSCs not > in sync. > > The thing I am mostly worried about now is in the following sum > > uint > tsc_get_timecount(struct timecounter *tc) > { > return rdtsc() + curcpu()->cpu_cc_skew; > } > > can one term be executed on one CPU and the other on another? Is there a > way to protect this from happening other than locking? > > I see NetBSD is checking for a change in the number of context switches > of the current process. > > My plan is to have a fix in the tree before 6.6 is released, so I would > love to hear your thoughts and reports on this. > > Thanks, > Paul > > > Index: arch/amd64/amd64/cpu.c > === > RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v > retrieving revision 1.137 > diff -u -p -u -p -r1.137 cpu.c > --- arch/amd64/amd64/cpu.c28 May 2019 18:17:01 - 1.137 > +++ arch/amd64/amd64/cpu.c2 Aug 2019 10:25:04 - > @@ -754,6 +754,10 @@ cpu_init(struct cpu_info *ci) > cr4 = rcr4(); > lcr4(cr4 & ~CR4_PGE); > lcr4(cr4); > + > + /* Synchronize TSC */ > + if (!CPU_IS_PRIMARY(ci)) > + tsc_sync_ap(ci); > #endif > } > > @@ -808,6 +812,7 @@ void > cpu_start_secondary(struct cpu_info *ci) > { > int i; > + u_long s; > > ci->ci_flags |= CPUF_AP; > > @@ -828,6 +833,17 @@ cpu_start_secondary(struct cpu_info *ci) > printf("dropping into debugger; continue from here to resume > boot\n"); > db_enter(); > #endif > + } else { > + /* > + * Synchronize time stamp counters. Invalidate cache and do > + * twice (in tsc_sync_bp) to minimize possible cache effects. > + * Disable interrupts to try and rule out any external > + * interference. 
> + */ > + s = intr_disable(); > + wbinvd(); > + tsc_sync_bp(ci); > + intr_restore(s); > } > > if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) { > @@ -852,6 +868,8 @@ void > cpu_boot_secondary(struct cpu_info *ci) > { > int i; > + int64_t drift; > + u_long s; > > atomic_setbits_int(&ci->ci_flags, CPUF_GO); > > @@ -864,6 +882,17 @@ cpu_boot_secondary(struct cpu_info *ci) > printf("dropping into debugger; continue from here to resume > boot\n"); > db_enter(); > #endif > + } else { > + /* Synchronize TSC again, check for drift. */ > + drift = ci->cpu_cc_skew; > + s = intr_disable(); > + wbinvd(); > + tsc_sync_bp(ci); > + intr_restore(s); > + drift -= ci->cpu_cc_skew; > + printf("TSC skew=%lld drift=%lld\n", > + (long long)ci->cpu_cc_skew, (long long)drift); > + tsc_sync_drift(drift); > } > } > > @@ -888,7 +917,13 @@ cpu_hatch(void *v) > panic("%s: already running!?", ci->ci_dev->dv_xname); > #endif > > + /* > + * Synchronize the TSC for the first time. Note that interrupts are > + * off at this point. > + */ > + wbinvd(); > ci->ci_flags |= CPUF_PRESENT; > + tsc_sync_ap(ci); > > lapic_enable(); > lapic_startclock(); > Index: arch/amd64/amd64/tsc.c > === > RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v > retrieving revision 1.11 > diff -u -p -u -p -r1.11 tsc.c > --- arch/amd64/amd64/tsc.c6 Jun 2019 19:43:35 - 1.11 > +++ arch/amd64/amd64/tsc.c2 Aug 2019 10:25:04 - > @@ -1,8 +1,10 @@ > /* $OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $ */ > /* > + * Copyright (c) 2008 The NetBSD Foundation, Inc. 
> * Copyright (c) 2016,2017 Reyk Floeter > * Copyright (c) 2017 Adam Steen > * Copyright (c) 2017 Mike Belopuhov > + * Copyright (c) 2019 Paul Irofti > * > * Permission to use, copy, modify, and distribute this software for any > * purpose with or without fee is hereby granted, provided that the above > @@ -20,6 +22,7 @@ > #include > #include > #include > +#include > > #include > #include > @@ -33,6 +36,13 @@ inttsc_recalibrate; > uint64_t tsc_frequency; > int tsc_is_invariant; > > +int64_t tsc_drift_max = 250;/* max cycles */ > +int64_t tsc_drift_observed; > +bool tsc_good; > + > +volatile int64_t tsc_sync_val; > +volatile struct cpu_info *tsc_sync_cpu; > + > uint tsc_get_timecount(struct timecounter *tc); > > struct timecounter tsc_timecounter = { > @@ -172,10 +182,8 @@ calib
Re: TSC synchronization on MP machines
> Date: Fri, 2 Aug 2019 13:29:37 +0300 > From: Paul Irofti > > On Mon, Jul 01, 2019 at 10:32:51AM +0200, Mark Kettenis wrote: > > > Date: Thu, 27 Jun 2019 15:08:00 +0300 > > > From: Paul Irofti > > > > > > Hi, > > > > > > Here is an initial diff, adapted from NetBSD, that synchronizes TSC > > > clocks across cores. > > > > > > CPU0 is the reference clock and all others are skewed. During CPU > > > initialization the clocks synchronize by keeping a registry of each CPU > > > clock skewness and adapting the TSC read routine accordingly. > > > > > > I choose this implementation over what FreeBSD is doing (which is just > > > copying Linux really), because it is clean and elegant. > > > > > > I would love to hear reports from machines that were broken by this. > > > Mine, which never exhibited the problem in the first place, run just > > > fine with the following diff. In fact I am writting this message on one > > > such machine. > > > > > > Also constructive comments are more than welcomed! > > > > > > Notes: > > > > > > - cpu_counter_serializing() could probably have a better name > > > (tsc _read for example) > > > - the PAUSE instruction is probably not needed > > > - acpi(4) suspend and resume bits are left out on purpose, but should > > > be trivial to add once the current diff settles > > > > > > Paul Irofti > > > > I don't think we want to introduce a header file. > > > > The code suffers from some NetBSD-isms, so that'll need to be fixed. > > I pointed some of them out below. > > > > Also, how accurate is your skew detection? What skew is detected on a > > machine that (supposedly) has the TSCs in sync? The result will be > > that you actually slightly desync the counters on different CPUs. > > > > I think Linux uses the TSC_ADJUST MSR and compares its value across > > cores. If the skew is small and the TSC_ADJUST values are the same > > across cores it skips the TSC adjustments. 
> > Hi, > > Here is an updated diff with a few bugs eliminated from the previous and > with most of the concerns I got in private and from Mark fixed. > > I will do the TSC_ADJUST_MSR dance in another iteration if the current > incarnation turns out to be correct for machines suffering from TSCs not > in sync. > > The thing I am mostly worried about now is in the following sum > > uint > tsc_get_timecount(struct timecounter *tc) > { > return rdtsc() + curcpu()->cpu_cc_skew; > } > > can one term be executed on one CPU and the other on another? Is there a > way to protect this from happening other than locking? Our kernel is non-preemptable so a context switch will only happen if you sleep. So that isn't an issue. > I see NetBSD is checking for a change in the number of context switches > of the current process.
Re: TSC synchronization on MP machines
On Mon, Jul 01, 2019 at 10:32:51AM +0200, Mark Kettenis wrote: > > Date: Thu, 27 Jun 2019 15:08:00 +0300 > > From: Paul Irofti > > > > Hi, > > > > Here is an initial diff, adapted from NetBSD, that synchronizes TSC > > clocks across cores. > > > > CPU0 is the reference clock and all others are skewed. During CPU > > initialization the clocks synchronize by keeping a registry of each CPU > > clock skewness and adapting the TSC read routine accordingly. > > > > I choose this implementation over what FreeBSD is doing (which is just > > copying Linux really), because it is clean and elegant. > > > > I would love to hear reports from machines that were broken by this. > > Mine, which never exhibited the problem in the first place, run just > > fine with the following diff. In fact I am writting this message on one > > such machine. > > > > Also constructive comments are more than welcomed! > > > > Notes: > > > > - cpu_counter_serializing() could probably have a better name > > (tsc _read for example) > > - the PAUSE instruction is probably not needed > > - acpi(4) suspend and resume bits are left out on purpose, but should > > be trivial to add once the current diff settles > > > > Paul Irofti > > I don't think we want to introduce a header file. > > The code suffers from some NetBSD-isms, so that'll need to be fixed. > I pointed some of them out below. > > Also, how accurate is your skew detection? What skew is detected on a > machine that (supposedly) has the TSCs in sync? The result will be > that you actually slightly desync the counters on different CPUs. > > I think Linux uses the TSC_ADJUST MSR and compares its value across > cores. If the skew is small and the TSC_ADJUST values are the same > across cores it skips the TSC adjustments. Hi, Here is an updated diff with a few bugs eliminated from the previous and with most of the concerns I got in private and from Mark fixed. 
I will do the TSC_ADJUST_MSR dance in another iteration if the current incarnation turns out to be correct for machines suffering from TSCs not in sync. The thing I am mostly worried about now is in the following sum uint tsc_get_timecount(struct timecounter *tc) { return rdtsc() + curcpu()->cpu_cc_skew; } can one term be executed on one CPU and the other on another? Is there a way to protect this from happening other than locking? I see NetBSD is checking for a change in the number of context switches of the current process. My plan is to have a fix in the tree before 6.6 is released, so I would love to hear your thoughts and reports on this. Thanks, Paul Index: arch/amd64/amd64/cpu.c === RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v retrieving revision 1.137 diff -u -p -u -p -r1.137 cpu.c --- arch/amd64/amd64/cpu.c 28 May 2019 18:17:01 - 1.137 +++ arch/amd64/amd64/cpu.c 2 Aug 2019 10:25:04 - @@ -754,6 +754,10 @@ cpu_init(struct cpu_info *ci) cr4 = rcr4(); lcr4(cr4 & ~CR4_PGE); lcr4(cr4); + + /* Synchronize TSC */ + if (!CPU_IS_PRIMARY(ci)) + tsc_sync_ap(ci); #endif } @@ -808,6 +812,7 @@ void cpu_start_secondary(struct cpu_info *ci) { int i; + u_long s; ci->ci_flags |= CPUF_AP; @@ -828,6 +833,17 @@ cpu_start_secondary(struct cpu_info *ci) printf("dropping into debugger; continue from here to resume boot\n"); db_enter(); #endif + } else { + /* +* Synchronize time stamp counters. Invalidate cache and do +* twice (in tsc_sync_bp) to minimize possible cache effects. +* Disable interrupts to try and rule out any external +* interference. 
+*/ + s = intr_disable(); + wbinvd(); + tsc_sync_bp(ci); + intr_restore(s); } if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) { @@ -852,6 +868,8 @@ void cpu_boot_secondary(struct cpu_info *ci) { int i; + int64_t drift; + u_long s; atomic_setbits_int(&ci->ci_flags, CPUF_GO); @@ -864,6 +882,17 @@ cpu_boot_secondary(struct cpu_info *ci) printf("dropping into debugger; continue from here to resume boot\n"); db_enter(); #endif + } else { + /* Synchronize TSC again, check for drift. */ + drift = ci->cpu_cc_skew; + s = intr_disable(); + wbinvd(); + tsc_sync_bp(ci); + intr_restore(s); + drift -= ci->cpu_cc_skew; + printf("TSC skew=%lld drift=%lld\n", + (long long)ci->cpu_cc_skew, (long long)drift); + tsc_sync_drift(drift); } } @@ -888,7 +917,13 @@ cpu_hatch(void *v) panic("%s: already running!?", ci->ci_dev->dv_xname); #endif + /* +* Synchronize
Re: TSC synchronization on MP machines
> Date: Thu, 27 Jun 2019 15:08:00 +0300 > From: Paul Irofti > > Hi, > > Here is an initial diff, adapted from NetBSD, that synchronizes TSC > clocks across cores. > > CPU0 is the reference clock and all others are skewed. During CPU > initialization the clocks synchronize by keeping a registry of each CPU > clock skewness and adapting the TSC read routine accordingly. > > I choose this implementation over what FreeBSD is doing (which is just > copying Linux really), because it is clean and elegant. > > I would love to hear reports from machines that were broken by this. > Mine, which never exhibited the problem in the first place, run just > fine with the following diff. In fact I am writing this message on one > such machine. > > Also constructive comments are more than welcome! > > Notes: > > - cpu_counter_serializing() could probably have a better name > (tsc_read for example) > - the PAUSE instruction is probably not needed > - acpi(4) suspend and resume bits are left out on purpose, but should > be trivial to add once the current diff settles > > Paul Irofti I don't think we want to introduce a header file. The code suffers from some NetBSD-isms, so that'll need to be fixed. I pointed some of them out below. Also, how accurate is your skew detection? What skew is detected on a machine that (supposedly) has the TSCs in sync? The result will be that you actually slightly desync the counters on different CPUs. I think Linux uses the TSC_ADJUST MSR and compares its value across cores. If the skew is small and the TSC_ADJUST values are the same across cores it skips the TSC adjustments. 
Cheers, Mark > > Index: arch/amd64/amd64/cpu.c > === > RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v > retrieving revision 1.137 > diff -u -p -u -p -r1.137 cpu.c > --- arch/amd64/amd64/cpu.c28 May 2019 18:17:01 - 1.137 > +++ arch/amd64/amd64/cpu.c27 Jun 2019 11:55:08 - > @@ -96,6 +96,7 @@ > #include > #include > #include > +#include > > #if NLAPIC > 0 > #include > @@ -754,6 +755,10 @@ cpu_init(struct cpu_info *ci) > cr4 = rcr4(); > lcr4(cr4 & ~CR4_PGE); > lcr4(cr4); > + > + /* Synchronize TSC */ > + if (!CPU_IS_PRIMARY(ci)) > + tsc_sync_ap(ci); > #endif > } > > @@ -808,6 +813,7 @@ void > cpu_start_secondary(struct cpu_info *ci) > { > int i; > + u_long s; > > ci->ci_flags |= CPUF_AP; > > @@ -828,8 +834,20 @@ cpu_start_secondary(struct cpu_info *ci) > printf("dropping into debugger; continue from here to resume > boot\n"); > db_enter(); > #endif > + } else { > + /* > + * Synchronize time stamp counters. Invalidate cache and do > + * twice (in tsc_sync_bp) to minimize possible cache effects. > + * Disable interrupts to try and rule out any external > + * interference. > + */ > + s = intr_disable(); > + wbinvd(); > + tsc_sync_bp(ci); > + intr_restore(s); > } > > + Please don't introduce additional empty lines. > if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) { > atomic_setbits_int(&ci->ci_flags, CPUF_IDENTIFY); > > @@ -852,6 +870,8 @@ void > cpu_boot_secondary(struct cpu_info *ci) > { > int i; > + int64_t drift; > + u_long s; > > atomic_setbits_int(&ci->ci_flags, CPUF_GO); > > @@ -864,6 +884,17 @@ cpu_boot_secondary(struct cpu_info *ci) > printf("dropping into debugger; continue from here to resume > boot\n"); > db_enter(); > #endif > + } else { > + /* Synchronize TSC again, check for drift. 
*/ > + drift = ci->cpu_cc_skew; > + s = intr_disable(); > + wbinvd(); > + tsc_sync_bp(ci); > + intr_restore(s); > + drift -= ci->cpu_cc_skew; > + printf("TSC skew=%lld drift=%lld\n", > + (long long)ci->cpu_cc_skew, (long long)drift); > + tsc_sync_drift(drift); > } > } > > @@ -888,7 +919,13 @@ cpu_hatch(void *v) > panic("%s: already running!?", ci->ci_dev->dv_xname); > #endif > > + /* > + * Synchronize the TSC for the first time. Note that interrupts are > + * off at this point. > + */ > + wbinvd(); > ci->ci_flags |= CPUF_PRESENT; > + tsc_sync_ap(ci); > > lapic_enable(); > lapic_startclock(); > Index: arch/amd64/amd64/tsc.c > === > RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v > retrieving revision 1.11 > diff -u -p -u -p -r1.11 tsc.c > --- arch/amd64/amd64/tsc.c6 Jun 2019 19:43:35 - 1.11 > +++ arch/amd64/amd64/tsc.c27 Jun 2019 11:55:08 - > @@ -1,8 +1,10 @@ > /* $OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $ */ >
Re: TSC synchronization on MP machines
Hi, Thanks for the report! This does not look correct. TSC skew=-6129185140 drift=170 TSC skew=-6129184900 drift=-10 TSC skew=-6129184890 drift=-20 TSC skew=-6129184910 drift=30 TSC skew=-6129184910 drift=10 TSC skew=-6129184900 drift=20 TSC skew=-6129184910 drift=30 I'll be back with some printf's. Paul
Re: TSC synchronization on MP machines
Paul Irofti writes: > Hi, > > Here is an initial diff, adapted from NetBSD, that synchronizes TSC > clocks across cores. > > CPU0 is the reference clock and all others are skewed. During CPU > initialization the clocks synchronize by keeping a registry of each CPU > clock skewness and adapting the TSC read routine accordingly. > > I choose this implementation over what FreeBSD is doing (which is just > copying Linux really), because it is clean and elegant. > > I would love to hear reports from machines that were broken by this. > Mine, which never exhibited the problem in the first place, run just > fine with the following diff. In fact I am writting this message on one > such machine. > > Also constructive comments are more than welcomed! > > Notes: > > - cpu_counter_serializing() could probably have a better name > (tsc _read for example) > - the PAUSE instruction is probably not needed > - acpi(4) suspend and resume bits are left out on purpose, but should > be trivial to add once the current diff settles > > Paul Irofti > > Index: arch/amd64/amd64/cpu.c > === > RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v > retrieving revision 1.137 > diff -u -p -u -p -r1.137 cpu.c > --- arch/amd64/amd64/cpu.c28 May 2019 18:17:01 - 1.137 > +++ arch/amd64/amd64/cpu.c27 Jun 2019 11:55:08 - > @@ -96,6 +96,7 @@ > #include > #include > #include > +#include > > #if NLAPIC > 0 > #include > @@ -754,6 +755,10 @@ cpu_init(struct cpu_info *ci) > cr4 = rcr4(); > lcr4(cr4 & ~CR4_PGE); > lcr4(cr4); > + > + /* Synchronize TSC */ > + if (!CPU_IS_PRIMARY(ci)) > + tsc_sync_ap(ci); > #endif > } > > @@ -808,6 +813,7 @@ void > cpu_start_secondary(struct cpu_info *ci) > { > int i; > + u_long s; > > ci->ci_flags |= CPUF_AP; > > @@ -828,8 +834,20 @@ cpu_start_secondary(struct cpu_info *ci) > printf("dropping into debugger; continue from here to resume > boot\n"); > db_enter(); > #endif > + } else { > + /* > + * Synchronize time stamp counters. 
Invalidate cache and do > + * twice (in tsc_sync_bp) to minimize possible cache effects. > + * Disable interrupts to try and rule out any external > + * interference. > + */ > + s = intr_disable(); > + wbinvd(); > + tsc_sync_bp(ci); > + intr_restore(s); > } > > + > if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) { > atomic_setbits_int(&ci->ci_flags, CPUF_IDENTIFY); > > @@ -852,6 +870,8 @@ void > cpu_boot_secondary(struct cpu_info *ci) > { > int i; > + int64_t drift; > + u_long s; > > atomic_setbits_int(&ci->ci_flags, CPUF_GO); > > @@ -864,6 +884,17 @@ cpu_boot_secondary(struct cpu_info *ci) > printf("dropping into debugger; continue from here to resume > boot\n"); > db_enter(); > #endif > + } else { > + /* Synchronize TSC again, check for drift. */ > + drift = ci->cpu_cc_skew; > + s = intr_disable(); > + wbinvd(); > + tsc_sync_bp(ci); > + intr_restore(s); > + drift -= ci->cpu_cc_skew; > + printf("TSC skew=%lld drift=%lld\n", > + (long long)ci->cpu_cc_skew, (long long)drift); > + tsc_sync_drift(drift); > } > } > > @@ -888,7 +919,13 @@ cpu_hatch(void *v) > panic("%s: already running!?", ci->ci_dev->dv_xname); > #endif > > + /* > + * Synchronize the TSC for the first time. Note that interrupts are > + * off at this point. > + */ > + wbinvd(); > ci->ci_flags |= CPUF_PRESENT; > + tsc_sync_ap(ci); > > lapic_enable(); > lapic_startclock(); > Index: arch/amd64/amd64/tsc.c > === > RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v > retrieving revision 1.11 > diff -u -p -u -p -r1.11 tsc.c > --- arch/amd64/amd64/tsc.c6 Jun 2019 19:43:35 - 1.11 > +++ arch/amd64/amd64/tsc.c27 Jun 2019 11:55:08 - > @@ -1,8 +1,10 @@ > /* $OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $ */ > /* > + * Copyright (c) 2008 The NetBSD Foundation, Inc. 
> * Copyright (c) 2016,2017 Reyk Floeter > * Copyright (c) 2017 Adam Steen > * Copyright (c) 2017 Mike Belopuhov > + * Copyright (c) 2019 Paul Irofti > * > * Permission to use, copy, modify, and distribute this software for any > * purpose with or without fee is hereby granted, provided that the above > @@ -20,6 +22,7 @@ > #include > #include > #include > +#include > > #include > #include > @@ -33,6 +36,13 @@ inttsc_recalibrate; > uint64_t tsc_frequency; > int tsc_is_invariant; > > +static int64_t tsc_drift_max = 250;/* max
TSC synchronization on MP machines
Hi, Here is an initial diff, adapted from NetBSD, that synchronizes TSC clocks across cores. CPU0 is the reference clock and all others are skewed. During CPU initialization the clocks synchronize by keeping a registry of each CPU clock skewness and adapting the TSC read routine accordingly. I choose this implementation over what FreeBSD is doing (which is just copying Linux really), because it is clean and elegant. I would love to hear reports from machines that were broken by this. Mine, which never exhibited the problem in the first place, run just fine with the following diff. In fact I am writting this message on one such machine. Also constructive comments are more than welcomed! Notes: - cpu_counter_serializing() could probably have a better name (tsc _read for example) - the PAUSE instruction is probably not needed - acpi(4) suspend and resume bits are left out on purpose, but should be trivial to add once the current diff settles Paul Irofti Index: arch/amd64/amd64/cpu.c === RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v retrieving revision 1.137 diff -u -p -u -p -r1.137 cpu.c --- arch/amd64/amd64/cpu.c 28 May 2019 18:17:01 - 1.137 +++ arch/amd64/amd64/cpu.c 27 Jun 2019 11:55:08 - @@ -96,6 +96,7 @@ #include #include #include +#include #if NLAPIC > 0 #include @@ -754,6 +755,10 @@ cpu_init(struct cpu_info *ci) cr4 = rcr4(); lcr4(cr4 & ~CR4_PGE); lcr4(cr4); + + /* Synchronize TSC */ + if (!CPU_IS_PRIMARY(ci)) + tsc_sync_ap(ci); #endif } @@ -808,6 +813,7 @@ void cpu_start_secondary(struct cpu_info *ci) { int i; + u_long s; ci->ci_flags |= CPUF_AP; @@ -828,8 +834,20 @@ cpu_start_secondary(struct cpu_info *ci) printf("dropping into debugger; continue from here to resume boot\n"); db_enter(); #endif + } else { + /* +* Synchronize time stamp counters. Invalidate cache and do +* twice (in tsc_sync_bp) to minimize possible cache effects. +* Disable interrupts to try and rule out any external +* interference. 
+*/ + s = intr_disable(); + wbinvd(); + tsc_sync_bp(ci); + intr_restore(s); } + if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) { atomic_setbits_int(&ci->ci_flags, CPUF_IDENTIFY); @@ -852,6 +870,8 @@ void cpu_boot_secondary(struct cpu_info *ci) { int i; + int64_t drift; + u_long s; atomic_setbits_int(&ci->ci_flags, CPUF_GO); @@ -864,6 +884,17 @@ cpu_boot_secondary(struct cpu_info *ci) printf("dropping into debugger; continue from here to resume boot\n"); db_enter(); #endif + } else { + /* Synchronize TSC again, check for drift. */ + drift = ci->cpu_cc_skew; + s = intr_disable(); + wbinvd(); + tsc_sync_bp(ci); + intr_restore(s); + drift -= ci->cpu_cc_skew; + printf("TSC skew=%lld drift=%lld\n", + (long long)ci->cpu_cc_skew, (long long)drift); + tsc_sync_drift(drift); } } @@ -888,7 +919,13 @@ cpu_hatch(void *v) panic("%s: already running!?", ci->ci_dev->dv_xname); #endif + /* +* Synchronize the TSC for the first time. Note that interrupts are +* off at this point. +*/ + wbinvd(); ci->ci_flags |= CPUF_PRESENT; + tsc_sync_ap(ci); lapic_enable(); lapic_startclock(); Index: arch/amd64/amd64/tsc.c === RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v retrieving revision 1.11 diff -u -p -u -p -r1.11 tsc.c --- arch/amd64/amd64/tsc.c 6 Jun 2019 19:43:35 - 1.11 +++ arch/amd64/amd64/tsc.c 27 Jun 2019 11:55:08 - @@ -1,8 +1,10 @@ /* $OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $ */ /* + * Copyright (c) 2008 The NetBSD Foundation, Inc. 
* Copyright (c) 2016,2017 Reyk Floeter * Copyright (c) 2017 Adam Steen * Copyright (c) 2017 Mike Belopuhov + * Copyright (c) 2019 Paul Irofti * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -20,6 +22,7 @@ #include #include #include +#include #include #include @@ -33,6 +36,13 @@ int tsc_recalibrate; uint64_t tsc_frequency; inttsc_is_invariant; +static int64_t tsc_drift_max = 250;/* max cycles */ +static int64_t tsc_drift_observed; +static booltsc_good; + +static volatile int64_ttsc_sync_val; +static volatile struct cpu_info*tsc_sync_cpu; + uint tsc_get_timecount(struct ti