Hello all! I propose a fix for the buggy PCC.
PCC is buggy on the NetBSD/alpha MP kernel. http://mail-index.netbsd.org/port-alpha/2016/01/20/msg000735.html https://mail-index.netbsd.org/port-alpha/2017/07/13/msg000830.html I can't understand this algorithm. https://nxr.netbsd.org/xref/src/sys/kern/kern_cctr.c I wonder why this algorithm calibrates cycle counters on a single CPU. There are many unnecessary things in this algorithm. Taylor R Campbell's patch was made quickly as a makeshift. http://mail-index.netbsd.org/port-alpha/2017/11/01/msg000873.html http://mail-index.netbsd.org/port-alpha/2017/11/02/msg000879.html I propose a new synchronization algorithm. This cycle counter synchronization algorithm is taken from Linux/ia64 and Linux/SPARC. Please see arch/ia64/kernel/smpboot.c and arch/sparc/kernel/smp_64.c http://elixir.free-electrons.com/linux/latest/source/arch/ia64/kernel/smpboot.c#L244 http://elixir.free-electrons.com/linux/latest/source/arch/sparc/kernel/smp_64.c#L155 If you need more information, please read IA-64 Linux Kernel: Design and Implementation p356-p361 (Hewlett-Packard Professional Books). If you have time, I'd really appreciate it if you could give me your opinion. =================================================================== diff -Naru src.org/sys/arch/alpha/alpha/clock.c src/sys/arch/alpha/alpha/clock.c --- src.org/sys/arch/alpha/alpha/clock.c 2012-02-06 02:14:10.000000000 +0000 +++ src/sys/arch/alpha/alpha/clock.c 2018-01-13 19:10:55.000000000 +0000 @@ -78,6 +78,7 @@ cpu_initclocks(void) { uint64_t pcc_freq; + extern int ncpus; if (clock_init == NULL) panic("cpu_initclocks: no clock attached"); @@ -101,7 +102,7 @@ * Initialize PCC timecounter. */ pcc_freq = cpu_frequency(curcpu()); - cc_init(NULL, pcc_freq, "PCC", PCC_QUAL, ncpus); + cc_init(NULL, pcc_freq, "PCC", PCC_QUAL, ncpus); /* * Get the clock started.
diff -Naru src.org/sys/arch/alpha/alpha/ipifuncs.c src/sys/arch/alpha/alpha/ipifuncs.c --- src.org/sys/arch/alpha/alpha/ipifuncs.c 2014-05-19 22:47:53.000000000 +0000 +++ src/sys/arch/alpha/alpha/ipifuncs.c 2018-01-14 15:48:43.000000000 +0000 @@ -245,11 +245,31 @@ /* NOTREACHED */ } +/* + * Interprocessor interrupt for boot(time-keeper) processor. + */ + static void alpha_ipi_microset(struct cpu_info *ci, struct trapframe *framep) { + int s; + + s = splhigh(); + + cc_bootcpu = 0; - cc_calibrate_cpu(ci); + turn = BOOTCPU_READY; + + alpha_mb(); + + while (turn != SUBCPU_READY) + alpha_mb(); + + cc_bootcpu = (alpha_rpcc() & 0xffffffffU); + + alpha_mb(); + + splx(s); } static void diff -Naru src.org/sys/arch/alpha/include/cpu.h src/sys/arch/alpha/include/cpu.h --- src.org/sys/arch/alpha/include/cpu.h 2014-01-22 22:52:04.000000000 +0000 +++ src/sys/arch/alpha/include/cpu.h 2018-01-14 13:41:39.000000000 +0000 @@ -110,7 +110,7 @@ struct lwp *ci_curlwp; /* current owner of the processor */ struct cpu_data ci_data; /* MI per-cpu data */ #if !defined(_KMEMUSER) - struct cctr_state ci_cc; /* cycle counter state */ + int64_t cc_delta; /* reference CC difference for time-keeper processor */ struct cpu_info *ci_next; /* next cpu_info structure */ int ci_mtx_count; int ci_mtx_oldspl; diff -Naru src.org/sys/arch/alpha/include/cpu_counter.h src/sys/arch/alpha/include/cpu_counter.h --- src.org/sys/arch/alpha/include/cpu_counter.h 2008-04-28 20:23:11.000000000 +0000 +++ src/sys/arch/alpha/include/cpu_counter.h 2018-01-13 16:34:34.000000000 +0000 @@ -41,8 +41,10 @@ #include <machine/cpu.h> #include <machine/rpb.h> -#define cc_calibrate_mp(ci) \ - alpha_multicast_ipi(cpus_running, ALPHA_IPI_MICROSET) +#define TIME_KEEPER_PROCESSOR hwrpb->rpb_primary_cpu_id + +#define cc_get_timecountbp(ci) \ + alpha_send_ipi(TIME_KEEPER_PROCESSOR, ALPHA_IPI_MICROSET) /* Process Cycle Counter is always available. 
*/ #define cpu_hascounter() (1) diff -Naru src.org/sys/arch/ia64/include/cpu.h src/sys/arch/ia64/include/cpu.h --- src.org/sys/arch/ia64/include/cpu.h 2013-11-10 00:50:13.000000000 +0000 +++ src/sys/arch/ia64/include/cpu.h 2018-01-09 16:08:19.000000000 +0000 @@ -89,7 +89,6 @@ struct cpu_data ci_data; /* MI per-cpu data */ device_t ci_dev; /* pointer to our device */ struct lwp *ci_curlwp; /* current owner of the processor */ - struct cctr_state ci_cc; /* cycle counter state */ struct cpu_info *ci_next; /* next cpu_info structure */ volatile int ci_mtx_count; /* Negative count of spin mutexes */ diff -Naru src.org/sys/kern/kern_cctr.c src/sys/kern/kern_cctr.c --- src.org/sys/kern/kern_cctr.c 2009-01-03 03:31:23.000000000 +0000 +++ src/sys/kern/kern_cctr.c 2018-01-14 15:48:56.000000000 +0000 @@ -1,12 +1,7 @@ -/* $NetBSD: kern_cctr.c,v 1.9 2009/01/03 03:31:23 yamt Exp $ */ - -/*- - * Copyright (c) 2006, 2008 The NetBSD Foundation, Inc. +/* + * Copyright (c) 2018 Naruaki Etomi * All rights reserved. * - * re-implementation of TSC for MP systems merging cc_microtime and - * TSC for timecounters by Frank Kardel - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -16,85 +11,54 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* basic calibration ideas are (kern_microtime.c): */ -/****************************************************************************** - * * - * Copyright (c) David L. Mills 1993, 1994 * - * * - * Permission to use, copy, modify, and distribute this software and its * - * documentation for any purpose and without fee is hereby granted, provided * - * that the above copyright notice appears in all copies and that both the * - * copyright notice and this permission notice appear in supporting * - * documentation, and that the name University of Delaware not be used in * - * advertising or publicity pertaining to distribution of the software * - * without specific, written prior permission. 
The University of Delaware * - * makes no representations about the suitability this software for any * - * purpose. It is provided "as is" without express or implied warranty. * - * * - ******************************************************************************/ - -/* reminiscents from older version of this file are: */ -/*- - * Copyright (c) 1998-2003 Poul-Henning Kamp - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. +/* + * Most of the following was adapted from the Linux/ia64, Linux/SPARC + * synchronization the cycle counters algorithm. 
+ * see arch/ia64/kernel/smpboot.c, arch/sparc/kernel/smp_64.c and + * IA-64 Linux Kernel: Design and Implementation p356-p361 (Hewlett-Packard Professional Books) */ -#include <sys/cdefs.h> -/* __FBSDID("$FreeBSD: src/sys/i386/i386/tsc.c,v 1.204 2003/10/21 18:28:34 silby Exp $"); */ -__KERNEL_RCSID(0, "$NetBSD: kern_cctr.c,v 1.9 2009/01/03 03:31:23 yamt Exp $"); +#include <sys/cdefs.h> #include <sys/param.h> #include <sys/systm.h> #include <sys/sysctl.h> +#include <sys/timepps.h> #include <sys/time.h> #include <sys/timetc.h> #include <sys/kernel.h> #include <sys/power.h> #include <sys/cpu.h> #include <machine/cpu_counter.h> +#include <sys/atomic.h> /* XXX make cc_timecounter.tc_frequency settable by sysctl() */ +#if defined(MULTIPROCESSOR) +volatile u_int32_t cc_bootcpu __cacheline_aligned; +volatile u_int32_t turn __cacheline_aligned; + +static int cc_tick; + +kmutex_t cc_calibrate_lock; +#endif /* MULTIPROCESSOR */ + static timecounter_pps_t cc_calibrate; void cc_calibrate_cpu(struct cpu_info *); -static int64_t cc_cal_val; /* last calibrate time stamp */ - static struct timecounter cc_timecounter = { .tc_get_timecount = cc_get_timecount, .tc_poll_pps = cc_calibrate, @@ -115,7 +79,7 @@ * initialize cycle counter based timecounter */ struct timecounter * -cc_init(timecounter_get_t getcc, uint64_t freq, const char *name, int quality) +cc_init(timecounter_get_t getcc, uint64_t freq, const char *name, int quality, int clocksrcnum) { if (getcc != NULL) @@ -126,6 +90,18 @@ cc_timecounter.tc_quality = quality; tc_init(&cc_timecounter); +#if defined(MULTIPROCESSOR) + mutex_init(&cc_calibrate_lock, MUTEX_DEFAULT, IPL_HIGH); + + if (hz > 1000) + cc_tick = (hz + 500) / 1000; + else + cc_tick = 1; + + /* ncpus - TIME_KEEPER_PROCESSOR */ + cc_tick = (cc_tick * clocksrcnum) - 1; +#endif /* MULTIPROCESSOR */ + return &cc_timecounter; } @@ -136,47 +112,22 @@ cc_get_timecount(struct timecounter *tc) { struct cpu_info *ci; - int64_t rcc, cc, ncsw; - u_int gen; + int64_t rcc, 
ncsw; retry: ncsw = curlwp->l_ncsw; + __insn_barrier(); + ci = curcpu(); - if (ci->ci_cc.cc_denom == 0) { - /* - * This is our first time here on this CPU. Just - * start with reasonable initial values. - */ - ci->ci_cc.cc_cc = cpu_counter32(); - ci->ci_cc.cc_val = 0; - if (ci->ci_cc.cc_gen == 0) - ci->ci_cc.cc_gen++; - - ci->ci_cc.cc_denom = cpu_frequency(ci); - if (ci->ci_cc.cc_denom == 0) - ci->ci_cc.cc_denom = cc_timecounter.tc_frequency; - ci->ci_cc.cc_delta = ci->ci_cc.cc_denom; - } - /* - * read counter and re-read when the re-calibration - * strikes inbetween - */ - do { - /* pick up current generation number */ - gen = ci->ci_cc.cc_gen; - - /* determine local delta ticks */ - cc = cpu_counter32() - ci->ci_cc.cc_cc; - if (cc < 0) - cc += 0x100000000LL; - - /* scale to primary */ - rcc = (cc * ci->ci_cc.cc_delta) / ci->ci_cc.cc_denom - + ci->ci_cc.cc_val; - } while (gen == 0 || gen != ci->ci_cc.cc_gen); + if (CPU_IS_PRIMARY(ci)) + rcc = cpu_counter32(); + else + rcc = cpu_counter32() - ci->cc_delta; + __insn_barrier(); + if (ncsw != curlwp->l_ncsw) { /* Was preempted */ goto retry; @@ -185,121 +136,99 @@ return rcc; } -/* - * called once per clock tick via the pps callback - * for the calibration of the TSC counters. - * it is called only for the PRIMARY cpu. 
all - * other cpus are called via a broadcast IPI - * calibration interval is 1 second - we call - * the calibration code only every hz calls - */ -static void -cc_calibrate(struct timecounter *tc) +#if defined(MULTIPROCESSOR) +static inline int64_t +get_delta (void) { - static int calls; - struct cpu_info *ci; + int64_t t0, t1, tcenter = 0; - KASSERT(kpreempt_disabled()); + t0 = cpu_counter32(); - /* - * XXX: for high interrupt frequency - * support: ++calls < hz / tc_tick - */ - if (++calls < hz) - return; + turn = SUBCPU_READY; - calls = 0; - ci = curcpu(); - /* pick up reference ticks */ - cc_cal_val = cpu_counter32(); + membar_sync(); -#if defined(MULTIPROCESSOR) - cc_calibrate_mp(ci); -#endif - cc_calibrate_cpu(ci); + while (cc_bootcpu == 0) + membar_sync(); + + t1 = cpu_counter32(); + + if ((t1 - t0) < 0) { + /* Overflow ! */ + turn = SUBCPU_RETRY; + membar_sync(); + return 0; + } else { + /* average best_t0 and best_t1 without overflow: */ + tcenter = (t0/2 + t1/2); + + if (t0 % 2 + t1 % 2 == 2) + tcenter++; + + return tcenter - cc_bootcpu; + } } /* - * This routine is called about once per second directly by the master - * processor and via an interprocessor interrupt for other processors. - * It determines the CC frequency of each processor relative to the - * master clock and the time this determination is made. These values - * are used by cc_get_timecount() to interpolate the ticks between - * timer interrupts. Note that we assume the kernel variables have - * been zeroed early in life. 
+ * Call by sub processor */ + void cc_calibrate_cpu(struct cpu_info *ci) { - u_int gen; - int64_t val; - int64_t delta, denom; int s; -#ifdef TIMECOUNTER_DEBUG - int64_t factor, old_factor; -#endif - val = cc_cal_val; - s = splhigh(); - /* create next generation number */ - gen = ci->ci_cc.cc_gen; - gen++; - if (gen == 0) - gen++; - - /* update in progress */ - ci->ci_cc.cc_gen = 0; - - denom = ci->ci_cc.cc_cc; - ci->ci_cc.cc_cc = cpu_counter32(); - - if (ci->ci_cc.cc_denom == 0) { - /* - * This is our first time here on this CPU. Just - * start with reasonable initial values. - */ - ci->ci_cc.cc_val = val; - ci->ci_cc.cc_denom = cpu_frequency(ci); - if (ci->ci_cc.cc_denom == 0) - ci->ci_cc.cc_denom = cc_timecounter.tc_frequency; - ci->ci_cc.cc_delta = ci->ci_cc.cc_denom; - ci->ci_cc.cc_gen = gen; - splx(s); - return; - } + /* This is impossible! */ + if (CPU_IS_PRIMARY(ci)) + panic("cc_calibrate_cpu"); -#ifdef TIMECOUNTER_DEBUG - old_factor = (ci->ci_cc.cc_delta * 1000 ) / ci->ci_cc.cc_denom; -#endif - - /* local ticks per period */ - denom = ci->ci_cc.cc_cc - denom; - if (denom < 0) - denom += 0x100000000LL; - - ci->ci_cc.cc_denom = denom; - - /* reference ticks per period */ - delta = val - ci->ci_cc.cc_val; - if (delta < 0) - delta += 0x100000000LL; + s = splhigh(); - ci->ci_cc.cc_val = val; - ci->ci_cc.cc_delta = delta; +retry: - /* publish new generation number */ - ci->ci_cc.cc_gen = gen; + turn = SUBCPU_READY; + membar_sync(); + + /* Call boot processor via IPI */ + cc_get_timecountbp(); + + while (turn != BOOTCPU_READY) + membar_sync(); + + ci->cc_delta = get_delta(); + + if (turn == SUBCPU_RETRY) + goto retry; + splx(s); +} +#endif /* MULTIPROCESSOR */ -#ifdef TIMECOUNTER_DEBUG - factor = (delta * 1000) / denom - old_factor; - if (factor < 0) - factor = -factor; - - if (factor > old_factor / 10) - printf("cc_calibrate_cpu[%u]: 10%% exceeded - delta %" - PRId64 ", denom %" PRId64 ", factor %" PRId64 - ", old factor %" PRId64"\n", ci->ci_index, - delta, 
denom, (delta * 1000) / denom, old_factor); -#endif /* TIMECOUNTER_DEBUG */ +static void +cc_calibrate(struct timecounter *tc) +{ + struct cpu_info *ci; + + ci = curcpu(); + + if (CPU_IS_PRIMARY(ci)) + return; + +#if defined(MULTIPROCESSOR) + static int calls; + + /* + * XXX: for high interrupt frequency + * support: ++calls < hz / cc_tick + */ + + if (++calls < cc_tick) + return; + + calls = 0; + + mutex_spin_enter(&cc_calibrate_lock); + cc_calibrate_cpu(ci); + mutex_spin_exit(&cc_calibrate_lock); +#endif /* MULTIPROCESSOR */ } diff -Naru src.org/sys/kern/kern_clock.c src/sys/kern/kern_clock.c --- src.org/sys/kern/kern_clock.c 2012-12-02 01:05:16.000000000 +0000 +++ src/sys/kern/kern_clock.c 2018-01-13 19:35:15.000000000 +0000 @@ -229,6 +229,8 @@ tc_ticktock(); } + tc_ticktock_poll(); + /* * Update real-time timeout queue. */ diff -Naru src.org/sys/kern/kern_tc.c src/sys/kern/kern_tc.c --- src.org/sys/kern/kern_tc.c 2013-09-14 20:52:43.000000000 +0000 +++ src/sys/kern/kern_tc.c 2018-01-14 13:22:02.000000000 +0000 @@ -741,17 +741,6 @@ bintime_addx(&th->th_offset, th->th_scale * delta); /* - * Hardware latching timecounters may not generate interrupts on - * PPS events, so instead we poll them. There is a finite risk that - * the hardware might capture a count which is later than the one we - * got above, and therefore possibly in the next NTP second which might - * have a different rate than the current NTP second. It doesn't - * matter in practice. - */ - if (tho->th_counter->tc_poll_pps) - tho->th_counter->tc_poll_pps(tho->th_counter); - - /* * Deal with NTP second processing. The for loop normally * iterates at most once, but in extreme situations it might * keep NTP sane if timeouts are not run for several seconds. @@ -1300,6 +1289,25 @@ } void +tc_ticktock_poll(void) +{ + struct timehands *th; + th = timehands; + + /* + * Hardware latching timecounters may not generate interrupts on + * PPS events, so instead we poll them. 
There is a finite risk that + * the hardware might capture a count which is later than the one we + * got above, and therefore possibly in the next NTP second which might + * have a different rate than the current NTP second. It doesn't + * matter in practice. + */ + + if (th->th_counter->tc_poll_pps) + th->th_counter->tc_poll_pps(th->th_counter); +} + +void inittimecounter(void) { u_int p; diff -Naru src.org/sys/sys/cctr.h src/sys/sys/cctr.h --- src.org/sys/sys/cctr.h 2008-04-28 20:24:10.000000000 +0000 +++ src/sys/sys/cctr.h 2018-01-14 12:17:26.000000000 +0000 @@ -31,23 +31,17 @@ #include <sys/timetc.h> -/* - * Variables used by cycle counter in kern_cctr.c. - */ -struct cctr_state { - volatile u_int cc_gen; /* generation number for this data set */ - volatile int64_t cc_val; /* reference CC value at calibration time */ - volatile int64_t cc_cc; /* local CC value at calibration time */ - volatile int64_t cc_delta; /* reference CC difference for - last calibration period */ - volatile int64_t cc_denom; /* local CC difference for - last calibration period */ -}; - struct cpu_info; +#define BOOTCPU_READY 0 +#define SUBCPU_READY 1 +#define SUBCPU_RETRY 2 + +extern volatile u_int32_t cc_bootcpu; +extern volatile u_int32_t turn; + void cc_calibrate_cpu(struct cpu_info *); -struct timecounter *cc_init(timecounter_get_t, uint64_t, const char *, int); +struct timecounter *cc_init(timecounter_get_t, uint64_t, const char *, int, int); u_int cc_get_timecount(struct timecounter *); #endif /* _SYS_CCTR_H_ */ diff -Naru src.org/sys/sys/timetc.h src/sys/sys/timetc.h --- src.org/sys/sys/timetc.h 2009-01-11 02:45:56.000000000 +0000 +++ src/sys/sys/timetc.h 2018-01-10 15:29:32.000000000 +0000 @@ -83,6 +83,7 @@ int tc_detach(struct timecounter *); void tc_setclock(const struct timespec *ts); void tc_ticktock(void); +void tc_ticktock_poll(void); void tc_gonebad(struct timecounter *); #ifdef SYSCTL_DECL =================================================================== -- 
Naruaki.Etomi nullnil...@gmail.com