On Tue, Apr 13 2021 at 21:35, Paul E. McKenney wrote: > #define WATCHDOG_INTERVAL (HZ >> 1) > #define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4) > +#define WATCHDOG_MAX_SKEW (NSEC_PER_SEC >> 6)
That's ~15ms which is a tad large I'd say... > static void clocksource_watchdog_work(struct work_struct *work) > { > @@ -213,9 +214,10 @@ static void clocksource_watchdog_inject_delay(void) > static void clocksource_watchdog(struct timer_list *unused) > { > struct clocksource *cs; > - u64 csnow, wdnow, cslast, wdlast, delta; > - int64_t wd_nsec, cs_nsec; > + u64 csnow, wdnow, wdagain, cslast, wdlast, delta; > + int64_t wd_nsec, wdagain_delta, wderr_nsec = 0, cs_nsec; > int next_cpu, reset_pending; > + int nretries; > > spin_lock(&watchdog_lock); > if (!watchdog_running) > @@ -224,6 +226,7 @@ static void clocksource_watchdog(struct timer_list > *unused) > reset_pending = atomic_read(&watchdog_reset_pending); > > list_for_each_entry(cs, &watchdog_list, wd_list) { > + nretries = 0; > > /* Clocksource already marked unstable? */ > if (cs->flags & CLOCK_SOURCE_UNSTABLE) { > @@ -232,11 +235,24 @@ static void clocksource_watchdog(struct timer_list > *unused) > continue; > } > > +retry: > local_irq_disable(); > - csnow = cs->read(cs); > - clocksource_watchdog_inject_delay(); > wdnow = watchdog->read(watchdog); > + clocksource_watchdog_inject_delay(); > + csnow = cs->read(cs); > + wdagain = watchdog->read(watchdog); > local_irq_enable(); > + delta = clocksource_delta(wdagain, wdnow, watchdog->mask); > + wdagain_delta = clocksource_cyc2ns(delta, watchdog->mult, > watchdog->shift); > + if (wdagain_delta > WATCHDOG_MAX_SKEW) { > + wderr_nsec = wdagain_delta; > + if (nretries++ < max_read_retries) > + goto retry; > + } > + if (nretries) { > + pr_warn("timekeeping watchdog on CPU%d: %s read-back > delay of %lldns, attempt %d\n", > + smp_processor_id(), watchdog->name, wderr_nsec, > nretries); > + } > > /* Clocksource initialized ? */ > if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) || This can nicely be split out into a read function which avoids brain overload when reading. Something like the uncompiled below. I so wish we could just delete all of this horror instead of making it more horrible. Thanks, tglx --- --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -124,6 +124,12 @@ static void __clocksource_change_rating( #define WATCHDOG_INTERVAL (HZ >> 1) #define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4) +/* + * The maximum delay between two consecutive readouts of the watchdog + * clocksource to detect SMI,NMI,vCPU preemption. + */ +#define WATCHDOG_MAX_DELAY (100 * NSEC_PER_USEC) + static void clocksource_watchdog_work(struct work_struct *work) { /* @@ -184,12 +190,37 @@ void clocksource_mark_unstable(struct cl spin_unlock_irqrestore(&watchdog_lock, flags); } +static bool cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow) +{ + unsigned int nretries; + u64 wd_end, wd_delta; + int64_t wd_delay; + + for (nretries = 0; nretries < max_read_retries; nretries++) { + local_irq_disable(); + *wdnow = watchdog->read(watchdog); + clocksource_watchdog_inject_delay(); + *csnow = cs->read(cs); + wd_end = watchdog->read(watchdog); + local_irq_enable(); + + wd_delta = clocksource_delta(wd_end, *wdnow, watchdog->mask); + wd_delay = clocksource_cyc2ns(wd_delta, watchdog->mult, watchdog->shift); + if (wd_delay < WATCHDOG_MAX_DELAY) + return true; + } + + pr_warn("timekeeping watchdog on CPU%d: %s read-back delay of %lldns, %d attempts\n", + smp_processor_id(), watchdog->name, wd_delay, nretries); + return false; +} + static void clocksource_watchdog(struct timer_list *unused) { - struct clocksource *cs; u64 csnow, wdnow, cslast, wdlast, delta; - int64_t wd_nsec, cs_nsec; int next_cpu, reset_pending; + int64_t wd_nsec, cs_nsec; + struct clocksource *cs; spin_lock(&watchdog_lock); if (!watchdog_running) @@ -206,10 +237,14 @@ static void clocksource_watchdog(struct continue; } - local_irq_disable(); - csnow = cs->read(cs); - wdnow = watchdog->read(watchdog); - local_irq_enable(); + if (!cs_watchdog_read(cs, &csnow, &wdnow)) { + /* + * No point to continue if the watchdog readout is + * unreliable. + */ + __clocksource_unstable(cs); + continue; + } /* Clocksource initialized ? */ if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||