On Tue, Apr 13 2021 at 21:35, Paul E. McKenney wrote:
>  #define WATCHDOG_INTERVAL (HZ >> 1)
>  #define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
> +#define WATCHDOG_MAX_SKEW (NSEC_PER_SEC >> 6)

That's ~15ms, which is a tad large I'd say...
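
Back-of-the-envelope, as a standalone userspace sketch (not kernel code; the
constants are copied from the patch above and from the WATCHDOG_MAX_DELAY
suggested further down):

/* Quick arithmetic check, plain userspace C -- not kernel code. */
#include <stdio.h>

#define NSEC_PER_SEC	1000000000LL
#define NSEC_PER_USEC	1000LL

int main(void)
{
	long long max_skew  = NSEC_PER_SEC >> 6;	/* WATCHDOG_MAX_SKEW from the patch */
	long long max_delay = 100 * NSEC_PER_USEC;	/* WATCHDOG_MAX_DELAY suggested below */

	printf("NSEC_PER_SEC >> 6   = %lld ns (~%.1f ms)\n", max_skew, max_skew / 1e6);
	printf("100 * NSEC_PER_USEC = %lld ns (%.1f us)\n", max_delay, max_delay / 1e3);
	return 0;
}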
  
>  static void clocksource_watchdog_work(struct work_struct *work)
>  {
> @@ -213,9 +214,10 @@ static void clocksource_watchdog_inject_delay(void)
>  static void clocksource_watchdog(struct timer_list *unused)
>  {
>       struct clocksource *cs;
> -     u64 csnow, wdnow, cslast, wdlast, delta;
> -     int64_t wd_nsec, cs_nsec;
> +     u64 csnow, wdnow, wdagain, cslast, wdlast, delta;
> +     int64_t wd_nsec, wdagain_delta, wderr_nsec = 0, cs_nsec;
>       int next_cpu, reset_pending;
> +     int nretries;
>  
>       spin_lock(&watchdog_lock);
>       if (!watchdog_running)
> @@ -224,6 +226,7 @@ static void clocksource_watchdog(struct timer_list *unused)
>       reset_pending = atomic_read(&watchdog_reset_pending);
>  
>       list_for_each_entry(cs, &watchdog_list, wd_list) {
> +             nretries = 0;
>  
>               /* Clocksource already marked unstable? */
>               if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
> @@ -232,11 +235,24 @@ static void clocksource_watchdog(struct timer_list *unused)
>                       continue;
>               }
>  
> +retry:
>               local_irq_disable();
> -             csnow = cs->read(cs);
> -             clocksource_watchdog_inject_delay();
>               wdnow = watchdog->read(watchdog);
> +             clocksource_watchdog_inject_delay();
> +             csnow = cs->read(cs);
> +             wdagain = watchdog->read(watchdog);
>               local_irq_enable();
> +             delta = clocksource_delta(wdagain, wdnow, watchdog->mask);
> +             wdagain_delta = clocksource_cyc2ns(delta, watchdog->mult, watchdog->shift);
> +             if (wdagain_delta > WATCHDOG_MAX_SKEW) {
> +                     wderr_nsec = wdagain_delta;
> +                     if (nretries++ < max_read_retries)
> +                             goto retry;
> +             }
> +             if (nretries) {
> +                     pr_warn("timekeeping watchdog on CPU%d: %s read-back delay of %lldns, attempt %d\n",
> +                             smp_processor_id(), watchdog->name, wderr_nsec, nretries);
> +             }
>  
>               /* Clocksource initialized ? */
>               if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||

This can nicely be split out into a read function, which avoids brain
overload when reading. Something like the uncompiled version below.

I so wish we could just delete all of this horror instead of making it
more horrible.

Thanks,

        tglx
---
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -124,6 +124,12 @@ static void __clocksource_change_rating(
 #define WATCHDOG_INTERVAL (HZ >> 1)
 #define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
 
+/*
+ * The maximum delay between two consecutive readouts of the watchdog
+ * clocksource to detect SMI, NMI, or vCPU preemption.
+ */
+#define WATCHDOG_MAX_DELAY (100 * NSEC_PER_USEC)
+
 static void clocksource_watchdog_work(struct work_struct *work)
 {
        /*
@@ -184,12 +190,37 @@ void clocksource_mark_unstable(struct cl
        spin_unlock_irqrestore(&watchdog_lock, flags);
 }
 
+static bool cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
+{
+       unsigned int nretries;
+       u64 wd_end, wd_delta;
+       int64_t wd_delay;
+
+       for (nretries = 0; nretries < max_read_retries; nretries++) {
+               local_irq_disable();
+               *wdnow = watchdog->read(watchdog);
+               clocksource_watchdog_inject_delay();
+               *csnow = cs->read(cs);
+               wd_end = watchdog->read(watchdog);
+               local_irq_enable();
+
+               wd_delta = clocksource_delta(wd_end, *wdnow, watchdog->mask);
+               wd_delay = clocksource_cyc2ns(wd_delta, watchdog->mult, watchdog->shift);
+               if (wd_delay < WATCHDOG_MAX_DELAY)
+                       return true;
+       }
+
+       pr_warn("timekeeping watchdog on CPU%d: %s read-back delay of %lldns, %d attempts\n",
+               smp_processor_id(), watchdog->name, wd_delay, nretries);
+       return false;
+}
+
 static void clocksource_watchdog(struct timer_list *unused)
 {
-       struct clocksource *cs;
        u64 csnow, wdnow, cslast, wdlast, delta;
-       int64_t wd_nsec, cs_nsec;
        int next_cpu, reset_pending;
+       int64_t wd_nsec, cs_nsec;
+       struct clocksource *cs;
 
        spin_lock(&watchdog_lock);
        if (!watchdog_running)
@@ -206,10 +237,14 @@ static void clocksource_watchdog(struct
                        continue;
                }
 
-               local_irq_disable();
-               csnow = cs->read(cs);
-               wdnow = watchdog->read(watchdog);
-               local_irq_enable();
+               if (!cs_watchdog_read(cs, &csnow, &wdnow)) {
+                       /*
+                        * No point to continue if the watchdog readout is
+                        * unreliable.
+                        */
+                       __clocksource_unstable(cs);
+                       continue;
+               }
 
                /* Clocksource initialized ? */
                if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
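
FWIW, an (untested) userspace analogue of that bounded-retry readout, just to
illustrate the pattern in isolation: CLOCK_MONOTONIC stands in for both the
watchdog and the clocksource being checked, and the retry count and 100us
bound mirror the values above. None of this is kernel API.

/* Illustrative userspace sketch only -- not kernel code. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define MAX_READ_RETRIES	3
#define MAX_DELAY_NS		(100 * 1000LL)	/* 100us, like WATCHDOG_MAX_DELAY */

static int64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (int64_t)ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

/*
 * Read the "clocksource" bracketed by two "watchdog" reads and reject the
 * sample if the two watchdog reads are further apart than MAX_DELAY_NS,
 * retrying a bounded number of times -- the same shape as cs_watchdog_read().
 */
static bool bounded_read(int64_t *csnow, int64_t *wdnow)
{
	int64_t wd_end, delay = 0;
	unsigned int retry;

	for (retry = 0; retry < MAX_READ_RETRIES; retry++) {
		*wdnow = now_ns();
		*csnow = now_ns();	/* stand-in for cs->read(cs) */
		wd_end = now_ns();

		delay = wd_end - *wdnow;
		if (delay < MAX_DELAY_NS)
			return true;
	}

	fprintf(stderr, "read-back delay of %lldns, %u attempts\n",
		(long long)delay, retry);
	return false;
}

int main(void)
{
	int64_t csnow, wdnow;

	if (bounded_read(&csnow, &wdnow))
		printf("usable sample: cs=%lld wd=%lld\n",
		       (long long)csnow, (long long)wdnow);
	return 0;
}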
