The only user of the cycle_last validation is the x86 TSC. In order to
provide NMI safe accessor functions for clock monotonic and
monotonic_raw we need to do that in the core.

We can't do the TSC specific

    if (now < cycle_last)
            now = cycle_last;

for the other wrapping around clocksources, but TSC has
CLOCKSOURCE_MASK(64) which actually does not mask out anything so if
now is less than cycle_last the subtraction will give a negative
result. So we can check for that in clocksource_delta() and return 0
for that case.

Implement and enable it for x86

Signed-off-by: Thomas Gleixner <t...@linutronix.de>
---
 arch/x86/Kconfig                   |    1 +
 arch/x86/kernel/tsc.c              |   21 +++++++++------------
 kernel/time/Kconfig                |    5 +++++
 kernel/time/timekeeping_internal.h |    9 +++++++++
 4 files changed, 24 insertions(+), 12 deletions(-)

Index: tip/arch/x86/Kconfig
===================================================================
--- tip.orig/arch/x86/Kconfig
+++ tip/arch/x86/Kconfig
@@ -109,6 +109,7 @@ config X86
        select CLOCKSOURCE_WATCHDOG
        select GENERIC_CLOCKEVENTS
        select ARCH_CLOCKSOURCE_DATA
+       select CLOCKSOURCE_VALIDATE_LAST_CYCLE
        select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && 
X86_LOCAL_APIC)
        select GENERIC_TIME_VSYSCALL
        select GENERIC_STRNCPY_FROM_USER
Index: tip/arch/x86/kernel/tsc.c
===================================================================
--- tip.orig/arch/x86/kernel/tsc.c
+++ tip/arch/x86/kernel/tsc.c
@@ -951,7 +951,7 @@ core_initcall(cpufreq_tsc);
 static struct clocksource clocksource_tsc;
 
 /*
- * We compare the TSC to the cycle_last value in the clocksource
+ * We used to compare the TSC to the cycle_last value in the clocksource
  * structure to avoid a nasty time-warp. This can be observed in a
  * very small window right after one CPU updated cycle_last under
  * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which
@@ -961,26 +961,23 @@ static struct clocksource clocksource_ts
  * due to the unsigned delta calculation of the time keeping core
  * code, which is necessary to support wrapping clocksources like pm
  * timer.
+ *
+ * This sanity check is now done in the core timekeeping code.
+ * checking the result of read_tsc() - cycle_last for being negative.
+ * That works because CLOCKSOURCE_MASK(64) does not mask out any bit.
  */
 static cycle_t read_tsc(struct clocksource *cs)
 {
-       cycle_t ret = (cycle_t)get_cycles();
-
-       return ret >= clocksource_tsc.cycle_last ?
-               ret : clocksource_tsc.cycle_last;
-}
-
-static void resume_tsc(struct clocksource *cs)
-{
-       if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
-               clocksource_tsc.cycle_last = 0;
+       return (cycle_t)get_cycles();
 }
 
+/*
+ * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc()
+ */
 static struct clocksource clocksource_tsc = {
        .name                   = "tsc",
        .rating                 = 300,
        .read                   = read_tsc,
-       .resume                 = resume_tsc,
        .mask                   = CLOCKSOURCE_MASK(64),
        .flags                  = CLOCK_SOURCE_IS_CONTINUOUS |
                                  CLOCK_SOURCE_MUST_VERIFY,
Index: tip/kernel/time/Kconfig
===================================================================
--- tip.orig/kernel/time/Kconfig
+++ tip/kernel/time/Kconfig
@@ -12,6 +12,11 @@ config CLOCKSOURCE_WATCHDOG
 config ARCH_CLOCKSOURCE_DATA
        bool
 
+# Clocksources require validation of the clocksource against the last
+# cycle update - x86/TSC misfeature
+config CLOCKSOURCE_VALIDATE_LAST_CYCLE
+       bool
+
 # Timekeeping vsyscall support
 config GENERIC_TIME_VSYSCALL
        bool
Index: tip/kernel/time/timekeeping_internal.h
===================================================================
--- tip.orig/kernel/time/timekeeping_internal.h
+++ tip/kernel/time/timekeeping_internal.h
@@ -12,9 +12,18 @@ extern void tk_debug_account_sleep_time(
 #define tk_debug_account_sleep_time(x)
 #endif
 
+#ifdef CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE
+static inline cycle_t clocksource_delta(cycle_t now, cycle_t last, cycle_t 
mask)
+{
+       cycle_t ret = (now - last) & mask;
+
+       return (s64) ret > 0 ? ret : 0;
+}
+#else
 static inline cycle_t clocksource_delta(cycle_t now, cycle_t last, cycle_t 
mask)
 {
        return (now - last) & mask;
 }
+#endif
 
 #endif /* _TIMEKEEPING_INTERNAL_H */


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to