On Tue, Jan 21, 2014 at 05:28:37PM -0500, Sasha Levin wrote:
>       [    0.000000] Initmem setup node 30 [mem 0x12ee000000-0x138dffffff]
>       [    0.000000]   NODE_DATA [mem 0xcfa42000-0xcfa72fff]
>       [    0.000000]     NODE_DATA(30) on node 1
>       [    0.000000] Initmem setup node 31 [mem 0x138e000000-0x142fffffff]
>       [    0.000000]   NODE_DATA [mem 0xcfa11000-0xcfa41fff]
>       [    0.000000]     NODE_DATA(31) on node 1
>       [    0.000000] kvm-clock: Using msrs 4b564d01 and 4b564d00
>       [    0.000000] kvm-clock: cpu 0, msr 0:cf991001, boot clock
>       [133538.294040] Zone ranges:
>       [133538.294338]   DMA      [mem 0x00001000-0x00ffffff]
>       [133538.294804]   DMA32    [mem 0x01000000-0xffffffff]
>       [133538.295223]   Normal   [mem 0x100000000-0x142fffffff]
>       [133538.295670] Movable zone start for each node

OK, took me a while to fiddle with KVM and all the various muck around
that to reproduce. But I can confirm the below does fix the issue for
me.

I'm hoping to not have to re-introduce the keventd_up() check, but I
need to figure out what hardware triggered that and test again.

---
Subject: sched/clock: Fixup early sched_clock initialization
From: Peter Zijlstra <pet...@infradead.org>
Date: Wed, 22 Jan 2014 12:59:18 +0100

The code would assume sched_clock_stable() and switch to !stable
later; this switch brings a discontinuity in time.

The discontinuity on switching from stable to unstable was always
present, but previously we would set stable/unstable before
initializing TSC and usually stick to the one we start out with.

So the static_key bits brought an extra switch where there previously
wasn't one.

Things are further complicated by the fact that we cannot use
static_key as early as we usually call set_sched_clock_stable().

Fix things by tracking the stable state in a regular variable and only
set the static_key to the right state on sched_clock_init(), which is
run right after late_time_init->tsc_init().

Before this we would not be using the TSC anyway.

Fixes: 35af99e646c7 ("sched/clock, x86: Use a static_key for sched_clock_stable")
Cc: jacob.jun....@linux.intel.com
Cc: Mike Galbraith <bitbuc...@online.de>
Cc: Ingo Molnar <mi...@kernel.org>
Cc: h...@zytor.com
Cc: paul...@linux.vnet.ibm.com
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: John Stultz <john.stu...@linaro.org>
Cc: Andy Lutomirski <l...@amacapital.net>
Cc: Arjan van de Ven <ar...@linux.intel.com>
Cc: l...@kernel.org
Cc: r...@rjwysocki.net
Cc: Eliezer Tamir <eliezer.ta...@linux.intel.com>
Cc: rui.zh...@intel.com
Reported-by: Sasha Levin <sasha.le...@oracle.com>
Reported-by: dyo...@redhat.com
Signed-off-by: Peter Zijlstra <pet...@infradead.org>
Link: http://lkml.kernel.org/r/20140122115918.gg3...@twins.programming.kicks-ass.net
---
 kernel/sched/clock.c |   53 +++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 41 insertions(+), 12 deletions(-)

--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -77,35 +77,50 @@ __read_mostly int sched_clock_running;
 
 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
 static struct static_key __sched_clock_stable = STATIC_KEY_INIT;
+static int __sched_clock_stable_early;
 
 int sched_clock_stable(void)
 {
-       if (static_key_false(&__sched_clock_stable))
-               return false;
-       return true;
+       return static_key_false(&__sched_clock_stable);
 }
 
-void set_sched_clock_stable(void)
+static void __set_sched_clock_stable(void)
 {
        if (!sched_clock_stable())
-               static_key_slow_dec(&__sched_clock_stable);
+               static_key_slow_inc(&__sched_clock_stable);
+}
+
+void set_sched_clock_stable(void)
+{
+       __sched_clock_stable_early = 1;
+
+       smp_mb(); /* matches sched_clock_init() */
+
+       if (!sched_clock_running)
+               return;
+
+       __set_sched_clock_stable();
 }
 
 static void __clear_sched_clock_stable(struct work_struct *work)
 {
        /* XXX worry about clock continuity */
        if (sched_clock_stable())
-               static_key_slow_inc(&__sched_clock_stable);
+               static_key_slow_dec(&__sched_clock_stable);
 }
 
 static DECLARE_WORK(sched_clock_work, __clear_sched_clock_stable);
 
 void clear_sched_clock_stable(void)
 {
-       if (keventd_up())
-               schedule_work(&sched_clock_work);
-       else
-               __clear_sched_clock_stable(&sched_clock_work);
+       __sched_clock_stable_early = 0;
+
+       smp_mb(); /* matches sched_clock_init() */
+
+       if (!sched_clock_running)
+               return;
+
+       schedule_work(&sched_clock_work);
 }
 
 struct sched_clock_data {
@@ -140,6 +155,20 @@ void sched_clock_init(void)
        }
 
        sched_clock_running = 1;
+
+       /*
+        * Ensure that it is impossible to not do a static_key update.
+        *
+        * Either {set,clear}_sched_clock_stable() must see sched_clock_running
+        * and do the update, or we must see their __sched_clock_stable_early
+        * and do the update, or both.
+        */
+       smp_mb(); /* matches {set,clear}_sched_clock_stable() */
+
+       if (__sched_clock_stable_early)
+               __set_sched_clock_stable();
+       else
+               __clear_sched_clock_stable(NULL);
 }
 
 /*
@@ -340,7 +369,7 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_wakeu
  */
 u64 cpu_clock(int cpu)
 {
-       if (static_key_false(&__sched_clock_stable))
+       if (!sched_clock_stable())
                return sched_clock_cpu(cpu);
 
        return sched_clock();
@@ -355,7 +384,7 @@ u64 cpu_clock(int cpu)
  */
 u64 local_clock(void)
 {
-       if (static_key_false(&__sched_clock_stable))
+       if (!sched_clock_stable())
                return sched_clock_cpu(raw_smp_processor_id());
 
        return sched_clock();
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to