On Tue, Jan 21, 2014 at 05:28:37PM -0500, Sasha Levin wrote: > [ 0.000000] Initmem setup node 30 [mem 0x12ee000000-0x138dffffff] > [ 0.000000] NODE_DATA [mem 0xcfa42000-0xcfa72fff] > [ 0.000000] NODE_DATA(30) on node 1 > [ 0.000000] Initmem setup node 31 [mem 0x138e000000-0x142fffffff] > [ 0.000000] NODE_DATA [mem 0xcfa11000-0xcfa41fff] > [ 0.000000] NODE_DATA(31) on node 1 > [ 0.000000] kvm-clock: Using msrs 4b564d01 and 4b564d00 > [ 0.000000] kvm-clock: cpu 0, msr 0:cf991001, boot clock > [133538.294040] Zone ranges: > [133538.294338] DMA [mem 0x00001000-0x00ffffff] > [133538.294804] DMA32 [mem 0x01000000-0xffffffff] > [133538.295223] Normal [mem 0x100000000-0x142fffffff] > [133538.295670] Movable zone start for each node
OK, took me a while to fiddle with KVM and all the various muck around that to reproduce. But I can confirm the below does fix the issue for me. I'm hoping to not have to re-introduce the keventd_up() check, but I need to figure out what hardware triggered that and test again. --- Subject: sched/clock: Fixup early sched_clock initialization From: Peter Zijlstra <pet...@infradead.org> Date: Wed, 22 Jan 2014 12:59:18 +0100 The code would assume sched_clock_stable() and switch to !stable later; this switch brings a discontinuity in time. The discontinuity on switching from stable to unstable was always present, but previously we would set stable/unstable before initializing TSC and usually stick to the one we start out with. So the static_key bits brought an extra switch where there previously wasn't one. Things are further complicated by the fact that we cannot use static_key as early as we usually call set_sched_clock_stable(). Fix things by tracking the stable state in a regular variable and only set the static_key to the right state on sched_clock_init(), which is run right after late_time_init->tsc_init(). Before this we would not be using the TSC anyway. 
Fixes: 35af99e646c7 ("sched/clock, x86: Use a static_key for sched_clock_stable") Cc: jacob.jun....@linux.intel.com Cc: Mike Galbraith <bitbuc...@online.de> Cc: Ingo Molnar <mi...@kernel.org> Cc: h...@zytor.com Cc: paul...@linux.vnet.ibm.com Cc: Thomas Gleixner <t...@linutronix.de> Cc: John Stultz <john.stu...@linaro.org> Cc: Andy Lutomirski <l...@amacapital.net> Cc: Arjan van de Ven <ar...@linux.intel.com> Cc: l...@kernel.org Cc: r...@rjwysocki.net Cc: Eliezer Tamir <eliezer.ta...@linux.intel.com> Cc: rui.zh...@intel.com Reported-by: Sasha Levin <sasha.le...@oracle.com> Reported-by: dyo...@redhat.com Signed-off-by: Peter Zijlstra <pet...@infradead.org> Link: http://lkml.kernel.org/r/20140122115918.gg3...@twins.programming.kicks-ass.net --- kernel/sched/clock.c | 53 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 12 deletions(-) --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -77,35 +77,50 @@ __read_mostly int sched_clock_running; #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK static struct static_key __sched_clock_stable = STATIC_KEY_INIT; +static int __sched_clock_stable_early; int sched_clock_stable(void) { - if (static_key_false(&__sched_clock_stable)) - return false; - return true; + return static_key_false(&__sched_clock_stable); } -void set_sched_clock_stable(void) +static void __set_sched_clock_stable(void) { if (!sched_clock_stable()) - static_key_slow_dec(&__sched_clock_stable); + static_key_slow_inc(&__sched_clock_stable); +} + +void set_sched_clock_stable(void) +{ + __sched_clock_stable_early = 1; + + smp_mb(); /* matches sched_clock_init() */ + + if (!sched_clock_running) + return; + + __set_sched_clock_stable(); } static void __clear_sched_clock_stable(struct work_struct *work) { /* XXX worry about clock continuity */ if (sched_clock_stable()) - static_key_slow_inc(&__sched_clock_stable); + static_key_slow_dec(&__sched_clock_stable); } static DECLARE_WORK(sched_clock_work, __clear_sched_clock_stable); void 
clear_sched_clock_stable(void) { - if (keventd_up()) - schedule_work(&sched_clock_work); - else - __clear_sched_clock_stable(&sched_clock_work); + __sched_clock_stable_early = 0; + + smp_mb(); /* matches sched_clock_init() */ + + if (!sched_clock_running) + return; + + schedule_work(&sched_clock_work); } struct sched_clock_data { @@ -140,6 +155,20 @@ void sched_clock_init(void) } sched_clock_running = 1; + + /* + * Ensure that it is impossible to not do a static_key update. + * + * Either {set,clear}_sched_clock_stable() must see sched_clock_running + * and do the update, or we must see their __sched_clock_stable_early + * and do the update, or both. + */ + smp_mb(); /* matches {set,clear}_sched_clock_stable() */ + + if (__sched_clock_stable_early) + __set_sched_clock_stable(); + else + __clear_sched_clock_stable(NULL); } /* @@ -340,7 +369,7 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_wakeu */ u64 cpu_clock(int cpu) { - if (static_key_false(&__sched_clock_stable)) + if (!sched_clock_stable()) return sched_clock_cpu(cpu); return sched_clock(); @@ -355,7 +384,7 @@ u64 cpu_clock(int cpu) */ u64 local_clock(void) { - if (static_key_false(&__sched_clock_stable)) + if (!sched_clock_stable()) return sched_clock_cpu(raw_smp_processor_id()); return sched_clock(); -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/