On 27/02/2017 13:30, Wanpeng Li wrote: > This results in sched clock always unstable for kvm guest since there > is no invariant tsc cpuid bit exposed for kvm guest currently. The > blockage happened for several reasons: > > 1) Migration: to host with different TSC frequency. > 2) Savevm: It is not safe to use the TSC for wall clock timer services.
Right, the purpose of kvmclock is basically to ensure a stable clock without using TSC. However, I know zilch about kernel/sched/clock.c so I cannot really understand your patch. Paolo > How about something like below(untested): > > diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c > index bae6ea6..a61c477 100644 > --- a/arch/x86/kernel/kvmclock.c > +++ b/arch/x86/kernel/kvmclock.c > @@ -115,6 +115,7 @@ static inline void kvm_sched_clock_init(bool stable) > > kvm_sched_clock_offset = kvm_clock_read(); > pv_time_ops.sched_clock = kvm_sched_clock_read; > + hypervisor_sched_clock_stable(); > > printk(KERN_INFO "kvm-clock: using sched offset of %llu cycles\n", > kvm_sched_clock_offset); > diff --git a/include/linux/sched.h b/include/linux/sched.h > index 451e241..38c6edb 100644 > --- a/include/linux/sched.h > +++ b/include/linux/sched.h > @@ -2499,6 +2499,10 @@ static inline void clear_sched_clock_stable(void) > { > } > > +static inline void hypervisor_sched_clock_stable(void) > +{ > +} > + > static inline void sched_clock_idle_sleep_event(void) > { > } > @@ -2526,6 +2530,7 @@ extern void sched_clock_init_late(void); > */ > extern int sched_clock_stable(void); > extern void clear_sched_clock_stable(void); > +extern void hypervisor_sched_clock_stable(void); > > extern void sched_clock_tick(void); > extern void sched_clock_idle_sleep_event(void); > diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c > index ad64efe..a46639e 100644 > --- a/kernel/sched/clock.c > +++ b/kernel/sched/clock.c > @@ -77,9 +77,15 @@ EXPORT_SYMBOL_GPL(sched_clock); > > __read_mostly int sched_clock_running; > > +enum { > + SCHED_CLOCK_INIT = 0, > + HYPERVISOR_SCHED_CLOCK_STABLE, > + SCHED_CLOCK_INIT_LATE > +}; > + > void sched_clock_init(void) > { > - sched_clock_running = 1; > + sched_clock_running = SCHED_CLOCK_INIT; > } > > #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK > @@ -170,13 +176,14 @@ void clear_sched_clock_stable(void) > > smp_mb(); /* matches sched_clock_init_late() */ > > - if (sched_clock_running == 2) > + if (sched_clock_running == SCHED_CLOCK_INIT_LATE) > schedule_work(&sched_clock_work); > } > > void sched_clock_init_late(void) > { > - sched_clock_running = 2; > + if (sched_clock_running == SCHED_CLOCK_INIT) > + sched_clock_running = SCHED_CLOCK_INIT_LATE; > /* > * Ensure that it is impossible to not do a static_key update. > * > @@ -186,8 +193,15 @@ void sched_clock_init_late(void) > */ > smp_mb(); /* matches {set,clear}_sched_clock_stable() */ > > - if (__sched_clock_stable_early) > + if (__sched_clock_stable_early || > + sched_clock_running == HYPERVISOR_SCHED_CLOCK_STABLE) { > __set_sched_clock_stable(); > + } > +} > + > +void hypervisor_sched_clock_stable() > +{ > + sched_clock_running = HYPERVISOR_SCHED_CLOCK_STABLE; > } > > /*