From: Ben Luo <bn0...@gmail.com>

In general, a KVM guest programs its tsc-deadline timestamp into the
MSR_IA32_TSC_DEADLINE MSR.

When pvtimer is enabled, this patch introduces a new mechanism for
reprogramming the KVM guest timer: a periodic kthread running on a
dedicated host CPU scans the shared page and synchronizes the timer
settings for the guest. The next firing time of that kthread is used
as the threshold for deciding whether to program the tsc-deadline
timestamp into the MSR_IA32_TSC_DEADLINE MSR or into the shared page.
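
The guest-side decision in lapic_next_deadline()/kvm_pv_timer_next_event()
boils down to roughly the following (illustrative sketch only, using the
names from the hunks below; the pvtimer_enabled check, barriers and the
xchg() are omitted):

	u64 deadline = rdtsc() + delta * TSC_DIVISOR;

	shared_buf->expire_tsc = deadline;           /* always publish to the shared page */

	if (deadline >= shared_buf->next_sync_tsc && /* kthread will scan before it fires */
	    deadline >= rdtsc() + PVTIMER_PADDING)   /* and the deadline is not imminent  */
		return;                              /* let the host kthread arm it       */

	wrmsrl(MSR_IA32_TSC_DEADLINE, deadline);     /* otherwise fall back to the MSR    */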

Signed-off-by: Yang Zhang <yang.zhang...@gmail.com>
Signed-off-by: Quan Xu <quan....@gmail.com>
Signed-off-by: Ben Luo <bn0...@gmail.com>
---
 arch/x86/include/asm/kvm_para.h |    9 +++++++++
 arch/x86/kernel/apic/apic.c     |    9 ++++++---
 arch/x86/kernel/kvm.c           |   38 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 53 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index c373e44..109e706 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -5,6 +5,7 @@
 #include <asm/processor.h>
 #include <asm/alternative.h>
 #include <uapi/asm/kvm_para.h>
+#include <linux/hrtimer.h>
 
 extern void kvmclock_init(void);
 extern int kvm_register_clock(char *txt);
@@ -92,6 +93,8 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
 void kvm_async_pf_task_wait(u32 token, int interrupt_kernel);
 void kvm_async_pf_task_wake(u32 token);
 u32 kvm_read_and_reset_pf_reason(void);
+int kvm_pv_timer_next_event(unsigned long tsc,
+               struct clock_event_device *evt);
 extern void kvm_disable_steal_time(void);
 
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
@@ -126,6 +129,12 @@ static inline void kvm_disable_steal_time(void)
 {
        return;
 }
+
+static inline int kvm_pv_timer_next_event(unsigned long tsc,
+                       struct clock_event_device *evt)
+{
+       return 0;
+}
 #endif
 
 #endif /* _ASM_X86_KVM_PARA_H */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index ff89177..286c1b3 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -471,10 +471,13 @@ static int lapic_next_event(unsigned long delta,
 static int lapic_next_deadline(unsigned long delta,
                               struct clock_event_device *evt)
 {
-       u64 tsc;
+       u64 tsc = rdtsc() + (((u64) delta) * TSC_DIVISOR);
 
-       tsc = rdtsc();
-       wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
+       /* TODO: undisciplined function call */
+       if (kvm_pv_timer_next_event(tsc, evt))
+               return 0;
+
+       wrmsrl(MSR_IA32_TSC_DEADLINE, tsc);
        return 0;
 }
 
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 8bb9594..ec7aff1 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -328,6 +328,35 @@ static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val)
        apic->native_eoi_write(APIC_EOI, APIC_EOI_ACK);
 }
 
+static DEFINE_PER_CPU(int, pvtimer_enabled);
+static DEFINE_PER_CPU(struct pvtimer_vcpu_event_info,
+                     pvtimer_shared_buf) = {0};
+#define PVTIMER_PADDING                25000
+int kvm_pv_timer_next_event(unsigned long tsc,
+               struct clock_event_device *evt)
+{
+       struct pvtimer_vcpu_event_info *src;
+       u64 now;
+
+       if (!this_cpu_read(pvtimer_enabled))
+               return false;
+
+       src = this_cpu_ptr(&pvtimer_shared_buf);
+       xchg((u64 *)&src->expire_tsc, tsc);
+
+       barrier();
+
+       if (tsc < src->next_sync_tsc)
+               return false;
+
+       rdtscll(now);
+       if (tsc < now || tsc - now < PVTIMER_PADDING)
+               return false;
+
+       return true;
+}
+EXPORT_SYMBOL_GPL(kvm_pv_timer_next_event);
+
 static void kvm_guest_cpu_init(void)
 {
        if (!kvm_para_available())
@@ -362,6 +391,15 @@ static void kvm_guest_cpu_init(void)
 
        if (has_steal_clock)
                kvm_register_steal_time();
+
+       if (kvm_para_has_feature(KVM_FEATURE_PV_TIMER)) {
+               unsigned long data;
+
+               data  = slow_virt_to_phys(this_cpu_ptr(&pvtimer_shared_buf))
+                                         | KVM_MSR_ENABLED;
+               wrmsrl(MSR_KVM_PV_TIMER_EN, data);
+               this_cpu_write(pvtimer_enabled, 1);
+       }
 }
 
 static void kvm_pv_disable_apf(void)
-- 
1.7.1
