From: Wanpeng Li <wanpen...@tencent.com>

Even if for realtime CPUs, cache line bounces, frequency scaling, presence 
of higher-priority RT tasks, etc can still cause different response. These 
interferences should be considered and periodically revaluate whether 
or not the lapic_timer_advance_ns value is the best, do nothing if it is,
otherwise recaluate again. Set lapic_timer_advance_ns to the minimal 
conservative value from all the estimated values.

Testing on Skylake server, cat vcpu*/lapic_timer_advance_ns, before patch:
1628
4161
4321
3236
...

Testing on Skylake server, cat vcpu*/lapic_timer_advance_ns, after patch:
1553
1499
1509
1489
...

Testing on Haswell desktop, cat vcpu*/lapic_timer_advance_ns, before patch:
4617
3641
4102
4577
...
Testing on Haswell desktop, cat vcpu*/lapic_timer_advance_ns, after patch:
2775
2892
2764
2775
...

Cc: Paolo Bonzini <pbonz...@redhat.com>
Cc: Radim Krčmář <rkrc...@redhat.com>
Signed-off-by: Wanpeng Li <wanpen...@tencent.com>
---
 arch/x86/kvm/lapic.c | 37 ++++++++++++++++++++++++++++++-------
 arch/x86/kvm/lapic.h |  2 ++
 2 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 2f4a48a..12ade70 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -70,6 +70,7 @@
 /* step-by-step approximation to mitigate fluctuation */
 #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
 #define LAPIC_TIMER_ADVANCE_FILTER 10000
+#define LAPIC_TIMER_ADVANCE_RECALC_PERIOD (600 * HZ)
 
 static inline int apic_test_vector(int vec, void *bitmap)
 {
@@ -1484,13 +1485,24 @@ static inline void __wait_lapic_expire(struct kvm_vcpu 
*vcpu, u64 guest_cycles)
 static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
                                              s64 advance_expire_delta)
 {
-       struct kvm_lapic *apic = vcpu->arch.apic;
-       u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns, ns;
+       struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer;
+       u32 timer_advance_ns = ktimer->timer_advance_ns, ns;
 
        if (abs(advance_expire_delta) > LAPIC_TIMER_ADVANCE_FILTER)
                /* filter out drastic fluctuations */
                return;
 
+       /* periodic revaluate */
+       if (unlikely(ktimer->timer_advance_adjust_done)) {
+               ktimer->recalc_timer_advance_ns = jiffies +
+                       LAPIC_TIMER_ADVANCE_RECALC_PERIOD;
+               if (abs(advance_expire_delta) > 
LAPIC_TIMER_ADVANCE_ADJUST_DONE) {
+                       timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
+                       ktimer->timer_advance_adjust_done = false;
+               } else
+                       return;
+       }
+
        /* too early */
        if (advance_expire_delta < 0) {
                ns = -advance_expire_delta * 1000000ULL;
@@ -1503,17 +1515,24 @@ static inline void adjust_lapic_timer_advance(struct 
kvm_vcpu *vcpu,
                timer_advance_ns += ns;
        }
 
-       timer_advance_ns = (apic->lapic_timer.timer_advance_ns *
+       timer_advance_ns = (ktimer->timer_advance_ns *
                (LAPIC_TIMER_ADVANCE_ADJUST_STEP - 1) + advance_expire_delta) /
                LAPIC_TIMER_ADVANCE_ADJUST_STEP;
 
        if (abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
-               apic->lapic_timer.timer_advance_adjust_done = true;
+               ktimer->timer_advance_adjust_done = true;
        if (unlikely(timer_advance_ns > 5000)) {
                timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
-               apic->lapic_timer.timer_advance_adjust_done = false;
+               ktimer->timer_advance_adjust_done = false;
+       }
+
+       ktimer->timer_advance_ns = timer_advance_ns;
+
+       if (ktimer->timer_advance_adjust_done) {
+               if (ktimer->min_timer_advance_ns > timer_advance_ns)
+                       ktimer->min_timer_advance_ns = timer_advance_ns;
+               ktimer->timer_advance_ns = ktimer->min_timer_advance_ns;
        }
-       apic->lapic_timer.timer_advance_ns = timer_advance_ns;
 }
 
 static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
@@ -1532,7 +1551,8 @@ static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
        if (guest_tsc < tsc_deadline)
                __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
 
-       if (unlikely(!apic->lapic_timer.timer_advance_adjust_done))
+       if (unlikely(!apic->lapic_timer.timer_advance_adjust_done) ||
+               time_before(apic->lapic_timer.recalc_timer_advance_ns, jiffies))
                adjust_lapic_timer_advance(vcpu, 
apic->lapic_timer.advance_expire_delta);
 }
 
@@ -2310,9 +2330,12 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int 
timer_advance_ns)
        if (timer_advance_ns == -1) {
                apic->lapic_timer.timer_advance_ns = 
LAPIC_TIMER_ADVANCE_ADJUST_INIT;
                apic->lapic_timer.timer_advance_adjust_done = false;
+               apic->lapic_timer.recalc_timer_advance_ns = jiffies;
+               apic->lapic_timer.min_timer_advance_ns = UINT_MAX;
        } else {
                apic->lapic_timer.timer_advance_ns = timer_advance_ns;
                apic->lapic_timer.timer_advance_adjust_done = true;
+               apic->lapic_timer.recalc_timer_advance_ns = MAX_JIFFY_OFFSET;
        }
 
 
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 50053d2..56a05eb 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -31,6 +31,8 @@ struct kvm_timer {
        u32 timer_mode_mask;
        u64 tscdeadline;
        u64 expired_tscdeadline;
+       unsigned long recalc_timer_advance_ns;
+       u32 min_timer_advance_ns;
        u32 timer_advance_ns;
        s64 advance_expire_delta;
        atomic_t pending;                       /* accumulated triggered timers 
*/
-- 
2.7.4

Reply via email to