Re: [PATCH v4 4/5] KVM: LAPIC: Delay trace advance expire delta
On Mon, 20 May 2019 at 19:41, Paolo Bonzini wrote: > > On 20/05/19 13:36, Wanpeng Li wrote: > >> Hmm, yeah, that makes sense. The location of the tracepoint is a bit > >> weird, but I guess we can add a comment in the code. > > Do you need me to post a new patchset? :) > > No problem. The final patch that I committed is this: > > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > index c12b090f4fad..f8615872ae64 100644 > --- a/arch/x86/kvm/lapic.c > +++ b/arch/x86/kvm/lapic.c > @@ -1502,27 +1502,27 @@ static inline void __wait_lapic_expire(struct > kvm_vcpu *vcpu, u64 guest_cycles) > } > > static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu, > - u64 guest_tsc, u64 tsc_deadline) > + s64 advance_expire_delta) > { > struct kvm_lapic *apic = vcpu->arch.apic; > u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns; > u64 ns; > > /* too early */ > - if (guest_tsc < tsc_deadline) { > - ns = (tsc_deadline - guest_tsc) * 100ULL; > + if (advance_expire_delta < 0) { > + ns = -advance_expire_delta * 100ULL; > do_div(ns, vcpu->arch.virtual_tsc_khz); > timer_advance_ns -= min((u32)ns, > timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); > } else { > /* too late */ > - ns = (guest_tsc - tsc_deadline) * 100ULL; > + ns = advance_expire_delta * 100ULL; > do_div(ns, vcpu->arch.virtual_tsc_khz); > timer_advance_ns += min((u32)ns, > timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); > } > > - if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE) > + if (abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_DONE) > apic->lapic_timer.timer_advance_adjust_done = true; > if (unlikely(timer_advance_ns > 5000)) { > timer_advance_ns = 0; > @@ -1545,13 +1545,13 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) > tsc_deadline = apic->lapic_timer.expired_tscdeadline; > apic->lapic_timer.expired_tscdeadline = 0; > guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); > - trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline); > + 
apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline; > > if (guest_tsc < tsc_deadline) > __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc); > > if (unlikely(!apic->lapic_timer.timer_advance_adjust_done)) > - adjust_lapic_timer_advance(vcpu, guest_tsc, tsc_deadline); > + adjust_lapic_timer_advance(vcpu, > apic->lapic_timer.advance_expire_delta); > } > > static void start_sw_tscdeadline(struct kvm_lapic *apic) > diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h > index d6d049ba3045..3e72a255543d 100644 > --- a/arch/x86/kvm/lapic.h > +++ b/arch/x86/kvm/lapic.h > @@ -32,6 +32,7 @@ struct kvm_timer { > u64 tscdeadline; > u64 expired_tscdeadline; > u32 timer_advance_ns; > + s64 advance_expire_delta; > atomic_t pending; /* accumulated triggered > timers */ > bool hv_timer_in_use; > bool timer_advance_adjust_done; > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index e7e57de50a3c..35631505421c 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -8008,6 +8008,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) > ++vcpu->stat.exits; > > guest_exit_irqoff(); > + if (lapic_in_kernel(vcpu)) { > + s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta; > + if (delta != S64_MIN) { > + trace_kvm_wait_lapic_expire(vcpu->vcpu_id, delta); > + vcpu->arch.apic->lapic_timer.advance_expire_delta = > S64_MIN; > + } > + } > > local_irq_enable(); > preempt_enable(); > > so that KVM tracks whether wait_lapic_expire was called, and do not > invoke the tracepoint if not. Looks good to me, thank you. :) Regards, Wanpeng Li
Re: [PATCH v4 4/5] KVM: LAPIC: Delay trace advance expire delta
On 20/05/19 13:36, Wanpeng Li wrote: >> Hmm, yeah, that makes sense. The location of the tracepoint is a bit >> weird, but I guess we can add a comment in the code. > Do you need me to post a new patchset? :) No problem. The final patch that I committed is this: diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index c12b090f4fad..f8615872ae64 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1502,27 +1502,27 @@ static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles) } static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu, - u64 guest_tsc, u64 tsc_deadline) + s64 advance_expire_delta) { struct kvm_lapic *apic = vcpu->arch.apic; u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns; u64 ns; /* too early */ - if (guest_tsc < tsc_deadline) { - ns = (tsc_deadline - guest_tsc) * 100ULL; + if (advance_expire_delta < 0) { + ns = -advance_expire_delta * 100ULL; do_div(ns, vcpu->arch.virtual_tsc_khz); timer_advance_ns -= min((u32)ns, timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); } else { /* too late */ - ns = (guest_tsc - tsc_deadline) * 100ULL; + ns = advance_expire_delta * 100ULL; do_div(ns, vcpu->arch.virtual_tsc_khz); timer_advance_ns += min((u32)ns, timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); } - if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE) + if (abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_DONE) apic->lapic_timer.timer_advance_adjust_done = true; if (unlikely(timer_advance_ns > 5000)) { timer_advance_ns = 0; @@ -1545,13 +1545,13 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) tsc_deadline = apic->lapic_timer.expired_tscdeadline; apic->lapic_timer.expired_tscdeadline = 0; guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); - trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline); + apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline; if (guest_tsc < tsc_deadline) __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc); if 
(unlikely(!apic->lapic_timer.timer_advance_adjust_done)) - adjust_lapic_timer_advance(vcpu, guest_tsc, tsc_deadline); + adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta); } static void start_sw_tscdeadline(struct kvm_lapic *apic) diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index d6d049ba3045..3e72a255543d 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -32,6 +32,7 @@ struct kvm_timer { u64 tscdeadline; u64 expired_tscdeadline; u32 timer_advance_ns; + s64 advance_expire_delta; atomic_t pending; /* accumulated triggered timers */ bool hv_timer_in_use; bool timer_advance_adjust_done; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e7e57de50a3c..35631505421c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8008,6 +8008,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) ++vcpu->stat.exits; guest_exit_irqoff(); + if (lapic_in_kernel(vcpu)) { + s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta; + if (delta != S64_MIN) { + trace_kvm_wait_lapic_expire(vcpu->vcpu_id, delta); + vcpu->arch.apic->lapic_timer.advance_expire_delta = S64_MIN; + } + } local_irq_enable(); preempt_enable(); so that KVM tracks whether wait_lapic_expire() was called, and does not invoke the tracepoint if it was not. Thanks, Paolo
Re: [PATCH v4 4/5] KVM: LAPIC: Delay trace advance expire delta
On Mon, 20 May 2019 at 19:33, Paolo Bonzini wrote: > > On 20/05/19 13:22, Wanpeng Li wrote: > >> > >> We would like to move wait_lapic_expire() just before vmentry, which would > >> place wait_lapic_expire() again inside the extended quiescent state. Drop > >> the tracepoint, but add instead another one that can be useful and where > >> we can check the status of the adaptive tuning procedure. > > https://lkml.org/lkml/2019/5/15/1435 > > > > Maybe Sean's comment is reasonable, per-vCPU debugfs entry for > > adaptive tuning and wait_lapic_expire() tracepoint for hand tuning. > > Hmm, yeah, that makes sense. The location of the tracepoint is a bit > weird, but I guess we can add a comment in the code. Do you need me to post a new patchset? :) Regards, Wanpeng Li
Re: [PATCH v4 4/5] KVM: LAPIC: Delay trace advance expire delta
On 20/05/19 13:22, Wanpeng Li wrote: >> >> We would like to move wait_lapic_expire() just before vmentry, which would >> place wait_lapic_expire() again inside the extended quiescent state. Drop >> the tracepoint, but add instead another one that can be useful and where >> we can check the status of the adaptive tuning procedure. > https://lkml.org/lkml/2019/5/15/1435 > > Maybe Sean's comment is reasonable, per-vCPU debugfs entry for > adaptive tuning and wait_lapic_expire() tracepoint for hand tuning. Hmm, yeah, that makes sense. The location of the tracepoint is a bit weird, but I guess we can add a comment in the code. Paolo
Re: [PATCH v4 4/5] KVM: LAPIC: Delay trace advance expire delta
On Mon, 20 May 2019 at 19:14, Paolo Bonzini wrote: > > On 20/05/19 10:18, Wanpeng Li wrote: > > From: Wanpeng Li > > > > wait_lapic_expire() call was moved above guest_enter_irqoff() because of > > its tracepoint, which violated the RCU extended quiescent state invoked > > by guest_enter_irqoff()[1][2]. This patch simply moves the tracepoint > > below guest_exit_irqoff() in vcpu_enter_guest(). Snapshot the delta before > > VM-Enter, but trace it after VM-Exit. This can help us to move > > wait_lapic_expire() just before vmentry in the later patch. > > > > [1] Commit 8b89fe1f6c43 ("kvm: x86: move tracepoints outside extended > > quiescent state") > > [2] https://patchwork.kernel.org/patch/782/ > > This is a bit confusing, since the delta is printed after the > corresponding vmexit but the wait is done before the vmentry. I think > we can drop the tracepoint: > > - 8< > From ae148d98d49b96b5222e2c78ac1b1e13cc526d71 Mon Sep 17 00:00:00 2001 > From: Paolo Bonzini > Date: Mon, 20 May 2019 13:10:01 +0200 > Subject: [PATCH] KVM: lapic: replace wait_lapic_expire tracepoint with > restart_apic_timer > > wait_lapic_expire() call was moved above guest_enter_irqoff() because of > its tracepoint, which violated the RCU extended quiescent state invoked > by guest_enter_irqoff()[1][2]. > > We would like to move wait_lapic_expire() just before vmentry, which would > place wait_lapic_expire() again inside the extended quiescent state. Drop > the tracepoint, but add instead another one that can be useful and where > we can check the status of the adaptive tuning procedure. https://lkml.org/lkml/2019/5/15/1435 Maybe Sean's comment is reasonable: a per-vCPU debugfs entry for adaptive tuning, and the wait_lapic_expire() tracepoint for hand tuning. 
Regards, Wanpeng Li > > [1] Commit 8b89fe1f6c43 ("kvm: x86: move tracepoints outside extended > quiescent state") > [2] https://patchwork.kernel.org/patch/782/ > > Signed-off-by: Paolo Bonzini > > --- > arch/x86/kvm/lapic.c | 4 +++- > arch/x86/kvm/trace.h | 15 +++ > 2 files changed, 10 insertions(+), 9 deletions(-) > > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > index c12b090f4fad..8f05c1d0b486 100644 > --- a/arch/x86/kvm/lapic.c > +++ b/arch/x86/kvm/lapic.c > @@ -1545,7 +1545,6 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) > tsc_deadline = apic->lapic_timer.expired_tscdeadline; > apic->lapic_timer.expired_tscdeadline = 0; > guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); > - trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline); > > if (guest_tsc < tsc_deadline) > __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc); > @@ -1763,6 +1762,9 @@ static void start_sw_timer(struct kvm_lapic *apic) > > static void restart_apic_timer(struct kvm_lapic *apic) > { > + trace_kvm_restart_apic_timer(apic->vcpu->vcpu_id, > +apic->lapic_timer.timer_advance_ns); > + > preempt_disable(); > > if (!apic_lvtt_period(apic) && > atomic_read(&apic->lapic_timer.pending)) > diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h > index 4d47a2631d1f..f6e38f3f 100644 > --- a/arch/x86/kvm/trace.h > +++ b/arch/x86/kvm/trace.h > @@ -953,24 +953,23 @@ > __entry->flags) > ); > > -TRACE_EVENT(kvm_wait_lapic_expire, > - TP_PROTO(unsigned int vcpu_id, s64 delta), > - TP_ARGS(vcpu_id, delta), > +TRACE_EVENT(kvm_restart_apic_timer, > + TP_PROTO(unsigned int vcpu_id, u32 advance), > + TP_ARGS(vcpu_id, advance), > > TP_STRUCT__entry( > __field(unsigned int, vcpu_id ) > - __field(s64,delta ) > + __field(u32,advance ) > ), > > TP_fast_assign( > __entry->vcpu_id = vcpu_id; > - __entry->delta = delta; > + __entry->advance = advance; > ), > > - TP_printk("vcpu %u: delta %lld (%s)", > + TP_printk("vcpu %u: advance %u", > __entry->vcpu_id, > - __entry->delta, > - __entry->delta < 0 
? "early" : "late") > + __entry->advance) > ); > > TRACE_EVENT(kvm_enter_smm,
Re: [PATCH v4 4/5] KVM: LAPIC: Delay trace advance expire delta
On 20/05/19 10:18, Wanpeng Li wrote: > From: Wanpeng Li > > wait_lapic_expire() call was moved above guest_enter_irqoff() because of > its tracepoint, which violated the RCU extended quiescent state invoked > by guest_enter_irqoff()[1][2]. This patch simply moves the tracepoint > below guest_exit_irqoff() in vcpu_enter_guest(). Snapshot the delta before > VM-Enter, but trace it after VM-Exit. This can help us to move > wait_lapic_expire() just before vmentry in the later patch. > > [1] Commit 8b89fe1f6c43 ("kvm: x86: move tracepoints outside extended > quiescent state") > [2] https://patchwork.kernel.org/patch/782/ This is a bit confusing, since the delta is printed after the corresponding vmexit but the wait is done before the vmentry. I think we can drop the tracepoint: ----- 8< ----- From ae148d98d49b96b5222e2c78ac1b1e13cc526d71 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 20 May 2019 13:10:01 +0200 Subject: [PATCH] KVM: lapic: replace wait_lapic_expire tracepoint with restart_apic_timer wait_lapic_expire() call was moved above guest_enter_irqoff() because of its tracepoint, which violated the RCU extended quiescent state invoked by guest_enter_irqoff()[1][2]. We would like to move wait_lapic_expire() just before vmentry, which would place wait_lapic_expire() again inside the extended quiescent state. Drop the tracepoint, but add instead another one that can be useful and where we can check the status of the adaptive tuning procedure. 
[1] Commit 8b89fe1f6c43 ("kvm: x86: move tracepoints outside extended quiescent state") [2] https://patchwork.kernel.org/patch/782/ Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 4 +++- arch/x86/kvm/trace.h | 15 +++ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index c12b090f4fad..8f05c1d0b486 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1545,7 +1545,6 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) tsc_deadline = apic->lapic_timer.expired_tscdeadline; apic->lapic_timer.expired_tscdeadline = 0; guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); - trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline); if (guest_tsc < tsc_deadline) __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc); @@ -1763,6 +1762,9 @@ static void start_sw_timer(struct kvm_lapic *apic) static void restart_apic_timer(struct kvm_lapic *apic) { + trace_kvm_restart_apic_timer(apic->vcpu->vcpu_id, +apic->lapic_timer.timer_advance_ns); + preempt_disable(); if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending)) diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 4d47a2631d1f..f6e38f3f 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -953,24 +953,23 @@ __entry->flags) ); -TRACE_EVENT(kvm_wait_lapic_expire, - TP_PROTO(unsigned int vcpu_id, s64 delta), - TP_ARGS(vcpu_id, delta), +TRACE_EVENT(kvm_restart_apic_timer, + TP_PROTO(unsigned int vcpu_id, u32 advance), + TP_ARGS(vcpu_id, advance), TP_STRUCT__entry( __field(unsigned int, vcpu_id ) - __field(s64,delta ) + __field(u32,advance ) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; - __entry->delta = delta; + __entry->advance = advance; ), - TP_printk("vcpu %u: delta %lld (%s)", + TP_printk("vcpu %u: advance %u", __entry->vcpu_id, - __entry->delta, - __entry->delta < 0 ? "early" : "late") + __entry->advance) ); TRACE_EVENT(kvm_enter_smm,