Re: [PATCH v4 4/5] KVM: LAPIC: Delay trace advance expire delta

2019-05-20 Thread Wanpeng Li
On Mon, 20 May 2019 at 19:41, Paolo Bonzini  wrote:
>
> On 20/05/19 13:36, Wanpeng Li wrote:
> >> Hmm, yeah, that makes sense.  The location of the tracepoint is a bit
> >> weird, but I guess we can add a comment in the code.
> > Do you need me to post a new patchset? :)
>
> No problem.  The final patch that I committed is this:
>
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index c12b090f4fad..f8615872ae64 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -1502,27 +1502,27 @@ static inline void __wait_lapic_expire(struct 
> kvm_vcpu *vcpu, u64 guest_cycles)
>  }
>
>  static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
> - u64 guest_tsc, u64 tsc_deadline)
> + s64 advance_expire_delta)
>  {
> struct kvm_lapic *apic = vcpu->arch.apic;
> u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
> u64 ns;
>
> /* too early */
> -   if (guest_tsc < tsc_deadline) {
> -   ns = (tsc_deadline - guest_tsc) * 100ULL;
> +   if (advance_expire_delta < 0) {
> +   ns = -advance_expire_delta * 100ULL;
> do_div(ns, vcpu->arch.virtual_tsc_khz);
> timer_advance_ns -= min((u32)ns,
> timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
> } else {
> /* too late */
> -   ns = (guest_tsc - tsc_deadline) * 100ULL;
> +   ns = advance_expire_delta * 100ULL;
> do_div(ns, vcpu->arch.virtual_tsc_khz);
> timer_advance_ns += min((u32)ns,
> timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
> }
>
> -   if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
> +   if (abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
> apic->lapic_timer.timer_advance_adjust_done = true;
> if (unlikely(timer_advance_ns > 5000)) {
> timer_advance_ns = 0;
> @@ -1545,13 +1545,13 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
> tsc_deadline = apic->lapic_timer.expired_tscdeadline;
> apic->lapic_timer.expired_tscdeadline = 0;
> guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
> -   trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
> +   apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;
>
> if (guest_tsc < tsc_deadline)
> __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
>
> if (unlikely(!apic->lapic_timer.timer_advance_adjust_done))
> -   adjust_lapic_timer_advance(vcpu, guest_tsc, tsc_deadline);
> +   adjust_lapic_timer_advance(vcpu, 
> apic->lapic_timer.advance_expire_delta);
>  }
>
>  static void start_sw_tscdeadline(struct kvm_lapic *apic)
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index d6d049ba3045..3e72a255543d 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -32,6 +32,7 @@ struct kvm_timer {
> u64 tscdeadline;
> u64 expired_tscdeadline;
> u32 timer_advance_ns;
> +   s64 advance_expire_delta;
> atomic_t pending;   /* accumulated triggered 
> timers */
> bool hv_timer_in_use;
> bool timer_advance_adjust_done;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index e7e57de50a3c..35631505421c 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -8008,6 +8008,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> ++vcpu->stat.exits;
>
> guest_exit_irqoff();
> +   if (lapic_in_kernel(vcpu)) {
> +   s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta;
> +   if (delta != S64_MIN) {
> +   trace_kvm_wait_lapic_expire(vcpu->vcpu_id, delta);
> +   vcpu->arch.apic->lapic_timer.advance_expire_delta = 
> S64_MIN;
> +   }
> +   }
>
> local_irq_enable();
> preempt_enable();
>
> so that KVM tracks whether wait_lapic_expire was called, and do not
> invoke the tracepoint if not.

Looks good to me, thank you. :)

Regards,
Wanpeng Li


Re: [PATCH v4 4/5] KVM: LAPIC: Delay trace advance expire delta

2019-05-20 Thread Paolo Bonzini
On 20/05/19 13:36, Wanpeng Li wrote:
>> Hmm, yeah, that makes sense.  The location of the tracepoint is a bit
>> weird, but I guess we can add a comment in the code.
> Do you need me to post a new patchset? :)

No problem.  The final patch that I committed is this:

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index c12b090f4fad..f8615872ae64 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1502,27 +1502,27 @@ static inline void __wait_lapic_expire(struct kvm_vcpu 
*vcpu, u64 guest_cycles)
 }
 
 static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
- u64 guest_tsc, u64 tsc_deadline)
+ s64 advance_expire_delta)
 {
struct kvm_lapic *apic = vcpu->arch.apic;
u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
u64 ns;
 
/* too early */
-   if (guest_tsc < tsc_deadline) {
-   ns = (tsc_deadline - guest_tsc) * 100ULL;
+   if (advance_expire_delta < 0) {
+   ns = -advance_expire_delta * 100ULL;
do_div(ns, vcpu->arch.virtual_tsc_khz);
timer_advance_ns -= min((u32)ns,
timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
} else {
/* too late */
-   ns = (guest_tsc - tsc_deadline) * 100ULL;
+   ns = advance_expire_delta * 100ULL;
do_div(ns, vcpu->arch.virtual_tsc_khz);
timer_advance_ns += min((u32)ns,
timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
}
 
-   if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
+   if (abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
apic->lapic_timer.timer_advance_adjust_done = true;
if (unlikely(timer_advance_ns > 5000)) {
timer_advance_ns = 0;
@@ -1545,13 +1545,13 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
tsc_deadline = apic->lapic_timer.expired_tscdeadline;
apic->lapic_timer.expired_tscdeadline = 0;
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
-   trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
+   apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;
 
if (guest_tsc < tsc_deadline)
__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
 
if (unlikely(!apic->lapic_timer.timer_advance_adjust_done))
-   adjust_lapic_timer_advance(vcpu, guest_tsc, tsc_deadline);
+   adjust_lapic_timer_advance(vcpu, 
apic->lapic_timer.advance_expire_delta);
 }
 
 static void start_sw_tscdeadline(struct kvm_lapic *apic)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index d6d049ba3045..3e72a255543d 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -32,6 +32,7 @@ struct kvm_timer {
u64 tscdeadline;
u64 expired_tscdeadline;
u32 timer_advance_ns;
+   s64 advance_expire_delta;
atomic_t pending;   /* accumulated triggered timers 
*/
bool hv_timer_in_use;
bool timer_advance_adjust_done;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e7e57de50a3c..35631505421c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8008,6 +8008,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
++vcpu->stat.exits;
 
guest_exit_irqoff();
+   if (lapic_in_kernel(vcpu)) {
+   s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta;
+   if (delta != S64_MIN) {
+   trace_kvm_wait_lapic_expire(vcpu->vcpu_id, delta);
+   vcpu->arch.apic->lapic_timer.advance_expire_delta = 
S64_MIN;
+   }
+   }
 
local_irq_enable();
preempt_enable();

so that KVM tracks whether wait_lapic_expire was called, and do not
invoke the tracepoint if not.

Thanks,

Paolo


Re: [PATCH v4 4/5] KVM: LAPIC: Delay trace advance expire delta

2019-05-20 Thread Wanpeng Li
On Mon, 20 May 2019 at 19:33, Paolo Bonzini  wrote:
>
> On 20/05/19 13:22, Wanpeng Li wrote:
> >>
> >> We would like to move wait_lapic_expire() just before vmentry, which would
> >> place wait_lapic_expire() again inside the extended quiescent state.  Drop
> >> the tracepoint, but add instead another one that can be useful and where
> >> we can check the status of the adaptive tuning procedure.
> > https://lkml.org/lkml/2019/5/15/1435
> >
> > Maybe Sean's comment is reasonable, per-vCPU debugfs entry for
> > adaptive tuning and wait_lapic_expire() tracepoint for hand tuning.
>
> Hmm, yeah, that makes sense.  The location of the tracepoint is a bit
> weird, but I guess we can add a comment in the code.

Do you need me to post a new patchset? :)

Regards,
Wanpeng Li


Re: [PATCH v4 4/5] KVM: LAPIC: Delay trace advance expire delta

2019-05-20 Thread Paolo Bonzini
On 20/05/19 13:22, Wanpeng Li wrote:
>>
>> We would like to move wait_lapic_expire() just before vmentry, which would
>> place wait_lapic_expire() again inside the extended quiescent state.  Drop
>> the tracepoint, but add instead another one that can be useful and where
>> we can check the status of the adaptive tuning procedure.
> https://lkml.org/lkml/2019/5/15/1435
> 
> Maybe Sean's comment is reasonable, per-vCPU debugfs entry for
> adaptive tuning and wait_lapic_expire() tracepoint for hand tuning.

Hmm, yeah, that makes sense.  The location of the tracepoint is a bit
weird, but I guess we can add a comment in the code.

Paolo


Re: [PATCH v4 4/5] KVM: LAPIC: Delay trace advance expire delta

2019-05-20 Thread Wanpeng Li
On Mon, 20 May 2019 at 19:14, Paolo Bonzini  wrote:
>
> On 20/05/19 10:18, Wanpeng Li wrote:
> > From: Wanpeng Li 
> >
> > wait_lapic_expire() call was moved above guest_enter_irqoff() because of
> > its tracepoint, which violated the RCU extended quiescent state invoked
> > by guest_enter_irqoff()[1][2]. This patch simply moves the tracepoint
> > below guest_exit_irqoff() in vcpu_enter_guest(). Snapshot the delta before
> > VM-Enter, but trace it after VM-Exit. This can help us to move
> > wait_lapic_expire() just before vmentry in the later patch.
> >
> > [1] Commit 8b89fe1f6c43 ("kvm: x86: move tracepoints outside extended 
> > quiescent state")
> > [2] https://patchwork.kernel.org/patch/782/
>
> This is a bit confusing, since the delta is printed after the
> corresponding vmexit but the wait is done before the vmentry.  I think
> we can drop the tracepoint:
>
> - 8< 
> From ae148d98d49b96b5222e2c78ac1b1e13cc526d71 Mon Sep 17 00:00:00 2001
> From: Paolo Bonzini 
> Date: Mon, 20 May 2019 13:10:01 +0200
> Subject: [PATCH] KVM: lapic: replace wait_lapic_expire tracepoint with
>  restart_apic_timer
>
> wait_lapic_expire() call was moved above guest_enter_irqoff() because of
> its tracepoint, which violated the RCU extended quiescent state invoked
> by guest_enter_irqoff()[1][2].
>
> We would like to move wait_lapic_expire() just before vmentry, which would
> place wait_lapic_expire() again inside the extended quiescent state.  Drop
> the tracepoint, but add instead another one that can be useful and where
> we can check the status of the adaptive tuning procedure.

https://lkml.org/lkml/2019/5/15/1435

Maybe Sean's comment is reasonable, per-vCPU debugfs entry for
adaptive tuning and wait_lapic_expire() tracepoint for hand tuning.

Regards,
Wanpeng Li

>
> [1] Commit 8b89fe1f6c43 ("kvm: x86: move tracepoints outside extended 
> quiescent state")
> [2] https://patchwork.kernel.org/patch/782/
>
> Signed-off-by: Paolo Bonzini 
>
> ---
>  arch/x86/kvm/lapic.c |  4 +++-
>  arch/x86/kvm/trace.h | 15 +++
>  2 files changed, 10 insertions(+), 9 deletions(-)
>
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index c12b090f4fad..8f05c1d0b486 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -1545,7 +1545,6 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
> tsc_deadline = apic->lapic_timer.expired_tscdeadline;
> apic->lapic_timer.expired_tscdeadline = 0;
> guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
> -   trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
>
> if (guest_tsc < tsc_deadline)
> __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
> @@ -1763,6 +1762,9 @@ static void start_sw_timer(struct kvm_lapic *apic)
>
>  static void restart_apic_timer(struct kvm_lapic *apic)
>  {
> +   trace_kvm_restart_apic_timer(apic->vcpu->vcpu_id,
> +apic->lapic_timer.timer_advance_ns);
> +
> preempt_disable();
>
> if (!apic_lvtt_period(apic) && 
> atomic_read(&apic->lapic_timer.pending))
> diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
> index 4d47a2631d1f..f6e38f3f 100644
> --- a/arch/x86/kvm/trace.h
> +++ b/arch/x86/kvm/trace.h
> @@ -953,24 +953,23 @@
>   __entry->flags)
>  );
>
> -TRACE_EVENT(kvm_wait_lapic_expire,
> -   TP_PROTO(unsigned int vcpu_id, s64 delta),
> -   TP_ARGS(vcpu_id, delta),
> +TRACE_EVENT(kvm_restart_apic_timer,
> +   TP_PROTO(unsigned int vcpu_id, u32 advance),
> +   TP_ARGS(vcpu_id, advance),
>
> TP_STRUCT__entry(
> __field(unsigned int,   vcpu_id )
> -   __field(s64,delta   )
> +   __field(u32,advance )
> ),
>
> TP_fast_assign(
> __entry->vcpu_id   = vcpu_id;
> -   __entry->delta = delta;
> +   __entry->advance   = advance;
> ),
>
> -   TP_printk("vcpu %u: delta %lld (%s)",
> +   TP_printk("vcpu %u: advance %u",
>   __entry->vcpu_id,
> - __entry->delta,
> - __entry->delta < 0 ? "early" : "late")
> + __entry->advance)
>  );
>
>  TRACE_EVENT(kvm_enter_smm,


Re: [PATCH v4 4/5] KVM: LAPIC: Delay trace advance expire delta

2019-05-20 Thread Paolo Bonzini
On 20/05/19 10:18, Wanpeng Li wrote:
> From: Wanpeng Li 
> 
> wait_lapic_expire() call was moved above guest_enter_irqoff() because of 
> its tracepoint, which violated the RCU extended quiescent state invoked 
> by guest_enter_irqoff()[1][2]. This patch simply moves the tracepoint 
> below guest_exit_irqoff() in vcpu_enter_guest(). Snapshot the delta before 
> VM-Enter, but trace it after VM-Exit. This can help us to move 
> wait_lapic_expire() just before vmentry in the later patch.
> 
> [1] Commit 8b89fe1f6c43 ("kvm: x86: move tracepoints outside extended 
> quiescent state")
> [2] https://patchwork.kernel.org/patch/782/

This is a bit confusing, since the delta is printed after the 
corresponding vmexit but the wait is done before the vmentry.  I think 
we can drop the tracepoint:

- 8< 
>From ae148d98d49b96b5222e2c78ac1b1e13cc526d71 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini 
Date: Mon, 20 May 2019 13:10:01 +0200
Subject: [PATCH] KVM: lapic: replace wait_lapic_expire tracepoint with
 restart_apic_timer

wait_lapic_expire() call was moved above guest_enter_irqoff() because of
its tracepoint, which violated the RCU extended quiescent state invoked
by guest_enter_irqoff()[1][2].

We would like to move wait_lapic_expire() just before vmentry, which would
place wait_lapic_expire() again inside the extended quiescent state.  Drop
the tracepoint, but add instead another one that can be useful and where
we can check the status of the adaptive tuning procedure.

[1] Commit 8b89fe1f6c43 ("kvm: x86: move tracepoints outside extended quiescent 
state")
[2] https://patchwork.kernel.org/patch/782/

Signed-off-by: Paolo Bonzini 

---
 arch/x86/kvm/lapic.c |  4 +++-
 arch/x86/kvm/trace.h | 15 +++
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index c12b090f4fad..8f05c1d0b486 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1545,7 +1545,6 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
tsc_deadline = apic->lapic_timer.expired_tscdeadline;
apic->lapic_timer.expired_tscdeadline = 0;
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
-   trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
 
if (guest_tsc < tsc_deadline)
__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
@@ -1763,6 +1762,9 @@ static void start_sw_timer(struct kvm_lapic *apic)
 
 static void restart_apic_timer(struct kvm_lapic *apic)
 {
+   trace_kvm_restart_apic_timer(apic->vcpu->vcpu_id,
+apic->lapic_timer.timer_advance_ns);
+
preempt_disable();
 
if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending))
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 4d47a2631d1f..f6e38f3f 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -953,24 +953,23 @@
  __entry->flags)
 );
 
-TRACE_EVENT(kvm_wait_lapic_expire,
-   TP_PROTO(unsigned int vcpu_id, s64 delta),
-   TP_ARGS(vcpu_id, delta),
+TRACE_EVENT(kvm_restart_apic_timer,
+   TP_PROTO(unsigned int vcpu_id, u32 advance),
+   TP_ARGS(vcpu_id, advance),
 
TP_STRUCT__entry(
__field(unsigned int,   vcpu_id )
-   __field(s64,delta   )
+   __field(u32,advance )
),
 
TP_fast_assign(
__entry->vcpu_id   = vcpu_id;
-   __entry->delta = delta;
+   __entry->advance   = advance;
),
 
-   TP_printk("vcpu %u: delta %lld (%s)",
+   TP_printk("vcpu %u: advance %u",
  __entry->vcpu_id,
- __entry->delta,
- __entry->delta < 0 ? "early" : "late")
+ __entry->advance)
 );
 
 TRACE_EVENT(kvm_enter_smm,