On entry to the guest, secondary threads now wait for the primary to
switch the MMU after loading up most of their state, rather than before.
This means that the secondary threads get into the guest sooner, in the
common case where the secondary threads get to kvmppc_hv_entry before
the primary thread.

On exit, the first thread out increments the exit count and interrupts
the other threads (to get them out of the guest) before saving most
of its state, rather than after.  That means that the other threads
exit sooner and means that the first thread doesn't spend so much
time waiting for the other threads at the point where the MMU gets
switched back to the host.

This pulls out the code that increments the exit count and interrupts
other threads into a separate function, kvmhv_commence_exit().
This also makes sure that r12 and vcpu->arch.trap are set correctly
in some corner cases.

Statistics from /sys/kernel/debug/kvm/vm*/vcpu*/timings show the
improvement.  Aggregating across vcpus for a guest with 32 vcpus,
8 threads/vcore, running on a POWER8, gives this before the change:

 rm_entry:     avg 3919.3ns (244 - 56492, 742665 samples)
  rm_exit:     avg 4102.5ns (130 - 36272, 704056 samples)
  rm_intr:     avg 1006.0ns (12 - 75040, 2819905 samples)

and this after the change:

 rm_entry:     avg 2979.8ns (258 - 83740, 836403 samples)
  rm_exit:     avg 3992.9ns (12 - 45572, 838034 samples)
  rm_intr:     avg  922.2ns (12 - 66694, 3127066 samples)

showing a substantial reduction in the time spent in the real-mode
guest entry code, and smaller reductions in the real mode guest exit
and interrupt handling times.  (The test was to start the guest and
boot Fedora 20 big-endian to the login prompt.)

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 240 +++++++++++++++++++-------------
 1 file changed, 141 insertions(+), 99 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 04728ce..ff1461d 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -175,6 +175,19 @@ kvmppc_primary_no_guest:
        /* put the HDEC into the DEC, since HDEC interrupts don't wake us */
        mfspr   r3, SPRN_HDEC
        mtspr   SPRN_DEC, r3
+       /*
+        * Make sure the primary has finished the MMU switch.
+        * We should never get here on a secondary thread, but
+        * check it for robustness' sake.
+        */
+       ld      r5, HSTATE_KVM_VCORE(r13)
+65:    lbz     r0, VCORE_IN_GUEST(r5)
+       cmpwi   r0, 0
+       beq     65b
+       /* Set LPCR. */
+       ld      r8,VCORE_LPCR(r5)
+       mtspr   SPRN_LPCR,r8
+       isync
        /* set our bit in napping_threads */
        ld      r5, HSTATE_KVM_VCORE(r13)
        lbz     r7, HSTATE_PTID(r13)
@@ -206,7 +219,7 @@ kvm_novcpu_wakeup:
 
        /* check the wake reason */
        bl      kvmppc_check_wake_reason
-       
+
        /* see if any other thread is already exiting */
        lwz     r0, VCORE_ENTRY_EXIT(r5)
        cmpwi   r0, 0x100
@@ -243,7 +256,12 @@ kvm_novcpu_wakeup:
 
 kvm_novcpu_exit:
        ld      r4, HSTATE_KVM_VCPU(r13)
-       b       hdec_soon
+       cmpdi   r4, 0
+       beq     13f
+       addi    r3, r4, VCPU_TB_RMEXIT
+       bl      kvmhv_accumulate_time
+13:    bl      kvmhv_commence_exit
+       b       kvmhv_switch_to_host
 
 /*
  * We come in here when wakened from nap mode.
@@ -417,7 +435,7 @@ kvmppc_hv_entry:
        ld      r9,VCORE_KVM(r5)        /* pointer to struct kvm */
        lbz     r6,HSTATE_PTID(r13)
        cmpwi   r6,0
-       bne     20f
+       bne     10f
        ld      r6,KVM_SDR1(r9)
        lwz     r7,KVM_LPID(r9)
        li      r0,LPID_RSVD            /* switch to reserved LPID */
@@ -488,26 +506,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
        li      r0,1
        stb     r0,VCORE_IN_GUEST(r5)   /* signal secondaries to continue */
-       b       10f
-
-       /* Secondary threads wait for primary to have done partition switch */
-20:    lbz     r0,VCORE_IN_GUEST(r5)
-       cmpwi   r0,0
-       beq     20b
-
-       /* Set LPCR. */
-10:    ld      r8,VCORE_LPCR(r5)
-       mtspr   SPRN_LPCR,r8
-       isync
-
-       /* Check if HDEC expires soon */
-       mfspr   r3,SPRN_HDEC
-       cmpwi   r3,512          /* 1 microsecond */
-       li      r12,BOOK3S_INTERRUPT_HV_DECREMENTER
-       blt     hdec_soon
 
        /* Do we have a guest vcpu to run? */
-       cmpdi   r4, 0
+10:    cmpdi   r4, 0
        beq     kvmppc_primary_no_guest
 kvmppc_got_guest:
 
@@ -832,6 +833,30 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
        clrrdi  r6,r6,1
        mtspr   SPRN_CTRLT,r6
 4:
+       /* Secondary threads wait for primary to have done partition switch */
+       ld      r5, HSTATE_KVM_VCORE(r13)
+       lbz     r6, HSTATE_PTID(r13)
+       cmpwi   r6, 0
+       beq     21f
+       lbz     r0, VCORE_IN_GUEST(r5)
+       cmpwi   r0, 0
+       bne     21f
+       HMT_LOW
+20:    lbz     r0, VCORE_IN_GUEST(r5)
+       cmpwi   r0, 0
+       beq     20b
+       HMT_MEDIUM
+21:
+       /* Set LPCR. */
+       ld      r8,VCORE_LPCR(r5)
+       mtspr   SPRN_LPCR,r8
+       isync
+
+       /* Check if HDEC expires soon */
+       mfspr   r3, SPRN_HDEC
+       cmpwi   r3, 512         /* 1 microsecond */
+       blt     hdec_soon
+
        ld      r6, VCPU_CTR(r4)
        lwz     r7, VCPU_XER(r4)
 
@@ -936,18 +961,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
        b       .
 
 secondary_too_late:
+       li      r12, 0
        cmpdi   r4, 0
        beq     11f
+       stw     r12, VCPU_TRAP(r4)
        addi    r3, r4, VCPU_TB_RMEXIT
        bl      kvmhv_accumulate_time
 11:    b       kvmhv_switch_to_host
 
 hdec_soon:
-       cmpdi   r4, 0
-       beq     12f
+       li      r12, BOOK3S_INTERRUPT_HV_DECREMENTER
+       stw     r12, VCPU_TRAP(r4)
+       mr      r9, r4
        addi    r3, r4, VCPU_TB_RMEXIT
        bl      kvmhv_accumulate_time
-12:    b       kvmhv_do_exit
+       b       guest_exit_cont
 
 /******************************************************************************
  *                                                                            *
@@ -1108,7 +1136,7 @@ guest_exit_cont:          /* r9 = vcpu, r12 = trap, r13 = paca */
        stw     r7, VCPU_DSISR(r9)
        /* don't overwrite fault_dar/fault_dsisr if HDSI */
        cmpwi   r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
-       beq     6f
+       beq     mc_cont
        std     r6, VCPU_FAULT_DAR(r9)
        stw     r7, VCPU_FAULT_DSISR(r9)
 
@@ -1120,8 +1148,11 @@ mc_cont:
        mr      r4, r9
        bl      kvmhv_accumulate_time
 
+       /* Increment exit count, poke other threads to exit */
+       bl      kvmhv_commence_exit
+
        /* Save guest CTRL register, set runlatch to 1 */
-6:     mfspr   r6,SPRN_CTRLF
+       mfspr   r6,SPRN_CTRLF
        stw     r6,VCPU_CTRL(r9)
        andi.   r0,r6,1
        bne     4f
@@ -1463,83 +1494,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
        slbia
        ptesync
 
-kvmhv_do_exit:                 /* r12 = trap, r13 = paca */
        /*
         * POWER7/POWER8 guest -> host partition switch code.
         * We don't have to lock against tlbies but we do
         * have to coordinate the hardware threads.
         */
-       /* Increment the threads-exiting-guest count in the 0xff00
-          bits of vcore->entry_exit_count */
-       ld      r5,HSTATE_KVM_VCORE(r13)
-       addi    r6,r5,VCORE_ENTRY_EXIT
-41:    lwarx   r3,0,r6
-       addi    r0,r3,0x100
-       stwcx.  r0,0,r6
-       bne     41b
-       isync           /* order stwcx. vs. reading napping_threads */
-
-       /*
-        * At this point we have an interrupt that we have to pass
-        * up to the kernel or qemu; we can't handle it in real mode.
-        * Thus we have to do a partition switch, so we have to
-        * collect the other threads, if we are the first thread
-        * to take an interrupt.  To do this, we set the HDEC to 0,
-        * which causes an HDEC interrupt in all threads within 2ns
-        * because the HDEC register is shared between all 4 threads.
-        * However, we don't need to bother if this is an HDEC
-        * interrupt, since the other threads will already be on their
-        * way here in that case.
-        */
-       cmpwi   r3,0x100        /* Are we the first here? */
-       bge     43f
-       cmpwi   r12,BOOK3S_INTERRUPT_HV_DECREMENTER
-       beq     43f
-       li      r0,0
-       mtspr   SPRN_HDEC,r0
-
-       /*
-        * Send a message or IPI to any napping threads, since an HDEC interrupt
-        * doesn't wake CPUs up from nap.
-        */
-       lwz     r3,VCORE_NAPPING_THREADS(r5)
-       lbz     r4,HSTATE_PTID(r13)
-       li      r0,1
-       sld     r0,r0,r4
-       andc.   r3,r3,r0                /* no sense IPI'ing ourselves */
-       beq     43f
-       /* Order entry/exit update vs. IPIs */
-       sync
-BEGIN_FTR_SECTION
-       b       45f
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
-       /* Use msgsnd on POWER8 */
-       lhz     r6, VCORE_PCPU(r5)
-       clrldi  r6, r6, 64-3
-       oris    r6, r6, (PPC_DBELL_SERVER << (63-36))@h
-42:    andi.   r0, r3, 1
-       beq     44f
-       PPC_MSGSND(6)
-44:    srdi.   r3, r3, 1
-       addi    r6, r6, 1
-       bne     42b
-       b       kvmhv_switch_to_host
-       /* Use IPIs on POWER7 */
-45:    mulli   r4,r4,PACA_SIZE         /* get paca for thread 0 */
-       subf    r6,r4,r13
-42:    andi.   r0,r3,1
-       beq     44f
-       ld      r8,HSTATE_XICS_PHYS(r6) /* get thread's XICS reg addr */
-       li      r0,IPI_PRIORITY
-       li      r7,XICS_MFRR
-       stbcix  r0,r7,r8                /* trigger the IPI */
-44:    srdi.   r3,r3,1
-       addi    r6,r6,PACA_SIZE
-       bne     42b
-
 kvmhv_switch_to_host:
        /* Secondary threads wait for primary to do partition switch */
-43:    ld      r5,HSTATE_KVM_VCORE(r13)
+       ld      r5,HSTATE_KVM_VCORE(r13)
        ld      r4,VCORE_KVM(r5)        /* pointer to struct kvm */
        lbz     r3,HSTATE_PTID(r13)
        cmpwi   r3,0
@@ -1639,6 +1601,84 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
        mtlr    r0
        blr
 
+kvmhv_commence_exit:           /* r12 = trap, r13 = paca, doesn't trash r9 */
+       mflr    r0
+       std     r0, PPC_LR_STKOFF(r1)
+       stdu    r1, -PPC_MIN_STKFRM(r1)
+
+       /* Increment the threads-exiting-guest count in the 0xff00
+          bits of vcore->entry_exit_count */
+       ld      r5,HSTATE_KVM_VCORE(r13)
+       addi    r6,r5,VCORE_ENTRY_EXIT
+41:    lwarx   r3,0,r6
+       addi    r0,r3,0x100
+       stwcx.  r0,0,r6
+       bne     41b
+       isync           /* order stwcx. vs. reading napping_threads */
+
+       /*
+        * At this point we have an interrupt that we have to pass
+        * up to the kernel or qemu; we can't handle it in real mode.
+        * Thus we have to do a partition switch, so we have to
+        * collect the other threads, if we are the first thread
+        * to take an interrupt.  To do this, we set the HDEC to 0,
+        * which causes an HDEC interrupt in all threads within 2ns
+        * because the HDEC register is shared between all 4 threads.
+        * However, we don't need to bother if this is an HDEC
+        * interrupt, since the other threads will already be on their
+        * way here in that case.
+        */
+       cmpwi   r3,0x100        /* Are we the first here? */
+       bge     43f
+       cmpwi   r12,BOOK3S_INTERRUPT_HV_DECREMENTER
+       beq     43f
+       li      r0,0
+       mtspr   SPRN_HDEC,r0
+
+       /*
+        * Send a message or IPI to any napping threads, since an HDEC interrupt
+        * doesn't wake CPUs up from nap.
+        */
+       lwz     r3,VCORE_NAPPING_THREADS(r5)
+       lbz     r4,HSTATE_PTID(r13)
+       li      r0,1
+       sld     r0,r0,r4
+       andc.   r3,r3,r0                /* no sense IPI'ing ourselves */
+       beq     43f
+       /* Order entry/exit update vs. IPIs */
+       sync
+BEGIN_FTR_SECTION
+       b       45f
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+       /* Use msgsnd on POWER8 */
+       lhz     r6, VCORE_PCPU(r5)
+       clrldi  r6, r6, 64-3
+       oris    r6, r6, (PPC_DBELL_SERVER << (63-36))@h
+42:    andi.   r0, r3, 1
+       beq     44f
+       PPC_MSGSND(6)
+44:    srdi.   r3, r3, 1
+       addi    r6, r6, 1
+       bne     42b
+       b       43f
+       /* Use IPIs on POWER7 */
+45:    mulli   r4,r4,PACA_SIZE         /* get paca for thread 0 */
+       subf    r6,r4,r13
+42:    andi.   r0,r3,1
+       beq     44f
+       ld      r8,HSTATE_XICS_PHYS(r6) /* get thread's XICS reg addr */
+       li      r0,IPI_PRIORITY
+       li      r7,XICS_MFRR
+       stbcix  r0,r7,r8                /* trigger the IPI */
+44:    srdi.   r3,r3,1
+       addi    r6,r6,PACA_SIZE
+       bne     42b
+
+43:    ld      r0, PPC_MIN_STKFRM+PPC_LR_STKOFF(r1)
+       addi    r1, r1, PPC_MIN_STKFRM
+       mtlr    r0
+       blr
+
 /*
  * Check whether an HDSI is an HPTE not found fault or something else.
  * If it is an HPTE not found fault that is due to the guest accessing
@@ -2074,8 +2114,8 @@ _GLOBAL(kvmppc_h_cede)            /* r3 = vcpu pointer, r11 = msr, r13 = paca */
        lbz     r5,VCPU_PRODDED(r3)
        cmpwi   r5,0
        bne     kvm_cede_prodded
-       li      r0,0            /* set trap to 0 to say hcall is handled */
-       stw     r0,VCPU_TRAP(r3)
+       li      r12,0           /* set trap to 0 to say hcall is handled */
+       stw     r12,VCPU_TRAP(r3)
        li      r0,H_SUCCESS
        std     r0,VCPU_GPR(R3)(r3)
 
@@ -2279,7 +2319,8 @@ kvm_cede_prodded:
 
        /* we've ceded but we want to give control to the host */
 kvm_cede_exit:
-       b       hcall_real_fallback
+       ld      r9, HSTATE_KVM_VCPU(r13)
+       b       guest_exit_cont
 
        /* Try to handle a machine check in real mode */
 machine_check_realmode:
@@ -2417,6 +2458,7 @@ kvmppc_read_intr:
        bne-    43f
 
        /* OK, it's an IPI for us */
+       li      r12, 0
        li      r3, -1
 1:     blr
 
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to