On 30.03.2011, at 01:43, Scott Wood wrote:

> This is done lazily; the SPE save will be done only if the guest has used
> SPE since the last preemption or heavyweight exit, and restore will be
> done only on demand.
> 
> For SPEFSCR, Linux already switches it on context switch (non-lazily), so
> the only remaining bit is to save it between qemu and the guest.
> 
> A vcpu field "msr_block" is introduced in order to be the subtractive
> (and dynamic) equivalent of KVMPPC_MSR_MASK -- bits that are set in
> msr_block are prevented from being set in the hardware MSR while in the
> guest.
> 
> Signed-off-by: Liu Yu <[email protected]>
> Signed-off-by: Scott Wood <[email protected]>
> ---
> Changes in v3:
> - removed fsl ifdef
> - inverted sense of msr_block, so zero-initialization works
>   (more intuitive this way, too)
> - eliminated msr_block accessors
> - handled SPE exceptions when kernel doesn't have SPE enabled
> - dropped the wrapper around __kvmppc_vcpu_run(), as the state saving
>   in kvmppc_core_vcpu_put should be sufficient, and the loading
>   on heavyweight entry was a dubious optimization (we don't do it
>   on sched-in, it would only really improve things if you have
>   a tight loop of Qemu I/O combined with SPE instructions, and it
>   can cause a guest to be considered to be actively using SPE even if
>   it's just had the MSR bit set without use for a long time)
> 
> arch/powerpc/include/asm/kvm_host.h  |    7 ++++
> arch/powerpc/include/asm/reg_booke.h |    1 +
> arch/powerpc/kernel/asm-offsets.c    |    9 +++++
> arch/powerpc/kvm/booke.c             |   36 +++++++++++++++++-
> arch/powerpc/kvm/booke_interrupts.S  |   67 +++++++++++++++++++++++++++++++++-
> arch/powerpc/kvm/e500.c              |   26 +++++++++++++-
> 6 files changed, 142 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_host.h 
> b/arch/powerpc/include/asm/kvm_host.h
> index bba3b9b..6717049 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -195,6 +195,12 @@ struct kvm_vcpu_arch {
>       u64 fpr[32];
>       u64 fpscr;
> 
> +#ifdef CONFIG_SPE
> +     ulong evr[32];
> +     ulong spefscr;
> +     ulong host_spefscr;
> +     u64 acc;
> +#endif
> #ifdef CONFIG_ALTIVEC
>       vector128 vr[32];
>       vector128 vscr;
> @@ -215,6 +221,7 @@ struct kvm_vcpu_arch {
>       ulong lr;
> 
>       ulong xer;
> +     ulong msr_block;
>       u32 cr;
> #endif
> 
> diff --git a/arch/powerpc/include/asm/reg_booke.h 
> b/arch/powerpc/include/asm/reg_booke.h
> index 3b1a9b7..2705f9a 100644
> --- a/arch/powerpc/include/asm/reg_booke.h
> +++ b/arch/powerpc/include/asm/reg_booke.h
> @@ -312,6 +312,7 @@
> #define ESR_ILK               0x00100000      /* Instr. Cache Locking */
> #define ESR_PUO               0x00040000      /* Unimplemented Operation 
> exception */
> #define ESR_BO                0x00020000      /* Byte Ordering */
> +#define ESR_SPV              0x00000080      /* Signal Processing operation 
> */
> 
> /* Bit definitions related to the DBCR0. */
> #if defined(CONFIG_40x)
> diff --git a/arch/powerpc/kernel/asm-offsets.c 
> b/arch/powerpc/kernel/asm-offsets.c
> index 23e6a93..52400ff 100644
> --- a/arch/powerpc/kernel/asm-offsets.c
> +++ b/arch/powerpc/kernel/asm-offsets.c
> @@ -403,6 +403,9 @@ int main(void)
>       DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared));
>       DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr));
> 
> +#ifdef CONFIG_BOOKE
> +     DEFINE(VCPU_MSR_BLOCK, offsetof(struct kvm_vcpu, arch.msr_block));
> +#endif
>       /* book3s */
> #ifdef CONFIG_PPC_BOOK3S
>       DEFINE(VCPU_HOST_RETIP, offsetof(struct kvm_vcpu, arch.host_retip));
> @@ -494,6 +497,12 @@ int main(void)
>       DEFINE(TLBCAM_MAS3, offsetof(struct tlbcam, MAS3));
>       DEFINE(TLBCAM_MAS7, offsetof(struct tlbcam, MAS7));
> #endif
> +#ifdef CONFIG_SPE
> +     DEFINE(VCPU_EVR, offsetof(struct kvm_vcpu, arch.evr[0]));
> +     DEFINE(VCPU_ACC, offsetof(struct kvm_vcpu, arch.acc));
> +     DEFINE(VCPU_SPEFSCR, offsetof(struct kvm_vcpu, arch.spefscr));
> +     DEFINE(VCPU_HOST_SPEFSCR, offsetof(struct kvm_vcpu, arch.host_spefscr));
> +#endif /* CONFIG_SPE */
> 
> #ifdef CONFIG_KVM_EXIT_TIMING
>       DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu,
> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> index ef76acb..ade7cf6 100644
> --- a/arch/powerpc/kvm/booke.c
> +++ b/arch/powerpc/kvm/booke.c
> @@ -13,6 +13,7 @@
>  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
>  *
>  * Copyright IBM Corp. 2007
> + * Copyright 2010-2011 Freescale Semiconductor, Inc.
>  *
>  * Authors: Hollis Blanchard <[email protected]>
>  *          Christian Ehrhardt <[email protected]>
> @@ -344,10 +345,19 @@ int kvmppc_handle_exit(struct kvm_run *run, struct 
> kvm_vcpu *vcpu,
>               r = RESUME_GUEST;
>               break;
> 
> -     case BOOKE_INTERRUPT_SPE_UNAVAIL:
> -             kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_UNAVAIL);
> +#ifdef CONFIG_SPE
> +     case BOOKE_INTERRUPT_SPE_UNAVAIL: {
> +             extern void kvmppc_vcpu_spe_load(struct kvm_vcpu *vcpu);
> +
> +             /* reload the SPE env if guest first use SPE since last save */
> +             if (vcpu->arch.msr_block & MSR_SPE)
> +                     kvmppc_vcpu_spe_load(vcpu);
> +
> +             if (!(vcpu->arch.shared->msr & MSR_SPE))
> +                     kvmppc_booke_queue_irqprio(vcpu, 
> BOOKE_IRQPRIO_SPE_UNAVAIL);

These are in the wrong order, no? If the guest doesn't do SPE, we can save 
ourselves the loading.

>               r = RESUME_GUEST;
>               break;
> +     }
> 
>       case BOOKE_INTERRUPT_SPE_FP_DATA:
>               kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_DATA);
> @@ -358,6 +368,28 @@ int kvmppc_handle_exit(struct kvm_run *run, struct 
> kvm_vcpu *vcpu,
>               kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND);
>               r = RESUME_GUEST;
>               break;
> +#else
> +     case BOOKE_INTERRUPT_SPE_UNAVAIL:
> +             /*
> +              * Guest wants SPE, but host kernel doesn't support it.  Send
> +              * an "unimplemented operation" program check to the guest.
> +              */
> +             kvmppc_core_queue_program(vcpu, ESR_PUO | ESR_SPV);
> +             r = RESUME_GUEST;
> +             break;
> +
> +     /*
> +      * These really should never happen without CONFIG_SPE,
> +      * as we should never enable the real MSR[SPE] in the guest.
> +      */
> +     case BOOKE_INTERRUPT_SPE_FP_DATA:
> +     case BOOKE_INTERRUPT_SPE_FP_ROUND:
> +             printk(KERN_CRIT "%s: unexpected SPE interrupt %u at %08lx\n",
> +                    __func__, exit_nr, vcpu->arch.pc);
> +             run->hw.hardware_exit_reason = exit_nr;
> +             r = RESUME_HOST;
> +             break;
> +#endif
> 
>       case BOOKE_INTERRUPT_DATA_STORAGE:
>               kvmppc_core_queue_data_storage(vcpu, vcpu->arch.fault_dear,
> diff --git a/arch/powerpc/kvm/booke_interrupts.S 
> b/arch/powerpc/kvm/booke_interrupts.S
> index 1cc471f..ebe882a 100644
> --- a/arch/powerpc/kvm/booke_interrupts.S
> +++ b/arch/powerpc/kvm/booke_interrupts.S
> @@ -13,6 +13,7 @@
>  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
>  *
>  * Copyright IBM Corp. 2007
> + * Copyright 2011 Freescale Semiconductor, Inc.
>  *
>  * Authors: Hollis Blanchard <[email protected]>
>  */
> @@ -241,6 +242,14 @@ _GLOBAL(kvmppc_resume_host)
> heavyweight_exit:
>       /* Not returning to guest. */
> 
> +#ifdef CONFIG_SPE
> +     /* save guest SPEFSCR and load host SPEFSCR */
> +     mfspr   r9, SPRN_SPEFSCR
> +     stw     r9, VCPU_SPEFSCR(r4)
> +     lwz     r9, VCPU_HOST_SPEFSCR(r4)
> +     mtspr   SPRN_SPEFSCR, r9
> +#endif
> +
>       /* We already saved guest volatile register state; now save the
>        * non-volatiles. */
>       stw     r15, VCPU_GPR(r15)(r4)
> @@ -342,6 +351,14 @@ _GLOBAL(__kvmppc_vcpu_run)
>       lwz     r30, VCPU_GPR(r30)(r4)
>       lwz     r31, VCPU_GPR(r31)(r4)
> 
> +#ifdef CONFIG_SPE
> +     /* save host SPEFSCR and load guest SPEFSCR */
> +     mfspr   r3, SPRN_SPEFSCR
> +     stw     r3, VCPU_HOST_SPEFSCR(r4)
> +     lwz     r3, VCPU_SPEFSCR(r4)
> +     mtspr   SPRN_SPEFSCR, r3
> +#endif
> +
> lightweight_exit:
>       stw     r2, HOST_R2(r1)
> 
> @@ -409,7 +426,6 @@ lightweight_exit:
>       mtctr   r3
>       lwz     r3, VCPU_CR(r4)
>       mtcr    r3
> -     lwz     r5, VCPU_GPR(r5)(r4)
>       lwz     r6, VCPU_GPR(r6)(r4)
>       lwz     r7, VCPU_GPR(r7)(r4)
>       lwz     r8, VCPU_GPR(r8)(r4)
> @@ -417,8 +433,11 @@ lightweight_exit:
>       mtsrr0  r3
>       lwz     r3, VCPU_SHARED(r4)
>       lwz     r3, (VCPU_SHARED_MSR + 4)(r3)
> +     lwz     r5, VCPU_MSR_BLOCK(r4)
>       oris    r3, r3, KVMPPC_MSR_MASK@h
>       ori     r3, r3, KVMPPC_MSR_MASK@l
> +     andc    r3, r3, r5
> +     lwz     r5, VCPU_GPR(r5)(r4)

This is a lot cleaner than the last version, thank you :).

I was wondering why we need to take the guest's MSR value into account in the 
first place though. Are there any bits in there that we wouldn't catch through 
a lazy trap? At the end of the day, the MSR will be shared between the guest 
and the host without traps, so we will depend on trapping for events anyways.

Why can't we just keep a "guest mode MSR" field in the vcpu that contains the 
bits activated inside the guest? This way no bit just accidentally slips through 
and we save a good bunch of instructions in the lightweight exit path.

>       mtsrr1  r3
> 
>       /* Clear any debug events which occurred since we disabled MSR[DE].
> @@ -431,3 +450,49 @@ lightweight_exit:
>       lwz     r3, VCPU_GPR(r3)(r4)
>       lwz     r4, VCPU_GPR(r4)(r4)
>       rfi
> +
> +#ifdef CONFIG_SPE
> +#define KVMPPC_SAVE_EVR(n,s,base)    evmergehi s,s,n; stw s,(4*(n))(base)

Can't we somehow combine those with the normal Linux SAVE_EVR macros? All we 
need to do is define a new set of macros that take a generic offset and make 
the normal macros use that with offset=THREAD_EVR0 while we use offset=0. This 
means we could probably even specify the offset directly inside the kvm code. 
And maybe even the offset in the code that currently uses SAVE_EVR.

Sorry for only mentioning those things so late.

> +#define KVMPPC_SAVE_2EVR(n,s,base)   KVMPPC_SAVE_EVR(n,s,base); \
> +                                        KVMPPC_SAVE_EVR(n+1,s,base)
> +#define KVMPPC_SAVE_4EVR(n,s,base)   KVMPPC_SAVE_2EVR(n,s,base); \
> +                                        KVMPPC_SAVE_2EVR(n+2,s,base)
> +#define KVMPPC_SAVE_8EVR(n,s,base)   KVMPPC_SAVE_4EVR(n,s,base); \
> +                                        KVMPPC_SAVE_4EVR(n+4,s,base)
> +#define KVMPPC_SAVE_16EVR(n,s,base)  KVMPPC_SAVE_8EVR(n,s,base); \
> +                                        KVMPPC_SAVE_8EVR(n+8,s,base)
> +#define KVMPPC_SAVE_32EVR(n,s,base)  KVMPPC_SAVE_16EVR(n,s,base); \
> +                                        KVMPPC_SAVE_16EVR(n+16,s,base)
> +#define KVMPPC_LOAD_EVR(n,s,base)    lwz s,(4*(n))(base); evmergelo n,s,n
> +#define KVMPPC_LOAD_2EVR(n,s,base)   KVMPPC_LOAD_EVR(n,s,base); \
> +                                        KVMPPC_LOAD_EVR(n+1,s,base)
> +#define KVMPPC_LOAD_4EVR(n,s,base)   KVMPPC_LOAD_2EVR(n,s,base); \
> +                                        KVMPPC_LOAD_2EVR(n+2,s,base)
> +#define KVMPPC_LOAD_8EVR(n,s,base)   KVMPPC_LOAD_4EVR(n,s,base); \
> +                                        KVMPPC_LOAD_4EVR(n+4,s,base)
> +#define KVMPPC_LOAD_16EVR(n,s,base)  KVMPPC_LOAD_8EVR(n,s,base); \
> +                                        KVMPPC_LOAD_8EVR(n+8,s,base)
> +#define KVMPPC_LOAD_32EVR(n,s,base)  KVMPPC_LOAD_16EVR(n,s,base); \
> +                                        KVMPPC_LOAD_16EVR(n+16,s,base)
> +
> +_GLOBAL(kvmppc_save_guest_spe)
> +     cmpi    0,r3,0
> +     beqlr-
> +     addi    r5,r3,VCPU_EVR
> +     KVMPPC_SAVE_32EVR(0,r4,r5)      /* save evr[32] */
> +     evxor   evr6, evr6, evr6
> +     evmwumiaa evr6, evr6, evr6
> +     li      r4,VCPU_ACC
> +     evstddx evr6, r4, r3            /* save acc */
> +     blr
> +
> +_GLOBAL(kvmppc_load_guest_spe)
> +     cmpi    0,r3,0
> +     beqlr-
> +     li      r4,VCPU_ACC
> +     evlddx  evr6,r4,r3
> +     evmra   evr6,evr6               /* load acc */
> +     addi    r5,r3,VCPU_EVR
> +     KVMPPC_LOAD_32EVR(0,r4,r5)      /* load evr[32] */
> +     blr
> +#endif
> diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
> index 0c1af12..fbd3bdc 100644
> --- a/arch/powerpc/kvm/e500.c
> +++ b/arch/powerpc/kvm/e500.c
> @@ -1,5 +1,5 @@
> /*
> - * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
> + * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
>  *
>  * Author: Yu Liu, <[email protected]>
>  *
> @@ -25,6 +25,25 @@
> #include "booke.h"
> #include "e500_tlb.h"
> 
> +#ifdef CONFIG_SPE
> +extern void kvmppc_load_guest_spe(struct kvm_vcpu *vcpu);
> +extern void kvmppc_save_guest_spe(struct kvm_vcpu *vcpu);
> +
> +void kvmppc_vcpu_spe_put(struct kvm_vcpu *vcpu)
> +{
> +     enable_kernel_spe();
> +     kvmppc_save_guest_spe(vcpu);
> +     vcpu->msr_block |= MSR_SPE;
> +}
> +
> +void kvmppc_vcpu_spe_load(struct kvm_vcpu *vcpu)
> +{
> +     enable_kernel_spe();
> +     kvmppc_load_guest_spe(vcpu);
> +     vcpu->msr_block &= ~MSR_SPE;
> +}
> +#endif
> +
> void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
> {
> }
> @@ -41,6 +60,11 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
> {
>       kvmppc_e500_tlb_put(vcpu);
> +#ifdef CONFIG_SPE
> +     /* save SPE env if guest has used SPE since last save */
> +     if (!(vcpu->msr_block & MSR_SPE))
> +             kvmppc_vcpu_spe_put(vcpu);
> +#endif
> }
> 
> int kvmppc_core_check_processor_compat(void)
> -- 
> 1.7.1
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to