On Tue, Oct 02, 2018 at 09:31:23PM +1000, Paul Mackerras wrote:
> From: Suraj Jitindar Singh <sjitindarsi...@gmail.com>
> 
> When running a nested (L2) guest, the guest (L1) hypervisor will use
> hypervisor-privileged tlb invalidation instructions (to manage the
> partition-scoped page tables), which will result in hypervisor
> emulation assistance interrupts. We emulate these instructions on
> behalf of the L1 guest.
> 
> The tlbie instruction can invalidate different scopes:
> 
> Invalidate TLB for a given target address:
> - This invalidates a single L2 -> L1 pte.
> - We need to invalidate any L2 -> L0 shadow_pgtable ptes which map the
>   L2 address space which is being invalidated. This is because a single
>   L2 -> L1 pte may have been mapped with more than one pte in the
>   L2 -> L0 page tables.
> 
> Invalidate the entire TLB for a given LPID or for all LPIDs:
> - Invalidate the entire shadow_pgtable for a given nested guest, or
>   for all nested guests.
> 
> Invalidate the PWC (page walk cache) for a given LPID or for all LPIDs:
> - We don't cache the PWC, so there is nothing to do.
> 
> Invalidate the entire TLB, PWC and partition table for a given/all LPIDs:
> - Here we re-read the partition table entry and remove the nested state
>   for any nested guest for which the first doubleword of the partition
>   table entry is now zero.
> 
> This also implements the H_TLB_INVALIDATE hcall. It takes as parameters
> the tlbie instruction word (of which the RIC, PRS and R fields are used),
> the rS value (giving the lpid, where required) and the rB value (giving
> the IS, AP and EPN values).
> 
> [pau...@ozlabs.org - adapted to having the partition table in guest
>  memory, added the H_TLB_INVALIDATE implementation.]
> 
> Signed-off-by: Suraj Jitindar Singh <sjitindarsi...@gmail.com>
> Signed-off-by: Paul Mackerras <pau...@ozlabs.org>
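
To make the hcall encoding concrete, here is a rough sketch of how an L1
hypervisor could request a whole-LPID TLB flush via H_TLB_INVALIDATE.
This is only my illustration of the field layout described above, not
code from the patch: tlbie_instr_word() and l1_flush_nested_lpid() are
invented names, and it assumes the usual plpar_hcall_norets() hcall
wrapper and the H_TLB_INVALIDATE token from this series are available.

	/* Illustrative sketch only -- not part of this patch.
	 * The shifts match the get_ric()/get_prs()/get_r() helpers below. */
	static inline unsigned int tlbie_instr_word(int ric, int prs, int r)
	{
		/* Only the RIC, PRS and R fields are examined by the L0 emulation */
		return (ric << 18) | (prs << 17) | (r << 16);
	}

	/* Flush the entire TLB for one nested guest:
	 * RIC=0 (TLB), PRS=0 (partition scoped), R=1 (radix), IS=2 (by LPID) */
	static long l1_flush_nested_lpid(unsigned int lpid)
	{
		unsigned long rb = 2ul << 10;	/* IS field, rB bits 11:10 */

		return plpar_hcall_norets(H_TLB_INVALIDATE,
					  tlbie_instr_word(0, 0, 1), lpid, rb);
	}

On the L0 side this would land in kvmhv_do_nested_tlbie() below, which
feeds the same three values to kvmhv_emulate_priv_tlbie().
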
Again, do we need this if we're moving to a paravirt tlbie?

> ---
>  arch/powerpc/include/asm/book3s/64/mmu-hash.h |  12 ++
>  arch/powerpc/include/asm/kvm_book3s.h         |   1 +
>  arch/powerpc/include/asm/ppc-opcode.h         |   1 +
>  arch/powerpc/kvm/book3s_emulate.c             |   1 -
>  arch/powerpc/kvm/book3s_hv.c                  |   3 +
>  arch/powerpc/kvm/book3s_hv_nested.c           | 210 +++++++++++++++++++++++++-
>  6 files changed, 225 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> index b3520b5..66db23e 100644
> --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> @@ -203,6 +203,18 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
>  	BUG();
>  }
>  
> +static inline int ap_to_shift(unsigned long ap)
> +{
> +	int psize;
> +
> +	for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
> +		if (mmu_psize_defs[psize].ap == ap)
> +			return mmu_psize_defs[psize].shift;
> +	}
> +
> +	return -1;
> +}
> +
>  static inline unsigned long get_sllp_encoding(int psize)
>  {
>  	unsigned long sllp;
> diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
> index 1d2286d..210e550 100644
> --- a/arch/powerpc/include/asm/kvm_book3s.h
> +++ b/arch/powerpc/include/asm/kvm_book3s.h
> @@ -301,6 +301,7 @@ long kvmhv_set_partition_table(struct kvm_vcpu *vcpu);
>  void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1);
>  void kvmhv_release_all_nested(struct kvm *kvm);
>  long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
> +long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu);
>  int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu,
>  			  u64 time_limit, unsigned long lpcr);
>  void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr);
> diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
> index 665af14..6093bc8 100644
> --- a/arch/powerpc/include/asm/ppc-opcode.h
> +++ b/arch/powerpc/include/asm/ppc-opcode.h
> @@ -104,6 +104,7 @@
>  #define OP_31_XOP_LHZUX     311
>  #define OP_31_XOP_MSGSNDP   142
>  #define OP_31_XOP_MSGCLRP   174
> +#define OP_31_XOP_TLBIE     306
>  #define OP_31_XOP_MFSPR     339
>  #define OP_31_XOP_LWAX      341
>  #define OP_31_XOP_LHAX      343
> diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
> index 2654df2..8c7e933 100644
> --- a/arch/powerpc/kvm/book3s_emulate.c
> +++ b/arch/powerpc/kvm/book3s_emulate.c
> @@ -36,7 +36,6 @@
>  #define OP_31_XOP_MTSR		210
>  #define OP_31_XOP_MTSRIN	242
>  #define OP_31_XOP_TLBIEL	274
> -#define OP_31_XOP_TLBIE	306
>  /* Opcode is officially reserved, reuse it as sc 1 when sc 1 doesn't trap */
>  #define OP_31_XOP_FAKE_SC1	308
>  #define OP_31_XOP_SLBMTE	402
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 6629df4..3aa5d11e 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -974,6 +974,9 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
>  		break;
>  	case H_TLB_INVALIDATE:
>  		ret = H_FUNCTION;
> +		if (!vcpu->kvm->arch.nested_enable)
> +			break;
> +		ret = kvmhv_do_nested_tlbie(vcpu);
>  		break;
>  
>  	default:
> diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
> index 4efa1f7..7abcc2b 100644
> --- a/arch/powerpc/kvm/book3s_hv_nested.c
> +++ b/arch/powerpc/kvm/book3s_hv_nested.c
> @@ -465,7 +465,7 @@ void kvmhv_release_all_nested(struct kvm *kvm)
>  }
>  
>  /* caller must hold gp->tlb_lock */
> -void kvmhv_flush_nested(struct kvm_nested_guest *gp)
> +static void kvmhv_flush_nested(struct kvm_nested_guest *gp)
>  {
>  	struct kvm *kvm = gp->l1_host;
>  
> @@ -685,10 +685,216 @@ static int kvmhv_emulate_priv_mfspr(struct kvm_run *run, struct kvm_vcpu *vcpu,
>  	return EMULATE_FAIL;
>  }
>  
> +static inline int get_ric(unsigned int instr)
> +{
> +	return (instr >> 18) & 0x3;
> +}
> +
> +static inline int get_prs(unsigned int instr)
> +{
> +	return (instr >> 17) & 0x1;
> +}
> +
> +static inline int get_r(unsigned int instr)
> +{
> +	return (instr >> 16) & 0x1;
> +}
> +
> +static inline int get_lpid(unsigned long r_val)
> +{
> +	return r_val & 0xffffffff;
> +}
> +
> +static inline int get_is(unsigned long r_val)
> +{
> +	return (r_val >> 10) & 0x3;
> +}
> +
> +static inline int get_ap(unsigned long r_val)
> +{
> +	return (r_val >> 5) & 0x7;
> +}
> +
> +static inline long get_epn(unsigned long r_val)
> +{
> +	return r_val >> 12;
> +}
> +
> +static int kvmhv_emulate_tlbie_tlb_addr(struct kvm_vcpu *vcpu, int lpid,
> +					int ap, long epn)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +	struct kvm_nested_guest *gp;
> +	long npages;
> +	int shift;
> +	unsigned long addr;
> +
> +	shift = ap_to_shift(ap);
> +	addr = epn << 12;
> +	if (shift < 0)
> +		/* Invalid ap encoding */
> +		return -EINVAL;
> +
> +	addr &= ~((1UL << shift) - 1);
> +	npages = 1UL << (shift - PAGE_SHIFT);
> +
> +	gp = kvmhv_get_nested(kvm, lpid, false);
> +	if (!gp) /* No such guest -> nothing to do */
> +		return 0;
> +	mutex_lock(&gp->tlb_lock);
> +
> +	/* There may be more than one host page backing this single guest pte */
> +	do {
> +		kvmhv_invalidate_shadow_pte(vcpu, gp, addr, &shift);
> +
> +		npages -= 1UL << (shift - PAGE_SHIFT);
> +		addr += 1UL << shift;
> +	} while (npages > 0);
> +
> +	mutex_unlock(&gp->tlb_lock);
> +	kvmhv_put_nested(gp);
> +	return 0;
> +}
> +
> +static void kvmhv_emulate_tlbie_lpid(struct kvm_vcpu *vcpu,
> +				     struct kvm_nested_guest *gp, int ric)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +
> +	mutex_lock(&gp->tlb_lock);
> +	switch (ric) {
> +	case 0:
> +		/* Invalidate TLB */
> +		spin_lock(&kvm->mmu_lock);
> +		kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
> +					  gp->shadow_lpid);
> +		radix__flush_tlb_lpid(gp->shadow_lpid);
> +		spin_unlock(&kvm->mmu_lock);
> +		break;
> +	case 1:
> +		/*
> +		 * Invalidate PWC
> +		 * We don't cache this -> nothing to do
> +		 */
> +		break;
> +	case 2:
> +		/* Invalidate TLB, PWC and caching of partition table entries */
> +		kvmhv_flush_nested(gp);
> +		break;
> +	default:
> +		break;
> +	}
> +	mutex_unlock(&gp->tlb_lock);
> +}
> +
> +static void kvmhv_emulate_tlbie_all_lpid(struct kvm_vcpu *vcpu, int ric)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +	struct kvm_nested_guest *gp;
> +	int i;
> +
> +	spin_lock(&kvm->mmu_lock);
> +	for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
> +		gp = kvm->arch.nested_guests[i];
> +		if (gp) {
> +			spin_unlock(&kvm->mmu_lock);
> +			kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
> +			spin_lock(&kvm->mmu_lock);
> +		}
> +	}
> +	spin_unlock(&kvm->mmu_lock);
> +}
> +
> +static int kvmhv_emulate_priv_tlbie(struct kvm_vcpu *vcpu, unsigned int instr,
> +				    int rs, int rb)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +	struct kvm_nested_guest *gp;
> +	int r, ric, prs, is, ap;
> +	int lpid;
> +	long epn;
> +	int ret = 0;
> +
> +	ric = get_ric(instr);
> +	prs = get_prs(instr);
> +	r = get_r(instr);
> +	lpid = get_lpid(kvmppc_get_gpr(vcpu, rs));
> +	is = get_is(kvmppc_get_gpr(vcpu, rb));
> +
> +	/*
> +	 * These cases are invalid and __should__ have caused a machine check
> +	 * r   != 1 -> Only radix supported
> +	 * prs == 1 -> Not HV privileged
> +	 * ric == 3 -> No cluster bombs for radix
> +	 * is  == 1 -> Partition scoped translations not associated with pid
> +	 * (!is) && (ric == 1 || ric == 2) -> Not supported by ISA
> +	 */
> +	if ((!r) || (prs) || (ric == 3) || (is == 1) ||
> +	    ((!is) && (ric == 1 || ric == 2)))
> +		return -EINVAL;
> +
> +	switch (is) {
> +	case 0:
> +		/*
> +		 * We know ric == 0
> +		 * Invalidate TLB for a given target address
> +		 */
> +		epn = get_epn(kvmppc_get_gpr(vcpu, rb));
> +		ap = get_ap(kvmppc_get_gpr(vcpu, rb));
> +		ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap, epn);
> +		break;
> +	case 2:
> +		/* Invalidate matching LPID */
> +		gp = kvmhv_get_nested(kvm, lpid, false);
> +		if (gp) {
> +			kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
> +			kvmhv_put_nested(gp);
> +		}
> +		break;
> +	case 3:
> +		/* Invalidate ALL LPIDs */
> +		kvmhv_emulate_tlbie_all_lpid(vcpu, ric);
> +		break;
> +	default:
> +		ret = -EINVAL;
> +		break;
> +	}
> +
> +	return ret;
> +}
> +
>  static int kvmhv_emulate_priv_op_31(struct kvm_run *run, struct kvm_vcpu *vcpu,
>  				    unsigned int instr)
>  {
> -	return EMULATE_FAIL;
> +	int ret;
> +
> +	switch (get_xop(instr)) {
> +	case OP_31_XOP_TLBIE:
> +		ret = kvmhv_emulate_priv_tlbie(vcpu, instr, get_rs(instr),
> +					       get_rb(instr));
> +		if (ret) {
> +			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
> +			return EMULATE_AGAIN;
> +		}
> +		return EMULATE_DONE;
> +	default:
> +		return EMULATE_FAIL;
> +	}
> +}
> +
> +/*
> + * This handles the H_TLB_INVALIDATE hcall.
> + * Parameters are (r4) tlbie instruction code, (r5) rS contents,
> + * (r6) rB contents.
> + */
> +long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu)
> +{
> +	int ret;
> +
> +	ret = kvmhv_emulate_priv_tlbie(vcpu, kvmppc_get_gpr(vcpu, 4), 5, 6);
> +	if (ret)
> +		return H_PARAMETER;
> +	return H_SUCCESS;
>  }
>  
>  static int kvmhv_emulate_priv_op(struct kvm_run *run, struct kvm_vcpu *vcpu,

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson