Re: [PATCH 4/4] vfio powerpc: added real mode support
On Mon, Feb 11, 2013 at 11:12:43PM +1100, a...@ozlabs.ru wrote: > From: Alexey Kardashevskiy > > The patch allows the host kernel to handle H_PUT_TCE request > without involving QEMU in it what should save time on switching > from the kernel to QEMU and back. > > The patch adds an IOMMU ID parameter into the KVM_CAP_SPAPR_TCE ioctl, > QEMU needs to be fixed to support that. > > At the moment H_PUT_TCE is processed in the virtual mode as the page > to be mapped may not be present in the RAM so paging may be involved as > it can be done from the virtual mode only. > > Tests show that this patch increases transmission speed from 220MB/s > to 750..1020MB/s on 10Gb network (Chelsio CXGB3 10Gb ethernet card). [snip] > diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c > index b4fdabc..acb9cdc 100644 > --- a/arch/powerpc/kernel/iommu.c > +++ b/arch/powerpc/kernel/iommu.c > @@ -47,6 +47,8 @@ > #include > #include > #include > +#include > +#include > > #define DBG(...) > > @@ -727,6 +729,7 @@ void iommu_register_group(struct iommu_table * tbl, > return; > } > tbl->it_group = grp; > + INIT_LIST_HEAD(&tbl->it_hugepages); > iommu_group_set_iommudata(grp, tbl, group_release); > iommu_group_set_name(grp, kasprintf(GFP_KERNEL, "domain%d-pe%lx", > domain_number, pe_num)); > @@ -906,6 +909,83 @@ void kvm_iommu_unmap_pages(struct kvm *kvm, struct > kvm_memory_slot *slot) > { > } > > +/* > + * The KVM guest can be backed with 16MB pages (qemu switch > + * -mem-path /var/lib/hugetlbfs/global/pagesize-16MB/). > + * In this case, we cannot do page counting from the real mode > + * as the compound pages are used - they are linked in a list > + * with pointers as virtual addresses which are inaccessible > + * in real mode. > + * > + * The code below keeps a 16MB pages list and uses page struct > + * in real mode if it is already locked in RAM and inserted into > + * the list or switches to the virtual mode where it can be > + * handled in a usual manner. 
> + */ > +struct iommu_kvmppc_hugepages { > + struct list_head list; > + pte_t pte; /* Huge page PTE */ > + unsigned long pa; /* Base phys address used as a real TCE */ > + struct page *page; /* page struct of the very first subpage */ > + unsigned long size; /* Huge page size (always 16MB at the moment) */ > + bool dirty; /* Dirty bit */ > +}; > + > +static struct iommu_kvmppc_hugepages *find_hp_by_pte(struct iommu_table *tbl, > + pte_t pte) > +{ > + struct iommu_kvmppc_hugepages *hp; > + > + list_for_each_entry(hp, &tbl->it_hugepages, list) { > + if (hp->pte == pte) > + return hp; > + } > + > + return NULL; > +} > + > +static struct iommu_kvmppc_hugepages *find_hp_by_pa(struct iommu_table *tbl, > + unsigned long pa) > +{ > + struct iommu_kvmppc_hugepages *hp; > + > + list_for_each_entry(hp, &tbl->it_hugepages, list) { > + if ((hp->pa <= pa) && (pa < hp->pa + hp->size)) > + return hp; > + } > + > + return NULL; > +} > + > +static struct iommu_kvmppc_hugepages *add_hp(struct iommu_table *tbl, > + pte_t pte, unsigned long va, unsigned long pg_size) > +{ > + int ret; > + struct iommu_kvmppc_hugepages *hp; > + > + hp = kzalloc(sizeof(*hp), GFP_KERNEL); > + if (!hp) > + return NULL; > + > + hp->pte = pte; > + va = va & ~(pg_size - 1); > + ret = get_user_pages_fast(va, 1, true/*write*/, &hp->page); > + if ((ret != 1) || !hp->page) { > + kfree(hp); > + return NULL; > + } > +#if defined(HASHED_PAGE_VIRTUAL) || defined(WANT_PAGE_VIRTUAL) > +#error TODO: fix to avoid page_address() here > +#endif > + hp->pa = __pa((unsigned long) page_address(hp->page)); > + > + hp->size = pg_size; > + > + list_add(&hp->list, &tbl->it_hugepages); > + > + return hp; > +} I don't see any locking here. What stops one cpu doing add_hp() from racing with another doing find_hp_by_pte() or find_hp_by_pa()? 
[snip] > @@ -1021,6 +1123,24 @@ long iommu_clear_tce_user_mode(struct iommu_table > *tbl, unsigned long ioba, > } > EXPORT_SYMBOL_GPL(iommu_clear_tce_user_mode); > > +long iommu_clear_tce_real_mode(struct iommu_table *tbl, unsigned long ioba, > + unsigned long tce_value, unsigned long npages) > +{ > + long ret; > + unsigned long entry = ioba >> IOMMU_PAGE_SHIFT; > + > + ret = tce_clear_param_check(tbl, ioba, tce_value, npages); > + if (!ret) > + ret = clear_tce(tbl, true, entry, npages); > + > + if (ret < 0) > + pr_err("iommu_tce: %s failed ioba=%lx, tce_value=%lx ret=%ld\n", > + __func__, ioba, tce_value, ret); Better to avoid printk in real mode if at all possible, particularly if they're guest-triggerable. [snip] > @@ -195,15 +225,4
Re: [PATCH 2/4] powerpc kvm: added multiple TCEs requests support
On Mon, Feb 11, 2013 at 11:12:41PM +1100, a...@ozlabs.ru wrote: > +static long emulated_h_put_tce(struct kvmppc_spapr_tce_table *stt, > + unsigned long ioba, unsigned long tce) > +{ > + unsigned long idx = ioba >> SPAPR_TCE_SHIFT; > + struct page *page; > + u64 *tbl; > + > + /* udbg_printf("H_PUT_TCE: liobn 0x%lx => stt=%p window_size=0x%x\n", > */ > + /* liobn, stt, stt->window_size); */ > + if (ioba >= stt->window_size) { > + pr_err("%s failed on ioba=%lx\n", __func__, ioba); Doesn't this give the guest a way to spam the host logs? And in fact printk in real mode is potentially problematic. I would just leave out this statement. > + return H_PARAMETER; > + } > + > + page = stt->pages[idx / TCES_PER_PAGE]; > + tbl = (u64 *)page_address(page); I would like to see an explanation of why we are confident that page_address() will work correctly in real mode, across all the combinations of config options that we can have for a ppc64 book3s kernel. > + > + /* FIXME: Need to validate the TCE itself */ > + /* udbg_printf("tce @ %p\n", &tbl[idx % TCES_PER_PAGE]); */ > + tbl[idx % TCES_PER_PAGE] = tce; > + > + return H_SUCCESS; > +} > + > +/* > + * Real mode handlers > */ > long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, > unsigned long ioba, unsigned long tce) > { > - struct kvm *kvm = vcpu->kvm; > struct kvmppc_spapr_tce_table *stt; > > - /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */ > - /* liobn, ioba, tce); */ > + stt = find_tce_table(vcpu, liobn); > + /* Didn't find the liobn, put it to userspace */ > + if (!stt) > + return H_TOO_HARD; > + > + /* Emulated IO */ > + return emulated_h_put_tce(stt, ioba, tce); > +} > + > +long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, > + unsigned long liobn, unsigned long ioba, > + unsigned long tce_list, unsigned long npages) > +{ > + struct kvmppc_spapr_tce_table *stt; > + long i, ret = 0; > + unsigned long *tces; > + > + stt = find_tce_table(vcpu, liobn); > + /* Didn't find the liobn, 
put it to userspace */ > + if (!stt) > + return H_TOO_HARD; > > - list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { > - if (stt->liobn == liobn) { > - unsigned long idx = ioba >> SPAPR_TCE_SHIFT; > - struct page *page; > - u64 *tbl; > + tces = (void *) get_real_address(vcpu, tce_list, false, NULL, NULL); > + if (!tces) > + return H_TOO_HARD; > > - /* udbg_printf("H_PUT_TCE: liobn 0x%lx => stt=%p > window_size=0x%x\n", */ > - /* liobn, stt, stt->window_size); */ > - if (ioba >= stt->window_size) > - return H_PARAMETER; > + /* Emulated IO */ > + for (i = 0; (i < npages) && !ret; ++i, ioba += IOMMU_PAGE_SIZE) > + ret = emulated_h_put_tce(stt, ioba, tces[i]); So, tces is a pointer to somewhere inside a real page. Did we check somewhere that tces[npages-1] is in the same page as tces[0]? If so, I missed it. If we didn't, then we probably should check and do something about it. > > - page = stt->pages[idx / TCES_PER_PAGE]; > - tbl = (u64 *)page_address(page); > + return ret; > +} > > - /* FIXME: Need to validate the TCE itself */ > - /* udbg_printf("tce @ %p\n", &tbl[idx % > TCES_PER_PAGE]); */ > - tbl[idx % TCES_PER_PAGE] = tce; > - return H_SUCCESS; > - } > - } > +long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, > + unsigned long liobn, unsigned long ioba, > + unsigned long tce_value, unsigned long npages) > +{ > + struct kvmppc_spapr_tce_table *stt; > + long i, ret = 0; > + > + stt = find_tce_table(vcpu, liobn); > + /* Didn't find the liobn, put it to userspace */ > + if (!stt) > + return H_TOO_HARD; > > - /* Didn't find the liobn, punt it to userspace */ > - return H_TOO_HARD; > + /* Emulated IO */ > + for (i = 0; (i < npages) && !ret; ++i, ioba += IOMMU_PAGE_SIZE) > + ret = emulated_h_put_tce(stt, ioba, tce_value); > + > + return ret; > +} > + > +/* > + * Virtual mode handlers > + */ > +extern long kvmppc_virtmode_h_put_tce(struct kvm_vcpu *vcpu, > + unsigned long liobn, unsigned long ioba, > + unsigned long tce) > +{ > + /* At the moment emulated IO is 
handled the same way */ > + return kvmppc_h_put_tce(vcpu, liobn, ioba, tce); > +} > + > +extern long kvmppc_virtmode_h_put_tce_indirect(struct kvm_vcpu *vcpu, > + unsigned long liobn, unsigned long ioba, > + unsigned long tce_list, unsigne
Re: [PATCH 3/4] powerpc: preparing to support real mode optimization
On Mon, Feb 11, 2013 at 11:12:42PM +1100, a...@ozlabs.ru wrote: > From: Alexey Kardashevskiy > > The current VFIO-on-POWER implementation supports only user mode > driven mapping, i.e. QEMU is sending requests to map/unmap pages. > However this approach is really slow in really fast hardware so > it is better to be moved to the real mode. > > The patch adds an API to increment/decrement page counter as > get_user_pages API used for user mode mapping does not work > in the real mode. > > CONFIG_SPARSEMEM_VMEMMAP and CONFIG_FLATMEM are supported. > > Signed-off-by: Alexey Kardashevskiy > Cc: David Gibson > --- The names are slightly odd, in that they include "vmemmap_" but exist and work in the flatmem case as well. Apart from that... Reviewed-by: Paul Mackerras Paul. -- To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/4] powerpc: lookup_linux_pte has been made public
On Mon, Feb 11, 2013 at 11:12:40PM +1100, a...@ozlabs.ru wrote: > From: Alexey Kardashevskiy > > The lookup_linux_pte() function returns a linux PTE which > is required to convert KVM guest physical address into host real > address in real mode. > > This conversion will be used by upcoming support of H_PUT_TCE_INDIRECT > as TCE list address comes from the guest directly so it is a guest > physical. > > Signed-off-by: Alexey Kardashevskiy > Cc: David Gibson > --- > arch/powerpc/include/asm/pgtable-ppc64.h |3 +++ > arch/powerpc/kvm/book3s_hv_rm_mmu.c |4 ++-- > 2 files changed, 5 insertions(+), 2 deletions(-) > > diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h > b/arch/powerpc/include/asm/pgtable-ppc64.h > index 0182c20..ddcc898 100644 > --- a/arch/powerpc/include/asm/pgtable-ppc64.h > +++ b/arch/powerpc/include/asm/pgtable-ppc64.h > @@ -377,6 +377,9 @@ static inline pte_t *find_linux_pte_or_hugepte(pgd_t > *pgdir, unsigned long ea, > } > #endif /* !CONFIG_HUGETLB_PAGE */ > > +pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva, > + int writing, unsigned long *pte_sizep); > + This seems a slightly odd place to put the declaration of a function which is defined in the KVM code. kvm-ppc.h might be a better place. Paul. -- To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 07/12] KVM: PPC: E500: Make clear_tlb_refs and clear_tlb1_bitmap static
Host shadow TLB flushing is logic that the guest TLB code should have no insight about. Declare the internal clear_tlb_refs and clear_tlb1_bitmap functions static to the host TLB handling file. Instead of these, we can use the already exported kvmppc_core_flush_tlb(). This gives us a common API across the board to say "please flush any pending host shadow translation". Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_mmu.c |6 ++ arch/powerpc/kvm/e500_mmu_host.c |4 ++-- arch/powerpc/kvm/e500_mmu_host.h |2 -- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c index c3d1721..623a192 100644 --- a/arch/powerpc/kvm/e500_mmu.c +++ b/arch/powerpc/kvm/e500_mmu.c @@ -541,10 +541,8 @@ static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500) { int i; - clear_tlb1_bitmap(vcpu_e500); + kvmppc_core_flush_tlb(&vcpu_e500->vcpu); kfree(vcpu_e500->g2h_tlb1_map); - - clear_tlb_refs(vcpu_e500); kfree(vcpu_e500->gtlb_priv[0]); kfree(vcpu_e500->gtlb_priv[1]); @@ -735,7 +733,7 @@ int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); kvmppc_recalc_tlb1map_range(vcpu_e500); - clear_tlb_refs(vcpu_e500); + kvmppc_core_flush_tlb(vcpu); return 0; } diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 9a150bc..a222edf 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -262,7 +262,7 @@ static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) } } -void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500) +static void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500) { if (vcpu_e500->g2h_tlb1_map) memset(vcpu_e500->g2h_tlb1_map, 0, @@ -284,7 +284,7 @@ static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500) } } -void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500) +static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500) { int stlbsel = 1; int i; diff --git 
a/arch/powerpc/kvm/e500_mmu_host.h b/arch/powerpc/kvm/e500_mmu_host.h index 9e4d4a2..7624835 100644 --- a/arch/powerpc/kvm/e500_mmu_host.h +++ b/arch/powerpc/kvm/e500_mmu_host.h @@ -12,8 +12,6 @@ void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int esel); -void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500); -void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500); int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500); void e500_mmu_host_uninit(struct kvmppc_vcpu_e500 *vcpu_e500); -- 1.6.0.2 -- To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 06/12] KVM: PPC: e500: Implement TLB1-in-TLB0 mapping
When a host mapping fault happens in a guest TLB1 entry today, we map the translated guest entry into the host's TLB1. This isn't particularly clever when the guest is mapped by normal 4k pages, since these would be a lot better to put into TLB0 instead. This patch adds the required logic to map 4k TLB1 shadow maps into the host's TLB0. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500.h |1 + arch/powerpc/kvm/e500_mmu_host.c | 65 +++--- 2 files changed, 47 insertions(+), 19 deletions(-) diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index c70d37e..41cefd4 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -28,6 +28,7 @@ #define E500_TLB_VALID 1 #define E500_TLB_BITMAP 2 +#define E500_TLB_TLB0 (1 << 2) struct tlbe_ref { pfn_t pfn; diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 4c32d65..9a150bc 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -216,10 +216,21 @@ void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, vcpu_e500->g2h_tlb1_map[esel] = 0; ref->flags &= ~(E500_TLB_BITMAP | E500_TLB_VALID); local_irq_restore(flags); + } - return; + if (tlbsel == 1 && ref->flags & E500_TLB_TLB0) { + /* +* TLB1 entry is backed by 4k pages. This should happen +* rarely and is not worth optimizing. Invalidate everything. +*/ + kvmppc_e500_tlbil_all(vcpu_e500); + ref->flags &= ~(E500_TLB_TLB0 | E500_TLB_VALID); } + /* Already invalidated in between */ + if (!(ref->flags & E500_TLB_VALID)) + return; + /* Guest tlbe is backed by at most one host tlbe per shadow pid. 
*/ kvmppc_e500_tlbil_one(vcpu_e500, gtlbe); @@ -487,38 +498,54 @@ static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, int esel, return 0; } +static int kvmppc_e500_tlb1_map_tlb1(struct kvmppc_vcpu_e500 *vcpu_e500, +struct tlbe_ref *ref, +int esel) +{ + unsigned int sesel = vcpu_e500->host_tlb1_nv++; + + if (unlikely(vcpu_e500->host_tlb1_nv >= tlb1_max_shadow_size())) + vcpu_e500->host_tlb1_nv = 0; + + vcpu_e500->tlb_refs[1][sesel] = *ref; + vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel; + vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP; + if (vcpu_e500->h2g_tlb1_rmap[sesel]) { + unsigned int idx = vcpu_e500->h2g_tlb1_rmap[sesel]; + vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << sesel); + } + vcpu_e500->h2g_tlb1_rmap[sesel] = esel; + + return sesel; +} + /* Caller must ensure that the specified guest TLB entry is safe to insert into * the shadow TLB. */ -/* XXX for both one-one and one-to-many , for now use TLB1 */ +/* For both one-one and one-to-many */ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, struct kvm_book3e_206_tlb_entry *stlbe, int esel) { - struct tlbe_ref *ref; - unsigned int sesel; + struct tlbe_ref ref; + int sesel; int r; - int stlbsel = 1; - - sesel = vcpu_e500->host_tlb1_nv++; - - if (unlikely(vcpu_e500->host_tlb1_nv >= tlb1_max_shadow_size())) - vcpu_e500->host_tlb1_nv = 0; - ref = &vcpu_e500->tlb_refs[1][sesel]; + ref.flags = 0; r = kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, - ref); + &ref); if (r) return r; - vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel; - vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP; - if (vcpu_e500->h2g_tlb1_rmap[sesel]) { - unsigned int idx = vcpu_e500->h2g_tlb1_rmap[sesel]; - vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << sesel); + /* Use TLB0 when we can only map a page with 4k */ + if (get_tlb_tsize(stlbe) == BOOK3E_PAGESZ_4K) { + vcpu_e500->gtlb_priv[1][esel].ref.flags |= 
E500_TLB_TLB0; + write_stlbe(vcpu_e500, gtlbe, stlbe, 0, 0); + return 0; } - vcpu_e500->h2g_tlb1_rmap[sesel] = esel; - write_stlbe(vcpu_e500, gtlbe, stlbe, stlbsel, sesel); + /* Otherwise map into TLB1 */ + sesel = kvmppc_e500_tlb1_map_tlb1(vcpu_e500, &ref, esel); + write_stlbe(vcpu_e500, gtlbe, stlbe, 1, sesel); return 0; } -- 1.6.0.2 -- To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 08/12] KVM: PPC: E500: Remove kvmppc_e500_tlbil_all usage from guest TLB code
The guest TLB handling code should not have any insight into how the host TLB shadow code works. kvmppc_e500_tlbil_all() is a function that is used for distinction between e500v2 and e500mc (E.HV) on how to flush shadow entries. This function really is private between the e500.c/e500mc.c file and e500_mmu_host.c. Instead of this one, use the public kvmppc_core_flush_tlb() function to flush all shadow TLB entries. As a nice side effect, with this we also end up flushing TLB1 entries which we forgot to do before. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_mmu.c |8 1 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c index 623a192..5c44759 100644 --- a/arch/powerpc/kvm/e500_mmu.c +++ b/arch/powerpc/kvm/e500_mmu.c @@ -239,8 +239,8 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value) for (esel = 0; esel < vcpu_e500->gtlb_params[1].entries; esel++) kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel); - /* Invalidate all vcpu id mappings */ - kvmppc_e500_tlbil_all(vcpu_e500); + /* Invalidate all host shadow mappings */ + kvmppc_core_flush_tlb(&vcpu_e500->vcpu); return EMULATE_DONE; } @@ -269,8 +269,8 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, gva_t ea) kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); } - /* Invalidate all vcpu id mappings */ - kvmppc_e500_tlbil_all(vcpu_e500); + /* Invalidate all host shadow mappings */ + kvmppc_core_flush_tlb(&vcpu_e500->vcpu); return EMULATE_DONE; } -- 1.6.0.2 -- To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 09/12] KVM: PPC: booke: use vcpu reference from thread_struct
From: Bharat Bhushan Like other places, use thread_struct to get vcpu reference. Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/reg.h |2 -- arch/powerpc/kernel/asm-offsets.c |2 +- arch/powerpc/kvm/booke_interrupts.S |6 ++ 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 97d3727..11ae3d8 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -919,8 +919,6 @@ #define SPRN_SPRG_RSCRATCH_DBG SPRN_SPRG9 #define SPRN_SPRG_WSCRATCH_DBG SPRN_SPRG9 #endif -#define SPRN_SPRG_RVCPUSPRN_SPRG1 -#define SPRN_SPRG_WVCPUSPRN_SPRG1 #endif #ifdef CONFIG_8xx diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 4e23ba2..46f6afd 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -117,7 +117,7 @@ int main(void) #ifdef CONFIG_KVM_BOOK3S_32_HANDLER DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu)); #endif -#ifdef CONFIG_KVM_BOOKE_HV +#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE) DEFINE(THREAD_KVM_VCPU, offsetof(struct thread_struct, kvm_vcpu)); #endif diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index bb46b32..ca16d57 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -56,7 +56,8 @@ _GLOBAL(kvmppc_handler_\ivor_nr) /* Get pointer to vcpu and record exit number. */ mtspr \scratch , r4 - mfspr r4, SPRN_SPRG_RVCPU + mfspr r4, SPRN_SPRG_THREAD + lwz r4, THREAD_KVM_VCPU(r4) stw r3, VCPU_GPR(R3)(r4) stw r5, VCPU_GPR(R5)(r4) stw r6, VCPU_GPR(R6)(r4) @@ -402,9 +403,6 @@ lightweight_exit: lwz r8, kvmppc_booke_handlers@l(r8) mtspr SPRN_IVPR, r8 - /* Save vcpu pointer for the exception handlers. 
*/ - mtspr SPRN_SPRG_WVCPU, r4 - lwz r5, VCPU_SHARED(r4) /* Can't switch the stack pointer until after IVPR is switched, -- 1.6.0.2 -- To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 10/12] KVM: PPC: booke: Allow multiple exception types
From: Bharat Bhushan Current kvmppc_booke_handlers uses the same macro (KVM_HANDLER) and all handlers are considered to be the same size. This will not be the case if we want to use different macros for different handlers. This patch improves the kvmppc_booke_handler so that it can support different macros for different handlers. Signed-off-by: Liu Yu [bharat.bhus...@freescale.com: Substantial changes] Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_ppc.h |2 - arch/powerpc/kvm/booke.c| 14 arch/powerpc/kvm/booke.h|1 + arch/powerpc/kvm/booke_interrupts.S | 37 -- arch/powerpc/kvm/e500.c | 16 +- 5 files changed, 54 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 493630e..44a657a 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -49,8 +49,6 @@ enum emulation_result { extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); -extern char kvmppc_handlers_start[]; -extern unsigned long kvmppc_handler_len; extern void kvmppc_handler_highmem(void); extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 8779cd4..d2f502d 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1594,7 +1594,9 @@ int __init kvmppc_booke_init(void) { #ifndef CONFIG_KVM_BOOKE_HV unsigned long ivor[16]; + unsigned long *handler = kvmppc_booke_handler_addr; unsigned long max_ivor = 0; + unsigned long handler_len; int i; /* We install our own exception handlers by hijacking IVPR. 
IVPR must @@ -1627,14 +1629,16 @@ int __init kvmppc_booke_init(void) for (i = 0; i < 16; i++) { if (ivor[i] > max_ivor) - max_ivor = ivor[i]; + max_ivor = i; + handler_len = handler[i + 1] - handler[i]; memcpy((void *)kvmppc_booke_handlers + ivor[i], - kvmppc_handlers_start + i * kvmppc_handler_len, - kvmppc_handler_len); + (void *)handler[i], handler_len); } - flush_icache_range(kvmppc_booke_handlers, - kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); + + handler_len = handler[max_ivor + 1] - handler[max_ivor]; + flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers + + ivor[max_ivor] + handler_len); #endif /* !BOOKE_HV */ return 0; } diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index e9b88e4..5fd1ba6 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -65,6 +65,7 @@ (1 << BOOKE_IRQPRIO_CRITICAL)) extern unsigned long kvmppc_booke_handlers; +extern unsigned long kvmppc_booke_handler_addr[]; void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr); void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr); diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index ca16d57..eae8483 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -74,6 +74,14 @@ _GLOBAL(kvmppc_handler_\ivor_nr) bctr .endm +.macro KVM_HANDLER_ADDR ivor_nr + .long kvmppc_handler_\ivor_nr +.endm + +.macro KVM_HANDLER_END + .long kvmppc_handlers_end +.endm + _GLOBAL(kvmppc_handlers_start) KVM_HANDLER BOOKE_INTERRUPT_CRITICAL SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0 KVM_HANDLER BOOKE_INTERRUPT_MACHINE_CHECK SPRN_SPRG_RSCRATCH_MC SPRN_MCSRR0 @@ -94,9 +102,7 @@ KVM_HANDLER BOOKE_INTERRUPT_DEBUG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0 KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0 KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA SPRN_SPRG_RSCRATCH0 SPRN_SRR0 KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND SPRN_SPRG_RSCRATCH0 SPRN_SRR0 - -_GLOBAL(kvmppc_handler_len) - .long 
kvmppc_handler_1 - kvmppc_handler_0 +_GLOBAL(kvmppc_handlers_end) /* Registers: * SPRG_SCRATCH0: guest r4 @@ -461,6 +467,31 @@ lightweight_exit: lwz r4, VCPU_GPR(R4)(r4) rfi + .data + .align 4 + .globl kvmppc_booke_handler_addr +kvmppc_booke_handler_addr: +KVM_HANDLER_ADDR BOOKE_INTERRUPT_CRITICAL +KVM_HANDLER_ADDR BOOKE_INTERRUPT_MACHINE_CHECK +KVM_HANDLER_ADDR BOOKE_INTERRUPT_DATA_STORAGE +KVM_HANDLER_ADDR BOOKE_INTERRUPT_INST_STORAGE +KVM_HANDLER_ADDR BOOKE_INTERRUPT_EXTERNAL +KVM_HANDLER_ADDR BOOKE_INTERRUPT_ALIGNMENT +KVM_HANDLER_ADDR BOOKE_INTERRUPT_PROGRAM +KVM_HANDLER_ADDR BOOKE_INTERRUPT_FP_UNAVAIL +KVM_HANDLER_ADDR BOOKE_INTERRUPT_SYSCALL +KVM_HANDLER_ADDR BOOKE_INTERRUPT_AP_UNAVAIL +KVM_HANDLER_ADDR BOOKE_INTERRUPT_D
[PATCH 02/12] KVM: PPC: E500: Explicitly mark shadow maps invalid
When we invalidate shadow TLB maps on the host, we don't mark them as not valid. But we should. Fix this by removing the E500_TLB_VALID from their flags when invalidating. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_tlb.c | 13 ++--- 1 files changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index d38ad63..8efb2ac 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -204,9 +204,13 @@ static void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, { struct kvm_book3e_206_tlb_entry *gtlbe = get_entry(vcpu_e500, tlbsel, esel); + struct tlbe_ref *ref = &vcpu_e500->gtlb_priv[tlbsel][esel].ref; - if (tlbsel == 1 && - vcpu_e500->gtlb_priv[1][esel].ref.flags & E500_TLB_BITMAP) { + /* Don't bother with unmapped entries */ + if (!(ref->flags & E500_TLB_VALID)) + return; + + if (tlbsel == 1 && ref->flags & E500_TLB_BITMAP) { u64 tmp = vcpu_e500->g2h_tlb1_map[esel]; int hw_tlb_indx; unsigned long flags; @@ -224,7 +228,7 @@ static void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, } mb(); vcpu_e500->g2h_tlb1_map[esel] = 0; - vcpu_e500->gtlb_priv[1][esel].ref.flags &= ~E500_TLB_BITMAP; + ref->flags &= ~(E500_TLB_BITMAP | E500_TLB_VALID); local_irq_restore(flags); return; @@ -232,6 +236,9 @@ static void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, /* Guest tlbe is backed by at most one host tlbe per shadow pid. */ kvmppc_e500_tlbil_one(vcpu_e500, gtlbe); + + /* Mark the TLB as not backed by the host anymore */ + ref->flags &= ~E500_TLB_VALID; } static int tlb0_set_base(gva_t addr, int sets, int ways) -- 1.6.0.2 -- To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 11/12] booke: Added DBCR4 SPR number
From: Bharat Bhushan Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/reg_booke.h |1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index e07e6af..b417de3 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -56,6 +56,7 @@ #define SPRN_SPRG7W0x117 /* Special Purpose Register General 7 Write */ #define SPRN_EPCR 0x133 /* Embedded Processor Control Register */ #define SPRN_DBCR2 0x136 /* Debug Control Register 2 */ +#define SPRN_DBCR4 0x233 /* Debug Control Register 4 */ #define SPRN_MSRP 0x137 /* MSR Protect Register */ #define SPRN_IAC3 0x13A /* Instruction Address Compare 3 */ #define SPRN_IAC4 0x13B /* Instruction Address Compare 4 */ -- 1.6.0.2 -- To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 03/12] KVM: PPC: E500: Propagate errors when shadow mapping
When shadow mapping a page, mapping this page can fail. In that case we don't have a shadow map. Take this case into account, otherwise we might end up writing bogus TLB entries into the host TLB. While at it, also move the write_stlbe() calls into the respective TLBn handlers. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_tlb.c | 69 +- 1 files changed, 41 insertions(+), 28 deletions(-) diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 8efb2ac..3777167 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -432,7 +432,7 @@ static inline void kvmppc_e500_setup_stlbe( #endif } -static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, +static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, int tlbsel, struct kvm_book3e_206_tlb_entry *stlbe, struct tlbe_ref *ref) @@ -551,7 +551,7 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, if (is_error_noslot_pfn(pfn)) { printk(KERN_ERR "Couldn't get real page for gfn %lx!\n", (long)gfn); - return; + return -EINVAL; } /* Align guest and physical address to page map boundaries */ @@ -571,22 +571,33 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, /* Drop refcount on page, so that mmu notifiers can clear it */ kvm_release_pfn_clean(pfn); + + return 0; } /* XXX only map the one-one case, for now use TLB0 */ -static void kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, -int esel, -struct kvm_book3e_206_tlb_entry *stlbe) +static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, + int esel, + struct kvm_book3e_206_tlb_entry *stlbe) { struct kvm_book3e_206_tlb_entry *gtlbe; struct tlbe_ref *ref; + int stlbsel = 0; + int sesel = 0; + int r; gtlbe = get_entry(vcpu_e500, 0, esel); ref = &vcpu_e500->gtlb_priv[0][esel].ref; - kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe), + r = 
kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe), get_tlb_raddr(gtlbe) >> PAGE_SHIFT, gtlbe, 0, stlbe, ref); + if (r) + return r; + + write_stlbe(vcpu_e500, gtlbe, stlbe, stlbsel, sesel); + + return 0; } /* Caller must ensure that the specified guest TLB entry is safe to insert into @@ -597,25 +608,32 @@ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, struct kvm_book3e_206_tlb_entry *stlbe, int esel) { struct tlbe_ref *ref; - unsigned int victim; + unsigned int sesel; + int r; + int stlbsel = 1; - victim = vcpu_e500->host_tlb1_nv++; + sesel = vcpu_e500->host_tlb1_nv++; if (unlikely(vcpu_e500->host_tlb1_nv >= tlb1_max_shadow_size())) vcpu_e500->host_tlb1_nv = 0; - ref = &vcpu_e500->tlb_refs[1][victim]; - kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, ref); + ref = &vcpu_e500->tlb_refs[1][sesel]; + r = kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, + ref); + if (r) + return r; - vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << victim; + vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel; vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP; - if (vcpu_e500->h2g_tlb1_rmap[victim]) { - unsigned int idx = vcpu_e500->h2g_tlb1_rmap[victim]; - vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << victim); + if (vcpu_e500->h2g_tlb1_rmap[sesel]) { + unsigned int idx = vcpu_e500->h2g_tlb1_rmap[sesel]; + vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << sesel); } - vcpu_e500->h2g_tlb1_rmap[victim] = esel; + vcpu_e500->h2g_tlb1_rmap[sesel] = esel; - return victim; + write_stlbe(vcpu_e500, gtlbe, stlbe, stlbsel, sesel); + + return 0; } static void kvmppc_recalc_tlb1map_range(struct kvmppc_vcpu_e500 *vcpu_e500) @@ -1034,30 +1052,27 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, struct kvm_book3e_206_tlb_entry *gtlbe, stlbe; int tlbsel = tlbsel_of(index); int esel = esel_of(index); - int stlbsel, sesel; gtlbe = get_entry(vcpu_e500, tlbsel, esel); switch (tlbsel) { case 0: - stlbsel = 0; - sesel = 0; /* unused */ priv = 
&vcpu_e500->gtlb_priv[tlbsel][esel]; - /* Only tri
[PATCH 12/12] KVM: PPC: BookE: Handle alignment interrupts
When the guest triggers an alignment interrupt, we don't handle it properly today and instead BUG_ON(). This really shouldn't happen. Instead, we should just pass the interrupt back into the guest so it can deal with it. Reported-by: Gao Guanhua-B22826 Tested-by: Gao Guanhua-B22826 Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c| 16 +++- arch/powerpc/kvm/booke_interrupts.S |6 -- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index d2f502d..020923e 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -182,6 +182,14 @@ static void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE); } +static void kvmppc_core_queue_alignment(struct kvm_vcpu *vcpu, ulong dear_flags, + ulong esr_flags) +{ + vcpu->arch.queued_dear = dear_flags; + vcpu->arch.queued_esr = esr_flags; + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALIGNMENT); +} + void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong esr_flags) { vcpu->arch.queued_esr = esr_flags; @@ -345,6 +353,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, switch (priority) { case BOOKE_IRQPRIO_DTLB_MISS: case BOOKE_IRQPRIO_DATA_STORAGE: + case BOOKE_IRQPRIO_ALIGNMENT: update_dear = true; /* fall through */ case BOOKE_IRQPRIO_INST_STORAGE: @@ -358,7 +367,6 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, case BOOKE_IRQPRIO_SPE_FP_DATA: case BOOKE_IRQPRIO_SPE_FP_ROUND: case BOOKE_IRQPRIO_AP_UNAVAIL: - case BOOKE_IRQPRIO_ALIGNMENT: allowed = 1; msr_mask = MSR_CE | MSR_ME | MSR_DE; int_class = INT_CLASS_NONCRIT; @@ -971,6 +979,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, r = RESUME_GUEST; break; + case BOOKE_INTERRUPT_ALIGNMENT: + kvmppc_core_queue_alignment(vcpu, vcpu->arch.fault_dear, + vcpu->arch.fault_esr); + r = RESUME_GUEST; + break; + #ifdef CONFIG_KVM_BOOKE_HV case BOOKE_INTERRUPT_HV_SYSCALL: if 
(!(vcpu->arch.shared->msr & MSR_PR)) { diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index eae8483..f4bb55c 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -45,12 +45,14 @@ (1
[PATCH 01/12] KVM: PPC: E500: Move write_stlbe higher
Later patches want to call the function and it doesn't have dependencies on anything below write_host_tlbe. Move it higher up in the file. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_tlb.c | 32 1 files changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index cf3f180..d38ad63 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -156,6 +156,22 @@ static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, } } +/* sesel is for tlb1 only */ +static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500, + struct kvm_book3e_206_tlb_entry *gtlbe, + struct kvm_book3e_206_tlb_entry *stlbe, + int stlbsel, int sesel) +{ + int stid; + + preempt_disable(); + stid = kvmppc_e500_get_tlb_stid(&vcpu_e500->vcpu, gtlbe); + + stlbe->mas1 |= MAS1_TID(stid); + write_host_tlbe(vcpu_e500, stlbsel, sesel, stlbe); + preempt_enable(); +} + #ifdef CONFIG_KVM_E500V2 void kvmppc_map_magic(struct kvm_vcpu *vcpu) { @@ -834,22 +850,6 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea) return EMULATE_DONE; } -/* sesel is for tlb1 only */ -static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500, - struct kvm_book3e_206_tlb_entry *gtlbe, - struct kvm_book3e_206_tlb_entry *stlbe, - int stlbsel, int sesel) -{ - int stid; - - preempt_disable(); - stid = kvmppc_e500_get_tlb_stid(&vcpu_e500->vcpu, gtlbe); - - stlbe->mas1 |= MAS1_TID(stid); - write_host_tlbe(vcpu_e500, stlbsel, sesel, stlbe); - preempt_enable(); -} - int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); -- 1.6.0.2 -- To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 04/12] KVM: PPC: e500: Call kvmppc_mmu_map for initial mapping
When emulating tlbwe, we want to automatically map the entry that just got written in our shadow TLB map, because chances are quite high that it's going to be used very soon. Today this happens explicitly, duplicating all the logic that is in kvmppc_mmu_map() already. Just call that one instead. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_tlb.c | 38 +++--- 1 files changed, 7 insertions(+), 31 deletions(-) diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 3777167..48d1a4f 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -878,8 +878,8 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea) int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - struct kvm_book3e_206_tlb_entry *gtlbe, stlbe; - int tlbsel, esel, stlbsel, sesel; + struct kvm_book3e_206_tlb_entry *gtlbe; + int tlbsel, esel; int recal = 0; tlbsel = get_tlb_tlbsel(vcpu); @@ -917,40 +917,16 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */ if (tlbe_is_host_safe(vcpu, gtlbe)) { - u64 eaddr; - u64 raddr; + u64 eaddr = get_tlb_eaddr(gtlbe); + u64 raddr = get_tlb_raddr(gtlbe); - switch (tlbsel) { - case 0: - /* TLB0 */ + if (tlbsel == 0) { gtlbe->mas1 &= ~MAS1_TSIZE(~0); gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K); - - stlbsel = 0; - kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe); - sesel = 0; /* unused */ - - break; - - case 1: - /* TLB1 */ - eaddr = get_tlb_eaddr(gtlbe); - raddr = get_tlb_raddr(gtlbe); - - /* Create a 4KB mapping on the host. -* If the guest wanted a large page, -* only the first 4KB is mapped here and the rest -* are mapped on the fly. 
*/ - stlbsel = 1; - sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, - raddr >> PAGE_SHIFT, gtlbe, &stlbe, esel); - break; - - default: - BUG(); } - write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel); + /* Premap the faulting page */ + kvmppc_mmu_map(vcpu, eaddr, raddr, index_of(tlbsel, esel)); } kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); -- 1.6.0.2 -- To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PULL 00/14] ppc patch queue 2013-02-15
Hi Marcelo / Gleb, This is my current patch queue for ppc. Please pull. Highlights of this queue drop are: - BookE: Fast mapping support for 4k backed memory - BookE: Handle alignment interrupts Alex The following changes since commit cbd29cb6e38af6119df2cdac0c58acf0e85c177e: Jan Kiszka (1): KVM: nVMX: Remove redundant get_vmcs12 from nested_vmx_exit_handled_msr are available in the git repository at: git://github.com/agraf/linux-2.6.git kvm-ppc-next Alexander Graf (11): KVM: PPC: E500: Move write_stlbe higher KVM: PPC: E500: Explicitly mark shadow maps invalid KVM: PPC: E500: Propagate errors when shadow mapping KVM: PPC: e500: Call kvmppc_mmu_map for initial mapping KVM: PPC: E500: Split host and guest MMU parts KVM: PPC: e500: Implement TLB1-in-TLB0 mapping KVM: PPC: E500: Make clear_tlb_refs and clear_tlb1_bitmap static KVM: PPC: E500: Remove kvmppc_e500_tlbil_all usage from guest TLB code Merge commit 'origin/next' into kvm-ppc-next KVM: PPC: BookE: Handle alignment interrupts Merge commit 'origin/next' into kvm-ppc-next Bharat Bhushan (3): KVM: PPC: booke: use vcpu reference from thread_struct KVM: PPC: booke: Allow multiple exception types booke: Added DBCR4 SPR number arch/powerpc/include/asm/kvm_ppc.h |2 - arch/powerpc/include/asm/reg.h |2 - arch/powerpc/include/asm/reg_booke.h |1 + arch/powerpc/kernel/asm-offsets.c|2 +- arch/powerpc/kvm/Makefile|9 +- arch/powerpc/kvm/booke.c | 30 +- arch/powerpc/kvm/booke.h |1 + arch/powerpc/kvm/booke_interrupts.S | 49 +- arch/powerpc/kvm/e500.c | 16 +- arch/powerpc/kvm/e500.h |1 + arch/powerpc/kvm/e500_mmu.c | 809 +++ arch/powerpc/kvm/e500_mmu_host.c | 699 + arch/powerpc/kvm/e500_mmu_host.h | 18 + arch/powerpc/kvm/e500_tlb.c | 1430 -- 14 files changed, 1610 insertions(+), 1459 deletions(-) create mode 100644 arch/powerpc/kvm/e500_mmu.c create mode 100644 arch/powerpc/kvm/e500_mmu_host.c create mode 100644 arch/powerpc/kvm/e500_mmu_host.h delete mode 100644 arch/powerpc/kvm/e500_tlb.c -- To unsubscribe from this list: 
send the line "unsubscribe kvm-ppc" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 7/9] KVM: PPC: Book3S HV: Add support for real mode ICP in XICS emulation
From: Benjamin Herrenschmidt This adds an implementation of the XICS hypercalls in real mode for HV KVM, which allows us to avoid exiting the guest MMU context on all threads for a variety of operations such as fetching a pending interrupt, EOI of messages, IPIs, etc. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/Makefile |1 + arch/powerpc/kvm/book3s_hv_rm_xics.c| 402 +++ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 10 +- arch/powerpc/kvm/book3s_xics.c | 64 - arch/powerpc/kvm/book3s_xics.h | 16 ++ 5 files changed, 475 insertions(+), 18 deletions(-) create mode 100644 arch/powerpc/kvm/book3s_hv_rm_xics.c diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index e2eb04c..895e880 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -77,6 +77,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ book3s_hv_rm_mmu.o \ book3s_64_vio_hv.o \ book3s_hv_ras.o \ + book3s_hv_rm_xics.o \ book3s_hv_builtin.o kvm-book3s_64-module-objs := \ diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c new file mode 100644 index 000..3605e0c --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -0,0 +1,402 @@ +/* + * Copyright 2012 Michael Ellerman, IBM Corporation. + * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "book3s_xics.h" + +#define DEBUG_PASSUP + +static inline void rm_writeb(unsigned long paddr, u8 val) +{ + __asm__ __volatile__("sync; stbcix %0,0,%1" + : : "r" (val), "r" (paddr) : "memory"); +} + +static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu, struct kvm_vcpu *this_vcpu) +{ + struct kvmppc_icp *this_icp = this_vcpu->arch.icp; + unsigned long xics_phys; + int cpu; + + /* Mark the target VCPU as having an interrupt pending */ + vcpu->stat.queue_intr++; + set_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); + + /* Kick self ? Just set MER and return */ + if (vcpu == this_vcpu) { + mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_MER); + return; + } + + /* Check if the core is loaded, if not, too hard */ + cpu = vcpu->cpu; + if (cpu < 0 || cpu >= nr_cpu_ids) { + this_icp->rm_action |= XICS_RM_KICK_VCPU; + this_icp->rm_kick_target = vcpu; + return; + } + /* In SMT cpu will always point to thread 0, we adjust it */ + cpu += vcpu->arch.ptid; + + /* Not too hard, then poke the target */ + xics_phys = paca[cpu].kvm_hstate.xics_phys; + rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY); +} + +static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu) +{ + /* Note: Only called on self ! 
*/ + clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); + mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_MER); +} + +static inline bool icp_rm_try_update(struct kvmppc_icp *icp, +union kvmppc_icp_state old, +union kvmppc_icp_state new) +{ + struct kvm_vcpu *this_vcpu = local_paca->kvm_hstate.kvm_vcpu; + bool success; + + /* Calculate new output value */ + new.out_ee = (new.xisr && (new.pending_pri < new.cppr)); + + /* Attempt atomic update */ + success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw; + if (!success) + goto bail; + + /* +* Check for output state update +* +* Note that this is racy since another processor could be updating +* the state already. This is why we never clear the interrupt output +* here, we only ever set it. The clear only happens prior to doing +* an update and only by the processor itself. Currently we do it +* in Accept (H_XIRR) and Up_Cppr (H_XPPR). +* +* We also do not try to figure out whether the EE state has changed, +* we unconditionally set it if the new state calls for it. The reason +* for that is that we opportunistically remove the pending interrupt +* flag when raising CPPR, so we need to set it back here if an +* interrupt is still pending. +*/ + if (new.out_ee) + icp_rm_set_vcpu_irq(icp->vcpu, this_vcpu); + + /* Expose the state change for debug purposes */ + this_vcpu->arch.icp->rm_dbgstate = new; + this_vcpu->arch.icp->rm_dbgtgt = icp->vcpu; + + bail: + return success; +} + +s
[PATCH 6/9] KVM: PPC: Book3S HV: Speed up wakeups of CPUs on HV KVM
From: Benjamin Herrenschmidt Currently, we wake up a CPU by sending a host IPI with smp_send_reschedule() to thread 0 of that core, which will take all threads out of the guest, and cause them to re-evaluate their interrupt status on the way back in. This adds a mechanism to differentiate real host IPIs from IPIs sent by KVM for guest threads to poke each other, in order to target the guest threads precisely when possible and avoid that global switch of the core to host state. We then use this new facility in the in-kernel XICS code. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_book3s_asm.h |8 ++- arch/powerpc/include/asm/kvm_ppc.h| 29 arch/powerpc/kernel/asm-offsets.c |2 + arch/powerpc/kvm/book3s_hv.c | 26 +++- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 102 - arch/powerpc/kvm/book3s_xics.c|2 +- arch/powerpc/sysdev/xics/icp-native.c |8 +++ 7 files changed, 158 insertions(+), 19 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index 88609b2..923522d 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -20,6 +20,11 @@ #ifndef __ASM_KVM_BOOK3S_ASM_H__ #define __ASM_KVM_BOOK3S_ASM_H__ +/* XICS ICP register offsets */ +#define XICS_XIRR 4 +#define XICS_MFRR 0xc +#define XICS_IPI 2 /* interrupt source # for IPIs */ + #ifdef __ASSEMBLY__ #ifdef CONFIG_KVM_BOOK3S_HANDLER @@ -81,10 +86,11 @@ struct kvmppc_host_state { #ifdef CONFIG_KVM_BOOK3S_64_HV u8 hwthread_req; u8 hwthread_state; - + u8 host_ipi; struct kvm_vcpu *kvm_vcpu; struct kvmppc_vcore *kvm_vcore; unsigned long xics_phys; + u32 saved_xirr; u64 dabr; u64 host_mmcr[3]; u32 host_pmc[8]; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 2308fff..f371af8a 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -259,6 +259,21 @@ static inline void kvmppc_set_xics_phys(int 
cpu, unsigned long addr) paca[cpu].kvm_hstate.xics_phys = addr; } +static inline u32 kvmppc_get_xics_latch(void) +{ + u32 xirr = get_paca()->kvm_hstate.saved_xirr; + + get_paca()->kvm_hstate.saved_xirr = 0; + + return xirr; +} + +static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi) +{ + paca[cpu].kvm_hstate.host_ipi = host_ipi; +} + +extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu); extern void kvm_linear_init(void); #else @@ -268,6 +283,18 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) static inline void kvm_linear_init(void) {} +static inline u32 kvmppc_get_xics_latch(void) +{ + return 0; +} + +static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi) +{} + +static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) +{ + kvm_vcpu_kick(vcpu); +} #endif static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr) @@ -332,4 +359,6 @@ static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb) return ea; } +extern void xics_wake_cpu(int cpu); + #endif /* __POWERPC_KVM_PPC_H__ */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 46f6afd..c564ac3 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -543,6 +543,8 @@ int main(void) HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore); HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys); + HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr); + HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi); HSTATE_FIELD(HSTATE_MMCR, host_mmcr); HSTATE_FIELD(HSTATE_PMC, host_pmc); HSTATE_FIELD(HSTATE_PURR, host_purr); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 8f09c36..1365440 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -66,6 +66,31 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); +void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) +{ + int me; + int 
cpu = vcpu->cpu; + wait_queue_head_t *wqp; + + wqp = kvm_arch_vcpu_wq(vcpu); + if (waitqueue_active(wqp)) { + wake_up_interruptible(wqp); + ++vcpu->stat.halt_wakeup; + } + + me = get_cpu(); + + /* CPU points to the first thread of the core */ + if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) { + int real_cpu = cpu + vcpu->arch.ptid; + if (paca[real_cpu].kvm_hstate.xics_phys) + xics_wake_cpu(real_cpu); + else if (cpu_online(cpu)) + smp_se
[PATCH 2/9] KVM: PPC: Remove unused argument to kvmppc_core_dequeue_external
Currently kvmppc_core_dequeue_external() takes a struct kvm_interrupt * argument and does nothing with it, in any of its implementations. This removes it in order to make things easier for forthcoming in-kernel interrupt controller emulation code. Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_ppc.h |3 +-- arch/powerpc/kvm/book3s.c |3 +-- arch/powerpc/kvm/booke.c |3 +-- arch/powerpc/kvm/powerpc.c |2 +- 4 files changed, 4 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index dd08cfa..be611f6 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -104,8 +104,7 @@ extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); -extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, - struct kvm_interrupt *irq); +extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu); extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu); extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index a4b6452..6548445 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -160,8 +160,7 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, kvmppc_book3s_queue_irqprio(vcpu, vec); } -void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, - struct kvm_interrupt *irq) +void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu) { kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL); diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 020923e..f72abd1 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -222,8 +222,7 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, 
kvmppc_booke_queue_irqprio(vcpu, prio); } -void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, - struct kvm_interrupt *irq) +void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu) { clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions); clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 26d8003..1772883 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -741,7 +741,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { if (irq->irq == KVM_INTERRUPT_UNSET) { - kvmppc_core_dequeue_external(vcpu, irq); + kvmppc_core_dequeue_external(vcpu); return 0; } -- 1.7.10.rc3.219.g53414 -- To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/9] KVM: PPC: Book3S: Add kernel emulation for the XICS interrupt controller
From: Benjamin Herrenschmidt This adds in-kernel emulation of the XICS (eXternal Interrupt Controller Specification) interrupt controller specified by PAPR, for both HV and PR KVM guests. This adds a new KVM_CREATE_IRQCHIP_ARGS ioctl, which is like KVM_CREATE_IRQCHIP in that it indicates that the virtual machine should use in-kernel interrupt controller emulation, but also takes an argument struct that contains the type of interrupt controller architecture and an optional parameter. Currently only one type value is defined, that which indicates the XICS architecture. The XICS emulation supports up to 1048560 interrupt sources. Interrupt source numbers below 16 are reserved; 0 is used to mean no interrupt and 2 is used for IPIs. Internally these are represented in blocks of 1024, called ICS (interrupt controller source) entities, but that is not visible to userspace. Two other new ioctls allow userspace to control the interrupt sources. The KVM_IRQCHIP_SET_SOURCES ioctl sets the priority, destination cpu, level/edge sensitivity and pending state of a range of interrupt sources, creating them if they don't already exist. The KVM_IRQCHIP_GET_SOURCES ioctl returns that information for a range of interrupt sources (they are required to already exist). Each vcpu gets one ICP (interrupt controller presentation) entity. They are created automatically when the vcpu is created provided the KVM_CREATE_IRQCHIP_ARGS ioctl has been performed. This is based on an initial implementation by Michael Ellerman reworked by Benjamin Herrenschmidt and Paul Mackerras. 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- Documentation/virtual/kvm/api.txt | 51 ++ arch/powerpc/include/asm/kvm_book3s.h |1 + arch/powerpc/include/asm/kvm_host.h |8 + arch/powerpc/include/asm/kvm_ppc.h| 19 + arch/powerpc/kvm/Makefile |1 + arch/powerpc/kvm/book3s.c |2 +- arch/powerpc/kvm/book3s_hv.c | 20 + arch/powerpc/kvm/book3s_pr.c | 13 + arch/powerpc/kvm/book3s_pr_papr.c | 16 + arch/powerpc/kvm/book3s_rtas.c| 51 +- arch/powerpc/kvm/book3s_xics.c| 1101 + arch/powerpc/kvm/book3s_xics.h| 111 arch/powerpc/kvm/powerpc.c| 23 + include/uapi/linux/kvm.h | 29 + 14 files changed, 1444 insertions(+), 2 deletions(-) create mode 100644 arch/powerpc/kvm/book3s_xics.c create mode 100644 arch/powerpc/kvm/book3s_xics.h diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index d3e2d60..0ff9dcf 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2141,6 +2141,57 @@ associated with the service will be forgotten, and subsequent RTAS calls by the guest for that service will be passed to userspace to be handled. +4.80 KVM_CREATE_IRQCHIP_ARGS + +Capability: KVM_CAP_IRQCHIP_ARGS +Architectures: ppc +Type: vm ioctl +Parameters: struct kvm_irqchip_args +Returns: 0 on success, -1 on error + +Creates an interrupt controller model in the kernel. The type field +of the argument struct indicates the interrupt controller architecture +of the virtual machine. Currently the only value permitted for the +type field is 1, indicating the XICS (eXternal Interrupt Controller +Specification) model defined in PAPR. For XICS, this ioctl indicates +to the kernel that an interrupt controller presentation (ICP) entity +should be created for every vcpu, and interrupt controller source +(ICS) entities should be created to accommodate the sources that are +configured with the KVM_IRQCHIP_SET_SOURCES ioctl. 
+ +4.81 KVM_IRQCHIP_GET_SOURCES + +Capability: KVM_CAP_IRQCHIP_ARGS +Architectures: ppc +Type: vm ioctl +Parameters: struct kvm_irq_sources +Returns: 0 on success, -1 on error + +Copies configuration and status information about a range of interrupt sources into a user-supplied buffer. The argument struct gives the starting interrupt source number and the number of interrupt sources. The user buffer is an array of 64-bit quantities, one per interrupt source, with (from the least-significant bit) 32 bits of interrupt server number, 8 bits of priority, and 1 bit each for a level-sensitive indicator, a masked indicator, and a pending indicator. If some of the sources in the range don't exist, that is, have not yet been created with the KVM_IRQCHIP_SET_SOURCES ioctl, +this returns an ENODEV error. + +4.82 KVM_IRQCHIP_SET_SOURCES + +Capability: KVM_CAP_IRQCHIP_ARGS +Architectures: ppc +Type: vm ioctl +Parameters: struct kvm_irq_sources +Returns: 0 on success, -1 on error + +Sets the configuration and status for a range of interrupt sources from information supplied in a user-supplied buffer, creating the sources if they don't already exist. The argument struct gives the starting interrupt source number and the number of interrupt sources. +The user buffer is formatted as for KVM_IRQCHIP_G
[PATCH 0/9] In-kernel XICS interrupt controller emulation
This patch series implements in-kernel emulation of the XICS interrupt controller architecture defined in PAPR (Power Architecture Platform Requirements, the document that defines IBM's pSeries platform architecture). One of the things I have done in this version is to provide a way for this to coexist with other interrupt controller emulations. The XICS emulation exports a vector of function pointers that the generic book3s code uses to call in to it. Other emulations could set this vector to point to their own functions. (I realize that Scott Wood's recently-posted patch series uses an entirely orthogonal approach and may or may not find this useful.) The interface defined here consists of: * KVM_CREATE_IRQCHIP_ARGS: like KVM_CREATE_IRQCHIP but takes an argument struct containing a `type' field, specifying what overall interrupt controller architecture to emulate, and a `param' field (unused by the XICS emulation). This is only called once per VM, before VCPUs are created. * KVM_IRQCHIP_SET_SOURCES: used to create and configure interrupt sources in bulk. The notion of "interrupt source" is fairly generic; a source has an identifying number (20 bits in the current implementation), a priority, a destination (a vcpu number or potentially a vcpu group identifier), plus flags indicating edge/level sensitivity, a masked bit, and a pending bit. * KVM_IRQCHIP_GET_SOURCES: used to query the configuration and status of interrupt sources in bulk. * A 64-bit one_reg identifier, KVM_REG_PPC_ICP_STATE, used to get and set per-vcpu interrupt controller state (per-vcpu priority, interrupt status, and pending IPI and interrupt state). I believe this corresponds reasonably well to what was discussed at KVM Forum. This series is against Alex's kvm-ppc-queue branch, although it also applies cleanly on the kvm tree's next branch. Paul. 
-- To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 5/9] KVM: PPC: Book3S: Facilities to save/restore XICS presentation ctrler state
This adds the ability for userspace to save and restore the state of the XICS interrupt presentation controllers (ICPs) via the KVM_GET/SET_ONE_REG interface. Since there is one ICP per vcpu, we simply define a new 64-bit register in the ONE_REG space for the ICP state. The state includes the CPU priority setting, the pending IPI priority, and the priority and source number of any pending external interrupt. Signed-off-by: Paul Mackerras --- Documentation/virtual/kvm/api.txt |1 + arch/powerpc/include/asm/kvm_host.h |2 + arch/powerpc/include/uapi/asm/kvm.h | 13 + arch/powerpc/kvm/book3s.c | 19 arch/powerpc/kvm/book3s_xics.c | 92 +++ 5 files changed, 127 insertions(+) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 0ff9dcf..466636b 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1762,6 +1762,7 @@ registers, find a list below: PPC | KVM_REG_PPC_VPA_DTL | 128 PPC | KVM_REG_PPC_EPCR | 32 PPC | KVM_REG_PPC_EPR | 32 + PPC | KVM_REG_PPC_ICP_STATE | 64 4.69 KVM_GET_ONE_REG diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 8af4c0b..2eb4c27 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -236,6 +236,8 @@ struct kvm_irq_ctrler { void (*free_ctrler)(struct kvm *kvm); int (*hcall)(struct kvm_vcpu *vcpu, unsigned long req); int (*ioctl)(struct kvm *kvm, unsigned int ioctl, unsigned long arg); + u64 (*get_one_reg)(struct kvm_vcpu *vcpu, u64 reg); + int (*set_one_reg)(struct kvm_vcpu *vcpu, u64 reg, u64 val); }; struct kvm_arch { diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index d90743c..8e34553 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -423,4 +423,17 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_EPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85) #define KVM_REG_PPC_EPR(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x86) +/* 
Per-vcpu interrupt controller state */ +#define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x87) + +/* Layout of above for XICS */ +#define KVM_REG_PPC_ICP_CPPR_SHIFT56 /* current proc priority */ +#define KVM_REG_PPC_ICP_CPPR_MASK 0xff +#define KVM_REG_PPC_ICP_XISR_SHIFT32 /* interrupt status field */ +#define KVM_REG_PPC_ICP_XISR_MASK 0xff +#define KVM_REG_PPC_ICP_MFRR_SHIFT24 /* pending IPI priority */ +#define KVM_REG_PPC_ICP_MFRR_MASK 0xff +#define KVM_REG_PPC_ICP_PPRI_SHIFT16 /* pending irq priority */ +#define KVM_REG_PPC_ICP_PPRI_MASK 0xff + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index c5a4478..8d5ee31 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -529,6 +529,15 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]); break; #endif /* CONFIG_ALTIVEC */ + case KVM_REG_PPC_ICP_STATE: { + struct kvm_irq_ctrler *ic = vcpu->kvm->arch.irq_ctrler; + if (!ic) { + r = -ENXIO; + break; + } + val = get_reg_val(reg->id, ic->get_one_reg(vcpu, reg->id)); + break; + } default: r = -EINVAL; break; @@ -591,6 +600,16 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val); break; #endif /* CONFIG_ALTIVEC */ + case KVM_REG_PPC_ICP_STATE: { + struct kvm_irq_ctrler *ic = vcpu->kvm->arch.irq_ctrler; + + if (!ic) { + r = -ENXIO; + break; + } + r = ic->set_one_reg(vcpu, reg->id, set_reg_val(reg->id, val)); + break; + } default: r = -EINVAL; break; diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 2312b56..f54b934 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -888,6 +888,96 @@ static void kvmppc_xics_free(struct kvm *kvm) kfree(xics); } +static u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu, u64 reg_id) +{ + struct kvmppc_icp *icp = vcpu->arch.icp; + union 
kvmppc_icp_state state; + + if (!icp) + return 0; + state = icp->state; + ret
[PATCH 4/9] KVM: PPC: Book3S: Generalize interfaces to interrupt controller emulation
This makes the XICS interrupt controller emulation code export a struct containing function pointers for the various calls into the XICS code. The generic book3s code then uses these function pointers instead of calling directly into the XICS code (except for the XICS instantiation function). This should make it possible for other interrupt controller emulations to coexist with the XICS emulation. Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_host.h | 11 - arch/powerpc/include/asm/kvm_ppc.h | 14 -- arch/powerpc/kvm/book3s_hv.c| 18 +++ arch/powerpc/kvm/book3s_pr.c| 14 +++--- arch/powerpc/kvm/book3s_pr_papr.c | 10 ++-- arch/powerpc/kvm/book3s_xics.c | 93 +++ arch/powerpc/kvm/powerpc.c |4 +- 7 files changed, 85 insertions(+), 79 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index b05e7cd..8af4c0b 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -230,8 +230,18 @@ struct kvm_arch_memory_slot { #endif /* CONFIG_KVM_BOOK3S_64_HV */ }; +struct kvm_irq_ctrler { + int (*setup_vcpu)(struct kvm_vcpu *vcpu); + void (*teardown_vcpu)(struct kvm_vcpu *vcpu); + void (*free_ctrler)(struct kvm *kvm); + int (*hcall)(struct kvm_vcpu *vcpu, unsigned long req); + int (*ioctl)(struct kvm *kvm, unsigned int ioctl, unsigned long arg); +}; + struct kvm_arch { unsigned int lpid; + struct kvm_irq_ctrler *irq_ctrler; + void *irq_ctrler_private; #ifdef CONFIG_KVM_BOOK3S_64_HV unsigned long hpt_virt; struct revmap_entry *revmap; @@ -260,7 +270,6 @@ struct kvm_arch { #ifdef CONFIG_PPC_BOOK3S_64 struct list_head spapr_tce_tables; struct list_head rtas_tokens; - struct kvmppc_xics *xics; #endif }; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index f0fd22b..2308fff 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -130,12 +130,7 @@ extern long kvmppc_prepare_vrma(struct kvm *kvm, extern void 
kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, unsigned long porder); extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); -extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd); -extern int kvmppc_xics_ioctl(struct kvm *kvm, unsigned ioctl, unsigned long arg); extern int kvmppc_xics_create(struct kvm *kvm, struct kvm_irqchip_args *args); -extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu); -extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu); -extern void kvmppc_xics_free(struct kvm *kvm); extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce *args); @@ -275,15 +270,6 @@ static inline void kvm_linear_init(void) #endif -#ifdef CONFIG_PPC_BOOK3S_64 -static inline int kvmppc_xics_enabled(struct kvm *kvm) -{ - return kvm->arch.xics != NULL; -} -#else -static inline int kvmppc_xics_enabled(struct kvm *kvm) { return 0; } -#endif - static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr) { #ifdef CONFIG_KVM_BOOKE_HV diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index aa3a0db..8f09c36 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -532,8 +532,8 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) case H_CPPR: case H_EOI: case H_IPI: - if (kvmppc_xics_enabled(vcpu->kvm)) { - ret = kvmppc_xics_hcall(vcpu, req); + if (vcpu->kvm->arch.irq_ctrler) { + ret = vcpu->kvm->arch.irq_ctrler->hcall(vcpu, req); break; } /* fallthrough */ default: @@ -894,9 +894,9 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) spin_lock_init(&vcpu->arch.tbacct_lock); vcpu->arch.busy_preempt = TB_NIL; - /* Create the XICS */ - if (kvmppc_xics_enabled(kvm)) { - err = kvmppc_xics_create_icp(vcpu); + /* Create the interrupt-controller-specific state */ + if (kvm->arch.irq_ctrler) { + err = kvm->arch.irq_ctrler->setup_vcpu(vcpu); if (err < 0) goto free_vcpu; } @@ -952,8 +952,8 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) 
kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr); spin_unlock(&vcpu->arch.vpa_update_lock); kvm_vcpu_uninit(vcpu); - if (kvmppc_xics_enabled(vcpu->kvm)) - kvmppc_xics_free_icp(vcpu); + if (vcpu->kvm->arch.irq_ctrler) + vcpu->kvm->arch.irq_ctrler->teardown_vcpu(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu); } @@ -1899,8 +1899,8 @@ void kvmppc_core_destroy_vm(struct kvm *k
[PATCH 8/9] KVM: PPC: Book3S HV: Improve real-mode handling of external interrupts
This streamlines our handling of external interrupts that come in while we're in the guest. First, when waking up a hardware thread that was napping, we split off the "napping due to H_CEDE" case earlier, and use the code that handles an external interrupt (0x500) in the guest to handle that too. Secondly, the code that handles those external interrupts now checks if any other thread is exiting to the host before bouncing an external interrupt to the guest, and also checks that there is actually an external interrupt pending for the guest before setting the LPCR MER bit (mediated external request). This also makes sure that we clear the "ceded" flag when we handle a wakeup from cede in real mode, and fixes a potential infinite loop in kvmppc_run_vcpu() which can occur if we ever end up with the ceded flag set but MSR[EE] off. Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/reg.h |1 + arch/powerpc/kvm/book3s_hv.c|5 +- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 140 +-- 3 files changed, 81 insertions(+), 65 deletions(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 11ae3d8..abe34e0 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -254,6 +254,7 @@ #define LPCR_PECE1 0x2000 /* decrementer can cause exit */ #define LPCR_PECE2 0x1000 /* machine check etc can cause exit */ #define LPCR_MER 0x0800 /* Mediated External Exception */ +#define LPCR_MER_SH 11 #define LPCR_LPES0x000c #define LPCR_LPES0 0x0008 /* LPAR Env selector 0 */ #define LPCR_LPES1 0x0004 /* LPAR Env selector 1 */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 1365440..bd751a3 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1384,9 +1384,12 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) break; vc->runner = vcpu; n_ceded = 0; - list_for_each_entry(v, &vc->runnable_threads, arch.run_list) + list_for_each_entry(v, &vc->runnable_threads, 
arch.run_list) { if (!v->arch.pending_exceptions) n_ceded += v->arch.ceded; + else + v->arch.ceded = 0; + } if (n_ceded == vc->n_runnable) kvmppc_vcore_blocked(vc); else diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index fe908f5..0c519cb 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -97,50 +97,51 @@ kvm_start_guest: li r0,1 stb r0,PACA_NAPSTATELOST(r13) - /* get vcpu pointer, NULL if we have no vcpu to run */ - ld r4,HSTATE_KVM_VCPU(r13) - cmpdi cr1,r4,0 + /* were we napping due to cede? */ + lbz r0,HSTATE_NAPPING(r13) + cmpwi r0,0 + bne kvm_end_cede + + /* +* We weren't napping due to cede, so this must be a secondary +* thread being woken up to run a guest, or being woken up due +* to a stray IPI. (Or due to some machine check or hypervisor +* maintenance interrupt while the core is in KVM.) +*/ /* Check the wake reason in SRR1 to see why we got here */ mfspr r3,SPRN_SRR1 rlwinm r3,r3,44-31,0x7 /* extract wake reason field */ cmpwi r3,4/* was it an external interrupt? */ - bne 27f - - /* -* External interrupt - for now assume it is an IPI, since we -* should never get any other interrupts sent to offline threads. -* Only do this for secondary threads. -*/ - beq cr1,25f - lwz r3,VCPU_PTID(r4) - cmpwi r3,0 - beq 27f -25:ld r5,HSTATE_XICS_PHYS(r13) - li r0,0xff - li r6,XICS_MFRR - li r7,XICS_XIRR + bne 27f /* if not */ + ld r5,HSTATE_XICS_PHYS(r13) + li r7,XICS_XIRR/* if it was an external interrupt, */ lwzcix r8,r5,r7/* get and ack the interrupt */ sync clrldi. r9,r8,40/* get interrupt source ID. */ - beq 27f /* none there? */ - cmpwi r9,XICS_IPI - bne 26f + beq 28f /* none there? */ + cmpwi r9,XICS_IPI /* was it an IPI? */ + bne 29f + li r0,0xff + li r6,XICS_MFRR stbcix r0,r5,r6/* clear IPI */ -26:stwcix r8,r5,r7/* EOI the interrupt */ - -27:/* XXX should handle hypervisor maintenance interrupts etc. here */ +
[PATCH 1/9] KVM: PPC: Book3S: Add infrastructure to implement kernel-side RTAS calls
From: Michael Ellerman For pseries machine emulation, in order to move the interrupt controller code to the kernel, we need to intercept some RTAS calls in the kernel itself. This adds an infrastructure to allow in-kernel handlers to be registered for RTAS services by name. A new ioctl, KVM_PPC_RTAS_DEFINE_TOKEN, then allows userspace to associate token values with those service names. Then, when the guest requests an RTAS service with one of those token values, it will be handled by the relevant in-kernel handler rather than being passed up to userspace as at present. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- Documentation/virtual/kvm/api.txt | 19 arch/powerpc/include/asm/hvcall.h |3 + arch/powerpc/include/asm/kvm_host.h |1 + arch/powerpc/include/asm/kvm_ppc.h |4 + arch/powerpc/include/uapi/asm/kvm.h |6 ++ arch/powerpc/kvm/Makefile |1 + arch/powerpc/kvm/book3s_hv.c| 18 +++- arch/powerpc/kvm/book3s_pr_papr.c |7 ++ arch/powerpc/kvm/book3s_rtas.c | 182 +++ arch/powerpc/kvm/powerpc.c |9 +- include/uapi/linux/kvm.h|3 + 11 files changed, 251 insertions(+), 2 deletions(-) create mode 100644 arch/powerpc/kvm/book3s_rtas.c diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index c2534c3..d3e2d60 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2122,6 +2122,25 @@ header; first `n_valid' valid entries with contents from the data written, then `n_invalid' invalid entries, invalidating any previously valid entries found. +4.79 KVM_PPC_RTAS_DEFINE_TOKEN + +Capability: KVM_CAP_PPC_RTAS +Architectures: ppc +Type: vm ioctl +Parameters: struct kvm_rtas_token_args +Returns: 0 on success, -1 on error + +Defines a token value for a RTAS (Run Time Abstraction Services) +service in order to allow it to be handled in the kernel. The +argument struct gives the name of the service, which must be the name +of a service that has a kernel-side implementation. 
If the token +value is non-zero, it will be associated with that service, and +subsequent RTAS calls by the guest specifying that token will be +handled by the kernel. If the token value is 0, then any token +associated with the service will be forgotten, and subsequent RTAS +calls by the guest for that service will be passed to userspace to be +handled. + 5. The kvm_run structure diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 7a86706..9ea22b2 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -269,6 +269,9 @@ #define H_GET_MPP_X0x314 #define MAX_HCALL_OPCODE H_GET_MPP_X +/* Platform specific hcalls, used by KVM */ +#define H_RTAS 0xf000 + #ifndef __ASSEMBLY__ /** diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 8a72d59..8295dc7 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -255,6 +255,7 @@ struct kvm_arch { #endif /* CONFIG_KVM_BOOK3S_64_HV */ #ifdef CONFIG_PPC_BOOK3S_64 struct list_head spapr_tce_tables; + struct list_head rtas_tokens; #endif }; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 44a657a..dd08cfa 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -165,6 +165,10 @@ extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu); extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *); +extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp); +extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu); +extern void kvmppc_rtas_tokens_free(struct kvm *kvm); + /* * Cuts out inst bits with ordering according to spec. * That means the leftmost bit is zero. All given bits are included. 
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 16064d0..d90743c 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -299,6 +299,12 @@ struct kvm_allocate_rma { __u64 rma_size; }; +/* for KVM_CAP_PPC_RTAS */ +struct kvm_rtas_token_args { + char name[120]; + __u64 token;/* Use a token of 0 to undefine a mapping */ +}; + struct kvm_book3e_206_tlb_entry { __u32 mas8; __u32 mas1; diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index b772ede..432132c 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -86,6 +86,7 @@ kvm-book3s_64-module-objs := \ emulate.o \ book3s.o \ book3s_64_vio.o \ + book3s_rtas.o \ $(kvm-book3s_64-objs-y) kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs) diff --git a/arch/pow
[PATCH 9/9] KVM: PPC: Book3S: Add support for ibm,int-on/off RTAS calls
This adds support for the ibm,int-on and ibm,int-off RTAS calls to the in-kernel XICS emulation and corrects the handling of the saved priority by the ibm,set-xive RTAS call. With this, ibm,int-off sets the specified interrupt's priority in its saved_priority field and sets the priority to 0xff (the least favoured value). ibm,int-on restores the saved_priority to the priority field, and ibm,set-xive sets both the priority and the saved_priority to the specified priority value. Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_ppc.h |2 + arch/powerpc/kvm/book3s_rtas.c | 40 ++ arch/powerpc/kvm/book3s_xics.c | 101 +--- arch/powerpc/kvm/book3s_xics.h |2 +- 4 files changed, 125 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index f371af8a..dd7c1fc 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -171,6 +171,8 @@ extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu); extern void kvmppc_rtas_tokens_free(struct kvm *kvm); extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority); extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority); +extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq); +extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq); /* * Cuts out inst bits with ordering according to spec. 
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index 6a6c1fe..fc1a749 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -64,6 +64,44 @@ out: args->rets[0] = rc; } +static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args) +{ + u32 irq; + int rc; + + if (args->nargs != 1 || args->nret != 1) { + rc = -3; + goto out; + } + + irq = args->args[0]; + + rc = kvmppc_xics_int_off(vcpu->kvm, irq); + if (rc) + rc = -3; +out: + args->rets[0] = rc; +} + +static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args) +{ + u32 irq; + int rc; + + if (args->nargs != 1 || args->nret != 1) { + rc = -3; + goto out; + } + + irq = args->args[0]; + + rc = kvmppc_xics_int_on(vcpu->kvm, irq); + if (rc) + rc = -3; +out: + args->rets[0] = rc; +} + struct rtas_handler { void (*handler)(struct kvm_vcpu *vcpu, struct rtas_args *args); char *name; @@ -72,6 +110,8 @@ struct rtas_handler { static struct rtas_handler rtas_handlers[] = { { .name = "ibm,set-xive", .handler = kvm_rtas_set_xive }, { .name = "ibm,get-xive", .handler = kvm_rtas_get_xive }, + { .name = "ibm,int-off", .handler = kvm_rtas_int_off }, + { .name = "ibm,int-on", .handler = kvm_rtas_int_on }, }; struct rtas_token_definition { diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index abd2dde..665777e 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -116,6 +116,28 @@ static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics, mutex_unlock(&ics->lock); } +static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics, + struct ics_irq_state *state, + u32 server, u32 priority, u32 saved_priority) +{ + bool deliver; + + mutex_lock(&ics->lock); + + state->server = server; + state->priority = priority; + state->saved_priority = saved_priority; + deliver = false; + if ((state->masked_pending || state->resend) && priority != MASKED) { + state->masked_pending 
= 0; + deliver = true; + } + + mutex_unlock(&ics->lock); + + return deliver; +} + int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority) { struct kvmppc_xics *xics = kvm->arch.irq_ctrler_private; @@ -123,7 +145,6 @@ int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority) struct kvmppc_ics *ics; struct ics_irq_state *state; u16 src; - bool deliver; if (!xics) return -ENODEV; @@ -137,23 +158,11 @@ int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority) if (!icp) return -EINVAL; - mutex_lock(&ics->lock); - XICS_DBG("set_xive %#x server %#x prio %#x MP:%d RS:%d\n", irq, server, priority, state->masked_pending, state->resend); - state->server = server; - state->priority = priority; - deliver = false; - if ((state->masked_pending || state->resend) && priority != MASKED) { - state->masked_pending = 0; - deliver = true; - } - - mutex_unlock(&ics->lock);