Re: [PATCH 4/4] vfio powerpc: added real mode support

2013-02-14 Thread Paul Mackerras
On Mon, Feb 11, 2013 at 11:12:43PM +1100, a...@ozlabs.ru wrote:
> From: Alexey Kardashevskiy 
> 
> The patch allows the host kernel to handle H_PUT_TCE request
> without involving QEMU in it what should save time on switching
> from the kernel to QEMU and back.
> 
> The patch adds an IOMMU ID parameter into the KVM_CAP_SPAPR_TCE ioctl,
> QEMU needs to be fixed to support that.
> 
> At the moment H_PUT_TCE is processed in the virtual mode as the page
> to be mapped may not be present in the RAM so paging may be involved as
> it can be done from the virtual mode only.
> 
> Tests show that this patch increases transmission speed from 220MB/s
> to 750..1020MB/s on 10Gb network (Chelsio CXGB3 10Gb ethernet card).
[snip]
> diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
> index b4fdabc..acb9cdc 100644
> --- a/arch/powerpc/kernel/iommu.c
> +++ b/arch/powerpc/kernel/iommu.c
> @@ -47,6 +47,8 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include 
>  
>  #define DBG(...)
>  
> @@ -727,6 +729,7 @@ void iommu_register_group(struct iommu_table * tbl,
>   return;
>   }
>   tbl->it_group = grp;
> + INIT_LIST_HEAD(&tbl->it_hugepages);
>   iommu_group_set_iommudata(grp, tbl, group_release);
>   iommu_group_set_name(grp, kasprintf(GFP_KERNEL, "domain%d-pe%lx",
>   domain_number, pe_num));
> @@ -906,6 +909,83 @@ void kvm_iommu_unmap_pages(struct kvm *kvm, struct 
> kvm_memory_slot *slot)
>  {
>  }
>  
> +/*
> + * The KVM guest can be backed with 16MB pages (qemu switch
> + * -mem-path /var/lib/hugetlbfs/global/pagesize-16MB/).
> + * In this case, we cannot do page counting from the real mode
> + * as the compound pages are used - they are linked in a list
> + * with pointers as virtual addresses which are inaccessible
> + * in real mode.
> + *
> + * The code below keeps a 16MB pages list and uses page struct
> + * in real mode if it is already locked in RAM and inserted into
> + * the list or switches to the virtual mode where it can be
> + * handled in a usual manner.
> + */
> +struct iommu_kvmppc_hugepages {
> + struct list_head list;
> + pte_t pte;  /* Huge page PTE */
> + unsigned long pa;   /* Base phys address used as a real TCE */
> + struct page *page;  /* page struct of the very first subpage */
> + unsigned long size; /* Huge page size (always 16MB at the moment) */
> + bool dirty; /* Dirty bit */
> +};
> +
> +static struct iommu_kvmppc_hugepages *find_hp_by_pte(struct iommu_table *tbl,
> + pte_t pte)
> +{
> + struct iommu_kvmppc_hugepages *hp;
> +
> + list_for_each_entry(hp, &tbl->it_hugepages, list) {
> + if (hp->pte == pte)
> + return hp;
> + }
> +
> + return NULL;
> +}
> +
> +static struct iommu_kvmppc_hugepages *find_hp_by_pa(struct iommu_table *tbl,
> + unsigned long pa)
> +{
> + struct iommu_kvmppc_hugepages *hp;
> +
> + list_for_each_entry(hp, &tbl->it_hugepages, list) {
> + if ((hp->pa <= pa) && (pa < hp->pa + hp->size))
> + return hp;
> + }
> +
> + return NULL;
> +}
> +
> +static struct iommu_kvmppc_hugepages *add_hp(struct iommu_table *tbl,
> + pte_t pte, unsigned long va, unsigned long pg_size)
> +{
> + int ret;
> + struct iommu_kvmppc_hugepages *hp;
> +
> + hp = kzalloc(sizeof(*hp), GFP_KERNEL);
> + if (!hp)
> + return NULL;
> +
> + hp->pte = pte;
> + va = va & ~(pg_size - 1);
> + ret = get_user_pages_fast(va, 1, true/*write*/, &hp->page);
> + if ((ret != 1) || !hp->page) {
> + kfree(hp);
> + return NULL;
> + }
> +#if defined(HASHED_PAGE_VIRTUAL) || defined(WANT_PAGE_VIRTUAL)
> +#error TODO: fix to avoid page_address() here
> +#endif
> + hp->pa = __pa((unsigned long) page_address(hp->page));
> +
> + hp->size = pg_size;
> +
> + list_add(&hp->list, &tbl->it_hugepages);
> +
> + return hp;
> +}

I don't see any locking here.  What stops one cpu doing add_hp() from
racing with another doing find_hp_by_pte() or find_hp_by_pa()?

[snip]
> @@ -1021,6 +1123,24 @@ long iommu_clear_tce_user_mode(struct iommu_table 
> *tbl, unsigned long ioba,
>  }
>  EXPORT_SYMBOL_GPL(iommu_clear_tce_user_mode);
>  
> +long iommu_clear_tce_real_mode(struct iommu_table *tbl, unsigned long ioba,
> + unsigned long tce_value, unsigned long npages)
> +{
> + long ret;
> + unsigned long entry = ioba >> IOMMU_PAGE_SHIFT;
> +
> + ret = tce_clear_param_check(tbl, ioba, tce_value, npages);
> + if (!ret)
> + ret = clear_tce(tbl, true, entry, npages);
> +
> + if (ret < 0)
> + pr_err("iommu_tce: %s failed ioba=%lx, tce_value=%lx ret=%ld\n",
> + __func__, ioba, tce_value, ret);

Better to avoid printk in real mode if at all possible, particularly
if they're guest-triggerable.

[snip]
> @@ -195,15 +225,4

Re: [PATCH 2/4] powerpc kvm: added multiple TCEs requests support

2013-02-14 Thread Paul Mackerras
On Mon, Feb 11, 2013 at 11:12:41PM +1100, a...@ozlabs.ru wrote:

> +static long emulated_h_put_tce(struct kvmppc_spapr_tce_table *stt,
> + unsigned long ioba, unsigned long tce)
> +{
> + unsigned long idx = ioba >> SPAPR_TCE_SHIFT;
> + struct page *page;
> + u64 *tbl;
> +
> + /* udbg_printf("H_PUT_TCE: liobn 0x%lx => stt=%p  window_size=0x%x\n", 
> */
> + /*  liobn, stt, stt->window_size); */
> + if (ioba >= stt->window_size) {
> + pr_err("%s failed on ioba=%lx\n", __func__, ioba);

Doesn't this give the guest a way to spam the host logs?  And in fact
printk in real mode is potentially problematic.  I would just leave
out this statement.

> + return H_PARAMETER;
> + }
> +
> + page = stt->pages[idx / TCES_PER_PAGE];
> + tbl = (u64 *)page_address(page);

I would like to see an explanation of why we are confident that
page_address() will work correctly in real mode, across all the
combinations of config options that we can have for a ppc64 book3s
kernel.

> +
> + /* FIXME: Need to validate the TCE itself */
> + /* udbg_printf("tce @ %p\n", &tbl[idx % TCES_PER_PAGE]); */
> + tbl[idx % TCES_PER_PAGE] = tce;
> +
> + return H_SUCCESS;
> +}
> +
> +/*
> + * Real mode handlers
>   */
>  long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
> unsigned long ioba, unsigned long tce)
>  {
> - struct kvm *kvm = vcpu->kvm;
>   struct kvmppc_spapr_tce_table *stt;
>  
> - /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
> - /*  liobn, ioba, tce); */
> + stt = find_tce_table(vcpu, liobn);
> + /* Didn't find the liobn, put it to userspace */
> + if (!stt)
> + return H_TOO_HARD;
> +
> + /* Emulated IO */
> + return emulated_h_put_tce(stt, ioba, tce);
> +}
> +
> +long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
> + unsigned long liobn, unsigned long ioba,
> + unsigned long tce_list, unsigned long npages)
> +{
> + struct kvmppc_spapr_tce_table *stt;
> + long i, ret = 0;
> + unsigned long *tces;
> +
> + stt = find_tce_table(vcpu, liobn);
> + /* Didn't find the liobn, put it to userspace */
> + if (!stt)
> + return H_TOO_HARD;
>  
> - list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
> - if (stt->liobn == liobn) {
> - unsigned long idx = ioba >> SPAPR_TCE_SHIFT;
> - struct page *page;
> - u64 *tbl;
> + tces = (void *) get_real_address(vcpu, tce_list, false, NULL, NULL);
> + if (!tces)
> + return H_TOO_HARD;
>  
> - /* udbg_printf("H_PUT_TCE: liobn 0x%lx => stt=%p  
> window_size=0x%x\n", */
> - /*  liobn, stt, stt->window_size); */
> - if (ioba >= stt->window_size)
> - return H_PARAMETER;
> + /* Emulated IO */
> + for (i = 0; (i < npages) && !ret; ++i, ioba += IOMMU_PAGE_SIZE)
> + ret = emulated_h_put_tce(stt, ioba, tces[i]);

So, tces is a pointer to somewhere inside a real page.  Did we check
somewhere that tces[npages-1] is in the same page as tces[0]?  If so,
I missed it.  If we didn't, then we probably should check and do
something about it.

>  
> - page = stt->pages[idx / TCES_PER_PAGE];
> - tbl = (u64 *)page_address(page);
> + return ret;
> +}
>  
> - /* FIXME: Need to validate the TCE itself */
> - /* udbg_printf("tce @ %p\n", &tbl[idx % 
> TCES_PER_PAGE]); */
> - tbl[idx % TCES_PER_PAGE] = tce;
> - return H_SUCCESS;
> - }
> - }
> +long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
> + unsigned long liobn, unsigned long ioba,
> + unsigned long tce_value, unsigned long npages)
> +{
> + struct kvmppc_spapr_tce_table *stt;
> + long i, ret = 0;
> +
> + stt = find_tce_table(vcpu, liobn);
> + /* Didn't find the liobn, put it to userspace */
> + if (!stt)
> + return H_TOO_HARD;
>  
> - /* Didn't find the liobn, punt it to userspace */
> - return H_TOO_HARD;
> + /* Emulated IO */
> + for (i = 0; (i < npages) && !ret; ++i, ioba += IOMMU_PAGE_SIZE)
> + ret = emulated_h_put_tce(stt, ioba, tce_value);
> +
> + return ret;
> +}
> +
> +/*
> + * Virtual mode handlers
> + */
> +extern long kvmppc_virtmode_h_put_tce(struct kvm_vcpu *vcpu,
> + unsigned long liobn, unsigned long ioba,
> + unsigned long tce)
> +{
> + /* At the moment emulated IO is handled the same way */
> + return kvmppc_h_put_tce(vcpu, liobn, ioba, tce);
> +}
> +
> +extern long kvmppc_virtmode_h_put_tce_indirect(struct kvm_vcpu *vcpu,
> + unsigned long liobn, unsigned long ioba,
> + unsigned long tce_list, unsigne

Re: [PATCH 3/4] powerpc: preparing to support real mode optimization

2013-02-14 Thread Paul Mackerras
On Mon, Feb 11, 2013 at 11:12:42PM +1100, a...@ozlabs.ru wrote:
> From: Alexey Kardashevskiy 
> 
> The current VFIO-on-POWER implementation supports only user mode
> driven mapping, i.e. QEMU is sending requests to map/unmap pages.
> However this approach is really slow in really fast hardware so
> it is better to be moved to the real mode.
> 
> The patch adds an API to increment/decrement page counter as
> get_user_pages API used for user mode mapping does not work
> in the real mode.
> 
> CONFIG_SPARSEMEM_VMEMMAP and CONFIG_FLATMEM are supported.
> 
> Signed-off-by: Alexey Kardashevskiy 
> Cc: David Gibson 
> ---

The names are slightly odd, in that they include "vmemmap_" but exist
and work in the flatmem case as well.  Apart from that...

Reviewed-by: Paul Mackerras 

Paul.
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/4] powerpc: lookup_linux_pte has been made public

2013-02-14 Thread Paul Mackerras
On Mon, Feb 11, 2013 at 11:12:40PM +1100, a...@ozlabs.ru wrote:
> From: Alexey Kardashevskiy 
> 
> The lookup_linux_pte() function returns a linux PTE which
> is required to convert KVM guest physical address into host real
> address in real mode.
> 
> This conversion will be used by upcoming support of H_PUT_TCE_INDIRECT
> as TCE list address comes from the guest directly so it is a guest
> physical.
> 
> Signed-off-by: Alexey Kardashevskiy 
> Cc: David Gibson 
> ---
>  arch/powerpc/include/asm/pgtable-ppc64.h |3 +++
>  arch/powerpc/kvm/book3s_hv_rm_mmu.c  |4 ++--
>  2 files changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h 
> b/arch/powerpc/include/asm/pgtable-ppc64.h
> index 0182c20..ddcc898 100644
> --- a/arch/powerpc/include/asm/pgtable-ppc64.h
> +++ b/arch/powerpc/include/asm/pgtable-ppc64.h
> @@ -377,6 +377,9 @@ static inline pte_t *find_linux_pte_or_hugepte(pgd_t 
> *pgdir, unsigned long ea,
>  }
>  #endif /* !CONFIG_HUGETLB_PAGE */
>  
> +pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
> + int writing, unsigned long *pte_sizep);
> +

This seems a slightly odd place to put the declaration of a function
which is defined in the KVM code.  kvm-ppc.h might be a better place.

Paul.
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 07/12] KVM: PPC: E500: Make clear_tlb_refs and clear_tlb1_bitmap static

2013-02-14 Thread Alexander Graf
Host shadow TLB flushing is logic that the guest TLB code should have
no insight about. Declare the internal clear_tlb_refs and clear_tlb1_bitmap
functions static to the host TLB handling file.

Instead of these, we can use the already exported kvmppc_core_flush_tlb().
This gives us a common API across the board to say "please flush any
pending host shadow translation".

Signed-off-by: Alexander Graf 
---
 arch/powerpc/kvm/e500_mmu.c  |6 ++
 arch/powerpc/kvm/e500_mmu_host.c |4 ++--
 arch/powerpc/kvm/e500_mmu_host.h |2 --
 3 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
index c3d1721..623a192 100644
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -541,10 +541,8 @@ static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
 {
int i;
 
-   clear_tlb1_bitmap(vcpu_e500);
+   kvmppc_core_flush_tlb(&vcpu_e500->vcpu);
kfree(vcpu_e500->g2h_tlb1_map);
-
-   clear_tlb_refs(vcpu_e500);
kfree(vcpu_e500->gtlb_priv[0]);
kfree(vcpu_e500->gtlb_priv[1]);
 
@@ -735,7 +733,7 @@ int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
 {
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
kvmppc_recalc_tlb1map_range(vcpu_e500);
-   clear_tlb_refs(vcpu_e500);
+   kvmppc_core_flush_tlb(vcpu);
return 0;
 }
 
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 9a150bc..a222edf 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -262,7 +262,7 @@ static inline void kvmppc_e500_ref_release(struct tlbe_ref 
*ref)
}
 }
 
-void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500)
+static void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500)
 {
if (vcpu_e500->g2h_tlb1_map)
memset(vcpu_e500->g2h_tlb1_map, 0,
@@ -284,7 +284,7 @@ static void clear_tlb_privs(struct kvmppc_vcpu_e500 
*vcpu_e500)
}
 }
 
-void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500)
+static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500)
 {
int stlbsel = 1;
int i;
diff --git a/arch/powerpc/kvm/e500_mmu_host.h b/arch/powerpc/kvm/e500_mmu_host.h
index 9e4d4a2..7624835 100644
--- a/arch/powerpc/kvm/e500_mmu_host.h
+++ b/arch/powerpc/kvm/e500_mmu_host.h
@@ -12,8 +12,6 @@
 void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
 int esel);
 
-void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500);
-void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500);
 int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500);
 void e500_mmu_host_uninit(struct kvmppc_vcpu_e500 *vcpu_e500);
 
-- 
1.6.0.2

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 06/12] KVM: PPC: e500: Implement TLB1-in-TLB0 mapping

2013-02-14 Thread Alexander Graf
When a host mapping fault happens in a guest TLB1 entry today, we
map the translated guest entry into the host's TLB1.

This isn't particularly clever when the guest is mapped by normal 4k
pages, since these would be a lot better to put into TLB0 instead.

This patch adds the required logic to map 4k TLB1 shadow maps into
the host's TLB0.

Signed-off-by: Alexander Graf 
---
 arch/powerpc/kvm/e500.h  |1 +
 arch/powerpc/kvm/e500_mmu_host.c |   65 +++---
 2 files changed, 47 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index c70d37e..41cefd4 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -28,6 +28,7 @@
 
 #define E500_TLB_VALID 1
 #define E500_TLB_BITMAP 2
+#define E500_TLB_TLB0  (1 << 2)
 
 struct tlbe_ref {
pfn_t pfn;
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 4c32d65..9a150bc 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -216,10 +216,21 @@ void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 
*vcpu_e500, int tlbsel,
vcpu_e500->g2h_tlb1_map[esel] = 0;
ref->flags &= ~(E500_TLB_BITMAP | E500_TLB_VALID);
local_irq_restore(flags);
+   }
 
-   return;
+   if (tlbsel == 1 && ref->flags & E500_TLB_TLB0) {
+   /*
+* TLB1 entry is backed by 4k pages. This should happen
+* rarely and is not worth optimizing. Invalidate everything.
+*/
+   kvmppc_e500_tlbil_all(vcpu_e500);
+   ref->flags &= ~(E500_TLB_TLB0 | E500_TLB_VALID);
}
 
+   /* Already invalidated in between */
+   if (!(ref->flags & E500_TLB_VALID))
+   return;
+
/* Guest tlbe is backed by at most one host tlbe per shadow pid. */
kvmppc_e500_tlbil_one(vcpu_e500, gtlbe);
 
@@ -487,38 +498,54 @@ static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 
*vcpu_e500, int esel,
return 0;
 }
 
+static int kvmppc_e500_tlb1_map_tlb1(struct kvmppc_vcpu_e500 *vcpu_e500,
+struct tlbe_ref *ref,
+int esel)
+{
+   unsigned int sesel = vcpu_e500->host_tlb1_nv++;
+
+   if (unlikely(vcpu_e500->host_tlb1_nv >= tlb1_max_shadow_size()))
+   vcpu_e500->host_tlb1_nv = 0;
+
+   vcpu_e500->tlb_refs[1][sesel] = *ref;
+   vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel;
+   vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP;
+   if (vcpu_e500->h2g_tlb1_rmap[sesel]) {
+   unsigned int idx = vcpu_e500->h2g_tlb1_rmap[sesel];
+   vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << sesel);
+   }
+   vcpu_e500->h2g_tlb1_rmap[sesel] = esel;
+
+   return sesel;
+}
+
 /* Caller must ensure that the specified guest TLB entry is safe to insert into
  * the shadow TLB. */
-/* XXX for both one-one and one-to-many , for now use TLB1 */
+/* For both one-one and one-to-many */
 static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe,
struct kvm_book3e_206_tlb_entry *stlbe, int esel)
 {
-   struct tlbe_ref *ref;
-   unsigned int sesel;
+   struct tlbe_ref ref;
+   int sesel;
int r;
-   int stlbsel = 1;
-
-   sesel = vcpu_e500->host_tlb1_nv++;
-
-   if (unlikely(vcpu_e500->host_tlb1_nv >= tlb1_max_shadow_size()))
-   vcpu_e500->host_tlb1_nv = 0;
 
-   ref = &vcpu_e500->tlb_refs[1][sesel];
+   ref.flags = 0;
r = kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe,
-  ref);
+  &ref);
if (r)
return r;
 
-   vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel;
-   vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP;
-   if (vcpu_e500->h2g_tlb1_rmap[sesel]) {
-   unsigned int idx = vcpu_e500->h2g_tlb1_rmap[sesel];
-   vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << sesel);
+   /* Use TLB0 when we can only map a page with 4k */
+   if (get_tlb_tsize(stlbe) == BOOK3E_PAGESZ_4K) {
+   vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_TLB0;
+   write_stlbe(vcpu_e500, gtlbe, stlbe, 0, 0);
+   return 0;
}
-   vcpu_e500->h2g_tlb1_rmap[sesel] = esel;
 
-   write_stlbe(vcpu_e500, gtlbe, stlbe, stlbsel, sesel);
+   /* Otherwise map into TLB1 */
+   sesel = kvmppc_e500_tlb1_map_tlb1(vcpu_e500, &ref, esel);
+   write_stlbe(vcpu_e500, gtlbe, stlbe, 1, sesel);
 
return 0;
 }
-- 
1.6.0.2

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 08/12] KVM: PPC: E500: Remove kvmppc_e500_tlbil_all usage from guest TLB code

2013-02-14 Thread Alexander Graf
The guest TLB handling code should not have any insight into how the host
TLB shadow code works.

kvmppc_e500_tlbil_all() is a function that is used for distinction between
e500v2 and e500mc (E.HV) on how to flush shadow entries. This function really
is private between the e500.c/e500mc.c file and e500_mmu_host.c.

Instead of this one, use the public kvmppc_core_flush_tlb() function to flush
all shadow TLB entries. As a nice side effect, with this we also end up
flushing TLB1 entries which we forgot to do before.

Signed-off-by: Alexander Graf 
---
 arch/powerpc/kvm/e500_mmu.c |8 
 1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
index 623a192..5c44759 100644
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -239,8 +239,8 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 
*vcpu_e500, ulong value)
for (esel = 0; esel < vcpu_e500->gtlb_params[1].entries; esel++)
kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel);
 
-   /* Invalidate all vcpu id mappings */
-   kvmppc_e500_tlbil_all(vcpu_e500);
+   /* Invalidate all host shadow mappings */
+   kvmppc_core_flush_tlb(&vcpu_e500->vcpu);
 
return EMULATE_DONE;
 }
@@ -269,8 +269,8 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, gva_t 
ea)
kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
}
 
-   /* Invalidate all vcpu id mappings */
-   kvmppc_e500_tlbil_all(vcpu_e500);
+   /* Invalidate all host shadow mappings */
+   kvmppc_core_flush_tlb(&vcpu_e500->vcpu);
 
return EMULATE_DONE;
 }
-- 
1.6.0.2

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 09/12] KVM: PPC: booke: use vcpu reference from thread_struct

2013-02-14 Thread Alexander Graf
From: Bharat Bhushan 

Like other places, use thread_struct to get vcpu reference.

Signed-off-by: Bharat Bhushan 
Signed-off-by: Alexander Graf 
---
 arch/powerpc/include/asm/reg.h  |2 --
 arch/powerpc/kernel/asm-offsets.c   |2 +-
 arch/powerpc/kvm/booke_interrupts.S |6 ++
 3 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 97d3727..11ae3d8 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -919,8 +919,6 @@
 #define SPRN_SPRG_RSCRATCH_DBG SPRN_SPRG9
 #define SPRN_SPRG_WSCRATCH_DBG SPRN_SPRG9
 #endif
-#define SPRN_SPRG_RVCPUSPRN_SPRG1
-#define SPRN_SPRG_WVCPUSPRN_SPRG1
 #endif
 
 #ifdef CONFIG_8xx
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index 4e23ba2..46f6afd 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -117,7 +117,7 @@ int main(void)
 #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, 
kvm_shadow_vcpu));
 #endif
-#ifdef CONFIG_KVM_BOOKE_HV
+#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
DEFINE(THREAD_KVM_VCPU, offsetof(struct thread_struct, kvm_vcpu));
 #endif
 
diff --git a/arch/powerpc/kvm/booke_interrupts.S 
b/arch/powerpc/kvm/booke_interrupts.S
index bb46b32..ca16d57 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -56,7 +56,8 @@
 _GLOBAL(kvmppc_handler_\ivor_nr)
/* Get pointer to vcpu and record exit number. */
mtspr   \scratch , r4
-   mfspr   r4, SPRN_SPRG_RVCPU
+   mfspr   r4, SPRN_SPRG_THREAD
+   lwz r4, THREAD_KVM_VCPU(r4)
stw r3, VCPU_GPR(R3)(r4)
stw r5, VCPU_GPR(R5)(r4)
stw r6, VCPU_GPR(R6)(r4)
@@ -402,9 +403,6 @@ lightweight_exit:
lwz r8, kvmppc_booke_handlers@l(r8)
mtspr   SPRN_IVPR, r8
 
-   /* Save vcpu pointer for the exception handlers. */
-   mtspr   SPRN_SPRG_WVCPU, r4
-
lwz r5, VCPU_SHARED(r4)
 
/* Can't switch the stack pointer until after IVPR is switched,
-- 
1.6.0.2

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 10/12] KVM: PPC: booke: Allow multiple exception types

2013-02-14 Thread Alexander Graf
From: Bharat Bhushan 

Current kvmppc_booke_handlers uses the same macro (KVM_HANDLER) and
all handlers are considered to be the same size. This will not be
the case if we want to use different macros for different handlers.

This patch improves the kvmppc_booke_handler so that it can
support different macros for different handlers.

Signed-off-by: Liu Yu 
[bharat.bhus...@freescale.com: Substantial changes]
Signed-off-by: Bharat Bhushan 
Signed-off-by: Alexander Graf 
---
 arch/powerpc/include/asm/kvm_ppc.h  |2 -
 arch/powerpc/kvm/booke.c|   14 
 arch/powerpc/kvm/booke.h|1 +
 arch/powerpc/kvm/booke_interrupts.S |   37 --
 arch/powerpc/kvm/e500.c |   16 +-
 5 files changed, 54 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index 493630e..44a657a 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -49,8 +49,6 @@ enum emulation_result {
 
 extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
 extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
-extern char kvmppc_handlers_start[];
-extern unsigned long kvmppc_handler_len;
 extern void kvmppc_handler_highmem(void);
 
 extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 8779cd4..d2f502d 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1594,7 +1594,9 @@ int __init kvmppc_booke_init(void)
 {
 #ifndef CONFIG_KVM_BOOKE_HV
unsigned long ivor[16];
+   unsigned long *handler = kvmppc_booke_handler_addr;
unsigned long max_ivor = 0;
+   unsigned long handler_len;
int i;
 
/* We install our own exception handlers by hijacking IVPR. IVPR must
@@ -1627,14 +1629,16 @@ int __init kvmppc_booke_init(void)
 
for (i = 0; i < 16; i++) {
if (ivor[i] > max_ivor)
-   max_ivor = ivor[i];
+   max_ivor = i;
 
+   handler_len = handler[i + 1] - handler[i];
memcpy((void *)kvmppc_booke_handlers + ivor[i],
-  kvmppc_handlers_start + i * kvmppc_handler_len,
-  kvmppc_handler_len);
+  (void *)handler[i], handler_len);
}
-   flush_icache_range(kvmppc_booke_handlers,
-  kvmppc_booke_handlers + max_ivor + 
kvmppc_handler_len);
+
+   handler_len = handler[max_ivor + 1] - handler[max_ivor];
+   flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers +
+  ivor[max_ivor] + handler_len);
 #endif /* !BOOKE_HV */
return 0;
 }
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index e9b88e4..5fd1ba6 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -65,6 +65,7 @@
  (1 << BOOKE_IRQPRIO_CRITICAL))
 
 extern unsigned long kvmppc_booke_handlers;
+extern unsigned long kvmppc_booke_handler_addr[];
 
 void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr);
 void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr);
diff --git a/arch/powerpc/kvm/booke_interrupts.S 
b/arch/powerpc/kvm/booke_interrupts.S
index ca16d57..eae8483 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -74,6 +74,14 @@ _GLOBAL(kvmppc_handler_\ivor_nr)
bctr
 .endm
 
+.macro KVM_HANDLER_ADDR ivor_nr
+   .long   kvmppc_handler_\ivor_nr
+.endm
+
+.macro KVM_HANDLER_END
+   .long   kvmppc_handlers_end
+.endm
+
 _GLOBAL(kvmppc_handlers_start)
 KVM_HANDLER BOOKE_INTERRUPT_CRITICAL SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
 KVM_HANDLER BOOKE_INTERRUPT_MACHINE_CHECK  SPRN_SPRG_RSCRATCH_MC SPRN_MCSRR0
@@ -94,9 +102,7 @@ KVM_HANDLER BOOKE_INTERRUPT_DEBUG SPRN_SPRG_RSCRATCH_CRIT 
SPRN_CSRR0
 KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0
 KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA SPRN_SPRG_RSCRATCH0 SPRN_SRR0
 KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND SPRN_SPRG_RSCRATCH0 SPRN_SRR0
-
-_GLOBAL(kvmppc_handler_len)
-   .long kvmppc_handler_1 - kvmppc_handler_0
+_GLOBAL(kvmppc_handlers_end)
 
 /* Registers:
  *  SPRG_SCRATCH0: guest r4
@@ -461,6 +467,31 @@ lightweight_exit:
lwz r4, VCPU_GPR(R4)(r4)
rfi
 
+   .data
+   .align  4
+   .globl  kvmppc_booke_handler_addr
+kvmppc_booke_handler_addr:
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_CRITICAL
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_MACHINE_CHECK
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_DATA_STORAGE
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_INST_STORAGE
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_EXTERNAL
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_ALIGNMENT
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_PROGRAM
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_FP_UNAVAIL
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_SYSCALL
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_AP_UNAVAIL
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_D

[PATCH 02/12] KVM: PPC: E500: Explicitly mark shadow maps invalid

2013-02-14 Thread Alexander Graf
When we invalidate shadow TLB maps on the host, we don't mark them
as not valid. But we should.

Fix this by removing the E500_TLB_VALID from their flags when
invalidating.

Signed-off-by: Alexander Graf 
---
 arch/powerpc/kvm/e500_tlb.c |   13 ++---
 1 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index d38ad63..8efb2ac 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -204,9 +204,13 @@ static void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 
*vcpu_e500,
 {
struct kvm_book3e_206_tlb_entry *gtlbe =
get_entry(vcpu_e500, tlbsel, esel);
+   struct tlbe_ref *ref = &vcpu_e500->gtlb_priv[tlbsel][esel].ref;
 
-   if (tlbsel == 1 &&
-   vcpu_e500->gtlb_priv[1][esel].ref.flags & E500_TLB_BITMAP) {
+   /* Don't bother with unmapped entries */
+   if (!(ref->flags & E500_TLB_VALID))
+   return;
+
+   if (tlbsel == 1 && ref->flags & E500_TLB_BITMAP) {
u64 tmp = vcpu_e500->g2h_tlb1_map[esel];
int hw_tlb_indx;
unsigned long flags;
@@ -224,7 +228,7 @@ static void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 
*vcpu_e500,
}
mb();
vcpu_e500->g2h_tlb1_map[esel] = 0;
-   vcpu_e500->gtlb_priv[1][esel].ref.flags &= ~E500_TLB_BITMAP;
+   ref->flags &= ~(E500_TLB_BITMAP | E500_TLB_VALID);
local_irq_restore(flags);
 
return;
@@ -232,6 +236,9 @@ static void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 
*vcpu_e500,
 
/* Guest tlbe is backed by at most one host tlbe per shadow pid. */
kvmppc_e500_tlbil_one(vcpu_e500, gtlbe);
+
+   /* Mark the TLB as not backed by the host anymore */
+   ref->flags &= ~E500_TLB_VALID;
 }
 
 static int tlb0_set_base(gva_t addr, int sets, int ways)
-- 
1.6.0.2

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 11/12] booke: Added DBCR4 SPR number

2013-02-14 Thread Alexander Graf
From: Bharat Bhushan 

Signed-off-by: Bharat Bhushan 
Signed-off-by: Alexander Graf 
---
 arch/powerpc/include/asm/reg_booke.h |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/include/asm/reg_booke.h 
b/arch/powerpc/include/asm/reg_booke.h
index e07e6af..b417de3 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -56,6 +56,7 @@
 #define SPRN_SPRG7W0x117   /* Special Purpose Register General 7 Write */
 #define SPRN_EPCR  0x133   /* Embedded Processor Control Register */
 #define SPRN_DBCR2 0x136   /* Debug Control Register 2 */
+#define SPRN_DBCR4 0x233   /* Debug Control Register 4 */
 #define SPRN_MSRP  0x137   /* MSR Protect Register */
 #define SPRN_IAC3  0x13A   /* Instruction Address Compare 3 */
 #define SPRN_IAC4  0x13B   /* Instruction Address Compare 4 */
-- 
1.6.0.2

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 03/12] KVM: PPC: E500: Propagate errors when shadow mapping

2013-02-14 Thread Alexander Graf
When shadow mapping a page, mapping this page can fail. In that case we
don't have a shadow map.

Take this case into account, otherwise we might end up writing bogus TLB
entries into the host TLB.

While at it, also move the write_stlbe() calls into the respective TLBn
handlers.

Signed-off-by: Alexander Graf 
---
 arch/powerpc/kvm/e500_tlb.c |   69 +-
 1 files changed, 41 insertions(+), 28 deletions(-)

diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index 8efb2ac..3777167 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -432,7 +432,7 @@ static inline void kvmppc_e500_setup_stlbe(
 #endif
 }
 
-static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
+static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe,
int tlbsel, struct kvm_book3e_206_tlb_entry *stlbe,
struct tlbe_ref *ref)
@@ -551,7 +551,7 @@ static inline void kvmppc_e500_shadow_map(struct 
kvmppc_vcpu_e500 *vcpu_e500,
if (is_error_noslot_pfn(pfn)) {
printk(KERN_ERR "Couldn't get real page for gfn %lx!\n",
(long)gfn);
-   return;
+   return -EINVAL;
}
 
/* Align guest and physical address to page map boundaries */
@@ -571,22 +571,33 @@ static inline void kvmppc_e500_shadow_map(struct 
kvmppc_vcpu_e500 *vcpu_e500,
 
/* Drop refcount on page, so that mmu notifiers can clear it */
kvm_release_pfn_clean(pfn);
+
+   return 0;
 }
 
 /* XXX only map the one-one case, for now use TLB0 */
-static void kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500,
-int esel,
-struct kvm_book3e_206_tlb_entry *stlbe)
+static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500,
+   int esel,
+   struct kvm_book3e_206_tlb_entry *stlbe)
 {
struct kvm_book3e_206_tlb_entry *gtlbe;
struct tlbe_ref *ref;
+   int stlbsel = 0;
+   int sesel = 0;
+   int r;
 
gtlbe = get_entry(vcpu_e500, 0, esel);
ref = &vcpu_e500->gtlb_priv[0][esel].ref;
 
-   kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe),
+   r = kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe),
get_tlb_raddr(gtlbe) >> PAGE_SHIFT,
gtlbe, 0, stlbe, ref);
+   if (r)
+   return r;
+
+   write_stlbe(vcpu_e500, gtlbe, stlbe, stlbsel, sesel);
+
+   return 0;
 }
 
 /* Caller must ensure that the specified guest TLB entry is safe to insert into
@@ -597,25 +608,32 @@ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 
*vcpu_e500,
struct kvm_book3e_206_tlb_entry *stlbe, int esel)
 {
struct tlbe_ref *ref;
-   unsigned int victim;
+   unsigned int sesel;
+   int r;
+   int stlbsel = 1;
 
-   victim = vcpu_e500->host_tlb1_nv++;
+   sesel = vcpu_e500->host_tlb1_nv++;
 
if (unlikely(vcpu_e500->host_tlb1_nv >= tlb1_max_shadow_size()))
vcpu_e500->host_tlb1_nv = 0;
 
-   ref = &vcpu_e500->tlb_refs[1][victim];
-   kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, ref);
+   ref = &vcpu_e500->tlb_refs[1][sesel];
+   r = kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe,
+  ref);
+   if (r)
+   return r;
 
-   vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << victim;
+   vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel;
vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP;
-   if (vcpu_e500->h2g_tlb1_rmap[victim]) {
-   unsigned int idx = vcpu_e500->h2g_tlb1_rmap[victim];
-   vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << victim);
+   if (vcpu_e500->h2g_tlb1_rmap[sesel]) {
+   unsigned int idx = vcpu_e500->h2g_tlb1_rmap[sesel];
+   vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << sesel);
}
-   vcpu_e500->h2g_tlb1_rmap[victim] = esel;
+   vcpu_e500->h2g_tlb1_rmap[sesel] = esel;
 
-   return victim;
+   write_stlbe(vcpu_e500, gtlbe, stlbe, stlbsel, sesel);
+
+   return 0;
 }
 
 static void kvmppc_recalc_tlb1map_range(struct kvmppc_vcpu_e500 *vcpu_e500)
@@ -1034,30 +1052,27 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, 
gpa_t gpaddr,
struct kvm_book3e_206_tlb_entry *gtlbe, stlbe;
int tlbsel = tlbsel_of(index);
int esel = esel_of(index);
-   int stlbsel, sesel;
 
gtlbe = get_entry(vcpu_e500, tlbsel, esel);
 
switch (tlbsel) {
case 0:
-   stlbsel = 0;
-   sesel = 0; /* unused */
priv = &vcpu_e500->gtlb_priv[tlbsel][esel];
 
-   /* Only tri

[PATCH 12/12] KVM: PPC: BookE: Handle alignment interrupts

2013-02-14 Thread Alexander Graf
When the guest triggers an alignment interrupt, we don't handle it properly
today and instead BUG_ON(). This really shouldn't happen.

Instead, we should just pass the interrupt back into the guest so it can deal
with it.

Reported-by: Gao Guanhua-B22826 
Tested-by: Gao Guanhua-B22826 
Signed-off-by: Alexander Graf 
---
 arch/powerpc/kvm/booke.c|   16 +++-
 arch/powerpc/kvm/booke_interrupts.S |6 --
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index d2f502d..020923e 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -182,6 +182,14 @@ static void kvmppc_core_queue_inst_storage(struct kvm_vcpu 
*vcpu,
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
 }
 
+static void kvmppc_core_queue_alignment(struct kvm_vcpu *vcpu, ulong 
dear_flags,
+   ulong esr_flags)
+{
+   vcpu->arch.queued_dear = dear_flags;
+   vcpu->arch.queued_esr = esr_flags;
+   kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALIGNMENT);
+}
+
 void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong esr_flags)
 {
vcpu->arch.queued_esr = esr_flags;
@@ -345,6 +353,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu 
*vcpu,
switch (priority) {
case BOOKE_IRQPRIO_DTLB_MISS:
case BOOKE_IRQPRIO_DATA_STORAGE:
+   case BOOKE_IRQPRIO_ALIGNMENT:
update_dear = true;
/* fall through */
case BOOKE_IRQPRIO_INST_STORAGE:
@@ -358,7 +367,6 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu 
*vcpu,
case BOOKE_IRQPRIO_SPE_FP_DATA:
case BOOKE_IRQPRIO_SPE_FP_ROUND:
case BOOKE_IRQPRIO_AP_UNAVAIL:
-   case BOOKE_IRQPRIO_ALIGNMENT:
allowed = 1;
msr_mask = MSR_CE | MSR_ME | MSR_DE;
int_class = INT_CLASS_NONCRIT;
@@ -971,6 +979,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
r = RESUME_GUEST;
break;
 
+   case BOOKE_INTERRUPT_ALIGNMENT:
+   kvmppc_core_queue_alignment(vcpu, vcpu->arch.fault_dear,
+   vcpu->arch.fault_esr);
+   r = RESUME_GUEST;
+   break;
+
 #ifdef CONFIG_KVM_BOOKE_HV
case BOOKE_INTERRUPT_HV_SYSCALL:
if (!(vcpu->arch.shared->msr & MSR_PR)) {
diff --git a/arch/powerpc/kvm/booke_interrupts.S 
b/arch/powerpc/kvm/booke_interrupts.S
index eae8483..f4bb55c 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -45,12 +45,14 @@
 (1

[PATCH 01/12] KVM: PPC: E500: Move write_stlbe higher

2013-02-14 Thread Alexander Graf
Later patches want to call the function and it doesn't have
dependencies on anything below write_host_tlbe.

Move it higher up in the file.

Signed-off-by: Alexander Graf 
---
 arch/powerpc/kvm/e500_tlb.c |   32 
 1 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index cf3f180..d38ad63 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -156,6 +156,22 @@ static inline void write_host_tlbe(struct kvmppc_vcpu_e500 
*vcpu_e500,
}
 }
 
+/* sesel is for tlb1 only */
+static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
+   struct kvm_book3e_206_tlb_entry *gtlbe,
+   struct kvm_book3e_206_tlb_entry *stlbe,
+   int stlbsel, int sesel)
+{
+   int stid;
+
+   preempt_disable();
+   stid = kvmppc_e500_get_tlb_stid(&vcpu_e500->vcpu, gtlbe);
+
+   stlbe->mas1 |= MAS1_TID(stid);
+   write_host_tlbe(vcpu_e500, stlbsel, sesel, stlbe);
+   preempt_enable();
+}
+
 #ifdef CONFIG_KVM_E500V2
 void kvmppc_map_magic(struct kvm_vcpu *vcpu)
 {
@@ -834,22 +850,6 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea)
return EMULATE_DONE;
 }
 
-/* sesel is for tlb1 only */
-static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
-   struct kvm_book3e_206_tlb_entry *gtlbe,
-   struct kvm_book3e_206_tlb_entry *stlbe,
-   int stlbsel, int sesel)
-{
-   int stid;
-
-   preempt_disable();
-   stid = kvmppc_e500_get_tlb_stid(&vcpu_e500->vcpu, gtlbe);
-
-   stlbe->mas1 |= MAS1_TID(stid);
-   write_host_tlbe(vcpu_e500, stlbsel, sesel, stlbe);
-   preempt_enable();
-}
-
 int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
 {
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
-- 
1.6.0.2

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 04/12] KVM: PPC: e500: Call kvmppc_mmu_map for initial mapping

2013-02-14 Thread Alexander Graf
When emulating tlbwe, we want to automatically map the entry that just got
written in our shadow TLB map, because chances are quite high that it's
going to be used very soon.

Today this happens explicitly, duplicating all the logic that is in
kvmppc_mmu_map() already. Just call that one instead.

Signed-off-by: Alexander Graf 
---
 arch/powerpc/kvm/e500_tlb.c |   38 +++---
 1 files changed, 7 insertions(+), 31 deletions(-)

diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index 3777167..48d1a4f 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -878,8 +878,8 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea)
 int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
 {
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
-   struct kvm_book3e_206_tlb_entry *gtlbe, stlbe;
-   int tlbsel, esel, stlbsel, sesel;
+   struct kvm_book3e_206_tlb_entry *gtlbe;
+   int tlbsel, esel;
int recal = 0;
 
tlbsel = get_tlb_tlbsel(vcpu);
@@ -917,40 +917,16 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
 
/* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
if (tlbe_is_host_safe(vcpu, gtlbe)) {
-   u64 eaddr;
-   u64 raddr;
+   u64 eaddr = get_tlb_eaddr(gtlbe);
+   u64 raddr = get_tlb_raddr(gtlbe);
 
-   switch (tlbsel) {
-   case 0:
-   /* TLB0 */
+   if (tlbsel == 0) {
gtlbe->mas1 &= ~MAS1_TSIZE(~0);
gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K);
-
-   stlbsel = 0;
-   kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe);
-   sesel = 0; /* unused */
-
-   break;
-
-   case 1:
-   /* TLB1 */
-   eaddr = get_tlb_eaddr(gtlbe);
-   raddr = get_tlb_raddr(gtlbe);
-
-   /* Create a 4KB mapping on the host.
-* If the guest wanted a large page,
-* only the first 4KB is mapped here and the rest
-* are mapped on the fly. */
-   stlbsel = 1;
-   sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr,
-   raddr >> PAGE_SHIFT, gtlbe, &stlbe, esel);
-   break;
-
-   default:
-   BUG();
}
 
-   write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel);
+   /* Premap the faulting page */
+   kvmppc_mmu_map(vcpu, eaddr, raddr, index_of(tlbsel, esel));
}
 
kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
-- 
1.6.0.2

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PULL 00/14] ppc patch queue 2013-02-15

2013-02-14 Thread Alexander Graf
Hi Marcelo / Gleb,

This is my current patch queue for ppc.  Please pull.

Highlights of this queue drop are:

  - BookE: Fast mapping support for 4k backed memory
  - BookE: Handle alignment interrupts

Alex


The following changes since commit cbd29cb6e38af6119df2cdac0c58acf0e85c177e:
  Jan Kiszka (1):
KVM: nVMX: Remove redundant get_vmcs12 from nested_vmx_exit_handled_msr

are available in the git repository at:

  git://github.com/agraf/linux-2.6.git kvm-ppc-next

Alexander Graf (11):
  KVM: PPC: E500: Move write_stlbe higher
  KVM: PPC: E500: Explicitly mark shadow maps invalid
  KVM: PPC: E500: Propagate errors when shadow mapping
  KVM: PPC: e500: Call kvmppc_mmu_map for initial mapping
  KVM: PPC: E500: Split host and guest MMU parts
  KVM: PPC: e500: Implement TLB1-in-TLB0 mapping
  KVM: PPC: E500: Make clear_tlb_refs and clear_tlb1_bitmap static
  KVM: PPC: E500: Remove kvmppc_e500_tlbil_all usage from guest TLB code
  Merge commit 'origin/next' into kvm-ppc-next
  KVM: PPC: BookE: Handle alignment interrupts
  Merge commit 'origin/next' into kvm-ppc-next

Bharat Bhushan (3):
  KVM: PPC: booke: use vcpu reference from thread_struct
  KVM: PPC: booke: Allow multiple exception types
  booke: Added DBCR4 SPR number

 arch/powerpc/include/asm/kvm_ppc.h   |2 -
 arch/powerpc/include/asm/reg.h   |2 -
 arch/powerpc/include/asm/reg_booke.h |1 +
 arch/powerpc/kernel/asm-offsets.c|2 +-
 arch/powerpc/kvm/Makefile|9 +-
 arch/powerpc/kvm/booke.c |   30 +-
 arch/powerpc/kvm/booke.h |1 +
 arch/powerpc/kvm/booke_interrupts.S  |   49 +-
 arch/powerpc/kvm/e500.c  |   16 +-
 arch/powerpc/kvm/e500.h  |1 +
 arch/powerpc/kvm/e500_mmu.c  |  809 +++
 arch/powerpc/kvm/e500_mmu_host.c |  699 +
 arch/powerpc/kvm/e500_mmu_host.h |   18 +
 arch/powerpc/kvm/e500_tlb.c  | 1430 --
 14 files changed, 1610 insertions(+), 1459 deletions(-)
 create mode 100644 arch/powerpc/kvm/e500_mmu.c
 create mode 100644 arch/powerpc/kvm/e500_mmu_host.c
 create mode 100644 arch/powerpc/kvm/e500_mmu_host.h
 delete mode 100644 arch/powerpc/kvm/e500_tlb.c
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 7/9] KVM: PPC: Book3S HV: Add support for real mode ICP in XICS emulation

2013-02-14 Thread Paul Mackerras
From: Benjamin Herrenschmidt 

This adds an implementation of the XICS hypercalls in real mode for HV
KVM, which allows us to avoid exiting the guest MMU context on all
threads for a variety of operations such as fetching a pending
interrupt, EOI of messages, IPIs, etc.

Signed-off-by: Benjamin Herrenschmidt 
Signed-off-by: Paul Mackerras 
---
 arch/powerpc/kvm/Makefile   |1 +
 arch/powerpc/kvm/book3s_hv_rm_xics.c|  402 +++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |   10 +-
 arch/powerpc/kvm/book3s_xics.c  |   64 -
 arch/powerpc/kvm/book3s_xics.h  |   16 ++
 5 files changed, 475 insertions(+), 18 deletions(-)
 create mode 100644 arch/powerpc/kvm/book3s_hv_rm_xics.c

diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index e2eb04c..895e880 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -77,6 +77,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
book3s_hv_rm_mmu.o \
book3s_64_vio_hv.o \
book3s_hv_ras.o \
+   book3s_hv_rm_xics.o \
book3s_hv_builtin.o
 
 kvm-book3s_64-module-objs := \
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c 
b/arch/powerpc/kvm/book3s_hv_rm_xics.c
new file mode 100644
index 000..3605e0c
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -0,0 +1,402 @@
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "book3s_xics.h"
+
+#define DEBUG_PASSUP
+
+static inline void rm_writeb(unsigned long paddr, u8 val)
+{
+   __asm__ __volatile__("sync; stbcix %0,0,%1"
+   : : "r" (val), "r" (paddr) : "memory");
+}
+
+static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu, struct kvm_vcpu 
*this_vcpu)
+{
+   struct kvmppc_icp *this_icp = this_vcpu->arch.icp;
+   unsigned long xics_phys;
+   int cpu;
+
+   /* Mark the target VCPU as having an interrupt pending */
+   vcpu->stat.queue_intr++;
+   set_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
+
+   /* Kick self ? Just set MER and return */
+   if (vcpu == this_vcpu) {
+   mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_MER);
+   return;
+   }
+
+   /* Check if the core is loaded, if not, too hard */
+   cpu = vcpu->cpu;
+   if (cpu < 0 || cpu >= nr_cpu_ids) {
+   this_icp->rm_action |= XICS_RM_KICK_VCPU;
+   this_icp->rm_kick_target = vcpu;
+   return;
+   }
+   /* In SMT cpu will always point to thread 0, we adjust it */
+   cpu += vcpu->arch.ptid;
+
+   /* Not too hard, then poke the target */
+   xics_phys = paca[cpu].kvm_hstate.xics_phys;
+   rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY);
+}
+
+static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu)
+{
+   /* Note: Only called on self ! */
+   clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 
&vcpu->arch.pending_exceptions);
+   mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_MER);
+}
+
+static inline bool icp_rm_try_update(struct kvmppc_icp *icp,
+union kvmppc_icp_state old,
+union kvmppc_icp_state new)
+{
+   struct kvm_vcpu *this_vcpu = local_paca->kvm_hstate.kvm_vcpu;
+   bool success;
+
+   /* Calculate new output value */
+   new.out_ee = (new.xisr && (new.pending_pri < new.cppr));
+
+   /* Attempt atomic update */
+   success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw;
+   if (!success)
+   goto bail;
+
+   /*
+* Check for output state update
+*
+* Note that this is racy since another processor could be updating
+* the state already. This is why we never clear the interrupt output
+* here, we only ever set it. The clear only happens prior to doing
+* an update and only by the processor itself. Currently we do it
+* in Accept (H_XIRR) and Up_Cppr (H_XPPR).
+*
+* We also do not try to figure out whether the EE state has changed,
+* we unconditionally set it if the new state calls for it. The reason
+* for that is that we opportunistically remove the pending interrupt
+* flag when raising CPPR, so we need to set it back here if an
+* interrupt is still pending.
+*/
+   if (new.out_ee)
+   icp_rm_set_vcpu_irq(icp->vcpu, this_vcpu);
+
+   /* Expose the state change for debug purposes */
+   this_vcpu->arch.icp->rm_dbgstate = new;
+   this_vcpu->arch.icp->rm_dbgtgt = icp->vcpu;
+
+ bail:
+   return success;
+}
+
+s

[PATCH 6/9] KVM: PPC: Book3S HV: Speed up wakeups of CPUs on HV KVM

2013-02-14 Thread Paul Mackerras
From: Benjamin Herrenschmidt 

Currently, we wake up a CPU by sending a host IPI with
smp_send_reschedule() to thread 0 of that core, which will take all
threads out of the guest, and cause them to re-evaluate their
interrupt status on the way back in.

This adds a mechanism to differentiate real host IPIs from IPIs sent
by KVM for guest threads to poke each other, in order to target the
guest threads precisely when possible and avoid that global switch of
the core to host state.

We then use this new facility in the in-kernel XICS code.

Signed-off-by: Benjamin Herrenschmidt 
Signed-off-by: Paul Mackerras 
---
 arch/powerpc/include/asm/kvm_book3s_asm.h |8 ++-
 arch/powerpc/include/asm/kvm_ppc.h|   29 
 arch/powerpc/kernel/asm-offsets.c |2 +
 arch/powerpc/kvm/book3s_hv.c  |   26 +++-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   |  102 -
 arch/powerpc/kvm/book3s_xics.c|2 +-
 arch/powerpc/sysdev/xics/icp-native.c |8 +++
 7 files changed, 158 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h 
b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 88609b2..923522d 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -20,6 +20,11 @@
 #ifndef __ASM_KVM_BOOK3S_ASM_H__
 #define __ASM_KVM_BOOK3S_ASM_H__
 
+/* XICS ICP register offsets */
+#define XICS_XIRR  4
+#define XICS_MFRR  0xc
+#define XICS_IPI   2   /* interrupt source # for IPIs */
+
 #ifdef __ASSEMBLY__
 
 #ifdef CONFIG_KVM_BOOK3S_HANDLER
@@ -81,10 +86,11 @@ struct kvmppc_host_state {
 #ifdef CONFIG_KVM_BOOK3S_64_HV
u8 hwthread_req;
u8 hwthread_state;
-
+   u8 host_ipi;
struct kvm_vcpu *kvm_vcpu;
struct kvmppc_vcore *kvm_vcore;
unsigned long xics_phys;
+   u32 saved_xirr;
u64 dabr;
u64 host_mmcr[3];
u32 host_pmc[8];
diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index 2308fff..f371af8a 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -259,6 +259,21 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned 
long addr)
paca[cpu].kvm_hstate.xics_phys = addr;
 }
 
+static inline u32 kvmppc_get_xics_latch(void)
+{
+   u32 xirr = get_paca()->kvm_hstate.saved_xirr;
+
+   get_paca()->kvm_hstate.saved_xirr = 0;
+
+   return xirr;
+}
+
+static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
+{
+   paca[cpu].kvm_hstate.host_ipi = host_ipi;
+}
+
+extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu);
 extern void kvm_linear_init(void);
 
 #else
@@ -268,6 +283,18 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned 
long addr)
 static inline void kvm_linear_init(void)
 {}
 
+static inline u32 kvmppc_get_xics_latch(void)
+{
+   return 0;
+}
+
+static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
+{}
+
+static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+   kvm_vcpu_kick(vcpu);
+}
 #endif
 
 static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr)
@@ -332,4 +359,6 @@ static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu 
*vcpu, int ra, int rb)
return ea;
 }
 
+extern void xics_wake_cpu(int cpu);
+
 #endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index 46f6afd..c564ac3 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -543,6 +543,8 @@ int main(void)
HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
+   HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
+   HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
HSTATE_FIELD(HSTATE_MMCR, host_mmcr);
HSTATE_FIELD(HSTATE_PMC, host_pmc);
HSTATE_FIELD(HSTATE_PURR, host_purr);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 8f09c36..1365440 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -66,6 +66,31 @@
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
 
+void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+   int me;
+   int cpu = vcpu->cpu;
+   wait_queue_head_t *wqp;
+
+   wqp = kvm_arch_vcpu_wq(vcpu);
+   if (waitqueue_active(wqp)) {
+   wake_up_interruptible(wqp);
+   ++vcpu->stat.halt_wakeup;
+   }
+
+   me = get_cpu();
+
+   /* CPU points to the first thread of the core */
+   if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) {
+   int real_cpu = cpu + vcpu->arch.ptid;
+   if (paca[real_cpu].kvm_hstate.xics_phys)
+   xics_wake_cpu(real_cpu);
+   else if (cpu_online(cpu))
+   smp_se

[PATCH 2/9] KVM: PPC: Remove unused argument to kvmppc_core_dequeue_external

2013-02-14 Thread Paul Mackerras
Currently kvmppc_core_dequeue_external() takes a struct kvm_interrupt *
argument and does nothing with it, in any of its implementations.
This removes it in order to make things easier for forthcoming
in-kernel interrupt controller emulation code.

Signed-off-by: Paul Mackerras 
---
 arch/powerpc/include/asm/kvm_ppc.h |3 +--
 arch/powerpc/kvm/book3s.c  |3 +--
 arch/powerpc/kvm/booke.c   |3 +--
 arch/powerpc/kvm/powerpc.c |2 +-
 4 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index dd08cfa..be611f6 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -104,8 +104,7 @@ extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq);
-extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
- struct kvm_interrupt *irq);
+extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu);
 
 extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index a4b6452..6548445 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -160,8 +160,7 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
kvmppc_book3s_queue_irqprio(vcpu, vec);
 }
 
-void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
-  struct kvm_interrupt *irq)
+void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu)
 {
kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 020923e..f72abd1 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -222,8 +222,7 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
kvmppc_booke_queue_irqprio(vcpu, prio);
 }
 
-void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
-  struct kvm_interrupt *irq)
+void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu)
 {
clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 26d8003..1772883 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -741,7 +741,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
 {
if (irq->irq == KVM_INTERRUPT_UNSET) {
-   kvmppc_core_dequeue_external(vcpu, irq);
+   kvmppc_core_dequeue_external(vcpu);
return 0;
}
 
-- 
1.7.10.rc3.219.g53414

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/9] KVM: PPC: Book3S: Add kernel emulation for the XICS interrupt controller

2013-02-14 Thread Paul Mackerras
From: Benjamin Herrenschmidt 

This adds in-kernel emulation of the XICS (eXternal Interrupt
Controller Specification) interrupt controller specified by PAPR, for
both HV and PR KVM guests.

This adds a new KVM_CREATE_IRQCHIP_ARGS ioctl, which is like
KVM_CREATE_IRQCHIP in that it indicates that the virtual machine
should use in-kernel interrupt controller emulation, but also takes an
argument struct that contains the type of interrupt controller
architecture and an optional parameter.  Currently only one type value
is defined, that which indicates the XICS architecture.

The XICS emulation supports up to 1048560 interrupt sources.
Interrupt source numbers below 16 are reserved; 0 is used to mean no
interrupt and 2 is used for IPIs.  Internally these are represented in
blocks of 1024, called ICS (interrupt controller source) entities, but
that is not visible to userspace.

Two other new ioctls allow userspace to control the interrupt
sources.  The KVM_IRQCHIP_SET_SOURCES ioctl sets the priority,
destination cpu, level/edge sensitivity and pending state of a range
of interrupt sources, creating them if they don't already exist.  The
KVM_IRQCHIP_GET_SOURCES ioctl returns that information for a range of
interrupt sources (they are required to already exist).

Each vcpu gets one ICP (interrupt controller presentation) entity.
They are created automatically when the vcpu is created provided the
KVM_CREATE_IRQCHIP_ARGS ioctl has been performed.

This is based on an initial implementation by Michael Ellerman,
reworked by Benjamin Herrenschmidt and Paul Mackerras.

Signed-off-by: Benjamin Herrenschmidt 
Signed-off-by: Paul Mackerras 
---
 Documentation/virtual/kvm/api.txt |   51 ++
 arch/powerpc/include/asm/kvm_book3s.h |1 +
 arch/powerpc/include/asm/kvm_host.h   |8 +
 arch/powerpc/include/asm/kvm_ppc.h|   19 +
 arch/powerpc/kvm/Makefile |1 +
 arch/powerpc/kvm/book3s.c |2 +-
 arch/powerpc/kvm/book3s_hv.c  |   20 +
 arch/powerpc/kvm/book3s_pr.c  |   13 +
 arch/powerpc/kvm/book3s_pr_papr.c |   16 +
 arch/powerpc/kvm/book3s_rtas.c|   51 +-
 arch/powerpc/kvm/book3s_xics.c| 1101 +
 arch/powerpc/kvm/book3s_xics.h|  111 
 arch/powerpc/kvm/powerpc.c|   23 +
 include/uapi/linux/kvm.h  |   29 +
 14 files changed, 1444 insertions(+), 2 deletions(-)
 create mode 100644 arch/powerpc/kvm/book3s_xics.c
 create mode 100644 arch/powerpc/kvm/book3s_xics.h

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index d3e2d60..0ff9dcf 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2141,6 +2141,57 @@ associated with the service will be forgotten, and 
subsequent RTAS
 calls by the guest for that service will be passed to userspace to be
 handled.
 
+4.80 KVM_CREATE_IRQCHIP_ARGS
+
+Capability: KVM_CAP_IRQCHIP_ARGS
+Architectures: ppc
+Type: vm ioctl
+Parameters: struct kvm_irqchip_args
+Returns: 0 on success, -1 on error
+
+Creates an interrupt controller model in the kernel.  The type field
+of the argument struct indicates the interrupt controller architecture
+of the virtual machine.  Currently the only value permitted for the
+type field is 1, indicating the XICS (eXternal Interrupt Controller
+Specification) model defined in PAPR.  For XICS, this ioctl indicates
+to the kernel that an interrupt controller presentation (ICP) entity
+should be created for every vcpu, and interrupt controller source
+(ICS) entities should be created to accommodate the sources that are
+configured with the KVM_IRQCHIP_SET_SOURCES ioctl.
+
+4.81 KVM_IRQCHIP_GET_SOURCES
+
+Capability: KVM_CAP_IRQCHIP_ARGS
+Architectures: ppc
+Type: vm ioctl
+Parameters: struct kvm_irq_sources
+Returns: 0 on success, -1 on error
+
+Copies configuration and status information about a range of interrupt
+sources into a user-supplied buffer.  The argument struct gives the
+starting interrupt source number and the number of interrupt sources.
+The user buffer is an array of 64-bit quantities, one per interrupt
source, with (from the least-significant bit) 32 bits of interrupt
+server number, 8 bits of priority, and 1 bit each for a
+level-sensitive indicator, a masked indicator, and a pending
+indicator.  If some of the sources in the range don't exist, that is,
+have not yet been created with the KVM_IRQCHIP_SET_SOURCES ioctl,
+this returns an ENODEV error.
+
+4.82 KVM_IRQCHIP_SET_SOURCES
+
+Capability: KVM_CAP_IRQCHIP_ARGS
+Architectures: ppc
+Type: vm ioctl
+Parameters: struct kvm_irq_sources
+Returns: 0 on success, -1 on error
+
+Sets the configuration and status for a range of interrupt sources
+from information supplied in a user-supplied buffer, creating the
+sources if they don't already exist.  The argument struct gives the
+starting interrupt source number and the number of interrupt sources.
+The user buffer is formatted as for KVM_IRQCHIP_G

[PATCH 0/9] In-kernel XICS interrupt controller emulation

2013-02-14 Thread Paul Mackerras
This patch series implements in-kernel emulation of the XICS interrupt
controller architecture defined in PAPR (Power Architecture Platform
Requirements, the document that defines IBM's pSeries platform
architecture).

One of the things I have done in this version is to provide a way for
this to coexist with other interrupt controller emulations.  The XICS
emulation exports a vector of function pointers that the generic
book3s code uses to call in to it.  Other emulations could set this
vector to point to their own functions.  (I realize that Scott Wood's
recently-posted patch series uses an entirely orthogonal approach and
may or may not find this useful.)

The interface defined here consists of:

* KVM_CREATE_IRQCHIP_ARGS: like KVM_CREATE_IRQCHIP but takes an
  argument struct containing a `type' field, specifying what overall
  interrupt controller architecture to emulate, and a `param' field
  (unused by the XICS emulation).  This is only called once per VM,
  before VCPUs are created.

* KVM_IRQCHIP_SET_SOURCES: used to create and configure interrupt
  sources in bulk.  The notion of "interrupt source" is fairly
  generic; a source has an identifying number (20 bits in the current
  implementation), a priority, a destination (a vcpu number or
  potentially a vcpu group identifier), plus flags indicating
  edge/level sensitivity, a masked bit, and a pending bit.

* KVM_IRQCHIP_GET_SOURCES: used to query the configuration and status
  of interrupt sources in bulk.

* A 64-bit one_reg identifier, KVM_REG_PPC_ICP_STATE, used to get and
  set per-vcpu interrupt controller state (per-vcpu priority,
  interrupt status, and pending IPI and interrupt state).

I believe this corresponds reasonably well to what was discussed at
KVM Forum.

This series is against Alex's kvm-ppc-queue branch, although it also
applies cleanly on the kvm tree's next branch.

Paul.
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/9] KVM: PPC: Book3S: Facilities to save/restore XICS presentation ctrler state

2013-02-14 Thread Paul Mackerras
This adds the ability for userspace to save and restore the state
of the XICS interrupt presentation controllers (ICPs) via the
KVM_GET/SET_ONE_REG interface.  Since there is one ICP per vcpu, we
simply define a new 64-bit register in the ONE_REG space for the ICP
state.  The state includes the CPU priority setting, the pending IPI
priority, and the priority and source number of any pending external
interrupt.

Signed-off-by: Paul Mackerras 
---
 Documentation/virtual/kvm/api.txt   |1 +
 arch/powerpc/include/asm/kvm_host.h |2 +
 arch/powerpc/include/uapi/asm/kvm.h |   13 +
 arch/powerpc/kvm/book3s.c   |   19 
 arch/powerpc/kvm/book3s_xics.c  |   92 +++
 5 files changed, 127 insertions(+)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 0ff9dcf..466636b 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1762,6 +1762,7 @@ registers, find a list below:
   PPC   | KVM_REG_PPC_VPA_DTL   | 128
   PPC   | KVM_REG_PPC_EPCR | 32
   PPC   | KVM_REG_PPC_EPR  | 32
+  PPC   | KVM_REG_PPC_ICP_STATE | 64
 
 4.69 KVM_GET_ONE_REG
 
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 8af4c0b..2eb4c27 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -236,6 +236,8 @@ struct kvm_irq_ctrler {
void (*free_ctrler)(struct kvm *kvm);
int (*hcall)(struct kvm_vcpu *vcpu, unsigned long req);
int (*ioctl)(struct kvm *kvm, unsigned int ioctl, unsigned long arg);
+   u64 (*get_one_reg)(struct kvm_vcpu *vcpu, u64 reg);
+   int (*set_one_reg)(struct kvm_vcpu *vcpu, u64 reg, u64 val);
 };
 
 struct kvm_arch {
diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
b/arch/powerpc/include/uapi/asm/kvm.h
index d90743c..8e34553 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -423,4 +423,17 @@ struct kvm_get_htab_header {
 #define KVM_REG_PPC_EPCR   (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85)
 #define KVM_REG_PPC_EPR(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x86)
 
+/* Per-vcpu interrupt controller state */
+#define KVM_REG_PPC_ICP_STATE  (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x87)
+
+/* Layout of above for XICS */
+#define  KVM_REG_PPC_ICP_CPPR_SHIFT56  /* current proc priority */
+#define  KVM_REG_PPC_ICP_CPPR_MASK 0xff
+#define  KVM_REG_PPC_ICP_XISR_SHIFT32  /* interrupt status field */
+#define  KVM_REG_PPC_ICP_XISR_MASK 0xff
+#define  KVM_REG_PPC_ICP_MFRR_SHIFT24  /* pending IPI priority */
+#define  KVM_REG_PPC_ICP_MFRR_MASK 0xff
+#define  KVM_REG_PPC_ICP_PPRI_SHIFT16  /* pending irq priority */
+#define  KVM_REG_PPC_ICP_PPRI_MASK 0xff
+
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index c5a4478..8d5ee31 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -529,6 +529,15 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, 
struct kvm_one_reg *reg)
val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]);
break;
 #endif /* CONFIG_ALTIVEC */
+   case KVM_REG_PPC_ICP_STATE: {
+   struct kvm_irq_ctrler *ic = vcpu->kvm->arch.irq_ctrler;
+   if (!ic) {
+   r = -ENXIO;
+   break;
+   }
+   val = get_reg_val(reg->id, ic->get_one_reg(vcpu, 
reg->id));
+   break;
+   }
default:
r = -EINVAL;
break;
@@ -591,6 +600,16 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, 
struct kvm_one_reg *reg)
vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val);
break;
 #endif /* CONFIG_ALTIVEC */
+   case KVM_REG_PPC_ICP_STATE: {
+   struct kvm_irq_ctrler *ic = vcpu->kvm->arch.irq_ctrler;
+
+   if (!ic) {
+   r = -ENXIO;
+   break;
+   }
+   r = ic->set_one_reg(vcpu, reg->id, set_reg_val(reg->id, 
val));
+   break;
+   }
default:
r = -EINVAL;
break;
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 2312b56..f54b934 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -888,6 +888,96 @@ static void kvmppc_xics_free(struct kvm *kvm)
kfree(xics);
 }
 
+static u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu, u64 reg_id)
+{
+   struct kvmppc_icp *icp = vcpu->arch.icp;
+   union kvmppc_icp_state state;
+
+   if (!icp)
+   return 0;
+   state = icp->state;
+   ret

[PATCH 4/9] KVM: PPC: Book3S: Generalize interfaces to interrupt controller emulation

2013-02-14 Thread Paul Mackerras
This makes the XICS interrupt controller emulation code export a struct
containing function pointers for the various calls into the XICS code.
The generic book3s code then uses these function pointers instead of
calling directly into the XICS code (except for the XICS instantiation
function).

This should make it possible for other interrupt controller emulations
to coexist with the XICS emulation.

Signed-off-by: Paul Mackerras 
---
 arch/powerpc/include/asm/kvm_host.h |   11 -
 arch/powerpc/include/asm/kvm_ppc.h  |   14 --
 arch/powerpc/kvm/book3s_hv.c|   18 +++
 arch/powerpc/kvm/book3s_pr.c|   14 +++---
 arch/powerpc/kvm/book3s_pr_papr.c   |   10 ++--
 arch/powerpc/kvm/book3s_xics.c  |   93 +++
 arch/powerpc/kvm/powerpc.c  |4 +-
 7 files changed, 85 insertions(+), 79 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index b05e7cd..8af4c0b 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -230,8 +230,18 @@ struct kvm_arch_memory_slot {
 #endif /* CONFIG_KVM_BOOK3S_64_HV */
 };
 
+struct kvm_irq_ctrler {
+   int (*setup_vcpu)(struct kvm_vcpu *vcpu);
+   void (*teardown_vcpu)(struct kvm_vcpu *vcpu);
+   void (*free_ctrler)(struct kvm *kvm);
+   int (*hcall)(struct kvm_vcpu *vcpu, unsigned long req);
+   int (*ioctl)(struct kvm *kvm, unsigned int ioctl, unsigned long arg);
+};
+
 struct kvm_arch {
unsigned int lpid;
+   struct kvm_irq_ctrler *irq_ctrler;
+   void *irq_ctrler_private;
 #ifdef CONFIG_KVM_BOOK3S_64_HV
unsigned long hpt_virt;
struct revmap_entry *revmap;
@@ -260,7 +270,6 @@ struct kvm_arch {
 #ifdef CONFIG_PPC_BOOK3S_64
struct list_head spapr_tce_tables;
struct list_head rtas_tokens;
-   struct kvmppc_xics *xics;
 #endif
 };
 
diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index f0fd22b..2308fff 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -130,12 +130,7 @@ extern long kvmppc_prepare_vrma(struct kvm *kvm,
 extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
struct kvm_memory_slot *memslot, unsigned long porder);
 extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
-extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd);
-extern int kvmppc_xics_ioctl(struct kvm *kvm, unsigned ioctl, unsigned long 
arg);
 extern int kvmppc_xics_create(struct kvm *kvm, struct kvm_irqchip_args *args);
-extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
-extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu);
-extern void kvmppc_xics_free(struct kvm *kvm);
 
 extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce *args);
@@ -275,15 +270,6 @@ static inline void kvm_linear_init(void)
 
 #endif
 
-#ifdef CONFIG_PPC_BOOK3S_64
-static inline int kvmppc_xics_enabled(struct kvm *kvm)
-{
-   return kvm->arch.xics != NULL;
-}
-#else
-static inline int kvmppc_xics_enabled(struct kvm *kvm) { return 0; }
-#endif
-
 static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr)
 {
 #ifdef CONFIG_KVM_BOOKE_HV
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index aa3a0db..8f09c36 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -532,8 +532,8 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
case H_CPPR:
case H_EOI:
case H_IPI:
-   if (kvmppc_xics_enabled(vcpu->kvm)) {
-   ret = kvmppc_xics_hcall(vcpu, req);
+   if (vcpu->kvm->arch.irq_ctrler) {
+   ret = vcpu->kvm->arch.irq_ctrler->hcall(vcpu, req);
break;
} /* fallthrough */
default:
@@ -894,9 +894,9 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, 
unsigned int id)
spin_lock_init(&vcpu->arch.tbacct_lock);
vcpu->arch.busy_preempt = TB_NIL;
 
-   /* Create the XICS */
-   if (kvmppc_xics_enabled(kvm)) {
-   err = kvmppc_xics_create_icp(vcpu);
+   /* Create the interrupt-controller-specific state */
+   if (kvm->arch.irq_ctrler) {
+   err = kvm->arch.irq_ctrler->setup_vcpu(vcpu);
if (err < 0)
goto free_vcpu;
}
@@ -952,8 +952,8 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr);
spin_unlock(&vcpu->arch.vpa_update_lock);
kvm_vcpu_uninit(vcpu);
-   if (kvmppc_xics_enabled(vcpu->kvm))
-   kvmppc_xics_free_icp(vcpu);
+   if (vcpu->kvm->arch.irq_ctrler)
+   vcpu->kvm->arch.irq_ctrler->teardown_vcpu(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu);
 }
 
@@ -1899,8 +1899,8 @@ void kvmppc_core_destroy_vm(struct kvm *k

[PATCH 8/9] KVM: PPC: Book3S HV: Improve real-mode handling of external interrupts

2013-02-14 Thread Paul Mackerras
This streamlines our handling of external interrupts that come in
while we're in the guest.  First, when waking up a hardware thread
that was napping, we split off the "napping due to H_CEDE" case
earlier, and use the code that handles an external interrupt (0x500)
in the guest to handle that too.  Secondly, the code that handles
those external interrupts now checks if any other thread is exiting
to the host before bouncing an external interrupt to the guest, and
also checks that there is actually an external interrupt pending for
the guest before setting the LPCR MER bit (mediated external request).

This also makes sure that we clear the "ceded" flag when we handle a
wakeup from cede in real mode, and fixes a potential infinite loop
in kvmppc_run_vcpu() which can occur if we ever end up with the ceded
flag set but MSR[EE] off.

Signed-off-by: Paul Mackerras 
---
 arch/powerpc/include/asm/reg.h  |1 +
 arch/powerpc/kvm/book3s_hv.c|5 +-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |  140 +--
 3 files changed, 81 insertions(+), 65 deletions(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 11ae3d8..abe34e0 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -254,6 +254,7 @@
 #define LPCR_PECE1 0x2000  /* decrementer can cause exit */
 #define LPCR_PECE2 0x1000  /* machine check etc can cause exit */
 #define   LPCR_MER 0x0800  /* Mediated External Exception */
+#define   LPCR_MER_SH  11
 #define   LPCR_LPES0x000c
 #define   LPCR_LPES0   0x0008  /* LPAR Env selector 0 */
 #define   LPCR_LPES1   0x0004  /* LPAR Env selector 1 */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 1365440..bd751a3 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1384,9 +1384,12 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, 
struct kvm_vcpu *vcpu)
break;
vc->runner = vcpu;
n_ceded = 0;
-   list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
+   list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
if (!v->arch.pending_exceptions)
n_ceded += v->arch.ceded;
+   else
+   v->arch.ceded = 0;
+   }
if (n_ceded == vc->n_runnable)
kvmppc_vcore_blocked(vc);
else
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index fe908f5..0c519cb 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -97,50 +97,51 @@ kvm_start_guest:
li  r0,1
stb r0,PACA_NAPSTATELOST(r13)
 
-   /* get vcpu pointer, NULL if we have no vcpu to run */
-   ld  r4,HSTATE_KVM_VCPU(r13)
-   cmpdi   cr1,r4,0
+   /* were we napping due to cede? */
+   lbz r0,HSTATE_NAPPING(r13)
+   cmpwi   r0,0
+   bne kvm_end_cede
+
+   /*
+* We weren't napping due to cede, so this must be a secondary
+* thread being woken up to run a guest, or being woken up due
+* to a stray IPI.  (Or due to some machine check or hypervisor
+* maintenance interrupt while the core is in KVM.)
+*/
 
/* Check the wake reason in SRR1 to see why we got here */
mfspr   r3,SPRN_SRR1
rlwinm  r3,r3,44-31,0x7 /* extract wake reason field */
cmpwi   r3,4/* was it an external interrupt? */
-   bne 27f
-
-   /*
-* External interrupt - for now assume it is an IPI, since we
-* should never get any other interrupts sent to offline threads.
-* Only do this for secondary threads.
-*/
-   beq cr1,25f
-   lwz r3,VCPU_PTID(r4)
-   cmpwi   r3,0
-   beq 27f
-25:ld  r5,HSTATE_XICS_PHYS(r13)
-   li  r0,0xff
-   li  r6,XICS_MFRR
-   li  r7,XICS_XIRR
+   bne 27f /* if not */
+   ld  r5,HSTATE_XICS_PHYS(r13)
+   li  r7,XICS_XIRR/* if it was an external interrupt, */
lwzcix  r8,r5,r7/* get and ack the interrupt */
sync
clrldi. r9,r8,40/* get interrupt source ID. */
-   beq 27f /* none there? */
-   cmpwi   r9,XICS_IPI
-   bne 26f
+   beq 28f /* none there? */
+   cmpwi   r9,XICS_IPI /* was it an IPI? */
+   bne 29f
+   li  r0,0xff
+   li  r6,XICS_MFRR
stbcix  r0,r5,r6/* clear IPI */
-26:stwcix  r8,r5,r7/* EOI the interrupt */
-
-27:/* XXX should handle hypervisor maintenance interrupts etc. here */
+ 

[PATCH 1/9] KVM: PPC: Book3S: Add infrastructure to implement kernel-side RTAS calls

2013-02-14 Thread Paul Mackerras
From: Michael Ellerman 

For pseries machine emulation, in order to move the interrupt
controller code to the kernel, we need to intercept some RTAS
calls in the kernel itself.  This adds an infrastructure to allow
in-kernel handlers to be registered for RTAS services by name.
A new ioctl, KVM_PPC_RTAS_DEFINE_TOKEN, then allows userspace to
associate token values with those service names.  Then, when the
guest requests an RTAS service with one of those token values, it
will be handled by the relevant in-kernel handler rather than being
passed up to userspace as at present.

Signed-off-by: Michael Ellerman 
Signed-off-by: Benjamin Herrenschmidt 
Signed-off-by: Paul Mackerras 
---
 Documentation/virtual/kvm/api.txt   |   19 
 arch/powerpc/include/asm/hvcall.h   |3 +
 arch/powerpc/include/asm/kvm_host.h |1 +
 arch/powerpc/include/asm/kvm_ppc.h  |4 +
 arch/powerpc/include/uapi/asm/kvm.h |6 ++
 arch/powerpc/kvm/Makefile   |1 +
 arch/powerpc/kvm/book3s_hv.c|   18 +++-
 arch/powerpc/kvm/book3s_pr_papr.c   |7 ++
 arch/powerpc/kvm/book3s_rtas.c  |  182 +++
 arch/powerpc/kvm/powerpc.c  |9 +-
 include/uapi/linux/kvm.h|3 +
 11 files changed, 251 insertions(+), 2 deletions(-)
 create mode 100644 arch/powerpc/kvm/book3s_rtas.c

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index c2534c3..d3e2d60 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2122,6 +2122,25 @@ header; first `n_valid' valid entries with contents from 
the data
 written, then `n_invalid' invalid entries, invalidating any previously
 valid entries found.
 
+4.79 KVM_PPC_RTAS_DEFINE_TOKEN
+
+Capability: KVM_CAP_PPC_RTAS
+Architectures: ppc
+Type: vm ioctl
+Parameters: struct kvm_rtas_token_args
+Returns: 0 on success, -1 on error
+
+Defines a token value for a RTAS (Run Time Abstraction Services)
+service in order to allow it to be handled in the kernel.  The
+argument struct gives the name of the service, which must be the name
+of a service that has a kernel-side implementation.  If the token
+value is non-zero, it will be associated with that service, and
+subsequent RTAS calls by the guest specifying that token will be
+handled by the kernel.  If the token value is 0, then any token
+associated with the service will be forgotten, and subsequent RTAS
+calls by the guest for that service will be passed to userspace to be
+handled.
+
 
 5. The kvm_run structure
 
diff --git a/arch/powerpc/include/asm/hvcall.h 
b/arch/powerpc/include/asm/hvcall.h
index 7a86706..9ea22b2 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -269,6 +269,9 @@
 #define H_GET_MPP_X0x314
 #define MAX_HCALL_OPCODE   H_GET_MPP_X
 
+/* Platform specific hcalls, used by KVM */
+#define H_RTAS 0xf000
+
 #ifndef __ASSEMBLY__
 
 /**
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 8a72d59..8295dc7 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -255,6 +255,7 @@ struct kvm_arch {
 #endif /* CONFIG_KVM_BOOK3S_64_HV */
 #ifdef CONFIG_PPC_BOOK3S_64
struct list_head spapr_tce_tables;
+   struct list_head rtas_tokens;
 #endif
 };
 
diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index 44a657a..dd08cfa 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -165,6 +165,10 @@ extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu);
 
 extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *);
 
+extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp);
+extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu);
+extern void kvmppc_rtas_tokens_free(struct kvm *kvm);
+
 /*
  * Cuts out inst bits with ordering according to spec.
  * That means the leftmost bit is zero. All given bits are included.
diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
b/arch/powerpc/include/uapi/asm/kvm.h
index 16064d0..d90743c 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -299,6 +299,12 @@ struct kvm_allocate_rma {
__u64 rma_size;
 };
 
+/* for KVM_CAP_PPC_RTAS */
+struct kvm_rtas_token_args {
+   char name[120];
+   __u64 token;/* Use a token of 0 to undefine a mapping */
+};
+
 struct kvm_book3e_206_tlb_entry {
__u32 mas8;
__u32 mas1;
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index b772ede..432132c 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -86,6 +86,7 @@ kvm-book3s_64-module-objs := \
emulate.o \
book3s.o \
book3s_64_vio.o \
+   book3s_rtas.o \
$(kvm-book3s_64-objs-y)
 
 kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs)
diff --git a/arch/pow

[PATCH 9/9] KVM: PPC: Book3S: Add support for ibm,int-on/off RTAS calls

2013-02-14 Thread Paul Mackerras
This adds support for the ibm,int-on and ibm,int-off RTAS calls to the
in-kernel XICS emulation and corrects the handling of the saved
priority by the ibm,set-xive RTAS call.  With this, ibm,int-off sets
the specified interrupt's priority in its saved_priority field and
sets the priority to 0xff (the least favoured value).  ibm,int-on
restores the saved_priority to the priority field, and ibm,set-xive
sets both the priority and the saved_priority to the specified
priority value.

Signed-off-by: Paul Mackerras 
---
 arch/powerpc/include/asm/kvm_ppc.h |2 +
 arch/powerpc/kvm/book3s_rtas.c |   40 ++
 arch/powerpc/kvm/book3s_xics.c |  101 +---
 arch/powerpc/kvm/book3s_xics.h |2 +-
 4 files changed, 125 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index f371af8a..dd7c1fc 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -171,6 +171,8 @@ extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu);
 extern void kvmppc_rtas_tokens_free(struct kvm *kvm);
 extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 
priority);
 extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 
*priority);
+extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq);
+extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq);
 
 /*
  * Cuts out inst bits with ordering according to spec.
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
index 6a6c1fe..fc1a749 100644
--- a/arch/powerpc/kvm/book3s_rtas.c
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -64,6 +64,44 @@ out:
args->rets[0] = rc;
 }
 
+static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+   u32 irq;
+   int rc;
+
+   if (args->nargs != 1 || args->nret != 1) {
+   rc = -3;
+   goto out;
+   }
+
+   irq = args->args[0];
+
+   rc = kvmppc_xics_int_off(vcpu->kvm, irq);
+   if (rc)
+   rc = -3;
+out:
+   args->rets[0] = rc;
+}
+
+static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+   u32 irq;
+   int rc;
+
+   if (args->nargs != 1 || args->nret != 1) {
+   rc = -3;
+   goto out;
+   }
+
+   irq = args->args[0];
+
+   rc = kvmppc_xics_int_on(vcpu->kvm, irq);
+   if (rc)
+   rc = -3;
+out:
+   args->rets[0] = rc;
+}
+
 struct rtas_handler {
void (*handler)(struct kvm_vcpu *vcpu, struct rtas_args *args);
char *name;
@@ -72,6 +110,8 @@ struct rtas_handler {
 static struct rtas_handler rtas_handlers[] = {
{ .name = "ibm,set-xive", .handler = kvm_rtas_set_xive },
{ .name = "ibm,get-xive", .handler = kvm_rtas_get_xive },
+   { .name = "ibm,int-off",  .handler = kvm_rtas_int_off },
+   { .name = "ibm,int-on",   .handler = kvm_rtas_int_on },
 };
 
 struct rtas_token_definition {
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index abd2dde..665777e 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -116,6 +116,28 @@ static void ics_check_resend(struct kvmppc_xics *xics, 
struct kvmppc_ics *ics,
mutex_unlock(&ics->lock);
 }
 
+static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
+  struct ics_irq_state *state,
+  u32 server, u32 priority, u32 saved_priority)
+{
+   bool deliver;
+
+   mutex_lock(&ics->lock);
+
+   state->server = server;
+   state->priority = priority;
+   state->saved_priority = saved_priority;
+   deliver = false;
+   if ((state->masked_pending || state->resend) && priority != MASKED) {
+   state->masked_pending = 0;
+   deliver = true;
+   }
+
+   mutex_unlock(&ics->lock);
+
+   return deliver;
+}
+
 int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority)
 {
struct kvmppc_xics *xics = kvm->arch.irq_ctrler_private;
@@ -123,7 +145,6 @@ int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 
server, u32 priority)
struct kvmppc_ics *ics;
struct ics_irq_state *state;
u16 src;
-   bool deliver;
 
if (!xics)
return -ENODEV;
@@ -137,23 +158,11 @@ int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 
server, u32 priority)
if (!icp)
return -EINVAL;
 
-   mutex_lock(&ics->lock);
-
XICS_DBG("set_xive %#x server %#x prio %#x MP:%d RS:%d\n",
 irq, server, priority,
 state->masked_pending, state->resend);
 
-   state->server = server;
-   state->priority = priority;
-   deliver = false;
-   if ((state->masked_pending || state->resend) && priority != MASKED) {
-   state->masked_pending = 0;
-   deliver = true;
-   }
-
-   mutex_unlock(&ics->lock);