RE: [PATCH 5/6 v2] kvm: powerpc: booke: Add linux pte lookup like booke3s

2013-08-05 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Benjamin Herrenschmidt [mailto:b...@kernel.crashing.org]
 Sent: Saturday, August 03, 2013 9:54 AM
 To: Bhushan Bharat-R65777
 Cc: Wood Scott-B07421; ag...@suse.de; kvm-ppc@vger.kernel.org;
 k...@vger.kernel.org; linuxppc-...@lists.ozlabs.org
 Subject: Re: [PATCH 5/6 v2] kvm: powerpc: booke: Add linux pte lookup like
 booke3s
 
 On Sat, 2013-08-03 at 02:58 +, Bhushan Bharat-R65777 wrote:
 One of the problems I saw was that if I put this code in
 asm/pgtable-32.h and asm/pgtable-64.h, then pte_present() and other
 friend functions (on which this code depends) are defined in pgtable.h.
 And pgtable.h includes asm/pgtable-32.h and asm/pgtable-64.h before it
 defines pte_present() and friend functions.

 Ok, I can move this into asm/pgtable*.h; initially I fought with myself
 to take this code into pgtable*, but finally ended up doing it here (got
 biased by book3s :)).
 
 Is there a reason why these routines can not be completely generic in 
 pgtable.h
 ?

How about the generic function:

diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h 
b/arch/powerpc/include/asm/pgtable-ppc64.h
index d257d98..21daf28 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -221,6 +221,27 @@ static inline unsigned long pte_update(struct mm_struct 
*mm,
return old;
 }

+static inline unsigned long pte_read(pte_t *p)
+{
+#ifdef PTE_ATOMIC_UPDATES
+   pte_t pte;
+   pte_t tmp;
+   __asm__ __volatile__ (
+   "1: ldarx   %0,0,%3\n"
+   "   andi.   %1,%0,%4\n"
+   "   bne-    1b\n"
+   "   ori     %1,%0,%4\n"
+   "   stdcx.  %1,0,%3\n"
+   "   bne-    1b"
+   : "=&r" (pte), "=&r" (tmp), "=m" (*p)
+   : "r" (p), "i" (_PAGE_BUSY)
+   : "cc");
+
+   return pte;
+#else
+   return pte_val(*p);
+#endif
+}
 static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
  unsigned long addr, pte_t *ptep)
 {
diff --git a/arch/powerpc/include/asm/pgtable.h 
b/arch/powerpc/include/asm/pgtable.h
index 690c8c2..dad712c 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -254,6 +254,45 @@ static inline pte_t *find_linux_pte_or_hugepte(pgd_t 
*pgdir, unsigned long ea,
 }
 #endif /* !CONFIG_HUGETLB_PAGE */

+static inline pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
+int writing, unsigned long *pte_sizep)
+{
+   pte_t *ptep;
+   pte_t pte;
+   unsigned long ps = *pte_sizep;
+   unsigned int shift;
+
+   ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
+   if (!ptep)
+   return __pte(0);
+   if (shift)
+   *pte_sizep = 1ul << shift;
+   else
+   *pte_sizep = PAGE_SIZE;
+
+   if (ps > *pte_sizep)
+   return __pte(0);
+
+   if (!pte_present(*ptep))
+   return __pte(0);
+
+#ifdef CONFIG_PPC64
+   /* Lock PTE (set _PAGE_BUSY) and read */
+   pte = pte_read(ptep);
+#else
+   pte = pte_val(*ptep);
+#endif
+   if (pte_present(pte)) {
+   pte = pte_mkyoung(pte);
+   if (writing && pte_write(pte))
+   pte = pte_mkdirty(pte);
+   }
+
+   *ptep = __pte(pte); /* 64bit: Also unlock pte (clear _PAGE_BUSY) */
+
+   return pte;
+}
+
 #endif /* __ASSEMBLY__ */

 #endif /* __KERNEL__ */
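
For context, a minimal sketch of how a caller on the booke side might consume
the proposed helper to pick up the caching attributes discussed in patch 6/6.
The wrapper name example_xlate, the error handling and the WIMG mask below are
illustrative assumptions, not part of the patch:

/*
 * Sketch only (not part of the patch): a hypothetical caller of the
 * proposed lookup_linux_pte().  *psize is in/out: minimum acceptable
 * page size on entry, actual mapping size on return.
 */
static int example_xlate(pgd_t *pgdir, unsigned long hva, int writing,
			 unsigned long *wimg, unsigned long *psize)
{
	pte_t pte;

	*psize = PAGE_SIZE;
	pte = lookup_linux_pte(pgdir, hva, writing, psize);
	if (!pte_present(pte))
		return -EFAULT;		/* no usable Linux mapping */

	/* Assumed WIMG extraction; the real series may mask differently. */
	*wimg = pte_val(pte) & (_PAGE_NO_CACHE | _PAGE_GUARDED |
				_PAGE_WRITETHRU | _PAGE_COHERENT);
	return 0;
}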


Re: [PATCH 6/6 v2] kvm: powerpc: use caching attributes as per linux pte

2013-08-05 Thread Scott Wood
On Sat, 2013-08-03 at 14:25 +1000, Benjamin Herrenschmidt wrote:
 On Sat, 2013-08-03 at 03:11 +, Bhushan Bharat-R65777 wrote:
  
   
   Could you explain why we need to set dirty/referenced on the PTE, when we 
   didn't
   need to do that before? All we're getting from the PTE is wimg.
   We have MMU notifiers to take care of the page being unmapped, and we've 
   already
   marked the page itself as dirty if the TLB entry is writeable.
  
  I pulled this code from book3s.
  
  Ben, can you describe why we need this on book3s ?
 
 If you let the guest write to the page you must set the dirty bit on the PTE
 (or the struct page, at least one of them), similar with accessed on any 
 access.
 
 If you don't, the VM might swap the page out without writing it back to disk
 for example, assuming it contains no modified data.

We've already marked the page itself as dirty using kvm_set_pfn_dirty(),
and if the VM swaps it out we'll get an MMU notifier callback.  If we
marked the PTE dirty/accessed instead, is there any guarantee it will
stay marked dirty/accessed until the next MMU notifier?

-Scott





Re: [PATCH 6/6 v2] kvm: powerpc: use caching attributes as per linux pte

2013-08-05 Thread Scott Wood
On Fri, 2013-08-02 at 22:11 -0500, Bhushan Bharat-R65777 wrote:
  How does wimg get set in the pfnmap case?
 
 Pfnmap is not kernel managed pages, right? So should we set I+G there ?

It could depend on ppc_md.phys_mem_access_prot().  Can't you pull it
from the PTE regardless of pfnmap?

-Scott





Re: [PATCH 03/10] vfio: add external user support

2013-08-05 Thread Alex Williamson
On Tue, 2013-07-23 at 19:07 +1000, Alexey Kardashevskiy wrote:
 On 07/23/2013 12:23 PM, Alex Williamson wrote:
  On Tue, 2013-07-16 at 10:53 +1000, Alexey Kardashevskiy wrote:
  VFIO is designed to be used via ioctls on file descriptors
  returned by VFIO.
 
  However in some situations support for an external user is required.
  The first user is KVM on PPC64 (SPAPR TCE protocol) which is going to
  use the existing VFIO groups for exclusive access in real/virtual mode
  on a host to avoid passing map/unmap requests to the user space which
 would make things pretty slow.
 
  The protocol includes:
 
  1. do normal VFIO init operation:
 - opening a new container;
 - attaching group(s) to it;
 - setting an IOMMU driver for a container.
  When IOMMU is set for a container, all groups in it are
  considered ready to use by an external user.
 
  2. User space passes a group fd to an external user.
  The external user calls vfio_group_get_external_user()
  to verify that:
 - the group is initialized;
 - IOMMU is set for it.
  If both checks passed, vfio_group_get_external_user()
  increments the container user counter to prevent
  the VFIO group from disposal before KVM exits.
 
  3. The external user calls vfio_external_user_iommu_id()
  to know an IOMMU ID. PPC64 KVM uses it to link logical bus
  number (LIOBN) with IOMMU ID.
 
  4. When the external KVM finishes, it calls
  vfio_group_put_external_user() to release the VFIO group.
  This call decrements the container user counter.
  Everything gets released.
 
 The "vfio: Limit group opens" patch is also required for consistency.
 
  Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
  
  This looks fine to me.  Is the plan to add this through the ppc tree
  again?  Thanks,
 
 
 Nope, better to add this through your tree. And faster for sure :) Thanks!

Applied to my next branch for v3.12.  Thanks,

Alex




Re: [PATCH 5/6 v2] kvm: powerpc: booke: Add linux pte lookup like booke3s

2013-08-05 Thread Scott Wood
On Mon, 2013-08-05 at 09:27 -0500, Bhushan Bharat-R65777 wrote:
 
  -Original Message-
  From: Benjamin Herrenschmidt [mailto:b...@kernel.crashing.org]
  Sent: Saturday, August 03, 2013 9:54 AM
  To: Bhushan Bharat-R65777
  Cc: Wood Scott-B07421; ag...@suse.de; kvm-ppc@vger.kernel.org;
  k...@vger.kernel.org; linuxppc-...@lists.ozlabs.org
  Subject: Re: [PATCH 5/6 v2] kvm: powerpc: booke: Add linux pte lookup like
  booke3s
  
  On Sat, 2013-08-03 at 02:58 +, Bhushan Bharat-R65777 wrote:
  One of the problems I saw was that if I put this code in
  asm/pgtable-32.h and asm/pgtable-64.h, then pte_present() and other
  friend functions (on which this code depends) are defined in pgtable.h.
  And pgtable.h includes asm/pgtable-32.h and asm/pgtable-64.h before it
  defines pte_present() and friend functions.

  Ok, I can move this into asm/pgtable*.h; initially I fought with myself
  to take this code into pgtable*, but finally ended up doing it here (got
  biased by book3s :)).
  
  Is there a reason why these routines can not be completely generic in 
  pgtable.h
  ?
 
 How about the generic function:
 
 diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h 
 b/arch/powerpc/include/asm/pgtable-ppc64.h
 index d257d98..21daf28 100644
 --- a/arch/powerpc/include/asm/pgtable-ppc64.h
 +++ b/arch/powerpc/include/asm/pgtable-ppc64.h
 @@ -221,6 +221,27 @@ static inline unsigned long pte_update(struct mm_struct 
 *mm,
 return old;
  }
 
 +static inline unsigned long pte_read(pte_t *p)
 +{
 +#ifdef PTE_ATOMIC_UPDATES
 +   pte_t pte;
 +   pte_t tmp;
 +   __asm__ __volatile__ (
 +   "1: ldarx   %0,0,%3\n"
 +   "   andi.   %1,%0,%4\n"
 +   "   bne-    1b\n"
 +   "   ori     %1,%0,%4\n"
 +   "   stdcx.  %1,0,%3\n"
 +   "   bne-    1b"
 +   : "=&r" (pte), "=&r" (tmp), "=m" (*p)
 +   : "r" (p), "i" (_PAGE_BUSY)
 +   : "cc");
 +
 +   return pte;
 +#else
 +   return pte_val(*p);
 +#endif
 +}
  static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
   unsigned long addr, pte_t *ptep)

Please leave a blank line between functions.

  {
 diff --git a/arch/powerpc/include/asm/pgtable.h 
 b/arch/powerpc/include/asm/pgtable.h
 index 690c8c2..dad712c 100644
 --- a/arch/powerpc/include/asm/pgtable.h
 +++ b/arch/powerpc/include/asm/pgtable.h
 @@ -254,6 +254,45 @@ static inline pte_t *find_linux_pte_or_hugepte(pgd_t 
 *pgdir, unsigned long ea,
  }
  #endif /* !CONFIG_HUGETLB_PAGE */
 
 +static inline pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
 +int writing, unsigned long *pte_sizep)

The name implies that it just reads the PTE.  Setting accessed/dirty
shouldn't be an undocumented side-effect.  Why can't the caller do that
(or a different function that the caller calls afterward if desired)?  

Though even then you have the undocumented side effect of locking the
PTE on certain targets.

 +{
 +   pte_t *ptep;
 +   pte_t pte;
 +   unsigned long ps = *pte_sizep;
 +   unsigned int shift;
 +
 +   ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
 +   if (!ptep)
 +   return __pte(0);
 +   if (shift)
 +   *pte_sizep = 1ul << shift;
 +   else
 +   *pte_sizep = PAGE_SIZE;
 +
 +   if (ps > *pte_sizep)
 +   return __pte(0);
 +
 +   if (!pte_present(*ptep))
 +   return __pte(0);
 +
 +#ifdef CONFIG_PPC64
 +   /* Lock PTE (set _PAGE_BUSY) and read */
 +   pte = pte_read(ptep);
 +#else
 +   pte = pte_val(*ptep);
 +#endif

What about 32-bit platforms that need atomic PTEs?

-Scott





RE: [PATCH 5/6 v2] kvm: powerpc: booke: Add linux pte lookup like booke3s

2013-08-05 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Wood Scott-B07421
 Sent: Tuesday, August 06, 2013 12:49 AM
 To: Bhushan Bharat-R65777
 Cc: Benjamin Herrenschmidt; Wood Scott-B07421; ag...@suse.de; kvm-
 p...@vger.kernel.org; k...@vger.kernel.org; linuxppc-...@lists.ozlabs.org
 Subject: Re: [PATCH 5/6 v2] kvm: powerpc: booke: Add linux pte lookup like
 booke3s
 
 On Mon, 2013-08-05 at 09:27 -0500, Bhushan Bharat-R65777 wrote:
 
   -Original Message-
   From: Benjamin Herrenschmidt [mailto:b...@kernel.crashing.org]
   Sent: Saturday, August 03, 2013 9:54 AM
   To: Bhushan Bharat-R65777
   Cc: Wood Scott-B07421; ag...@suse.de; kvm-ppc@vger.kernel.org;
   k...@vger.kernel.org; linuxppc-...@lists.ozlabs.org
   Subject: Re: [PATCH 5/6 v2] kvm: powerpc: booke: Add linux pte
   lookup like booke3s
  
   On Sat, 2013-08-03 at 02:58 +, Bhushan Bharat-R65777 wrote:
 One of the problems I saw was that if I put this code in
 asm/pgtable-32.h and asm/pgtable-64.h, then pte_present() and other
 friend functions (on which this code depends) are defined in pgtable.h.
 And pgtable.h includes asm/pgtable-32.h and asm/pgtable-64.h
 before it defines pte_present() and friend functions.

 Ok, I can move this into asm/pgtable*.h; initially I fought with
 myself to take this code into pgtable*, but finally ended up doing it here
 (got biased by book3s :)).
  
   Is there a reason why these routines can not be completely generic
   in pgtable.h ?
 
  How about the generic function:
 
  diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h
  b/arch/powerpc/include/asm/pgtable-ppc64.h
  index d257d98..21daf28 100644
  --- a/arch/powerpc/include/asm/pgtable-ppc64.h
  +++ b/arch/powerpc/include/asm/pgtable-ppc64.h
  @@ -221,6 +221,27 @@ static inline unsigned long pte_update(struct mm_struct
 *mm,
  return old;
   }
 
  +static inline unsigned long pte_read(pte_t *p)
  +{
  +#ifdef PTE_ATOMIC_UPDATES
  +   pte_t pte;
  +   pte_t tmp;
  +   __asm__ __volatile__ (
  +   "1: ldarx   %0,0,%3\n"
  +   "   andi.   %1,%0,%4\n"
  +   "   bne-    1b\n"
  +   "   ori     %1,%0,%4\n"
  +   "   stdcx.  %1,0,%3\n"
  +   "   bne-    1b"
  +   : "=&r" (pte), "=&r" (tmp), "=m" (*p)
  +   : "r" (p), "i" (_PAGE_BUSY)
  +   : "cc");
  +
  +   return pte;
  +#else
  +   return pte_val(*p);
  +#endif
  +}
   static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
unsigned long addr,
  pte_t *ptep)
 
 Please leave a blank line between functions.
 
   {
  diff --git a/arch/powerpc/include/asm/pgtable.h
  b/arch/powerpc/include/asm/pgtable.h
  index 690c8c2..dad712c 100644
  --- a/arch/powerpc/include/asm/pgtable.h
  +++ b/arch/powerpc/include/asm/pgtable.h
  @@ -254,6 +254,45 @@ static inline pte_t
  *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,  }  #endif
  /* !CONFIG_HUGETLB_PAGE */
 
  +static inline pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
  +                                     int writing, unsigned long *pte_sizep)
 
 The name implies that it just reads the PTE.  Setting accessed/dirty shouldn't
 be an undocumented side-effect.

Ok, will rename and document.

 Why can't the caller do that (or a different
 function that the caller calls afterward if desired)?

The current implementation in book3s is:
 1) find a pte/hugepte
 2) return null if pte not present
 3) take _PAGE_BUSY lock
 4) set accessed/dirty
 5) clear _PAGE_BUSY.

What I tried was:
1) find a pte/hugepte
2) return null if pte not present
3) return pte (do not take the lock, i.e. do not set _PAGE_BUSY)

4) then the caller calls __ptep_set_access_flags() to atomically update the
dirty/accessed flags in the pte.

- But the benchmark results were not good.
- Also, can there be a race, since we do not take the lock in step 3 and only
  update in step 4? (see the sketch below)
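
To make the race question concrete, here is a small self-contained user-space
model of the two orderings, using C11 atomics in place of the ldarx/stdcx.
loop; the bit names and layout are stand-ins, not the kernel's _PAGE_*
definitions:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define PTE_BUSY     (1ULL << 0)   /* stand-in for _PAGE_BUSY */
#define PTE_ACCESSED (1ULL << 1)
#define PTE_DIRTY    (1ULL << 2)

/* Book3s-style: lock the PTE (set BUSY), update A/D, store back (unlock). */
static uint64_t update_locked(_Atomic uint64_t *pte, int writing)
{
	uint64_t old, new;

	for (;;) {                      /* models the ldarx/stdcx. retry loop */
		old = atomic_load(pte);
		if (old & PTE_BUSY)
			continue;       /* another updater holds the lock */
		new = old | PTE_BUSY;
		if (atomic_compare_exchange_weak(pte, &old, new))
			break;
	}
	new = old | PTE_ACCESSED | (writing ? PTE_DIRTY : 0);
	atomic_store(pte, new);         /* final store also clears BUSY */
	return new;
}

/* Lockless variant: the A/D update itself is one atomic read-modify-write,
 * but the value handed back to the caller can already be stale by the time
 * it is used, which is exactly the window the question above is about. */
static uint64_t update_lockless(_Atomic uint64_t *pte, int writing)
{
	uint64_t old, new;

	do {
		old = atomic_load(pte);
		new = old | PTE_ACCESSED | (writing ? PTE_DIRTY : 0);
	} while (!atomic_compare_exchange_weak(pte, &old, new));
	return new;
}

int main(void)
{
	_Atomic uint64_t pte = 0x1000;  /* pretend pfn bits */

	printf("locked:   %#llx\n", (unsigned long long)update_locked(&pte, 1));
	printf("lockless: %#llx\n", (unsigned long long)update_lockless(&pte, 0));
	return 0;
}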
  
 
 Though even then you have the undocumented side effect of locking the PTE on
 certain targets.
 
  +{
  +   pte_t *ptep;
  +   pte_t pte;
  +   unsigned long ps = *pte_sizep;
  +   unsigned int shift;
  +
  +   ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
  +   if (!ptep)
  +   return __pte(0);
  +   if (shift)
  +   *pte_sizep = 1ul << shift;
  +   else
  +   *pte_sizep = PAGE_SIZE;
  +
  +   if (ps > *pte_sizep)
  +   return __pte(0);
  +
  +   if (!pte_present(*ptep))
  +   return __pte(0);
  +
  +#ifdef CONFIG_PPC64
  +   /* Lock PTE (set _PAGE_BUSY) and read */
  +   pte = pte_read(ptep);
  +#else
  +   pte = pte_val(*ptep);
  +#endif
 
 What about 32-bit platforms that need atomic PTEs?

I called __ptep_set_access_flags() for both 32-bit and 64-bit (for 64-bit I was
not calling pte_read()), which handles atomic updates. Somehow the benchmark
results were not good; I will try again.

Thanks
-Bharat
 
 -Scott
 



[PATCH 08/23] KVM: PPC: Book3S PR: Handle PP0 page-protection bit in guest HPTEs

2013-08-05 Thread Paul Mackerras
64-bit POWER processors have a three-bit field for page protection in
the hashed page table entry (HPTE).  Currently we only interpret the two
bits that were present in older versions of the architecture.  The only
defined combination that has the new bit set is 110, meaning read-only
for supervisor and no access for user mode.

This adds code to kvmppc_mmu_book3s_64_xlate() to interpret the extra
bit appropriately.
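
For reference, the resulting decode can be expressed as a small standalone
helper that mirrors the switch in kvmppc_mmu_book3s_64_xlate(); the constant
values and the helper name below are illustrative (quoted from memory of the
kernel headers), not copied from this patch:

#include <stdbool.h>

#define HPTE_R_PP   0x0000000000000003ULL  /* classic two pp bits */
#define HPTE_R_PP0  0x8000000000000000ULL  /* new third pp bit */

/*
 * Combine the protection bits the way the xlate code does: the low two pp
 * bits OR'd with the key (0 or 4), plus 8 when the new PP0 bit is set.
 * The values treated as read-only by the switch are 3, 5, 7 and now 10
 * (PP0 set, low bits 0b10): read-only for supervisor, no access for user.
 */
static bool hpte_is_read_only(unsigned long long r, int key)
{
	int pp = (int)(r & HPTE_R_PP) | key;

	if (r & HPTE_R_PP0)
		pp |= 8;

	switch (pp) {
	case 3:
	case 5:
	case 7:
	case 10:
		return true;
	default:
		return false;
	}
}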

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/kvm/book3s_64_mmu.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 658ccd7..563fbf7 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -298,6 +298,8 @@ do_second:
v = pteg[i];
r = pteg[i+1];
pp = (r & HPTE_R_PP) | key;
+   if (r & HPTE_R_PP0)
+   pp |= 8;
 
gpte->eaddr = eaddr;
gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data);
@@ -319,6 +321,7 @@ do_second:
case 3:
case 5:
case 7:
+   case 10:
gpte->may_read = true;
break;
}
-- 
1.8.3.1



[PATCH 09/23] KVM: PPC: Book3S PR: Correct errors in H_ENTER implementation

2013-08-05 Thread Paul Mackerras
The implementation of H_ENTER in PR KVM has some errors:

* With H_EXACT not set, if the HPTEG is full, we return H_PTEG_FULL
  as the return value of kvmppc_h_pr_enter, but the caller is expecting
  one of the EMULATE_* values.  The H_PTEG_FULL needs to go in the
  guest's R3 instead.

* With H_EXACT set, if the selected HPTE is already valid, the H_ENTER
  call should return a H_PTEG_FULL error.

This fixes these errors and also makes it write only the selected HPTE,
not the whole group, since only the selected HPTE has been modified.
This also micro-optimizes the calculations involving pte_index and i.

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/kvm/book3s_pr_papr.c | 19 ++-
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_pr_papr.c 
b/arch/powerpc/kvm/book3s_pr_papr.c
index da0e0bc..38f1899 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -21,6 +21,8 @@
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
 
+#define HPTE_SIZE  16  /* bytes per HPT entry */
+
 static unsigned long get_pteg_addr(struct kvm_vcpu *vcpu, long pte_index)
 {
struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
@@ -40,32 +42,39 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu)
long pte_index = kvmppc_get_gpr(vcpu, 5);
unsigned long pteg[2 * 8];
unsigned long pteg_addr, i, *hpte;
+   long int ret;
 
+   i = pte_index & 7;
pte_index &= ~7UL;
pteg_addr = get_pteg_addr(vcpu, pte_index);
 
copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg));
hpte = pteg;
 
+   ret = H_PTEG_FULL;
if (likely((flags & H_EXACT) == 0)) {
-   pte_index &= ~7UL;
for (i = 0; ; ++i) {
if (i == 8)
-   return H_PTEG_FULL;
+   goto done;
if ((*hpte & HPTE_V_VALID) == 0)
break;
hpte += 2;
}
} else {
-   i = kvmppc_get_gpr(vcpu, 5) & 7UL;
hpte += i * 2;
+   if (*hpte & HPTE_V_VALID)
+   goto done;
}
 
hpte[0] = kvmppc_get_gpr(vcpu, 6);
hpte[1] = kvmppc_get_gpr(vcpu, 7);
-   copy_to_user((void __user *)pteg_addr, pteg, sizeof(pteg));
-   kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
+   pteg_addr += i * HPTE_SIZE;
+   copy_to_user((void __user *)pteg_addr, hpte, HPTE_SIZE);
kvmppc_set_gpr(vcpu, 4, pte_index | i);
+   ret = H_SUCCESS;
+
+ done:
+   kvmppc_set_gpr(vcpu, 3, ret);
 
return EMULATE_DONE;
 }
-- 
1.8.3.1



[PATCH 16/23] KVM: PPC: Book3S: Merge implementations of KVM_PPC_GET_SMMU_INFO ioctl

2013-08-05 Thread Paul Mackerras
This merges the PR and HV implementations of kvm_vm_ioctl_get_smmu_info()
into a single implementation in book3s.c.  Since userspace tends to
call this ioctl very early in the life of a VM, before (for instance)
enabling PAPR mode, we will need this to return results that are
compatible with both PR and HV guests, once we are able to compile both
PR and HV into one kernel image.  For HV guests, the capabilities and
encodings need to be consistent with what the real hardware we are
running on can do, whereas for PR guests, the MMU is completely
virtual and so the set of capabilities and encodings is arbitrary.

To achieve this, we report a set of segment and page sizes and
encodings that are consistent with what real POWER processors do.
If the guest could potentially use HV mode then we filter that set
to remove anything that is not implemented by the CPU that we are
running on.  The helper function, kvm_book3s_hv_possible(), that we add
to trigger this filtering is currently just defined based on the
kernel configuration.
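
For illustration, user space typically queries this very early via the VM
ioctl, roughly as below (powerpc-only, error handling trimmed; the field names
are the ones used in the patch, but <linux/kvm.h> is the authoritative layout):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	struct kvm_ppc_smmu_info info = { 0 };
	int kvm = open("/dev/kvm", O_RDWR);
	int vm = ioctl(kvm, KVM_CREATE_VM, 0);

	if (kvm < 0 || vm < 0 ||
	    ioctl(vm, KVM_PPC_GET_SMMU_INFO, &info) < 0) {
		perror("KVM_PPC_GET_SMMU_INFO");
		return 1;
	}

	printf("slb_size=%u flags=%#x\n", info.slb_size, info.flags);
	for (unsigned int i = 0; i < 8 && info.sps[i].page_shift; i++)
		printf("  base page_shift=%u slb_enc=%#x\n",
		       info.sps[i].page_shift, info.sps[i].slb_enc);
	return 0;
}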

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/include/asm/kvm_ppc.h |  4 +++
 arch/powerpc/kvm/book3s.c  | 53 ++
 arch/powerpc/kvm/book3s_hv.c   | 38 ---
 arch/powerpc/kvm/book3s_pr.c   | 30 -
 4 files changed, 57 insertions(+), 68 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index b15554a..af7fe62 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -283,6 +283,8 @@ static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
 
 extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu);
 
+static inline int kvm_book3s_hv_possible(void) { return 1; }
+
 #else
 static inline void __init kvm_cma_reserve(void)
 {}
@@ -302,6 +304,8 @@ static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu 
*vcpu)
 {
kvm_vcpu_kick(vcpu);
 }
+
+static inline int kvm_book3s_hv_possible(void) { return 0; }
 #endif
 
 #ifdef CONFIG_KVM_XICS
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 4b136be..06abd84 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -728,6 +728,59 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct 
kvm_dirty_log *log)
return -ENOTTY;
 }
 
+#ifdef CONFIG_PPC64
+static void add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
+ int linux_psize, int shift, int sllp, int lp)
+{
+   struct mmu_psize_def *def = &mmu_psize_defs[linux_psize];
+
+   if (kvm_book3s_hv_possible()) {
+   /* Check this matches what the hardware does */
+   if (shift != def->shift || sllp != def->sllp ||
+   lp != def->penc[linux_psize])
+   return;
+   }
+
+   (*sps)->page_shift = shift;
+   (*sps)->slb_enc = sllp;
+   (*sps)->enc[0].page_shift = shift;
+   (*sps)->enc[0].pte_enc = lp;
+   (*sps)++;
+}
+
+int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm,
+  struct kvm_ppc_smmu_info *info)
+{
+   struct kvm_ppc_one_seg_page_size *sps;
+
+   /*
+* At this stage we don't know whether this VM will be
+* HV or PR, so if it could be HV, restrict what we report
+* to what the hardware can do.
+*/
+   if (kvm_book3s_hv_possible()) {
+   info->slb_size = mmu_slb_size;
+   info->flags = KVM_PPC_PAGE_SIZES_REAL;
+   if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
+   info->flags |= KVM_PPC_1T_SEGMENTS;
+   } else {
+   /* emulated SLB is always 64 entries */
+   info->slb_size = 64;
+   info->flags = KVM_PPC_1T_SEGMENTS;
+   }
+
+   /* No multi-page size segments (MPSS) support yet */
+   sps = &info->sps[0];
+   add_seg_page_size(&sps, MMU_PAGE_4K, 12, 0, 0);
+   add_seg_page_size(&sps, MMU_PAGE_64K, 16,
+ SLB_VSID_L | SLB_VSID_LP_01, 1);
+   add_seg_page_size(&sps, MMU_PAGE_16M, 24,
+ SLB_VSID_L | SLB_VSID_LP_00, 0);
+
+   return 0;
+}
+#endif /* CONFIG_PPC64 */
+
 void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
  struct kvm_memory_slot *dont)
 {
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index fcf0564..13f79dd 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1568,44 +1568,6 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct 
kvm_allocate_rma *ret)
return fd;
 }
 
-static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
-int linux_psize)
-{
-   struct mmu_psize_def *def = &mmu_psize_defs[linux_psize];
-
-   if (!def->shift)
-       return;
-   (*sps)->page_shift = def->shift;
-   (*sps)->slb_enc = def->sllp;
-   

[PATCH 10/23] KVM: PPC: Book3S PR: Make HPT accesses and updates SMP-safe

2013-08-05 Thread Paul Mackerras
This adds a per-VM mutex to provide mutual exclusion between vcpus
for accesses to and updates of the guest hashed page table (HPT).
This also makes the code use single-byte writes to the HPT entry
when updating of the reference (R) and change (C) bits.  The reason
for doing this, rather than writing back the whole HPTE, is that on
non-PAPR virtual machines, the guest OS might be writing to the HPTE
concurrently, and writing back the whole HPTE might conflict with
that.  Also, real hardware does single-byte writes to update R and C.

The new mutex is taken in kvmppc_mmu_book3s_64_xlate() when reading
the HPT and updating R and/or C, and in the PAPR HPT update hcalls
(H_ENTER, H_REMOVE, etc.).  Having the mutex means that we don't need
to use a hypervisor lock bit in the HPT update hcalls, and we don't
need to be careful about the order in which the bytes of the HPTE are
updated by those hcalls.

The other change here is to make emulated TLB invalidations (tlbie)
effective across all vcpus.  To do this we call kvmppc_mmu_pte_vflush
for all vcpus in kvmppc_mmu_book3s_64_tlbie().

For 32-bit, this makes the setting of the accessed and dirty bits use
single-byte writes, and makes tlbie invalidate shadow HPTEs for all
vcpus.

With this, PR KVM can successfully run SMP guests.

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/include/asm/kvm_host.h |  3 +++
 arch/powerpc/kvm/book3s_32_mmu.c| 36 ++--
 arch/powerpc/kvm/book3s_64_mmu.c| 33 +++--
 arch/powerpc/kvm/book3s_pr.c|  1 +
 arch/powerpc/kvm/book3s_pr_papr.c   | 33 +++--
 5 files changed, 72 insertions(+), 34 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 2d3c770..c37207f 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -259,6 +259,9 @@ struct kvm_arch {
struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
int hpt_cma_alloc;
 #endif /* CONFIG_KVM_BOOK3S_64_HV */
+#ifdef CONFIG_KVM_BOOK3S_PR
+   struct mutex hpt_mutex;
+#endif
 #ifdef CONFIG_PPC_BOOK3S_64
struct list_head spapr_tce_tables;
struct list_head rtas_tokens;
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index af04553..856af98 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -271,19 +271,22 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu 
*vcpu, gva_t eaddr,
/* Update PTE C and A bits, so the guest's swapper knows we used the
   page */
if (found) {
-   u32 oldpte = pteg[i+1];
-
-   if (pte->may_read)
-   pteg[i+1] |= PTEG_FLAG_ACCESSED;
-   if (pte->may_write)
-   pteg[i+1] |= PTEG_FLAG_DIRTY;
-   else
-   dprintk_pte("KVM: Mapping read-only page!\n");
-
-   /* Write back into the PTEG */
-   if (pteg[i+1] != oldpte)
-   copy_to_user((void __user *)ptegp, pteg, sizeof(pteg));
-
+   u32 pte_r = pteg[i+1];
+   char __user *addr = (char __user *) pteg[i+1];
+
+   /*
+* Use single-byte writes to update the HPTE, to
+* conform to what real hardware does.
+*/
+   if (pte->may_read && !(pte_r & PTEG_FLAG_ACCESSED)) {
+   pte_r |= PTEG_FLAG_ACCESSED;
+   put_user(pte_r >> 8, addr + 2);
+   }
+   if (pte->may_write && !(pte_r & PTEG_FLAG_DIRTY)) {
+   /* XXX should only set this for stores */
+   pte_r |= PTEG_FLAG_DIRTY;
+   put_user(pte_r, addr + 3);
+   }
return 0;
}
 
@@ -348,7 +351,12 @@ static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu 
*vcpu, u32 srnum,
 
 static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool 
large)
 {
-   kvmppc_mmu_pte_flush(vcpu, ea, 0x0000);
+   int i;
+   struct kvm_vcpu *v;
+
+   /* flush this VA on all cpus */
+   kvm_for_each_vcpu(i, v, vcpu->kvm)
+   kvmppc_mmu_pte_flush(v, ea, 0x0000);
 }
 
 static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 563fbf7..26a57ca 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -257,6 +257,8 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu 
*vcpu, gva_t eaddr,
 
pgsize = slbe->large ? MMU_PAGE_16M : MMU_PAGE_4K;
 
+   mutex_lock(&vcpu->kvm->arch.hpt_mutex);
+
 do_second:
ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second);
if (kvm_is_error_hva(ptegp))
@@ -332,30 +334,37 @@ do_second:
 
/* Update PTE R and C bits, so the 

[PATCH 17/23] KVM: PPC: Book3S HV: Factorize kvmppc_core_vcpu_create_hv()

2013-08-05 Thread Paul Mackerras
This splits kvmppc_core_vcpu_create_hv() into three functions and
adds a new kvmppc_free_vcores() to free the kvmppc_vcore structures
that we allocate for a guest, which are currently being leaked.
The reason for the split is to make the split-out code available
for later use in converting PR kvm_vcpu structs to HV use.

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/kvm/book3s_hv.c | 95 +++-
 1 file changed, 59 insertions(+), 36 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 13f79dd..c524d6b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -891,32 +891,51 @@ int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
return r;
 }
 
-struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, unsigned int id)
+static int kvmppc_alloc_vcore(struct kvm_vcpu *vcpu, unsigned int id)
 {
-   struct kvm_vcpu *vcpu;
-   int err = -EINVAL;
-   int core;
+   struct kvm *kvm = vcpu->kvm;
struct kvmppc_vcore *vcore;
+   int core;
 
core = id / threads_per_core;
if (core >= KVM_MAX_VCORES)
-   goto out;
+   return -EINVAL;
 
-   err = -ENOMEM;
-   vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
-   if (!vcpu)
-   goto out;
+   vcore = kvm->arch.vcores[core];
+   if (!vcore) {
+   vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL);
+   if (!vcore)
+   return -ENOMEM;
+   INIT_LIST_HEAD(&vcore->runnable_threads);
+   spin_lock_init(&vcore->lock);
+   init_waitqueue_head(&vcore->wq);
+   vcore->preempt_tb = TB_NIL;
+   kvm->arch.vcores[core] = vcore;
+   kvm->arch.online_vcores++;
+   }
 
-   err = kvm_vcpu_init(vcpu, kvm, id);
-   if (err)
-   goto free_vcpu;
+   spin_lock(&vcore->lock);
+   ++vcore->num_threads;
+   spin_unlock(&vcore->lock);
+   vcpu->arch.vcore = vcore;
+
+   return 0;
+}
 
+static void kvmppc_free_vcores(struct kvm *kvm)
+{
+   long int i;
+
+   for (i = 0; i < KVM_MAX_VCORES; ++i)
+   kfree(kvm->arch.vcores[i]);
+   kvm->arch.online_vcores = 0;
+}
+
+static void kvmppc_setup_hv_vcpu(struct kvm_vcpu *vcpu)
+{
vcpu->arch.shared = &vcpu->arch.shregs;
vcpu->arch.mmcr[0] = MMCR0_FC;
vcpu->arch.ctrl = CTRL_RUNLATCH;
-   /* default to host PVR, since we can't spoof it */
-   vcpu->arch.pvr = mfspr(SPRN_PVR);
-   kvmppc_set_pvr_hv(vcpu, vcpu->arch.pvr);
spin_lock_init(&vcpu->arch.vpa_update_lock);
spin_lock_init(&vcpu->arch.tbacct_lock);
vcpu->arch.busy_preempt = TB_NIL;
@@ -927,31 +946,34 @@ struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm 
*kvm, unsigned int id)
 
init_waitqueue_head(&vcpu->arch.cpu_run);
 
-   mutex_lock(&kvm->lock);
-   vcore = kvm->arch.vcores[core];
-   if (!vcore) {
-   vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL);
-   if (vcore) {
-   INIT_LIST_HEAD(&vcore->runnable_threads);
-   spin_lock_init(&vcore->lock);
-   init_waitqueue_head(&vcore->wq);
-   vcore->preempt_tb = TB_NIL;
-   }
-   kvm->arch.vcores[core] = vcore;
-   kvm->arch.online_vcores++;
-   }
-   mutex_unlock(&kvm->lock);
+   vcpu->arch.cpu_type = KVM_CPU_3S_64;
+   kvmppc_sanity_check(vcpu);
+}
 
-   if (!vcore)
+struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, unsigned int id)
+{
+   struct kvm_vcpu *vcpu;
+   int err = -EINVAL;
+
+   err = -ENOMEM;
+   vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+   if (!vcpu)
+   goto out;
+
+   err = kvm_vcpu_init(vcpu, kvm, id);
+   if (err)
goto free_vcpu;
 
-   spin_lock(&vcore->lock);
-   ++vcore->num_threads;
-   spin_unlock(&vcore->lock);
-   vcpu->arch.vcore = vcore;
+   /* default to host PVR, since we can't spoof it */
+   vcpu->arch.pvr = mfspr(SPRN_PVR);

-   vcpu->arch.cpu_type = KVM_CPU_3S_64;
-   kvmppc_sanity_check(vcpu);
+   mutex_lock(&kvm->lock);
+   err = kvmppc_alloc_vcore(vcpu, id);
+   mutex_unlock(&kvm->lock);
+   if (err)
+   goto free_vcpu;
+
+   kvmppc_setup_hv_vcpu(vcpu);
 
return vcpu;
 
@@ -1890,6 +1912,7 @@ void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
 {
uninhibit_secondary_onlining();
 
+   kvmppc_free_vcores(kvm);
if (kvm->arch.rma) {
kvm_release_rma(kvm->arch.rma);
kvm->arch.rma = NULL;
-- 
1.8.3.1



[PATCH 02/23] KVM: PPC: Book3S PR: Don't corrupt guest state when kernel uses VMX

2013-08-05 Thread Paul Mackerras
Currently the code assumes that once we load up guest FP/VSX or VMX
state into the CPU, it stays valid in the CPU registers until we
explicitly flush it to the thread_struct.  However, on POWER7,
copy_page() and memcpy() can use VMX.  These functions do flush the
VMX state to the thread_struct before using VMX instructions, but if
this happens while we have guest state in the VMX registers, and we
then re-enter the guest, we don't reload the VMX state from the
thread_struct, leading to guest corruption.  This has been observed
to cause guest processes to segfault.

To fix this, we check before re-entering the guest that all of the
bits corresponding to facilities owned by the guest, as expressed
in vcpu->arch.guest_owned_ext, are set in current->thread.regs->msr.
Any bits that have been cleared correspond to facilities that have
been used by kernel code and thus flushed to the thread_struct, so
for them we reload the state from the thread_struct.

We also need to check current->thread.regs->msr before calling
giveup_fpu() or giveup_altivec(), since if the relevant bit is
clear, the state has already been flushed to the thread_struct and
to flush it again would corrupt it.

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/kvm/book3s_pr.c | 29 +
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index ddfaf56..adeab19 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -468,7 +468,8 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
 * both the traditional FP registers and the added VSX
 * registers into thread.fpr[].
 */
-   giveup_fpu(current);
+   if (current->thread.regs->msr & MSR_FP)
+   giveup_fpu(current);
for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];
 
@@ -483,7 +484,8 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
 
 #ifdef CONFIG_ALTIVEC
if (msr & MSR_VEC) {
-   giveup_altivec(current);
+   if (current->thread.regs->msr & MSR_VEC)
+   giveup_altivec(current);
memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
vcpu->arch.vscr = t->vscr;
}
@@ -575,8 +577,6 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, 
unsigned int exit_nr,
printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
 #endif
 
-   current->thread.regs->msr |= msr;

if (msr & MSR_FP) {
for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
thread_fpr[get_fpr_index(i)] = vcpu_fpr[i];
@@ -598,12 +598,32 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, 
unsigned int exit_nr,
 #endif
}
 
+   current->thread.regs->msr |= msr;
vcpu->arch.guest_owned_ext |= msr;
kvmppc_recalc_shadow_msr(vcpu);
 
return RESUME_GUEST;
 }
 
+/*
+ * Kernel code using FP or VMX could have flushed guest state to
+ * the thread_struct; if so, get it back now.
+ */
+static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu)
+{
+   unsigned long lost_ext;
+
+   lost_ext = vcpu->arch.guest_owned_ext & ~current->thread.regs->msr;
+   if (!lost_ext)
+   return;
+
+   if (lost_ext & MSR_FP)
+   kvmppc_load_up_fpu();
+   if (lost_ext & MSR_VEC)
+   kvmppc_load_up_altivec();
+   current->thread.regs->msr |= lost_ext;
+}
+
 int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int exit_nr)
 {
@@ -892,6 +912,7 @@ program_interrupt:
} else {
kvmppc_fix_ee_before_entry();
}
+   kvmppc_handle_lost_ext(vcpu);
}
 
trace_kvm_book3s_reenter(r, vcpu);
-- 
1.8.3.1



[PATCH 06/23] KVM: PPC: Book3S PR: Allow guest to use 64k pages

2013-08-05 Thread Paul Mackerras
This adds the code to interpret 64k HPTEs in the guest hashed page
table (HPT), 64k SLB entries, and to tell the guest about 64k pages
in kvm_vm_ioctl_get_smmu_info().  Guest 64k pages are still shadowed
by 4k pages.

This also adds another hash table to the four we have already in
book3s_mmu_hpte.c to allow us to find all the PTEs that we have
instantiated that match a given 64k guest page.

The tlbie instruction changed starting with POWER6 to use a bit in
the RB operand to indicate large page invalidations, and to use other
RB bits to indicate the base and actual page sizes and the segment
size.  64k pages came in slightly earlier, with POWER5++.  At present
we use one bit in vcpu->arch.hflags to indicate that the emulated
cpu supports 64k pages and also has the new tlbie definition.  If
we ever want to support emulation of POWER5++, we will need to use
another bit.

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/include/asm/kvm_asm.h|  1 +
 arch/powerpc/include/asm/kvm_book3s.h |  6 +++
 arch/powerpc/include/asm/kvm_host.h   |  4 ++
 arch/powerpc/kvm/book3s_64_mmu.c  | 92 +++
 arch/powerpc/kvm/book3s_mmu_hpte.c| 50 +++
 arch/powerpc/kvm/book3s_pr.c  | 30 +++-
 6 files changed, 173 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_asm.h 
b/arch/powerpc/include/asm/kvm_asm.h
index 851bac7..3d70b7e 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -123,6 +123,7 @@
 #define BOOK3S_HFLAG_SLB   0x2
 #define BOOK3S_HFLAG_PAIRED_SINGLE 0x4
 #define BOOK3S_HFLAG_NATIVE_PS 0x8
+#define BOOK3S_HFLAG_MULTI_PGSIZE  0x10
 
 #define RESUME_FLAG_NV         (1<<0)  /* Reload guest nonvolatile state? */
 #define RESUME_FLAG_HOST       (1<<1)  /* Resume host? */
diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index a8897c1..175f876 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -58,6 +58,9 @@ struct hpte_cache {
struct hlist_node list_pte_long;
struct hlist_node list_vpte;
struct hlist_node list_vpte_long;
+#ifdef CONFIG_PPC_BOOK3S_64
+   struct hlist_node list_vpte_64k;
+#endif
struct rcu_head rcu_head;
u64 host_vpn;
u64 pfn;
@@ -99,6 +102,9 @@ struct kvmppc_vcpu_book3s {
struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG];
struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE];
struct hlist_head hpte_hash_vpte_long[HPTEG_HASH_NUM_VPTE_LONG];
+#ifdef CONFIG_PPC_BOOK3S_64
+   struct hlist_head hpte_hash_vpte_64k[HPTEG_HASH_NUM_VPTE_64K];
+#endif
int hpte_cache_count;
spinlock_t mmu_lock;
 };
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 7b26395..2d3c770 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -73,10 +73,12 @@ extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long 
hva, pte_t pte);
 #define HPTEG_HASH_BITS_PTE_LONG   12
 #define HPTEG_HASH_BITS_VPTE   13
 #define HPTEG_HASH_BITS_VPTE_LONG  5
+#define HPTEG_HASH_BITS_VPTE_64K   11
 #define HPTEG_HASH_NUM_PTE         (1 << HPTEG_HASH_BITS_PTE)
 #define HPTEG_HASH_NUM_PTE_LONG    (1 << HPTEG_HASH_BITS_PTE_LONG)
 #define HPTEG_HASH_NUM_VPTE        (1 << HPTEG_HASH_BITS_VPTE)
 #define HPTEG_HASH_NUM_VPTE_LONG   (1 << HPTEG_HASH_BITS_VPTE_LONG)
+#define HPTEG_HASH_NUM_VPTE_64K    (1 << HPTEG_HASH_BITS_VPTE_64K)
 
 /* Physical Address Mask - allowed range of real mode RAM access */
 #define KVM_PAM                    0x0fffffffffffffffULL
@@ -328,6 +330,7 @@ struct kvmppc_pte {
bool may_read   : 1;
bool may_write  : 1;
bool may_execute: 1;
+   u8 page_size;   /* MMU_PAGE_xxx */
 };
 
 struct kvmppc_mmu {
@@ -360,6 +363,7 @@ struct kvmppc_slb {
bool large  : 1;/* PTEs are 16MB */
bool tb : 1;/* 1TB segment */
bool class  : 1;
+   u8 base_page_size;  /* MMU_PAGE_xxx */
 };
 
 # ifdef CONFIG_PPC_FSL_BOOK3E
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 7e345e0..d5fa26c 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -107,9 +107,20 @@ static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu 
*vcpu, gva_t eaddr,
return kvmppc_slb_calc_vpn(slb, eaddr);
 }
 
+static int mmu_pagesize(int mmu_pg)
+{
+   switch (mmu_pg) {
+   case MMU_PAGE_64K:
+   return 16;
+   case MMU_PAGE_16M:
+   return 24;
+   }
+   return 12;
+}
+
 static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe)
 {
-   return slbe->large ? 24 : 12;
+   return mmu_pagesize(slbe->base_page_size);
 

[PATCH 03/23] KVM: PPC: Book3S PR: Make instruction fetch fallback work for system calls

2013-08-05 Thread Paul Mackerras
It turns out that if we exit the guest due to a hcall instruction (sc 1),
and the loading of the instruction in the guest exit path fails for any
reason, the call to kvmppc_ld() in kvmppc_get_last_inst() fetches the
instruction after the hcall instruction rather than the hcall itself.
This in turn means that the instruction doesn't get recognized as an
hcall in kvmppc_handle_exit_pr() but gets passed to the guest kernel
as a sc instruction.  That usually results in the guest kernel getting
a return code of 38 (ENOSYS) from an hcall, which often triggers a
BUG_ON() or other failure.

This fixes the problem by adding a new variant of kvmppc_get_last_inst()
called kvmppc_get_last_sc(), which fetches the instruction if necessary
from pc - 4 rather than pc.

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/include/asm/kvm_book3s.h | 38 +++
 arch/powerpc/kvm/book3s_pr.c  |  2 +-
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index 08891d0..fa19e2f 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -334,6 +334,27 @@ static inline u32 kvmppc_get_last_inst(struct kvm_vcpu 
*vcpu)
return r;
 }
 
+/*
+ * Like kvmppc_get_last_inst(), but for fetching a sc instruction.
+ * Because the sc instruction sets SRR0 to point to the following
+ * instruction, we have to fetch from pc - 4.
+ */
+static inline u32 kvmppc_get_last_sc(struct kvm_vcpu *vcpu)
+{
+   ulong pc = kvmppc_get_pc(vcpu) - 4;
+   struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+   u32 r;
+
+   /* Load the instruction manually if it failed to do so in the
+* exit path */
+   if (svcpu->last_inst == KVM_INST_FETCH_FAILED)
+   kvmppc_ld(vcpu, &pc, sizeof(u32), &svcpu->last_inst, false);
+
+   r = svcpu->last_inst;
+   svcpu_put(svcpu);
+   return r;
+}
+
 static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
 {
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
@@ -446,6 +467,23 @@ static inline u32 kvmppc_get_last_inst(struct kvm_vcpu 
*vcpu)
return vcpu->arch.last_inst;
 }
 
+/*
+ * Like kvmppc_get_last_inst(), but for fetching a sc instruction.
+ * Because the sc instruction sets SRR0 to point to the following
+ * instruction, we have to fetch from pc - 4.
+ */
+static inline u32 kvmppc_get_last_sc(struct kvm_vcpu *vcpu)
+{
+   ulong pc = kvmppc_get_pc(vcpu) - 4;
+
+   /* Load the instruction manually if it failed to do so in the
+* exit path */
+   if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED)
+   kvmppc_ld(vcpu, &pc, sizeof(u32), &vcpu->arch.last_inst, false);
+
+   return vcpu->arch.last_inst;
+}
+
 static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
 {
return vcpu-arch.fault_dar;
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index adeab19..6cb29ef 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -792,7 +792,7 @@ program_interrupt:
}
case BOOK3S_INTERRUPT_SYSCALL:
if (vcpu->arch.papr_enabled &&
-   (kvmppc_get_last_inst(vcpu) == 0x44000022) &&
+   (kvmppc_get_last_sc(vcpu) == 0x44000022) &&
!(vcpu->arch.shared->msr & MSR_PR)) {
/* SC 1 papr hypercalls */
ulong cmd = kvmppc_get_gpr(vcpu, 3);
-- 
1.8.3.1



[PATCH 20/23] KVM: PPC: Book3S PR: Better handling of host-side read-only pages

2013-08-05 Thread Paul Mackerras
Currently we request write access to all pages that get mapped into the
guest, even if the guest is only loading from the page.  This reduces
the effectiveness of KSM because it means that we unshare every page we
access.  Also, we always set the changed (C) bit in the guest HPTE if
it allows writing, even for a guest load.

This fixes both these problems.  We pass an 'iswrite' flag to the
mmu.xlate() functions and to kvmppc_mmu_map_page() to indicate whether
the access is a load or a store.  The mmu.xlate() functions now only
set C for stores.  kvmppc_gfn_to_pfn() now calls gfn_to_pfn_prot()
instead of gfn_to_pfn() so that it can indicate whether we need write
access to the page, and get back a 'writable' flag to indicate whether
the page is writable or not.  If that 'writable' flag is clear, we then
make the host HPTE read-only even if the guest HPTE allowed writing.

This means that we can get a protection fault when the guest writes to a
page that it has mapped read-write but which is read-only on the host
side (perhaps due to KSM having merged the page).  Thus we now call
kvmppc_handle_pagefault() for protection faults as well as HPTE not found
faults.  In kvmppc_handle_pagefault(), if the access was allowed by the
guest HPTE and we thus need to install a new host HPTE, we then need to
remove the old host HPTE if there is one.  This is done with a new
function, kvmppc_mmu_unmap_page(), which uses kvmppc_mmu_pte_vflush() to
find and remove the old host HPTE.

Since the memslot-related functions require the KVM SRCU read lock to
be held, this adds srcu_read_lock/unlock pairs around the calls to
kvmppc_handle_pagefault().

Finally, this changes kvmppc_mmu_book3s_32_xlate_pte() to not ignore
guest HPTEs that don't permit access, and to return -EPERM for accesses
that are not permitted by the page protections.

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/include/asm/kvm_book3s.h |  7 +--
 arch/powerpc/include/asm/kvm_host.h   |  3 ++-
 arch/powerpc/kvm/book3s.c | 15 +--
 arch/powerpc/kvm/book3s_32_mmu.c  | 32 +---
 arch/powerpc/kvm/book3s_32_mmu_host.c | 14 +++---
 arch/powerpc/kvm/book3s_64_mmu.c  |  9 +
 arch/powerpc/kvm/book3s_64_mmu_host.c | 20 +---
 arch/powerpc/kvm/book3s_64_mmu_hv.c   |  2 +-
 arch/powerpc/kvm/book3s_pr.c  | 29 -
 9 files changed, 91 insertions(+), 40 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index e0bc83b..4fe6864 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -129,7 +129,9 @@ extern void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 
new_msr);
 extern void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu);
 extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu);
 extern void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu);
-extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte);
+extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte,
+  bool iswrite);
+extern void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte 
*pte);
 extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr);
 extern void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong eaddr, ulong 
seg_size);
 extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu);
@@ -158,7 +160,8 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct 
kvmppc_bat *bat,
   bool upper, u32 val);
 extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
 extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu 
*vcpu);
-extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
+extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing,
+   bool *writable);
 extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
unsigned long *rmap, long pte_index, int realmode);
 extern void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep,
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 138e781..52c7b80 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -356,7 +356,8 @@ struct kvmppc_mmu {
/* book3s */
void (*mtsrin)(struct kvm_vcpu *vcpu, u32 srnum, ulong value);
u32  (*mfsrin)(struct kvm_vcpu *vcpu, u32 srnum);
-   int  (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte 
*pte, bool data);
+   int  (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr,
+ struct kvmppc_pte *pte, bool data, bool iswrite);
void (*reset_msr)(struct kvm_vcpu *vcpu);
void (*tlbie)(struct kvm_vcpu *vcpu, ulong addr, bool large);
int  (*esid_to_vsid)(struct kvm_vcpu *vcpu, ulong 

[PATCH 01/23] KVM: PPC: Book3S: Fix compile error in XICS emulation

2013-08-05 Thread Paul Mackerras
Commit 8e44ddc3f3 ("powerpc/kvm/book3s: Add support for H_IPOLL and
H_XIRR_X in XICS emulation") added a call to get_tb() but didn't
include the header that defines it, and on some configs this means
book3s_xics.c fails to compile:

arch/powerpc/kvm/book3s_xics.c: In function ‘kvmppc_xics_hcall’:
arch/powerpc/kvm/book3s_xics.c:812:3: error: implicit declaration of function 
‘get_tb’ [-Werror=implicit-function-declaration]

Cc: sta...@vger.kernel.org [v3.10]
Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/kvm/book3s_xics.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 94c1dd4..a3a5cb8 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -19,6 +19,7 @@
 #include <asm/hvcall.h>
 #include <asm/xics.h>
 #include <asm/debug.h>
+#include <asm/time.h>
 
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
-- 
1.8.3.1



[PATCH 23/23] KVM: PPC: Book3S PR: Reduce number of shadow PTEs invalidated by MMU notifiers

2013-08-05 Thread Paul Mackerras
Currently, whenever any of the MMU notifier callbacks get called, we
invalidate all the shadow PTEs.  This is inefficient because it means
that we typically then get a lot of DSIs and ISIs in the guest to fault
the shadow PTEs back in.  We do this even if the address range being
notified doesn't correspond to guest memory.

This commit adds code to scan the memslot array to find out what range(s)
of guest physical addresses corresponds to the host virtual address range
being affected.  For each such range we flush only the shadow PTEs
for the range, on all cpus.

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/kvm/book3s_pr.c | 40 
 1 file changed, 32 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 71f7cfe..2336d9c 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -150,16 +150,41 @@ int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu)
 }
 
 /* MMU Notifiers */
+static void do_kvm_unmap_hva(struct kvm *kvm, unsigned long start,
+unsigned long end)
+{
+   long i;
+   struct kvm_vcpu *vcpu;
+   struct kvm_memslots *slots;
+   struct kvm_memory_slot *memslot;
+
+   slots = kvm_memslots(kvm);
+   kvm_for_each_memslot(memslot, slots) {
+   unsigned long hva_start, hva_end;
+   gfn_t gfn, gfn_end;
+
+   hva_start = max(start, memslot->userspace_addr);
+   hva_end = min(end, memslot->userspace_addr +
+   (memslot->npages << PAGE_SHIFT));
+   if (hva_start >= hva_end)
+   continue;
+   /*
+* {gfn(page) | page intersects with [hva_start, hva_end)} =
+* {gfn, gfn+1, ..., gfn_end-1}.
+*/
+   gfn = hva_to_gfn_memslot(hva_start, memslot);
+   gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
+   kvm_for_each_vcpu(i, vcpu, kvm)
+   kvmppc_mmu_pte_pflush(vcpu, gfn << PAGE_SHIFT,
+ gfn_end << PAGE_SHIFT);
+   }
+}
 
 int kvm_unmap_hva_pr(struct kvm *kvm, unsigned long hva)
 {
trace_kvm_unmap_hva(hva);
 
-   /*
-* Flush all shadow tlb entries everywhere. This is slow, but
-* we are 100% sure that we catch the to be unmapped page
-*/
-   kvm_flush_remote_tlbs(kvm);
+   do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE);
 
return 0;
 }
@@ -167,8 +192,7 @@ int kvm_unmap_hva_pr(struct kvm *kvm, unsigned long hva)
 int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start,
   unsigned long end)
 {
-   /* kvm_unmap_hva flushes everything anyways */
-   kvm_unmap_hva(kvm, start);
+   do_kvm_unmap_hva(kvm, start, end);
 
return 0;
 }
@@ -188,7 +212,7 @@ int kvm_test_age_hva_pr(struct kvm *kvm, unsigned long hva)
 void kvm_set_spte_hva_pr(struct kvm *kvm, unsigned long hva, pte_t pte)
 {
/* The page will get remapped properly on its next fault */
-   kvm_unmap_hva(kvm, hva);
+   do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE);
 }
 
 /*/
-- 
1.8.3.1



[PATCH 21/23] KVM: PPC: Book3S PR: Use mmu_notifier_retry() in kvmppc_mmu_map_page()

2013-08-05 Thread Paul Mackerras
When the MM code is invalidating a range of pages, it calls the KVM
kvm_mmu_notifier_invalidate_range_start() notifier function, which calls
kvm_unmap_hva_range(), which arranges to flush all the existing host
HPTEs for guest pages.  However, the Linux PTEs for the range being
flushed are still valid at that point.  We are not supposed to establish
any new references to pages in the range until the ...range_end()
notifier gets called.  The PPC-specific KVM code doesn't get any
explicit notification of that; instead, we are supposed to use
mmu_notifier_retry() to test whether we are or have been inside a
range flush notifier pair while we have been getting a page and
instantiating a host HPTE for the page.

This therefore adds a call to mmu_notifier_retry inside
kvmppc_mmu_map_page().  This call is inside a region locked with
kvm->mmu_lock, which is the same lock that is used by the KVM
MMU notifier functions, thus ensuring that no new notification can
proceed while we are in the locked region.  Inside this region we
also create the host HPTE and link the corresponding hpte_cache
structure into the lists used to find it later.  We cannot allocate
the hpte_cache structure inside this locked region because that can
lead to deadlock, so we allocate it outside the region and free it
if we end up not using it.

This also moves the updates of vcpu3s->hpte_cache_count inside the
regions locked with vcpu3s->mmu_lock, and does the increment in
kvmppc_mmu_hpte_cache_map() when the pte is added to the cache
rather than when it is allocated, in order that the hpte_cache_count
is accurate.
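
Condensed, the retry pattern described above has the usual KVM shape; the
function name sketch_map_page is hypothetical and the HPTE-insertion details
are elided, so treat this as a sketch rather than the literal diff:

/* Sketch of the pattern (simplified; error paths and HPTE details elided). */
static int sketch_map_page(struct kvm_vcpu *vcpu, gfn_t gfn, bool iswrite)
{
	struct kvm *kvm = vcpu->kvm;
	unsigned long mmu_seq;
	bool writable;
	pfn_t pfn;

	/* 1. Sample the invalidation sequence count first. */
	mmu_seq = kvm->mmu_notifier_seq;
	smp_rmb();

	/* 2. Resolve gfn -> pfn (may sleep/fault) outside the lock. */
	pfn = kvmppc_gfn_to_pfn(vcpu, gfn, iswrite, &writable);

	/* 3. Re-check under kvm->mmu_lock: if an invalidation started or
	 *    finished since step 1, back out and let the guest fault again
	 *    rather than installing a possibly stale translation. */
	spin_lock(&kvm->mmu_lock);
	if (mmu_notifier_retry(kvm, mmu_seq)) {
		spin_unlock(&kvm->mmu_lock);
		return -EAGAIN;
	}
	/* ... install the host HPTE for pfn and link the hpte_cache entry ... */
	spin_unlock(&kvm->mmu_lock);
	return 0;
}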

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/include/asm/kvm_book3s.h |  1 +
 arch/powerpc/kvm/book3s_64_mmu_host.c | 37 ++-
 arch/powerpc/kvm/book3s_mmu_hpte.c| 14 +
 3 files changed, 39 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index 4fe6864..e711e77 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -143,6 +143,7 @@ extern long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t 
eaddr,
 
 extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache 
*pte);
 extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu);
+extern void kvmppc_mmu_hpte_cache_free(struct hpte_cache *pte);
 extern void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu);
 extern int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu);
 extern void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache 
*pte);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c 
b/arch/powerpc/kvm/book3s_64_mmu_host.c
index 7fcf38f..b7e9504 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -93,6 +93,13 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct 
kvmppc_pte *orig_pte,
int r = 0;
int hpsize = MMU_PAGE_4K;
bool writable;
+   unsigned long mmu_seq;
+   struct kvm *kvm = vcpu->kvm;
+   struct hpte_cache *cpte;
+
+   /* used to check for invalidations in progress */
+   mmu_seq = kvm->mmu_notifier_seq;
+   smp_rmb();
 
/* Get host physical address for gpa */
	hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT,
@@ -143,6 +150,14 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct 
kvmppc_pte *orig_pte,
 
hash = hpt_hash(vpn, mmu_psize_defs[hpsize].shift, MMU_SEGSIZE_256M);
 
+   cpte = kvmppc_mmu_hpte_cache_next(vcpu);
+
+   spin_lock(&kvm->mmu_lock);
+   if (!cpte || mmu_notifier_retry(kvm, mmu_seq)) {
+   r = -EAGAIN;
+   goto out_unlock;
+   }
+
 map_again:
hpteg = ((hash  htab_hash_mask) * HPTES_PER_GROUP);
 
@@ -150,7 +165,7 @@ map_again:
if (attempt  1)
if (ppc_md.hpte_remove(hpteg)  0) {
r = -1;
-   goto out;
+   goto out_unlock;
}
 
ret = ppc_md.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags,
@@ -163,8 +178,6 @@ map_again:
attempt++;
goto map_again;
} else {
-   struct hpte_cache *pte = kvmppc_mmu_hpte_cache_next(vcpu);
-
trace_kvm_book3s_64_mmu_map(rflags, hpteg,
vpn, hpaddr, orig_pte);
 
@@ -175,15 +188,21 @@ map_again:
hpteg = ((hash  htab_hash_mask) * HPTES_PER_GROUP);
}
 
-   pte->slot = hpteg + (ret & 7);
-   pte->host_vpn = vpn;
-   pte->pte = *orig_pte;
-   pte->pfn = hpaddr >> PAGE_SHIFT;
-   pte->pagesize = hpsize;
+   cpte->slot = hpteg + (ret & 7);
+   cpte->host_vpn = vpn;
+   cpte->pte = *orig_pte;
+   cpte->pfn = hpaddr >> PAGE_SHIFT;
+   cpte->pagesize = hpsize;
 
-   

[PATCH 04/23] KVM: PPC: Book3S PR: Keep volatile reg values in vcpu rather than shadow_vcpu

2013-08-05 Thread Paul Mackerras
Currently PR-style KVM keeps the volatile guest register values
(R0 - R13, CR, LR, CTR, XER, PC) in a shadow_vcpu struct rather than
the main kvm_vcpu struct.  For 64-bit, the shadow_vcpu exists in two
places, a kmalloc'd struct and in the PACA, and it gets copied back
and forth in kvmppc_core_vcpu_load/put(), because the real-mode code
can't rely on being able to access the kmalloc'd struct.

This changes the code to copy the volatile values into the shadow_vcpu
as one of the last things done before entering the guest.  Similarly
the values are copied back out of the shadow_vcpu to the kvm_vcpu
immediately after exiting the guest.  We arrange for interrupts to be
still disabled at this point so that we can't get preempted on 64-bit
and end up copying values from the wrong PACA.

This means that the accessor functions in kvm_book3s.h for these
registers are greatly simplified, and are same between PR and HV KVM.
In places where accesses to shadow_vcpu fields are now replaced by
accesses to the kvm_vcpu, we can also remove the svcpu_get/put pairs.
Finally, on 64-bit, we don't need the kmalloc'd struct at all any more.

With this, the time to read the PVR one million times in a loop went
from 582.1ms to 584.3ms (averages of 10 values), a difference which is
not statistically significant given the variability of the results
(the standard deviations were 9.5ms and 8.6ms respectively).  A version
of the patch that used loops to copy the GPR values increased that time
by around 5% to 611.2ms, so the loop has been unrolled.
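
The copy-in/copy-out scheme reduces to the sketch below, which is
illustrative only: struct volatile_regs and the function names are invented
stand-ins for the real shadow_vcpu/kvm_vcpu fields.

struct volatile_regs {
	unsigned long gpr[14];
	unsigned long cr, xer, ctr, lr, pc;
};

/* Copy-in is one of the last steps before entering the guest; interrupts
 * are already disabled, so we cannot be preempted and end up writing to
 * the shadow state in the wrong PACA. */
static void copy_to_shadow(struct volatile_regs *shadow,
			   const struct volatile_regs *vcpu)
{
	*shadow = *vcpu;	/* the patch unrolls this into per-register stores */
}

/* Copy-out happens immediately after guest exit, still with interrupts
 * disabled, before anything else can touch the vcpu. */
static void copy_from_shadow(struct volatile_regs *vcpu,
			     const struct volatile_regs *shadow)
{
	*vcpu = *shadow;
}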

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/include/asm/kvm_book3s.h | 220 +-
 arch/powerpc/include/asm/kvm_book3s_asm.h |   6 +-
 arch/powerpc/include/asm/kvm_host.h   |   1 +
 arch/powerpc/kernel/asm-offsets.c |   4 +-
 arch/powerpc/kvm/book3s_emulate.c |   8 +-
 arch/powerpc/kvm/book3s_interrupts.S  |  26 +++-
 arch/powerpc/kvm/book3s_pr.c  | 122 -
 arch/powerpc/kvm/book3s_rmhandlers.S  |   5 -
 arch/powerpc/kvm/trace.h  |   7 +-
 9 files changed, 156 insertions(+), 243 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index fa19e2f..a8897c1 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -198,140 +198,76 @@ extern void kvm_return_point(void);
 #include asm/kvm_book3s_64.h
 #endif
 
-#ifdef CONFIG_KVM_BOOK3S_PR
-
-static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
-{
-   return to_book3s(vcpu)-hior;
-}
-
-static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
-   unsigned long pending_now, unsigned long old_pending)
-{
-   if (pending_now)
-   vcpu-arch.shared-int_pending = 1;
-   else if (old_pending)
-   vcpu-arch.shared-int_pending = 0;
-}
-
 static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
 {
-   if ( num  14 ) {
-   struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-   svcpu-gpr[num] = val;
-   svcpu_put(svcpu);
-   to_book3s(vcpu)-shadow_vcpu-gpr[num] = val;
-   } else
-   vcpu-arch.gpr[num] = val;
+   vcpu->arch.gpr[num] = val;
 }
 
 static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
 {
-   if ( num  14 ) {
-   struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-   ulong r = svcpu-gpr[num];
-   svcpu_put(svcpu);
-   return r;
-   } else
-   return vcpu-arch.gpr[num];
+   return vcpu->arch.gpr[num];
 }
 
 static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
 {
-   struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-   svcpu-cr = val;
-   svcpu_put(svcpu);
-   to_book3s(vcpu)-shadow_vcpu-cr = val;
+   vcpu->arch.cr = val;
 }
 
 static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
 {
-   struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-   u32 r;
-   r = svcpu-cr;
-   svcpu_put(svcpu);
-   return r;
+   return vcpu->arch.cr;
 }
 
 static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
 {
-   struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-   svcpu-xer = val;
-   to_book3s(vcpu)-shadow_vcpu-xer = val;
-   svcpu_put(svcpu);
+   vcpu->arch.xer = val;
 }
 
 static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
 {
-   struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-   u32 r;
-   r = svcpu-xer;
-   svcpu_put(svcpu);
-   return r;
+   return vcpu->arch.xer;
 }
 
 static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val)
 {
-   struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-   svcpu-ctr = val;
-   svcpu_put(svcpu);
+   vcpu->arch.ctr = val;
 }
 
 static inline ulong kvmppc_get_ctr(struct 

[PATCH 05/23] KVM: PPC: Book3S PR: Rework kvmppc_mmu_book3s_64_xlate()

2013-08-05 Thread Paul Mackerras
This reworks kvmppc_mmu_book3s_64_xlate() to make it check the large
page bit in the hashed page table entries (HPTEs) it looks at, and
to simplify and streamline the code.  The checking of the first dword
of each HPTE is now done with a single mask and compare operation,
and all the code dealing with the matching HPTE, if we find one,
is consolidated in one place in the main line of the function flow.
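
The single mask-and-compare boils down to the sketch below; the bit values
shown are placeholders, and the authoritative definitions are the HPTE_V_*
and SLB_VSID_B constants in asm/mmu-hash64.h.

#include <stdbool.h>
#include <stdint.h>

#define HPTE_V_AVPN		0x3fffffffffffff80ULL	/* placeholder values */
#define HPTE_V_LARGE		0x0000000000000004ULL
#define HPTE_V_SECONDARY	0x0000000000000002ULL
#define HPTE_V_VALID		0x0000000000000001ULL
#define SLB_VSID_B		0xc000000000000000ULL

/* One compare covers the valid, secondary-hash, large-page and AVPN
 * checks that were previously done separately. */
static bool hpte_matches(uint64_t v, uint64_t v_val)
{
	uint64_t v_mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_LARGE |
			  HPTE_V_VALID | HPTE_V_SECONDARY;

	return (v & v_mask) == v_val;
}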

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/kvm/book3s_64_mmu.c | 150 +++
 1 file changed, 72 insertions(+), 78 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 739bfba..7e345e0 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -182,10 +182,13 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu 
*vcpu, gva_t eaddr,
hva_t ptegp;
u64 pteg[16];
u64 avpn = 0;
+   u64 v, r;
+   u64 v_val, v_mask;
+   u64 eaddr_mask;
int i;
-   u8 key = 0;
+   u8 pp, key = 0;
bool found = false;
-   int second = 0;
+   bool second = false;
ulong mp_ea = vcpu-arch.magic_page_ea;
 
/* Magic page override */
@@ -208,8 +211,16 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu 
*vcpu, gva_t eaddr,
goto no_seg_found;
 
avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr);
+   v_val = avpn & HPTE_V_AVPN;
+
if (slbe-tb)
-   avpn |= SLB_VSID_B_1T;
+   v_val |= SLB_VSID_B_1T;
+   if (slbe-large)
+   v_val |= HPTE_V_LARGE;
+   v_val |= HPTE_V_VALID;
+
+   v_mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_LARGE | HPTE_V_VALID |
+   HPTE_V_SECONDARY;
 
 do_second:
ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second);
@@ -227,91 +238,74 @@ do_second:
key = 4;
 
	for (i=0; i<16; i+=2) {
-   u64 v = pteg[i];
-   u64 r = pteg[i+1];
-
-   /* Valid check */
-   if (!(v  HPTE_V_VALID))
-   continue;
-   /* Hash check */
-   if ((v  HPTE_V_SECONDARY) != second)
-   continue;
-
-   /* AVPN compare */
-   if (HPTE_V_COMPARE(avpn, v)) {
-   u8 pp = (r  HPTE_R_PP) | key;
-   int eaddr_mask = 0xFFF;
-
-   gpte-eaddr = eaddr;
-   gpte-vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu,
-   eaddr,
-   data);
-   if (slbe-large)
-   eaddr_mask = 0xFF;
-   gpte-raddr = (r  HPTE_R_RPN) | (eaddr  eaddr_mask);
-   gpte-may_execute = ((r  HPTE_R_N) ? false : true);
-   gpte-may_read = false;
-   gpte-may_write = false;
-
-   switch (pp) {
-   case 0:
-   case 1:
-   case 2:
-   case 6:
-   gpte-may_write = true;
-   /* fall through */
-   case 3:
-   case 5:
-   case 7:
-   gpte-may_read = true;
-   break;
-   }
-
-   dprintk(KVM MMU: Translated 0x%lx [0x%llx] - 0x%llx 
-   - 0x%lx\n,
-   eaddr, avpn, gpte-vpage, gpte-raddr);
+   /* Check all relevant fields of 1st dword */
+   if ((pteg[i] & v_mask) == v_val) {
found = true;
break;
}
}
 
-   /* Update PTE R and C bits, so the guest's swapper knows we used the
-* page */
-   if (found) {
-   u32 oldr = pteg[i+1];
+   if (!found) {
+   if (second)
+   goto no_page_found;
+   v_val |= HPTE_V_SECONDARY;
+   second = true;
+   goto do_second;
+   }
 
-   if (gpte-may_read) {
-   /* Set the accessed flag */
-   pteg[i+1] |= HPTE_R_R;
-   }
-   if (gpte-may_write) {
-   /* Set the dirty flag */
-   pteg[i+1] |= HPTE_R_C;
-   } else {
-   dprintk(KVM: Mapping read-only page!\n);
-   }
+   v = pteg[i];
+   r = pteg[i+1];
+   pp = (r & HPTE_R_PP) | key;
+   eaddr_mask = 0xFFF;
+
+   gpte->eaddr = eaddr;
+   gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data);
+   if (slbe->large)
+   eaddr_mask = 0xFF;
+   gpte-raddr = (r  

[PATCH 07/23] KVM: PPC: Book3S PR: Use 64k host pages where possible

2013-08-05 Thread Paul Mackerras
Currently, PR KVM uses 4k pages for the host-side mappings of guest
memory, regardless of the host page size.  When the host page size is
64kB, we might as well use 64k host page mappings for guest mappings
of 64kB and larger pages and for guest real-mode mappings.  However,
the magic page has to remain a 4k page.

To implement this, we first add another flag bit to the guest VSID
values we use, to indicate that this segment is one where host pages
should be mapped using 64k pages.  For segments with this bit set
we set the bits in the shadow SLB entry to indicate a 64k base page
size.  When faulting in host HPTEs for this segment, we make them
64k HPTEs instead of 4k.  We record the pagesize in struct hpte_cache
for use when invalidating the HPTE.
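
How the flag bit selects the host page size can be sketched as below; this
is illustrative only, with placeholder values for VSID_64K (really defined
in kvm_book3s.h) and for the MMU_PAGE_* indices.

#include <stdint.h>

#define VSID_64K	(1ULL << 59)	/* placeholder for the real definition */
#define MMU_PAGE_4K	0		/* placeholder page-size indices */
#define MMU_PAGE_64K	1

static int shadow_base_page_size(uint64_t gvsid)
{
	/* segment flagged as 64k-capable: map it with 64k host pages,
	 * otherwise fall back to 4k (e.g. for the magic page's segment) */
	return (gvsid & VSID_64K) ? MMU_PAGE_64K : MMU_PAGE_4K;
}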

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/include/asm/kvm_book3s.h |  6 --
 arch/powerpc/kvm/book3s_32_mmu.c  |  1 +
 arch/powerpc/kvm/book3s_64_mmu.c  | 35 ++-
 arch/powerpc/kvm/book3s_64_mmu_host.c | 27 +--
 arch/powerpc/kvm/book3s_pr.c  |  1 +
 5 files changed, 57 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index 175f876..322b539 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -66,6 +66,7 @@ struct hpte_cache {
u64 pfn;
ulong slot;
struct kvmppc_pte pte;
+   int pagesize;
 };
 
 struct kvmppc_vcpu_book3s {
@@ -113,8 +114,9 @@ struct kvmppc_vcpu_book3s {
 #define CONTEXT_GUEST  1
 #define CONTEXT_GUEST_END  2
 
-#define VSID_REAL  0x0fc0ULL
-#define VSID_BAT   0x0fb0ULL
+#define VSID_REAL  0x07c0ULL
+#define VSID_BAT   0x07b0ULL
+#define VSID_64K   0x0800ULL
 #define VSID_1T0x1000ULL
 #define VSID_REAL_DR   0x2000ULL
 #define VSID_REAL_IR   0x4000ULL
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index c8cefdd..af04553 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -308,6 +308,7 @@ static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu 
*vcpu, gva_t eaddr,
ulong mp_ea = vcpu-arch.magic_page_ea;
 
pte-eaddr = eaddr;
+   pte-page_size = MMU_PAGE_4K;
 
/* Magic page override */
if (unlikely(mp_ea) 
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index d5fa26c..658ccd7 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -542,6 +542,16 @@ static void kvmppc_mmu_book3s_64_tlbie(struct kvm_vcpu 
*vcpu, ulong va,
kvmppc_mmu_pte_vflush(vcpu, va  12, mask);
 }
 
+#ifdef CONFIG_PPC_64K_PAGES
+static int segment_contains_magic_page(struct kvm_vcpu *vcpu, ulong esid)
+{
+   ulong mp_ea = vcpu->arch.magic_page_ea;
+
+   return mp_ea && !(vcpu->arch.shared->msr & MSR_PR) &&
+   (mp_ea >> SID_SHIFT) == esid;
+}
+#endif
+
 static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
 u64 *vsid)
 {
@@ -549,11 +559,13 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct 
kvm_vcpu *vcpu, ulong esid,
struct kvmppc_slb *slb;
u64 gvsid = esid;
ulong mp_ea = vcpu-arch.magic_page_ea;
+   int pagesize = MMU_PAGE_64K;
 
if (vcpu-arch.shared-msr  (MSR_DR|MSR_IR)) {
slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea);
if (slb) {
gvsid = slb-vsid;
+   pagesize = slb->base_page_size;
if (slb-tb) {
gvsid = SID_SHIFT_1T - SID_SHIFT;
gvsid |= esid  ((1ul  (SID_SHIFT_1T - 
SID_SHIFT)) - 1);
@@ -564,28 +576,41 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct 
kvm_vcpu *vcpu, ulong esid,
 
switch (vcpu-arch.shared-msr  (MSR_DR|MSR_IR)) {
case 0:
-   *vsid = VSID_REAL | esid;
+   gvsid = VSID_REAL | esid;
break;
case MSR_IR:
-   *vsid = VSID_REAL_IR | gvsid;
+   gvsid |= VSID_REAL_IR;
break;
case MSR_DR:
-   *vsid = VSID_REAL_DR | gvsid;
+   gvsid |= VSID_REAL_DR;
break;
case MSR_DR|MSR_IR:
if (!slb)
goto no_slb;
 
-   *vsid = gvsid;
break;
default:
BUG();
break;
}
 
+#ifdef CONFIG_PPC_64K_PAGES
+   /*
+* Mark this as a 64k segment if the host is using
+* 64k pages, the host MMU supports 64k pages and
+* the guest segment page size is = 64k,
+* but not if this segment contains the magic page.
+*/
+   if (pagesize = MMU_PAGE_64K 
+  

[PATCH 18/23] KVM: PPC: Book3S: Allow both PR and HV KVM to be selected

2013-08-05 Thread Paul Mackerras
This makes the config options for PR and HV KVM independently selectable,
making it possible to compile a KVM module with both PR and HV code in
it.  This adds fields to struct kvm_arch and struct kvm_vcpu_arch to
indicate whether the guest is using PR or HV KVM, though at this stage
all guests in a given kernel instance are of the same type: HV KVM if
HV is enabled and the machine supports it (i.e. has suitable CPUs and
has a working hypervisor mode available), otherwise PR.

Since the code in book3s_64_vio_hv.c is called from real mode with HV
KVM, and therefore has to be built into the main kernel binary, this
makes it always built-in rather than part of the KVM module.  It gets
called from the KVM module by PR KVM, so this adds an EXPORT_SYMBOL_GPL().

If both HV and PR KVM are included, interrupts come in to the HV version
of the kvmppc_interrupt code, which then jumps to the PR handler,
renamed to kvmppc_interrupt_pr, if the guest is a PR guest.

Allowing both PR and HV in the same kernel required some changes to
kvm_dev_ioctl_check_extension(), since the values returned now can't
be selected with #ifdefs as much as previously.  For capabilities that
are only provided by HV KVM (for example, KVM_PPC_ALLOCATE_HTAB), we
return the HV value only if HV KVM is possible on the current machine.
For capabilities provided by PR KVM but not HV, we return the PR
value unless only HV KVM has been configured.
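
The capability policy in the last paragraph can be modelled by the sketch
below; the boolean parameters stand in for the real checks (whether HV KVM
is built in and usable on this machine, and whether PR KVM is built in) and
are not kernel symbols.

#include <stdbool.h>

static int check_extension(bool cap_is_hv_only, bool hv_possible,
			   bool pr_built, int hv_val, int pr_val)
{
	if (cap_is_hv_only)
		return hv_possible ? hv_val : 0;

	/* provided by PR KVM but not HV: report it unless only HV KVM
	 * has been configured */
	return pr_built ? pr_val : 0;
}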

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/include/asm/kvm_book3s.h   | 67 +
 arch/powerpc/include/asm/kvm_host.h |  6 +++
 arch/powerpc/include/asm/kvm_ppc.h  |  5 ++-
 arch/powerpc/kvm/Kconfig| 15 +++-
 arch/powerpc/kvm/Makefile   | 11 +++---
 arch/powerpc/kvm/book3s.c   | 56 +++
 arch/powerpc/kvm/book3s_64_vio_hv.c |  1 +
 arch/powerpc/kvm/book3s_emulate.c   |  9 +
 arch/powerpc/kvm/book3s_exports.c   |  3 +-
 arch/powerpc/kvm/book3s_hv.c|  3 ++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |  4 ++
 arch/powerpc/kvm/book3s_segment.S   |  7 
 arch/powerpc/kvm/book3s_xics.c  |  2 +-
 arch/powerpc/kvm/powerpc.c  | 57 ++--
 14 files changed, 184 insertions(+), 62 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index 476d862..f6af43f 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -272,6 +272,29 @@ static inline struct kvmppc_vcpu_book3s *to_book3s(struct 
kvm_vcpu *vcpu)
 #include asm/kvm_book3s_64.h
 #endif
 
+#if defined(CONFIG_KVM_BOOK3S_PR)  defined(CONFIG_KVM_BOOK3S_64_HV)
+static inline int kvmppc_vcpu_pr(struct kvm_vcpu *vcpu)
+{
+   return !vcpu->arch.use_hv;
+}
+
+static inline int kvmppc_vcpu_hv(struct kvm_vcpu *vcpu)
+{
+   return vcpu->arch.use_hv;
+}
+
+#else
+static inline int kvmppc_vcpu_pr(struct kvm_vcpu *vcpu)
+{
+   return IS_ENABLED(CONFIG_KVM_BOOK3S_PR);
+}
+
+static inline int kvmppc_vcpu_hv(struct kvm_vcpu *vcpu)
+{
+   return IS_ENABLED(CONFIG_KVM_BOOK3S_64_HV);
+}
+#endif
+
 static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
 {
vcpu-arch.gpr[num] = val;
@@ -366,28 +389,38 @@ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu 
*vcpu)
return vcpu-arch.fault_dar;
 }
 
-#ifdef CONFIG_KVM_BOOK3S_PR
+#ifdef CONFIG_KVM_BOOK3S_HANDLER
 
 static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
 {
-   return to_book3s(vcpu)-hior;
+   if (kvmppc_vcpu_pr(vcpu))
+   return to_book3s(vcpu)->hior;
+   return 0;
 }
 
 static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
unsigned long pending_now, unsigned long old_pending)
 {
-   if (pending_now)
-   vcpu-arch.shared-int_pending = 1;
-   else if (old_pending)
-   vcpu-arch.shared-int_pending = 0;
+   if (kvmppc_vcpu_pr(vcpu)) {
+   if (pending_now)
+   vcpu->arch.shared->int_pending = 1;
+   else if (old_pending)
+   vcpu->arch.shared->int_pending = 0;
+   }
 }
 
 static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
 {
-   ulong crit_raw = vcpu-arch.shared-critical;
-   ulong crit_r1 = kvmppc_get_gpr(vcpu, 1);
+   ulong crit_raw;
+   ulong crit_r1;
bool crit;
 
+   if (!kvmppc_vcpu_pr(vcpu))
+   return false;
+
+   crit_raw = vcpu->arch.shared->critical;
+   crit_r1 = kvmppc_get_gpr(vcpu, 1);
+
/* Truncate crit indicators in 32 bit mode */
if (!(vcpu-arch.shared-msr  MSR_SF)) {
crit_raw = 0x;
@@ -401,23 +434,7 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu 
*vcpu)
 
return crit;
 }
-#else /* CONFIG_KVM_BOOK3S_PR */
-
-static inline unsigned long kvmppc_interrupt_offset(struct 

[PATCH 11/23] KVM: PPC: Book3S PR: Allocate kvm_vcpu structs from kvm_vcpu_cache

2013-08-05 Thread Paul Mackerras
This makes PR KVM allocate its kvm_vcpu structs from the kvm_vcpu_cache
rather than having them embedded in the kvmppc_vcpu_book3s struct,
which is allocated with vzalloc.  The reason is to reduce the
differences between PR and HV KVM in order to make it easier to have
them coexist in one kernel binary.

With this, the kvm_vcpu struct has a pointer to the kvmppc_vcpu_book3s
struct.  The pointer to the kvmppc_book3s_shadow_vcpu struct has moved
from the kvmppc_vcpu_book3s struct to the kvm_vcpu struct.

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/include/asm/kvm_book3s.h|  4 +---
 arch/powerpc/include/asm/kvm_book3s_32.h |  2 +-
 arch/powerpc/include/asm/kvm_host.h  |  5 +
 arch/powerpc/kvm/book3s_32_mmu.c |  8 
 arch/powerpc/kvm/book3s_64_mmu.c | 11 +--
 arch/powerpc/kvm/book3s_pr.c | 29 ++---
 6 files changed, 34 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index 322b539..1b32f6c 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -70,8 +70,6 @@ struct hpte_cache {
 };
 
 struct kvmppc_vcpu_book3s {
-   struct kvm_vcpu vcpu;
-   struct kvmppc_book3s_shadow_vcpu *shadow_vcpu;
struct kvmppc_sid_map sid_map[SID_MAP_NUM];
struct {
u64 esid;
@@ -192,7 +190,7 @@ extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long 
cmd);
 
 static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
 {
-   return container_of(vcpu, struct kvmppc_vcpu_book3s, vcpu);
+   return vcpu->arch.book3s;
 }
 
 extern void kvm_return_point(void);
diff --git a/arch/powerpc/include/asm/kvm_book3s_32.h 
b/arch/powerpc/include/asm/kvm_book3s_32.h
index ce0ef6c..c720e0b 100644
--- a/arch/powerpc/include/asm/kvm_book3s_32.h
+++ b/arch/powerpc/include/asm/kvm_book3s_32.h
@@ -22,7 +22,7 @@
 
 static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu 
*vcpu)
 {
-   return to_book3s(vcpu)-shadow_vcpu;
+   return vcpu->arch.shadow_vcpu;
 }
 
 static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index c37207f..4d83972 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -91,6 +91,9 @@ struct lppaca;
 struct slb_shadow;
 struct dtl_entry;
 
+struct kvmppc_vcpu_book3s;
+struct kvmppc_book3s_shadow_vcpu;
+
 struct kvm_vm_stat {
u32 remote_tlb_flush;
 };
@@ -409,6 +412,8 @@ struct kvm_vcpu_arch {
int slb_max;/* 1 + index of last valid entry in slb[] */
int slb_nr; /* total number of entries in SLB */
struct kvmppc_mmu mmu;
+   struct kvmppc_vcpu_book3s *book3s;
+   struct kvmppc_book3s_shadow_vcpu *shadow_vcpu;
 #endif
 
ulong gpr[32];
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index 856af98..b14af6d 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -111,10 +111,11 @@ static void kvmppc_mmu_book3s_32_reset_msr(struct 
kvm_vcpu *vcpu)
kvmppc_set_msr(vcpu, 0);
 }
 
-static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s 
*vcpu_book3s,
+static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvm_vcpu *vcpu,
  u32 sre, gva_t eaddr,
  bool primary)
 {
+   struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
u32 page, hash, pteg, htabmask;
hva_t r;
 
@@ -132,7 +133,7 @@ static hva_t kvmppc_mmu_book3s_32_get_pteg(struct 
kvmppc_vcpu_book3s *vcpu_book3
kvmppc_get_pc(vcpu_book3s-vcpu), eaddr, vcpu_book3s-sdr1, 
pteg,
sr_vsid(sre));
 
-   r = gfn_to_hva(vcpu_book3s-vcpu.kvm, pteg  PAGE_SHIFT);
+   r = gfn_to_hva(vcpu->kvm, pteg >> PAGE_SHIFT);
if (kvm_is_error_hva(r))
return r;
return r | (pteg  ~PAGE_MASK);
@@ -203,7 +204,6 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu 
*vcpu, gva_t eaddr,
 struct kvmppc_pte *pte, bool data,
 bool primary)
 {
-   struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
u32 sre;
hva_t ptegp;
u32 pteg[16];
@@ -218,7 +218,7 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu 
*vcpu, gva_t eaddr,
 
pte-vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data);
 
-   ptegp = kvmppc_mmu_book3s_32_get_pteg(vcpu_book3s, sre, eaddr, primary);
+   ptegp = kvmppc_mmu_book3s_32_get_pteg(vcpu, sre, eaddr, primary);
if (kvm_is_error_hva(ptegp)) {
printk(KERN_INFO KVM: Invalid PTEG!\n);
goto no_page_found;
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c

[PATCH 15/23] KVM: PPC: Book3S: Rename symbols that exist in both PR and HV KVM

2013-08-05 Thread Paul Mackerras
This renames almost all of the symbols that exist in both PR and HV
KVM, as one step towards making it possible to compile both in one
kernel image.  Symbols in the PR KVM implementation get _pr
appended, and those in the HV KVM implementation get _hv.  Then,
in book3s.c, we add a function with the name without the suffix and
arrange for it to call the appropriate suffixed function using either
the VCPU_DO_PR/VCPU_DO_HV pair of macros or the DO_IF_PR/DO_IF_HV
pair.  These macros take a kvm or vcpu argument that is currently
unused, but which will be used in future patches.

There are a few exceptions to this general scheme:

* kvmppc_core_free_memslot() and kvmppc_core_create_memslot() don't
  take a kvm or vcpu argument, so for them we call the HV function
  if HV is selected in the kernel config (the PR implementation of
  these is empty).

* kvmppc_core_init_vm() and kvmppc_core_destroy_vm() have some common
  code factored into the book3s.c implementation.

* kvmppc_book3s_init(), kvmppc_book3s_exit() and
  kvmppc_core_check_processor_compat() have been moved entirely
  into book3s.c

* kvmppc_interrupt and kvm_vm_ioctl_get_smmu_info() are not handled
  here.

* The kvmppc_handler_highmem label is unused and is removed here.

* kvm_return_point() is declared but not defined or used anywhere,
  so this removes the declaration.
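
The general scheme above reduces to the dispatch sketch below.  The names,
signatures and the vcpu_is_hv() predicate are illustrative stand-ins; the
real wrappers use the VCPU_DO_PR/VCPU_DO_HV or DO_IF_PR/DO_IF_HV macros
with the actual vcpu or kvm argument.

#include <stdbool.h>

static bool vcpu_is_hv(void) { return false; }	/* placeholder predicate */

static void vcpu_load_pr(int cpu) { (void)cpu; /* PR implementation (_pr) */ }
static void vcpu_load_hv(int cpu) { (void)cpu; /* HV implementation (_hv) */ }

/* The un-suffixed entry point lives in book3s.c and simply forwards. */
static void vcpu_load(int cpu)
{
	if (vcpu_is_hv())
		vcpu_load_hv(cpu);
	else
		vcpu_load_pr(cpu);
}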

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/include/asm/kvm_book3s.h   |  74 +-
 arch/powerpc/kvm/book3s.c   | 232 +++-
 arch/powerpc/kvm/book3s_32_mmu_host.c   |   2 +-
 arch/powerpc/kvm/book3s_64_mmu_host.c   |   2 +-
 arch/powerpc/kvm/book3s_64_mmu_hv.c |  17 +--
 arch/powerpc/kvm/book3s_hv.c| 106 +--
 arch/powerpc/kvm/book3s_hv_interrupts.S |   3 -
 arch/powerpc/kvm/book3s_interrupts.S|   5 +-
 arch/powerpc/kvm/book3s_pr.c| 116 
 9 files changed, 374 insertions(+), 183 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index 1b32f6c..476d862 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -24,6 +24,8 @@
 #include linux/kvm_host.h
 #include asm/kvm_book3s_asm.h
 
+union kvmppc_one_reg;
+
 struct kvmppc_bat {
u64 raw;
u32 bepi;
@@ -124,7 +126,6 @@ extern void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, 
ulong ea, ulong ea_mask)
 extern void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 vp, u64 vp_mask);
 extern void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong 
pa_end);
 extern void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 new_msr);
-extern void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr);
 extern void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu);
 extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu);
 extern void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu);
@@ -188,13 +189,80 @@ extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, 
unsigned int inst);
 extern ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst);
 extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd);
 
+/* Functions that have implementations in both PR and HV KVM */
+extern struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
+  unsigned int id);
+extern struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
+  unsigned int id);
+extern void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
+   struct kvm_memory_slot *dont);
+extern int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
+unsigned long npages);
+extern int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
+   struct kvm_memory_slot *memslot,
+   struct kvm_userspace_memory_region *mem);
+extern void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
+   struct kvm_userspace_memory_region *mem,
+   const struct kvm_memory_slot *old);
+extern int kvmppc_core_init_vm_pr(struct kvm *kvm);
+extern int kvmppc_core_init_vm_hv(struct kvm *kvm);
+extern void kvmppc_core_destroy_vm_pr(struct kvm *kvm);
+extern void kvmppc_core_destroy_vm_hv(struct kvm *kvm);
+
+extern void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu);
+extern void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu);
+extern void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu);
+extern void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr);
+extern void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr);
+extern void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr);
+extern void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr);
+extern int 

[PATCH 12/23] KVM: PPC: Book3S HV: Better handling of exceptions that happen in real mode

2013-08-05 Thread Paul Mackerras
When an interrupt or exception happens in the guest that comes to the
host, the CPU goes to hypervisor real mode (MMU off) to handle the
exception but doesn't change the MMU context.  After saving a few
registers, we then clear the "in guest" flag.  If, for any reason,
we get an exception in the real-mode code, that then gets handled
by the normal kernel exception handlers, which turn the MMU on.  This
is disastrous if the MMU is still set to the guest context, since we
end up executing instructions from random places in the guest kernel
with hypervisor privilege.

In order to catch this situation, we define a new value for the "in guest"
flag, KVM_GUEST_MODE_HOST_HV, to indicate that we are in hypervisor real
mode with guest MMU context.  If the "in guest" flag is set to this value,
we branch off to an emergency handler.  For the moment, this just does
a branch to self to stop the CPU from doing anything further.

While we're here, we define another new flag value to indicate that we
are in an HV guest, as distinct from a PR guest.  This will be useful
when we have a kernel that can support both PR and HV guests concurrently.
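
At the C level, the new check on interrupt entry behaves like the sketch
below; the real code is assembler, and the enum values here are
illustrative (the actual numbers are the KVM_GUEST_MODE_* defines in the
hunk further down).

#include <stdio.h>

enum guest_mode {
	MODE_NONE,
	MODE_GUEST,
	MODE_SKIP,
	MODE_GUEST_HV,	/* running an HV guest */
	MODE_HOST_HV,	/* hypervisor real mode with guest MMU context */
};

static void interrupt_entry(enum guest_mode in_guest)
{
	if (in_guest == MODE_HOST_HV) {
		/* An exception hit the real-mode code while the MMU is still
		 * in guest context: go to the emergency handler instead of
		 * the normal kernel exception path. */
		fprintf(stderr, "kvmppc_bad_host_intr\n");
		return;
	}
	/* otherwise save the guest registers and handle the exit as usual */
}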

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/include/asm/kvm_asm.h|  2 ++
 arch/powerpc/include/asm/kvm_book3s_asm.h |  1 +
 arch/powerpc/kernel/asm-offsets.c |  1 +
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   | 38 +++
 4 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_asm.h 
b/arch/powerpc/include/asm/kvm_asm.h
index 3d70b7e..9ca0228 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -137,6 +137,8 @@
 #define KVM_GUEST_MODE_NONE0
 #define KVM_GUEST_MODE_GUEST   1
 #define KVM_GUEST_MODE_SKIP2
+#define KVM_GUEST_MODE_GUEST_HV3
+#define KVM_GUEST_MODE_HOST_HV 4
 
 #define KVM_INST_FETCH_FAILED  -1
 
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h 
b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 4141409..360742a 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -79,6 +79,7 @@ struct kvmppc_host_state {
ulong vmhandler;
ulong scratch0;
ulong scratch1;
+   ulong scratch2;
u8 in_guest;
u8 restore_hid5;
u8 napping;
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index 14a8004..cbd9366 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -574,6 +574,7 @@ int main(void)
HSTATE_FIELD(HSTATE_VMHANDLER, vmhandler);
HSTATE_FIELD(HSTATE_SCRATCH0, scratch0);
HSTATE_FIELD(HSTATE_SCRATCH1, scratch1);
+   HSTATE_FIELD(HSTATE_SCRATCH2, scratch2);
HSTATE_FIELD(HSTATE_IN_GUEST, in_guest);
HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5);
HSTATE_FIELD(HSTATE_NAPPING, napping);
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 60dce5b..cf3d045 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -266,6 +266,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
mtspr   SPRN_DAR, r5
mtspr   SPRN_DSISR, r6
 
+   li  r6, KVM_GUEST_MODE_HOST_HV
+   stb r6, HSTATE_IN_GUEST(r13)
+
 BEGIN_FTR_SECTION
/* Restore AMR and UAMOR, set AMOR to all 1s */
ld  r5,VCPU_AMR(r4)
@@ -533,7 +536,7 @@ fast_guest_return:
mtspr   SPRN_HSRR1,r11
 
/* Activate guest mode, so faults get handled by KVM */
-   li  r9, KVM_GUEST_MODE_GUEST
+   li  r9, KVM_GUEST_MODE_GUEST_HV
stb r9, HSTATE_IN_GUEST(r13)
 
/* Enter guest */
@@ -585,8 +588,15 @@ kvmppc_interrupt:
 * guest CR, R12 saved in shadow VCPU SCRATCH1/0
 * guest R13 saved in SPRN_SCRATCH0
 */
-   /* abuse host_r2 as third scratch area; we get r2 from PACATOC(r13) */
-   std r9, HSTATE_HOST_R2(r13)
+   std r9, HSTATE_SCRATCH2(r13)
+
+   lbz r9, HSTATE_IN_GUEST(r13)
+   cmpwi   r9, KVM_GUEST_MODE_HOST_HV
+   beq kvmppc_bad_host_intr
+   /* We're now back in the host but in guest MMU context */
+   li  r9, KVM_GUEST_MODE_HOST_HV
+   stb r9, HSTATE_IN_GUEST(r13)
+
ld  r9, HSTATE_KVM_VCPU(r13)
 
/* Save registers */
@@ -600,7 +610,7 @@ kvmppc_interrupt:
std r6, VCPU_GPR(R6)(r9)
std r7, VCPU_GPR(R7)(r9)
std r8, VCPU_GPR(R8)(r9)
-   ld  r0, HSTATE_HOST_R2(r13)
+   ld  r0, HSTATE_SCRATCH2(r13)
std r0, VCPU_GPR(R9)(r9)
std r10, VCPU_GPR(R10)(r9)
std r11, VCPU_GPR(R11)(r9)
@@ -634,10 +644,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
std r3, VCPU_GPR(R13)(r9)
std r4, VCPU_LR(r9)
 
-   /* Unset guest mode */
-   li  r0, KVM_GUEST_MODE_NONE
-   stb r0, HSTATE_IN_GUEST(r13)
-
  

[PATCH 13/23] KVM: PPC: Book3S: Move skip-interrupt handlers to common code

2013-08-05 Thread Paul Mackerras
Both PR and HV KVM have separate, identical copies of the
kvmppc_skip_interrupt and kvmppc_skip_Hinterrupt handlers that are
used for the situation where an interrupt happens when loading the
instruction that caused an exit from the guest.  To eliminate this
duplication and make it easier to compile in both PR and HV KVM,
this moves this code to arch/powerpc/kernel/exceptions-64s.S along
with other kernel interrupt handler code.

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/kernel/exceptions-64s.S| 26 ++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 24 
 arch/powerpc/kvm/book3s_rmhandlers.S| 26 --
 3 files changed, 26 insertions(+), 50 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 40e4a17..e3c8a03 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -636,6 +636,32 @@ slb_miss_user_pseries:
b   .   /* prevent spec. execution */
 #endif /* __DISABLED__ */
 
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+kvmppc_skip_interrupt:
+   /*
+* Here all GPRs are unchanged from when the interrupt happened
+* except for r13, which is saved in SPRG_SCRATCH0.
+*/
+   mfspr   r13, SPRN_SRR0
+   addir13, r13, 4
+   mtspr   SPRN_SRR0, r13
+   GET_SCRATCH0(r13)
+   rfid
+   b   .
+
+kvmppc_skip_Hinterrupt:
+   /*
+* Here all GPRs are unchanged from when the interrupt happened
+* except for r13, which is saved in SPRG_SCRATCH0.
+*/
+   mfspr   r13, SPRN_HSRR0
+   addir13, r13, 4
+   mtspr   SPRN_HSRR0, r13
+   GET_SCRATCH0(r13)
+   hrfid
+   b   .
+#endif
+
 /*
  * Code from here down to __end_handlers is invoked from the
  * exception prologs above.  Because the prologs assemble the
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index cf3d045..af9ba85 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -29,30 +29,6 @@
 #include asm/kvm_book3s_asm.h
 #include asm/mmu-hash64.h
 
-/*
- *   *
- *Real Mode handlers that need to be in the linear mapping   *
- *   *
- /
-
-   .globl  kvmppc_skip_interrupt
-kvmppc_skip_interrupt:
-   mfspr   r13,SPRN_SRR0
-   addir13,r13,4
-   mtspr   SPRN_SRR0,r13
-   GET_SCRATCH0(r13)
-   rfid
-   b   .
-
-   .globl  kvmppc_skip_Hinterrupt
-kvmppc_skip_Hinterrupt:
-   mfspr   r13,SPRN_HSRR0
-   addir13,r13,4
-   mtspr   SPRN_HSRR0,r13
-   GET_SCRATCH0(r13)
-   hrfid
-   b   .
-
 /*
  * Call kvmppc_hv_entry in real mode.
  * Must be called with interrupts hard-disabled.
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S 
b/arch/powerpc/kvm/book3s_rmhandlers.S
index b64d7f9..b746c38 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -38,32 +38,6 @@
 
 #define FUNC(name) GLUE(.,name)
 
-   .globl  kvmppc_skip_interrupt
-kvmppc_skip_interrupt:
-   /*
-* Here all GPRs are unchanged from when the interrupt happened
-* except for r13, which is saved in SPRG_SCRATCH0.
-*/
-   mfspr   r13, SPRN_SRR0
-   addir13, r13, 4
-   mtspr   SPRN_SRR0, r13
-   GET_SCRATCH0(r13)
-   rfid
-   b   .
-
-   .globl  kvmppc_skip_Hinterrupt
-kvmppc_skip_Hinterrupt:
-   /*
-* Here all GPRs are unchanged from when the interrupt happened
-* except for r13, which is saved in SPRG_SCRATCH0.
-*/
-   mfspr   r13, SPRN_HSRR0
-   addir13, r13, 4
-   mtspr   SPRN_HSRR0, r13
-   GET_SCRATCH0(r13)
-   hrfid
-   b   .
-
 #elif defined(CONFIG_PPC_BOOK3S_32)
 
 #define FUNC(name) name
-- 
1.8.3.1



[PATCH 14/23] KVM: PPC: Book3S PR: Delay disabling relocation-on interrupts

2013-08-05 Thread Paul Mackerras
When we are running a PR KVM guest on POWER8, we have to disable the
new POWER8 feature of taking interrupts with relocation on, that is,
of taking interrupts without disabling the MMU, because the SLB does
not contain the normal kernel SLB entries while in the guest.
Currently we disable relocation-on interrupts when a PR guest is
created, and leave it disabled until there are no more PR guests in
existence.

This defers the disabling of relocation-on interrupts until the first
time a PR KVM guest vcpu is run.  The reason is that in future we will
support both PR and HV guests in the same kernel, and this will avoid
disabling relocation-on interrupts unnecessarily for guests which turn
out to be HV guests, as we will not know at VM creation time whether
it will be a PR or an HV guest.
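
The deferred, once-per-VM disable follows the pattern sketched below; this
is a standalone model with invented mock_* names, using a pthread mutex and
a compiler builtin where the real code uses kvm->lock and smp_mb().

#include <pthread.h>
#include <stdbool.h>

struct mock_vm {
	pthread_mutex_t lock;
	bool relon_disabled;
};

static void mock_disable_reloc_on_exc(void)
{
	/* stands in for pSeries_disable_reloc_on_exc() plus the global
	 * user count handling */
}

static void mock_vcpu_first_run(struct mock_vm *vm)
{
	if (vm->relon_disabled)			/* unlocked fast path */
		return;

	pthread_mutex_lock(&vm->lock);
	if (!vm->relon_disabled) {
		mock_disable_reloc_on_exc();
		__sync_synchronize();		/* order the disable before the flag */
		vm->relon_disabled = true;
	}
	pthread_mutex_unlock(&vm->lock);
}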

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/include/asm/kvm_host.h |  1 +
 arch/powerpc/kvm/book3s_pr.c| 71 ++---
 2 files changed, 52 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 4d83972..c012db2 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -264,6 +264,7 @@ struct kvm_arch {
 #endif /* CONFIG_KVM_BOOK3S_64_HV */
 #ifdef CONFIG_KVM_BOOK3S_PR
struct mutex hpt_mutex;
+   bool relon_disabled;
 #endif
 #ifdef CONFIG_PPC_BOOK3S_64
struct list_head spapr_tce_tables;
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 5b06a70..2759ddc 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1197,6 +1197,47 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
kmem_cache_free(kvm_vcpu_cache, vcpu);
 }
 
+/*
+ * On POWER8, we have to disable relocation-on interrupts while
+ * we are in the guest, since the guest doesn't have the normal
+ * kernel SLB contents.  Since disabling relocation-on interrupts
+ * is a fairly heavy-weight operation, we do it once when starting
+ * the first guest vcpu and leave it disabled until the last guest
+ * has been destroyed.
+ */
+static unsigned int kvm_global_user_count = 0;
+static DEFINE_SPINLOCK(kvm_global_user_count_lock);
+
+static void disable_relon_interrupts(struct kvm *kvm)
+{
+   mutex_lock(&kvm->lock);
+   if (!kvm->arch.relon_disabled) {
+   if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
+   spin_lock(&kvm_global_user_count_lock);
+   if (++kvm_global_user_count == 1)
+   pSeries_disable_reloc_on_exc();
+   spin_unlock(&kvm_global_user_count_lock);
+   }
+   /* order disabling above with setting relon_disabled */
+   smp_mb();
+   kvm->arch.relon_disabled = true;
+   }
+   mutex_unlock(&kvm->lock);
+}
+
+static void enable_relon_interrupts(struct kvm *kvm)
+{
+   if (kvm->arch.relon_disabled &&
+   firmware_has_feature(FW_FEATURE_SET_MODE)) {
+   spin_lock(&kvm_global_user_count_lock);
+   BUG_ON(kvm_global_user_count == 0);
+   if (--kvm_global_user_count == 0)
+   pSeries_enable_reloc_on_exc();
+   spin_unlock(&kvm_global_user_count_lock);
+   }
+   kvm->arch.relon_disabled = false;
+}
+
 int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
int ret;
@@ -1234,6 +1275,9 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct 
kvm_vcpu *vcpu)
goto out;
}
 
+   if (!vcpu->kvm->arch.relon_disabled)
+   disable_relon_interrupts(vcpu->kvm);
+
/* Save FPU state in stack */
if (current-thread.regs-msr  MSR_FP)
giveup_fpu(current);
@@ -1400,9 +1444,6 @@ void kvmppc_core_flush_memslot(struct kvm *kvm, struct 
kvm_memory_slot *memslot)
 {
 }
 
-static unsigned int kvm_global_user_count = 0;
-static DEFINE_SPINLOCK(kvm_global_user_count_lock);
-
 int kvmppc_core_init_vm(struct kvm *kvm)
 {
 #ifdef CONFIG_PPC64
@@ -1411,28 +1452,18 @@ int kvmppc_core_init_vm(struct kvm *kvm)
 #endif
mutex_init(kvm-arch.hpt_mutex);
 
-   if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
-   spin_lock(kvm_global_user_count_lock);
-   if (++kvm_global_user_count == 1)
-   pSeries_disable_reloc_on_exc();
-   spin_unlock(kvm_global_user_count_lock);
-   }
+   /*
+* If we don't have relocation-on interrupts at all,
+* then we can consider them to be already disabled.
+*/
+   kvm->arch.relon_disabled = !firmware_has_feature(FW_FEATURE_SET_MODE);
+
return 0;
 }
 
 void kvmppc_core_destroy_vm(struct kvm *kvm)
 {
-#ifdef CONFIG_PPC64
-   WARN_ON(!list_empty(kvm-arch.spapr_tce_tables));
-#endif
-
-   if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
-   spin_lock(kvm_global_user_count_lock);
-