[PATCH] KVM: PPC: Book3S HV: Don't use compound_order to determine host mapping size

2018-09-04 Thread Nicholas Piggin
THP paths can defer splitting compound pages until after the actual
remap and TLB flushes to split a huge PMD/PUD. This causes radix
partition scope page table mappings to get out of synch with the host
qemu page table mappings.

This results in random memory corruption in the guest when running
with THP. The easiest way to reproduce is to use the KVM balloon to free up
a lot of memory in the guest and then shrink the balloon to give the
memory back, while some work is being done in the guest.
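
For reference, a condensed sketch of the logic the diff below adds (declarations and
error handling trimmed; vcpu, gpa and hva are as used in kvmppc_book3s_radix_page_fault,
and the size comments assume 4kB base pages):

    unsigned int shift;
    int level;
    pte_t pte, *ptep;

    /*
     * Ask the host process page table what actually maps hva right now.
     * shift reports the mapping size (PAGE_SHIFT, PMD_SHIFT or PUD_SHIFT),
     * which stays correct even while a THP split is pending.  Interrupts
     * are disabled so the page tables cannot be freed under us.
     */
    local_irq_disable();
    ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
    pte = *ptep;
    local_irq_enable();

    /*
     * Use a 1GB or 2MB partition-scope mapping only if the guest physical
     * address is congruent with the host virtual address at that size;
     * otherwise fall back to a base-page mapping.
     */
    if (shift == PUD_SHIFT &&
        (gpa & (PUD_SIZE - PAGE_SIZE)) == (hva & (PUD_SIZE - PAGE_SIZE)))
        level = 2;      /* 1GB */
    else if (shift == PMD_SHIFT &&
             (gpa & (PMD_SIZE - PAGE_SIZE)) == (hva & (PMD_SIZE - PAGE_SIZE)))
        level = 1;      /* 2MB */
    else
        level = 0;      /* base page */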

Cc: Paul Mackerras 
Cc: David Gibson 
Cc: "Aneesh Kumar K.V" 
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_64_mmu_radix.c | 88 ++
 1 file changed, 34 insertions(+), 54 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c 
b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 0af1c0aea1fe..d8792445d95a 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -525,8 +525,8 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, 
struct kvm_vcpu *vcpu,
   unsigned long ea, unsigned long dsisr)
 {
struct kvm *kvm = vcpu->kvm;
-   unsigned long mmu_seq, pte_size;
-   unsigned long gpa, gfn, hva, pfn;
+   unsigned long mmu_seq;
+   unsigned long gpa, gfn, hva;
struct kvm_memory_slot *memslot;
struct page *page = NULL;
long ret;
@@ -623,9 +623,10 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, 
struct kvm_vcpu *vcpu,
 */
hva = gfn_to_hva_memslot(memslot, gfn);
if (upgrade_p && __get_user_pages_fast(hva, 1, 1, &page) == 1) {
-   pfn = page_to_pfn(page);
upgrade_write = true;
} else {
+   unsigned long pfn;
+
/* Call KVM generic code to do the slow-path check */
pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
   writing, upgrade_p);
@@ -639,63 +640,42 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, 
struct kvm_vcpu *vcpu,
}
}
 
-   /* See if we can insert a 1GB or 2MB large PTE here */
-   level = 0;
-   if (page && PageCompound(page)) {
-   pte_size = PAGE_SIZE << compound_order(compound_head(page));
-   if (pte_size >= PUD_SIZE &&
-   (gpa & (PUD_SIZE - PAGE_SIZE)) ==
-   (hva & (PUD_SIZE - PAGE_SIZE))) {
-   level = 2;
-   pfn &= ~((PUD_SIZE >> PAGE_SHIFT) - 1);
-   } else if (pte_size >= PMD_SIZE &&
-  (gpa & (PMD_SIZE - PAGE_SIZE)) ==
-  (hva & (PMD_SIZE - PAGE_SIZE))) {
-   level = 1;
-   pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
-   }
-   }
-
/*
-* Compute the PTE value that we need to insert.
+* Read the PTE from the process' radix tree and use that
+* so we get the shift and attribute bits.
 */
-   if (page) {
-   pgflags = _PAGE_READ | _PAGE_EXEC | _PAGE_PRESENT | _PAGE_PTE |
-   _PAGE_ACCESSED;
-   if (writing || upgrade_write)
-   pgflags |= _PAGE_WRITE | _PAGE_DIRTY;
-   pte = pfn_pte(pfn, __pgprot(pgflags));
+   local_irq_disable();
+   ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
+   pte = *ptep;
+   local_irq_enable();
+
+   /* Get pte level from shift/size */
+   if (shift == PUD_SHIFT &&
+   (gpa & (PUD_SIZE - PAGE_SIZE)) ==
+   (hva & (PUD_SIZE - PAGE_SIZE))) {
+   level = 2;
+   } else if (shift == PMD_SHIFT &&
+  (gpa & (PMD_SIZE - PAGE_SIZE)) ==
+  (hva & (PMD_SIZE - PAGE_SIZE))) {
+   level = 1;
} else {
-   /*
-* Read the PTE from the process' radix tree and use that
-* so we get the attribute bits.
-*/
-   local_irq_disable();
-   ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
-   pte = *ptep;
-   local_irq_enable();
-   if (shift == PUD_SHIFT &&
-   (gpa & (PUD_SIZE - PAGE_SIZE)) ==
-   (hva & (PUD_SIZE - PAGE_SIZE))) {
-   level = 2;
-   } else if (shift == PMD_SHIFT &&
-  (gpa & (PMD_SIZE - PAGE_SIZE)) ==
-  (hva & (PMD_SIZE - PAGE_SIZE))) {
-   level = 1;
-   } else if (shift && shift != PAGE_SHIFT) {
-   /* Adjust PFN */
-   unsigned long mask = (1ul << shift) - PAGE_SIZE;
-   pte = __pte(pte_val(pte) | (hva & mask));
-   }
-   pte = __pte(pte_val(pte) | _PAGE_EXEC | _PAGE_ACCESSED);
-  

Re: [PATCH] KVM: PPC: Book3S HV: Don't use compound_order to determine host mapping size

2018-09-04 Thread David Gibson
On Tue, Sep 04, 2018 at 06:16:01PM +1000, Nicholas Piggin wrote:
> THP paths can defer splitting compound pages until after the actual
> remap and TLB flushes to split a huge PMD/PUD. This causes radix
> partition scope page table mappings to get out of synch with the host
> qemu page table mappings.
> 
> This results in random memory corruption in the guest when running
> with THP. The easiest way to reproduce is to use the KVM balloon to free up
> a lot of memory in the guest and then shrink the balloon to give the
> memory back, while some work is being done in the guest.
> 
> Cc: Paul Mackerras 
> Cc: David Gibson 
> Cc: "Aneesh Kumar K.V" 
> Cc: linuxppc-dev@lists.ozlabs.org
> Signed-off-by: Nicholas Piggin 

Seems to fix the problem on my test case.

Tested-by: David Gibson 

> ---
>  arch/powerpc/kvm/book3s_64_mmu_radix.c | 88 ++
>  1 file changed, 34 insertions(+), 54 deletions(-)
> 
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c 
> b/arch/powerpc/kvm/book3s_64_mmu_radix.c
> index 0af1c0aea1fe..d8792445d95a 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
> @@ -525,8 +525,8 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, 
> struct kvm_vcpu *vcpu,
>  unsigned long ea, unsigned long dsisr)
>  {
>   struct kvm *kvm = vcpu->kvm;
> - unsigned long mmu_seq, pte_size;
> - unsigned long gpa, gfn, hva, pfn;
> + unsigned long mmu_seq;
> + unsigned long gpa, gfn, hva;
>   struct kvm_memory_slot *memslot;
>   struct page *page = NULL;
>   long ret;
> @@ -623,9 +623,10 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, 
> struct kvm_vcpu *vcpu,
>*/
>   hva = gfn_to_hva_memslot(memslot, gfn);
>   if (upgrade_p && __get_user_pages_fast(hva, 1, 1, &page) == 1) {
> - pfn = page_to_pfn(page);
>   upgrade_write = true;
>   } else {
> + unsigned long pfn;
> +
>   /* Call KVM generic code to do the slow-path check */
>   pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
>  writing, upgrade_p);
> @@ -639,63 +640,42 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, 
> struct kvm_vcpu *vcpu,
>   }
>   }
>  
> - /* See if we can insert a 1GB or 2MB large PTE here */
> - level = 0;
> - if (page && PageCompound(page)) {
> - pte_size = PAGE_SIZE << compound_order(compound_head(page));
> - if (pte_size >= PUD_SIZE &&
> - (gpa & (PUD_SIZE - PAGE_SIZE)) ==
> - (hva & (PUD_SIZE - PAGE_SIZE))) {
> - level = 2;
> - pfn &= ~((PUD_SIZE >> PAGE_SHIFT) - 1);
> - } else if (pte_size >= PMD_SIZE &&
> -(gpa & (PMD_SIZE - PAGE_SIZE)) ==
> -(hva & (PMD_SIZE - PAGE_SIZE))) {
> - level = 1;
> - pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
> - }
> - }
> -
>   /*
> -  * Compute the PTE value that we need to insert.
> +  * Read the PTE from the process' radix tree and use that
> +  * so we get the shift and attribute bits.
>*/
> - if (page) {
> - pgflags = _PAGE_READ | _PAGE_EXEC | _PAGE_PRESENT | _PAGE_PTE |
> - _PAGE_ACCESSED;
> - if (writing || upgrade_write)
> - pgflags |= _PAGE_WRITE | _PAGE_DIRTY;
> - pte = pfn_pte(pfn, __pgprot(pgflags));
> + local_irq_disable();
> + ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
> + pte = *ptep;
> + local_irq_enable();
> +
> + /* Get pte level from shift/size */
> + if (shift == PUD_SHIFT &&
> + (gpa & (PUD_SIZE - PAGE_SIZE)) ==
> + (hva & (PUD_SIZE - PAGE_SIZE))) {
> + level = 2;
> + } else if (shift == PMD_SHIFT &&
> +(gpa & (PMD_SIZE - PAGE_SIZE)) ==
> +(hva & (PMD_SIZE - PAGE_SIZE))) {
> + level = 1;
>   } else {
> - /*
> -  * Read the PTE from the process' radix tree and use that
> -  * so we get the attribute bits.
> -  */
> - local_irq_disable();
> - ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
> - pte = *ptep;
> - local_irq_enable();
> - if (shift == PUD_SHIFT &&
> - (gpa & (PUD_SIZE - PAGE_SIZE)) ==
> - (hva & (PUD_SIZE - PAGE_SIZE))) {
> - level = 2;
> - } else if (shift == PMD_SHIFT &&
> -(gpa & (PMD_SIZE - PAGE_SIZE)) ==
> -(hva & (PMD_SIZE - PAGE_SIZE))) {
> - level = 1;
> - } else if (shift && shift != PAGE_SHIFT) {
> - /* Adjust PFN */
> - unsigned lon

Re: [PATCH] KVM: PPC: Book3S HV: Don't use compound_order to determine host mapping size

2018-09-11 Thread Paul Mackerras
On Tue, Sep 04, 2018 at 06:16:01PM +1000, Nicholas Piggin wrote:
> THP paths can defer splitting compound pages until after the actual
> remap and TLB flushes to split a huge PMD/PUD. This causes radix
> partition scope page table mappings to get out of synch with the host
> qemu page table mappings.
> 
> This results in random memory corruption in the guest when running
> with THP. The easiest way to reproduce is to use the KVM balloon to free up
> a lot of memory in the guest and then shrink the balloon to give the
> memory back, while some work is being done in the guest.

I'm hitting the WARN_ON you added.  I think I have an old qemu that
doesn't 2M-align the guest ram and so we get to the level = 0 case
because of misalignment.  The patch below on top of yours seems to
work just fine.  In the case where the pte is 2M or 1G but we have
misalignment, it ORs in address bits from hva into the pte so we get
to the specific single page we want.

Care to fold this in and resend?

Paul.

diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c 
b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index c290f59ae925..933c574e1cf7 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -660,11 +660,14 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, 
struct kvm_vcpu *vcpu,
level = 1;
} else {
level = 0;
-
-   /* Can not cope with unknown page shift */
-   if (shift && shift != PAGE_SHIFT) {
-   WARN_ON_ONCE(1);
-   return -EFAULT;
+   if (shift > PAGE_SHIFT) {
+   /*
+* If the pte maps more than one page, bring over
+* bits from the virtual address to get the real
+* address of the specific single page we want.
+*/
+   unsigned long rpnmask = (1ul << shift) - PAGE_SIZE;
+   pte = __pte(pte_val(pte) | (hva & rpnmask));
}
}
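
As a worked example of the fixup above (illustrative numbers, assuming 4kB base pages):
suppose the host maps hva with a 2MB PTE (shift = PMD_SHIFT = 21) but gpa and hva are not
2MB-congruent, so level stays 0 and a 4kB partition-scope PTE will be inserted:

    rpnmask          = (1ul << 21) - PAGE_SIZE   /* = 0x1ff000, bits 12..20      */
    pte real address = 0x40200000                /* 2MB-aligned huge page        */
    hva              = 0x7fff8a345000            /* offset 0x145000 into the 2MB */
    hva & rpnmask    = 0x145000
    /* after pte = __pte(pte_val(pte) | (hva & rpnmask)): */
    pte real address = 0x40345000                /* the single 4kB page for hva  */

Because the huge page's real address in the pte is 2MB-aligned, those bits are zero
before the OR, so the result points at exactly the base page backing hva, which is what
the explanation above describes.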
 


Re: [PATCH] KVM: PPC: Book3S HV: Don't use compound_order to determine host mapping size

2018-09-11 Thread Nicholas Piggin
On Tue, 11 Sep 2018 20:01:54 +1000
Paul Mackerras  wrote:

> On Tue, Sep 04, 2018 at 06:16:01PM +1000, Nicholas Piggin wrote:
> > THP paths can defer splitting compound pages until after the actual
> > remap and TLB flushes to split a huge PMD/PUD. This causes radix
> > partition scope page table mappings to get out of synch with the host
> > qemu page table mappings.
> > 
> > This results in random memory corruption in the guest when running
> > with THP. The easiest way to reproduce is to use the KVM balloon to free up
> > a lot of memory in the guest and then shrink the balloon to give the
> > memory back, while some work is being done in the guest.  
> 
> I'm hitting the WARN_ON you added.  I think I have an old qemu that
> doesn't 2M-align the guest ram and so we get to the level = 0 case
> because of misalignment.  The patch below on top of yours seems to
> work just fine.  In the case where the pte is 2M or 1G but we have
> misalignment, it ORs in address bits from hva into the pte so we get
> to the specific single page we want.
> 
> Care to fold this in and resend?

Thanks for that, I misunderstood the unaligned adjustment case.
Good thing you caught it.

Thanks,
Nick


Re: [PATCH] KVM: PPC: Book3S HV: Don't use compound_order to determine host mapping size

2018-09-04 Thread Aneesh Kumar K.V

On 09/04/2018 01:46 PM, Nicholas Piggin wrote:

THP paths can defer splitting compound pages until after the actual
remap and TLB flushes to split a huge PMD/PUD. This causes radix
partition scope page table mappings to get out of synch with the host
qemu page table mappings.


Maybe we can improve this further?

With deferred_split_huge_page(), a partial unmap splits the huge PMD entries but defers 
splitting the compound page to the shrinker 
(9a982250f773cc8c76f1eee68a770b7cbf2faf78). That means we can still find the 
page as a huge/compound page even when the actual mapping is no longer huge.


Instead of looking at whether the page is compound or not, always walk 
the page table and find the pte shift so that we map it correctly in the 
partition-scoped table.
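
To make that concrete, here is a hedged side-by-side sketch (not code from the patch;
page, hva, vcpu and the locals are as used in kvmppc_book3s_radix_page_fault):

    /*
     * Old heuristic: infer the host mapping size from the struct page
     * returned by __get_user_pages_fast().  After a partial unmap the
     * huge PMD has already been remapped with base-page PTEs, but
     * deferred_split_huge_page() leaves the compound page intact until
     * the shrinker runs, so this can still report 2MB:
     */
    if (page && PageCompound(page))
        pte_size = PAGE_SIZE << compound_order(compound_head(page));

    /*
     * New approach: ask the host page table, which reflects the
     * granularity actually mapped at this instant.  Here shift would
     * come back as PAGE_SHIFT, not PMD_SHIFT, so the partition scope
     * gets a base-page mapping that matches the host.
     */
    local_irq_disable();
    ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
    local_irq_enable();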

Reviewed-by: Aneesh Kumar K.V 




This results in random memory corruption in the guest when running
with THP. The easiest way to reproduce is to use the KVM balloon to free up
a lot of memory in the guest and then shrink the balloon to give the
memory back, while some work is being done in the guest.

Cc: Paul Mackerras 
Cc: David Gibson 
Cc: "Aneesh Kumar K.V" 
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Nicholas Piggin 
---
  arch/powerpc/kvm/book3s_64_mmu_radix.c | 88 ++
  1 file changed, 34 insertions(+), 54 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c 
b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 0af1c0aea1fe..d8792445d95a 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -525,8 +525,8 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, 
struct kvm_vcpu *vcpu,
   unsigned long ea, unsigned long dsisr)
  {
struct kvm *kvm = vcpu->kvm;
-   unsigned long mmu_seq, pte_size;
-   unsigned long gpa, gfn, hva, pfn;
+   unsigned long mmu_seq;
+   unsigned long gpa, gfn, hva;
struct kvm_memory_slot *memslot;
struct page *page = NULL;
long ret;
@@ -623,9 +623,10 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, 
struct kvm_vcpu *vcpu,
 */
hva = gfn_to_hva_memslot(memslot, gfn);
if (upgrade_p && __get_user_pages_fast(hva, 1, 1, &page) == 1) {
-   pfn = page_to_pfn(page);
upgrade_write = true;
} else {
+   unsigned long pfn;
+
/* Call KVM generic code to do the slow-path check */
pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
   writing, upgrade_p);
@@ -639,63 +640,42 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, 
struct kvm_vcpu *vcpu,
}
}
  
-   /* See if we can insert a 1GB or 2MB large PTE here */
-   level = 0;
-   if (page && PageCompound(page)) {
-   pte_size = PAGE_SIZE << compound_order(compound_head(page));
-   if (pte_size >= PUD_SIZE &&
-   (gpa & (PUD_SIZE - PAGE_SIZE)) ==
-   (hva & (PUD_SIZE - PAGE_SIZE))) {
-   level = 2;
-   pfn &= ~((PUD_SIZE >> PAGE_SHIFT) - 1);
-   } else if (pte_size >= PMD_SIZE &&
-  (gpa & (PMD_SIZE - PAGE_SIZE)) ==
-  (hva & (PMD_SIZE - PAGE_SIZE))) {
-   level = 1;
-   pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
-   }
-   }
-
/*
-* Compute the PTE value that we need to insert.
+* Read the PTE from the process' radix tree and use that
+* so we get the shift and attribute bits.
 */
-   if (page) {
-   pgflags = _PAGE_READ | _PAGE_EXEC | _PAGE_PRESENT | _PAGE_PTE |
-   _PAGE_ACCESSED;
-   if (writing || upgrade_write)
-   pgflags |= _PAGE_WRITE | _PAGE_DIRTY;
-   pte = pfn_pte(pfn, __pgprot(pgflags));
+   local_irq_disable();
+   ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
+   pte = *ptep;
+   local_irq_enable();
+
+   /* Get pte level from shift/size */
+   if (shift == PUD_SHIFT &&
+   (gpa & (PUD_SIZE - PAGE_SIZE)) ==
+   (hva & (PUD_SIZE - PAGE_SIZE))) {
+   level = 2;
+   } else if (shift == PMD_SHIFT &&
+  (gpa & (PMD_SIZE - PAGE_SIZE)) ==
+  (hva & (PMD_SIZE - PAGE_SIZE))) {
+   level = 1;
} else {
-   /*
-* Read the PTE from the process' radix tree and use that
-* so we get the attribute bits.
-*/
-   local_irq_disable();
-   ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
-   pte = *ptep;
-   local_irq_enable();
-   if (shift == PUD_SHIFT &&
-   (gpa & (PUD_SIZE - PAGE_SIZE)) ==
-   (hva & (PUD_SIZE - PAGE_SIZE))) {
-

Patch "KVM: PPC: Book3S HV: Don't use compound_order to determine host mapping size" has been added to the 4.18-stable tree

2018-10-16 Thread gregkh


This is a note to let you know that I've just added the patch titled

KVM: PPC: Book3S HV: Don't use compound_order to determine host mapping size

to the 4.18-stable tree which can be found at:

http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
 
kvm-ppc-book3s-hv-don-t-use-compound_order-to-determine-host-mapping-size.patch
and it can be found in the queue-4.18 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let  know about it.


From foo@baz Tue Oct 16 11:10:21 CEST 2018
From: Nicholas Piggin 
Date: Tue, 11 Sep 2018 20:48:34 +1000
Subject: KVM: PPC: Book3S HV: Don't use compound_order to determine host 
mapping size

From: Nicholas Piggin 

[ Upstream commit 71d29f43b6332badc5598c656616a62575e83342 ]

THP paths can defer splitting compound pages until after the actual
remap and TLB flushes to split a huge PMD/PUD. This causes radix
partition scope page table mappings to get out of synch with the host
qemu page table mappings.

This results in random memory corruption in the guest when running
with THP. The easiest way to reproduce is to use the KVM balloon to free up
a lot of memory in the guest and then shrink the balloon to give the
memory back, while some work is being done in the guest.

Cc: David Gibson 
Cc: "Aneesh Kumar K.V" 
Cc: kvm-...@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Nicholas Piggin 
Signed-off-by: Paul Mackerras 
Signed-off-by: Sasha Levin 
Signed-off-by: Greg Kroah-Hartman 
---
 arch/powerpc/kvm/book3s_64_mmu_radix.c |   91 +
 1 file changed, 37 insertions(+), 54 deletions(-)

--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -538,8 +538,8 @@ int kvmppc_book3s_radix_page_fault(struc
   unsigned long ea, unsigned long dsisr)
 {
struct kvm *kvm = vcpu->kvm;
-   unsigned long mmu_seq, pte_size;
-   unsigned long gpa, gfn, hva, pfn;
+   unsigned long mmu_seq;
+   unsigned long gpa, gfn, hva;
struct kvm_memory_slot *memslot;
struct page *page = NULL;
long ret;
@@ -636,9 +636,10 @@ int kvmppc_book3s_radix_page_fault(struc
 */
hva = gfn_to_hva_memslot(memslot, gfn);
if (upgrade_p && __get_user_pages_fast(hva, 1, 1, &page) == 1) {
-   pfn = page_to_pfn(page);
upgrade_write = true;
} else {
+   unsigned long pfn;
+
/* Call KVM generic code to do the slow-path check */
pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
   writing, upgrade_p);
@@ -652,63 +653,45 @@ int kvmppc_book3s_radix_page_fault(struc
}
}
 
-   /* See if we can insert a 1GB or 2MB large PTE here */
-   level = 0;
-   if (page && PageCompound(page)) {
-   pte_size = PAGE_SIZE << compound_order(compound_head(page));
-   if (pte_size >= PUD_SIZE &&
-   (gpa & (PUD_SIZE - PAGE_SIZE)) ==
-   (hva & (PUD_SIZE - PAGE_SIZE))) {
-   level = 2;
-   pfn &= ~((PUD_SIZE >> PAGE_SHIFT) - 1);
-   } else if (pte_size >= PMD_SIZE &&
-  (gpa & (PMD_SIZE - PAGE_SIZE)) ==
-  (hva & (PMD_SIZE - PAGE_SIZE))) {
-   level = 1;
-   pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
-   }
-   }
-
/*
-* Compute the PTE value that we need to insert.
+* Read the PTE from the process' radix tree and use that
+* so we get the shift and attribute bits.
 */
-   if (page) {
-   pgflags = _PAGE_READ | _PAGE_EXEC | _PAGE_PRESENT | _PAGE_PTE |
-   _PAGE_ACCESSED;
-   if (writing || upgrade_write)
-   pgflags |= _PAGE_WRITE | _PAGE_DIRTY;
-   pte = pfn_pte(pfn, __pgprot(pgflags));
+   local_irq_disable();
+   ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
+   pte = *ptep;
+   local_irq_enable();
+
+   /* Get pte level from shift/size */
+   if (shift == PUD_SHIFT &&
+   (gpa & (PUD_SIZE - PAGE_SIZE)) ==
+   (hva & (PUD_SIZE - PAGE_SIZE))) {
+   level = 2;
+   } else if (shift == PMD_SHIFT &&
+  (gpa & (PMD_SIZE - PAGE_SIZE)) ==
+  (hva & (PMD_SIZE - PAGE_SIZE))) {
+   level = 1;
} else {
-   /*
-* Read the PTE from the process' radix tree and use that
-* so we get the attribute bits.
-*/
-   local_irq_disable();
-   ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
-   pte = *ptep;
-   local_irq_enable();
-   if (shift == PUD_S