Re: [kvm-devel] [PATCH] KVM swapping with mmu notifiers #v7

2008-02-28 Thread izik eidus
Quoting Andrea Arcangeli:
 Same as before but on one hand ported to the #v7 API and on the other
 hand ported to latest kvm.git.

 Signed-off-by: Andrea Arcangeli [EMAIL PROTECTED]

 diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
 index 41962e7..e1287ab 100644
 --- a/arch/x86/kvm/Kconfig
 +++ b/arch/x86/kvm/Kconfig
 @@ -21,6 +21,7 @@ config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support"
 	depends on HAVE_KVM && EXPERIMENTAL
   select PREEMPT_NOTIFIERS
 + select MMU_NOTIFIER
   select ANON_INODES
   ---help---
 Support hosting fully virtualized guest machines using hardware
 diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
 index 4583329..4067b0f 100644
 --- a/arch/x86/kvm/mmu.c
 +++ b/arch/x86/kvm/mmu.c
 @@ -642,6 +642,110 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
   account_shadowed(kvm, gfn);
  }
  
 +static void kvm_unmap_spte(struct kvm *kvm, u64 *spte)
 +{
 +	struct page *page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
 + get_page(page);
 + rmap_remove(kvm, spte);
 + set_shadow_pte(spte, shadow_trap_nonpresent_pte);
 + kvm_flush_remote_tlbs(kvm);
 + __free_page(page);
   

With large page support, I think we need put_page() here...

 +}
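
For illustration only, a minimal sketch of the put_page() variant izik seems to be
suggesting (not part of the posted patch): put_page() goes through the compound-page
path, so it also drops the reference correctly when the spte maps a large page,
while __free_page() does not.

static void kvm_unmap_spte(struct kvm *kvm, u64 *spte)
{
	struct page *page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);

	get_page(page);			/* keep the page alive across the spte teardown */
	rmap_remove(kvm, spte);
	set_shadow_pte(spte, shadow_trap_nonpresent_pte);
	kvm_flush_remote_tlbs(kvm);
	put_page(page);			/* put_page() also handles compound (large) pages */
}
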
 +
 +static void kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
 +{
 + u64 *spte, *curr_spte;
 +
 + spte = rmap_next(kvm, rmapp, NULL);
 + while (spte) {
 +		BUG_ON(!(*spte & PT_PRESENT_MASK));
 +		rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte);
 + curr_spte = spte;
 + spte = rmap_next(kvm, rmapp, spte);
 + kvm_unmap_spte(kvm, curr_spte);
 + }
 +}
 +
 +void kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
 +{
 + int i;
 +
 + /*
 +  * If mmap_sem isn't taken, we can look the memslots with only
 +  * the mmu_lock by skipping over the slots with userspace_addr == 0.
 +  */
 +	spin_lock(&kvm->mmu_lock);
 +	for (i = 0; i < kvm->nmemslots; i++) {
 +		struct kvm_memory_slot *memslot = &kvm->memslots[i];
 +		unsigned long start = memslot->userspace_addr;
 +		unsigned long end;
 +
 +		/* mmu_lock protects userspace_addr */
 +		if (!start)
 +			continue;
 +
 +		end = start + (memslot->npages << PAGE_SHIFT);
 +		if (hva >= start && hva < end) {
 +			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
 +			kvm_unmap_rmapp(kvm, &memslot->rmap[gfn_offset]);
 +		}
 +	}
 +	spin_unlock(&kvm->mmu_lock);
 +}
 +
 +static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
 +{
 + u64 *spte;
 + int young = 0;
 +
 + spte = rmap_next(kvm, rmapp, NULL);
 + while (spte) {
 + int _young;
 + u64 _spte = *spte;
 +		BUG_ON(!(_spte & PT_PRESENT_MASK));
 +		_young = _spte & PT_ACCESSED_MASK;
 +		if (_young) {
 +			young = !!_young;
 +			set_shadow_pte(spte, _spte & ~PT_ACCESSED_MASK);
 + }
 + spte = rmap_next(kvm, rmapp, spte);
 + }
 + return young;
 +}
 +
 +int kvm_age_hva(struct kvm *kvm, unsigned long hva)
 +{
 + int i;
 + int young = 0;
 +
 + /*
 +  * If mmap_sem isn't taken, we can look the memslots with only
 +  * the mmu_lock by skipping over the slots with userspace_addr == 0.
 +  */
 +	spin_lock(&kvm->mmu_lock);
 +	for (i = 0; i < kvm->nmemslots; i++) {
 +		struct kvm_memory_slot *memslot = &kvm->memslots[i];
 +		unsigned long start = memslot->userspace_addr;
 +		unsigned long end;
 +
 +		/* mmu_lock protects userspace_addr */
 +		if (!start)
 +			continue;
 +
 +		end = start + (memslot->npages << PAGE_SHIFT);
 +		if (hva >= start && hva < end) {
 +			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
 +			young |= kvm_age_rmapp(kvm, &memslot->rmap[gfn_offset]);
 +		}
 +	}
 +	spin_unlock(&kvm->mmu_lock);
 +
 + if (young)
 + kvm_flush_remote_tlbs(kvm);
 +
 + return young;
 +}
 +
  #ifdef MMU_DEBUG
  static int is_empty_shadow_page(u64 *spt)
  {
 diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
 index 17f9d16..b014b19 100644
 --- a/arch/x86/kvm/paging_tmpl.h
 +++ b/arch/x86/kvm/paging_tmpl.h
 @@ -380,6 +380,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
   int r;
   struct page *page;
   int largepage = 0;
 + unsigned mmu_seq;
  
 	pgprintk("%s: addr %lx err %x\n", __FUNCTION__, addr, error_code);
 	kvm_mmu_audit(vcpu, "pre page fault");
 @@ -415,6 +416,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
   largepage = 1;
   }
   }
 +	mmu_seq = read_seqbegin(&vcpu->kvm->arch.mmu_notifier_invalidate_lock);

[kvm-devel] [PATCH] KVM swapping with mmu notifiers #v7

2008-02-27 Thread Andrea Arcangeli
Same as before but on one hand ported to the #v7 API and on the other
hand ported to latest kvm.git.

Signed-off-by: Andrea Arcangeli [EMAIL PROTECTED]

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 41962e7..e1287ab 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -21,6 +21,7 @@ config KVM
	tristate "Kernel-based Virtual Machine (KVM) support"
	depends on HAVE_KVM && EXPERIMENTAL
select PREEMPT_NOTIFIERS
+   select MMU_NOTIFIER
select ANON_INODES
---help---
  Support hosting fully virtualized guest machines using hardware
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 4583329..4067b0f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -642,6 +642,110 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
account_shadowed(kvm, gfn);
 }
 
+static void kvm_unmap_spte(struct kvm *kvm, u64 *spte)
+{
+	struct page *page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
+   get_page(page);
+   rmap_remove(kvm, spte);
+   set_shadow_pte(spte, shadow_trap_nonpresent_pte);
+   kvm_flush_remote_tlbs(kvm);
+   __free_page(page);
+}
+
+static void kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
+{
+   u64 *spte, *curr_spte;
+
+   spte = rmap_next(kvm, rmapp, NULL);
+   while (spte) {
+		BUG_ON(!(*spte & PT_PRESENT_MASK));
+		rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte);
+   curr_spte = spte;
+   spte = rmap_next(kvm, rmapp, spte);
+   kvm_unmap_spte(kvm, curr_spte);
+   }
+}
+
+void kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+   int i;
+
+   /*
+* If mmap_sem isn't taken, we can look the memslots with only
+* the mmu_lock by skipping over the slots with userspace_addr == 0.
+*/
+	spin_lock(&kvm->mmu_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		struct kvm_memory_slot *memslot = &kvm->memslots[i];
+		unsigned long start = memslot->userspace_addr;
+		unsigned long end;
+
+		/* mmu_lock protects userspace_addr */
+		if (!start)
+			continue;
+
+		end = start + (memslot->npages << PAGE_SHIFT);
+		if (hva >= start && hva < end) {
+			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
+			kvm_unmap_rmapp(kvm, &memslot->rmap[gfn_offset]);
+		}
+	}
+	spin_unlock(&kvm->mmu_lock);
+}
+
+static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
+{
+   u64 *spte;
+   int young = 0;
+
+   spte = rmap_next(kvm, rmapp, NULL);
+   while (spte) {
+   int _young;
+   u64 _spte = *spte;
+		BUG_ON(!(_spte & PT_PRESENT_MASK));
+		_young = _spte & PT_ACCESSED_MASK;
+		if (_young) {
+			young = !!_young;
+			set_shadow_pte(spte, _spte & ~PT_ACCESSED_MASK);
+   }
+   spte = rmap_next(kvm, rmapp, spte);
+   }
+   return young;
+}
+
+int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+   int i;
+   int young = 0;
+
+   /*
+* If mmap_sem isn't taken, we can look the memslots with only
+* the mmu_lock by skipping over the slots with userspace_addr == 0.
+*/
+	spin_lock(&kvm->mmu_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		struct kvm_memory_slot *memslot = &kvm->memslots[i];
+		unsigned long start = memslot->userspace_addr;
+		unsigned long end;
+
+		/* mmu_lock protects userspace_addr */
+		if (!start)
+			continue;
+
+		end = start + (memslot->npages << PAGE_SHIFT);
+		if (hva >= start && hva < end) {
+			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
+			young |= kvm_age_rmapp(kvm, &memslot->rmap[gfn_offset]);
+		}
+	}
+	spin_unlock(&kvm->mmu_lock);
+
+   if (young)
+   kvm_flush_remote_tlbs(kvm);
+
+   return young;
+}
+
 #ifdef MMU_DEBUG
 static int is_empty_shadow_page(u64 *spt)
 {
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 17f9d16..b014b19 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -380,6 +380,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
int r;
struct page *page;
int largepage = 0;
+   unsigned mmu_seq;
 
	pgprintk("%s: addr %lx err %x\n", __FUNCTION__, addr, error_code);
	kvm_mmu_audit(vcpu, "pre page fault");
@@ -415,6 +416,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
largepage = 1;
}
}
+	mmu_seq = read_seqbegin(&vcpu->kvm->arch.mmu_notifier_invalidate_lock);
	page = gfn_to_page(vcpu->kvm, walker.gfn);
 

Re: [kvm-devel] [PATCH] KVM swapping with MMU Notifiers V7

2008-02-18 Thread Andrea Arcangeli
On Sat, Feb 16, 2008 at 03:08:17AM -0800, Andrew Morton wrote:
 On Sat, 16 Feb 2008 11:48:27 +0100 Andrea Arcangeli [EMAIL PROTECTED] wrote:
 
  +void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
  +					    struct mm_struct *mm,
  +					    unsigned long start, unsigned long end,
  +					    int lock)
  +{
  +	for (; start < end; start += PAGE_SIZE)
  +		kvm_mmu_notifier_invalidate_page(mn, mm, start);
  +}
  +
  +static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
  +	.invalidate_page	= kvm_mmu_notifier_invalidate_page,
  +	.age_page		= kvm_mmu_notifier_age_page,
  +	.invalidate_range_end	= kvm_mmu_notifier_invalidate_range_end,
  +};
 
 So this doesn't implement ->invalidate_range_start().

Correct. range_start is needed by subsystems that don't pin the pages
(so they have to drop their secondary mmu mappings on the physical page
before the page is released by the linux VM).
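
For illustration only, a hypothetical sketch of such a non-pinning user of
->invalidate_range_start(); every name here (example_mm, the zap/flush helpers) is
invented, and the signature simply mirrors the invalidate_range_end one quoted above:

static void example_invalidate_range_start(struct mmu_notifier *mn,
					   struct mm_struct *mm,
					   unsigned long start, unsigned long end,
					   int lock)
{
	/* the notifier is embedded in the driver's per-mm state (hypothetical type) */
	struct example_mm *emm = container_of(mn, struct example_mm, notifier);

	/*
	 * Tear down the secondary mappings covering [start, end) and flush
	 * the device TLB *before* the linux ptes are cleared and the pages
	 * are freed: without holding a page reference this is the only safe
	 * point to do it.
	 */
	example_zap_secondary_mappings(emm, start, end);
	example_flush_secondary_tlb(emm);
}
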

 By what means does it prevent new mappings from being established in the
 range after core mm has tried to call ->invalidate_range_start()?
 mmap_sem, I assume?

No: populating the range only takes mmap_sem in read mode, and the kvm
page fault of course also takes it only in read mode.

What makes it safe is that invalidate_range_end is called _after_ the
linux pte is cleared. The kvm page fault, if it triggers, will call into
get_user_pages again to re-establish the linux pte _before_ establishing
the spte.

It's the same reason it's safe to flush the tlb after clearing the
linux pte: sptes are like a secondary tlb.

  +   /* set userspace_addr atomically for kvm_hva_to_rmapp */
  +	spin_lock(&kvm->mmu_lock);
  +	memslot->userspace_addr = userspace_addr;
  +	spin_unlock(&kvm->mmu_lock);
 
 are you sure?  kvm_unmap_hva() and kvm_age_hva() read ->userspace_addr a
 single time and it doesn't immediately look like there's a need to take the
 lock here?

gcc will always write it with a single movq, but taking the lock keeps it
C-specs-compliant, and since this is by far not a performance-critical
path I thought it was simpler than some other single-instruction atomic
store.



Re: [kvm-devel] [PATCH] KVM swapping with MMU Notifiers V7

2008-02-18 Thread Andrea Arcangeli
On Sat, Feb 16, 2008 at 05:51:38AM -0600, Robin Holt wrote:
 I am doing this in xpmem with a stack-based structure in the function
 calling get_user_pages.  That structure describes the start and
 end address of the range we are doing the get_user_pages on.  If an
 invalidate_range_begin comes in while we are off to the kernel doing
 the get_user_pages, the invalidate_range_begin marks that structure
 indicating an invalidate came in.  When the get_user_pages caller gets the
 structure relocked, it checks that flag (really a generation counter)
 and if it is set, retries the get_user_pages.  After 3 retries, it
 returns -EAGAIN and the fault is started over from the remote side.

A seqlock sounds like a good optimization for the non-swapping fast
path: a per-VM seqlock counter can tell us when we need to call
get_user_pages a second time. It won't really be a retry like in 99% of
seqlock usages on the reader side, just a second get_user_pages to
trigger a minor fault. Then, if the page is different in the second run,
we'll really retry (so as a function of the get_user_pages page array,
not of the seqlock), and there's no risk of livelock because
get_user_pages returning a different page won't be the common case. The
seqlock should be increased once before the invalidate and a second time
once the invalidate is over.
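
For illustration only, a minimal sketch of that scheme, assuming the per-VM seqlock
is the mmu_notifier_invalidate_lock the #v7 KVM patch adds to struct kvm_arch; the
function names are invented, and as noted above the real fault path would compare
the page returned by the second get_user_pages rather than loop on the seqlock
alone:

/* invalidate side: the sequence is bumped before and after the teardown */
static void example_invalidate(struct kvm *kvm, unsigned long hva)
{
	write_seqlock(&kvm->arch.mmu_notifier_invalidate_lock);
	kvm_unmap_hva(kvm, hva);
	write_sequnlock(&kvm->arch.mmu_notifier_invalidate_lock);
}

/* fault side: re-run get_user_pages (via gfn_to_page) if an invalidate raced */
static struct page *example_fault_page(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	struct page *page;
	unsigned seq;

	do {
		seq = read_seqbegin(&vcpu->kvm->arch.mmu_notifier_invalidate_lock);
		page = gfn_to_page(vcpu->kvm, gfn);	/* get_user_pages() inside */
		if (!read_seqretry(&vcpu->kvm->arch.mmu_notifier_invalidate_lock, seq))
			break;				/* no invalidate ran: page is good */
		kvm_release_page_clean(page);		/* drop it and fault it in again */
	} while (1);

	return page;
}
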



Re: [kvm-devel] [PATCH] KVM swapping with MMU Notifiers V7

2008-02-16 Thread Andrew Morton
On Sat, 16 Feb 2008 11:48:27 +0100 Andrea Arcangeli [EMAIL PROTECTED] wrote:

 +void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
 +					    struct mm_struct *mm,
 +					    unsigned long start, unsigned long end,
 +					    int lock)
 +{
 +	for (; start < end; start += PAGE_SIZE)
 +		kvm_mmu_notifier_invalidate_page(mn, mm, start);
 +}
 +
 +static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
 +	.invalidate_page	= kvm_mmu_notifier_invalidate_page,
 +	.age_page		= kvm_mmu_notifier_age_page,
 +	.invalidate_range_end	= kvm_mmu_notifier_invalidate_range_end,
 +};

So this doesn't implement ->invalidate_range_start().

By what means does it prevent new mappings from being established in the
range after core mm has tried to call ->invalidate_range_start()?
mmap_sem, I assume?


 + /* set userspace_addr atomically for kvm_hva_to_rmapp */
 +	spin_lock(&kvm->mmu_lock);
 +	memslot->userspace_addr = userspace_addr;
 +	spin_unlock(&kvm->mmu_lock);

are you sure?  kvm_unmap_hva() and kvm_age_hva() read ->userspace_addr a
single time and it doesn't immediately look like there's a need to take the
lock here?





[kvm-devel] [PATCH] KVM swapping with MMU Notifiers V7

2008-02-16 Thread Andrea Arcangeli
The two patches below enable KVM to swap the guest physical memory
through Christoph's V7.

There's one last _purely_theoretical_ race condition I figured out and
that I'm wondering how to best fix. In the worst case the race leaves a
few guest physical pages pinned by sptes. It can materialize if the
linux pte is zapped after get_user_pages returns but before the page is
mapped by the spte and tracked by rmap. The invalidate_ calls can also
likely be optimized further, but that's not a fast path so it's not
urgent.

Signed-off-by: Andrea Arcangeli [EMAIL PROTECTED]

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 41962e7..e1287ab 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -21,6 +21,7 @@ config KVM
	tristate "Kernel-based Virtual Machine (KVM) support"
	depends on HAVE_KVM && EXPERIMENTAL
select PREEMPT_NOTIFIERS
+   select MMU_NOTIFIER
select ANON_INODES
---help---
  Support hosting fully virtualized guest machines using hardware
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index fd39cd1..b56e388 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -533,6 +533,110 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
kvm_flush_remote_tlbs(kvm);
 }
 
+static void kvm_unmap_spte(struct kvm *kvm, u64 *spte)
+{
+	struct page *page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
+   get_page(page);
+   rmap_remove(kvm, spte);
+   set_shadow_pte(spte, shadow_trap_nonpresent_pte);
+   kvm_flush_remote_tlbs(kvm);
+   __free_page(page);
+}
+
+static void kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
+{
+   u64 *spte, *curr_spte;
+
+   spte = rmap_next(kvm, rmapp, NULL);
+   while (spte) {
+		BUG_ON(!(*spte & PT_PRESENT_MASK));
+		rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte);
+   curr_spte = spte;
+   spte = rmap_next(kvm, rmapp, spte);
+   kvm_unmap_spte(kvm, curr_spte);
+   }
+}
+
+void kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+   int i;
+
+   /*
+* If mmap_sem isn't taken, we can look the memslots with only
+* the mmu_lock by skipping over the slots with userspace_addr == 0.
+*/
+	spin_lock(&kvm->mmu_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		struct kvm_memory_slot *memslot = &kvm->memslots[i];
+		unsigned long start = memslot->userspace_addr;
+		unsigned long end;
+
+		/* mmu_lock protects userspace_addr */
+		if (!start)
+			continue;
+
+		end = start + (memslot->npages << PAGE_SHIFT);
+		if (hva >= start && hva < end) {
+			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
+			kvm_unmap_rmapp(kvm, &memslot->rmap[gfn_offset]);
+		}
+	}
+	spin_unlock(&kvm->mmu_lock);
+}
+
+static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
+{
+   u64 *spte;
+   int young = 0;
+
+   spte = rmap_next(kvm, rmapp, NULL);
+   while (spte) {
+   int _young;
+   u64 _spte = *spte;
+		BUG_ON(!(_spte & PT_PRESENT_MASK));
+		_young = _spte & PT_ACCESSED_MASK;
+		if (_young) {
+			young = !!_young;
+			set_shadow_pte(spte, _spte & ~PT_ACCESSED_MASK);
+   }
+   spte = rmap_next(kvm, rmapp, spte);
+   }
+   return young;
+}
+
+int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+   int i;
+   int young = 0;
+
+   /*
+* If mmap_sem isn't taken, we can look the memslots with only
+* the mmu_lock by skipping over the slots with userspace_addr == 0.
+*/
+	spin_lock(&kvm->mmu_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		struct kvm_memory_slot *memslot = &kvm->memslots[i];
+		unsigned long start = memslot->userspace_addr;
+		unsigned long end;
+
+		/* mmu_lock protects userspace_addr */
+		if (!start)
+			continue;
+
+		end = start + (memslot->npages << PAGE_SHIFT);
+		if (hva >= start && hva < end) {
+			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
+			young |= kvm_age_rmapp(kvm, &memslot->rmap[gfn_offset]);
+		}
+	}
+	spin_unlock(&kvm->mmu_lock);
+
+   if (young)
+   kvm_flush_remote_tlbs(kvm);
+
+   return young;
+}
+
 #ifdef MMU_DEBUG
 static int is_empty_shadow_page(u64 *spt)
 {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0c910c7..2b2398f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3185,6 +3185,46 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
	free_page((unsigned long)vcpu->arch.pio_data);
 }
 
+static inline struct kvm 

Re: [kvm-devel] [PATCH] KVM swapping with MMU Notifiers V7

2008-02-16 Thread Robin Holt
On Sat, Feb 16, 2008 at 11:48:27AM +0100, Andrea Arcangeli wrote:
 The two patches below enable KVM to swap the guest physical memory
 through Christoph's V7.
 
 There's one last _purely_theoretical_ race condition I figured out and
 that I'm wondering how to best fix. In the worst case the race leaves a
 few guest physical pages pinned by sptes. It can materialize if the
 linux pte is zapped after get_user_pages returns but before the page is
 mapped by the spte and tracked by rmap. The invalidate_ calls can also
 likely be optimized further, but that's not a fast path so it's not
 urgent.

I am doing this in xpmem with a stack-based structure in the function
calling get_user_pages.  That structure describes the start and
end address of the range we are doing the get_user_pages on.  If an
invalidate_range_begin comes in while we are off to the kernel doing
the get_user_pages, the invalidate_range_begin marks that structure
indicating an invalidate came in.  When the get_user_pages gets the
structures relocked, it checks that flag (really a generation counter)
and if it is set, retries the get_user_pages.  After 3 retries, it
returns -EAGAIN and the fault is started over from the remote side.
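
For illustration only, a rough sketch of the retry scheme described above; every
name here (the structure, the lock, the helpers) is invented and the real xpmem
code certainly differs:

/* lock protecting the stack-based range descriptor (hypothetical) */
static DEFINE_SPINLOCK(example_range_lock);

struct example_gup_range {
	unsigned long start, end;
	unsigned long gen;		/* bumped by invalidate_range_begin */
};

static int example_fault_with_retries(struct mm_struct *mm,
				      struct example_gup_range *range,
				      unsigned long addr, struct page **pagep)
{
	int retries;

	for (retries = 0; retries < 3; retries++) {
		unsigned long gen;
		int ret;

		spin_lock(&example_range_lock);
		gen = range->gen;		/* snapshot before dropping the lock */
		spin_unlock(&example_range_lock);

		down_read(&mm->mmap_sem);
		ret = get_user_pages(current, mm, addr, 1, 1, 0, pagep, NULL);
		up_read(&mm->mmap_sem);
		if (ret != 1)
			return -EFAULT;

		spin_lock(&example_range_lock);
		if (gen == range->gen) {	/* no invalidate raced with us */
			spin_unlock(&example_range_lock);
			return 0;
		}
		spin_unlock(&example_range_lock);
		put_page(*pagep);		/* an invalidate came in: try again */
	}
	return -EAGAIN;				/* let the remote side restart the fault */
}
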

Thanks,
Robin
