[PATCH 5/5] KVM: MMU: fast invalid all mmio sptes

2013-03-15 Thread Xiao Guangrong
This patch introduces a very simple and scalable way to invalidate all
mmio sptes - it does not need to walk any shadow pages or hold the mmu-lock.

KVM maintains a global mmio invalid generation-number which is stored in
kvm->arch.mmio_invalid_gen, and every mmio spte stores the current global
generation-number into its available bits when it is created.

When KVM needs to zap all mmio sptes, it simply increases the global
generation-number. When the guest does an mmio access, KVM intercepts the
MMIO #PF, walks the shadow page table and gets the mmio spte. If the
generation-number on the spte does not equal the global generation-number,
it goes to the normal #PF handler to update the mmio spte.

Since 19 bits are used to store the generation-number on the mmio spte, the
generation-number can wrap around after 33554432 increments. That is large
enough for nearly all cases, but to make the code more robust we zap all
shadow pages when the number wraps around.
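
As a purely illustrative aside, the pair of helpers below sketches the idea of
caching a generation number in an spte's available bits. The shift, width and
names are assumptions made for this sketch only; the real layout is defined by
generation_mmio_spte_mask() and get_mmio_spte_generation() in the mmu.c hunks
of this patch.

	/* Illustrative sketch; not the bit layout the patch actually uses. */
	#define SKETCH_MMIO_GEN_SHIFT	3
	#define SKETCH_MMIO_GEN_BITS	19
	#define SKETCH_MMIO_GEN_MASK	((1ull << SKETCH_MMIO_GEN_BITS) - 1)

	static u64 sketch_generation_mmio_spte_mask(unsigned int gen)
	{
		/* Pack the low 19 bits of the generation into the assumed spte bits. */
		return ((u64)gen & SKETCH_MMIO_GEN_MASK) << SKETCH_MMIO_GEN_SHIFT;
	}

	static unsigned int sketch_get_mmio_spte_generation(u64 spte)
	{
		/* Recover the generation packed in by the helper above. */
		return (spte >> SKETCH_MMIO_GEN_SHIFT) & SKETCH_MMIO_GEN_MASK;
	}

mark_mmio_spte() below ORs the encoded generation into the new mmio spte, and
check_mmio_spte() compares the decoded value against the current
kvm->arch.mmio_invalid_gen; a mismatch sends the fault back to the normal #PF
path.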

Signed-off-by: Xiao Guangrong 
---
 arch/x86/include/asm/kvm_host.h |2 +
 arch/x86/kvm/mmu.c  |   61 +--
 arch/x86/kvm/mmutrace.h |   17 +++
 arch/x86/kvm/paging_tmpl.h  |7 +++-
 arch/x86/kvm/vmx.c  |4 ++
 arch/x86/kvm/x86.c  |6 +--
 6 files changed, 82 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ef7f4a5..572398e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -529,6 +529,7 @@ struct kvm_arch {
unsigned int n_requested_mmu_pages;
unsigned int n_max_mmu_pages;
unsigned int indirect_shadow_pages;
+   unsigned int mmio_invalid_gen;
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
/*
 * Hash table of struct kvm_mmu_page.
@@ -765,6 +766,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
 void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
 struct kvm_memory_slot *slot,
 gfn_t gfn_offset, unsigned long mask);
+void kvm_mmu_invalid_mmio_spte(struct kvm *kvm);
 void kvm_mmu_zap_all(struct kvm *kvm);
 unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 13626f4..7093a92 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -234,12 +234,13 @@ static unsigned int get_mmio_spte_generation(u64 spte)
 static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
   unsigned access)
 {
-   u64 mask = generation_mmio_spte_mask(0);
+   unsigned int gen = ACCESS_ONCE(kvm->arch.mmio_invalid_gen);
+   u64 mask = generation_mmio_spte_mask(gen);

access &= ACC_WRITE_MASK | ACC_USER_MASK;
mask |= shadow_mmio_mask | access | gfn << PAGE_SHIFT;

-   trace_mark_mmio_spte(sptep, gfn, access, 0);
+   trace_mark_mmio_spte(sptep, gfn, access, gen);
mmu_spte_set(sptep, mask);
 }

@@ -269,6 +270,34 @@ static bool set_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn,
return false;
 }

+static bool check_mmio_spte(struct kvm *kvm, u64 spte)
+{
+   return get_mmio_spte_generation(spte) ==
+   ACCESS_ONCE(kvm->arch.mmio_invalid_gen);
+}
+
+/*
+ * The caller should protect concurrent access on
+ * kvm->arch.mmio_invalid_gen. Currently, it is used by
+ * kvm_arch_commit_memory_region and protected by kvm->slots_lock.
+ */
+void kvm_mmu_invalid_mmio_spte(struct kvm *kvm)
+{
+   /* Ensure update memslot has been completed. */
+   smp_mb();
+
+trace_kvm_mmu_invalid_mmio_spte(kvm);
+
+   /*
+* The very rare case: if the generation-number is round,
+* zap all shadow pages.
+*/
+   if (unlikely(kvm->arch.mmio_invalid_gen++ == MAX_GEN)) {
+   kvm->arch.mmio_invalid_gen = 0;
+   return kvm_mmu_zap_all(kvm);
+   }
+}
+
 static inline u64 rsvd_bits(int s, int e)
 {
return ((1ULL << (e - s + 1)) - 1) << s;
@@ -3183,9 +3212,12 @@ static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr)
 }

 /*
- * If it is a real mmio page fault, return 1 and emulat the instruction
- * directly, return 0 to let CPU fault again on the address, -1 is
- * returned if bug is detected.
+ * Return value:
+ * 2: invalid spte is detected then let the real page fault path
+ *    update the mmio spte.
+ * 1: it is a real mmio page fault, emulate the instruction directly.
+ * 0: let CPU fault again on the address.
+ * -1: bug is detected.
  */
 int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
 {
@@ -3200,6 +3232,9 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
gfn_t gfn = get_mmio_spte_gfn(spte);
unsigned access = get_mmio_spte_access(spte);

+ 
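
As an aside, a hedged sketch of how a caller might act on the return values
documented above; the helper names are hypothetical and this is not the code
in the vmx.c or paging_tmpl.h hunks of this patch:

	/* Hedged sketch of a caller; not the actual vmx.c/paging_tmpl.h code. */
	static int sketch_handle_mmio_fault(struct kvm_vcpu *vcpu, u64 addr)
	{
		int ret = handle_mmio_page_fault_common(vcpu, addr, true);

		if (ret < 0)		/* -1: bug detected */
			return ret;
		if (ret == 1)		/* real mmio fault: emulate directly */
			return sketch_emulate_mmio(vcpu, addr);	/* hypothetical */
		/*
		 * 0: let the CPU fault again; 2: the spte carries a stale
		 * generation, so fall through to the normal #PF path, which
		 * rebuilds the mmio spte with the current generation.
		 */
		return sketch_normal_page_fault(vcpu, addr);	/* hypothetical */
	}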

Re: [PATCH 5/5] KVM: MMU: fast invalid all mmio sptes

2013-03-15 Thread Takuya Yoshikawa
On Fri, 15 Mar 2013 23:29:53 +0800
Xiao Guangrong  wrote:

> +/*
> + * The caller should protect concurrent access on
> + * kvm->arch.mmio_invalid_gen. Currently, it is used by
> + * kvm_arch_commit_memory_region and protected by kvm->slots_lock.
> + */
> +void kvm_mmu_invalid_mmio_spte(struct kvm *kvm)

kvm_mmu_invalidate_mmio_sptes() may be a better name.

Thanks,
Takuya

> +{
> + /* Ensure update memslot has been completed. */
> + smp_mb();
> +
> +  trace_kvm_mmu_invalid_mmio_spte(kvm);
> +
> + /*
> +  * The very rare case: if the generation-number is round,
> +  * zap all shadow pages.
> +  */
> + if (unlikely(kvm->arch.mmio_invalid_gen++ == MAX_GEN)) {
> + kvm->arch.mmio_invalid_gen = 0;
> + return kvm_mmu_zap_all(kvm);
> + }
> +}
> +


Re: [PATCH 5/5] KVM: MMU: fast invalid all mmio sptes

2013-03-17 Thread Gleb Natapov
On Fri, Mar 15, 2013 at 11:29:53PM +0800, Xiao Guangrong wrote:
> This patch tries to introduce a very simple and scale way to invalid all
> mmio sptes - it need not walk any shadow pages and hold mmu-lock
> 
> KVM maintains a global mmio invalid generation-number which is stored in
> kvm->arch.mmio_invalid_gen and every mmio spte stores the current global
> generation-number into his available bits when it is created
> 
> When KVM need zap all mmio sptes, it just simply increase the global
> generation-number. When guests do mmio access, KVM intercepts a MMIO #PF
> then it walks the shadow page table and get the mmio spte. If the
> generation-number on the spte does not equal the global generation-number,
> it will go to the normal #PF handler to update the mmio spte
> 
> Since 19 bits are used to store generation-number on mmio spte, the
> generation-number can be round after 33554432 times. It is large enough
> for nearly all most cases, but making the code be more strong, we zap all
> shadow pages when the number is round
> 
Very nice idea, but why drop Takuya's patches instead of using
kvm_mmu_zap_mmio_sptes() when the generation number overflows?


> Signed-off-by: Xiao Guangrong 
> ---
>  arch/x86/include/asm/kvm_host.h |2 +
>  arch/x86/kvm/mmu.c  |   61 
> +--
>  arch/x86/kvm/mmutrace.h |   17 +++
>  arch/x86/kvm/paging_tmpl.h  |7 +++-
>  arch/x86/kvm/vmx.c  |4 ++
>  arch/x86/kvm/x86.c  |6 +--
>  6 files changed, 82 insertions(+), 15 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index ef7f4a5..572398e 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -529,6 +529,7 @@ struct kvm_arch {
>   unsigned int n_requested_mmu_pages;
>   unsigned int n_max_mmu_pages;
>   unsigned int indirect_shadow_pages;
> + unsigned int mmio_invalid_gen;
Why invalid? Should be mmio_valid_gen or mmio_current_gen.

>   struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
>   /*
>* Hash table of struct kvm_mmu_page.
> @@ -765,6 +766,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, 
> int slot);
>  void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
>struct kvm_memory_slot *slot,
>gfn_t gfn_offset, unsigned long mask);
> +void kvm_mmu_invalid_mmio_spte(struct kvm *kvm);
Agree with Takuya that kvm_mmu_invalidate_mmio_sptes() is a better name.

>  void kvm_mmu_zap_all(struct kvm *kvm);
>  unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
>  void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int 
> kvm_nr_mmu_pages);
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 13626f4..7093a92 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -234,12 +234,13 @@ static unsigned int get_mmio_spte_generation(u64 spte)
>  static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
>  unsigned access)
>  {
> - u64 mask = generation_mmio_spte_mask(0);
> + unsigned int gen = ACCESS_ONCE(kvm->arch.mmio_invalid_gen);
> + u64 mask = generation_mmio_spte_mask(gen);
> 
>   access &= ACC_WRITE_MASK | ACC_USER_MASK;
>   mask |= shadow_mmio_mask | access | gfn << PAGE_SHIFT;
> 
> - trace_mark_mmio_spte(sptep, gfn, access, 0);
> + trace_mark_mmio_spte(sptep, gfn, access, gen);
>   mmu_spte_set(sptep, mask);
>  }
> 
> @@ -269,6 +270,34 @@ static bool set_mmio_spte(struct kvm *kvm, u64 *sptep, 
> gfn_t gfn,
>   return false;
>  }
> 
> +static bool check_mmio_spte(struct kvm *kvm, u64 spte)
> +{
> + return get_mmio_spte_generation(spte) ==
> + ACCESS_ONCE(kvm->arch.mmio_invalid_gen);
> +}
> +
> +/*
> + * The caller should protect concurrent access on
> + * kvm->arch.mmio_invalid_gen. Currently, it is used by
> + * kvm_arch_commit_memory_region and protected by kvm->slots_lock.
> + */
> +void kvm_mmu_invalid_mmio_spte(struct kvm *kvm)
> +{
> + /* Ensure update memslot has been completed. */
> + smp_mb();
What barrier is this one paired with?

> +
> +  trace_kvm_mmu_invalid_mmio_spte(kvm);
Something wrong with indentation.

> +
> + /*
> +  * The very rare case: if the generation-number is round,
> +  * zap all shadow pages.
> +  */
> + if (unlikely(kvm->arch.mmio_invalid_gen++ == MAX_GEN)) {
> + kvm->arch.mmio_invalid_gen = 0;
> + return kvm_mmu_zap_all(kvm);
> + }
> +}
> +
>  static inline u64 rsvd_bits(int s, int e)
>  {
>   return ((1ULL << (e - s + 1)) - 1) << s;
> @@ -3183,9 +3212,12 @@ static u64 walk_shadow_page_get_mmio_spte(struct 
> kvm_vcpu *vcpu, u64 addr)
>  }
> 
>  /*
> - * If it is a real mmio page fault, return 1 and emulat the instruction
> - * directly, return 0 to let CPU fault again on the address, -1 is
> - * returned if bug is detected.
> + * Retu

Re: [PATCH 5/5] KVM: MMU: fast invalid all mmio sptes

2013-03-18 Thread Xiao Guangrong
On 03/16/2013 10:13 AM, Takuya Yoshikawa wrote:
> On Fri, 15 Mar 2013 23:29:53 +0800
> Xiao Guangrong  wrote:
> 
>> +/*
>> + * The caller should protect concurrent access on
>> + * kvm->arch.mmio_invalid_gen. Currently, it is used by
>> + * kvm_arch_commit_memory_region and protected by kvm->slots_lock.
>> + */
>> +void kvm_mmu_invalid_mmio_spte(struct kvm *kvm)
> 
> kvm_mmu_invalidate_mmio_sptes() may be a better name.

Yes, I am not good at naming; this is a better name, thank you!



Re: [PATCH 5/5] KVM: MMU: fast invalid all mmio sptes

2013-03-18 Thread Xiao Guangrong
On 03/17/2013 11:02 PM, Gleb Natapov wrote:
> On Fri, Mar 15, 2013 at 11:29:53PM +0800, Xiao Guangrong wrote:
>> This patch tries to introduce a very simple and scale way to invalid all
>> mmio sptes - it need not walk any shadow pages and hold mmu-lock
>>
>> KVM maintains a global mmio invalid generation-number which is stored in
>> kvm->arch.mmio_invalid_gen and every mmio spte stores the current global
>> generation-number into his available bits when it is created
>>
>> When KVM need zap all mmio sptes, it just simply increase the global
>> generation-number. When guests do mmio access, KVM intercepts a MMIO #PF
>> then it walks the shadow page table and get the mmio spte. If the
>> generation-number on the spte does not equal the global generation-number,
>> it will go to the normal #PF handler to update the mmio spte
>>
>> Since 19 bits are used to store generation-number on mmio spte, the
>> generation-number can be round after 33554432 times. It is large enough
>> for nearly all most cases, but making the code be more strong, we zap all
>> shadow pages when the number is round
>>
> Very nice idea, but why drop Takuya patches instead of using
> kvm_mmu_zap_mmio_sptes() when generation number overflows.

I am not sure whether it is still needed. Requesting to zap all mmio sptes
more than 50 times is really, really rare; it almost never happens.
(By the way, 33554432 is wrong in the changelog, I just copied it from my
original implementation.) And, after my patch optimizing zapping all shadow
pages, zap-all-sps should not be a problem anymore since it does not hold the
lock for too long.

Your idea?

> 
> 
>> Signed-off-by: Xiao Guangrong 
>> ---
>>  arch/x86/include/asm/kvm_host.h |2 +
>>  arch/x86/kvm/mmu.c  |   61 
>> +--
>>  arch/x86/kvm/mmutrace.h |   17 +++
>>  arch/x86/kvm/paging_tmpl.h  |7 +++-
>>  arch/x86/kvm/vmx.c  |4 ++
>>  arch/x86/kvm/x86.c  |6 +--
>>  6 files changed, 82 insertions(+), 15 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/kvm_host.h 
>> b/arch/x86/include/asm/kvm_host.h
>> index ef7f4a5..572398e 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -529,6 +529,7 @@ struct kvm_arch {
>>  unsigned int n_requested_mmu_pages;
>>  unsigned int n_max_mmu_pages;
>>  unsigned int indirect_shadow_pages;
>> +unsigned int mmio_invalid_gen;
> Why invalid? Should be mmio_valid_gen or mmio_current_get.

mmio_invalid_gen is only updated in kvm_mmu_invalidate_mmio_sptes,
so I named it _invalid_. But mmio_valid_gen is good for me.

> 
>>  struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
>>  /*
>>   * Hash table of struct kvm_mmu_page.
>> @@ -765,6 +766,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, 
>> int slot);
>>  void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
>>   struct kvm_memory_slot *slot,
>>   gfn_t gfn_offset, unsigned long mask);
>> +void kvm_mmu_invalid_mmio_spte(struct kvm *kvm);
> Agree with Takuya that kvm_mmu_invalidate_mmio_sptes() is a better name.

Me too.

> 
>>  void kvm_mmu_zap_all(struct kvm *kvm);
>>  unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
>>  void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int 
>> kvm_nr_mmu_pages);
>> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
>> index 13626f4..7093a92 100644
>> --- a/arch/x86/kvm/mmu.c
>> +++ b/arch/x86/kvm/mmu.c
>> @@ -234,12 +234,13 @@ static unsigned int get_mmio_spte_generation(u64 spte)
>>  static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
>> unsigned access)
>>  {
>> -u64 mask = generation_mmio_spte_mask(0);
>> +unsigned int gen = ACCESS_ONCE(kvm->arch.mmio_invalid_gen);
>> +u64 mask = generation_mmio_spte_mask(gen);
>>
>>  access &= ACC_WRITE_MASK | ACC_USER_MASK;
>>  mask |= shadow_mmio_mask | access | gfn << PAGE_SHIFT;
>>
>> -trace_mark_mmio_spte(sptep, gfn, access, 0);
>> +trace_mark_mmio_spte(sptep, gfn, access, gen);
>>  mmu_spte_set(sptep, mask);
>>  }
>>
>> @@ -269,6 +270,34 @@ static bool set_mmio_spte(struct kvm *kvm, u64 *sptep, 
>> gfn_t gfn,
>>  return false;
>>  }
>>
>> +static bool check_mmio_spte(struct kvm *kvm, u64 spte)
>> +{
>> +return get_mmio_spte_generation(spte) ==
>> +ACCESS_ONCE(kvm->arch.mmio_invalid_gen);
>> +}
>> +
>> +/*
>> + * The caller should protect concurrent access on
>> + * kvm->arch.mmio_invalid_gen. Currently, it is used by
>> + * kvm_arch_commit_memory_region and protected by kvm->slots_lock.
>> + */
>> +void kvm_mmu_invalid_mmio_spte(struct kvm *kvm)
>> +{
>> +/* Ensure update memslot has been completed. */
>> +smp_mb();
> What barrier this one is paired with?

It is paired with nothing. :)

I used the mb here just to avoid the generation-number being increased before
the memslot update has completed.
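
To make the stated intent concrete, here is a minimal writer-side sketch;
sketch_install_new_memslots() is a hypothetical stand-in for the memslot
update done by kvm_arch_commit_memory_region, and, as Gleb notes, a reader
would still need a pairing barrier for this to be a two-sided guarantee:

	/* Writer-side ordering only; a sketch of the stated intent. */
	static void sketch_commit_and_invalidate(struct kvm *kvm)
	{
		sketch_install_new_memslots(kvm);  /* hypothetical: publish slots  */
		smp_mb();                          /* finish the update before ... */
		kvm->arch.mmio_invalid_gen++;      /* ... bumping the generation   */
	}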

Re: [PATCH 5/5] KVM: MMU: fast invalid all mmio sptes

2013-03-18 Thread Gleb Natapov
On Mon, Mar 18, 2013 at 04:08:50PM +0800, Xiao Guangrong wrote:
> On 03/17/2013 11:02 PM, Gleb Natapov wrote:
> > On Fri, Mar 15, 2013 at 11:29:53PM +0800, Xiao Guangrong wrote:
> >> This patch tries to introduce a very simple and scale way to invalid all
> >> mmio sptes - it need not walk any shadow pages and hold mmu-lock
> >>
> >> KVM maintains a global mmio invalid generation-number which is stored in
> >> kvm->arch.mmio_invalid_gen and every mmio spte stores the current global
> >> generation-number into his available bits when it is created
> >>
> >> When KVM need zap all mmio sptes, it just simply increase the global
> >> generation-number. When guests do mmio access, KVM intercepts a MMIO #PF
> >> then it walks the shadow page table and get the mmio spte. If the
> >> generation-number on the spte does not equal the global generation-number,
> >> it will go to the normal #PF handler to update the mmio spte
> >>
> >> Since 19 bits are used to store generation-number on mmio spte, the
> >> generation-number can be round after 33554432 times. It is large enough
> >> for nearly all most cases, but making the code be more strong, we zap all
> >> shadow pages when the number is round
> >>
> > Very nice idea, but why drop Takuya patches instead of using
> > kvm_mmu_zap_mmio_sptes() when generation number overflows.
> 
> I am not sure whether it is still needed. Requesting to zap all mmio sptes for
> more than 50 times is really really rare, it nearly does not happen.
> (By the way, 33554432 is wrong in the changelog, i just copy that for my 
> origin
> implantation.) And, after my patch optimizing zapping all shadow pages,
> zap-all-sps should not be a problem anymore since it does not take too much 
> lock
> time.
> 
> Your idea?
> 
I expect 50 to become less since I already have plans to store some
information in the mmio spte. Even if zap-all-sptes becomes faster, we
still needlessly zap all sptes when we could zap only the mmio ones.

> > 
> > 
> >> Signed-off-by: Xiao Guangrong 
> >> ---
> >>  arch/x86/include/asm/kvm_host.h |2 +
> >>  arch/x86/kvm/mmu.c  |   61 
> >> +--
> >>  arch/x86/kvm/mmutrace.h |   17 +++
> >>  arch/x86/kvm/paging_tmpl.h  |7 +++-
> >>  arch/x86/kvm/vmx.c  |4 ++
> >>  arch/x86/kvm/x86.c  |6 +--
> >>  6 files changed, 82 insertions(+), 15 deletions(-)
> >>
> >> diff --git a/arch/x86/include/asm/kvm_host.h 
> >> b/arch/x86/include/asm/kvm_host.h
> >> index ef7f4a5..572398e 100644
> >> --- a/arch/x86/include/asm/kvm_host.h
> >> +++ b/arch/x86/include/asm/kvm_host.h
> >> @@ -529,6 +529,7 @@ struct kvm_arch {
> >>unsigned int n_requested_mmu_pages;
> >>unsigned int n_max_mmu_pages;
> >>unsigned int indirect_shadow_pages;
> >> +  unsigned int mmio_invalid_gen;
> > Why invalid? Should be mmio_valid_gen or mmio_current_get.
> 
> mmio_invalid_gen is only updated in kvm_mmu_invalidate_mmio_sptes,
> so i named it as _invalid_. But mmio_valid_gen is good for me.
> 
It holds the currently valid value though, so calling it "invalid" is
confusing.

> > 
> >>struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
> >>/*
> >> * Hash table of struct kvm_mmu_page.
> >> @@ -765,6 +766,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, 
> >> int slot);
> >>  void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
> >> struct kvm_memory_slot *slot,
> >> gfn_t gfn_offset, unsigned long mask);
> >> +void kvm_mmu_invalid_mmio_spte(struct kvm *kvm);
> > Agree with Takuya that kvm_mmu_invalidate_mmio_sptes() is a better name.
> 
> Me too.
> 
> > 
> >>  void kvm_mmu_zap_all(struct kvm *kvm);
> >>  unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
> >>  void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int 
> >> kvm_nr_mmu_pages);
> >> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> >> index 13626f4..7093a92 100644
> >> --- a/arch/x86/kvm/mmu.c
> >> +++ b/arch/x86/kvm/mmu.c
> >> @@ -234,12 +234,13 @@ static unsigned int get_mmio_spte_generation(u64 
> >> spte)
> >>  static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
> >>   unsigned access)
> >>  {
> >> -  u64 mask = generation_mmio_spte_mask(0);
> >> +  unsigned int gen = ACCESS_ONCE(kvm->arch.mmio_invalid_gen);
> >> +  u64 mask = generation_mmio_spte_mask(gen);
> >>
> >>access &= ACC_WRITE_MASK | ACC_USER_MASK;
> >>mask |= shadow_mmio_mask | access | gfn << PAGE_SHIFT;
> >>
> >> -  trace_mark_mmio_spte(sptep, gfn, access, 0);
> >> +  trace_mark_mmio_spte(sptep, gfn, access, gen);
> >>mmu_spte_set(sptep, mask);
> >>  }
> >>
> >> @@ -269,6 +270,34 @@ static bool set_mmio_spte(struct kvm *kvm, u64 
> >> *sptep, gfn_t gfn,
> >>return false;
> >>  }
> >>
> >> +static bool check_mmio_spte(struct kvm *kvm, u64 spte)
> >> +{
> >> +  return get_mmio_spte_generation(spte) ==
> >> +  ACCESS_ONCE(kvm->

Re: [PATCH 5/5] KVM: MMU: fast invalid all mmio sptes

2013-03-18 Thread Xiao Guangrong
On 03/18/2013 05:13 PM, Gleb Natapov wrote:
> On Mon, Mar 18, 2013 at 04:08:50PM +0800, Xiao Guangrong wrote:
>> On 03/17/2013 11:02 PM, Gleb Natapov wrote:
>>> On Fri, Mar 15, 2013 at 11:29:53PM +0800, Xiao Guangrong wrote:
 This patch tries to introduce a very simple and scale way to invalid all
 mmio sptes - it need not walk any shadow pages and hold mmu-lock

 KVM maintains a global mmio invalid generation-number which is stored in
 kvm->arch.mmio_invalid_gen and every mmio spte stores the current global
 generation-number into his available bits when it is created

 When KVM need zap all mmio sptes, it just simply increase the global
 generation-number. When guests do mmio access, KVM intercepts a MMIO #PF
 then it walks the shadow page table and get the mmio spte. If the
 generation-number on the spte does not equal the global generation-number,
 it will go to the normal #PF handler to update the mmio spte

 Since 19 bits are used to store generation-number on mmio spte, the
 generation-number can be round after 33554432 times. It is large enough
 for nearly all most cases, but making the code be more strong, we zap all
 shadow pages when the number is round

>>> Very nice idea, but why drop Takuya patches instead of using
>>> kvm_mmu_zap_mmio_sptes() when generation number overflows.
>>
>> I am not sure whether it is still needed. Requesting to zap all mmio sptes 
>> for
>> more than 50 times is really really rare, it nearly does not happen.
>> (By the way, 33554432 is wrong in the changelog, i just copy that for my 
>> origin
>> implantation.) And, after my patch optimizing zapping all shadow pages,
>> zap-all-sps should not be a problem anymore since it does not take too much 
>> lock
>> time.
>>
>> Your idea?
>>
> I expect 50 to become less since I already had plans to store some

Interesting, just curious, what are the plans? ;)

> information in mmio spte. Even if all zap-all-sptes becomes faster we
> still needlessly zap all sptes while we can zap only mmio.

Okay.

> 
>>>
>>>
 Signed-off-by: Xiao Guangrong 
 ---
  arch/x86/include/asm/kvm_host.h |2 +
  arch/x86/kvm/mmu.c  |   61 
 +--
  arch/x86/kvm/mmutrace.h |   17 +++
  arch/x86/kvm/paging_tmpl.h  |7 +++-
  arch/x86/kvm/vmx.c  |4 ++
  arch/x86/kvm/x86.c  |6 +--
  6 files changed, 82 insertions(+), 15 deletions(-)

 diff --git a/arch/x86/include/asm/kvm_host.h 
 b/arch/x86/include/asm/kvm_host.h
 index ef7f4a5..572398e 100644
 --- a/arch/x86/include/asm/kvm_host.h
 +++ b/arch/x86/include/asm/kvm_host.h
 @@ -529,6 +529,7 @@ struct kvm_arch {
unsigned int n_requested_mmu_pages;
unsigned int n_max_mmu_pages;
unsigned int indirect_shadow_pages;
 +  unsigned int mmio_invalid_gen;
>>> Why invalid? Should be mmio_valid_gen or mmio_current_get.
>>
>> mmio_invalid_gen is only updated in kvm_mmu_invalidate_mmio_sptes,
>> so i named it as _invalid_. But mmio_valid_gen is good for me.
>>
> It holds currently valid value though, so calling it "invalid" is
> confusing.

I agree.

> 
>>>
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
/*
 * Hash table of struct kvm_mmu_page.
 @@ -765,6 +766,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, 
 int slot);
  void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
 struct kvm_memory_slot *slot,
 gfn_t gfn_offset, unsigned long mask);
 +void kvm_mmu_invalid_mmio_spte(struct kvm *kvm);
>>> Agree with Takuya that kvm_mmu_invalidate_mmio_sptes() is a better name.
>>
>> Me too.
>>
>>>
  void kvm_mmu_zap_all(struct kvm *kvm);
  unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
  void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int 
 kvm_nr_mmu_pages);
 diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
 index 13626f4..7093a92 100644
 --- a/arch/x86/kvm/mmu.c
 +++ b/arch/x86/kvm/mmu.c
 @@ -234,12 +234,13 @@ static unsigned int get_mmio_spte_generation(u64 
 spte)
  static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
   unsigned access)
  {
 -  u64 mask = generation_mmio_spte_mask(0);
 +  unsigned int gen = ACCESS_ONCE(kvm->arch.mmio_invalid_gen);
 +  u64 mask = generation_mmio_spte_mask(gen);

access &= ACC_WRITE_MASK | ACC_USER_MASK;
mask |= shadow_mmio_mask | access | gfn << PAGE_SHIFT;

 -  trace_mark_mmio_spte(sptep, gfn, access, 0);
 +  trace_mark_mmio_spte(sptep, gfn, access, gen);
mmu_spte_set(sptep, mask);
  }

 @@ -269,6 +270,34 @@ static bool set_mmio_spte(struct kvm *kvm, u64 
 *sptep, gfn_t gfn,
return false;
  }

 +s

Re: [PATCH 5/5] KVM: MMU: fast invalid all mmio sptes

2013-03-18 Thread Gleb Natapov
On Mon, Mar 18, 2013 at 08:29:29PM +0800, Xiao Guangrong wrote:
> On 03/18/2013 05:13 PM, Gleb Natapov wrote:
> > On Mon, Mar 18, 2013 at 04:08:50PM +0800, Xiao Guangrong wrote:
> >> On 03/17/2013 11:02 PM, Gleb Natapov wrote:
> >>> On Fri, Mar 15, 2013 at 11:29:53PM +0800, Xiao Guangrong wrote:
>  This patch tries to introduce a very simple and scale way to invalid all
>  mmio sptes - it need not walk any shadow pages and hold mmu-lock
> 
>  KVM maintains a global mmio invalid generation-number which is stored in
>  kvm->arch.mmio_invalid_gen and every mmio spte stores the current global
>  generation-number into his available bits when it is created
> 
>  When KVM need zap all mmio sptes, it just simply increase the global
>  generation-number. When guests do mmio access, KVM intercepts a MMIO #PF
>  then it walks the shadow page table and get the mmio spte. If the
>  generation-number on the spte does not equal the global 
>  generation-number,
>  it will go to the normal #PF handler to update the mmio spte
> 
>  Since 19 bits are used to store generation-number on mmio spte, the
>  generation-number can be round after 33554432 times. It is large enough
>  for nearly all most cases, but making the code be more strong, we zap all
>  shadow pages when the number is round
> 
> >>> Very nice idea, but why drop Takuya patches instead of using
> >>> kvm_mmu_zap_mmio_sptes() when generation number overflows.
> >>
> >> I am not sure whether it is still needed. Requesting to zap all mmio sptes 
> >> for
> >> more than 50 times is really really rare, it nearly does not happen.
> >> (By the way, 33554432 is wrong in the changelog, i just copy that for my 
> >> origin
> >> implantation.) And, after my patch optimizing zapping all shadow pages,
> >> zap-all-sps should not be a problem anymore since it does not take too 
> >> much lock
> >> time.
> >>
> >> Your idea?
> >>
> > I expect 50 to become less since I already had plans to store some
> 
> Interesting, just curious, what are the plans? ;)
> 
Currently we use PIO to signal to virtio devices that work is pending. The
requirement is that signaling should be fast, and PIO is fast since there
is no need to emulate the instruction. PCIe, though, is not really designed
with PIO in mind, so we will have to use MMIO to do the signaling. To avoid
instruction emulation I thought about making the guest access these devices
with a predefined variety of the MOV instruction so that emulation can be
skipped. The idea is to mark the mmio spte so we know that emulation is not
needed.
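
A heavily hedged sketch of that idea; every name below is hypothetical and
nothing here is existing KVM code:

	/* Hypothetical: reserve one spare spte bit to flag "fast" mmio pages. */
	#define SKETCH_SPTE_FAST_MMIO	(1ull << 52)	/* assumed free bit */

	static void sketch_mark_fast_mmio(struct kvm *kvm, u64 *sptep, gfn_t gfn)
	{
		if (sketch_gfn_in_predefined_range(kvm, gfn))	/* hypothetical */
			*sptep |= SKETCH_SPTE_FAST_MMIO;
	}

	static bool sketch_can_skip_emulation(u64 spte)
	{
		/* The fault handler could test this instead of decoding the insn. */
		return spte & SKETCH_SPTE_FAST_MMIO;
	}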

> > information in mmio spte. Even if all zap-all-sptes becomes faster we
> > still needlessly zap all sptes while we can zap only mmio.
> 
> Okay.
> 
> > 
> >>>
> >>>
>  Signed-off-by: Xiao Guangrong 
>  ---
>   arch/x86/include/asm/kvm_host.h |2 +
>   arch/x86/kvm/mmu.c  |   61 
>  +--
>   arch/x86/kvm/mmutrace.h |   17 +++
>   arch/x86/kvm/paging_tmpl.h  |7 +++-
>   arch/x86/kvm/vmx.c  |4 ++
>   arch/x86/kvm/x86.c  |6 +--
>   6 files changed, 82 insertions(+), 15 deletions(-)
> 
>  diff --git a/arch/x86/include/asm/kvm_host.h 
>  b/arch/x86/include/asm/kvm_host.h
>  index ef7f4a5..572398e 100644
>  --- a/arch/x86/include/asm/kvm_host.h
>  +++ b/arch/x86/include/asm/kvm_host.h
>  @@ -529,6 +529,7 @@ struct kvm_arch {
>   unsigned int n_requested_mmu_pages;
>   unsigned int n_max_mmu_pages;
>   unsigned int indirect_shadow_pages;
>  +unsigned int mmio_invalid_gen;
> >>> Why invalid? Should be mmio_valid_gen or mmio_current_get.
> >>
> >> mmio_invalid_gen is only updated in kvm_mmu_invalidate_mmio_sptes,
> >> so i named it as _invalid_. But mmio_valid_gen is good for me.
> >>
> > It holds currently valid value though, so calling it "invalid" is
> > confusing.
> 
> I agree.
> 
> > 
> >>>
>   struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
>   /*
>    * Hash table of struct kvm_mmu_page.
>  @@ -765,6 +766,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm 
>  *kvm, int slot);
>   void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
>    struct kvm_memory_slot *slot,
>    gfn_t gfn_offset, unsigned long 
>  mask);
>  +void kvm_mmu_invalid_mmio_spte(struct kvm *kvm);
> >>> Agree with Takuya that kvm_mmu_invalidate_mmio_sptes() is a better name.
> >>
> >> Me too.
> >>
> >>>
>   void kvm_mmu_zap_all(struct kvm *kvm);
>   unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
>   void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int 
>  kvm_nr_mmu_pages);
>  diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
>  index 13626f4..7093a92 100644
>  --

Re: [PATCH 5/5] KVM: MMU: fast invalid all mmio sptes

2013-03-18 Thread Xiao Guangrong
On 03/18/2013 08:46 PM, Gleb Natapov wrote:
> On Mon, Mar 18, 2013 at 08:29:29PM +0800, Xiao Guangrong wrote:
>> On 03/18/2013 05:13 PM, Gleb Natapov wrote:
>>> On Mon, Mar 18, 2013 at 04:08:50PM +0800, Xiao Guangrong wrote:
 On 03/17/2013 11:02 PM, Gleb Natapov wrote:
> On Fri, Mar 15, 2013 at 11:29:53PM +0800, Xiao Guangrong wrote:
>> This patch tries to introduce a very simple and scale way to invalid all
>> mmio sptes - it need not walk any shadow pages and hold mmu-lock
>>
>> KVM maintains a global mmio invalid generation-number which is stored in
>> kvm->arch.mmio_invalid_gen and every mmio spte stores the current global
>> generation-number into his available bits when it is created
>>
>> When KVM need zap all mmio sptes, it just simply increase the global
>> generation-number. When guests do mmio access, KVM intercepts a MMIO #PF
>> then it walks the shadow page table and get the mmio spte. If the
>> generation-number on the spte does not equal the global 
>> generation-number,
>> it will go to the normal #PF handler to update the mmio spte
>>
>> Since 19 bits are used to store generation-number on mmio spte, the
>> generation-number can be round after 33554432 times. It is large enough
>> for nearly all most cases, but making the code be more strong, we zap all
>> shadow pages when the number is round
>>
> Very nice idea, but why drop Takuya patches instead of using
> kvm_mmu_zap_mmio_sptes() when generation number overflows.

 I am not sure whether it is still needed. Requesting to zap all mmio sptes 
 for
 more than 50 times is really really rare, it nearly does not happen.
 (By the way, 33554432 is wrong in the changelog, i just copy that for my 
 origin
 implantation.) And, after my patch optimizing zapping all shadow pages,
 zap-all-sps should not be a problem anymore since it does not take too 
 much lock
 time.

 Your idea?

>>> I expect 50 to become less since I already had plans to store some
>>
>> Interesting, just curious, what are the plans? ;)
>>
> Currently we uses pio to signal that work is pending to virtio devices. The
> requirement is that signaling should be fast and PIO is fast since there
> is not need to emulate instruction. PCIE though is not really designed
> with PIO in mind, so we will have to use MMIO to do signaling. To avoid
> instruction emulation I thought about making guest access these devices using
> predefined variety of MOV instruction so that emulation can be skipped.
> The idea is to mark mmio spte to know that emulation is not needed.

How do we know the page fault is caused by the predefined instruction?

> 
>>> information in mmio spte. Even if all zap-all-sptes becomes faster we
>>> still needlessly zap all sptes while we can zap only mmio.
>>
>> Okay.
>>
>>>
>
>
>> Signed-off-by: Xiao Guangrong 
>> ---
>>  arch/x86/include/asm/kvm_host.h |2 +
>>  arch/x86/kvm/mmu.c  |   61 
>> +--
>>  arch/x86/kvm/mmutrace.h |   17 +++
>>  arch/x86/kvm/paging_tmpl.h  |7 +++-
>>  arch/x86/kvm/vmx.c  |4 ++
>>  arch/x86/kvm/x86.c  |6 +--
>>  6 files changed, 82 insertions(+), 15 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/kvm_host.h 
>> b/arch/x86/include/asm/kvm_host.h
>> index ef7f4a5..572398e 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -529,6 +529,7 @@ struct kvm_arch {
>>  unsigned int n_requested_mmu_pages;
>>  unsigned int n_max_mmu_pages;
>>  unsigned int indirect_shadow_pages;
>> +unsigned int mmio_invalid_gen;
> Why invalid? Should be mmio_valid_gen or mmio_current_get.

 mmio_invalid_gen is only updated in kvm_mmu_invalidate_mmio_sptes,
 so i named it as _invalid_. But mmio_valid_gen is good for me.

>>> It holds currently valid value though, so calling it "invalid" is
>>> confusing.
>>
>> I agree.
>>
>>>
>
>>  struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
>>  /*
>>   * Hash table of struct kvm_mmu_page.
>> @@ -765,6 +766,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm 
>> *kvm, int slot);
>>  void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
>>   struct kvm_memory_slot *slot,
>>   gfn_t gfn_offset, unsigned long 
>> mask);
>> +void kvm_mmu_invalid_mmio_spte(struct kvm *kvm);
> Agree with Takuya that kvm_mmu_invalidate_mmio_sptes() is a better name.

 Me too.

>
>>  void kvm_mmu_zap_all(struct kvm *kvm);
>>  unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
>>  void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int 
>>

Re: [PATCH 5/5] KVM: MMU: fast invalid all mmio sptes

2013-03-18 Thread Gleb Natapov
On Mon, Mar 18, 2013 at 09:09:43PM +0800, Xiao Guangrong wrote:
> On 03/18/2013 08:46 PM, Gleb Natapov wrote:
> > On Mon, Mar 18, 2013 at 08:29:29PM +0800, Xiao Guangrong wrote:
> >> On 03/18/2013 05:13 PM, Gleb Natapov wrote:
> >>> On Mon, Mar 18, 2013 at 04:08:50PM +0800, Xiao Guangrong wrote:
>  On 03/17/2013 11:02 PM, Gleb Natapov wrote:
> > On Fri, Mar 15, 2013 at 11:29:53PM +0800, Xiao Guangrong wrote:
> >> This patch tries to introduce a very simple and scale way to invalid 
> >> all
> >> mmio sptes - it need not walk any shadow pages and hold mmu-lock
> >>
> >> KVM maintains a global mmio invalid generation-number which is stored 
> >> in
> >> kvm->arch.mmio_invalid_gen and every mmio spte stores the current 
> >> global
> >> generation-number into his available bits when it is created
> >>
> >> When KVM need zap all mmio sptes, it just simply increase the global
> >> generation-number. When guests do mmio access, KVM intercepts a MMIO 
> >> #PF
> >> then it walks the shadow page table and get the mmio spte. If the
> >> generation-number on the spte does not equal the global 
> >> generation-number,
> >> it will go to the normal #PF handler to update the mmio spte
> >>
> >> Since 19 bits are used to store generation-number on mmio spte, the
> >> generation-number can be round after 33554432 times. It is large enough
> >> for nearly all most cases, but making the code be more strong, we zap 
> >> all
> >> shadow pages when the number is round
> >>
> > Very nice idea, but why drop Takuya patches instead of using
> > kvm_mmu_zap_mmio_sptes() when generation number overflows.
> 
>  I am not sure whether it is still needed. Requesting to zap all mmio 
>  sptes for
>  more than 50 times is really really rare, it nearly does not happen.
>  (By the way, 33554432 is wrong in the changelog, i just copy that for my 
>  origin
>  implantation.) And, after my patch optimizing zapping all shadow pages,
>  zap-all-sps should not be a problem anymore since it does not take too 
>  much lock
>  time.
> 
>  Your idea?
> 
> >>> I expect 50 to become less since I already had plans to store some
> >>
> >> Interesting, just curious, what are the plans? ;)
> >>
> > Currently we uses pio to signal that work is pending to virtio devices. The
> > requirement is that signaling should be fast and PIO is fast since there
> > is not need to emulate instruction. PCIE though is not really designed
> > with PIO in mind, so we will have to use MMIO to do signaling. To avoid
> > instruction emulation I thought about making guest access these devices 
> > using
> > predefined variety of MOV instruction so that emulation can be skipped.
> > The idea is to mark mmio spte to know that emulation is not needed.
> 
> How to know page-fault is caused by the predefined instruction?
> 
Only predefined physical address ranges will be accessed that way. If the page
fault is in such a range, we assume the known instruction is used.

--
Gleb.


Re: [PATCH 5/5] KVM: MMU: fast invalid all mmio sptes

2013-03-18 Thread Xiao Guangrong
On 03/18/2013 09:19 PM, Gleb Natapov wrote:
> On Mon, Mar 18, 2013 at 09:09:43PM +0800, Xiao Guangrong wrote:
>> On 03/18/2013 08:46 PM, Gleb Natapov wrote:
>>> On Mon, Mar 18, 2013 at 08:29:29PM +0800, Xiao Guangrong wrote:
 On 03/18/2013 05:13 PM, Gleb Natapov wrote:
> On Mon, Mar 18, 2013 at 04:08:50PM +0800, Xiao Guangrong wrote:
>> On 03/17/2013 11:02 PM, Gleb Natapov wrote:
>>> On Fri, Mar 15, 2013 at 11:29:53PM +0800, Xiao Guangrong wrote:
 This patch tries to introduce a very simple and scale way to invalid 
 all
 mmio sptes - it need not walk any shadow pages and hold mmu-lock

 KVM maintains a global mmio invalid generation-number which is stored 
 in
 kvm->arch.mmio_invalid_gen and every mmio spte stores the current 
 global
 generation-number into his available bits when it is created

 When KVM need zap all mmio sptes, it just simply increase the global
 generation-number. When guests do mmio access, KVM intercepts a MMIO 
 #PF
 then it walks the shadow page table and get the mmio spte. If the
 generation-number on the spte does not equal the global 
 generation-number,
 it will go to the normal #PF handler to update the mmio spte

 Since 19 bits are used to store generation-number on mmio spte, the
 generation-number can be round after 33554432 times. It is large enough
 for nearly all most cases, but making the code be more strong, we zap 
 all
 shadow pages when the number is round

>>> Very nice idea, but why drop Takuya patches instead of using
>>> kvm_mmu_zap_mmio_sptes() when generation number overflows.
>>
>> I am not sure whether it is still needed. Requesting to zap all mmio 
>> sptes for
>> more than 50 times is really really rare, it nearly does not happen.
>> (By the way, 33554432 is wrong in the changelog, i just copy that for my 
>> origin
>> implantation.) And, after my patch optimizing zapping all shadow pages,
>> zap-all-sps should not be a problem anymore since it does not take too 
>> much lock
>> time.
>>
>> Your idea?
>>
> I expect 50 to become less since I already had plans to store some

 Interesting, just curious, what are the plans? ;)

>>> Currently we uses pio to signal that work is pending to virtio devices. The
>>> requirement is that signaling should be fast and PIO is fast since there
>>> is not need to emulate instruction. PCIE though is not really designed
>>> with PIO in mind, so we will have to use MMIO to do signaling. To avoid
>>> instruction emulation I thought about making guest access these devices 
>>> using
>>> predefined variety of MOV instruction so that emulation can be skipped.
>>> The idea is to mark mmio spte to know that emulation is not needed.
>>
>> How to know page-fault is caused by the predefined instruction?
>>
> Only predefined phys address rages will be accessed that way. If page
> fault is in a range we assume the knows instruction is used.

That means the access can be identified by the gfn, so why do we need to cache
other things in the mmio spte?



Re: [PATCH 5/5] KVM: MMU: fast invalid all mmio sptes

2013-03-18 Thread Gleb Natapov
On Mon, Mar 18, 2013 at 09:25:10PM +0800, Xiao Guangrong wrote:
> On 03/18/2013 09:19 PM, Gleb Natapov wrote:
> > On Mon, Mar 18, 2013 at 09:09:43PM +0800, Xiao Guangrong wrote:
> >> On 03/18/2013 08:46 PM, Gleb Natapov wrote:
> >>> On Mon, Mar 18, 2013 at 08:29:29PM +0800, Xiao Guangrong wrote:
>  On 03/18/2013 05:13 PM, Gleb Natapov wrote:
> > On Mon, Mar 18, 2013 at 04:08:50PM +0800, Xiao Guangrong wrote:
> >> On 03/17/2013 11:02 PM, Gleb Natapov wrote:
> >>> On Fri, Mar 15, 2013 at 11:29:53PM +0800, Xiao Guangrong wrote:
>  This patch tries to introduce a very simple and scale way to invalid 
>  all
>  mmio sptes - it need not walk any shadow pages and hold mmu-lock
> 
>  KVM maintains a global mmio invalid generation-number which is 
>  stored in
>  kvm->arch.mmio_invalid_gen and every mmio spte stores the current 
>  global
>  generation-number into his available bits when it is created
> 
>  When KVM need zap all mmio sptes, it just simply increase the global
>  generation-number. When guests do mmio access, KVM intercepts a MMIO 
>  #PF
>  then it walks the shadow page table and get the mmio spte. If the
>  generation-number on the spte does not equal the global 
>  generation-number,
>  it will go to the normal #PF handler to update the mmio spte
> 
>  Since 19 bits are used to store generation-number on mmio spte, the
>  generation-number can be round after 33554432 times. It is large 
>  enough
>  for nearly all most cases, but making the code be more strong, we 
>  zap all
>  shadow pages when the number is round
> 
> >>> Very nice idea, but why drop Takuya patches instead of using
> >>> kvm_mmu_zap_mmio_sptes() when generation number overflows.
> >>
> >> I am not sure whether it is still needed. Requesting to zap all mmio 
> >> sptes for
> >> more than 50 times is really really rare, it nearly does not 
> >> happen.
> >> (By the way, 33554432 is wrong in the changelog, i just copy that for 
> >> my origin
> >> implantation.) And, after my patch optimizing zapping all shadow pages,
> >> zap-all-sps should not be a problem anymore since it does not take too 
> >> much lock
> >> time.
> >>
> >> Your idea?
> >>
> > I expect 50 to become less since I already had plans to store some
> 
>  Interesting, just curious, what are the plans? ;)
> 
> >>> Currently we uses pio to signal that work is pending to virtio devices. 
> >>> The
> >>> requirement is that signaling should be fast and PIO is fast since there
> >>> is not need to emulate instruction. PCIE though is not really designed
> >>> with PIO in mind, so we will have to use MMIO to do signaling. To avoid
> >>> instruction emulation I thought about making guest access these devices 
> >>> using
> >>> predefined variety of MOV instruction so that emulation can be skipped.
> >>> The idea is to mark mmio spte to know that emulation is not needed.
> >>
> >> How to know page-fault is caused by the predefined instruction?
> >>
> > Only predefined phys address rages will be accessed that way. If page
> > fault is in a range we assume the knows instruction is used.
> 
> That means the access can be identified by the gfn, why need cache
> other things into mmio spte?
To avoid searching through memory ranges on each access.

--
Gleb.


Re: [PATCH 5/5] KVM: MMU: fast invalid all mmio sptes

2013-03-18 Thread Xiao Guangrong
On 03/18/2013 09:27 PM, Gleb Natapov wrote:
> On Mon, Mar 18, 2013 at 09:25:10PM +0800, Xiao Guangrong wrote:
>> On 03/18/2013 09:19 PM, Gleb Natapov wrote:
>>> On Mon, Mar 18, 2013 at 09:09:43PM +0800, Xiao Guangrong wrote:
 On 03/18/2013 08:46 PM, Gleb Natapov wrote:
> On Mon, Mar 18, 2013 at 08:29:29PM +0800, Xiao Guangrong wrote:
>> On 03/18/2013 05:13 PM, Gleb Natapov wrote:
>>> On Mon, Mar 18, 2013 at 04:08:50PM +0800, Xiao Guangrong wrote:
 On 03/17/2013 11:02 PM, Gleb Natapov wrote:
> On Fri, Mar 15, 2013 at 11:29:53PM +0800, Xiao Guangrong wrote:
>> This patch tries to introduce a very simple and scale way to invalid 
>> all
>> mmio sptes - it need not walk any shadow pages and hold mmu-lock
>>
>> KVM maintains a global mmio invalid generation-number which is 
>> stored in
>> kvm->arch.mmio_invalid_gen and every mmio spte stores the current 
>> global
>> generation-number into his available bits when it is created
>>
>> When KVM need zap all mmio sptes, it just simply increase the global
>> generation-number. When guests do mmio access, KVM intercepts a MMIO 
>> #PF
>> then it walks the shadow page table and get the mmio spte. If the
>> generation-number on the spte does not equal the global 
>> generation-number,
>> it will go to the normal #PF handler to update the mmio spte
>>
>> Since 19 bits are used to store generation-number on mmio spte, the
>> generation-number can be round after 33554432 times. It is large 
>> enough
>> for nearly all most cases, but making the code be more strong, we 
>> zap all
>> shadow pages when the number is round
>>
> Very nice idea, but why drop Takuya patches instead of using
> kvm_mmu_zap_mmio_sptes() when generation number overflows.

 I am not sure whether it is still needed. Requesting to zap all mmio 
 sptes for
 more than 50 times is really really rare, it nearly does not 
 happen.
 (By the way, 33554432 is wrong in the changelog, i just copy that for 
 my origin
 implantation.) And, after my patch optimizing zapping all shadow pages,
 zap-all-sps should not be a problem anymore since it does not take too 
 much lock
 time.

 Your idea?

>>> I expect 50 to become less since I already had plans to store some
>>
>> Interesting, just curious, what are the plans? ;)
>>
> Currently we uses pio to signal that work is pending to virtio devices. 
> The
> requirement is that signaling should be fast and PIO is fast since there
> is not need to emulate instruction. PCIE though is not really designed
> with PIO in mind, so we will have to use MMIO to do signaling. To avoid
> instruction emulation I thought about making guest access these devices 
> using
> predefined variety of MOV instruction so that emulation can be skipped.
> The idea is to mark mmio spte to know that emulation is not needed.

 How to know page-fault is caused by the predefined instruction?

>>> Only predefined phys address rages will be accessed that way. If page
>>> fault is in a range we assume the knows instruction is used.
>>
>> That means the access can be identified by the gfn, why need cache
>> other things into mmio spte?
> Two not search through memory ranges on each access.

Aha... got it. Thank you! ;)



Re: [PATCH 5/5] KVM: MMU: fast invalid all mmio sptes

2013-03-18 Thread Eric Northup
On Fri, Mar 15, 2013 at 8:29 AM, Xiao Guangrong
 wrote:
> This patch tries to introduce a very simple and scale way to invalid all
> mmio sptes - it need not walk any shadow pages and hold mmu-lock
>
> KVM maintains a global mmio invalid generation-number which is stored in
> kvm->arch.mmio_invalid_gen and every mmio spte stores the current global
> generation-number into his available bits when it is created
>
> When KVM need zap all mmio sptes, it just simply increase the global
> generation-number. When guests do mmio access, KVM intercepts a MMIO #PF
> then it walks the shadow page table and get the mmio spte. If the
> generation-number on the spte does not equal the global generation-number,
> it will go to the normal #PF handler to update the mmio spte
>
> Since 19 bits are used to store generation-number on mmio spte, the
> generation-number can be round after 33554432 times. It is large enough
> for nearly all most cases, but making the code be more strong, we zap all
> shadow pages when the number is round
>
> Signed-off-by: Xiao Guangrong 
> ---
>  arch/x86/include/asm/kvm_host.h |2 +
>  arch/x86/kvm/mmu.c  |   61 
> +--
>  arch/x86/kvm/mmutrace.h |   17 +++
>  arch/x86/kvm/paging_tmpl.h  |7 +++-
>  arch/x86/kvm/vmx.c  |4 ++
>  arch/x86/kvm/x86.c  |6 +--
>  6 files changed, 82 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index ef7f4a5..572398e 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -529,6 +529,7 @@ struct kvm_arch {
> unsigned int n_requested_mmu_pages;
> unsigned int n_max_mmu_pages;
> unsigned int indirect_shadow_pages;
> +   unsigned int mmio_invalid_gen;

Could this get initialized to something close to the wrap-around
value, so that the wrap-around case gets more real-world coverage?

> struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
> /*
>  * Hash table of struct kvm_mmu_page.
> @@ -765,6 +766,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, 
> int slot);
>  void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
>  struct kvm_memory_slot *slot,
>  gfn_t gfn_offset, unsigned long mask);
> +void kvm_mmu_invalid_mmio_spte(struct kvm *kvm);
>  void kvm_mmu_zap_all(struct kvm *kvm);
>  unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
>  void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int 
> kvm_nr_mmu_pages);
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 13626f4..7093a92 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -234,12 +234,13 @@ static unsigned int get_mmio_spte_generation(u64 spte)
>  static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
>unsigned access)
>  {
> -   u64 mask = generation_mmio_spte_mask(0);
> +   unsigned int gen = ACCESS_ONCE(kvm->arch.mmio_invalid_gen);
> +   u64 mask = generation_mmio_spte_mask(gen);
>
> access &= ACC_WRITE_MASK | ACC_USER_MASK;
> mask |= shadow_mmio_mask | access | gfn << PAGE_SHIFT;
>
> -   trace_mark_mmio_spte(sptep, gfn, access, 0);
> +   trace_mark_mmio_spte(sptep, gfn, access, gen);
> mmu_spte_set(sptep, mask);
>  }
>
> @@ -269,6 +270,34 @@ static bool set_mmio_spte(struct kvm *kvm, u64 *sptep, 
> gfn_t gfn,
> return false;
>  }
>
> +static bool check_mmio_spte(struct kvm *kvm, u64 spte)
> +{
> +   return get_mmio_spte_generation(spte) ==
> +   ACCESS_ONCE(kvm->arch.mmio_invalid_gen);
> +}
> +
> +/*
> + * The caller should protect concurrent access on
> + * kvm->arch.mmio_invalid_gen. Currently, it is used by
> + * kvm_arch_commit_memory_region and protected by kvm->slots_lock.
> + */
> +void kvm_mmu_invalid_mmio_spte(struct kvm *kvm)
> +{
> +   /* Ensure update memslot has been completed. */
> +   smp_mb();
> +
> +trace_kvm_mmu_invalid_mmio_spte(kvm);
> +
> +   /*
> +* The very rare case: if the generation-number is round,
> +* zap all shadow pages.
> +*/
> +   if (unlikely(kvm->arch.mmio_invalid_gen++ == MAX_GEN)) {
> +   kvm->arch.mmio_invalid_gen = 0;
> +   return kvm_mmu_zap_all(kvm);
> +   }
> +}
> +
>  static inline u64 rsvd_bits(int s, int e)
>  {
> return ((1ULL << (e - s + 1)) - 1) << s;
> @@ -3183,9 +3212,12 @@ static u64 walk_shadow_page_get_mmio_spte(struct 
> kvm_vcpu *vcpu, u64 addr)
>  }
>
>  /*
> - * If it is a real mmio page fault, return 1 and emulat the instruction
> - * directly, return 0 to let CPU fault again on the address, -1 is
> - * returned if bug is detected.
> + * Return value:
> + * 2: invalid spte is detected then let the real page fault path
> + *update the mmio spte.
> + * 1: it is a real mmio page fault, 

Re: [PATCH 5/5] KVM: MMU: fast invalid all mmio sptes

2013-03-18 Thread Xiao Guangrong
On 03/19/2013 06:16 AM, Eric Northup wrote:
> On Fri, Mar 15, 2013 at 8:29 AM, Xiao Guangrong
>  wrote:
>> This patch tries to introduce a very simple and scale way to invalid all
>> mmio sptes - it need not walk any shadow pages and hold mmu-lock
>>
>> KVM maintains a global mmio invalid generation-number which is stored in
>> kvm->arch.mmio_invalid_gen and every mmio spte stores the current global
>> generation-number into his available bits when it is created
>>
>> When KVM need zap all mmio sptes, it just simply increase the global
>> generation-number. When guests do mmio access, KVM intercepts a MMIO #PF
>> then it walks the shadow page table and get the mmio spte. If the
>> generation-number on the spte does not equal the global generation-number,
>> it will go to the normal #PF handler to update the mmio spte
>>
>> Since 19 bits are used to store generation-number on mmio spte, the
>> generation-number can be round after 33554432 times. It is large enough
>> for nearly all most cases, but making the code be more strong, we zap all
>> shadow pages when the number is round
>>
>> Signed-off-by: Xiao Guangrong 
>> ---
>>  arch/x86/include/asm/kvm_host.h |2 +
>>  arch/x86/kvm/mmu.c  |   61 
>> +--
>>  arch/x86/kvm/mmutrace.h |   17 +++
>>  arch/x86/kvm/paging_tmpl.h  |7 +++-
>>  arch/x86/kvm/vmx.c  |4 ++
>>  arch/x86/kvm/x86.c  |6 +--
>>  6 files changed, 82 insertions(+), 15 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/kvm_host.h 
>> b/arch/x86/include/asm/kvm_host.h
>> index ef7f4a5..572398e 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -529,6 +529,7 @@ struct kvm_arch {
>> unsigned int n_requested_mmu_pages;
>> unsigned int n_max_mmu_pages;
>> unsigned int indirect_shadow_pages;
>> +   unsigned int mmio_invalid_gen;
> 
> Could this get initialized to something close to the wrap-around
> value, so that the wrap-around case gets more real-world coverage?

I am afraid we cannot. We cache the current mmio_invalid_gen into the mmio spte
when it is created, no matter what the initial value is.

If you have a better way, please show me. ;)





Re: [PATCH 5/5] KVM: MMU: fast invalid all mmio sptes

2013-03-19 Thread Gleb Natapov
On Tue, Mar 19, 2013 at 11:15:36AM +0800, Xiao Guangrong wrote:
> On 03/19/2013 06:16 AM, Eric Northup wrote:
> > On Fri, Mar 15, 2013 at 8:29 AM, Xiao Guangrong
> >  wrote:
> >> This patch tries to introduce a very simple and scale way to invalid all
> >> mmio sptes - it need not walk any shadow pages and hold mmu-lock
> >>
> >> KVM maintains a global mmio invalid generation-number which is stored in
> >> kvm->arch.mmio_invalid_gen and every mmio spte stores the current global
> >> generation-number into his available bits when it is created
> >>
> >> When KVM need zap all mmio sptes, it just simply increase the global
> >> generation-number. When guests do mmio access, KVM intercepts a MMIO #PF
> >> then it walks the shadow page table and get the mmio spte. If the
> >> generation-number on the spte does not equal the global generation-number,
> >> it will go to the normal #PF handler to update the mmio spte
> >>
> >> Since 19 bits are used to store generation-number on mmio spte, the
> >> generation-number can be round after 33554432 times. It is large enough
> >> for nearly all most cases, but making the code be more strong, we zap all
> >> shadow pages when the number is round
> >>
> >> Signed-off-by: Xiao Guangrong 
> >> ---
> >>  arch/x86/include/asm/kvm_host.h |2 +
> >>  arch/x86/kvm/mmu.c  |   61 
> >> +--
> >>  arch/x86/kvm/mmutrace.h |   17 +++
> >>  arch/x86/kvm/paging_tmpl.h  |7 +++-
> >>  arch/x86/kvm/vmx.c  |4 ++
> >>  arch/x86/kvm/x86.c  |6 +--
> >>  6 files changed, 82 insertions(+), 15 deletions(-)
> >>
> >> diff --git a/arch/x86/include/asm/kvm_host.h 
> >> b/arch/x86/include/asm/kvm_host.h
> >> index ef7f4a5..572398e 100644
> >> --- a/arch/x86/include/asm/kvm_host.h
> >> +++ b/arch/x86/include/asm/kvm_host.h
> >> @@ -529,6 +529,7 @@ struct kvm_arch {
> >> unsigned int n_requested_mmu_pages;
> >> unsigned int n_max_mmu_pages;
> >> unsigned int indirect_shadow_pages;
> >> +   unsigned int mmio_invalid_gen;
> > 
> > Could this get initialized to something close to the wrap-around
> > value, so that the wrap-around case gets more real-world coverage?
> 
> I am afraid we can not. We cache the current mmio_invalid_gen into mmio spte 
> when
> it is created no matter what the initiation value is.
> 
> If you have a better way, please show me. ;)
> 
The idea is to initialize mmio_invalid_gen to a value close to MAX_GEN in
order to exercise

+   if (unlikely(kvm->arch.mmio_invalid_gen++ == MAX_GEN)) {
+   kvm->arch.mmio_invalid_gen = 0;
+   return kvm_mmu_zap_all(kvm);
+   }

path more often.
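
A minimal sketch of that suggestion, assuming the assignment would live in the
VM setup path (for example kvm_arch_init_vm()) and that MAX_GEN is the constant
the patch already defines:

	/* Start a few updates short of the wrap so the zap-all branch gets run. */
	kvm->arch.mmio_invalid_gen = MAX_GEN - 16;

With that, the unlikely() branch quoted above is taken after a handful of
memslot updates rather than only after a very large number of them.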

--
Gleb.


Re: [PATCH 5/5] KVM: MMU: fast invalid all mmio sptes

2013-03-19 Thread Xiao Guangrong
On 03/19/2013 03:36 PM, Gleb Natapov wrote:
> On Tue, Mar 19, 2013 at 11:15:36AM +0800, Xiao Guangrong wrote:
>> On 03/19/2013 06:16 AM, Eric Northup wrote:
>>> On Fri, Mar 15, 2013 at 8:29 AM, Xiao Guangrong
>>>  wrote:
 This patch tries to introduce a very simple and scale way to invalid all
 mmio sptes - it need not walk any shadow pages and hold mmu-lock

 KVM maintains a global mmio invalid generation-number which is stored in
 kvm->arch.mmio_invalid_gen and every mmio spte stores the current global
 generation-number into his available bits when it is created

 When KVM need zap all mmio sptes, it just simply increase the global
 generation-number. When guests do mmio access, KVM intercepts a MMIO #PF
 then it walks the shadow page table and get the mmio spte. If the
 generation-number on the spte does not equal the global generation-number,
 it will go to the normal #PF handler to update the mmio spte

 Since 19 bits are used to store generation-number on mmio spte, the
 generation-number can be round after 33554432 times. It is large enough
 for nearly all most cases, but making the code be more strong, we zap all
 shadow pages when the number is round

 Signed-off-by: Xiao Guangrong 
 ---
  arch/x86/include/asm/kvm_host.h |2 +
  arch/x86/kvm/mmu.c  |   61 
 +--
  arch/x86/kvm/mmutrace.h |   17 +++
  arch/x86/kvm/paging_tmpl.h  |7 +++-
  arch/x86/kvm/vmx.c  |4 ++
  arch/x86/kvm/x86.c  |6 +--
  6 files changed, 82 insertions(+), 15 deletions(-)

 diff --git a/arch/x86/include/asm/kvm_host.h 
 b/arch/x86/include/asm/kvm_host.h
 index ef7f4a5..572398e 100644
 --- a/arch/x86/include/asm/kvm_host.h
 +++ b/arch/x86/include/asm/kvm_host.h
 @@ -529,6 +529,7 @@ struct kvm_arch {
 unsigned int n_requested_mmu_pages;
 unsigned int n_max_mmu_pages;
 unsigned int indirect_shadow_pages;
 +   unsigned int mmio_invalid_gen;
>>>
>>> Could this get initialized to something close to the wrap-around
>>> value, so that the wrap-around case gets more real-world coverage?
>>
>> I am afraid we can not. We cache the current mmio_invalid_gen into mmio spte 
>> when
>> it is created no matter what the initiation value is.
>>
>> If you have a better way, please show me. ;)
>>
> The idea is to initialize mmio_invalid_gen to value close to MAX_GEN in
> order to exercise 

Oh, got it, I understood it as "initialize mmio_invalid_gen properly to _avoid_
more wrap-arounds"... Sorry for my carelessness! :(

The idea lets us exercise the rarely-run code at runtime; I am fine with this.

Thank you, Eric and Gleb!
