Re: [RFC PATCH v3 1/2] KVM: arm64: Move CMOs from user_mem_abort to the fault handlers

2021-04-08 Thread Alexandru Elisei
Hi Yanan,

On 4/8/21 10:23 AM, wangyanan (Y) wrote:
> Hi Alex,
>
> On 2021/4/7 23:31, Alexandru Elisei wrote:
>> Hi Yanan,
>>
>> On 3/26/21 3:16 AM, Yanan Wang wrote:
>>> We currently uniformly perform CMOs of D-cache and I-cache in function
>>> user_mem_abort before calling the fault handlers. If we get concurrent
>>> guest faults(e.g. translation faults, permission faults) or some really
>>> unnecessary guest faults caused by BBM, CMOs for the first vcpu are
>> I can't figure out what BBM means.
> Just as Will has explained, it's Break-Before-Make rule. When we need to
> replace an old table entry with a new one, we should firstly invalidate
> the old table entry(Break), before installation of the new entry(Make).

Got it, thank you and Will for the explanation.

>
>
> And I think this patch mainly introduces benefits in two specific scenarios:
> 1) In a VM startup, it will improve efficiency of handling page faults incurred
> by vCPUs, when initially populating stage2 page tables.
> 2) After live migration, the heavy workload will be resumed on the destination
> VMs, however all the stage2 page tables need to be rebuilt.
>>> necessary while the others later are not.
>>>
>>> By moving CMOs to the fault handlers, we can easily identify conditions
>>> where they are really needed and avoid the unnecessary ones. As it's a
>>> time consuming process to perform CMOs especially when flushing a block
>>> range, so this solution reduces much load of kvm and improve efficiency
>>> of the page table code.
>>>
>>> So let's move both clean of D-cache and invalidation of I-cache to the
>>> map path and move only invalidation of I-cache to the permission path.
>>> Since the original APIs for CMOs in mmu.c are only called in function
>>> user_mem_abort, we now also move them to pgtable.c.
>>>
>>> Signed-off-by: Yanan Wang 
>>> ---
>>>   arch/arm64/include/asm/kvm_mmu.h | 31 ---
>>>   arch/arm64/kvm/hyp/pgtable.c | 68 +---
>>>   arch/arm64/kvm/mmu.c | 23 ++-
>>>   3 files changed, 57 insertions(+), 65 deletions(-)
>>>
>>> diff --git a/arch/arm64/include/asm/kvm_mmu.h 
>>> b/arch/arm64/include/asm/kvm_mmu.h
>>> index 90873851f677..c31f88306d4e 100644
>>> --- a/arch/arm64/include/asm/kvm_mmu.h
>>> +++ b/arch/arm64/include/asm/kvm_mmu.h
>>> @@ -177,37 +177,6 @@ static inline bool vcpu_has_cache_enabled(struct 
>>> kvm_vcpu
>>> *vcpu)
>>>   return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
>>>   }
>>>   -static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long
>>> size)
>>> -{
>>> -    void *va = page_address(pfn_to_page(pfn));
>>> -
>>> -    /*
>>> - * With FWB, we ensure that the guest always accesses memory using
>>> - * cacheable attributes, and we don't have to clean to PoC when
>>> - * faulting in pages. Furthermore, FWB implies IDC, so cleaning to
>>> - * PoU is not required either in this case.
>>> - */
>>> -    if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
>>> -    return;
>>> -
>>> -    kvm_flush_dcache_to_poc(va, size);
>>> -}
>>> -
>>> -static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
>>> -  unsigned long size)
>>> -{
>>> -    if (icache_is_aliasing()) {
>>> -    /* any kind of VIPT cache */
>>> -    __flush_icache_all();
>>> -    } else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
>>> -    /* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) 
>>> */
>>> -    void *va = page_address(pfn_to_page(pfn));
>>> -
>>> -    invalidate_icache_range((unsigned long)va,
>>> -    (unsigned long)va + size);
>>> -    }
>>> -}
>>> -
>>>   void kvm_set_way_flush(struct kvm_vcpu *vcpu);
>>>   void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
>>>   diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
>>> index 4d177ce1d536..829a34eea526 100644
>>> --- a/arch/arm64/kvm/hyp/pgtable.c
>>> +++ b/arch/arm64/kvm/hyp/pgtable.c
>>> @@ -464,6 +464,43 @@ static int stage2_map_set_prot_attr(enum 
>>> kvm_pgtable_prot
>>> prot,
>>>   return 0;
>>>   }
>>>   +static bool stage2_pte_cacheable(kvm_pte_t pte)
>>> +{
>>> +    u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
>>> +    return memattr == PAGE_S2_MEMATTR(NORMAL);
>>> +}
>>> +
>>> +static bool stage2_pte_executable(kvm_pte_t pte)
>>> +{
>>> +    return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
>>> +}
>>> +
>>> +static void stage2_flush_dcache(void *addr, u64 size)
>>> +{
>>> +    /*
>>> + * With FWB, we ensure that the guest always accesses memory using
>>> + * cacheable attributes, and we don't have to clean to PoC when
>>> + * faulting in pages. Furthermore, FWB implies IDC, so cleaning to
>>> + * PoU is not required either in this case.
>>> + */
>>> +    if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
>>> +    return;
>>> +
>>> +    __flush_dcache_area(addr, size);
>>> +}
>>> +
>>> +static void 

Re: [RFC PATCH v3 1/2] KVM: arm64: Move CMOs from user_mem_abort to the fault handlers

2021-04-08 Thread wangyanan (Y)

Hi Alex,

On 2021/4/7 23:31, Alexandru Elisei wrote:

Hi Yanan,

On 3/26/21 3:16 AM, Yanan Wang wrote:

We currently uniformly perform CMOs of D-cache and I-cache in function
user_mem_abort before calling the fault handlers. If we get concurrent
guest faults(e.g. translation faults, permission faults) or some really
unnecessary guest faults caused by BBM, CMOs for the first vcpu are

I can't figure out what BBM means.

Just as Will has explained, it's the Break-Before-Make rule. When we need to
replace an old table entry with a new one, we should first invalidate
the old table entry (Break) before installing the new entry (Make).

And I think this patch mainly introduces benefits in two specific scenarios:
1) In a VM startup, it will improve efficiency of handling page faults incurred
by vCPUs, when initially populating stage2 page tables.
2) After live migration, the heavy workload will be resumed on the destination
VMs, however all the stage2 page tables need to be rebuilt.

necessary while the others later are not.

By moving CMOs to the fault handlers, we can easily identify conditions
where they are really needed and avoid the unnecessary ones. As it's a
time consuming process to perform CMOs especially when flushing a block
range, so this solution reduces much load of kvm and improve efficiency
of the page table code.

So let's move both clean of D-cache and invalidation of I-cache to the
map path and move only invalidation of I-cache to the permission path.
Since the original APIs for CMOs in mmu.c are only called in function
user_mem_abort, we now also move them to pgtable.c.

Signed-off-by: Yanan Wang 
---
  arch/arm64/include/asm/kvm_mmu.h | 31 ---
  arch/arm64/kvm/hyp/pgtable.c | 68 +---
  arch/arm64/kvm/mmu.c | 23 ++-
  3 files changed, 57 insertions(+), 65 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 90873851f677..c31f88306d4e 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -177,37 +177,6 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu 
*vcpu)
return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
  }
  
-static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)

-{
-   void *va = page_address(pfn_to_page(pfn));
-
-   /*
-* With FWB, we ensure that the guest always accesses memory using
-* cacheable attributes, and we don't have to clean to PoC when
-* faulting in pages. Furthermore, FWB implies IDC, so cleaning to
-* PoU is not required either in this case.
-*/
-   if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
-   return;
-
-   kvm_flush_dcache_to_poc(va, size);
-}
-
-static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
- unsigned long size)
-{
-   if (icache_is_aliasing()) {
-   /* any kind of VIPT cache */
-   __flush_icache_all();
-   } else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
-   /* PIPT or VPIPT at EL2 (see comment in 
__kvm_tlb_flush_vmid_ipa) */
-   void *va = page_address(pfn_to_page(pfn));
-
-   invalidate_icache_range((unsigned long)va,
-   (unsigned long)va + size);
-   }
-}
-
  void kvm_set_way_flush(struct kvm_vcpu *vcpu);
  void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
  
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c

index 4d177ce1d536..829a34eea526 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -464,6 +464,43 @@ static int stage2_map_set_prot_attr(enum kvm_pgtable_prot 
prot,
return 0;
  }
  
+static bool stage2_pte_cacheable(kvm_pte_t pte)

+{
+   u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
+   return memattr == PAGE_S2_MEMATTR(NORMAL);
+}
+
+static bool stage2_pte_executable(kvm_pte_t pte)
+{
+   return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
+}
+
+static void stage2_flush_dcache(void *addr, u64 size)
+{
+   /*
+* With FWB, we ensure that the guest always accesses memory using
+* cacheable attributes, and we don't have to clean to PoC when
+* faulting in pages. Furthermore, FWB implies IDC, so cleaning to
+* PoU is not required either in this case.
+*/
+   if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
+   return;
+
+   __flush_dcache_area(addr, size);
+}
+
+static void stage2_invalidate_icache(void *addr, u64 size)
+{
+   if (icache_is_aliasing()) {
+   /* Flush any kind of VIPT icache */
+   __flush_icache_all();
+   } else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
+   /* PIPT or VPIPT at EL2 */
+   invalidate_icache_range((unsigned long)addr,
+

Re: [RFC PATCH v3 1/2] KVM: arm64: Move CMOs from user_mem_abort to the fault handlers

2021-04-07 Thread Will Deacon
On Wed, Apr 07, 2021 at 04:31:31PM +0100, Alexandru Elisei wrote:
> On 3/26/21 3:16 AM, Yanan Wang wrote:
> > We currently uniformly perform CMOs of D-cache and I-cache in function
> > user_mem_abort before calling the fault handlers. If we get concurrent
> > guest faults(e.g. translation faults, permission faults) or some really
> > unnecessary guest faults caused by BBM, CMOs for the first vcpu are
> 
> I can't figure out what BBM means.

Oh, I know that one! BBM means "Break Before Make". Not to be confused with
DBM (Dirty Bit Management) or BFM (Bit Field Move).

Will


Re: [RFC PATCH v3 1/2] KVM: arm64: Move CMOs from user_mem_abort to the fault handlers

2021-04-07 Thread Alexandru Elisei
Hi Yanan,

On 3/26/21 3:16 AM, Yanan Wang wrote:
> We currently uniformly perform CMOs of D-cache and I-cache in function
> user_mem_abort before calling the fault handlers. If we get concurrent
> guest faults(e.g. translation faults, permission faults) or some really
> unnecessary guest faults caused by BBM, CMOs for the first vcpu are

I can't figure out what BBM means.

> necessary while the others later are not.
>
> By moving CMOs to the fault handlers, we can easily identify conditions
> where they are really needed and avoid the unnecessary ones. As it's a
> time consuming process to perform CMOs especially when flushing a block
> range, so this solution reduces much load of kvm and improve efficiency
> of the page table code.
>
> So let's move both clean of D-cache and invalidation of I-cache to the
> map path and move only invalidation of I-cache to the permission path.
> Since the original APIs for CMOs in mmu.c are only called in function
> user_mem_abort, we now also move them to pgtable.c.
>
> Signed-off-by: Yanan Wang 
> ---
>  arch/arm64/include/asm/kvm_mmu.h | 31 ---
>  arch/arm64/kvm/hyp/pgtable.c | 68 +---
>  arch/arm64/kvm/mmu.c | 23 ++-
>  3 files changed, 57 insertions(+), 65 deletions(-)
>
> diff --git a/arch/arm64/include/asm/kvm_mmu.h 
> b/arch/arm64/include/asm/kvm_mmu.h
> index 90873851f677..c31f88306d4e 100644
> --- a/arch/arm64/include/asm/kvm_mmu.h
> +++ b/arch/arm64/include/asm/kvm_mmu.h
> @@ -177,37 +177,6 @@ static inline bool vcpu_has_cache_enabled(struct 
> kvm_vcpu *vcpu)
>   return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
>  }
>  
> -static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long 
> size)
> -{
> - void *va = page_address(pfn_to_page(pfn));
> -
> - /*
> -  * With FWB, we ensure that the guest always accesses memory using
> -  * cacheable attributes, and we don't have to clean to PoC when
> -  * faulting in pages. Furthermore, FWB implies IDC, so cleaning to
> -  * PoU is not required either in this case.
> -  */
> - if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
> - return;
> -
> - kvm_flush_dcache_to_poc(va, size);
> -}
> -
> -static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
> -   unsigned long size)
> -{
> - if (icache_is_aliasing()) {
> - /* any kind of VIPT cache */
> - __flush_icache_all();
> - } else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
> - /* PIPT or VPIPT at EL2 (see comment in 
> __kvm_tlb_flush_vmid_ipa) */
> - void *va = page_address(pfn_to_page(pfn));
> -
> - invalidate_icache_range((unsigned long)va,
> - (unsigned long)va + size);
> - }
> -}
> -
>  void kvm_set_way_flush(struct kvm_vcpu *vcpu);
>  void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
>  
> diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
> index 4d177ce1d536..829a34eea526 100644
> --- a/arch/arm64/kvm/hyp/pgtable.c
> +++ b/arch/arm64/kvm/hyp/pgtable.c
> @@ -464,6 +464,43 @@ static int stage2_map_set_prot_attr(enum 
> kvm_pgtable_prot prot,
>   return 0;
>  }
>  
> +static bool stage2_pte_cacheable(kvm_pte_t pte)
> +{
> + u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
> + return memattr == PAGE_S2_MEMATTR(NORMAL);
> +}
> +
> +static bool stage2_pte_executable(kvm_pte_t pte)
> +{
> + return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
> +}
> +
> +static void stage2_flush_dcache(void *addr, u64 size)
> +{
> + /*
> +  * With FWB, we ensure that the guest always accesses memory using
> +  * cacheable attributes, and we don't have to clean to PoC when
> +  * faulting in pages. Furthermore, FWB implies IDC, so cleaning to
> +  * PoU is not required either in this case.
> +  */
> + if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
> + return;
> +
> + __flush_dcache_area(addr, size);
> +}
> +
> +static void stage2_invalidate_icache(void *addr, u64 size)
> +{
> + if (icache_is_aliasing()) {
> + /* Flush any kind of VIPT icache */
> + __flush_icache_all();
> + } else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
> + /* PIPT or VPIPT at EL2 */
> + invalidate_icache_range((unsigned long)addr,
> + (unsigned long)addr + size);
> + }
> +}
> +
>  static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
> kvm_pte_t *ptep,
> struct stage2_map_data *data)
> @@ -495,6 +532,13 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, 
> u32 level,
>   put_page(page);
>   }
>  
> + /* Perform CMOs before installation of the new PTE */
> + if (!kvm_pte_valid(old) || 

[RFC PATCH v3 1/2] KVM: arm64: Move CMOs from user_mem_abort to the fault handlers

2021-03-25 Thread Yanan Wang
We currently uniformly perform CMOs of D-cache and I-cache in function
user_mem_abort before calling the fault handlers. If we get concurrent
guest faults(e.g. translation faults, permission faults) or some really
unnecessary guest faults caused by BBM, CMOs for the first vcpu are
necessary while the others later are not.

By moving CMOs to the fault handlers, we can easily identify conditions
where they are really needed and avoid the unnecessary ones. Since
performing CMOs is a time-consuming process, especially when flushing a
block range, this solution reduces much of KVM's load and improves the
efficiency of the page table code.

So let's move both clean of D-cache and invalidation of I-cache to the
map path and move only invalidation of I-cache to the permission path.
Since the original APIs for CMOs in mmu.c are only called in function
user_mem_abort, we now also move them to pgtable.c.

Signed-off-by: Yanan Wang 
---
 arch/arm64/include/asm/kvm_mmu.h | 31 ---
 arch/arm64/kvm/hyp/pgtable.c | 68 +---
 arch/arm64/kvm/mmu.c | 23 ++-
 3 files changed, 57 insertions(+), 65 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 90873851f677..c31f88306d4e 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -177,37 +177,6 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu 
*vcpu)
return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
 }
 
-static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
-{
-   void *va = page_address(pfn_to_page(pfn));
-
-   /*
-* With FWB, we ensure that the guest always accesses memory using
-* cacheable attributes, and we don't have to clean to PoC when
-* faulting in pages. Furthermore, FWB implies IDC, so cleaning to
-* PoU is not required either in this case.
-*/
-   if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
-   return;
-
-   kvm_flush_dcache_to_poc(va, size);
-}
-
-static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
- unsigned long size)
-{
-   if (icache_is_aliasing()) {
-   /* any kind of VIPT cache */
-   __flush_icache_all();
-   } else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
-   /* PIPT or VPIPT at EL2 (see comment in 
__kvm_tlb_flush_vmid_ipa) */
-   void *va = page_address(pfn_to_page(pfn));
-
-   invalidate_icache_range((unsigned long)va,
-   (unsigned long)va + size);
-   }
-}
-
 void kvm_set_way_flush(struct kvm_vcpu *vcpu);
 void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
 
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 4d177ce1d536..829a34eea526 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -464,6 +464,43 @@ static int stage2_map_set_prot_attr(enum kvm_pgtable_prot 
prot,
return 0;
 }
 
+static bool stage2_pte_cacheable(kvm_pte_t pte)
+{
+   u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
+   return memattr == PAGE_S2_MEMATTR(NORMAL);
+}
+
+static bool stage2_pte_executable(kvm_pte_t pte)
+{
+   return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
+}
+
+static void stage2_flush_dcache(void *addr, u64 size)
+{
+   /*
+* With FWB, we ensure that the guest always accesses memory using
+* cacheable attributes, and we don't have to clean to PoC when
+* faulting in pages. Furthermore, FWB implies IDC, so cleaning to
+* PoU is not required either in this case.
+*/
+   if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
+   return;
+
+   __flush_dcache_area(addr, size);
+}
+
+static void stage2_invalidate_icache(void *addr, u64 size)
+{
+   if (icache_is_aliasing()) {
+   /* Flush any kind of VIPT icache */
+   __flush_icache_all();
+   } else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
+   /* PIPT or VPIPT at EL2 */
+   invalidate_icache_range((unsigned long)addr,
+   (unsigned long)addr + size);
+   }
+}
+
 static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
  kvm_pte_t *ptep,
  struct stage2_map_data *data)
@@ -495,6 +532,13 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, 
u32 level,
put_page(page);
}
 
+   /* Perform CMOs before installation of the new PTE */
+   if (!kvm_pte_valid(old) || stage2_pte_cacheable(old))
+   stage2_flush_dcache(__va(phys), granule);
+
+   if (stage2_pte_executable(new))
+   stage2_invalidate_icache(__va(phys), granule);
+
smp_store_release(ptep, new);
get_page(page);