On Fri, Feb 10, 2012 at 2:22 PM, Marc Zyngier <marc.zyng...@arm.com> wrote:
> Add the necessary infrastructure to handle MMU notifiers on KVM/ARM.
> As we don't have shadow page tables, the implementation is actually very
> simple. The only supported operation is kvm_unmap_hva(), where we remove
> the HVA from the 2nd stage translation. All other hooks are NOPs.
>
> Signed-off-by: Marc Zyngier <marc.zyng...@arm.com>
> ---
> The aging ops are left unused for the moment, until I actually understand what
> they are used for and whether they apply to the ARM architecture.
>
> From v1:
> - Fixed the brown paper bag bug of invalidating the hva instead of the ipa
>
>  arch/arm/include/asm/kvm_asm.h  |    2 +
>  arch/arm/include/asm/kvm_host.h |   19 ++++++++++++++++
>  arch/arm/kvm/Kconfig            |    1 +
>  arch/arm/kvm/interrupts.S       |   18 ++++++++++++++++
>  arch/arm/kvm/mmu.c              |   44 ++++++++++++++++++++++++++++++++++++--
>  5 files changed, 81 insertions(+), 3 deletions(-)
>
> diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
> index 5ee7bd3..18be9bb 100644
> --- a/arch/arm/include/asm/kvm_asm.h
> +++ b/arch/arm/include/asm/kvm_asm.h
> @@ -36,6 +36,7 @@ asm(".equ SMCHYP_HVBAR_W, 0xfffffff0");
>  #endif /* __ASSEMBLY__ */
>
>  #ifndef __ASSEMBLY__
> +struct kvm;
>  struct kvm_vcpu;
>
>  extern char __kvm_hyp_init[];
> @@ -46,6 +47,7 @@ extern char __kvm_hyp_vector_end[];
>
>  extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
>
> +extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
>  extern char __kvm_hyp_code_end[];
>  #endif
>
> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index 555a6f1..1c0c68b 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -109,4 +109,23 @@ struct kvm_vm_stat {
>  struct kvm_vcpu_stat {
>  };
>
> +#define KVM_ARCH_WANT_MMU_NOTIFIER
> +struct kvm;
> +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
> +
> +/* We do not have shadow page tables, hence the empty hooks */
> +static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva)
> +{
> +       return 0;
> +}
> +
> +static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
> +{
> +       return 0;
> +}
> +
> +static inline void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
> +{
> +}
> +
>  #endif /* __ARM_KVM_HOST_H__ */
> diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
> index ccabbb3..7ce9173 100644
> --- a/arch/arm/kvm/Kconfig
> +++ b/arch/arm/kvm/Kconfig
> @@ -36,6 +36,7 @@ config KVM_ARM_HOST
>        depends on KVM
>        depends on MMU
>        depends on CPU_V7 || ARM_VIRT_EXT
> +       select  MMU_NOTIFIER
>        ---help---
>          Provides host support for ARM processors.
>
> diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
> index 8b7e5e9..8822fb3 100644
> --- a/arch/arm/kvm/interrupts.S
> +++ b/arch/arm/kvm/interrupts.S
> @@ -351,7 +351,25 @@ return_to_ioctl:
>  THUMB( orr     lr, lr, #1)
>        mov     pc, lr
>

I would prefer moving this to the top of the file before all the
macros so the coherency between the world-switch to the guest and the
return path is clearer (see the v6 staging branch where I already
added a function).

What I want to avoid is this:

__switch:
   do some switch stuff
__return:
   do some return stuff

cache_fun_1:
   foo

cache_fun_2:
   foo

cache_fun_3:
   foo

cache_fun_4:
   foo

vector:
   b __return
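
To be clear, the grouping I'd rather end up with is roughly this (same
illustrative names as in the sketch above):

__kvm_tlb_flush_vmid:
   tlb stuff

cache_fun_1:
   foo

...

__switch:
   do some switch stuff
__return:
   do some return stuff

vector:
   b __return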


> +ENTRY(__kvm_tlb_flush_vmid)
> +       hvc     #0                      @ Switch to Hyp mode
> +       push    {r2, r3}
> +
> +       ldrd    r2, r3, [r0, #KVM_VTTBR]
> +       mcrr    p15, 6, r2, r3, c2      @ Write VTTBR
> +       isb
> +       mcr     p15, 0, r0, c8, c7, 0   @ TLBIALL
> +       dsb
> +       isb
> +       mov     r2, #0
> +       mov     r3, #0
> +       mcrr    p15, 6, r2, r3, c2      @ Back to VMID #0
> +       isb
> +
> +       pop     {r2, r3}
> +       hvc     #0                      @ Back to SVC
> +       mov     pc, lr
> +ENDPROC(__kvm_tlb_flush_vmid)
>
>        .ltorg
>
> diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
> index baeb8a1..3f8d83b 100644
> --- a/arch/arm/kvm/mmu.c
> +++ b/arch/arm/kvm/mmu.c
> @@ -245,12 +245,12 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
>        kvm->arch.pgd = NULL;
>  }
>
> -static int __user_mem_abort(struct kvm *kvm, phys_addr_t addr, pfn_t pfn)
> +static int stage2_set_pte(struct kvm *kvm, phys_addr_t addr, pte_t new_pte)
>  {
>        pgd_t *pgd;
>        pud_t *pud;
>        pmd_t *pmd;
> -       pte_t *pte, new_pte;
> +       pte_t *pte;
>
>        /* Create 2nd stage page table mapping - Level 1 */
>        pgd = kvm->arch.pgd + pgd_index(addr);
> @@ -279,12 +279,18 @@ static int __user_mem_abort(struct kvm *kvm, phys_addr_t addr, pfn_t pfn)
>                pte = pte_offset_kernel(pmd, addr);
>
>        /* Create 2nd stage page table mapping - Level 3 */
> -       new_pte = pfn_pte(pfn, PAGE_KVM_GUEST);
>        set_pte_ext(pte, new_pte, 0);
>
>        return 0;
>  }
>
> +static int __user_mem_abort(struct kvm *kvm, phys_addr_t addr, pfn_t pfn)
> +{
> +       pte_t new_pte = pfn_pte(pfn, PAGE_KVM_GUEST);
> +
> +       return stage2_set_pte(kvm, addr, new_pte);
> +}
> +
>  static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>                          gfn_t gfn, struct kvm_memory_slot *memslot)
>  {
> @@ -510,3 +516,35 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
>
>        return user_mem_abort(vcpu, fault_ipa, gfn, memslot);
>  }
> +
> +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
> +{
> +       static const pte_t null_pte;
> +       struct kvm_memslots *slots;
> +       struct kvm_memory_slot *memslot;
> +       int needs_stage2_flush = 0;
> +
> +       slots = kvm_memslots(kvm);
> +
> +       /* we only care about the pages that the guest sees */
> +       kvm_for_each_memslot(memslot, slots) {

What a pain that you have to do this.

Should we not create an arch-independent hva_to_gfn function that might
return a bad_gfn if the hva is not in a memslot?

That would simplify this code quite a bit...
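
Rough sketch of what I have in mind (hva_to_gfn and the bad_gfn sentinel
are purely illustrative names, there is no such generic helper today):

	gfn_t hva_to_gfn(struct kvm *kvm, unsigned long hva)
	{
		struct kvm_memslots *slots = kvm_memslots(kvm);
		struct kvm_memory_slot *memslot;

		kvm_for_each_memslot(memslot, slots) {
			unsigned long start = memslot->userspace_addr;
			unsigned long end = start +
					(memslot->npages << PAGE_SHIFT);

			/* return the gfn backing this hva, if any */
			if (hva >= start && hva < end)
				return memslot->base_gfn +
					((hva - start) >> PAGE_SHIFT);
		}

		return (gfn_t)-1;	/* bad_gfn */
	}

With that, the arch side of kvm_unmap_hva() would boil down to a single
stage2_set_pte(kvm, gfn << PAGE_SHIFT, null_pte) plus the TLB flush.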

> +               unsigned long start = memslot->userspace_addr;
> +               unsigned long end;
> +
> +               end = start + (memslot->npages << PAGE_SHIFT);
> +               if (hva >= start && hva < end) {
> +                       gpa_t gpa_offset = hva - start;
> +                       gpa_t gpa = (memslot->base_gfn << PAGE_SHIFT) + gpa_offset;
> +
> +                       if (stage2_set_pte(kvm, gpa, null_pte))
> +                               continue; /* Something bad happened, try to carry on */

The "something bad" that happened here is that we ran out of memory to
create page tables, which really shouldn't happen: even if QEMU accessed
some of these pages without regard for KVM and those pages are now being
evicted, we should certainly not start allocating page tables just to put
a blank pte in there...

And since we're here because of memory pressure in the first place,
continuing to hammer on the system is probably a bad idea...

Consider checking for null_pte in stage2_set_pte(); then an error should
not happen here, and if it does, it's more like a BUG().
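
E.g. something roughly like this at the point in stage2_set_pte() where
the pte table would otherwise be allocated (sketch only, assuming
pte_none() is a valid test for the null pte passed in from
kvm_unmap_hva()):

	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd)) {
		if (pte_none(new_pte))
			return 0;	/* nothing mapped here, nothing to clear */
		/* ... existing code that allocates the pte table ... */
	}

That way an error return on the unmap path really would indicate a bug.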

> +
> +                       needs_stage2_flush = 1;
> +               }
> +       }
> +
> +       if (needs_stage2_flush)
> +               __kvm_tlb_flush_vmid(kvm);
> +
> +       return 0;
> +}
> --
> 1.7.3.4
>
