On Mon, Jan 09, 2017 at 01:24:36AM -0500, Jintack Lim wrote:
> Each nested VM is supposed to have a mmu (i.e. shadow stage-2 page

to have a 'struct kvm_mmu' ?

> table), and we create it when the guest hypervisor writes to vttbr_el2
> with a new vmid.

I think the commit message should also mention that you maintain a list
of seen nested stage 2 translation contexts and associated shadow page
tables.

> 
> In case the guest hypervisor writes to vttbr_el2 with existing vmid, we
> check if the base address is changed. If so, then what we have in the
> shadow page table is not valid any more. So ummap it.

unmap?  We clear the entire shadow stage 2 page table, right?

> 
> Signed-off-by: Jintack Lim <jint...@cs.columbia.edu>
> ---
>  arch/arm/include/asm/kvm_host.h   |  1 +
>  arch/arm/kvm/arm.c                |  1 +
>  arch/arm64/include/asm/kvm_host.h |  1 +
>  arch/arm64/include/asm/kvm_mmu.h  |  6 ++++
>  arch/arm64/kvm/mmu-nested.c       | 71 
> +++++++++++++++++++++++++++++++++++++++
>  arch/arm64/kvm/sys_regs.c         | 15 ++++++++-
>  6 files changed, 94 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index fbde48d..ebf2810 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -84,6 +84,7 @@ struct kvm_arch {
>  
>       /* Never used on arm but added to be compatible with arm64 */
>       struct list_head nested_mmu_list;
> +     spinlock_t mmu_list_lock;
>  
>       /* Interrupt controller */
>       struct vgic_dist        vgic;
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index 147df97..6fa5754 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -147,6 +147,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>       kvm->arch.mmu.vmid.vmid_gen = 0;
>       kvm->arch.mmu.el2_vmid.vmid_gen = 0;
>       INIT_LIST_HEAD(&kvm->arch.nested_mmu_list);
> +     spin_lock_init(&kvm->arch.mmu_list_lock);
>  
>       /* The maximum number of VCPUs is limited by the host's GIC model */
>       kvm->arch.max_vcpus = vgic_present ?
> diff --git a/arch/arm64/include/asm/kvm_host.h 
> b/arch/arm64/include/asm/kvm_host.h
> index 23e2267..52eea76 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -99,6 +99,7 @@ struct kvm_arch {
>  
>       /* Stage 2 shadow paging contexts for nested L2 VM */
>       struct list_head nested_mmu_list;
> +     spinlock_t mmu_list_lock;

I'm wondering if we really need the separate spin lock or if we could
just grab the KVM mutex?

>  };
>  
>  #define KVM_NR_MEM_OBJS     40
> diff --git a/arch/arm64/include/asm/kvm_mmu.h 
> b/arch/arm64/include/asm/kvm_mmu.h
> index d1ef650..fdc9327 100644
> --- a/arch/arm64/include/asm/kvm_mmu.h
> +++ b/arch/arm64/include/asm/kvm_mmu.h
> @@ -327,6 +327,7 @@ static inline unsigned int kvm_get_vmid_bits(void)
>  #ifdef CONFIG_KVM_ARM_NESTED_HYP
>  struct kvm_nested_s2_mmu *get_nested_mmu(struct kvm_vcpu *vcpu, u64 vttbr);
>  struct kvm_s2_mmu *vcpu_get_active_s2_mmu(struct kvm_vcpu *vcpu);
> +bool handle_vttbr_update(struct kvm_vcpu *vcpu, u64 vttbr);
>  #else
>  static inline struct kvm_nested_s2_mmu *get_nested_mmu(struct kvm_vcpu *vcpu,
>                                                      u64 vttbr)
> @@ -337,6 +338,11 @@ static inline struct kvm_s2_mmu 
> *vcpu_get_active_s2_mmu(struct kvm_vcpu *vcpu)
>  {
>       return &vcpu->kvm->arch.mmu;
>  }
> +
> +static inline bool handle_vttbr_update(struct kvm_vcpu *vcpu, u64 vttbr)
> +{
> +     return false;
> +}
>  #endif
>  
>  static inline u64 kvm_get_vttbr(struct kvm_s2_vmid *vmid,
> diff --git a/arch/arm64/kvm/mmu-nested.c b/arch/arm64/kvm/mmu-nested.c
> index d52078f..0811d94 100644
> --- a/arch/arm64/kvm/mmu-nested.c
> +++ b/arch/arm64/kvm/mmu-nested.c
> @@ -53,3 +53,74 @@ struct kvm_s2_mmu *vcpu_get_active_s2_mmu(struct kvm_vcpu 
> *vcpu)
>  
>       return &nested_mmu->mmu;
>  }
> +
> +static struct kvm_nested_s2_mmu *create_nested_mmu(struct kvm_vcpu *vcpu,
> +                                                u64 vttbr)
> +{
> +     struct kvm_nested_s2_mmu *nested_mmu, *tmp_mmu;
> +     struct list_head *nested_mmu_list = &vcpu->kvm->arch.nested_mmu_list;
> +     bool need_free = false;
> +     int ret;
> +
> +     nested_mmu = kzalloc(sizeof(struct kvm_nested_s2_mmu), GFP_KERNEL);
> +     if (!nested_mmu)
> +             return NULL;
> +
> +     ret = __kvm_alloc_stage2_pgd(&nested_mmu->mmu);
> +     if (ret) {
> +             kfree(nested_mmu);
> +             return NULL;
> +     }
> +
> +     spin_lock(&vcpu->kvm->arch.mmu_list_lock);
> +     tmp_mmu = get_nested_mmu(vcpu, vttbr);
> +     if (!tmp_mmu)
> +             list_add_rcu(&nested_mmu->list, nested_mmu_list);
> +     else /* Somebody already created and put a new nested_mmu to the list */
> +             need_free = true;
> +     spin_unlock(&vcpu->kvm->arch.mmu_list_lock);
> +
> +     if (need_free) {
> +             __kvm_free_stage2_pgd(&nested_mmu->mmu);
> +             kfree(nested_mmu);
> +             nested_mmu = tmp_mmu;
> +     }
> +
> +     return nested_mmu;
> +}
> +
> +static void kvm_nested_s2_unmap(struct kvm_vcpu *vcpu)
> +{
> +     struct kvm_nested_s2_mmu *nested_mmu;
> +     struct list_head *nested_mmu_list = &vcpu->kvm->arch.nested_mmu_list;
> +
> +     list_for_each_entry_rcu(nested_mmu, nested_mmu_list, list)
> +             kvm_unmap_stage2_range(&nested_mmu->mmu, 0, KVM_PHYS_SIZE);
> +}
> +
> +bool handle_vttbr_update(struct kvm_vcpu *vcpu, u64 vttbr)
> +{
> +     struct kvm_nested_s2_mmu *nested_mmu;
> +
> +     /* See if we can relax this */

huh?

> +     if (!vttbr)

why is this a special case?

Theoretically an IPA of zero and VMID zero could be a valid page table
base pointer, right?

I'm gussing because the guest hypervisor occasionally writes zero into
VTTBR_EL2, for example when not using stage 2 translation, so perhaps
what you need to do is to defer creating a new nested mmu structure
until you actually enter the VM with stage 2 paging enabled?

> +             return true;
> +
> +     nested_mmu = (struct kvm_nested_s2_mmu *)get_nested_mmu(vcpu, vttbr);
> +     if (!nested_mmu) {
> +             nested_mmu = create_nested_mmu(vcpu, vttbr);
> +             if (!nested_mmu)
> +                     return false;

I'm wondering if this can be simplified by having get_nested_mmu lookup
and allocate the struct and renaming the get_nested_mmu funtion to
lookup_nested_mmu?  This caller looks racy, even though it isn't, which
would be improved by my suggestion as well.

> +     } else {
> +             /*
> +              * unmap the shadow page table if vttbr_el2 is

While the function is called unmap, what we really do is
clearing/flushing the shadow stage 2 page table.

> +              * changed to different value
> +              */
> +             if (vttbr != nested_mmu->virtual_vttbr)
> +                     kvm_nested_s2_unmap(vcpu);
> +     }
> +
> +     nested_mmu->virtual_vttbr = vttbr;
> +
> +     return true;
> +}
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index e66f40d..ddb641c 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -960,6 +960,19 @@ static bool access_cpacr(struct kvm_vcpu *vcpu,
>       return true;
>  }
>  
> +static bool access_vttbr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
> +                      const struct sys_reg_desc *r)
> +{
> +     u64 vttbr = p->regval;
> +
> +     if (!p->is_write) {
> +             p->regval = vcpu_el2_reg(vcpu, r->reg);
> +             return true;
> +     }
> +
> +     return handle_vttbr_update(vcpu, vttbr);
> +}
> +
>  static bool trap_el2_reg(struct kvm_vcpu *vcpu,
>                        struct sys_reg_params *p,
>                        const struct sys_reg_desc *r)
> @@ -1306,7 +1319,7 @@ static bool trap_el2_reg(struct kvm_vcpu *vcpu,
>         trap_el2_reg, reset_el2_val, TCR_EL2, 0 },
>       /* VTTBR_EL2 */
>       { Op0(0b11), Op1(0b100), CRn(0b0010), CRm(0b0001), Op2(0b000),
> -       trap_el2_reg, reset_el2_val, VTTBR_EL2, 0 },
> +       access_vttbr, reset_el2_val, VTTBR_EL2, 0 },
>       /* VTCR_EL2 */
>       { Op0(0b11), Op1(0b100), CRn(0b0010), CRm(0b0001), Op2(0b010),
>         trap_el2_reg, reset_el2_val, VTCR_EL2, 0 },
> -- 
> 1.9.1
> 
> 

Thanks,
-Christoffer
_______________________________________________
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm

Reply via email to