Now that a hypervisor can run in the virtual EL2, the guest hypervisor
can assign any VMID to its own VMs. To avoid conflicts between VMIDs
among a host and guest(s), the host hypervisor maps each VMID from a
guest hypervisor's view (i.e. virtual VMID) to a unique shadow VMID.
It also manages a set of shadow stage-2 page tables for each shadow
VMID. All this information is stored in the kvm_nested_s2_mmu struct.

A host hypervisor manages a list of kvm_nested_s2_mmu objects per VM. On
VM entry, it searches the list for an object using the virtual VMID as
the key.

Signed-off-by: Jintack Lim <jintack....@linaro.org>
---

Notes:
    v1-->v2:
    - This is a merged commit of [RFC 39/55] and [RFC 40/55].
    - Updated the commit message and comments.
    - Defer creating a new nested mmu structure until we enter the VM with
      stage 2 paging enabled, which was previously done on vttbr_el2 write
      operations.
    - Use the existing kvm->mmu_lock when iterating nested mmus instead of
      creating one.

 arch/arm/include/asm/kvm_host.h      |  12 ++++
 arch/arm64/include/asm/kvm_emulate.h |  13 ++---
 arch/arm64/include/asm/kvm_host.h    |  25 ++++++++
 arch/arm64/include/asm/kvm_mmu.h     |  21 +++++++
 arch/arm64/kvm/Makefile              |   1 +
 arch/arm64/kvm/context.c             |   2 +-
 arch/arm64/kvm/mmu-nested.c          | 108 +++++++++++++++++++++++++++++++++++
 virt/kvm/arm/arm.c                   |   1 +
 8 files changed, 174 insertions(+), 9 deletions(-)
 create mode 100644 arch/arm64/kvm/mmu-nested.c

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 33ccdbe..d84c1c1 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -67,6 +67,15 @@ struct kvm_s2_mmu {
        pgd_t *pgd;
 };
 
+/* Per shadow VMID mmu structure. This is only for nested virtualization */
+struct kvm_nested_s2_mmu {
+       struct kvm_s2_mmu mmu;
+
+       u64 virtual_vttbr;      /* vttbr as seen by the guest hypervisor */
+
+       struct list_head list;  /* node for kvm_arch.nested_mmu_list */
+};
+
 struct kvm_arch {
        /* Stage 2 paging state for the VM */
        struct kvm_s2_mmu mmu;
@@ -79,6 +88,9 @@ struct kvm_arch {
         * here.
         */
 
+       /* Never used on arm but added to be compatible with arm64 */
+       struct list_head nested_mmu_list;
+
        /* Interrupt controller */
        struct vgic_dist        vgic;
        int max_vcpus;
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 71a3a04..f476576 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -199,6 +199,11 @@ static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu)
        return false;
 }
 
+static inline bool vcpu_nested_stage2_enabled(const struct kvm_vcpu *vcpu)
+{
+       return !!(vcpu_sys_reg(vcpu, HCR_EL2) & HCR_VM);
+}
+
 static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu)
 {
        return vcpu->arch.fault.esr_el2;
@@ -385,12 +390,4 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
        return data;            /* Leave LE untouched */
 }
 
-static inline struct kvm_s2_vmid *vcpu_get_active_vmid(struct kvm_vcpu *vcpu)
-{
-       if (unlikely(is_hyp_ctxt(vcpu)))
-               return &vcpu->kvm->arch.mmu.el2_vmid;
-
-       return &vcpu->kvm->arch.mmu.vmid;
-}
-
 #endif /* __ARM64_KVM_EMULATE_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index a7edf0e..0c37e49 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -65,6 +65,28 @@ struct kvm_s2_mmu {
        pgd_t *pgd;
 };
 
+/* Per shadow VMID mmu structure */
+struct kvm_nested_s2_mmu {
+       struct kvm_s2_mmu mmu;
+
+       /*
+        * virtual_vttbr contains the vttbr_el2 value from the guest
+        * hypervisor. Only its vmid field is used, as the key to search for
+        * this mmu object in the list; the baddr field is ignored.
+        *
+        * Alternatively, the vmid field and the baddr field could be used
+        * separately to find a shadow VMID and a pointer to the shadow
+        * stage-2 page table, and the two results combined to set up
+        * hw_vttbr. The only benefit of doing that would be reusing shadow
+        * stage-2 page tables for different VMIDs, which is not usual.
+        * So the current design, which keys a whole mmu by VMID, was chosen
+        * for simplicity.
+        */
+       u64 virtual_vttbr;
+
+       struct list_head list;  /* node in kvm_arch.nested_mmu_list */
+};
+
 struct kvm_arch {
        /* Stage 2 paging state for the VM */
        struct kvm_s2_mmu mmu;
@@ -77,6 +99,9 @@ struct kvm_arch {
 
        /* Interrupt controller */
        struct vgic_dist        vgic;
+
+       /* Stage 2 shadow paging contexts for nested L2 VM */
+       struct list_head nested_mmu_list;
 };
 
 #define KVM_NR_MEM_OBJS     40
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index bceaec1..452912f 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -112,6 +112,7 @@
 #include <asm/cacheflush.h>
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
+#include <asm/kvm_emulate.h>
 
 static inline unsigned long __kern_hyp_va(unsigned long v)
 {
@@ -321,6 +322,10 @@ static inline unsigned int kvm_get_vmid_bits(void)
        return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8;
 }
 
+struct kvm_nested_s2_mmu *get_nested_mmu(struct kvm_vcpu *vcpu, u64 vttbr);
+struct kvm_s2_mmu *vcpu_get_active_s2_mmu(struct kvm_vcpu *vcpu);
+void update_nested_s2_mmu(struct kvm_vcpu *vcpu);
+
 static inline u64 kvm_get_vttbr(struct kvm_s2_vmid *vmid,
                                struct kvm_s2_mmu *mmu)
 {
@@ -332,5 +337,21 @@ static inline u64 kvm_get_vttbr(struct kvm_s2_vmid *vmid,
        return baddr | vmid_field;
 }
 
+static inline u64 get_vmid(u64 vttbr)
+{
+       vttbr &= VTTBR_VMID_MASK(get_kvm_vmid_bits());  /* VMID field only */
+       return vttbr >> VTTBR_VMID_SHIFT;
+}
+
+static inline struct kvm_s2_vmid *vcpu_get_active_vmid(struct kvm_vcpu *vcpu)
+{
+       struct kvm_s2_mmu *active = vcpu_get_active_s2_mmu(vcpu);
+
+       /* Hyp context runs with the mmu's el2_vmid, anything else with vmid */
+       if (unlikely(is_hyp_ctxt(vcpu)))
+               return &active->el2_vmid;
+       return &active->vmid;
+}
+
 #endif /* __ASSEMBLY__ */
 #endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 0263ef0..5300db0 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -37,4 +37,5 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
 kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o
 
 kvm-$(CONFIG_KVM_ARM_HOST) += nested.o
+kvm-$(CONFIG_KVM_ARM_HOST) += mmu-nested.o
 kvm-$(CONFIG_KVM_ARM_HOST) += emulate-nested.o
diff --git a/arch/arm64/kvm/context.c b/arch/arm64/kvm/context.c
index afd1702..762d4a5 100644
--- a/arch/arm64/kvm/context.c
+++ b/arch/arm64/kvm/context.c
@@ -177,7 +177,7 @@ static void flush_shadow_el1_sysregs(struct kvm_vcpu *vcpu)
 
 static void setup_s2_mmu(struct kvm_vcpu *vcpu)
 {
-       struct kvm_s2_mmu *mmu = &vcpu->kvm->arch.mmu;
+       struct kvm_s2_mmu *mmu = vcpu_get_active_s2_mmu(vcpu);
        struct kvm_s2_vmid *vmid = vcpu_get_active_vmid(vcpu);
 
        vcpu->arch.hw_vttbr = kvm_get_vttbr(vmid, mmu);
diff --git a/arch/arm64/kvm/mmu-nested.c b/arch/arm64/kvm/mmu-nested.c
new file mode 100644
index 0000000..c436daf
--- /dev/null
+++ b/arch/arm64/kvm/mmu-nested.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2017 - Columbia University and Linaro Ltd.
+ * Author: Jintack Lim <jintack....@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_arm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_mmu.h>
+
+/* Return the nested mmu whose virtual VMID equals @vttbr's, or NULL if none */
+static struct kvm_nested_s2_mmu *lookup_nested_mmu(struct kvm_vcpu *vcpu,
+                                                  u64 vttbr)
+{
+       struct list_head *head = &vcpu->kvm->arch.nested_mmu_list;
+       u64 wanted = get_vmid(vttbr);
+       struct kvm_nested_s2_mmu *entry;
+
+       /* Only the VMID field of each stored vttbr is compared */
+       list_for_each_entry_rcu(entry, head, list) {
+               if (get_vmid(entry->virtual_vttbr) == wanted)
+                       return entry;
+       }
+
+       return NULL;
+}
+
+/**
+ * create_nested_mmu - create mmu for the given virtual VMID
+ *
+ * Allocate a shadow stage 2 mmu keyed by the VMID in @vttbr and add it to the
+ * VM's list. Returns the mmu on the list for that VMID, or NULL on failure.
+ */
+static struct kvm_nested_s2_mmu *create_nested_mmu(struct kvm_vcpu *vcpu,
+                                                  u64 vttbr)
+{
+       struct kvm_nested_s2_mmu *nested_mmu, *tmp_mmu;
+       struct list_head *nested_mmu_list = &vcpu->kvm->arch.nested_mmu_list;
+       int ret;
+
+       nested_mmu = kzalloc(sizeof(*nested_mmu), GFP_KERNEL);
+       if (!nested_mmu)
+               return NULL;
+
+       ret = __kvm_alloc_stage2_pgd(&nested_mmu->mmu);
+       if (ret) {
+               kfree(nested_mmu);
+               return NULL;
+       }
+
+       /*
+        * The virtual VMID is the lookup key, so it must be set before the
+        * mmu becomes visible on the list; otherwise a concurrent lookup
+        * could see a zeroed key.
+        */
+       nested_mmu->virtual_vttbr = vttbr;
+
+       spin_lock(&vcpu->kvm->mmu_lock);
+       tmp_mmu = lookup_nested_mmu(vcpu, vttbr);
+       if (!tmp_mmu)
+               list_add_rcu(&nested_mmu->list, nested_mmu_list);
+       spin_unlock(&vcpu->kvm->mmu_lock);
+
+       if (tmp_mmu) {
+               /*
+                * Somebody already put a new nested_mmu for this virtual VMID
+                * to the list behind our back. Drop ours and use theirs as is.
+                */
+               __kvm_free_stage2_pgd(vcpu->kvm, &nested_mmu->mmu);
+               kfree(nested_mmu);
+               nested_mmu = tmp_mmu;
+       }
+
+       return nested_mmu;
+}
+
+/* Get the shadow mmu for the virtual VTTBR_EL2; NULL on allocation failure */
+static struct kvm_s2_mmu *get_s2_mmu_nested(struct kvm_vcpu *vcpu)
+{
+       u64 vttbr = vcpu_sys_reg(vcpu, VTTBR_EL2);
+       struct kvm_nested_s2_mmu *nested_mmu = lookup_nested_mmu(vcpu, vttbr);
+
+       if (!nested_mmu)
+               nested_mmu = create_nested_mmu(vcpu, vttbr);
+
+       return nested_mmu ? &nested_mmu->mmu : NULL;
+}
+
+struct kvm_s2_mmu *vcpu_get_active_s2_mmu(struct kvm_vcpu *vcpu)
+{
+       /* Use a shadow mmu only outside hyp context with stage 2 enabled */
+       if (!is_hyp_ctxt(vcpu) && vcpu_nested_stage2_enabled(vcpu))
+               return get_s2_mmu_nested(vcpu);
+       return &vcpu->kvm->arch.mmu;
+}
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 63dd897..4548d77 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -145,6 +145,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
        /* Mark the initial VMID generation invalid */
        kvm->arch.mmu.vmid.vmid_gen = 0;
        kvm->arch.mmu.el2_vmid.vmid_gen = 0;
+       INIT_LIST_HEAD(&kvm->arch.nested_mmu_list);
 
        /* The maximum number of VCPUs is limited by the host's GIC model */
        kvm->arch.max_vcpus = vgic_present ?
-- 
1.9.1

Reply via email to