From: Sagi Shahar <sa...@google.com>

Adds the core logic for transferring state between source and
destination TDs during intra-host migration.

Signed-off-by: Sagi Shahar <sa...@google.com>
Co-developed-by: Ryan Afranji <afra...@google.com>
Signed-off-by: Ryan Afranji <afra...@google.com>
---
 arch/x86/kvm/vmx/tdx.c | 193 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 192 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 4582f94175b7..268aca28d878 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -3534,9 +3534,200 @@ static __always_inline bool tdx_finalized(struct kvm 
*kvm)
        return tdx_kvm->state == TD_STATE_RUNNABLE;
 }
 
+#define MAX_APIC_VECTOR 256
+
+static int tdx_migrate_vcpus(struct kvm *dst, struct kvm *src)
+{
+       struct kvm_vcpu *src_vcpu;
+       struct kvm_tdx *dst_tdx;
+       unsigned long i;
+
+       dst_tdx = to_kvm_tdx(dst);
+
+       kvm_for_each_vcpu(i, src_vcpu, src)
+               tdx_flush_vp_on_cpu(src_vcpu);
+
+       /* Copy per-vCPU state. */
+       kvm_for_each_vcpu(i, src_vcpu, src) {
+               struct vcpu_tdx *dst_tdx_vcpu, *src_tdx_vcpu;
+               struct kvm_lapic_state src_lapic_state;
+               struct kvm_vcpu *dst_vcpu;
+               u64 apic_base;
+               u32 vector;
+               int ret;
+
+               src_tdx_vcpu = to_tdx(src_vcpu);
+               dst_vcpu = kvm_get_vcpu(dst, i);
+               dst_tdx_vcpu = to_tdx(dst_vcpu);
+
+               dst_vcpu->cpu = -1;
+
+               /* Destination vCPU initialization skipped so do it here. */
+               apic_base = APIC_DEFAULT_PHYS_BASE | LAPIC_MODE_X2APIC |
+                       (kvm_vcpu_is_reset_bsp(dst_vcpu) ?
+                        MSR_IA32_APICBASE_BSP : 0);
+               if (kvm_apic_set_base(dst_vcpu, apic_base, true))
+                       return -EINVAL;
+
+               /* Copy lapic state. */
+               ret = kvm_apic_get_state(src_vcpu, &src_lapic_state);
+               if (ret)
+                       return -EINVAL;
+
+               ret = kvm_apic_set_state(dst_vcpu, &src_lapic_state);
+               if (ret)
+                       return -EINVAL;
+
+               /*
+                * pi_desc stores state of posted interrupts for VMs which are
+                * processed by pcpu during VM entry/runtime. For
+                * non-confidential VMs, this storage is synchronized to vcpu
+                * state using set_lapic_state(sync_pir_to_virr).
+                *
+                * For TDX VMs, KVM doesn't have access to virtual lapic page,
+                * so in order to preserve the interrupt state, copy over
+                * pi_desc contents to destination VM during copyless migration.
+                */
+               dst_tdx_vcpu->vt = src_tdx_vcpu->vt;
+               for (vector = 0; vector < MAX_APIC_VECTOR; vector++) {
+                       if (pi_test_pir(vector, &src_tdx_vcpu->vt.pi_desc)) {
+                               __vmx_deliver_posted_interrupt(
+                                               dst_vcpu,
+                                               &dst_tdx_vcpu->vt.pi_desc,
+                                               vector);
+                       }
+               }
+
+               /* Copy non-TDX vCPU state. */
+               memcpy(dst_vcpu->arch.regs, src_vcpu->arch.regs,
+                      NR_VCPU_REGS * sizeof(src_vcpu->arch.regs[0]));
+
+               dst_vcpu->arch.regs_avail = src_vcpu->arch.regs_avail;
+               dst_vcpu->arch.regs_dirty = src_vcpu->arch.regs_dirty;
+               dst_vcpu->arch.tsc_offset = dst_tdx->tsc_offset;
+               dst_vcpu->arch.guest_state_protected =
+                       src_vcpu->arch.guest_state_protected;
+               dst_vcpu->arch.xfd_no_write_intercept =
+                       src_vcpu->arch.xfd_no_write_intercept;
+               dst_vcpu->arch.tsc_offset = dst_tdx->tsc_offset;
+
+               /* Copy TD structures. */
+               dst_tdx_vcpu->vp.tdvpr_page = src_tdx_vcpu->vp.tdvpr_page;
+               dst_tdx_vcpu->vp.tdcx_pages = src_tdx_vcpu->vp.tdcx_pages;
+
+               td_vmcs_write64(dst_tdx_vcpu, POSTED_INTR_DESC_ADDR,
+                               __pa(&dst_tdx_vcpu->vt.pi_desc));
+
+               /* Copy current vCPU status. */
+               dst_tdx_vcpu->ext_exit_qualification =
+                       src_tdx_vcpu->ext_exit_qualification;
+               dst_tdx_vcpu->exit_gpa = src_tdx_vcpu->exit_gpa;
+               dst_tdx_vcpu->vp_enter_args = src_tdx_vcpu->vp_enter_args;
+               dst_tdx_vcpu->vp_enter_ret = src_tdx_vcpu->vp_enter_ret;
+               dst_tdx_vcpu->guest_entered = src_tdx_vcpu->guest_entered;
+               dst_tdx_vcpu->map_gpa_next = src_tdx_vcpu->map_gpa_next;
+               dst_tdx_vcpu->map_gpa_end = src_tdx_vcpu->map_gpa_end;
+
+               /* Copy mirror EPT tables. */
+               vcpu_load(dst_vcpu);
+               if (kvm_mmu_move_mirror_pages_from(dst_vcpu, src_vcpu)) {
+                       vcpu_put(dst_vcpu);
+                       return -EINVAL;
+               }
+               vcpu_put(dst_vcpu);
+
+               dst_vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+               dst_tdx_vcpu->state = VCPU_TD_STATE_INITIALIZED;
+
+               /*
+                * Set these source's vCPU migrated structures to NULL to avoid
+                * freeing them during source VM shutdown.
+                */
+               src_tdx_vcpu->vp.tdvpr_page = NULL;
+               src_tdx_vcpu->vp.tdcx_pages = NULL;
+       }
+
+       return 0;
+}
+
 static int tdx_migrate_from(struct kvm *dst, struct kvm *src)
 {
-       return -EINVAL;
+       struct kvm_tdx *src_tdx, *dst_tdx;
+       bool charged = false;
+       int ret;
+
+       src_tdx = to_kvm_tdx(src);
+       dst_tdx = to_kvm_tdx(dst);
+
+       ret = -EINVAL;
+
+       if (src_tdx->state != TD_STATE_RUNNABLE) {
+               pr_warn("Cannot migrate from a non finalized VM\n");
+               goto abort;
+       }
+
+       /* Transfer miscellaneous cgroup. */
+       dst_tdx->misc_cg = get_current_misc_cg();
+       if (dst_tdx->misc_cg != src_tdx->misc_cg) {
+               ret = misc_cg_try_charge(MISC_CG_RES_TDX, dst_tdx->misc_cg, 1);
+               if (ret)
+                       goto abort_dst_cgroup;
+               charged = true;
+       }
+
+       dst_tdx->hkid = src_tdx->hkid;
+
+       /* Copy VM data. */
+       dst_tdx->attributes = src_tdx->attributes;
+       dst_tdx->xfam = src_tdx->xfam;
+       dst_tdx->tsc_offset = src_tdx->tsc_offset;
+       dst_tdx->tsc_multiplier = src_tdx->tsc_multiplier;
+       dst_tdx->nr_premapped = src_tdx->nr_premapped;
+       dst_tdx->wait_for_sept_zap = src_tdx->wait_for_sept_zap;
+       dst_tdx->kvm.arch.gfn_direct_bits = src_tdx->kvm.arch.gfn_direct_bits;
+
+       /* Copy TD structures. */
+       dst_tdx->td.tdcs_nr_pages = src_tdx->td.tdcs_nr_pages;
+       dst_tdx->td.tdcx_nr_pages = src_tdx->td.tdcx_nr_pages;
+       dst_tdx->td.tdr_page = src_tdx->td.tdr_page;
+       dst_tdx->td.tdcs_pages = src_tdx->td.tdcs_pages;
+
+       /* Copy per-vCPU state. */
+       ret = tdx_migrate_vcpus(dst, src);
+       if (ret)
+               goto late_abort;
+
+       dst->mem_attr_array.xa_head = src->mem_attr_array.xa_head;
+       src->mem_attr_array.xa_head = NULL;
+
+       dst_tdx->state = TD_STATE_RUNNABLE;
+
+       /*
+        * Set these source's vCPU migrated structures to NULL to avoid
+        * freeing them during source VM shutdown.
+        */
+       src_tdx->hkid = -1;
+       src_tdx->td.tdr_page = NULL;
+       src_tdx->td.tdcs_pages = NULL;
+
+       return 0;
+
+late_abort:
+       /*
+        * If we aborted after the state transfer already started, the src VM
+        * is no longer valid.
+        */
+       kvm_vm_dead(src);
+
+abort_dst_cgroup:
+       if (charged)
+               misc_cg_uncharge(MISC_CG_RES_TDX, dst_tdx->misc_cg, 1);
+       put_misc_cg(dst_tdx->misc_cg);
+       dst_tdx->misc_cg = NULL;
+abort:
+       dst_tdx->hkid = -1;
+       dst_tdx->td.tdr_page = 0;
+       return ret;
 }
 
 int tdx_vm_move_enc_context_from(struct kvm *kvm, struct kvm *src_kvm)
-- 
2.50.0.rc1.591.g9c95f17f64-goog


Reply via email to