This change adds common KVM-specific support for handling a change of the KVM VM file descriptor. The KVM VM file descriptor can change as part of the confidential guest reset mechanism. A new per-architecture function, kvm_arch_vmfd_change_ops(), is added so that each architecture can implement the changes it requires to support this. A subsequent patch will add the x86-specific implementation of kvm_arch_vmfd_change_ops(), as currently only x86 supports confidential guest reset.
Signed-off-by: Ani Sinha <[email protected]> --- accel/kvm/kvm-all.c | 80 ++++++++++++++++++++++++++++++++++++-- accel/kvm/trace-events | 1 + include/system/kvm.h | 2 + target/arm/kvm.c | 10 +++++ target/i386/kvm/kvm.c | 10 +++++ target/loongarch/kvm/kvm.c | 10 +++++ target/mips/kvm.c | 10 +++++ target/ppc/kvm.c | 10 +++++ target/riscv/kvm/kvm-cpu.c | 10 +++++ target/s390x/kvm/kvm.c | 10 +++++ 10 files changed, 150 insertions(+), 3 deletions(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index f85eb42d78..762f302551 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -2415,11 +2415,9 @@ void kvm_irqchip_set_qemuirq_gsi(KVMState *s, qemu_irq irq, int gsi) g_hash_table_insert(s->gsimap, irq, GINT_TO_POINTER(gsi)); } -static void kvm_irqchip_create(KVMState *s) +static void do_kvm_irqchip_create(KVMState *s) { int ret; - - assert(s->kernel_irqchip_split != ON_OFF_AUTO_AUTO); if (kvm_check_extension(s, KVM_CAP_IRQCHIP)) { ; } else if (kvm_check_extension(s, KVM_CAP_S390_IRQCHIP)) { @@ -2452,7 +2450,13 @@ static void kvm_irqchip_create(KVMState *s) fprintf(stderr, "Create kernel irqchip failed: %s\n", strerror(-ret)); exit(1); } +} +static void kvm_irqchip_create(KVMState *s) +{ + assert(s->kernel_irqchip_split != ON_OFF_AUTO_AUTO); + + do_kvm_irqchip_create(s); kvm_kernel_irqchip = true; /* If we have an in-kernel IRQ chip then we must have asynchronous * interrupt delivery (though the reverse is not necessarily true) @@ -2607,6 +2611,75 @@ static int kvm_setup_dirty_ring(KVMState *s) return 0; } +static int kvm_reset_vmfd(MachineState *ms) +{ + KVMState *s; + KVMMemoryListener *kml; + int ret = 0, type; + Error *err = NULL; + + /* + * bail if the current architecture does not support VM file + * descriptor change. 
+ */ + if (!kvm_arch_supports_vmfd_change()) { + error_report("This target architecture does not support KVM VM " + "file descriptor change."); + return -EOPNOTSUPP; + } + + s = KVM_STATE(ms->accelerator); + kml = &s->memory_listener; + + memory_listener_unregister(&kml->listener); + memory_listener_unregister(&kvm_io_listener); + + if (s->vmfd >= 0) { + close(s->vmfd); + } + + type = find_kvm_machine_type(ms); + if (type < 0) { + return -EINVAL; + } + + ret = do_kvm_create_vm(s, type); + if (ret < 0) { + return ret; + } + + s->vmfd = ret; + + kvm_setup_dirty_ring(s); + + /* rebind memory to new vm fd */ + ret = ram_block_rebind(&err); + if (ret < 0) { + return ret; + } + assert(!err); + + ret = kvm_arch_vmfd_change_ops(ms, s); + if (ret < 0) { + return ret; + } + + if (s->kernel_irqchip_allowed) { + do_kvm_irqchip_create(s); + } + + /* these can be only called after ram_block_rebind() */ + memory_listener_register(&kml->listener, &address_space_memory); + memory_listener_register(&kvm_io_listener, &address_space_io); + + /* + * kvm fd has changed. Commit the irq routes to KVM once more. 
+ */ + kvm_irqchip_commit_routes(s); + trace_kvm_reset_vmfd(); + return ret; +} + static int kvm_init(AccelState *as, MachineState *ms) { MachineClass *mc = MACHINE_GET_CLASS(ms); @@ -4014,6 +4087,7 @@ static void kvm_accel_class_init(ObjectClass *oc, const void *data) AccelClass *ac = ACCEL_CLASS(oc); ac->name = "KVM"; ac->init_machine = kvm_init; + ac->reset_vmfd = kvm_reset_vmfd; ac->has_memory = kvm_accel_has_memory; ac->allowed = &kvm_allowed; ac->gdbstub_supported_sstep_flags = kvm_gdbstub_sstep_flags; diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events index e43d18a869..e4beda0148 100644 --- a/accel/kvm/trace-events +++ b/accel/kvm/trace-events @@ -14,6 +14,7 @@ kvm_destroy_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" kvm_park_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" kvm_unpark_vcpu(unsigned long arch_cpu_id, const char *msg) "id: %lu %s" kvm_irqchip_commit_routes(void) "" +kvm_reset_vmfd(void) "" kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d" kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" kvm_irqchip_release_virq(int virq) "virq %d" diff --git a/include/system/kvm.h b/include/system/kvm.h index 8f9eecf044..a5ab22421d 100644 --- a/include/system/kvm.h +++ b/include/system/kvm.h @@ -358,6 +358,8 @@ int kvm_arch_init(MachineState *ms, KVMState *s); int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp); int kvm_arch_init_vcpu(CPUState *cpu); int kvm_arch_destroy_vcpu(CPUState *cpu); +bool kvm_arch_supports_vmfd_change(void); +int kvm_arch_vmfd_change_ops(MachineState *ms, KVMState *s); #ifdef TARGET_KVM_HAVE_RESET_PARKED_VCPU void kvm_arch_reset_parked_vcpu(unsigned long vcpu_id, int kvm_fd); diff --git a/target/arm/kvm.c b/target/arm/kvm.c index 48f853fff8..10cd94a57d 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -1569,6 +1569,16 @@ void kvm_arch_init_irq_routing(KVMState *s) { } +int kvm_arch_vmfd_change_ops(MachineState *ms, 
KVMState *s) +{ + abort(); +} + +bool kvm_arch_supports_vmfd_change(void) +{ + return false; +} + int kvm_arch_irqchip_create(KVMState *s) { if (kvm_kernel_irqchip_split()) { diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 3fdb2a3f62..6aa17cecba 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -3253,6 +3253,16 @@ static int kvm_vm_enable_energy_msrs(KVMState *s) return 0; } +int kvm_arch_vmfd_change_ops(MachineState *ms, KVMState *s) +{ + abort(); +} + +bool kvm_arch_supports_vmfd_change(void) +{ + return false; +} + int kvm_arch_init(MachineState *ms, KVMState *s) { int ret; diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c index ef3359ced9..9d5c73f3a3 100644 --- a/target/loongarch/kvm/kvm.c +++ b/target/loongarch/kvm/kvm.c @@ -1312,6 +1312,16 @@ int kvm_arch_irqchip_create(KVMState *s) return kvm_check_extension(s, KVM_CAP_DEVICE_CTRL); } +int kvm_arch_vmfd_change_ops(MachineState *ms, KVMState *s) +{ + abort(); +} + +bool kvm_arch_supports_vmfd_change(void) +{ + return false; +} + void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) { } diff --git a/target/mips/kvm.c b/target/mips/kvm.c index a85e162409..fbef498bd7 100644 --- a/target/mips/kvm.c +++ b/target/mips/kvm.c @@ -44,6 +44,16 @@ unsigned long kvm_arch_vcpu_id(CPUState *cs) return cs->cpu_index; } +int kvm_arch_vmfd_change_ops(MachineState *ms, KVMState *s) +{ + abort(); +} + +bool kvm_arch_supports_vmfd_change(void) +{ + return false; +} + int kvm_arch_init(MachineState *ms, KVMState *s) { /* MIPS has 128 signals */ diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 3b2f1077da..7cdc0d09f4 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -180,6 +180,16 @@ int kvm_arch_irqchip_create(KVMState *s) return 0; } +int kvm_arch_vmfd_change_ops(MachineState *ms, KVMState *s) +{ + abort(); +} + +bool kvm_arch_supports_vmfd_change(void) +{ + return false; +} + static int kvm_arch_sync_sregs(PowerPCCPU *cpu) { CPUPPCState *cenv = &cpu->env; diff 
--git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c index 5d792563b9..548ea3aeab 100644 --- a/target/riscv/kvm/kvm-cpu.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -1545,6 +1545,16 @@ int kvm_arch_irqchip_create(KVMState *s) return kvm_check_extension(s, KVM_CAP_DEVICE_CTRL); } +int kvm_arch_vmfd_change_ops(MachineState *ms, KVMState *s) +{ + abort(); +} + +bool kvm_arch_supports_vmfd_change(void) +{ + return false; +} + int kvm_arch_process_async_events(CPUState *cs) { return 0; diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c index bd6c440aef..6374246416 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c @@ -393,6 +393,16 @@ int kvm_arch_irqchip_create(KVMState *s) return 0; } +int kvm_arch_vmfd_change_ops(MachineState *ms, KVMState *s) +{ + abort(); +} + +bool kvm_arch_supports_vmfd_change(void) +{ + return false; +} + unsigned long kvm_arch_vcpu_id(CPUState *cpu) { return cpu->cpu_index; -- 2.42.0
