I've been looking at the guest reboot problem, which currently reboots
the host on svm when a Linux guest issues "# shutdown -r now".
(By the way, with a Linux guest, the host reboot can be worked around by
adding the kernel parameter reboot=b to the guest; this makes Linux use
an alternative reboot method which doesn't cause the host to reboot
under kvm-11.)
The patch below stops the rebooting of the host and handles the request
to reboot the guest in an orderly fashion, so it's a big step forward,
but it doesn't yet succeed in rebooting the guest; instead, the VM exits.
The basic approach is to intercept the shutdown. The patch includes some
possible alternatives to reboot, while searching for the best approach:
1) Call kvm_qemu_destroy() and, immediately afterwards, kvm_qemu_create_context()
2) New ioctl to reset a vcpu
3) I also tried calling init_vmcb() from shutdown_interception(), but that had
no noticeable effect
Method 1 seems promising, but currently results in kvm exiting due to
"do_interrupt: unexpect" errors.
Method 2 currently results in vmrun returning KVM_EXIT_TYPE_FAIL_ENTRY.
- Both methods 1 and 2 need an API version bump
- This code is safe to run without having to fear a host reboot
- SVM only, at the moment
- svm_reset_vcpu() was initially just the one-line call to init_vmcb(),
there's some leftover cruft now...
- A printf has been added to cpu_reset(); if it prints, qemu has
called all registered reset handlers.
Please comment. The patch is relative to kvm-11.
Index: kernel/kvm_main.c
===================================================================
--- kernel/kvm_main.c (revision 2211)
+++ kernel/kvm_main.c (working copy)
@@ -1764,6 +1764,23 @@
return r;
}
+/* Run the arch reset hook on one vcpu: 0, -EINVAL (bad index/no hook) or -ENOENT. */
+static int kvm_dev_ioctl_reset_vcpu(struct kvm *kvm, struct kvm_reset_vcpu *slot)
+{
+	struct kvm_vcpu *vcpu;
+
+	/* Only svm.c sets .reset_vcpu; calling a NULL hook would oops the host. */
+	if (!kvm_arch_ops->reset_vcpu || !valid_vcpu(slot->vcpu))
+		return -EINVAL;
+	vcpu = vcpu_load(kvm, slot->vcpu);
+	if (!vcpu)
+		return -ENOENT;
+	kvm_arch_ops->reset_vcpu(vcpu);
+	vcpu_put(vcpu);
+
+	return 0;
+}
+
static long kvm_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -1780,6 +1797,13 @@
goto out;
break;
}
+ case KVM_RESET_VCPU: {
+ struct kvm_reset_vcpu kvm_reset_vcpu;
+ if (copy_from_user(&kvm_reset_vcpu, (void *)arg, sizeof kvm_reset_vcpu))
+ goto out;
+ r = kvm_dev_ioctl_reset_vcpu(kvm, &kvm_reset_vcpu);
+ break;
+ }
case KVM_RUN: {
struct kvm_run kvm_run;
Index: kernel/include/linux/kvm.h
===================================================================
--- kernel/include/linux/kvm.h (revision 2211)
+++ kernel/include/linux/kvm.h (working copy)
@@ -11,7 +11,7 @@
#include <asm/types.h>
#include <linux/ioctl.h>
-#define KVM_API_VERSION 2
+#define KVM_API_VERSION 3
/*
* Architectural interrupt line count, and the size of the bitmap needed
@@ -46,6 +46,7 @@
KVM_EXIT_HLT = 5,
KVM_EXIT_MMIO = 6,
KVM_EXIT_IRQ_WINDOW_OPEN = 7,
+ KVM_EXIT_SHUTDOWN = 8,
};
/* for KVM_RUN */
@@ -218,6 +219,12 @@
};
};
+/* for KVM_RESET_VCPU: identifies the vcpu to be reset */
+struct kvm_reset_vcpu {
+ /* in: vcpu index, checked with valid_vcpu() by the kernel handler */
+ __u32 vcpu;
+};
+
#define KVMIO 0xAE
#define KVM_GET_API_VERSION _IO(KVMIO, 1)
@@ -235,5 +242,6 @@
#define KVM_GET_MSRS _IOWR(KVMIO, 13, struct kvm_msrs)
#define KVM_SET_MSRS _IOWR(KVMIO, 14, struct kvm_msrs)
#define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 15, struct kvm_msr_list)
+#define KVM_RESET_VCPU _IOW(KVMIO, 16, struct kvm_reset_vcpu)
#endif
Index: kernel/kvm.h
===================================================================
--- kernel/kvm.h (revision 2211)
+++ kernel/kvm.h (working copy)
@@ -379,6 +379,7 @@
int (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
int (*vcpu_setup)(struct kvm_vcpu *vcpu);
void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
+ void (*reset_vcpu)(struct kvm_vcpu *vcpu);
};
extern struct kvm_stat kvm_stat;
Index: kernel/svm.c
===================================================================
--- kernel/svm.c (revision 2211)
+++ kernel/svm.c (working copy)
@@ -508,7 +508,8 @@
(1ULL << INTERCEPT_VMSAVE) |
(1ULL << INTERCEPT_STGI) |
(1ULL << INTERCEPT_CLGI) |
- (1ULL << INTERCEPT_SKINIT);
+ (1ULL << INTERCEPT_SKINIT) |
+ (1ULL << INTERCEPT_SHUTDOWN);
control->iopm_base_pa = iopm_base;
control->msrpm_base_pa = msrpm_base;
@@ -582,6 +583,21 @@
return r;
}
+static void svm_reset_vcpu(struct kvm_vcpu *vcpu)
+{
+ printk("....svm_reset_vcpu\n");
+ memset(vcpu->svm->vmcb, 0, PAGE_SIZE);
+ /* Reset hook (.reset_vcpu): the VMCB page is cleared in place; vmcb_pa is not reassigned */
+ vcpu->svm->cr0 = 0x00000010;
+ vcpu->svm->asid_generation = 0;
+ memset(vcpu->svm->db_regs, 0, sizeof(vcpu->svm->db_regs));
+ init_vmcb(vcpu->svm->vmcb);
+ /* NOTE(review): fx_init() presumably resets the guest FPU state -- confirm */
+ fx_init(vcpu);
+ /* NOTE(review): this second init_vmcb() looks redundant (leftover cruft?) -- confirm */
+ init_vmcb(vcpu->svm->vmcb);
+}
+
static void svm_free_vcpu(struct kvm_vcpu *vcpu)
{
if (!vcpu->svm)
@@ -1216,6 +1232,14 @@
return 1;
}
+static int shutdown_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ /* SVM_EXIT_SHUTDOWN handler: reports KVM_EXIT_SHUTDOWN. After an intercepted shutdown, the state saved in the VMCB is undefined. */
+ printk("....shutdown_interception\n");
+ kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
+ return 0;
+}
+
static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run) = {
[SVM_EXIT_READ_CR0] = emulate_on_interception,
@@ -1256,6 +1280,7 @@
[SVM_EXIT_STGI] = invalid_op_interception,
[SVM_EXIT_CLGI] = invalid_op_interception,
[SVM_EXIT_SKINIT] = invalid_op_interception,
+ [SVM_EXIT_SHUTDOWN] = shutdown_interception,
};
@@ -1690,6 +1715,8 @@
.run = svm_vcpu_run,
.skip_emulated_instruction = skip_emulated_instruction,
.vcpu_setup = svm_vcpu_setup,
+
+ .reset_vcpu = svm_reset_vcpu,
};
static int __init svm_init(void)
Index: qemu/cpu-exec.c
===================================================================
--- qemu/cpu-exec.c (revision 2211)
+++ qemu/cpu-exec.c (working copy)
@@ -456,8 +456,20 @@
#ifdef USE_KVM
if (kvm_allowed) {
- kvm_cpu_exec(env);
- longjmp(env->jmp_env, 1);
+ int ret;
+ ret = kvm_cpu_exec(env);
+ if (ret == 1) {
+ longjmp(env->jmp_env, 1);
+ } /* else if (ret == 2) {
+ unused code
+ } */ else {
+ if (env->interrupt_request != 0) {
+ /* hardware interrupt will be executed just after */
+ } else {
+ /* otherwise, we restart */
+ longjmp(env->jmp_env, 1);
+ }
+ }
}
#endif
T0 = 0; /* force lookup of first TB */
Index: qemu/target-i386/helper2.c
===================================================================
--- qemu/target-i386/helper2.c (revision 2211)
+++ qemu/target-i386/helper2.c (working copy)
@@ -151,7 +151,7 @@
void cpu_reset(CPUX86State *env)
{
int i;
-
+ printf("....cpu_reset (env)\n");
memset(env, 0, offsetof(CPUX86State, breakpoints));
tlb_flush(env, 1);
Index: qemu/qemu-kvm.c
===================================================================
--- qemu/qemu-kvm.c (revision 2211)
+++ qemu/qemu-kvm.c (working copy)
@@ -413,9 +413,7 @@
if (!saved_env[0])
saved_env[0] = env;
- kvm_run(kvm_context, 0);
-
- return 0;
+ return kvm_run(kvm_context, 0);
}
@@ -578,6 +576,23 @@
return 1;
}
+
+static int kvm_guest_reboot(void *opaque, int vcpu)
+{
+ /* .shutdown callback: requests a qemu system reset. Note: after an intercepted shutdown, the state saved in the VMCB is undefined. */
+ CPUState **envs = opaque, *env;
+ env = envs[0];
+ /* NOTE(review): env is only referenced by the commented-out code below */
+ printf ("----kvm_guest_reboot\n");
+
+ qemu_system_reset_request();
+ //env->exception_index = EXCP_INTERRUPT;
+ //cpu_loop_exit ();
+
+ ///* Not reached. */
+ /* NOTE(review): the meaning of 3 is not defined in this patch; cpu_exec() only tests for ret == 1 -- confirm */
+ return 3;
+}
static struct kvm_callbacks qemu_kvm_ops = {
.cpuid = kvm_cpuid,
@@ -600,6 +615,7 @@
.io_window = kvm_io_window,
.try_push_interrupts = try_push_interrupts,
.post_kvm_run = post_kvm_run,
+ .shutdown = kvm_guest_reboot,
};
int kvm_qemu_init()
Index: qemu/vl.c
===================================================================
--- qemu/vl.c (revision 2211)
+++ qemu/vl.c (working copy)
@@ -93,7 +93,7 @@
#include "qemu-kvm.h"
#endif
-#define DEFAULT_NETWORK_SCRIPT "/etc/qemu-ifup"
+#define DEFAULT_NETWORK_SCRIPT "/etc/kvm/kvm-ifup"
//#define DEBUG_UNUSED_IOPORT
//#define DEBUG_IOPORT
Index: user/kvmctl.c
===================================================================
--- user/kvmctl.c (revision 2211)
+++ user/kvmctl.c (working copy)
@@ -23,7 +23,7 @@
#include <errno.h>
#include "kvmctl.h"
-#define EXPECTED_KVM_API_VERSION 2
+#define EXPECTED_KVM_API_VERSION 3
#if EXPECTED_KVM_API_VERSION != KVM_API_VERSION
#error libkvm: userspace and kernel version mismatch
@@ -83,6 +83,34 @@
return 0;
}
+/* Reset a vcpu after a guest shutdown; currently a stub that always returns 0. */
+int kvm_reset_vcpu(kvm_context_t kvm, int vcpu)
+{
+	printf("....kvmctl.c:88:kvm_reset_vcpu() (commented out)\n");
+#if 0
+	// Doesn't work -- vmrun returns KVM_EXIT_TYPE_FAIL_ENTRY
+	struct kvm_reset_vcpu kvm_reset_vcpu;
+	int r;
+	kvm_reset_vcpu.vcpu = vcpu;
+	r = ioctl(kvm->fd, KVM_RESET_VCPU, &kvm_reset_vcpu);
+	if (r == -1)
+		return -errno;
+	return 0;
+#endif
+#if 0
+	// This code hacked in at vl.c:5282
+	// We're in a library, the symbols below are in qemu-kvm,
+	// so this needs to be replaced by a registered callback
+	// This needs interrupts turned off, which is not done yet
+	extern void kvm_qemu_destroy();
+	extern int kvm_qemu_create_context ();
+	kvm_qemu_destroy();
+	kvm_qemu_create_context();
+#endif
+	/* Both reset paths are compiled out; return a defined value to callers. */
+	return 0;
+}
+
kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
void *opaque)
{
@@ -522,6 +550,14 @@
return kvm->callbacks->halt(kvm->opaque, kvm_run->vcpu);
}
+/* Pass a KVM_EXIT_SHUTDOWN exit to the client's shutdown callback, then reset the vcpu. */
+static int handle_shutdown(kvm_context_t kvm, struct kvm_run *kvm_run)
+{
+	int status = kvm->callbacks->shutdown(kvm->opaque, kvm_run->vcpu);
+	kvm_reset_vcpu(kvm, kvm_run->vcpu);
+	return status;
+}
+
int try_push_interrupts(kvm_context_t kvm)
{
return kvm->callbacks->try_push_interrupts(kvm->opaque);
@@ -594,6 +630,9 @@
break;
case KVM_EXIT_IRQ_WINDOW_OPEN:
break;
+ case KVM_EXIT_SHUTDOWN:
+ r = handle_shutdown(kvm, &kvm_run);
+ break;
default:
fprintf(stderr, "unhandled vm exit: 0x%x\n", kvm_run.exit_reason);
kvm_show_regs(kvm, vcpu);
Index: user/kvmctl.h
===================================================================
--- user/kvmctl.h (revision 2211)
+++ user/kvmctl.h (working copy)
@@ -62,6 +62,8 @@
int (*io_window)(void *opaque);
int (*try_push_interrupts)(void *opaque);
void (*post_kvm_run)(void *opaque, struct kvm_run *kvm_run);
+ /// Guest is being rebooted
+ int (*shutdown)(void *opaque, int vcpu);
};
/*!
@@ -248,4 +250,6 @@
unsigned long len);
void kvm_get_dirty_pages(kvm_context_t, int slot, void *buf);
+int kvm_reset_vcpu(kvm_context_t kvm, int vcpu);
+
#endif
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys - and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel