[PATCH][REPOST] Xen PV-on-HVM guest support (v3)

2009-10-15 Thread Ed Swierk
[Repost; the patch was garbled in my previous attempt.]

Support for Xen PV-on-HVM guests can be implemented almost entirely in
userspace, except for handling one annoying MSR that maps a Xen
hypercall blob into guest address space.

A generic mechanism to delegate MSR writes to userspace seems overkill
and risks encouraging similar MSR abuse in the future.  Thus this patch
adds special support for the Xen HVM MSR.

I implemented a new ioctl, KVM_XEN_HVM_CONFIG, that lets userspace tell
KVM which MSR the guest will write to, as well as the starting address
and size of the hypercall blobs (one each for 32-bit and 64-bit) that
userspace has loaded from files.  When the guest writes to the MSR, KVM
copies one page of the blob from userspace to the guest.
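
For reference, the userspace side boils down to a single ioctl call. A
minimal sketch follows (not Gerd's actual code -- the function name, the
example MSR number, and how the blobs get into memory are all assumptions;
note that blob sizes are counted in 4K pages, per the check in
xen_hvm_config() below):

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hypothetical setup helper: vmfd is the KVM VM fd; blob32/blob64 point
 * at the hypercall blobs already read from disk into this process. */
static int setup_xen_hvm(int vmfd, void *blob32, size_t pages32,
                         void *blob64, size_t pages64)
{
        struct kvm_xen_hvm_config cfg;

        memset(&cfg, 0, sizeof(cfg));
        cfg.msr = 0x40000000;       /* example only; the guest learns the
                                       MSR number via CPUID */
        cfg.blob_addr_32 = (uintptr_t)blob32;
        cfg.blob_size_32 = pages32; /* size in 4K pages */
        cfg.blob_addr_64 = (uintptr_t)blob64;
        cfg.blob_size_64 = pages64;
        return ioctl(vmfd, KVM_XEN_HVM_CONFIG, &cfg);
}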

I've tested this patch with a hacked-up version of Gerd's userspace
code, booting a number of guests (CentOS 5.3 i386 and x86_64, and
FreeBSD 8.0-RC1 amd64) and exercising PV network and block devices.

v3: separate blob_{addr,size}_{32,64}; move xen_hvm_config to struct
kvm_arch; remove unneeded ifdefs; return -EFAULT, -E2BIG, etc. from
xen_hvm_config; use is_long_mode(); remove debug printks; document ioctl
in api.txt

Signed-off-by: Ed Swierk eswi...@aristanetworks.com

---
diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index 5a4bc8c..5980113 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -593,6 +593,30 @@ struct kvm_irqchip {
} chip;
 };
 
+4.27 KVM_XEN_HVM_CONFIG
+
+Capability: KVM_CAP_XEN_HVM
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_xen_hvm_config (in)
+Returns: 0 on success, -1 on error
+
+Sets the MSR that the Xen HVM guest uses to initialize its hypercall
+page, and provides the starting address and size of the hypercall
+blobs in userspace.  When the guest writes the MSR, kvm copies one
+page of a blob (32- or 64-bit, depending on the vcpu mode) to guest
+memory.
+
+struct kvm_xen_hvm_config {
+   __u32 msr;
+   __u32 pad1;
+   __u64 blob_addr_32;
+   __u64 blob_addr_64;
+   __u8 blob_size_32;
+   __u8 blob_size_64;
+   __u8 pad2[30];
+};
+
 5. The kvm_run structure
 
 Application code obtains a pointer to the kvm_run structure by
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index f02e87a..ef9b4b7 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -19,6 +19,7 @@
 #define __KVM_HAVE_MSIX
 #define __KVM_HAVE_MCE
 #define __KVM_HAVE_PIT_STATE2
+#define __KVM_HAVE_XEN_HVM
 
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 45226f0..aee95b2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -410,6 +410,8 @@ struct kvm_arch{
 
unsigned long irq_sources_bitmap;
u64 vm_init_tsc;
+
+   struct kvm_xen_hvm_config xen_hvm_config;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1d454d9..66149fa 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -835,6 +835,37 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
return 0;
 }
 
+static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
+{
+   int lm = is_long_mode(vcpu);
+   u8 *blob_addr = lm ? (u8 *)vcpu->kvm->arch.xen_hvm_config.blob_addr_64
+       : (u8 *)vcpu->kvm->arch.xen_hvm_config.blob_addr_32;
+   u8 blob_size = lm ? vcpu->kvm->arch.xen_hvm_config.blob_size_64
+       : vcpu->kvm->arch.xen_hvm_config.blob_size_32;
+   u32 page_num = data & ~PAGE_MASK;
+   u64 page_addr = data & PAGE_MASK;
+   u8 *page;
+   int r;
+
+   r = -E2BIG;
+   if (page_num >= blob_size)
+       goto out;
+   r = -ENOMEM;
+   page = kzalloc(PAGE_SIZE, GFP_KERNEL);
+   if (!page)
+       goto out;
+   r = -EFAULT;
+   if (copy_from_user(page, blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE))
+       goto out_free;
+   if (kvm_write_guest(vcpu->kvm, page_addr, page, PAGE_SIZE))
+       goto out_free;
+   r = 0;
+out_free:
+   kfree(page);
+out:
+   return r;
+}
+
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
switch (msr) {
@@ -950,6 +981,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
			"0x%x data 0x%llx\n", msr, data);
break;
default:
+   if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
+   return xen_hvm_config(vcpu, data);
if (!ignore_msrs) {
			pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
				msr, data);
@@ -2411,6 +2444,14 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = 0;
break;
}
+   case KVM_XEN_HVM_CONFIG: {
+       r = -EFAULT;
+       if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
+                          sizeof(struct kvm_xen_hvm_config)))
+           goto out;
+       r = 0;
+       break;
+   }
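
To make the MSR encoding concrete: the low 12 bits of the value written
select which page of the blob to copy, and the page-aligned remainder is
the destination guest physical address.  A guest mapping its hypercall
pages would do roughly the following (illustrative sketch only; the helper
name is made up, wrmsrl() is the usual kernel MSR accessor):

/* Place blob page i at gpa + i*PAGE_SIZE, per xen_hvm_config() above. */
static void xen_map_hypercall_pages(u32 msr, u64 gpa, unsigned int pages)
{
	unsigned int i;

	for (i = 0; i < pages; i++)
		wrmsrl(msr, (gpa + i * PAGE_SIZE) | i);
}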

Re: [PATCH][RFC] Xen PV-on-HVM guest support

2009-10-14 Thread Ed Swierk
Thanks for the feedback; I'll post a new version shortly.

On Tue, Oct 13, 2009 at 11:45 PM, Jan Kiszka jan.kis...@web.de wrote:
> Interesting stuff. How usable is your work at this point? I've no
> immediate demand, but the question if one could integrate Xen guests
> with KVM already popped up more than once @work.

So far I've managed to boot CentOS 5.3 (both i386 and x86_64) and use
the Xen PV block and net devices, with pretty good performance. I've
also booted FreeBSD 8.0-RC1 (amd64 only) with a XENHVM kernel and used
the Xen PV block and net devices, but the performance of the net
device is significantly worse than with CentOS. Also some FreeBSD
applications use a flag that's not yet implemented in the net device
emulation, but I'm working on fixing that.

Overall it seems pretty solid for Linux PV-on-HVM guests. I think more
work is needed to support full PV guests, but I don't know how much.
Have folks been asking about PV-on-HVM or full PV?

--Ed


[PATCH] Xen PV-on-HVM guest support (v2)

2009-10-14 Thread Ed Swierk
Support for Xen PV-on-HVM guests can be implemented almost entirely in
userspace, except for handling one annoying MSR that maps a Xen
hypercall blob into guest address space.

A generic mechanism to delegate MSR writes to userspace seems overkill
and risks encouraging similar MSR abuse in the future.  Thus this patch
adds special support for the Xen HVM MSR.

I implemented a new ioctl, KVM_XEN_HVM_CONFIG, that lets userspace tell
KVM which MSR the guest will write to, as well as the starting address
and size of the hypercall blobs (one each for 32-bit and 64-bit) that
userspace has loaded from files.  When the guest writes to the MSR, KVM
copies one page of the blob from userspace to the guest.

I've tested this patch with a hacked-up version of Gerd's userspace
code, booting a number of guests (CentOS 5.3 i386 and x86_64, and
FreeBSD 8.0-RC1 amd64) and exercising PV network and block devices.

v2: fix ioctl struct padding; renumber CAP and ioctl constants; check
kvm_write_guest() return value; change printks to KERN_DEBUG (I think
they're worth keeping for debugging userspace)

Signed-off-by: Ed Swierk eswi...@aristanetworks.com

---
Index: kvm-kmod/include/asm-x86/kvm.h
===
--- kvm-kmod.orig/include/asm-x86/kvm.h
+++ kvm-kmod/include/asm-x86/kvm.h
@@ -59,6 +59,7 @@
 #define __KVM_HAVE_MSIX
 #define __KVM_HAVE_MCE
 #define __KVM_HAVE_PIT_STATE2
+#define __KVM_HAVE_XEN_HVM
 
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
Index: kvm-kmod/include/linux/kvm.h
===
--- kvm-kmod.orig/include/linux/kvm.h
+++ kvm-kmod/include/linux/kvm.h
@@ -476,6 +476,9 @@ struct kvm_ioeventfd {
 #endif
 #define KVM_CAP_IOEVENTFD 36
 #define KVM_CAP_SET_IDENTITY_MAP_ADDR 37
+#ifdef __KVM_HAVE_XEN_HVM
+#define KVM_CAP_XEN_HVM 38
+#endif
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -528,6 +531,15 @@ struct kvm_x86_mce {
 };
 #endif
 
+#ifdef KVM_CAP_XEN_HVM
+struct kvm_xen_hvm_config {
+   __u32 msr;
+   __u8 pad[2];
+   __u8 blob_size[2];
+   __u64 blob_addr[2];
+};
+#endif
+
 #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)
 
 struct kvm_irqfd {
@@ -586,6 +598,7 @@ struct kvm_irqfd {
 #define KVM_CREATE_PIT2		_IOW(KVMIO, 0x77, struct kvm_pit_config)
 #define KVM_SET_BOOT_CPU_ID	_IO(KVMIO, 0x78)
 #define KVM_IOEVENTFD		_IOW(KVMIO, 0x79, struct kvm_ioeventfd)
+#define KVM_XEN_HVM_CONFIG	_IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config)
 
 /*
  * ioctls for vcpu fds
Index: kvm-kmod/include/linux/kvm_host.h
===
--- kvm-kmod.orig/include/linux/kvm_host.h
+++ kvm-kmod/include/linux/kvm_host.h
@@ -236,6 +236,10 @@ struct kvm {
unsigned long mmu_notifier_seq;
long mmu_notifier_count;
 #endif
+
+#ifdef KVM_CAP_XEN_HVM
+   struct kvm_xen_hvm_config xen_hvm_config;
+#endif
 };
 
 /* The guest did something we don't support. */
Index: kvm-kmod/x86/x86.c
===
--- kvm-kmod.orig/x86/x86.c
+++ kvm-kmod/x86/x86.c
@@ -875,6 +875,35 @@ static int set_msr_mce(struct kvm_vcpu *
return 0;
 }
 
+#ifdef KVM_CAP_XEN_HVM
+static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
+{
+   int blob = !!(vcpu->arch.shadow_efer & EFER_LME);
+   u32 pnum = data & ~PAGE_MASK;
+   u64 paddr = data & PAGE_MASK;
+   u8 *page;
+   int r = 1;
+
+   if (pnum >= vcpu->kvm->xen_hvm_config.blob_size[blob])
+       goto out;
+   page = kzalloc(PAGE_SIZE, GFP_KERNEL);
+   if (!page)
+       goto out;
+   if (copy_from_user(page, (u8 *)vcpu->kvm->xen_hvm_config.blob_addr[blob]
+                      + pnum * PAGE_SIZE, PAGE_SIZE))
+       goto out_free;
+   if (kvm_write_guest(vcpu->kvm, paddr, page, PAGE_SIZE))
+       goto out_free;
+   printk(KERN_DEBUG "kvm: copied xen hvm blob %d page %d to 0x%llx\n",
+          blob, pnum, paddr);
+   r = 0;
+out_free:
+   kfree(page);
+out:
+   return r;
+}
+#endif
+
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
switch (msr) {
@@ -990,6 +1019,10 @@ int kvm_set_msr_common(struct kvm_vcpu *
			"0x%x data 0x%llx\n", msr, data);
break;
default:
+#ifdef KVM_CAP_XEN_HVM
+   if (msr && (msr == vcpu->kvm->xen_hvm_config.msr))
+   return xen_hvm_config(vcpu, data);
+#endif
if (!ignore_msrs) {
			pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
				msr, data);
@@ -2453,6 +2486,17 @@ long kvm_arch_vm_ioctl(struct file *filp
r = 0;
break;
}
+#ifdef KVM_CAP_XEN_HVM
+   case KVM_XEN_HVM_CONFIG: {
+       r = -EFAULT;
+       if (copy_from_user(&kvm->xen_hvm_config, argp,
+                          sizeof(struct kvm_xen_hvm_config)))
+           goto out;
+       r = 0;
+       break;
+   }
+#endif

[PATCH][RFC] Xen PV-on-HVM guest support

2009-10-13 Thread Ed Swierk
As we discussed a while back, support for Xen PV-on-HVM guests can be
implemented almost entirely in userspace, except for handling one
annoying MSR that maps a Xen hypercall blob into guest address space.

A generic mechanism to delegate MSR writes to userspace seems overkill
and risks encouraging similar MSR abuse in the future.  Thus this patch
adds special support for the Xen HVM MSR.

At Avi's suggestion[1] I implemented a new ioctl, KVM_XEN_HVM_CONFIG,
that lets userspace tell KVM which MSR the guest will write to, as well
as the starting address and size of the hypercall blobs (one each for
32-bit and 64-bit) that userspace has loaded from files.  When the guest
writes to the MSR, KVM copies one page of the blob from userspace to the
guest.

I've tested this patch against a hacked-up version of Gerd's userspace
code[2]; I'm happy to share those hacks if anyone is interested.

[1] http://www.mail-archive.com/kvm@vger.kernel.org/msg16065.html
[2]
http://git.et.redhat.com/?p=qemu-kraxel.git;a=log;h=refs/heads/xenner.v5

Signed-off-by: Ed Swierk eswi...@aristanetworks.com

---
diff -BurN a/include/asm-x86/kvm.h b/include/asm-x86/kvm.h
--- a/include/asm-x86/kvm.h 2009-10-13 20:40:55.0 -0700
+++ b/include/asm-x86/kvm.h 2009-10-13 20:21:07.0 -0700
@@ -59,6 +59,7 @@
 #define __KVM_HAVE_MSIX
 #define __KVM_HAVE_MCE
 #define __KVM_HAVE_PIT_STATE2
+#define __KVM_HAVE_XEN_HVM
 
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
diff -BurN a/include/linux/kvm.h b/include/linux/kvm.h
--- a/include/linux/kvm.h   2009-10-13 20:40:55.0 -0700
+++ b/include/linux/kvm.h   2009-10-13 20:21:26.0 -0700
@@ -476,6 +476,9 @@
 #endif
 #define KVM_CAP_IOEVENTFD 36
 #define KVM_CAP_SET_IDENTITY_MAP_ADDR 37
+#ifdef __KVM_HAVE_XEN_HVM
+#define KVM_CAP_XEN_HVM 90
+#endif
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -528,6 +531,14 @@
 };
 #endif
 
+#ifdef KVM_CAP_XEN_HVM
+struct kvm_xen_hvm_config {
+   __u32 msr;
+   __u64 blob_addr[2];
+   __u8 blob_size[2];
+};
+#endif
+
 #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)
 
 struct kvm_irqfd {
@@ -586,6 +597,7 @@
 #define KVM_CREATE_PIT2		_IOW(KVMIO, 0x77, struct kvm_pit_config)
 #define KVM_SET_BOOT_CPU_ID	_IO(KVMIO, 0x78)
 #define KVM_IOEVENTFD		_IOW(KVMIO, 0x79, struct kvm_ioeventfd)
+#define KVM_XEN_HVM_CONFIG	_IOW(KVMIO, 0xa1, struct kvm_xen_hvm_config)
 
 /*
  * ioctls for vcpu fds
diff -BurN a/include/linux/kvm_host.h b/include/linux/kvm_host.h
--- a/include/linux/kvm_host.h  2009-10-13 20:40:55.0 -0700
+++ b/include/linux/kvm_host.h  2009-10-13 20:27:03.0 -0700
@@ -236,6 +236,10 @@
unsigned long mmu_notifier_seq;
long mmu_notifier_count;
 #endif
+
+#ifdef KVM_CAP_XEN_HVM
+   struct kvm_xen_hvm_config xen_hvm_config;
+#endif
 };
 
 /* The guest did something we don't support. */
diff -BurN a/x86/x86.c b/x86/x86.c
--- a/x86/x86.c 2009-10-13 20:40:58.0 -0700
+++ b/x86/x86.c 2009-10-13 20:33:49.0 -0700
@@ -875,6 +875,33 @@
return 0;
 }
 
+#ifdef KVM_CAP_XEN_HVM
+static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
+{
+   int blob = !!(vcpu->arch.shadow_efer & EFER_LME);
+   u32 pnum = data & ~PAGE_MASK;
+   u64 paddr = data & PAGE_MASK;
+   u8 *page;
+   int r = 1;
+   printk(KERN_INFO "kvm: loading xen hvm blob %d page %d at %llx\n",
+          blob, pnum, paddr);
+   if (pnum >= vcpu->kvm->xen_hvm_config.blob_size[blob])
+       goto out;
+   page = kzalloc(PAGE_SIZE, GFP_KERNEL);
+   if (!page)
+       goto out;
+   if (copy_from_user(page, (u8 *)vcpu->kvm->xen_hvm_config.blob_addr[blob]
+                      + pnum * PAGE_SIZE, PAGE_SIZE))
+       goto out_free;
+   kvm_write_guest(vcpu->kvm, paddr, page, PAGE_SIZE);
+   r = 0;
+out_free:
+   kfree(page);
+out:
+   return r;
+}
+#endif
+
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
switch (msr) {
@@ -990,6 +1017,10 @@
			"0x%x data 0x%llx\n", msr, data);
break;
default:
+#ifdef KVM_CAP_XEN_HVM
+   if (msr && (msr == vcpu->kvm->xen_hvm_config.msr))
+   return xen_hvm_config(vcpu, data);
+#endif
if (!ignore_msrs) {
			pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
				msr, data);
@@ -2453,6 +2484,17 @@
r = 0;
break;
}
+#ifdef KVM_CAP_XEN_HVM
+   case KVM_XEN_HVM_CONFIG: {
+       r = -EFAULT;
+       printk(KERN_INFO "kvm: configuring xen hvm\n");
+       if (copy_from_user(&kvm->xen_hvm_config, argp,
+                          sizeof(struct kvm_xen_hvm_config)))
+           goto out;
+       r = 0;
+       break;
+   }
+#endif
default:
;
}



Re: Userspace MSR handling

2009-05-27 Thread Ed Swierk
On Mon, May 25, 2009 at 4:20 AM, Avi Kivity a...@redhat.com wrote:
> Device drivers have no business writing to cpu model specific registers.  I
> hate to bring that fugliness to kvm but I do want to support Xen guests.
>
> It should have been implemented as mmio.  Maybe implement an ioctl that
> converts rdmsr/wrmsr to equivalent mmios?

Converting MSRs to IO sounds fine, but a generic mechanism, with a new
ioctl type and all the bookkeeping for a dynamically-sized list of
MSR-to-MMIO mappings, seems like overkill given the puny scope of the
problem. All the Xen HVM guest needs is a single, arbitrary MSR that
when written generates an MMIO or PIO write handled by userspace. If
this requirement is unique and we don't expect to find other guests
that similarly abuse MSRs, could we get away with a less flexible but
simpler mechanism?

What I have in mind is choosing an unused legacy IO port range, say,
0x28-0x2f, and implementing a KVM-specific MSR, say, MSR_KVM_IO_28,
that maps rdmsr/wrmsr to a pair of inl/outl operations on these ports.
Either MMIO or PIO would work, but I'm assuming it's safer to grab
currently-unused IO ports than particular memory addresses.
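
Sketched out, the translation might look like the fragment below.  To be
clear, none of this exists in KVM today: the MSR number, the port layout,
and the kvm_queue_pio_out() helper standing in for KVM's PIO-exit plumbing
are all invented for illustration:

#define MSR_KVM_IO_28      0x4b564d08   /* invented MSR number */
#define KVM_MSR_IO_BASE    0x28         /* unused legacy ports 0x28-0x2f */

/* wrmsr: forward the 64-bit value to userspace as two 32-bit outl's. */
static int msr_io_28_wrmsr(struct kvm_vcpu *vcpu, u64 data)
{
	kvm_queue_pio_out(vcpu, KVM_MSR_IO_BASE, (u32)data);
	kvm_queue_pio_out(vcpu, KVM_MSR_IO_BASE + 4, (u32)(data >> 32));
	return 0;
}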

That odor you smell is the aroma of hardcoded goop, but I'm trying to
find a solution that doesn't burden KVM with a big chunk of code to
solve a one-off problem.

--Ed


Re: Userspace MSR handling

2009-05-27 Thread Ed Swierk
On Wed, May 27, 2009 at 9:28 AM, Avi Kivity a...@redhat.com wrote:
> Will it actually solve the problem?
>
> - can all hypercalls that can be issued with
> pv-on-hvm-on-kvm-with-a-side-order-of-fries be satisfied from userspace?
> - what about connecting the guest driver to xen netback one day?  we don't
> want to go through userspace for that.

In Gerd's current implementation, the code in the hypercall page
(which the guest maps in using that pesky MSR) handles all hypercalls
either internally or by invoking userspace (via another magic IO
port).

I'm too ignorant of Xen to claim that my proposal solves the problem
completely, but after hacking in support for delegating the magic MSR
to userspace, I got an unmodified FreeBSD disk image to boot in
PV-on-HVM-on-KVM+Qemu and use Xen PV network devices.

> We can consider catering to Xen and implementing that MSR in the kernel, if
> it's truly one off.

One way or another, the MSR somehow has to map in a chunk of data
supplied by userspace. Are you suggesting an alternative to the PIO
hack?

--Ed


Re: Userspace MSR handling

2009-05-27 Thread Ed Swierk
On Wed, 2009-05-27 at 21:16 +0200, Gerd Hoffmann wrote:
> Well, the chunk of data is on disk anyway:
> $libdir/xenner/hvm{32,64}.bin
>
> So a possible plan to attack could be ln -s $libdir/xenner
> /lib/firmware, let kvm.ko grab it if needed using
> request_firmware("xenner/hvm${bits}.bin"), and a few lines of kernel
> code handling the wrmsr.  Logic is just this:
>
> void xenner_wrmsr(uint64_t val, int longmode)
> {
>     uint32_t page = val & ~PAGE_MASK;
>     uint64_t paddr = val & PAGE_MASK;
>     uint8_t *blob = longmode ? hvm64 : hvm32;
>     cpu_physical_memory_write(paddr, blob + page * PAGE_SIZE,
>                               PAGE_SIZE);
> }
>
> Well, you'll have to sprinkle in blob loading and caching and some error
> checking.  But even with that it is probably hard to beat in actual code
> size.  Additional plus is we get away without a new ioctl then.
>
> Comments?

I like it.

Here's a first attempt.  One obvious improvement would be to cache the
reference to the firmware blob to avoid re-reading it on every wrmsr; a
sketch of that idea follows the patch.

---
diff -BurN kvm-kmod-2.6.30-rc6/include/asm-x86/kvm_para.h kvm-kmod-2.6.30-rc6.new/include/asm-x86/kvm_para.h
--- kvm-kmod-2.6.30-rc6/include/asm-x86/kvm_para.h	2009-05-21 02:10:14.0 -0700
+++ kvm-kmod-2.6.30-rc6.new/include/asm-x86/kvm_para.h	2009-05-27 14:44:42.252004038 -0700
@@ -56,6 +56,7 @@
 
 #define MSR_KVM_WALL_CLOCK  0x11
 #define MSR_KVM_SYSTEM_TIME 0x12
+#define MSR_KVM_LOAD_XENNER_FIRMWARE 0x4000
 
 #define KVM_MAX_MMU_OP_BATCH   32
 
diff -BurN kvm-kmod-2.6.30-rc6/include/linux/kvm_host.h kvm-kmod-2.6.30-rc6.new/include/linux/kvm_host.h
--- kvm-kmod-2.6.30-rc6/include/linux/kvm_host.h	2009-05-21 02:10:14.0 -0700
+++ kvm-kmod-2.6.30-rc6.new/include/linux/kvm_host.h	2009-05-27 14:16:47.839529841 -0700
@@ -192,6 +192,7 @@
unsigned long mmu_notifier_seq;
long mmu_notifier_count;
 #endif
+   struct device *kvm_dev;
 };
 
 /* The guest did something we don't support. */
diff -BurN kvm-kmod-2.6.30-rc6/x86/kvm_main.c kvm-kmod-2.6.30-rc6.new/x86/kvm_main.c
--- kvm-kmod-2.6.30-rc6/x86/kvm_main.c	2009-05-21 02:10:18.0 -0700
+++ kvm-kmod-2.6.30-rc6.new/x86/kvm_main.c	2009-05-27 15:22:43.463251834 -0700
@@ -816,6 +816,8 @@
 };
 #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
 
+static struct miscdevice kvm_dev;
+
 static struct kvm *kvm_create_vm(void)
 {
struct kvm *kvm = kvm_arch_create_vm();
@@ -869,6 +871,7 @@
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
kvm_coalesced_mmio_init(kvm);
 #endif
+   kvm->kvm_dev = kvm_dev.this_device;
 out:
return kvm;
 }
diff -BurN kvm-kmod-2.6.30-rc6/x86/x86.c kvm-kmod-2.6.30-rc6.new/x86/x86.c
--- kvm-kmod-2.6.30-rc6/x86/x86.c	2009-05-21 02:10:18.0 -0700
+++ kvm-kmod-2.6.30-rc6.new/x86/x86.c	2009-05-27 15:17:42.798002879 -0700
@@ -77,6 +77,7 @@
 #include <linux/iommu.h>
 #include <linux/intel-iommu.h>
 #include <linux/cpufreq.h>
+#include <linux/firmware.h>
 
 #include <asm/uaccess.h>
 #include <asm/msr.h>
@@ -846,6 +847,22 @@
kvm_request_guest_time_update(vcpu);
break;
}
+   case MSR_KVM_LOAD_XENNER_FIRMWARE: {
+       const char *fw_name = (vcpu->arch.shadow_efer & EFER_LME
+                              ? "xenner/hvm64.bin"
+                              : "xenner/hvm32.bin");
+       const struct firmware *firmware;
+       uint32_t page = data & ~PAGE_MASK;
+       uint64_t paddr = data & PAGE_MASK;
+       if (request_firmware(&firmware, fw_name, vcpu->kvm->kvm_dev))
+           return 1;
+       printk(KERN_INFO "kvm: loading %s page %d to %llx\n",
+              fw_name, page, paddr);
+       kvm_write_guest(vcpu->kvm, paddr,
+               firmware->data + page * PAGE_SIZE, PAGE_SIZE);
+       release_firmware(firmware);
+       break;
+   }
default:
		pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data);
		return 1;
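
For what it's worth, the caching improvement mentioned above could be as
simple as the following sketch (the cache array and helper name are mine,
not part of the patch; a real version would need locking plus a
release_firmware() at module unload):

static const struct firmware *xenner_fw_cache[2]; /* [0]=hvm32, [1]=hvm64 */

static const struct firmware *xenner_get_blob(struct kvm *kvm, int longmode)
{
	const char *fw_name = longmode ? "xenner/hvm64.bin"
				       : "xenner/hvm32.bin";

	/* Load at most once; later wrmsrs reuse the cached blob. */
	if (!xenner_fw_cache[longmode] &&
	    request_firmware(&xenner_fw_cache[longmode], fw_name,
			     kvm->kvm_dev))
		return NULL;
	return xenner_fw_cache[longmode];
}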




Userspace MSR handling

2009-05-22 Thread Ed Swierk
I'm experimenting with Gerd's excellent work on integrating Xenner
into Qemu (http://git.et.redhat.com/?p=qemu-kraxel.git). I'm using it
to boot a FreeBSD guest that uses the Xen paravirtual network drivers.
Decoupling the Xen PV guest support from the hypervisor really
simplifies deployment.

The current implementation doesn't yet support KVM, as KVM has to
handle a Xen-specific MSR in order to map hypercall pages into the
guest physical address space. A recent thread on this list discussed
the issue but didn't come to a resolution.

Does it make sense to implement a generic mechanism for handling MSRs
in userspace? I imagine a mechanism analogous to PIO, adding a
KVM_EXIT_MSR code and a msr type in the kvm_run struct.
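
Concretely, I picture something like this in the API (purely hypothetical
-- none of these names exist in linux/kvm.h):

#define KVM_EXIT_MSR 22                 /* invented exit reason */

/* Would join the existing union in struct kvm_run, next to 'io': */
struct {
	__u32 index;    /* MSR number */
	__u8  is_write; /* 1 = wrmsr, 0 = rdmsr */
	__u8  pad[3];
	__u64 data;     /* wrmsr value, or rdmsr result filled by userspace */
} msr;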

I'm happy to take a stab at implementing this if no one else is
already working on it.

--Ed