[PATCH] Support for USB host device auto disconnect.

2008-08-01 Thread Max Krasnyansky
I got really annoyed by the fact that you have to manually do
usb_del in the monitor when host device is unplugged and decided
to fix it :)

Basically we now automatically remove guest USB device
when the actual host device is disconnected.

At first I've extended set_fd_handlerX() stuff to support checking
for exceptions on fds. But unfortunately usbfs code does not wake up
user-space process when device is removed, which means we need a
timer to periodically check if device is still there. So I removed
fd exception stuff and implemented it with the timer.

Signed-off-by: Max Krasnyansky <[EMAIL PROTECTED]>
---
 qemu/hw/usb.h|1 +
 qemu/usb-linux.c |   56 +
 qemu/vl.c|   26 +---
 3 files changed, 66 insertions(+), 17 deletions(-)

diff --git a/qemu/hw/usb.h b/qemu/hw/usb.h
index 8bdc68d..2edb982 100644
--- a/qemu/hw/usb.h
+++ b/qemu/hw/usb.h
@@ -197,6 +197,7 @@ static inline void usb_cancel_packet(USBPacket * p)
 p->cancel_cb(p, p->cancel_opaque);
 }
 
+int usb_device_del_addr(int bus_num, int addr);
 void usb_attach(USBPort *port, USBDevice *dev);
 int usb_generic_handle_packet(USBDevice *s, USBPacket *p);
 int set_usb_string(uint8_t *buf, const char *str);
diff --git a/qemu/usb-linux.c b/qemu/usb-linux.c
index 78cd317..3e3c54e 100644
--- a/qemu/usb-linux.c
+++ b/qemu/usb-linux.c
@@ -22,6 +22,7 @@
  * THE SOFTWARE.
  */
 #include "qemu-common.h"
+#include "qemu-timer.h"
 #include "hw/usb.h"
 #include "console.h"
 
@@ -79,6 +80,7 @@ typedef struct USBHostDevice {
 uint8_t descr[1024];
 int descr_len;
 int urbs_ready;
+QEMUTimer *timer;
 } USBHostDevice;
 
 typedef struct PendingURB {
@@ -167,7 +169,11 @@ static int usb_host_update_interfaces(USBHostDevice *dev, 
int configuration)
 }
 config_descr_len = dev->descr[i];
 
-if (configuration == dev->descr[i + 5])
+#ifdef DEBUG
+   printf("config #%d need %d\n", dev->descr[i + 5], configuration); 
+#endif
+
+if (configuration < 0 || configuration == dev->descr[i + 5])
 break;
 
 i += config_descr_len;
@@ -232,8 +238,11 @@ static void usb_host_handle_destroy(USBDevice *dev)
 {
 USBHostDevice *s = (USBHostDevice *)dev;
 
+qemu_del_timer(s->timer);
+
 if (s->fd >= 0)
 close(s->fd);
+
 qemu_free(s);
 }
 
@@ -596,6 +605,22 @@ static int usb_linux_update_endp_table(USBHostDevice *s)
 return 0;
 }
 
+static void usb_host_device_check(void *priv)
+{
+USBHostDevice *s = priv;
+struct usbdevfs_connectinfo ci;
+int err;
+
+err = ioctl(s->fd, USBDEVFS_CONNECTINFO, &ci);
+if (err < 0) {
+printf("usb device %d.%d disconnected\n", 0, s->dev.addr);
+   usb_device_del_addr(0, s->dev.addr);
+   return;
+}
+
+qemu_mod_timer(s->timer, qemu_get_clock(rt_clock) + 1000);
+}
+
 /* XXX: exclude high speed devices or implement EHCI */
 USBDevice *usb_host_device_open(const char *devname)
 {
@@ -606,24 +631,30 @@ USBDevice *usb_host_device_open(const char *devname)
 int bus_num, addr;
 char product_name[PRODUCT_NAME_SZ];
 
+if (usb_host_find_device(&bus_num, &addr,
+ product_name, sizeof(product_name),
+ devname) < 0)
+return NULL;
+
+
 dev = qemu_mallocz(sizeof(USBHostDevice));
 if (!dev)
 goto fail;
 
-#ifdef DEBUG_ISOCH
+dev->timer = qemu_new_timer(rt_clock, usb_host_device_check, (void *) dev);
+if (!dev->timer)
+   goto fail;
+
+#ifdef DEBUG
 printf("usb_host_device_open %s\n", devname);
 #endif
-if (usb_host_find_device(&bus_num, &addr,
- product_name, sizeof(product_name),
- devname) < 0)
-return NULL;
 
 snprintf(buf, sizeof(buf), USBDEVFS_PATH "/%03d/%03d",
  bus_num, addr);
 fd = open(buf, O_RDWR | O_NONBLOCK);
 if (fd < 0) {
 perror(buf);
-return NULL;
+goto fail;
 }
 
 /* read the device description */
@@ -647,7 +678,7 @@ USBDevice *usb_host_device_open(const char *devname)
 dev->configuration = 1;
 
 /* XXX - do something about initial configuration */
-if (!usb_host_update_interfaces(dev, 1))
+if (!usb_host_update_interfaces(dev, -1))
 goto fail;
 
 ret = ioctl(fd, USBDEVFS_CONNECTINFO, &ci);
@@ -702,11 +733,18 @@ USBDevice *usb_host_device_open(const char *devname)
 fcntl(dev->pipe_fds[1], F_SETFL, O_NONBLOCK);
 qemu_set_fd_handler(dev->pipe_fds[0], urb_completion_pipe_read, NULL, dev);
 #endif
+
+/* Start the timer to detect disconnect */
+qemu_mod_timer(dev->timer, qemu_get_clock(rt_clock) + 1000);
+
 dev->urbs_ready = 0;
 return (USBDevice *)dev;
 fail:
-if (dev)
+if (dev) {
+   if (dev->timer)
+   qemu_del_timer(dev->timer);
 qemu_free(dev);
+}
 close(fd);
 return NULL;
 }
diff --git a/qemu/vl.c b/qemu/vl.c
index e

[PATCH] Generic packet handler cleanup and documentation

2008-08-01 Thread Max Krasnyansky
A bit better documentation of the USB device API, namely
return codes.
Rewrite of usb_generic_handle_packet() to make it more
readable and easier to follow.

Signed-off-by: Max Krasnyansky <[EMAIL PROTECTED]>
---
 qemu/hw/usb.c |  265 +++--
 qemu/hw/usb.h |   35 +++-
 2 files changed, 179 insertions(+), 121 deletions(-)

diff --git a/qemu/hw/usb.c b/qemu/hw/usb.c
index be4d66d..c17266d 100644
--- a/qemu/hw/usb.c
+++ b/qemu/hw/usb.c
@@ -3,6 +3,8 @@
  *
  * Copyright (c) 2005 Fabrice Bellard
  *
+ * 2008 Generic packet handler rewrite by Max Krasnyansky
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to 
deal
  * in the Software without restriction, including without limitation the rights
@@ -30,6 +32,7 @@ void usb_attach(USBPort *port, USBDevice *dev)
 }
 
 /**/
+
 /* generic USB device helpers (you are not forced to use them when
writing your USB device driver, but they help handling the
protocol)
@@ -39,141 +42,164 @@ void usb_attach(USBPort *port, USBDevice *dev)
 #define SETUP_STATE_DATA 1
 #define SETUP_STATE_ACK  2
 
-int usb_generic_handle_packet(USBDevice *s, USBPacket *p)
+static int do_token_setup(USBDevice *s, USBPacket *p)
+{
+int request, value, index;
+int ret = 0;
+
+if (p->len != 8)
+return USB_RET_STALL;
+ 
+memcpy(s->setup_buf, p->data, 8);
+s->setup_len   = (s->setup_buf[7] << 8) | s->setup_buf[6];
+s->setup_index = 0;
+
+request = (s->setup_buf[0] << 8) | s->setup_buf[1];
+value   = (s->setup_buf[3] << 8) | s->setup_buf[2];
+index   = (s->setup_buf[5] << 8) | s->setup_buf[4];
+ 
+if (s->setup_buf[0] & USB_DIR_IN) {
+ret = s->handle_control(s, request, value, index, 
+s->setup_len, s->data_buf);
+if (ret < 0)
+return ret;
+
+if (ret < s->setup_len)
+s->setup_len = ret;
+s->setup_state = SETUP_STATE_DATA;
+} else {
+if (s->setup_len == 0)
+s->setup_state = SETUP_STATE_ACK;
+else
+s->setup_state = SETUP_STATE_DATA;
+}
+
+return ret;
+}
+
+static int do_token_in(USBDevice *s, USBPacket *p)
 {
-int l, ret = 0;
-int len = p->len;
-uint8_t *data = p->data;
+int request, value, index;
+int ret = 0;
+
+if (p->devep != 0)
+return s->handle_data(s, p);
+
+request = (s->setup_buf[0] << 8) | s->setup_buf[1];
+value   = (s->setup_buf[3] << 8) | s->setup_buf[2];
+index   = (s->setup_buf[5] << 8) | s->setup_buf[4];
+ 
+switch(s->setup_state) {
+case SETUP_STATE_ACK:
+if (!(s->setup_buf[0] & USB_DIR_IN)) {
+s->setup_state = SETUP_STATE_IDLE;
+ret = s->handle_control(s, request, value, index,
+s->setup_len, s->data_buf);
+if (ret > 0)
+return 0;
+return ret;
+}
+
+/* return 0 byte */
+return 0;
+
+case SETUP_STATE_DATA:
+if (s->setup_buf[0] & USB_DIR_IN) {
+int len = s->setup_len - s->setup_index;
+if (len > p->len)
+len = p->len;
+memcpy(p->data, s->data_buf + s->setup_index, len);
+s->setup_index += len;
+if (s->setup_index >= s->setup_len)
+s->setup_state = SETUP_STATE_ACK;
+return len;
+}
+
+s->setup_state = SETUP_STATE_IDLE;
+return USB_RET_STALL;
+
+default:
+return USB_RET_STALL;
+}
+}
+
+static int do_token_out(USBDevice *s, USBPacket *p)
+{
+if (p->devep != 0)
+return s->handle_data(s, p);
+
+switch(s->setup_state) {
+case SETUP_STATE_ACK:
+if (s->setup_buf[0] & USB_DIR_IN) {
+s->setup_state = SETUP_STATE_IDLE;
+/* transfer OK */
+} else {
+/* ignore additional output */
+}
+return 0;
+
+case SETUP_STATE_DATA:
+if (!(s->setup_buf[0] & USB_DIR_IN)) {
+int len = s->setup_len - s->setup_index;
+if (len > p->len)
+len = p->len;
+memcpy(s->data_buf + s->setup_index, p->data, len);
+s->setup_index += len;
+if (s->setup_index >= s->setup_len)
+s->setup_state = SETUP_STATE_ACK;
+return len;
+}
+
+s->setup_state = SETUP_STATE_IDLE;
+return USB_RET_STALL;
+
+default:
+return USB_RET_STALL;
+}
+}
 
+/*
+ * Generic packet handler.
+ * Called by the HC (host controller).
+ *
+ * Returns length of the transaction or one of the USB_RET_XXX codes.
+ */
+int usb_generic_handle_packet(USBDevice *s, USBPacket *p)
+{
 switch(p->pid) {
 case USB_MSG_ATTACH:
 s->state = USB_STATE_ATTACHED;
-break;
+return 0;
+
 case USB_MSG_DETACH:

[PATCH] Extra debugging for Linux USB host.

2008-08-01 Thread Max Krasnyansky
Print details and status of the control and bulk transfers.

Signed-off-by: Max Krasnyansky <[EMAIL PROTECTED]>
---
 qemu/usb-linux.c |   29 ++---
 1 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/qemu/usb-linux.c b/qemu/usb-linux.c
index 78f4b2a..97842c9 100644
--- a/qemu/usb-linux.c
+++ b/qemu/usb-linux.c
@@ -207,7 +207,7 @@ static int usb_host_update_interfaces(USBHostDevice *dev, 
int configuration)
 i += dev_descr_len;
 while (i < dev->descr_len) {
 #ifdef DEBUG
-printf("i is %d, descr_len is %d, dl %d, dt %d\n", i, dev->descr_len,
+printf("husb: i is %d, descr_len is %d, dl %d, dt %d\n", i, 
dev->descr_len,
dev->descr[i], dev->descr[i+1]);
 #endif
 if (dev->descr[i+1] != USB_DT_CONFIG) {
@@ -217,7 +217,7 @@ static int usb_host_update_interfaces(USBHostDevice *dev, 
int configuration)
 config_descr_len = dev->descr[i];
 
 #ifdef DEBUG
-   printf("config #%d need %d\n", dev->descr[i + 5], configuration); 
+   printf("husb: config #%d need %d\n", dev->descr[i + 5], configuration); 
 #endif
 
 if (configuration < 0 || configuration == dev->descr[i + 5])
@@ -227,7 +227,7 @@ static int usb_host_update_interfaces(USBHostDevice *dev, 
int configuration)
 }
 
 if (i >= dev->descr_len) {
-printf("usb_host: error - device has no matching configuration\n");
+printf("husb: error. device has no matching configuration\n");
 goto fail;
 }
 nb_interfaces = dev->descr[i + 4];
@@ -264,7 +264,7 @@ static int usb_host_update_interfaces(USBHostDevice *dev, 
int configuration)
 }
 
 #ifdef DEBUG
-printf("usb_host: %d interfaces claimed for configuration %d\n",
+printf("husb: %d interfaces claimed for configuration %d\n",
nb_interfaces, configuration);
 #endif
 
@@ -322,10 +322,9 @@ static int usb_host_handle_control(USBDevice *dev,
 ret = ioctl(s->fd, USBDEVFS_SETINTERFACE, &si);
 usb_linux_update_endp_table(s);
 } else if (request == (DeviceOutRequest | USB_REQ_SET_CONFIGURATION)) {
-#ifdef DEBUG
-printf("usb_host_handle_control: SET_CONFIGURATION request - "
-   "config %d\n", value & 0xff);
-#endif
+#ifdef DEBUG
+printf("husb: ctrl set config %d\n", value & 0xff);
+#endif
 if (s->configuration != (value & 0xff)) {
 s->configuration = (value & 0xff);
 intf_update_required = 1;
@@ -341,6 +340,11 @@ static int usb_host_handle_control(USBDevice *dev,
 ct.timeout = 50;
 ct.data = data;
 ret = ioctl(s->fd, USBDEVFS_CONTROL, &ct);
+
+#ifdef DEBUG
+printf("husb: ctrl. req 0x%x val 0x%x index %d len %d ret %d errno 
%d\n",
+request, value, index, length, ret, errno);
+#endif
 }
 
 if (ret < 0) {
@@ -383,15 +387,18 @@ static int usb_host_handle_data(USBDevice *dev, USBPacket 
*p)
 bt.timeout = 50;
 bt.data = p->data;
 ret = ioctl(s->fd, USBDEVFS_BULK, &bt);
+
+#ifdef DEBUG
+printf("husb: bulk. ep %d len %d ret %d errno %d\n",
+bt.ep, bt.len, ret, errno);
+#endif
+
 if (ret < 0) {
 switch(errno) {
 case ETIMEDOUT:
 return USB_RET_NAK;
 case EPIPE:
 default:
-#ifdef DEBUG
-printf("handle_data: errno=%d\n", errno);
-#endif
 return USB_RET_STALL;
 }
 } else {
-- 
1.5.5.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Support for USB host device auto connect.

2008-08-01 Thread Max Krasnyansky
QEMU can now automatically grab host USB devices that match the filter.
For now I just extended 'host:X.Y' and 'host:VID:PID' syntax to handle
wildcards. So for example if you do something like
   usb_add host:5.*
QEMU will automatically grab any non-hub device with host address 5.*.

Same with the 'host:PID:*', we grab any device that matches PID.

Filtering itself is very generic so we can probably add more elaborate
syntax like 'host:BUS.ADDR:VID:PID'. So that we can do 'host:5.*:6000:*'.

Anyway, it's implemented using a periodic timer that scans host devices
and grabs those that match the filter. Timer is started when the first
filter is added.

We now keep the list of all host devices that we grabbed to make sure that
we do not grab the same device twice.

btw It's currently possible to grab the same host device more than once.
ie You can just do "usb_add host:1.1" more than once, which of course does
not work. So this patch fixes that issue too.

Along with the auto disconnect patch that I sent a minute ago, the setup is very
seamless now. You can just allocate some usb ports to the VMs and plug/unplug
devices at any time.

Signed-off-by: Max Krasnyansky <[EMAIL PROTECTED]>
---
 qemu/hw/usb.h|1 +
 qemu/usb-linux.c |  209 ++
 qemu/vl.c|   57 ---
 3 files changed, 225 insertions(+), 42 deletions(-)

diff --git a/qemu/hw/usb.h b/qemu/hw/usb.h
index 2edb982..4a009a5 100644
--- a/qemu/hw/usb.h
+++ b/qemu/hw/usb.h
@@ -197,6 +197,7 @@ static inline void usb_cancel_packet(USBPacket * p)
 p->cancel_cb(p, p->cancel_opaque);
 }
 
+int usb_device_add_dev(USBDevice *dev);
 int usb_device_del_addr(int bus_num, int addr);
 void usb_attach(USBPort *port, USBDevice *dev);
 int usb_generic_handle_packet(USBDevice *s, USBPacket *p);
diff --git a/qemu/usb-linux.c b/qemu/usb-linux.c
index 3e3c54e..78f4b2a 100644
--- a/qemu/usb-linux.c
+++ b/qemu/usb-linux.c
@@ -3,6 +3,9 @@
  *
  * Copyright (c) 2005 Fabrice Bellard
  *
+ * Support for host device auto connect & disconnect
+ *   Copyright (c) 2008 Max Krasnyansky
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to 
deal
  * in the Software without restriction, including without limitation the rights
@@ -69,6 +72,8 @@ struct endp_data {
 uint8_t type;
 };
 
+
+
 /* FIXME: move USBPacket to PendingURB */
 typedef struct USBHostDevice {
 USBDevice dev;
@@ -80,9 +85,51 @@ typedef struct USBHostDevice {
 uint8_t descr[1024];
 int descr_len;
 int urbs_ready;
+
 QEMUTimer *timer;
+
+/* Host side address */
+int bus_num;
+int addr;
+
+struct USBHostDevice *next;
 } USBHostDevice;
 
+static USBHostDevice *hostdev_list;
+
+static void hostdev_link(USBHostDevice *dev)
+{
+dev->next = hostdev_list;
+hostdev_list = dev;
+}
+
+static void hostdev_unlink(USBHostDevice *dev)
+{
+USBHostDevice *pdev = hostdev_list;
+USBHostDevice **prev = &hostdev_list;
+
+while (pdev) {
+   if (pdev == dev) {
+*prev = dev->next;
+return;
+}
+
+prev = &pdev->next;
+pdev = pdev->next;
+}
+}
+
+static USBHostDevice *hostdev_find(int bus_num, int addr)
+{
+USBHostDevice *s = hostdev_list;
+while (s) {
+if (s->bus_num == bus_num && s->addr == addr)
+return s;
+s = s->next;
+}
+return NULL;
+}
+
 typedef struct PendingURB {
 struct usbdevfs_urb *urb;
 int status;
@@ -240,6 +287,8 @@ static void usb_host_handle_destroy(USBDevice *dev)
 
 qemu_del_timer(s->timer);
 
+hostdev_unlink(s);
+
 if (s->fd >= 0)
 close(s->fd);
 
@@ -621,32 +670,26 @@ static void usb_host_device_check(void *priv)
 qemu_mod_timer(s->timer, qemu_get_clock(rt_clock) + 1000);
 }
 
-/* XXX: exclude high speed devices or implement EHCI */
-USBDevice *usb_host_device_open(const char *devname)
+static USBDevice *usb_host_device_open_addr(int bus_num, int addr, const char 
*prod_name)
 {
 int fd = -1, ret;
 USBHostDevice *dev = NULL;
 struct usbdevfs_connectinfo ci;
 char buf[1024];
-int bus_num, addr;
-char product_name[PRODUCT_NAME_SZ];
-
-if (usb_host_find_device(&bus_num, &addr,
- product_name, sizeof(product_name),
- devname) < 0)
-return NULL;
-
 
 dev = qemu_mallocz(sizeof(USBHostDevice));
 if (!dev)
 goto fail;
 
+dev->bus_num = bus_num;
+dev->addr = addr;
+
 dev->timer = qemu_new_timer(rt_clock, usb_host_device_check, (void *) dev);
 if (!dev->timer)
goto fail;
 
 #ifdef DEBUG
-printf("usb_host_device_open %s\n", devname);
+printf("usb_host_device_open %d.%d\n", bus_num, addr);
 #endif
 
 snprintf(buf, sizeof(buf), USBDEVFS_PATH "/%03d/%03d",
@@ -706,12 +749,12 @@ USBDevice *usb_host_device_open(const char *devname)
 

Re: Can several guests run simultaneously on KVM

2008-08-01 Thread David Mair

Stephen Liu wrote:

--- David Mair <[EMAIL PROTECTED]> wrote:


Stephen Liu wrote:

Hi folks,


Can I run several guests on KVM at the same time similar to VMware
hypervisor?

Yes.

--



Hi David.


Thanks for your advice.


Any document for reference?  


Assuming you have done a configure/make/make install then you need to 
load the drivers. I load the kvm drivers via initrd so that means 
arranging for them to be included and running mkinitrd. A reboot will 
cause them to load but you can modprobe or insmod to load them without a 
reboot.


I change the group of /dev/kvm to be the default group for most users 
(you need to be able to read/write it to run a VM in my experience). I 
did that with an init script. It's been a few kvm releases since I last 
checked if that's still necessary.


After that you'll probably find more useful information if you search 
for material about qemu rather than kvm. qemu is the userspace host 
application for kvm VMs.


An example of the process of creating a VM is to create a virtual disk 
image with qemu-img:


> qemu-img create -f qcow2 /path/to/my-vm-disk.img 8G

That creates a virtual disk that appears to a VM to be 8GB (the actual 
file will start at about 40kB and grow with use).


Now, you can start a VM with qemu, the command lines can be quite long. 
Here's a very basic example:


> qemu-system-x86_64 -m 512 \
-hda /path/to/my-vm-disk.img \
-cdrom /path/to/OSinstallDiskOrISO \
-boot d \
-name "My groovy new VM"

Replace the 512 with the amount of memory you want the VM to have. The 
VM's IDE primary:master hard disk will be the virtual disk file you 
created earlier. The VM's optical drive (IDE secondary:master) will have 
"inserted" media that is whatever you use as the argument to -cdrom. It 
can be an iso file or the dev file for an optical drive (and more).


The boot argument specifies which device the BIOS will try to boot from 
(the optical drive in this case). You'll probably want to change the 
boot d to boot c after the OS is installed.


The guest will have no networking as shown. Network configuration can be 
intimidating. Try getting the basics working then ask for more help with 
networking.


You'll also find in this mailing list archive a few wrapper scripts 
people have done that make a lot of it easier to deal with.




On googling I only found;
Re: [kvm-devel] KVM and Perf Counters
http://www.mail-archive.com/[EMAIL PROTECTED]/msg01133.html


http://synergy2.sourceforge.net/
synergy2.sourceforge.net —  Synergy lets you easily share a single
mouse and keyboard between multiple computers with different operating
systems, each with its own display, without special hardware. It's
intended for users with multiple computers on their desk since each
system uses its own monitor(s).


There are several documents for Xen and VMware.


--
David.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Can several guests run simultaneously on KVM

2008-08-01 Thread Stephen Liu

--- David Mair <[EMAIL PROTECTED]> wrote:

> Stephen Liu wrote:
> > Hi folks,
> > 
> > 
> > Can I run several guests on KVM at the same time similar to VMware
> > hypervisor?
> 
> Yes.
> 
> -- 


Hi David.


Thanks for your advice.


Any document for reference?  


On googling I only found;
Re: [kvm-devel] KVM and Perf Counters
http://www.mail-archive.com/[EMAIL PROTECTED]/msg01133.html


http://synergy2.sourceforge.net/
synergy2.sourceforge.net —  Synergy lets you easily share a single
mouse and keyboard between multiple computers with different operating
systems, each with its own display, without special hardware. It's
intended for users with multiple computers on their desk since each
system uses its own monitor(s).


There are several documents for Xen and VMware.


TIA


B.R.
Stephen

Send instant messages to your online friends http://uk.messenger.yahoo.com 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC][PATCH] Add HPET emulation to qemu (v2)

2008-08-01 Thread Beth Kon
Major changes:

- Rebased to register-based operations for ease of save/restore.
- Looked through Xen's hpet implementation and picked up a bunch of 
  things, though not quite everything yet. Thanks!
- PIT and RTC are entirely disabled in legacy mode, not just their 
  interrupts.
 
There is still a bunch to do but I'm re-posting primarily because 
of the switch to register-based. I have still only tested with a 
linux guest. Windows guest is next on my list... as soon as I return 
from my week vacation.

I've been playing with CONFIG_NO_HZ and been surprised by the 
results.  I was trying to reproduce the wakeup every 10ms that 
Samuel Thibault mentioned, thinking the HPET would improve it. 
But for an idle guest in both cases (with and without HPET), the 
number of wakeups per second was relatively low (28). Ultimately 
this depends on exactly what the guest is doing
when idle, so maybe the HPET won't provide any improvement here. 
But in any case, I didn't see the 10ms wakeup cycle with CONFIG_NO_HZ. 
If anyone can shed any light on this, I could look into it more if need
be.
 
Signed-off-by: Beth Kon <[EMAIL PROTECTED]>
***

Makefile.target  |2 
hw/hpet.c|  441 +++
hw/i8254.c   |   11 +
hw/mc146818rtc.c |   30 +++
hw/pc.c  |2 
5 files changed, 483 insertions(+), 3 deletions(-)

***

diff --git a/Makefile.target b/Makefile.target
index 42162c3..946bdef 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -536,7 +536,7 @@ ifeq ($(TARGET_BASE_ARCH), i386)
 OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o
 OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o
 OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o
-OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o
+OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o hpet.o
 CPPFLAGS += -DHAS_AUDIO -DHAS_AUDIO_CHOICE
 endif
 ifeq ($(TARGET_BASE_ARCH), ppc)
diff --git a/hw/hpet.c b/hw/hpet.c
new file mode 100644
index 000..adfecf0
--- /dev/null
+++ b/hw/hpet.c
@@ -0,0 +1,441 @@
+/*
+ *  High Precision Event Timer emulation
+ *
+ *  Copyright (c) 2007 Alexander Graf
+ *  Copyright (c) 2008 IBM Corporation
+ *
+ *  Authors: Beth Kon <[EMAIL PROTECTED]>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * *
+ *
+ * This driver attempts to emulate an HPET device in software. It is by no
+ * means complete and is prone to break on certain conditions.
+ *
+ */
+#include "hw.h"
+#include "console.h"
+#include "qemu-timer.h"
+
+//#define HPET_DEBUG
+
+#define HPET_BASE  0xfed0
+#define HPET_CLK_PERIOD 1000ULL /* 1000 femtoseconds == 10ns*/
+
+#define FS_PER_NS 100
+#define HPET_NUM_TIMERS 3
+#define HPET_TIMER_TYPE_LEVEL 1
+#define HPET_TIMER_TYPE_EDGE 0
+#define HPET_TIMER_DELIVERY_APIC 0
+#define HPET_TIMER_DELIVERY_FSB 1
+#define HPET_TIMER_CAP_FSB_INT_DEL (1 << 15)
+#define HPET_TIMER_CAP_PER_INT (1 << 4)
+
+#define HPET_CFG_ENABLE 0x001
+#define HPET_CFG_LEGACY 0x002
+
+#define HPET_ID 0x000
+#define HPET_PERIOD 0x004
+#define HPET_CFG0x010
+#define HPET_STATUS 0x020
+#define HPET_COUNTER0x0f0
+#define HPET_TN_REGS0x100 ... 0x3ff /*address range of all TN regs*/
+#define HPET_TN_CFG 0x000
+#define HPET_TN_CMP 0x008
+#define HPET_TN_ROUTE   0x010
+
+
+#define HPET_TN_INT_TYPE_LEVEL   0x002
+#define HPET_TN_ENABLE   0x004
+#define HPET_TN_PERIODIC 0x008
+#define HPET_TN_PERIODIC_CAP 0x010
+#define HPET_TN_SIZE_CAP 0x020
+#define HPET_TN_SETVAL   0x040
+#define HPET_TN_32BIT0x100
+#define HPET_TN_INT_ROUTE_MASK  0x3e00
+#define HPET_TN_INT_ROUTE_SHIFT  9
+#define HPET_TN_INT_ROUTE_CAP_SHIFT 32
+#define HPET_TN_CFG_BITS_READONLY_OR_RESERVED 0x80b1U
+
+#define timer_int_route(timer)   \
+((timer->config & HPET_TN_INT_ROUTE_MASK) >> HPET_TN_INT_ROUTE_SHIFT)
+
+#define hpet_enabled(s)  (s->config & HPET_CFG_ENABLE)
+#define timer_is_periodic(t) (t->config & HPET_TN_PERIODIC)
+#define timer_enabled(t) (t->config & HPET_TN_ENABLE)
+
+struct HPETState;
+typedef struct HPETTimer {  /* timers */
+uint8_t tn; /*timer number*/
+QEMUTimer *qemu_timer

Re: Can several guests run simultaneously on KVM

2008-08-01 Thread David Mair

Stephen Liu wrote:

Hi folks,


Can I run several guests on KVM at the same time similar to VMware
hypervisor?


Yes.

--
David.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Can several guests run simultaneously on KVM

2008-08-01 Thread Stephen Liu
Hi folks,


Can I run several guests on KVM at the same time similar to VMware
hypervisor?

Thanks


B.R.
Stephen L


Send instant messages to your online friends http://uk.messenger.yahoo.com 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: How to connect USB enclosure to guest

2008-08-01 Thread Stephen Liu

--- Javier Guerra <[EMAIL PROTECTED]> wrote:

> On Fri, Aug 1, 2008 at 10:41 AM, Stephen Liu <[EMAIL PROTECTED]>
> wrote:
> > Hi folks,
> >
> >
> > Ubuntu 8.04 server amd64 - host
> > Ubuntu 6.06 server amd64 - guest
> > KVM 1:62+dfsq
> > USB enclosure
> >
> >
> > Please advise how to mount the USB enclosure to guest.  It can be
> > mounted on host.  Pointer would be appreciated.  TIA
> 
> instead of messing with USB, manage it as any block device.
> 
> don't mount it on host, and add the /dev/sdX device file as an extra
> drive on the qemu command line.
> 
> downside is that you wouldn't be able to (easily) unmount from the
> guest.


Hi Javier,


Thanks for your advice.


I'm running the USB enclosure (a HD) as portable storage drive to
servers.  I have to connect it to another server later when in need.


I have another thought whether I can create the enclosure as an image
on KVM?  But even possible I have to recreate a new image next time
connecting it to the guest.


Any advice?  TIA


B.R.
Stephen

Send instant messages to your online friends http://uk.messenger.yahoo.com 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH]: pointer to vmcs getting lost

2008-08-01 Thread Jesse

Thanks for the feedback. Comments inline.

Marcelo Tosatti wrote:

Hi Jesse,

On Fri, Aug 01, 2008 at 03:18:52PM -0700, Jesse wrote:
  

Greetings,

I noticed a race condition when running two guests simultaneously and  
debugging both guests (on 64-bit intel cpus). Periodically I would get  
errors from the vmread, vmwrite, or vmresume instructions. Some research  
revealed that these errors were being caused by having an invalid vmcs  
loaded. Further, I found that the vmcs is a per_cpu variable, which I  
believe means that any reference to it is invalid after a context  
switch. (Corrections appreciated). This means that the vmcs must be  
reloaded each time the process is switched to. 



The preempt notifiers will do that for you.
  
Right, but they won't call VMPTRLD. For some reason this matters (for 
intel chips), even if the variable ends up back in the same place, as 
far as I can tell.
  
The patch below fixed the  
problem for me.


This patch does three things.
1. Extends the critical section in __vcpu_run to include the handling of  
vmexits, where many of the vmread/writes occur.
2. Perform a vcpu_load after we enter the critical section, and after we  
return from kvm_resched.
3. Move the call to kvm_guest_debug_pre into the critical section  
(because it calls vmread/write).



Wouldn't it suffice to move ->guest_debug_pre into the non preemptable
section? http://article.gmane.org/gmane.comp.emulators.kvm.devel/20244 
  
Excellent. I hadn't seen that patch yet. However, many of the 
vmreads/vmwrites that failed in my testing were in the exit handlers. 
And calling VMPTRLD (in vcpu_load) explicitly on entering the critical
section guards against any other vmcs concurrency problems.

I haven't tested that patch though.

  


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC][PATCH] Add HPET emulation to qemu (v2)

2008-08-01 Thread Beth Kon
Major changes:

- Rebased to register-based operations for ease of save/restore.
- Looked through Xen's hpet implementation and picked up a bunch of 
  things, though not quite everything yet. Thanks!
- PIT and RTC are entirely disabled in legacy mode, not just their 
  interrupts.
 
There is still a bunch to do but I'm re-posting primarily because 
of the switch to register-based. I have still only tested with a 
linux guest. Windows guest is next on my list... as soon as I return 
from my week vacation.

I've been playing with CONFIG_NO_HZ and been surprised by the 
results.  I was trying to reproduce the wakeup every 10ms that 
Samuel Thibault mentioned, thinking the HPET would improve it. 
But for an idle guest in both cases (with and without HPET), the 
number of wakeups per second was relatively low (28). Ultimately 
this depends on exactly what the guest is doing
when idle, so maybe the HPET won't provide any improvement here. 
But in any case, I didn't see the 10ms wakeup cycle with CONFIG_NO_HZ. 
If anyone can shed any light on this, I could look into it more if need
be.
 
Signed-off-by: Beth Kon <[EMAIL PROTECTED]>
***

Makefile.target  |2 
hw/hpet.c|  441 +++
hw/i8254.c   |   11 +
hw/mc146818rtc.c |   30 +++
hw/pc.c  |2 
5 files changed, 483 insertions(+), 3 deletions(-)

***

diff --git a/Makefile.target b/Makefile.target
index 42162c3..946bdef 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -536,7 +536,7 @@ ifeq ($(TARGET_BASE_ARCH), i386)
 OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o
 OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o
 OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o
-OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o
+OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o hpet.o
 CPPFLAGS += -DHAS_AUDIO -DHAS_AUDIO_CHOICE
 endif
 ifeq ($(TARGET_BASE_ARCH), ppc)
diff --git a/hw/hpet.c b/hw/hpet.c
new file mode 100644
index 000..adfecf0
--- /dev/null
+++ b/hw/hpet.c
@@ -0,0 +1,441 @@
+/*
+ *  High Precision Event Timer emulation
+ *
+ *  Copyright (c) 2007 Alexander Graf
+ *  Copyright (c) 2008 IBM Corporation
+ *
+ *  Authors: Beth Kon <[EMAIL PROTECTED]>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * *
+ *
+ * This driver attempts to emulate an HPET device in software. It is by no
+ * means complete and is prone to break on certain conditions.
+ *
+ */
+#include "hw.h"
+#include "console.h"
+#include "qemu-timer.h"
+
+//#define HPET_DEBUG
+
+#define HPET_BASE  0xfed00000
+#define HPET_CLK_PERIOD 10000000ULL /* 10000000 femtoseconds == 10ns*/
+
+#define FS_PER_NS 1000000
+#define HPET_NUM_TIMERS 3
+#define HPET_TIMER_TYPE_LEVEL 1
+#define HPET_TIMER_TYPE_EDGE 0
+#define HPET_TIMER_DELIVERY_APIC 0
+#define HPET_TIMER_DELIVERY_FSB 1
+#define HPET_TIMER_CAP_FSB_INT_DEL (1 << 15)
+#define HPET_TIMER_CAP_PER_INT (1 << 4)
+
+#define HPET_CFG_ENABLE 0x001
+#define HPET_CFG_LEGACY 0x002
+
+#define HPET_ID 0x000
+#define HPET_PERIOD 0x004
+#define HPET_CFG0x010
+#define HPET_STATUS 0x020
+#define HPET_COUNTER0x0f0
+#define HPET_TN_REGS0x100 ... 0x3ff /*address range of all TN regs*/
+#define HPET_TN_CFG 0x000
+#define HPET_TN_CMP 0x008
+#define HPET_TN_ROUTE   0x010
+
+
+#define HPET_TN_INT_TYPE_LEVEL   0x002
+#define HPET_TN_ENABLE   0x004
+#define HPET_TN_PERIODIC 0x008
+#define HPET_TN_PERIODIC_CAP 0x010
+#define HPET_TN_SIZE_CAP 0x020
+#define HPET_TN_SETVAL   0x040
+#define HPET_TN_32BIT0x100
+#define HPET_TN_INT_ROUTE_MASK  0x3e00
+#define HPET_TN_INT_ROUTE_SHIFT  9
+#define HPET_TN_INT_ROUTE_CAP_SHIFT 32
+#define HPET_TN_CFG_BITS_READONLY_OR_RESERVED 0x80b1U
+
+#define timer_int_route(timer)   \
+((timer->config & HPET_TN_INT_ROUTE_MASK) >> HPET_TN_INT_ROUTE_SHIFT)
+
+#define hpet_enabled(s)  (s->config & HPET_CFG_ENABLE)
+#define timer_is_periodic(t) (t->config & HPET_TN_PERIODIC)
+#define timer_enabled(t) (t->config & HPET_TN_ENABLE)
+
+struct HPETState;
+typedef struct HPETTimer {  /* timers */
+uint8_t tn; /*timer number*/
+QEMUTimer *qemu_timer

Re: [PATCH]: pointer to vmcs getting lost

2008-08-01 Thread Marcelo Tosatti
Hi Jesse,

On Fri, Aug 01, 2008 at 03:18:52PM -0700, Jesse wrote:
> Greetings,
>
> I noticed a race condition when running two guests simultaneously and  
> debugging both guests (on 64-bit intel cpus). Periodically I would get  
> errors from the vmread, vmwrite, or vmresume instructions. Some research  
> revealed that these errors were being caused by having an invalid vmcs  
> loaded. Further, I found that the vmcs is a per_cpu variable, which I  
> believe means that any reference to it is invalid after a context  
> switch. (Corrections appreciated). This means that the vmcs must be  
> reloaded each time the process is switched to. 

The preempt notifiers will do that for you.

> The patch below fixed the  
> problem for me.
>
> This patch does three things.
> 1. Extends the critical section in __vcpu_run to include the handling of  
> vmexits, where many of the vmread/writes occur.
> 2. Perform a vcpu_load after we enter the critical section, and after we  
> return from kvm_resched.
> 3. Move the call to kvm_guest_debug_pre into the critical section  
> (because it calls vmread/write).

Wouldn't it suffice to move ->guest_debug_pre into the non preemptable
section? http://article.gmane.org/gmane.comp.emulators.kvm.devel/20244 

I haven't tested that patch though.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch 2/2] KVM: x86: do not execute halted vcpus (v2)

2008-08-01 Thread Marcelo Tosatti
Offline or uninitialized vcpus can be executed if requested to perform
userspace work. 

Follow Avi's suggestion to handle halted vcpu's in the main loop,
simplifying kvm_emulate_halt(). Introduce a new vcpu->requests bit to
indicate events that promote state from halted to running.

Also standardize vcpu wake sites.

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: kvm/arch/x86/kvm/x86.c
===
--- kvm.orig/arch/x86/kvm/x86.c
+++ kvm/arch/x86/kvm/x86.c
@@ -2772,11 +2772,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vc
KVMTRACE_0D(HLT, vcpu, handler);
if (irqchip_in_kernel(vcpu->kvm)) {
vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
-   up_read(&vcpu->kvm->slots_lock);
-   kvm_vcpu_block(vcpu);
-   down_read(&vcpu->kvm->slots_lock);
-   if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
-   return -EINTR;
return 1;
} else {
vcpu->run->exit_reason = KVM_EXIT_HLT;
@@ -3071,24 +3066,10 @@ static void vapic_exit(struct kvm_vcpu *
up_read(&vcpu->kvm->slots_lock);
 }
 
-static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
int r;
 
-   if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
-   pr_debug("vcpu %d received sipi with vector # %x\n",
-  vcpu->vcpu_id, vcpu->arch.sipi_vector);
-   kvm_lapic_reset(vcpu);
-   r = kvm_x86_ops->vcpu_reset(vcpu);
-   if (r)
-   return r;
-   vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-   }
-
-   down_read(&vcpu->kvm->slots_lock);
-   vapic_enter(vcpu);
-
-again:
if (vcpu->requests)
if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
kvm_mmu_unload(vcpu);
@@ -3132,15 +3113,6 @@ again:
goto out;
}
 
-   if (signal_pending(current)) {
-   local_irq_enable();
-   preempt_enable();
-   r = -EINTR;
-   kvm_run->exit_reason = KVM_EXIT_INTR;
-   ++vcpu->stat.signal_exits;
-   goto out;
-   }
-
if (vcpu->guest_debug.enabled)
kvm_x86_ops->guest_debug_pre(vcpu);
 
@@ -3201,26 +3173,63 @@ again:
kvm_lapic_sync_from_vapic(vcpu);
 
r = kvm_x86_ops->handle_exit(kvm_run, vcpu);
+out:
+   return r;
+}
 
-   if (r > 0) {
-   if (dm_request_for_irq_injection(vcpu, kvm_run)) {
-   r = -EINTR;
-   kvm_run->exit_reason = KVM_EXIT_INTR;
-   ++vcpu->stat.request_irq_exits;
-   goto out;
-   }
-   if (!need_resched())
-   goto again;
+static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+   int r;
+
+   if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
+   printk("vcpu %d received sipi with vector # %x\n",
+  vcpu->vcpu_id, vcpu->arch.sipi_vector);
+   kvm_lapic_reset(vcpu);
+   r = kvm_x86_ops->vcpu_reset(vcpu);
+   if (r)
+   return r;
+   vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
}
 
-out:
-   up_read(&vcpu->kvm->slots_lock);
-   if (r > 0) {
-   kvm_resched(vcpu);
-   down_read(&vcpu->kvm->slots_lock);
-   goto again;
+   down_read(&vcpu->kvm->slots_lock);
+   vapic_enter(vcpu);
+
+   r = 1;
+   while (r > 0) {
+   if (kvm_arch_vcpu_runnable(vcpu))
+   r = vcpu_enter_guest(vcpu, kvm_run);
+   else {
+   up_read(&vcpu->kvm->slots_lock);
+   kvm_vcpu_block(vcpu);
+   down_read(&vcpu->kvm->slots_lock);
+   if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
+   if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
+   vcpu->arch.mp_state =
+   KVM_MP_STATE_RUNNABLE;
+   if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
+   r = -EINTR;
+   }
+
+   if (r > 0) {
+   if (dm_request_for_irq_injection(vcpu, kvm_run)) {
+   r = -EINTR;
+   kvm_run->exit_reason = KVM_EXIT_INTR;
+   ++vcpu->stat.request_irq_exits;
+   }
+   if (signal_pending(current)) {
+   r = -EINTR;
+   kvm_run->exit_reason = KVM_EXIT_INTR;
+

[patch 0/2] do not run halted vcpu's

2008-08-01 Thread Marcelo Tosatti
Avi Kivity wrote:

> Any reason this is not in __vcpu_run()?
>
> Our main loop could look like
>
> while (no reason to stop)
>   if (runnable)
>enter guest
>   else
>block
>   deal with aftermath
>
> kvm_emulate_halt would then simply modify the mp state.

Like this?

- I don't think it is necessary to test for pending signals inside irq
safe section, so move that to exit processing.

- Same for need_resched().



--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch 1/2] KVM: x86: set debug registers after "schedulable" section

2008-08-01 Thread Marcelo Tosatti
The vcpu thread can be preempted after the guest_debug_pre() callback,
resulting in invalid debug registers on the new vcpu.

Move it inside the non-preemptable section.

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: kvm/arch/x86/kvm/x86.c
===
--- kvm.orig/arch/x86/kvm/x86.c
+++ kvm/arch/x86/kvm/x86.c
@@ -3088,10 +3088,6 @@ static int __vcpu_run(struct kvm_vcpu *v
down_read(&vcpu->kvm->slots_lock);
vapic_enter(vcpu);
 
-preempted:
-   if (vcpu->guest_debug.enabled)
-   kvm_x86_ops->guest_debug_pre(vcpu);
-
 again:
if (vcpu->requests)
if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
@@ -3145,6 +3141,9 @@ again:
goto out;
}
 
+   if (vcpu->guest_debug.enabled)
+   kvm_x86_ops->guest_debug_pre(vcpu);
+
vcpu->guest_mode = 1;
/*
 * Make sure that guest_mode assignment won't happen after
@@ -3219,7 +3218,7 @@ out:
if (r > 0) {
kvm_resched(vcpu);
down_read(&vcpu->kvm->slots_lock);
-   goto preempted;
+   goto again;
}
 
post_kvm_run_save(vcpu, kvm_run);

-- 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH]: pointer to vmcs getting lost

2008-08-01 Thread Jesse

Greetings,

I noticed a race condition when running two guests simultaneously and 
debugging both guests (on 64-bit intel cpus). Periodically I would get 
errors from the vmread, vmwrite, or vmresume instructions. Some research 
revealed that these errors were being caused by having an invalid vmcs 
loaded. Further, I found that the vmcs is a per_cpu variable, which I 
believe means that any reference to it is invalid after a context 
switch. (Corrections appreciated). This means that the vmcs must be 
reloaded each time the process is switched to. The patch below fixed the 
problem for me.


This patch does three things.
1. Extends the critical section in __vcpu_run to include the handling of 
vmexits, where many of the vmread/writes occur.
2. Perform a vcpu_load after we enter the critical section, and after we 
return from kvm_resched.
3. Move the call to kvm_guest_debug_pre into the critical section 
(because it calls vmread/write).


I hope you find this useful. I am not on list, so please CC me on replies.

~Jesse Dutton



diff -ruNa kvm-72/kernel/x86.c kvm-72-changed/kernel/x86.c
--- kvm-72/kernel/x86.c 2008-07-27 06:20:10.0 -0700
+++ kvm-72-changed/kernel/x86.c 2008-07-31 15:25:25.0 -0700
@@ -2845,8 +2845,6 @@
vapic_enter(vcpu);

preempted:
-   if (vcpu->guest_debug.enabled)
-   kvm_x86_ops->guest_debug_pre(vcpu);

again:
if (vcpu->requests)
@@ -2878,7 +2876,12 @@
clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
kvm_inject_pending_timer_irqs(vcpu);

+   vcpu_put(vcpu);
preempt_disable();
+   vcpu_load(vcpu);
+
+   if (vcpu->guest_debug.enabled)
+   kvm_x86_ops->guest_debug_pre(vcpu);

kvm_x86_ops->prepare_guest_switch(vcpu);
kvm_load_guest_fpu(vcpu);
@@ -2941,7 +2944,6 @@

kvm_guest_exit();

-   preempt_enable();

down_read(&vcpu->kvm->slots_lock);

@@ -2960,6 +2962,8 @@

r = kvm_x86_ops->handle_exit(kvm_run, vcpu);

+   preempt_enable();
+
if (r > 0) {
if (dm_request_for_irq_injection(vcpu, kvm_run)) {
r = -EINTR;
@@ -2974,7 +2978,9 @@
out:
up_read(&vcpu->kvm->slots_lock);
if (r > 0) {
+   vcpu_put(vcpu);
kvm_resched(vcpu);
+   vcpu_load(vcpu);
down_read(&vcpu->kvm->slots_lock);
goto preempted;
}


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Balloon device in qemu?

2008-08-01 Thread Anthony Liguori

Marcelo Tosatti wrote:

On Thu, Jul 31, 2008 at 04:01:54PM +0300, Avi Kivity wrote:
  
Marcelo, the balloon was last seen drifting over your territory.  Care  
to brush it up and send it over?



Anthony rewrote the backend, I think this is the latest version:

http://www.archivum.info/[EMAIL PROTECTED]/2008-04/msg00080.html
  


Is there a way to detect MMU notifiers from userspace?  I don't think 
it's currently safe to madvise unconditionally.


Regards,

Anthony Liguori
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Balloon device in qemu?

2008-08-01 Thread Marcelo Tosatti
On Thu, Jul 31, 2008 at 04:01:54PM +0300, Avi Kivity wrote:
> Marcelo, the balloon was last seen drifting over your territory.  Care  
> to brush it up and send it over?

Anthony rewrote the backend, I think this is the latest version:

http://www.archivum.info/[EMAIL PROTECTED]/2008-04/msg00080.html
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


kvm-72 rhel4 (2.6.9-derived) i386 guest - rtl8139 stalling; lost interrupts?

2008-08-01 Thread Charles Duffy
I have observed downloads to stall on an RHEL4 i386 guest (on an x86_64 
host), such that creating additional network traffic (ie. pinging the 
virtual host in question) will "unstick" the download. This does not 
happen to a RHEL5 x86_64 guest on the same host, and is quite reliably 
reproducible while kickstarting a new virtual machine across a network 
(bridged, with a different VM hosting the packages being downloaded).


To me, this smells like lost interrupts; is there anything I can do to 
better diagnose the issue?


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm: bios: Put AP boot up code to 0x1000

2008-08-01 Thread H. Peter Anvin

Avi Kivity wrote:


IIRC the rombios32.c writes to the memory it is in, so it expects RAM, 
not ROM.


kvm doesn't support ROM, so it would work.  Qemu doesn't, so it would fail.



You can specify either RAM or ROM in Qemu, but you have to edit the C code.

If you need RAM in the low 640K, your choices are roughly 0x300..0x340 
or in the EBDA.


-hpa
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: How to connect USB enclosure to guest

2008-08-01 Thread Javier Guerra
On Fri, Aug 1, 2008 at 10:41 AM, Stephen Liu <[EMAIL PROTECTED]> wrote:
> Hi folks,
>
>
> Ubuntu 8.04 server amd64 - host
> Ubuntu 6.06 server amd64 - guest
> KVM 1:62+dfsq
> USB enclosure
>
>
> Please advise how to mount the USB enclosure to guest.  It can be
> mounted on host.  Pointer would be appreciated.  TIA

instead of messing with USB, manage it as any block device.

don't mount it on host, and add the /dev/sdX device file as an extra
drive on the qemu command line.

downside is that you wouldn't be able to (easily) unmount from the guest.

-- 
Javier
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


How to connect USB enclosure to guest

2008-08-01 Thread Stephen Liu
Hi folks,


Ubuntu 8.04 server amd64 - host
Ubuntu 6.06 server amd64 - guest
KVM 1:62+dfsq
USB enclosure


Please advise how to mount the USB enclosure to guest.  It can be
mounted on host.  Pointer would be appreciated.  TIA


B.R.
Stephen L

Send instant messages to your online friends http://uk.messenger.yahoo.com 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Weekly KVM Test report, kernel 771310c .. userspace 5c646ce7

2008-08-01 Thread Xu, Jiajun

Hi All,

This is our Weekly KVM Testing Report against latest kvm.git
771310c770214cd879d30d0825fb5e140cd74866 and kvm-userspace.git
5c646ce7bfb0eb0a108a09f08a3854a27c11cdaf.
There is no new issue found this week.

Two Old Issues:

1. 32bits Rhel5/FC6 guest may fail to reboot after installation
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=1991647&group_id=180599 



2. failure to migrate guests with more than 4GB of RAM
https://sourceforge.net/tracker/index.php?func=detail&aid=1971512&group_id=180599&atid=893831

Test environment

Platform   
Woodcrest

CPU 4
Memory size 8G

Details

IA32-pae: 1. boot guest with 256M
memory   PASS
2. boot guest with 1500M memory PASS
3. boot 4 same guest in parallel PASS
4. boot two windows xp guestPASS
5. boot linux and windows guest in parallel  PASS
6. save/restore 32-bit HVM guests  PASS
7. save/restore 32-bit HVM guests with 4 vcpus   PASS
8. live migration 32-bit HVM guests PASS
9. live migration 32-bit HVM guests with 4 vcpus  PASS
10. boot base kernel linux 
PASS

11. kernel build on SMP linux guestPASS
12. LTP on linux guest  
PASS

13. boot Windows 2000 without ACPI  PASS
14. boot Windows 2000 with ACPI enabled  PASS
15. boot Windows 2003 with ACPI enabled   PASS
16. boot Windows xp with ACPI enabled  PASS
17. boot Windows vista with ACPI enabled   PASS
18. boot SMP Windows 2000 with ACPI enabled  PASS
19. boot SMP Windows 2003 with ACPI enabled  PASS
20. boot SMP Windows xp with ACPI enabled  PASS
21. boot SMP Windows 2008 with ACPI enabled   PASS


IA32e: 1. boot 32-bit guest with 256M
memory   PASS
2. boot 64-bit guest with 256M memory   PASS
3. boot 32-bit guest with 1500M memory PASS
4. boot 64-bit guest with 1500M memory PASS
5. boot 4G pae
guest PASS
6. boot 4G 64-bit
guest  PASS
7. boot four 32-bit guest in
parallel  PASS
8. boot four 64-bit guest in
parallel  PASS
9. boot two 32-bit windows xp in parallel  PASS
10. boot 32-bit linux and 32 bit windows guest in parallel   PASS
11. boot four 32-bit different guest in para
PASS
12. save/restore 32-bit linux guests
PASS
13. save/restore 64-bit linux guests
PASS

14. save/restore 64-bit linux guests with 4 vcpus   PASS
15. save/restore 32-bit linux guests with 4 vcpus   PASS
16. live migration 64bit linux
guests PASS
17. live migration 32bit linux
guests PASS
18. live migration 64bit linux guests with 4 vcpus   PASS
19. live migration 32bit linux guests with 4 vcpus   PASS
20. boot 32-bit x-server   PASS
21. kernel build in 32-bit linux guest OS  PASS
22. kernel build in 64-bit linux guest OS  PASS
23. LTP on 32-bit linux guest OS   
PASS
24. LTP on 64-bit linux guest OS   
PASS

25. boot 64-bit guests with ACPI enabled PASS
26. boot 32-bit Windows 2000 without ACPIPASS
27. boot 32-bit Windows xp without ACPIPASS
28. boot 64-bit Windows xp with ACPI enabledPASS
29. boot 64-bit Windows vista with ACPI enabled PASS
30. boot 32-bit SMP Windows 2000 with ACPI enabled PASS
31. boot 32-bit SMP windows 2003 with ACPI enabled  PASS
32. boot 32-bit SMP Windows xp with ACPI enabledPASS
33. boot 64-bit SMP Windows vista with ACPI enabled PASS
34. boot 32-bit SMP windows 2008 with ACPI enabled  PASS
35. boot 64-bit SMP windows 2003 with ACPI enabled  PASS
36. boot 64-bit SMP windows XP with ACPI enabled PASS
37. boot 64-bit SMP windows 2008 with A

Re: [PATCH] Save 64-bit of the IA-32e capable sysenter MSRs

2008-08-01 Thread Alexander Graf


On Jul 31, 2008, at 12:07 PM, Avi Kivity wrote:


Alexander Graf wrote:

Hi,

When transitioning from KVM to the qemu userspace, we try to get  
and push a whole bunch of MSR values, including the SYSENTER ones.  
While this is basically a good idea, qemu doesn't know anything  
about SYSENTER on x86_64, which is an additional feature only  
available on Intel CPUs and thus defines SYSENTER_EIP and  
SYSENTER_ESP as 32-bit values.


Because we're saving/restoring the SYSENTER values on transitions  
to userspace and the values in env->cpu only hold 32 bits, they get  
truncated to 32-bit. The easiest way around this is to widen the  
fields to 64 bits. This should not disturb the current  
functionality, but allows us to run 32-bit code on x86_64 using VMX.


As this is KVM specific for now, I'll only CC the qemu mailinglist,  
but wouldn't encourage qemu to take the patch for now, as it's not  
necessary when emulating an AMD CPU.




Applied, thanks.  I imagine save/restore support needs updating as  
well?


Yes, it does. Thanks for the reminder.

 That should go into qemu btw since maintaining the save/restore  
format across forks isn't possible.


I'll just write up a patch that implements IA-32e sysenter in qemu  
after vacation. That should fix the KVM issues too. For now it's good  
we have a workaround that works in "most cases" at least.


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM compile fails on s390x

2008-08-01 Thread Robin Atwood
On Friday 01 Aug 2008, Christian Borntraeger wrote:
> Am Freitag, 1. August 2008 schrieb Daniel P. Berrange:
> > What are your intentions for this part longer term. Are you planning to
> > create a QEMU target for s390 so we can use real QEMU binaries instead
> > of emulating a subset of its features. If s390 userspace were QEMU based
> > them you'd be able to use libvirt as a management API in same way as
> > the other architectures.

Christian, Daniel,
Thanks for the response. I was simply intrigued by the possibility of using 
KVM to virtualise zLinux and avoid having to use z/VM. 

> Yes, the goal is to move towards qemu. kuli is not intended as a long term
> solution. Consider it a proof of concept/testing code to show that the
> kernel part is working.

> You can also look at our KVM Forum presentation:
> http://kvm.qumranet.com/kvmwiki/KvmForum2008?action=AttachFile&do=get&targe
>t=kdf2008_17.pdf The slide "Next step" contains the merge into common kvm
> userspace.

That would have been my next question: now that the kernel module is built, how do 
I IPL a guest machine?!

> I dont think we will see a full s390 backend just to make kvm work. We are
> considering Anthonys proposal for an kvm backend in qemu instead.

I have used Qemu for some years and it has always seemed to me since the 
inception of KVM that the code bases should be merged to avoid duplication of 
effort and to make things easier for the users.

I will follow up your links, thanks very much for the pointers.

Cheers...
-Robin
-- 
--
Robin Atwood.

"Ship me somewheres east of Suez, where the best is like the worst,
 Where there ain't no Ten Commandments an' a man can raise a thirst"
 from "Mandalay" by Rudyard Kipling
--








--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM compile fails on s390x

2008-08-01 Thread Christian Borntraeger
Am Freitag, 1. August 2008 schrieb Daniel P. Berrange:
> What are your intentions for this part longer term. Are you planning to
> create a QEMU target for s390 so we can use real QEMU binaries instead
> of emulating a subset of its features. If s390 userspace were QEMU based
> them you'd be able to use libvirt as a management API in same way as
> the other architectures.

Yes, the goal is to move towards qemu. kuli is not intended as a long term 
solution. Consider it a proof of concept/testing code to show that the kernel 
part is working.

You can also look at our KVM Forum presentation:
http://kvm.qumranet.com/kvmwiki/KvmForum2008?action=AttachFile&do=get&target=kdf2008_17.pdf
The slide "Next step" contains the merge into common kvm userspace.

I don't think we will see a full s390 backend just to make kvm work. We are 
considering Anthony's proposal for a kvm backend in qemu instead.

Christian
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM compile fails on s390x

2008-08-01 Thread Daniel P. Berrange
On Fri, Aug 01, 2008 at 02:09:02PM +0200, Christian Borntraeger wrote:
> Am Donnerstag, 31. Juli 2008 schrieb Robin Atwood:
> > I guess I am doing something very dumb, but when I try to compile kvm-72 on 
> > z900 architecture (actually Hercules) I immediately get a compile error:
> > 
> > zgentoo kvm-72 # ./configure --disable-gfx-check --disable-sdl
> 
> Your userspace headers dont have all the necessary pieces...
> You want to add --with-patched-kernel, that should trigger an headers_sync to 
> get the header files from the kernel.
> 
> Anyway, that wont give you anything useful by now, as we only have kvmlib 
> done 
> in the userspace. Until we ported the rest of our userspace part over to 
> kvm-userspace, you can use our prototype code found on 
> 
> http://www-128.ibm.com/developerworks/linux/linux390/kuli.html
> 
> It supports a subset of the qemu syntax, e.g. somethin like
> kuli -drive file=disks/root -megs 128 -n tap,ifname=tap0 -smp=2
> works.

What are your intentions for this part longer term. Are you planning to
create a QEMU target for s390 so we can use real QEMU binaries instead
of emulating a subset of its features. If s390 userspace were QEMU based
then you'd be able to use libvirt as a management API in the same way as
the other architectures.

Experience with the 'xenner' work has shown that trying to write a tool
that behaves like qemu is a non-trivial, never ending task. As soon as
libvirt makes use of some new feature of QEMU's CLI or monitor console,
we find Xenner doesn't cope and have to duplicate yet more code. Hence
xenner is slowly switching over to use QEMU fully, rather than emulating
its CLI syntax

Regards,
Daniel
-- 
|: Red Hat, Engineering, London   -o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org  -o-  http://virt-manager.org  -o-  http://ovirt.org :|
|: http://autobuild.org   -o- http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-  F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM compile fails on s390x

2008-08-01 Thread Christian Borntraeger
Am Donnerstag, 31. Juli 2008 schrieb Robin Atwood:
> I guess I am doing something very dumb, but when I try to compile kvm-72 on 
> z900 architecture (actually Hercules) I immediately get a compile error:
> 
> zgentoo kvm-72 # ./configure --disable-gfx-check --disable-sdl

Your userspace headers don't have all the necessary pieces...
You want to add --with-patched-kernel, that should trigger a headers_sync to 
get the header files from the kernel.

Anyway, that won't give you anything useful by now, as we only have kvmlib done 
in the userspace. Until we have ported the rest of our userspace part over to 
kvm-userspace, you can use our prototype code found on 

http://www-128.ibm.com/developerworks/linux/linux390/kuli.html

It supports a subset of the qemu syntax, e.g. something like
kuli -drive file=disks/root -megs 128 -n tap,ifname=tap0 -smp=2
works.
You might need "tunctl" and "screen" for better results. If you want to stay 
at 2.6.26 there are some kernel patches in the kuli package. They add virtio 
console support as well as a fix for the z900. 2.6.27-rc1 should work out of 
the box.

Christian
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/5] KVM: pci device assignment

2008-08-01 Thread Amit Shah
* On Wednesday 30 Jul 2008 17:28:01 Ben-Ami Yassour wrote:
> On Wed, 2008-07-30 at 11:33 +0530, Amit Shah wrote:
> > * On Tuesday 29 July 2008 15:08:27 Ben-Ami Yassour wrote:
> > > On Tue, 2008-07-29 at 14:49 +0530, Amit Shah wrote:
> > > > * On Monday 28 Jul 2008 21:56:26 Ben-Ami Yassour wrote:
> > > > > +static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
> > > > > +   struct kvm_assigned_pci_dev 
> > > > > *assigned_dev)
> > > > > +{
> > > > >
> > > > >
> > > > > + if (pci_enable_device(dev)) {
> > > > > + printk(KERN_INFO "%s: Could not enable PCI device\n", 
> > > > > __func__);
> > > > > + r = -EBUSY;
> > > > > + goto out_put;
> > > > > + }
> > > > > + r = pci_request_regions(dev, "kvm_assigned_device");
> > > > > + if (r) {
> > > > > + printk(KERN_INFO "%s: Could not get access to device 
> > > > > regions\n",
> > > > > +__func__);
> > > > > + goto out_disable;
> > > >
> > > > Shouldn't disable here unconditionally (see my comment earlier to the
> > > > previous patch).
> > >
> > > Why? the device should not be used by the host at the same time.
> > > What is the condition that you were thinking of?
> > >
> > > > > +static void kvm_free_assigned_devices(struct kvm *kvm)
> > > > > +{
> > > > > + struct list_head *ptr, *ptr2;
> > > > > + struct kvm_assigned_dev_kernel *assigned_dev;
> > > > > +
> > > > > + list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
> > > > > + assigned_dev = list_entry(ptr,
> > > > > +   struct 
> > > > > kvm_assigned_dev_kernel,
> > > > > +   list);
> > > > > +
> > > > > + if (irqchip_in_kernel(kvm) && 
> > > > > assigned_dev->irq_requested) {
> > > > > + free_irq(assigned_dev->host_irq,
> > > > > +  (void *)assigned_dev);
> > > > > +
> > > > > + kvm_unregister_irq_ack_notifier(kvm,
> > > > > + &assigned_dev->
> > > > > + ack_notifier);
> > > > > + }
> > > >
> > > > Unregister the notifier before freeing irq
> > >
> > > I don't think that there is any importance to the order in this case,
> > > but just in case, the order should be in reverse to the initialization
> > > order, which is the case here.
> >
> > Just that we shouldn't accept any new interrupts to ack because we're
> > going to free the irq as the next step. This is more relevant now that we
> > don't have the rwlock for the device assignment structures. If the work
> > gets scheduled after we free the resources, it's going to bomb:
>
> These are two separate things.
> The ack notifier is called when the guest is signaling the virtual
> IOAPIC with EOI, and it has no direct relation with the host interrupt
> handler.

Of course. You're right.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH]: Add a "migrate_incoming" monitor option

2008-08-01 Thread Chris Lalancette
Daniel P. Berrange wrote:
>> @@ -9673,11 +9675,16 @@ int main(int argc, char **argv)
>>  if (incoming) {
>>  int rc;
>>  
>> -rc = migrate_incoming(incoming);
>> -if (rc != 0) {
>> -fprintf(stderr, "Migration failed rc=%d\n", rc);
>> -exit(rc);
>> -}
>> +if (strncmp(incoming, "monitor", 7) == 0) {
>> +incoming_monitor = 1;
>> +}
>> +else {
>> +rc = migrate_incoming(incoming);
>> +if (rc != 0) {
>> +fprintf(stderr, "Migration failed rc=%d\n", rc);
>> +exit(rc);
>> +}
>> +}
> 
> Rather than putting the strncmp("monitor")  into vl.c, I'd just leave
> this part as is.  Put the logic into the 'migrate_incoming()' method
> so that it just sets the 'incoming_monitor' flag and then returns
> immediately. That would allow the 'incoming_monitor' flag to be declared
> static to the migrate.c file, instead of polluting vl.c

Actually, that won't quite work.  We still need to share the incoming_monitor
flag between migration.c and monitor.c.  However, your suggestion is better in
that this is a "migration-specific" flag, so I'll move it over like you suggest.

Chris Lalancette
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH]: Implement tcp "nowait" option for migration

2008-08-01 Thread Chris Lalancette
Sometimes you want to be able to start up the receiving side of a live migration
and actually be able to run monitor commands before you do the migration.
Libvirt, in particular, wants to do this for setting up the migration.  This
patch implements a "nowait" option to the receiving side so that you start up
the receiving side similar to:

qemu-kvm -M pc -S blah blah -incoming tcp://0:,nowait

Then you are able to interact with the monitor before the live migration takes
place.

Signed-off-by: Chris Lalancette <[EMAIL PROTECTED]>
diff --git a/qemu/migration.c b/qemu/migration.c
index a64a287..d16e289 100644
--- a/qemu/migration.c
+++ b/qemu/migration.c
@@ -886,13 +886,10 @@ static int migrate_incoming_fd(int fd)
 return ret;
 }
 
-static int migrate_incoming_tcp(const char *host)
+static int migrate_listen_tcp(const char *host, int *outfd)
 {
 struct sockaddr_in addr;
-socklen_t addrlen = sizeof(addr);
-int fd, sfd;
-ssize_t len;
-uint8_t status = 0;
+int fd = -1;
 int reuse = 1;
 int rc;
 
@@ -928,19 +925,43 @@ static int migrate_incoming_tcp(const char *host)
 	goto error_socket;
 }
 
+*outfd = fd;
+
+return 0;
+
+error_socket:
+close(fd);
+error:
+return rc;
+}
+
+struct migrate_tcp_data {
+int listen_fd;
+int rc;
+};
+
+static void migrate_incoming_tcp(void *opaque)
+{
+struct sockaddr_in addr;
+socklen_t addrlen = sizeof(addr);
+struct migrate_tcp_data *data = (struct migrate_tcp_data *)opaque;
+int sfd;
+ssize_t len;
+uint8_t status = 0;
+
 again:
-sfd = accept(fd, (struct sockaddr *)&addr, &addrlen);
+sfd = accept(data->listen_fd, (struct sockaddr *)&addr, &addrlen);
 if (sfd == -1) {
 	if (errno == EINTR)
 	goto again;
 perror("accept() failed");
-rc = MIG_STAT_DST_ACCEPT_FAILED;
+data->rc = MIG_STAT_DST_ACCEPT_FAILED;
 	goto error_socket;
 }
 
-rc = migrate_incoming_fd(sfd);
-if (rc != 0) {
-fprintf(stderr, "migrate_incoming_fd failed (rc=%d)\n", rc);
+data->rc = migrate_incoming_fd(sfd);
+if (data->rc != 0) {
+fprintf(stderr, "migrate_incoming_fd failed (rc=%d)\n", data->rc);
 	goto error_accept;
 }
 
@@ -951,13 +972,13 @@ send_ack:
 if (len != 1) {
 fprintf(stderr, "migration: send_ack: write error len=%zu (%s)\n",
 len, strerror(errno));
-rc = MIG_STAT_DST_WRITE_FAILED;
+data->rc = MIG_STAT_DST_WRITE_FAILED;
 	goto error_accept;
 }
 
-rc = wait_for_message("WAIT FOR GO", sfd, wait_for_message_timeout);
-if (rc) {
-rc += 200;
+data->rc = wait_for_message("WAIT FOR GO", sfd, wait_for_message_timeout);
+if (data->rc) {
+data->rc += 200;
 goto error_accept;
 }
 
@@ -966,7 +987,7 @@ wait_for_go:
 if (len == -1 && errno == EAGAIN)
 	goto wait_for_go;
 if (len != 1) {
-rc = MIG_STAT_DST_READ_FAILED;
+data->rc = MIG_STAT_DST_READ_FAILED;
 fprintf(stderr, "migration: wait_for_go: read error len=%zu (%s)\n",
 len, strerror(errno));
 }
@@ -974,9 +995,10 @@ wait_for_go:
 error_accept:
 close(sfd);
 error_socket:
-close(fd);
-error:
-return rc;
+qemu_set_fd_handler(data->listen_fd, NULL, NULL, NULL);
+close(data->listen_fd);
+
+qemu_free(data);
 }
 
 int migrate_incoming(const char *device)
@@ -996,16 +1018,57 @@ int migrate_incoming(const char *device)
 	}
 } else if (strstart(device, "tcp://", &ptr)) {
 	char *host, *end;
+	struct migrate_tcp_data *data;
+	int is_waitconnect = 1;
+
 	host = strdup(ptr);
+	if (!host)
+	goto fail;
 	end = strchr(host, '/');
 	if (end) *end = 0;
-	ret = migrate_incoming_tcp(host);
+
+	data = qemu_mallocz(sizeof(struct migrate_tcp_data));
+	if (!data) {
+	qemu_free(host);
+	goto fail;
+	}
+
+	ptr = host;
+	while((ptr = strchr(ptr,','))) {
+	ptr++;
+	if (!strncmp(ptr,"nowait",6)) {
+	is_waitconnect = 0;
+	} else {
+	printf("Unknown option: %s\n", ptr);
+		qemu_free(host);
+	goto fail;
+	}
+	}
+
+	ret = migrate_listen_tcp(host, &(data->listen_fd));
 	qemu_free(host);
+	if (ret != 0)
+	goto fail;
+
+	/*
+	 * if we made it here, then migrate_incoming_tcp is responsible for
+	 * freeing the "data" structure
+	 */
+	if (!is_waitconnect) {
+	socket_set_nonblock(data->listen_fd);
+	qemu_set_fd_handler(data->listen_fd, migrate_incoming_tcp, NULL, data);
+	}
+	else {
+	migrate_incoming_tcp(data);
+	ret = data->rc;
+	}
+
 } else {
 	errno = EINVAL;
 	ret = MIG_STAT_DST_INVALID_PARAMS;
 }
 
+ fail:
 return ret;
 }
 


Re: [PATCH]: Add a "migrate_incoming" monitor option

2008-08-01 Thread Chris Lalancette
Daniel P. Berrange wrote:
> An accept trick only handles the TCP case though. I know this was Chris'
> example that we're currently using, but we intend to switch to passing 
> an open file descriptor instead, and proxying the data via a secure 
> channel instead of the plain tcp, or builtin SSH tunnelling. 
> 
> With an open FD we'll need another way - I guess just registering an
> event on POLLIN might do the trick. It seems to me that just having
> an explicit migrate incoming monitor command would be simpler than having
> to code different triggers to implement 'nowait' for each transport
> we have.

I just finished a patch to do the "nowait" for tcp as suggested by Anthony, and
it was actually far less code than I feared.  It just needed a little
re-factoring of the migrate_incoming_tcp() function.  I'll be posting that in a
couple of minutes.  Given how this code worked out, doing a "nowait" for the fd
style is really no big deal.

Both ways (explicit monitor command and "nowait") seem to have their benefits;
the monitor command has the benefit of being more flexible, while the nowait has
the benefit of being similar to already existing Qemu commands.  My preference
is for the monitor command since it seems a little more natural for a management
tool, but the nowait is clearly an option as well.

I'll also post a cleanup patch with Dan's suggestion for the monitor patch, so
both implementations will be available.

Chris Lalancette
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH]: Add a "migrate_incoming" monitor option

2008-08-01 Thread Daniel P. Berrange
On Thu, Jul 31, 2008 at 12:27:43PM -0500, Anthony Liguori wrote:
> Chris Lalancette wrote:
> >We've been trying to plumb libvirt to do KVM migration.  One of the 
> >stumbling
> >blocks we are running into, however, is that libvirt expects to be able to 
> >use
> >the Qemu monitor both before and after migration has taken place, on both 
> >the
> >source and destination nodes.  After migration has taken place is no 
> >problem; we
> >return to the main qemu select() loop, and we can run monitor commands.
> >However, before migration, on the destination side, when we start qemu 
> >with a
> >command-line like:
> >
> >qemu-kvm -M pc -S blah blah -incoming tcp://0:
> >
> >we can't run any monitor commands since the migration code is synchronously
> >waiting for an incoming tcp connection.  To get around this, the following 
> >patch
> >adds a new monitor command called "migrate_incoming"; it takes all of the 
> >same
> >parameters as the command-line option, but just starts it later.  To make 
> >sure
> >it is safe, you actually have to start with "-incoming monitor"; if you 
> >run it
> >without that, it will just spit an error at you.  So with this in place, 
> >libvirt
> >can do the equivalent of:
> >
> >qemu-kvm -M pc -S blah blah -incoming monitor
> >  
> 
> I think adding a 'nowait' parameter to migration would make more sense 
> than introducing a monitor command.
> 
> So:
> 
> qemu-kvm -M pc -S blah blah -incoming tcp://0:,nowait
> 
> From an implementation perspective, it's just a matter of setting a 
> callback for the accept fd I imagine.

An accept trick only handles the TCP case though. I know this was Chris'
example that we're currently using, but we intend to switch to passing 
an open file descriptor instead, and proxying the data via a secure 
channel instead of the plain tcp, or builtin SSH tunnelling. 

With an open FD we'll need another way - I guess just registering an
event on POLLIN might do the trick. It seems to me that just having
an explicit migrate incoming monitor command would be simpler than having
to code different triggers to implement 'nowait' for each transport
we have.
 
Daniel
-- 
|: Red Hat, Engineering, London   -o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org  -o-  http://virt-manager.org  -o-  http://ovirt.org :|
|: http://autobuild.org   -o- http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-  F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html