[PATCH 02/18] Introduce read() to FdMigrationState.

2011-03-22 Thread Yoshiaki Tamura
Currently FdMigrationState doesn't support read(), and this patch
introduces it to get a response from the other side.  Note that this
won't change the existing migration protocol to be bi-directional.

Signed-off-by: Yoshiaki Tamura 
---
 migration-tcp.c |   15 +++
 migration.c |   13 +
 migration.h |3 +++
 3 files changed, 31 insertions(+), 0 deletions(-)

diff --git a/migration-tcp.c b/migration-tcp.c
index e8dff9d..62ec0ea 100644
--- a/migration-tcp.c
+++ b/migration-tcp.c
@@ -39,6 +39,20 @@ static int socket_write(FdMigrationState *s, const void * 
buf, size_t size)
 return send(s->fd, buf, size, 0);
 }
 
+static int socket_read(FdMigrationState *s, const void * buf, size_t size)
+{
+ssize_t len;
+
+do {
+len = recv(s->fd, (void *)buf, size, 0);
+} while (len == -1 && socket_error() == EINTR);
+if (len == -1) {
+len = -socket_error();
+}
+
+return len;
+}
+
 static int tcp_close(FdMigrationState *s)
 {
 DPRINTF("tcp_close\n");
@@ -94,6 +108,7 @@ MigrationState *tcp_start_outgoing_migration(Monitor *mon,
 
 s->get_error = socket_errno;
 s->write = socket_write;
+s->read = socket_read;
 s->close = tcp_close;
 s->mig_state.cancel = migrate_fd_cancel;
 s->mig_state.get_status = migrate_fd_get_status;
diff --git a/migration.c b/migration.c
index af3a1f2..302b8fe 100644
--- a/migration.c
+++ b/migration.c
@@ -340,6 +340,19 @@ ssize_t migrate_fd_put_buffer(void *opaque, const void 
*data, size_t size)
 return ret;
 }
 
+int migrate_fd_get_buffer(void *opaque, uint8_t *data, int64_t pos, size_t 
size)
+{
+FdMigrationState *s = opaque;
+int ret;
+
+ret = s->read(s, data, size);
+if (ret == -1) {
+ret = -(s->get_error(s));
+}
+
+return ret;
+}
+
 void migrate_fd_connect(FdMigrationState *s)
 {
 int ret;
diff --git a/migration.h b/migration.h
index 2170792..88a6987 100644
--- a/migration.h
+++ b/migration.h
@@ -48,6 +48,7 @@ struct FdMigrationState
 int (*get_error)(struct FdMigrationState*);
 int (*close)(struct FdMigrationState*);
 int (*write)(struct FdMigrationState*, const void *, size_t);
+int (*read)(struct FdMigrationState *, const void *, size_t);
 void *opaque;
 };
 
@@ -116,6 +117,8 @@ void migrate_fd_put_notify(void *opaque);
 
 ssize_t migrate_fd_put_buffer(void *opaque, const void *data, size_t size);
 
+int migrate_fd_get_buffer(void *opaque, uint8_t *data, int64_t pos, size_t 
size);
+
 void migrate_fd_connect(FdMigrationState *s);
 
 void migrate_fd_put_ready(void *opaque);
-- 
1.7.1.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 05/18] vl.c: add deleted flag for deleting the handler.

2011-03-22 Thread Yoshiaki Tamura
Make handler deletion robust against deletion of any element from
within a handler by using a deleted flag, as is done for file descriptors.

Signed-off-by: Yoshiaki Tamura 
---
 vl.c |   15 ++-
 1 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/vl.c b/vl.c
index dbb927d..483e2e3 100644
--- a/vl.c
+++ b/vl.c
@@ -1158,6 +1158,7 @@ static void nographic_update(void *opaque)
 struct vm_change_state_entry {
 VMChangeStateHandler *cb;
 void *opaque;
+int deleted;
 QLIST_ENTRY (vm_change_state_entry) entries;
 };
 
@@ -1178,18 +1179,22 @@ VMChangeStateEntry 
*qemu_add_vm_change_state_handler(VMChangeStateHandler *cb,
 
 void qemu_del_vm_change_state_handler(VMChangeStateEntry *e)
 {
-QLIST_REMOVE (e, entries);
-qemu_free (e);
+e->deleted = 1;
 }
 
 void vm_state_notify(int running, int reason)
 {
-VMChangeStateEntry *e;
+VMChangeStateEntry *e, *ne;
 
 trace_vm_state_notify(running, reason);
 
-for (e = vm_change_state_head.lh_first; e; e = e->entries.le_next) {
-e->cb(e->opaque, running, reason);
+QLIST_FOREACH_SAFE(e, &vm_change_state_head, entries, ne) {
+if (e->deleted) {
+QLIST_REMOVE(e, entries);
+qemu_free(e);
+} else {
+e->cb(e->opaque, running, reason);
+}
 }
 }
 
-- 
1.7.1.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 10/18] Call init handler of event-tap at main() in vl.c.

2011-03-22 Thread Yoshiaki Tamura
Signed-off-by: Yoshiaki Tamura 
---
 vl.c |3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/vl.c b/vl.c
index 483e2e3..6ed9b20 100644
--- a/vl.c
+++ b/vl.c
@@ -160,6 +160,7 @@ int main(int argc, char **argv)
 #include "qemu-queue.h"
 #include "cpus.h"
 #include "arch_init.h"
+#include "event-tap.h"
 
 #include "ui/qemu-spice.h"
 
@@ -3042,6 +3043,8 @@ int main(int argc, char **argv, char **envp)
 
 blk_mig_init();
 
+event_tap_init();
+
 /* open the virtual block devices */
 if (snapshot)
 qemu_opts_foreach(qemu_find_opts("drive"), drive_enable_snapshot, 
NULL, 0);
-- 
1.7.1.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 11/18] ioport: insert event_tap_ioport() to ioport_write().

2011-03-22 Thread Yoshiaki Tamura
Record ioport event to replay it upon failover.

Signed-off-by: Yoshiaki Tamura 
---
 ioport.c |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/ioport.c b/ioport.c
index 2e971fa..f485bab 100644
--- a/ioport.c
+++ b/ioport.c
@@ -27,6 +27,7 @@
 
 #include "ioport.h"
 #include "trace.h"
+#include "event-tap.h"
 
 /***/
 /* IO Port */
@@ -76,6 +77,7 @@ static void ioport_write(int index, uint32_t address, 
uint32_t data)
 default_ioport_writel
 };
 IOPortWriteFunc *func = ioport_write_table[index][address];
+event_tap_ioport(index, address, data);
 if (!func)
 func = default_func[index];
 func(ioport_opaque[address], address, data);
-- 
1.7.1.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 12/18] Insert event_tap_mmio() to cpu_physical_memory_rw() in exec.c.

2011-03-22 Thread Yoshiaki Tamura
Record mmio write event to replay it upon failover.

Signed-off-by: Yoshiaki Tamura 
---
 exec.c |4 
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/exec.c b/exec.c
index 964ce31..be71464 100644
--- a/exec.c
+++ b/exec.c
@@ -33,6 +33,7 @@
 #include "osdep.h"
 #include "kvm.h"
 #include "qemu-timer.h"
+#include "event-tap.h"
 #if defined(CONFIG_USER_ONLY)
 #include 
 #include 
@@ -3733,6 +3734,9 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, 
uint8_t *buf,
 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
 if (p)
 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
+
+event_tap_mmio(addr, buf, len);
+
 /* XXX: could force cpu_single_env to NULL to avoid
potential bugs */
 if (l >= 4 && ((addr1 & 3) == 0)) {
-- 
1.7.1.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 14/18] block: insert event-tap to bdrv_aio_writev(), bdrv_aio_flush() and bdrv_flush().

2011-03-22 Thread Yoshiaki Tamura
The event-tap functions are called only when event-tap is on and the
requests were sent from device emulators.

Signed-off-by: Yoshiaki Tamura 
Acked-by: Kevin Wolf 
---
 block.c |   15 +++
 1 files changed, 15 insertions(+), 0 deletions(-)

diff --git a/block.c b/block.c
index c8e2f97..952543a 100644
--- a/block.c
+++ b/block.c
@@ -28,6 +28,7 @@
 #include "block_int.h"
 #include "module.h"
 #include "qemu-objects.h"
+#include "event-tap.h"
 
 #ifdef CONFIG_BSD
 #include 
@@ -1585,6 +1586,10 @@ int bdrv_flush(BlockDriverState *bs)
 }
 
 if (bs->drv && bs->drv->bdrv_flush) {
+if (*bs->device_name && event_tap_is_on()) {
+event_tap_bdrv_flush();
+}
+
 return bs->drv->bdrv_flush(bs);
 }
 
@@ -2220,6 +2225,11 @@ BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, 
int64_t sector_num,
 if (bdrv_check_request(bs, sector_num, nb_sectors))
 return NULL;
 
+if (*bs->device_name && event_tap_is_on()) {
+return event_tap_bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
+ cb, opaque);
+}
+
 if (bs->dirty_bitmap) {
 blk_cb_data = blk_dirty_cb_alloc(bs, sector_num, nb_sectors, cb,
  opaque);
@@ -2493,6 +2503,11 @@ BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
 
 if (!drv)
 return NULL;
+
+if (*bs->device_name && event_tap_is_on()) {
+return event_tap_bdrv_aio_flush(bs, cb, opaque);
+}
+
 return drv->bdrv_aio_flush(bs, cb, opaque);
 }
 
-- 
1.7.1.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 06/18] virtio: decrement last_avail_idx with inuse before saving.

2011-03-22 Thread Yoshiaki Tamura
For regular migration inuse == 0 always as requests are flushed before
save. However, event-tap log when enabled introduces an extra queue
for requests which is not being flushed, thus the last inuse requests
are left in the event-tap queue.  Move the last_avail_idx value sent
to the remote back to make it repeat the last inuse requests.

Signed-off-by: Michael S. Tsirkin 
Signed-off-by: Yoshiaki Tamura 
---
 hw/virtio.c |   10 +-
 1 files changed, 9 insertions(+), 1 deletions(-)

diff --git a/hw/virtio.c b/hw/virtio.c
index 31bd9e3..f05d1b6 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -673,12 +673,20 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
 qemu_put_be32(f, i);
 
 for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
+/* For regular migration inuse == 0 always as
+ * requests are flushed before save. However,
+ * event-tap log when enabled introduces an extra
+ * queue for requests which is not being flushed,
+ * thus the last inuse requests are left in the event-tap queue.
+ * Move the last_avail_idx value sent to the remote back
+ * to make it repeat the last inuse requests. */
+uint16_t last_avail = vdev->vq[i].last_avail_idx - vdev->vq[i].inuse;
 if (vdev->vq[i].vring.num == 0)
 break;
 
 qemu_put_be32(f, vdev->vq[i].vring.num);
 qemu_put_be64(f, vdev->vq[i].pa);
-qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
+qemu_put_be16s(f, &last_avail);
 if (vdev->binding->save_queue)
 vdev->binding->save_queue(vdev->binding_opaque, i, f);
 }
-- 
1.7.1.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 16/18] migration: introduce migrate_ft_trans_{put,get}_ready(), and modify migrate_fd_put_ready() when ft_mode is on.

2011-03-22 Thread Yoshiaki Tamura
Introduce migrate_ft_trans_put_ready() which kicks the FT transaction
cycle.  When ft_mode is on, migrate_fd_put_ready() would open
ft_trans_file and turn on event_tap.  To end or cancel an FT transaction,
ft_mode and event_tap are turned off.  migrate_ft_trans_get_ready() is
called to receive the ack from the receiver.

Signed-off-by: Yoshiaki Tamura 
---
 migration.c |  266 ++-
 1 files changed, 265 insertions(+), 1 deletions(-)

diff --git a/migration.c b/migration.c
index 1c2d956..d536df0 100644
--- a/migration.c
+++ b/migration.c
@@ -21,6 +21,7 @@
 #include "qemu_socket.h"
 #include "block-migration.h"
 #include "qemu-objects.h"
+#include "event-tap.h"
 
 //#define DEBUG_MIGRATION
 
@@ -283,6 +284,17 @@ void migrate_fd_error(FdMigrationState *s)
 migrate_fd_cleanup(s);
 }
 
+static void migrate_ft_trans_error(FdMigrationState *s)
+{
+ft_mode = FT_ERROR;
+qemu_savevm_state_cancel(s->mon, s->file);
+migrate_fd_error(s);
+/* we need to set vm running to avoid assert in virtio-net */
+vm_start();
+event_tap_unregister();
+vm_stop(0);
+}
+
 int migrate_fd_cleanup(FdMigrationState *s)
 {
 int ret = 0;
@@ -318,6 +330,17 @@ void migrate_fd_put_notify(void *opaque)
 qemu_file_put_notify(s->file);
 }
 
+static void migrate_fd_get_notify(void *opaque)
+{
+FdMigrationState *s = opaque;
+
+qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
+qemu_file_get_notify(s->file);
+if (qemu_file_has_error(s->file)) {
+migrate_ft_trans_error(s);
+}
+}
+
 ssize_t migrate_fd_put_buffer(void *opaque, const void *data, size_t size)
 {
 FdMigrationState *s = opaque;
@@ -353,6 +376,10 @@ int migrate_fd_get_buffer(void *opaque, uint8_t *data, 
int64_t pos, size_t size)
 ret = -(s->get_error(s));
 }
 
+if (ret == -EAGAIN) {
+qemu_set_fd_handler2(s->fd, NULL, migrate_fd_get_notify, NULL, s);
+}
+
 return ret;
 }
 
@@ -379,6 +406,230 @@ void migrate_fd_connect(FdMigrationState *s)
 migrate_fd_put_ready(s);
 }
 
+static int migrate_ft_trans_commit(void *opaque)
+{
+FdMigrationState *s = opaque;
+int ret = -1;
+
+if (ft_mode != FT_TRANSACTION_COMMIT && ft_mode != FT_TRANSACTION_ATOMIC) {
+fprintf(stderr,
+"migrate_ft_trans_commit: invalid ft_mode %d\n", ft_mode);
+goto out;
+}
+
+do {
+if (ft_mode == FT_TRANSACTION_ATOMIC) {
+if (qemu_ft_trans_begin(s->file) < 0) {
+fprintf(stderr, "qemu_ft_trans_begin failed\n");
+goto out;
+}
+
+ret = qemu_savevm_trans_begin(s->mon, s->file, 0);
+if (ret < 0) {
+fprintf(stderr, "qemu_savevm_trans_begin failed\n");
+goto out;
+}
+
+ft_mode = FT_TRANSACTION_COMMIT;
+if (ret) {
+/* don't proceed until if fd isn't ready */
+goto out;
+}
+}
+
+/* make the VM state consistent by flushing outstanding events */
+vm_stop(0);
+
+/* send at full speed */
+qemu_file_set_rate_limit(s->file, 0);
+
+ret = qemu_savevm_trans_complete(s->mon, s->file);
+if (ret < 0) {
+fprintf(stderr, "qemu_savevm_trans_complete failed\n");
+goto out;
+}
+
+ret = qemu_ft_trans_commit(s->file);
+if (ret < 0) {
+fprintf(stderr, "qemu_ft_trans_commit failed\n");
+goto out;
+}
+
+if (ret) {
+ft_mode = FT_TRANSACTION_RECV;
+ret = 1;
+goto out;
+}
+
+/* flush and check if events are remaining */
+vm_start();
+ret = event_tap_flush_one();
+if (ret < 0) {
+fprintf(stderr, "event_tap_flush_one failed\n");
+goto out;
+}
+
+ft_mode =  ret ? FT_TRANSACTION_BEGIN : FT_TRANSACTION_ATOMIC;
+} while (ft_mode != FT_TRANSACTION_BEGIN);
+
+vm_start();
+ret = 0;
+
+out:
+return ret;
+}
+
+static int migrate_ft_trans_get_ready(void *opaque)
+{
+FdMigrationState *s = opaque;
+int ret = -1;
+
+if (ft_mode != FT_TRANSACTION_RECV) {
+fprintf(stderr,
+"migrate_ft_trans_get_ready: invalid ft_mode %d\n", ft_mode);
+goto error_out;
+}
+
+/* flush and check if events are remaining */
+vm_start();
+ret = event_tap_flush_one();
+if (ret < 0) {
+fprintf(stderr, "event_tap_flush_one failed\n");
+goto error_out;
+}
+
+if (ret) {
+ft_mode = FT_TRANSACTION_BEGIN;
+} else {
+ft_mode = FT_TRANSACTION_ATOMIC;
+
+ret = migrate_ft_trans_commit(s);
+if (ret < 0) {
+goto error_out;
+}
+if (ret) {
+goto out;
+}
+}
+
+vm_start();
+ret = 0;
+goto out;
+
+error_out:
+migrate_ft_trans_error(s);
+
+out:
+return ret;
+}

[PATCH 01/18] Make QEMUFile buf expandable, and introduce qemu_realloc_buffer() and qemu_clear_buffer().

2011-03-22 Thread Yoshiaki Tamura
Currently buf size is fixed at 32KB.  It would be useful if it could
be flexible.

Signed-off-by: Yoshiaki Tamura 
---
 hw/hw.h  |2 ++
 savevm.c |   20 +++-
 2 files changed, 21 insertions(+), 1 deletions(-)

diff --git a/hw/hw.h b/hw/hw.h
index 1b09039..f90ff15 100644
--- a/hw/hw.h
+++ b/hw/hw.h
@@ -58,6 +58,8 @@ void qemu_fflush(QEMUFile *f);
 int qemu_fclose(QEMUFile *f);
 void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size);
 void qemu_put_byte(QEMUFile *f, int v);
+void *qemu_realloc_buffer(QEMUFile *f, int size);
+void qemu_clear_buffer(QEMUFile *f);
 
 static inline void qemu_put_ubyte(QEMUFile *f, unsigned int v)
 {
diff --git a/savevm.c b/savevm.c
index 03fce62..d293f9c 100644
--- a/savevm.c
+++ b/savevm.c
@@ -171,7 +171,8 @@ struct QEMUFile {
when reading */
 int buf_index;
 int buf_size; /* 0 when writing */
-uint8_t buf[IO_BUF_SIZE];
+int buf_max_size;
+uint8_t *buf;
 
 int has_error;
 };
@@ -422,6 +423,9 @@ QEMUFile *qemu_fopen_ops(void *opaque, 
QEMUFilePutBufferFunc *put_buffer,
 f->get_rate_limit = get_rate_limit;
 f->is_write = 0;
 
+f->buf_max_size = IO_BUF_SIZE;
+f->buf = qemu_malloc(sizeof(uint8_t) * f->buf_max_size);
+
 return f;
 }
 
@@ -452,6 +456,19 @@ void qemu_fflush(QEMUFile *f)
 }
 }
 
+void *qemu_realloc_buffer(QEMUFile *f, int size)
+{
+f->buf_max_size = size;
+f->buf = qemu_realloc(f->buf, f->buf_max_size);
+
+return f->buf;
+}
+
+void qemu_clear_buffer(QEMUFile *f)
+{
+f->buf_size = f->buf_index = f->buf_offset = 0;
+}
+
 static void qemu_fill_buffer(QEMUFile *f)
 {
 int len;
@@ -477,6 +494,7 @@ int qemu_fclose(QEMUFile *f)
 qemu_fflush(f);
 if (f->close)
 ret = f->close(f->opaque);
+qemu_free(f->buf);
 qemu_free(f);
 return ret;
 }
-- 
1.7.1.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 07/18] Introduce fault tolerant VM transaction QEMUFile and ft_mode.

2011-03-22 Thread Yoshiaki Tamura
This code implements VM transaction protocol.  Like buffered_file, it
sits between savevm and migration layer.  With this architecture, VM
transaction protocol is implemented mostly independent from other
existing code.

Signed-off-by: Yoshiaki Tamura 
Signed-off-by: OHMURA Kei 
---
 Makefile.objs   |1 +
 ft_trans_file.c |  624 +++
 ft_trans_file.h |   72 +++
 migration.c |3 +
 trace-events|   15 ++
 5 files changed, 715 insertions(+), 0 deletions(-)
 create mode 100644 ft_trans_file.c
 create mode 100644 ft_trans_file.h

diff --git a/Makefile.objs b/Makefile.objs
index f8cf199..c084905 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -101,6 +101,7 @@ common-obj-y += qdev.o qdev-properties.o
 common-obj-y += block-migration.o
 common-obj-y += pflib.o
 common-obj-y += bitmap.o bitops.o
+common-obj-y += ft_trans_file.o
 
 common-obj-$(CONFIG_BRLAPI) += baum.o
 common-obj-$(CONFIG_POSIX) += migration-exec.o migration-unix.o migration-fd.o
diff --git a/ft_trans_file.c b/ft_trans_file.c
new file mode 100644
index 000..2b42b95
--- /dev/null
+++ b/ft_trans_file.c
@@ -0,0 +1,624 @@
+/*
+ * Fault tolerant VM transaction QEMUFile
+ *
+ * Copyright (c) 2010 Nippon Telegraph and Telephone Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * This source code is based on buffered_file.c.
+ * Copyright IBM, Corp. 2008
+ * Authors:
+ *  Anthony Liguori
+ */
+
+#include "qemu-common.h"
+#include "qemu-error.h"
+#include "hw/hw.h"
+#include "qemu-timer.h"
+#include "sysemu.h"
+#include "qemu-char.h"
+#include "trace.h"
+#include "ft_trans_file.h"
+
+typedef struct FtTransHdr
+{
+uint16_t cmd;
+uint16_t id;
+uint32_t seq;
+uint32_t payload_len;
+} FtTransHdr;
+
+typedef struct QEMUFileFtTrans
+{
+FtTransPutBufferFunc *put_buffer;
+FtTransGetBufferFunc *get_buffer;
+FtTransPutReadyFunc *put_ready;
+FtTransGetReadyFunc *get_ready;
+FtTransWaitForUnfreezeFunc *wait_for_unfreeze;
+FtTransCloseFunc *close;
+void *opaque;
+QEMUFile *file;
+
+enum QEMU_VM_TRANSACTION_STATE state;
+uint32_t seq;
+uint16_t id;
+
+int has_error;
+
+bool freeze_output;
+bool freeze_input;
+bool rate_limit;
+bool is_sender;
+bool is_payload;
+
+uint8_t *buf;
+size_t buf_max_size;
+size_t put_offset;
+size_t get_offset;
+
+FtTransHdr header;
+size_t header_offset;
+} QEMUFileFtTrans;
+
+#define IO_BUF_SIZE 32768
+
+static void ft_trans_append(QEMUFileFtTrans *s,
+const uint8_t *buf, size_t size)
+{
+if (size > (s->buf_max_size - s->put_offset)) {
+trace_ft_trans_realloc(s->buf_max_size, size + 1024);
+s->buf_max_size += size + 1024;
+s->buf = qemu_realloc(s->buf, s->buf_max_size);
+}
+
+trace_ft_trans_append(size);
+memcpy(s->buf + s->put_offset, buf, size);
+s->put_offset += size;
+}
+
+static void ft_trans_flush(QEMUFileFtTrans *s)
+{
+size_t offset = 0;
+
+if (s->has_error) {
+error_report("flush when error %d, bailing", s->has_error);
+return;
+}
+
+while (offset < s->put_offset) {
+ssize_t ret;
+
+ret = s->put_buffer(s->opaque, s->buf + offset, s->put_offset - 
offset);
+if (ret == -EAGAIN) {
+break;
+}
+
+if (ret <= 0) {
+error_report("error flushing data, %s", strerror(errno));
+s->has_error = FT_TRANS_ERR_FLUSH;
+break;
+} else {
+offset += ret;
+}
+}
+
+trace_ft_trans_flush(offset, s->put_offset);
+memmove(s->buf, s->buf + offset, s->put_offset - offset);
+s->put_offset -= offset;
+s->freeze_output = !!s->put_offset;
+}
+
+static ssize_t ft_trans_put(void *opaque, void *buf, int size)
+{
+QEMUFileFtTrans *s = opaque;
+size_t offset = 0;
+ssize_t len;
+
+/* flush buffered data before putting next */
+if (s->put_offset) {
+ft_trans_flush(s);
+}
+
+while (!s->freeze_output && offset < size) {
+len = s->put_buffer(s->opaque, (uint8_t *)buf + offset, size - offset);
+
+if (len == -EAGAIN) {
+trace_ft_trans_freeze_output();
+s->freeze_output = 1;
+break;
+}
+
+if (len <= 0) {
+error_report("putting data failed, %s", strerror(errno));
+s->has_error = 1;
+offset = -EINVAL;
+break;
+}
+
+offset += len;
+}
+
+if (s->freeze_output) {
+ft_trans_append(s, buf + offset, size - offset);
+offset = size;
+}
+
+return offset;
+}
+
+static int ft_trans_send_header(QEMUFileFtTrans *s,
+enum QEMU_VM_TRANSACTION_STATE state,
+uint32_t payload_len)
+{
+int ret;
+FtTransHdr *hdr = &s-

[PATCH 04/18] qemu-char: export socket_set_nodelay().

2011-03-22 Thread Yoshiaki Tamura
Signed-off-by: Yoshiaki Tamura 
---
 qemu-char.c   |2 +-
 qemu_socket.h |1 +
 2 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/qemu-char.c b/qemu-char.c
index 31c9e79..fa16d36 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -2111,7 +2111,7 @@ static void tcp_chr_telnet_init(int fd)
 send(fd, (char *)buf, 3, 0);
 }
 
-static void socket_set_nodelay(int fd)
+void socket_set_nodelay(int fd)
 {
 int val = 1;
 setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *)&val, sizeof(val));
diff --git a/qemu_socket.h b/qemu_socket.h
index 180e4db..a05e1e5 100644
--- a/qemu_socket.h
+++ b/qemu_socket.h
@@ -36,6 +36,7 @@ int inet_aton(const char *cp, struct in_addr *ia);
 int qemu_socket(int domain, int type, int protocol);
 int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen);
 void socket_set_nonblock(int fd);
+void socket_set_nodelay(int fd);
 int send_all(int fd, const void *buf, int len1);
 
 /* New, ipv6-ready socket helper functions, see qemu-sockets.c */
-- 
1.7.1.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 09/18] Introduce event-tap.

2011-03-22 Thread Yoshiaki Tamura
event-tap controls when to start an FT transaction, and provides proxy
functions to be called from net/block devices.  During an FT transaction,
it queues up net/block requests, and flushes them when the transaction is
completed.

Signed-off-by: Yoshiaki Tamura 
Signed-off-by: OHMURA Kei 
---
 Makefile.target |1 +
 event-tap.c |  940 +++
 event-tap.h |   44 +++
 qemu-tool.c |   27 ++
 trace-events|   10 +
 5 files changed, 1022 insertions(+), 0 deletions(-)
 create mode 100644 event-tap.c
 create mode 100644 event-tap.h

diff --git a/Makefile.target b/Makefile.target
index 62b102a..f088121 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -199,6 +199,7 @@ obj-y += rwhandler.o
 obj-$(CONFIG_KVM) += kvm.o kvm-all.o
 obj-$(CONFIG_NO_KVM) += kvm-stub.o
 LIBS+=-lz
+obj-y += event-tap.o
 
 QEMU_CFLAGS += $(VNC_TLS_CFLAGS)
 QEMU_CFLAGS += $(VNC_SASL_CFLAGS)
diff --git a/event-tap.c b/event-tap.c
new file mode 100644
index 000..95c147a
--- /dev/null
+++ b/event-tap.c
@@ -0,0 +1,940 @@
+/*
+ * Event Tap functions for QEMU
+ *
+ * Copyright (c) 2010 Nippon Telegraph and Telephone Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include "qemu-common.h"
+#include "qemu-error.h"
+#include "block.h"
+#include "block_int.h"
+#include "ioport.h"
+#include "osdep.h"
+#include "sysemu.h"
+#include "hw/hw.h"
+#include "net.h"
+#include "event-tap.h"
+#include "trace.h"
+
+enum EVENT_TAP_STATE {
+EVENT_TAP_OFF,
+EVENT_TAP_ON,
+EVENT_TAP_SUSPEND,
+EVENT_TAP_FLUSH,
+EVENT_TAP_LOAD,
+EVENT_TAP_REPLAY,
+};
+
+static enum EVENT_TAP_STATE event_tap_state = EVENT_TAP_OFF;
+
+typedef struct EventTapIOport {
+uint32_t address;
+uint32_t data;
+int  index;
+} EventTapIOport;
+
+#define MMIO_BUF_SIZE 8
+
+typedef struct EventTapMMIO {
+uint64_t address;
+uint8_t  buf[MMIO_BUF_SIZE];
+int  len;
+} EventTapMMIO;
+
+typedef struct EventTapNetReq {
+char *device_name;
+int iovcnt;
+int vlan_id;
+bool vlan_needed;
+bool async;
+struct iovec *iov;
+NetPacketSent *sent_cb;
+} EventTapNetReq;
+
+#define MAX_BLOCK_REQUEST 32
+
+typedef struct EventTapAIOCB EventTapAIOCB;
+
+typedef struct EventTapBlkReq {
+char *device_name;
+int num_reqs;
+int num_cbs;
+bool is_flush;
+BlockRequest reqs[MAX_BLOCK_REQUEST];
+EventTapAIOCB *acb[MAX_BLOCK_REQUEST];
+} EventTapBlkReq;
+
+#define EVENT_TAP_IOPORT (1 << 0)
+#define EVENT_TAP_MMIO   (1 << 1)
+#define EVENT_TAP_NET(1 << 2)
+#define EVENT_TAP_BLK(1 << 3)
+
+#define EVENT_TAP_TYPE_MASK (EVENT_TAP_NET - 1)
+
+typedef struct EventTapLog {
+int mode;
+union {
+EventTapIOport ioport;
+EventTapMMIO mmio;
+};
+union {
+EventTapNetReq net_req;
+EventTapBlkReq blk_req;
+};
+QTAILQ_ENTRY(EventTapLog) node;
+} EventTapLog;
+
+struct EventTapAIOCB {
+BlockDriverAIOCB common;
+BlockDriverAIOCB *acb;
+bool is_canceled;
+};
+
+static EventTapLog *last_event_tap;
+
+static QTAILQ_HEAD(, EventTapLog) event_list;
+static QTAILQ_HEAD(, EventTapLog) event_pool;
+
+static int (*event_tap_cb)(void);
+static QEMUBH *event_tap_bh;
+static VMChangeStateEntry *vmstate;
+
+static void event_tap_bh_cb(void *p)
+{
+if (event_tap_cb) {
+event_tap_cb();
+}
+
+qemu_bh_delete(event_tap_bh);
+event_tap_bh = NULL;
+}
+
+static void event_tap_schedule_bh(void)
+{
+trace_event_tap_ignore_bh(!!event_tap_bh);
+
+/* if bh is already set, we ignore it for now */
+if (event_tap_bh) {
+return;
+}
+
+event_tap_bh = qemu_bh_new(event_tap_bh_cb, NULL);
+qemu_bh_schedule(event_tap_bh);
+
+return;
+}
+
+static void *event_tap_alloc_log(void)
+{
+EventTapLog *log;
+
+if (QTAILQ_EMPTY(&event_pool)) {
+log = qemu_mallocz(sizeof(EventTapLog));
+} else {
+log = QTAILQ_FIRST(&event_pool);
+QTAILQ_REMOVE(&event_pool, log, node);
+}
+
+return log;
+}
+
+static void event_tap_free_net_req(EventTapNetReq *net_req);
+static void event_tap_free_blk_req(EventTapBlkReq *blk_req);
+
+static void event_tap_free_log(EventTapLog *log)
+{
+int mode = log->mode & ~EVENT_TAP_TYPE_MASK;
+
+if (mode == EVENT_TAP_NET) {
+event_tap_free_net_req(&log->net_req);
+} else if (mode == EVENT_TAP_BLK) {
+event_tap_free_blk_req(&log->blk_req);
+}
+
+log->mode = 0;
+
+/* return the log to event_pool */
+QTAILQ_INSERT_HEAD(&event_pool, log, node);
+}
+
+static void event_tap_free_pool(void)
+{
+EventTapLog *log, *next;
+
+QTAILQ_FOREACH_SAFE(log, &event_pool, node, next) {
+QTAILQ_REMOVE(&event_pool, log, node);
+qemu_free(log);
+}
+}
+
+static void event_tap_free_net_req(EventTapNetReq *net_req)
+{
+int i;
+
+if (!net_req->async) {
+for (i = 0; i <

[PATCH 17/18] migration-tcp: modify tcp_accept_incoming_migration() to handle ft_mode, and add a hack not to close fd when ft_mode is enabled.

2011-03-22 Thread Yoshiaki Tamura
When ft_mode is set in the header, tcp_accept_incoming_migration()
sets ft_trans_incoming() as a callback, which calls
qemu_file_get_notify() to receive FT transactions iteratively.  We also
need a hack not to close the fd before moving to ft_transaction mode, so
that we can reuse the fd for it.  A vm_change_state_handler is added to
turn off ft_mode when cont is pressed.

Signed-off-by: Yoshiaki Tamura 
---
 migration-tcp.c |   67 ++-
 1 files changed, 66 insertions(+), 1 deletions(-)

diff --git a/migration-tcp.c b/migration-tcp.c
index 62ec0ea..096781b 100644
--- a/migration-tcp.c
+++ b/migration-tcp.c
@@ -18,6 +18,8 @@
 #include "sysemu.h"
 #include "buffered_file.h"
 #include "block.h"
+#include "ft_trans_file.h"
+#include "event-tap.h"
 
 //#define DEBUG_MIGRATION_TCP
 
@@ -29,6 +31,8 @@
 do { } while (0)
 #endif
 
+static VMChangeStateEntry *vmstate;
+
 static int socket_errno(FdMigrationState *s)
 {
 return socket_error();
@@ -56,7 +60,8 @@ static int socket_read(FdMigrationState *s, const void * buf, 
size_t size)
 static int tcp_close(FdMigrationState *s)
 {
 DPRINTF("tcp_close\n");
-if (s->fd != -1) {
+/* FIX ME: accessing ft_mode here isn't clean */
+if (s->fd != -1 && ft_mode != FT_INIT) {
 close(s->fd);
 s->fd = -1;
 }
@@ -150,6 +155,36 @@ MigrationState *tcp_start_outgoing_migration(Monitor *mon,
 return &s->mig_state;
 }
 
+static void ft_trans_incoming(void *opaque)
+{
+QEMUFile *f = opaque;
+
+qemu_file_get_notify(f);
+if (qemu_file_has_error(f)) {
+ft_mode = FT_ERROR;
+qemu_fclose(f);
+}
+}
+
+static void ft_trans_reset(void *opaque, int running, int reason)
+{
+QEMUFile *f = opaque;
+
+if (running) {
+if (ft_mode != FT_ERROR) {
+qemu_fclose(f);
+}
+ft_mode = FT_OFF;
+qemu_del_vm_change_state_handler(vmstate);
+}
+}
+
+static void ft_trans_schedule_replay(QEMUFile *f)
+{
+event_tap_schedule_replay();
+vmstate = qemu_add_vm_change_state_handler(ft_trans_reset, f);
+}
+
 static void tcp_accept_incoming_migration(void *opaque)
 {
 struct sockaddr_in addr;
@@ -175,8 +210,38 @@ static void tcp_accept_incoming_migration(void *opaque)
 goto out;
 }
 
+if (ft_mode == FT_INIT) {
+autostart = 0;
+}
+
 process_incoming_migration(f);
+
+if (ft_mode == FT_INIT) {
+int ret;
+
+socket_set_nodelay(c);
+
+f = qemu_fopen_ft_trans(s, c);
+if (f == NULL) {
+fprintf(stderr, "could not qemu_fopen_ft_trans\n");
+goto out;
+}
+
+/* need to wait sender to setup */
+ret = qemu_ft_trans_begin(f);
+if (ret < 0) {
+goto out;
+}
+
+qemu_set_fd_handler2(c, NULL, ft_trans_incoming, NULL, f);
+ft_trans_schedule_replay(f);
+ft_mode = FT_TRANSACTION_RECV;
+
+return;
+}
+
 qemu_fclose(f);
+
 out:
 close(c);
 out2:
-- 
1.7.1.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 03/18] Introduce qemu_loadvm_state_no_header() and make qemu_loadvm_state() a wrapper.

2011-03-22 Thread Yoshiaki Tamura
Introduce qemu_loadvm_state_no_header() so that it can be called
iteratively without reading the header, and qemu_loadvm_state()
becomes a wrapper of it.

Signed-off-by: Yoshiaki Tamura 
---
 savevm.c |   45 +++--
 1 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/savevm.c b/savevm.c
index d293f9c..4a76e32 100644
--- a/savevm.c
+++ b/savevm.c
@@ -1743,30 +1743,14 @@ typedef struct LoadStateEntry {
 int version_id;
 } LoadStateEntry;
 
-int qemu_loadvm_state(QEMUFile *f)
+static int qemu_loadvm_state_no_header(QEMUFile *f)
 {
 QLIST_HEAD(, LoadStateEntry) loadvm_handlers =
 QLIST_HEAD_INITIALIZER(loadvm_handlers);
 LoadStateEntry *le, *new_le;
 uint8_t section_type;
-unsigned int v;
-int ret;
-
-if (qemu_savevm_state_blocked(default_mon)) {
-return -EINVAL;
-}
-
-v = qemu_get_be32(f);
-if (v != QEMU_VM_FILE_MAGIC)
-return -EINVAL;
 
-v = qemu_get_be32(f);
-if (v == QEMU_VM_FILE_VERSION_COMPAT) {
-fprintf(stderr, "SaveVM v2 format is obsolete and don't work 
anymore\n");
-return -ENOTSUP;
-}
-if (v != QEMU_VM_FILE_VERSION)
-return -ENOTSUP;
+int ret;
 
 while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) {
 uint32_t instance_id, version_id, section_id;
@@ -1861,6 +1845,31 @@ out:
 return ret;
 }
 
+int qemu_loadvm_state(QEMUFile *f)
+{
+unsigned int v;
+
+if (qemu_savevm_state_blocked(default_mon)) {
+return -EINVAL;
+}
+
+v = qemu_get_be32(f);
+if (v != QEMU_VM_FILE_MAGIC) {
+return -EINVAL;
+}
+
+v = qemu_get_be32(f);
+if (v == QEMU_VM_FILE_VERSION_COMPAT) {
+fprintf(stderr, "SaveVM v2 format is obsolete and don't work 
anymore\n");
+return -ENOTSUP;
+}
+if (v != QEMU_VM_FILE_VERSION) {
+return -ENOTSUP;
+}
+
+return qemu_loadvm_state_no_header(f);
+}
+
 static int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info,
   const char *name)
 {
-- 
1.7.1.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 00/18] Kemari for KVM v0.2.13

2011-03-22 Thread Yoshiaki Tamura
Hi,

This patch series is a revised version of Kemari for KVM, which
addresses the comments on the previous post.  The current code is based on
qemu.git 4ac8e585c85079f6fd2b2b6da3cb845e3e19459c.

The changes from v0.2.12 -> v0.2.13 are:

- replaced qemu_get_timer() with qemu_get_timer_ns()
- check s->file before calling qemu_ft_trans_cancel()
- avoid virtio-net assert upon calling event_tap_unregister()

The changes from v0.2.11 -> v0.2.12 are:

- fix vm_state_notify() to use QLIST_FOREACH_SAFE (Juan)
- introduce qemu_loadvm_state_no_header() and refactored
  qemu_loadvm_state() to call it after checking headers (Juan)

The changes from v0.2.10 -> v0.2.11 are:

- rebased to 0.14
- upon unregistering event-tap, set event_tap_state after event_tap_flush
- modify commit log of 02/18 that it won't make existing migration
  bi-directional.

The changes from v0.2.9 -> v0.2.10 are:

- change migrate format to kemari:<protocol>:<address>:<port> (Paolo)

The changes from v0.2.8 -> v0.2.9 are:

- abstract common code between qemu_savevm_{state,trans}_* (Paolo)
- change incoming format to kemari:<protocol>:<address>:<port> (Paolo)

The changes from v0.2.7 -> v0.2.8 are:

- fixed calling wrong cb in event-tap
- add missing qemu_aio_release in event-tap

The changes from v0.2.6 -> v0.2.7 are:

- add AIOCB, AIOPool and cancel functions (Kevin)
- insert event-tap for bdrv_flush (Kevin)
- add error handling when calling bdrv functions (Kevin)
- fix usage of qemu_aio_flush and bdrv_flush (Kevin)
- use bs in AIOCB on the primary (Kevin)
- reorder event-tap functions to gather with block/net (Kevin)
- fix checking bs->device_name (Kevin)

The changes from v0.2.5 -> v0.2.6 are:

- use qemu_{put,get}_be32() to save/load niov in event-tap

The changes from v0.2.4 -> v0.2.5 are:

- fixed braces and trailing spaces by using Blue's checkpatch.pl (Blue)
- event-tap: don't try to send blk_req if it's a bdrv_aio_flush event

The changes from v0.2.3 -> v0.2.4 are:

- call vm_start() before event_tap_flush_one() to avoid failure in
  virtio-net assertion
- add vm_change_state_handler to turn off ft_mode
- use qemu_iovec functions in event-tap
- remove duplicated code in migration
- remove unnecessary new line for error_report in ft_trans_file

The changes from v0.2.2 -> v0.2.3 are:

- queue async net requests without copying (MST)
-- if not async, contents of the packets are sent to the secondary
- better description for option -k (MST)
- fix memory transfer failure
- fix ft transaction initiation failure

The changes from v0.2.1 -> v0.2.2 are:

- decrement last_avail_idx with inuse before saving (MST)
- remove qemu_aio_flush() and bdrv_flush_all() in migrate_ft_trans_commit()

The changes from v0.2 -> v0.2.1 are:

- Move event-tap to net/block layer and use stubs (Blue, Paul, MST, Kevin)
- Tap bdrv_aio_flush (Marcelo)
- Remove multiwrite interface in event-tap (Stefan)
- Fix event-tap to use pio/mmio to replay both net/block (Stefan)
- Improve error handling in event-tap (Stefan)
- Fix leak in event-tap (Stefan)
- Revise virtio last_avail_idx manipulation (MST)
- Clean up migration.c hook (Marcelo)
- Make deleting change state handler robust (Isaku, Anthony)

The changes from v0.1.1 -> v0.2 are:

- Introduce a queue in event-tap to make VM sync live.
- Change transaction receiver to a state machine for async receiving.
- Replace net/block layer functions with event-tap proxy functions.
- Remove dirty bitmap optimization for now.
- convert DPRINTF() in ft_trans_file to trace functions.
- convert fprintf() in ft_trans_file to error_report().
- improved error handling in ft_trans_file.
- add a tmp pointer to qemu_del_vm_change_state_handler.

The changes from v0.1 -> v0.1.1 are:

- events are tapped in net/block layer instead of device emulation layer.
- Introduce a new option for -incoming to accept FT transaction.

- Removed writev() support to QEMUFile and FdMigrationState for now.
  I would post this work in a different series.

- Modified virtio-blk save/load handler to send inuse variable to
  correctly replay.

- Removed configure --enable-ft-mode.
- Removed unnecessary check for qemu_realloc().

The first 6 patches modify several functions of qemu to prepare
introducing Kemari specific components.

The next 6 patches are the components of Kemari.  They introduce
event-tap and the FT transaction protocol file based on buffered file.
The design document of FT transaction protocol can be found at,
http://wiki.qemu.org/images/b/b1/Kemari_sender_receiver_0.5a.pdf

Then the following 2 patches modify net/block layer functions with
event-tap functions.  Please note that if Kemari is off, event-tap
will just pass through, and there is almost no intrusion into existing
functions including normal live migration.

Finally, the migration layer is modified to support Kemari in the
last 4 patches.  Again, there shouldn't be any effect if a user
doesn't specify Kemari specific options.  The transaction is now async
on both sender and receiver side.  The sender side respects the
max_downtime 

[PATCH 15/18] savevm: introduce qemu_savevm_trans_{begin,commit}.

2011-03-22 Thread Yoshiaki Tamura
Introduce qemu_savevm_trans_{begin,commit} to send the memory and
device info together, while avoiding cancelling memory state tracking.
This patch also abstracts common code between
qemu_savevm_state_{begin,iterate,commit}.

Signed-off-by: Yoshiaki Tamura 
---
 savevm.c |  157 +++---
 sysemu.h |2 +
 2 files changed, 101 insertions(+), 58 deletions(-)

diff --git a/savevm.c b/savevm.c
index 48a0f65..4793be0 100644
--- a/savevm.c
+++ b/savevm.c
@@ -1629,29 +1629,68 @@ bool qemu_savevm_state_blocked(Monitor *mon)
 return false;
 }
 
-int qemu_savevm_state_begin(Monitor *mon, QEMUFile *f, int blk_enable,
-int shared)
+/*
+ * section: header to write
+ * inc: if true, forces to pass SECTION_PART instead of SECTION_START
+ * pause: if true, breaks the loop when live handler returned 0
+ */
+static int qemu_savevm_state_live(Monitor *mon, QEMUFile *f, int section,
+  bool inc, bool pause)
 {
 SaveStateEntry *se;
+int skip = 0, ret;
 
 QTAILQ_FOREACH(se, &savevm_handlers, entry) {
-if(se->set_params == NULL) {
+int len, stage;
+
+if (se->save_live_state == NULL) {
 continue;
-   }
-   se->set_params(blk_enable, shared, se->opaque);
+}
+
+/* Section type */
+qemu_put_byte(f, section);
+qemu_put_be32(f, se->section_id);
+
+if (section == QEMU_VM_SECTION_START) {
+/* ID string */
+len = strlen(se->idstr);
+qemu_put_byte(f, len);
+qemu_put_buffer(f, (uint8_t *)se->idstr, len);
+
+qemu_put_be32(f, se->instance_id);
+qemu_put_be32(f, se->version_id);
+
+stage = inc ? QEMU_VM_SECTION_PART : QEMU_VM_SECTION_START;
+} else {
+assert(inc);
+stage = section;
+}
+
+ret = se->save_live_state(mon, f, stage, se->opaque);
+if (!ret) {
+skip++;
+if (pause) {
+break;
+}
+}
 }
-
-qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
-qemu_put_be32(f, QEMU_VM_FILE_VERSION);
+
+return skip;
+}
+
+static void qemu_savevm_state_full(QEMUFile *f)
+{
+SaveStateEntry *se;
 
 QTAILQ_FOREACH(se, &savevm_handlers, entry) {
 int len;
 
-if (se->save_live_state == NULL)
+if (se->save_state == NULL && se->vmsd == NULL) {
 continue;
+}
 
 /* Section type */
-qemu_put_byte(f, QEMU_VM_SECTION_START);
+qemu_put_byte(f, QEMU_VM_SECTION_FULL);
 qemu_put_be32(f, se->section_id);
 
 /* ID string */
@@ -1662,9 +1701,29 @@ int qemu_savevm_state_begin(Monitor *mon, QEMUFile *f, 
int blk_enable,
 qemu_put_be32(f, se->instance_id);
 qemu_put_be32(f, se->version_id);
 
-se->save_live_state(mon, f, QEMU_VM_SECTION_START, se->opaque);
+vmstate_save(f, se);
+}
+
+qemu_put_byte(f, QEMU_VM_EOF);
+}
+
+int qemu_savevm_state_begin(Monitor *mon, QEMUFile *f, int blk_enable,
+int shared)
+{
+SaveStateEntry *se;
+
+QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+if (se->set_params == NULL) {
+continue;
+}
+se->set_params(blk_enable, shared, se->opaque);
 }
 
+qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
+qemu_put_be32(f, QEMU_VM_FILE_VERSION);
+
+qemu_savevm_state_live(mon, f, QEMU_VM_SECTION_START, 0, 0);
+
 if (qemu_file_has_error(f)) {
 qemu_savevm_state_cancel(mon, f);
 return -EIO;
@@ -1675,29 +1734,16 @@ int qemu_savevm_state_begin(Monitor *mon, QEMUFile *f, 
int blk_enable,
 
 int qemu_savevm_state_iterate(Monitor *mon, QEMUFile *f)
 {
-SaveStateEntry *se;
 int ret = 1;
 
-QTAILQ_FOREACH(se, &savevm_handlers, entry) {
-if (se->save_live_state == NULL)
-continue;
-
-/* Section type */
-qemu_put_byte(f, QEMU_VM_SECTION_PART);
-qemu_put_be32(f, se->section_id);
-
-ret = se->save_live_state(mon, f, QEMU_VM_SECTION_PART, se->opaque);
-if (!ret) {
-/* Do not proceed to the next vmstate before this one reported
-   completion of the current stage. This serializes the migration
-   and reduces the probability that a faster changing state is
-   synchronized over and over again. */
-break;
-}
-}
-
-if (ret)
+/* Do not proceed to the next vmstate before this one reported
+   completion of the current stage. This serializes the migration
+   and reduces the probability that a faster changing state is
+   synchronized over and over again. */
+ret = qemu_savevm_state_live(mon, f, QEMU_VM_SECTION_PART, 1, 1);
+if (!ret) {
 return 1;
+}
 
 if (qemu_file_has_error(f)) {
 qemu_savevm_state_cancel(mon, f);
@@ -1709,46 +1755,41 @@ int qemu_save

[PATCH 13/18] net: insert event-tap to qemu_send_packet() and qemu_sendv_packet_async().

2011-03-22 Thread Yoshiaki Tamura
event-tap function is called only when it is on.

Signed-off-by: Yoshiaki Tamura 
---
 net.c |9 +
 1 files changed, 9 insertions(+), 0 deletions(-)

diff --git a/net.c b/net.c
index ddcca97..a541ede 100644
--- a/net.c
+++ b/net.c
@@ -37,6 +37,7 @@
 #include "qemu_socket.h"
 #include "hw/qdev.h"
 #include "iov.h"
+#include "event-tap.h"
 
 static QTAILQ_HEAD(, VLANState) vlans;
 static QTAILQ_HEAD(, VLANClientState) non_vlan_clients;
@@ -519,6 +520,10 @@ ssize_t qemu_send_packet_async(VLANClientState *sender,
 
 void qemu_send_packet(VLANClientState *vc, const uint8_t *buf, int size)
 {
+if (event_tap_is_on()) {
+return event_tap_send_packet(vc, buf, size);
+}
+
 qemu_send_packet_async(vc, buf, size, NULL);
 }
 
@@ -600,6 +605,10 @@ ssize_t qemu_sendv_packet_async(VLANClientState *sender,
 {
 NetQueue *queue;
 
+if (event_tap_is_on()) {
+return event_tap_sendv_packet_async(sender, iov, iovcnt, sent_cb);
+}
+
 if (sender->link_down || (!sender->peer && !sender->vlan)) {
 return iov_size(iov, iovcnt);
 }
-- 
1.7.1.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 18/18] Introduce "kemari:" to enable FT migration mode (Kemari).

2011-03-22 Thread Yoshiaki Tamura
When "kemari:" is set in front of the URI of the migrate command, it will
turn on ft_mode to start FT migration mode (Kemari).  On the receiver side,
the option looks like: -incoming kemari:<protocol>:<address>:<port>

Signed-off-by: Yoshiaki Tamura 
Acked-by: Paolo Bonzini 
---
 hmp-commands.hx |4 +++-
 migration.c |   12 
 qmp-commands.hx |4 +++-
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/hmp-commands.hx b/hmp-commands.hx
index 834e6a8..4cd7bfa 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -760,7 +760,9 @@ ETEXI
  "\n\t\t\t -b for migration without shared storage with"
  " full copy of disk\n\t\t\t -i for migration without "
  "shared storage with incremental copy of disk "
- "(base image shared between src and destination)",
+ "(base image shared between src and destination)"
+ "\n\t\t\t put \"kemari:\" in front of URI to enable "
+ "Fault Tolerance mode (Kemari protocol)",
 .user_print = monitor_user_noop,   
.mhandler.cmd_new = do_migrate,
 },
diff --git a/migration.c b/migration.c
index d536df0..5017dea 100644
--- a/migration.c
+++ b/migration.c
@@ -48,6 +48,12 @@ int qemu_start_incoming_migration(const char *uri)
 const char *p;
 int ret;
 
+/* check ft_mode (Kemari protocol) */
+if (strstart(uri, "kemari:", &p)) {
+ft_mode = FT_INIT;
+uri = p;
+}
+
 if (strstart(uri, "tcp:", &p))
 ret = tcp_start_incoming_migration(p);
 #if !defined(WIN32)
@@ -99,6 +105,12 @@ int do_migrate(Monitor *mon, const QDict *qdict, QObject 
**ret_data)
 return -1;
 }
 
+/* check ft_mode (Kemari protocol) */
+if (strstart(uri, "kemari:", &p)) {
+ft_mode = FT_INIT;
+uri = p;
+}
+
 if (strstart(uri, "tcp:", &p)) {
 s = tcp_start_outgoing_migration(mon, p, max_throttle, detach,
  blk, inc);
diff --git a/qmp-commands.hx b/qmp-commands.hx
index fbd98ee..71e4f0e 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -437,7 +437,9 @@ EQMP
  "\n\t\t\t -b for migration without shared storage with"
  " full copy of disk\n\t\t\t -i for migration without "
  "shared storage with incremental copy of disk "
- "(base image shared between src and destination)",
+ "(base image shared between src and destination)"
+ "\n\t\t\t put \"kemari:\" in front of URI to enable "
+ "Fault Tolerance mode (Kemari protocol)",
 .user_print = monitor_user_noop,   
.mhandler.cmd_new = do_migrate,
 },
-- 
1.7.1.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 08/18] savevm: introduce util functions to control ft_trans_file from savevm layer.

2011-03-22 Thread Yoshiaki Tamura
To utilize ft_trans_file function, savevm needs interfaces to be
exported.

Signed-off-by: Yoshiaki Tamura 
---
 hw/hw.h  |5 ++
 savevm.c |  150 ++
 2 files changed, 155 insertions(+), 0 deletions(-)

diff --git a/hw/hw.h b/hw/hw.h
index f90ff15..2d4d595 100644
--- a/hw/hw.h
+++ b/hw/hw.h
@@ -51,6 +51,7 @@ QEMUFile *qemu_fopen_ops(void *opaque, QEMUFilePutBufferFunc 
*put_buffer,
 QEMUFile *qemu_fopen(const char *filename, const char *mode);
 QEMUFile *qemu_fdopen(int fd, const char *mode);
 QEMUFile *qemu_fopen_socket(int fd);
+QEMUFile *qemu_fopen_ft_trans(int s_fd, int c_fd);
 QEMUFile *qemu_popen(FILE *popen_file, const char *mode);
 QEMUFile *qemu_popen_cmd(const char *command, const char *mode);
 int qemu_stdio_fd(QEMUFile *f);
@@ -60,6 +61,9 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int 
size);
 void qemu_put_byte(QEMUFile *f, int v);
 void *qemu_realloc_buffer(QEMUFile *f, int size);
 void qemu_clear_buffer(QEMUFile *f);
+int qemu_ft_trans_begin(QEMUFile *f);
+int qemu_ft_trans_commit(QEMUFile *f);
+int qemu_ft_trans_cancel(QEMUFile *f);
 
 static inline void qemu_put_ubyte(QEMUFile *f, unsigned int v)
 {
@@ -94,6 +98,7 @@ void qemu_file_set_error(QEMUFile *f);
  * halted due to rate limiting or EAGAIN errors occur as it can be used to
  * resume output. */
 void qemu_file_put_notify(QEMUFile *f);
+void qemu_file_get_notify(void *opaque);
 
 static inline void qemu_put_be64s(QEMUFile *f, const uint64_t *pv)
 {
diff --git a/savevm.c b/savevm.c
index 4a76e32..48a0f65 100644
--- a/savevm.c
+++ b/savevm.c
@@ -82,6 +82,7 @@
 #include "migration.h"
 #include "qemu_socket.h"
 #include "qemu-queue.h"
+#include "ft_trans_file.h"
 
 #define SELF_ANNOUNCE_ROUNDS 5
 
@@ -189,6 +190,13 @@ typedef struct QEMUFileSocket
 QEMUFile *file;
 } QEMUFileSocket;
 
+typedef struct QEMUFileSocketTrans
+{
+int fd;
+QEMUFileSocket *s;
+VMChangeStateEntry *e;
+} QEMUFileSocketTrans;
+
 static int socket_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
 {
 QEMUFileSocket *s = opaque;
@@ -204,6 +212,22 @@ static int socket_get_buffer(void *opaque, uint8_t *buf, 
int64_t pos, int size)
 return len;
 }
 
+static ssize_t socket_put_buffer(void *opaque, const void *buf, size_t size)
+{
+QEMUFileSocket *s = opaque;
+ssize_t len;
+
+do {
+len = send(s->fd, (void *)buf, size, 0);
+} while (len == -1 && socket_error() == EINTR);
+
+if (len == -1) {
+len = -socket_error();
+}
+
+return len;
+}
+
 static int socket_close(void *opaque)
 {
 QEMUFileSocket *s = opaque;
@@ -211,6 +235,71 @@ static int socket_close(void *opaque)
 return 0;
 }
 
+static int socket_trans_get_buffer(void *opaque, uint8_t *buf, int64_t pos, 
size_t size)
+{
+QEMUFileSocketTrans *t = opaque;
+QEMUFileSocket *s = t->s;
+ssize_t len;
+
+len = socket_get_buffer(s, buf, pos, size);
+
+return len;
+}
+
+static ssize_t socket_trans_put_buffer(void *opaque, const void *buf, size_t 
size)
+{
+QEMUFileSocketTrans *t = opaque;
+
+return socket_put_buffer(t->s, buf, size);
+}
+
+static int qemu_loadvm_state_no_header(QEMUFile *f);
+
+static int socket_trans_get_ready(void *opaque)
+{
+QEMUFileSocketTrans *t = opaque;
+QEMUFileSocket *s = t->s;
+QEMUFile *f = s->file;
+int ret = 0;
+
+ret = qemu_loadvm_state_no_header(f);
+if (ret < 0) {
+fprintf(stderr,
+"socket_trans_get_ready: error while loading vmstate\n");
+}
+
+return ret;
+}
+
+static int socket_trans_close(void *opaque)
+{
+QEMUFileSocketTrans *t = opaque;
+QEMUFileSocket *s = t->s;
+
+qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
+qemu_set_fd_handler2(t->fd, NULL, NULL, NULL, NULL);
+qemu_del_vm_change_state_handler(t->e);
+close(s->fd);
+close(t->fd);
+qemu_free(s);
+qemu_free(t);
+
+return 0;
+}
+
+static void socket_trans_resume(void *opaque, int running, int reason)
+{
+QEMUFileSocketTrans *t = opaque;
+QEMUFileSocket *s = t->s;
+
+if (!running) {
+return;
+}
+
+qemu_announce_self();
+qemu_fclose(s->file);
+}
+
 static int stdio_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, int 
size)
 {
 QEMUFileStdio *s = opaque;
@@ -333,6 +422,26 @@ QEMUFile *qemu_fopen_socket(int fd)
 return s->file;
 }
 
+QEMUFile *qemu_fopen_ft_trans(int s_fd, int c_fd)
+{
+QEMUFileSocketTrans *t = qemu_mallocz(sizeof(QEMUFileSocketTrans));
+QEMUFileSocket *s = qemu_mallocz(sizeof(QEMUFileSocket));
+
+t->s = s;
+t->fd = s_fd;
+t->e = qemu_add_vm_change_state_handler(socket_trans_resume, t);
+
+s->fd = c_fd;
+s->file = qemu_fopen_ops_ft_trans(t, socket_trans_put_buffer,
+  socket_trans_get_buffer, NULL,
+  socket_trans_get_ready,
+  migrate_fd_wait_for_unfreeze,

KVM make error--drivers/usb/serial/usb_wwan.c

2011-03-22 Thread Ren, Yongjie
Hi folks,
Kvm.git: commit 2ee44a580db58f98d85b57bfc468bbc5729ec9b3  Author: Avi Kivity    
Date:   Mon Mar 21 12:53:58 2011 +0200
My build system: RHEL5u5,  Linux kvm-build 2.6.38-rc4+ #1 SMP Sat Feb 19 
15:35:09 CST 2011 x86_64 x86_64 x86_64 GNU/Linux

When I make kvm, I get the following error.
drivers/usb/serial/usb_wwan.c: In function 'play_delayed':
drivers/usb/serial/usb_wwan.c:702: error: 'struct dev_pm_info' has no member 
named 'usage_count'

When I look at line 702 of usb_wwan.c, it shows:
atomic_dec(&port->serial->interface->dev.power.usage_count);
I see usage_count is declared in kvm.git/include/linux/pm.h, but only
inside an "#ifdef CONFIG_PM_RUNTIME" ... "#endif" block.
The kernel configuration of my build system doesn't define
CONFIG_PM_RUNTIME, so I get the kvm make error.

Does anybody know the solution?  Thanks.

Best Regards,
 Yongjie Ren    (Jay)

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop

2011-03-22 Thread Shirley Ma
On Tue, 2011-03-22 at 13:36 +0200, Michael S. Tsirkin wrote:
> diff --git a/drivers/virtio/virtio_ring.c
> b/drivers/virtio/virtio_ring.c
> index cc2f73e..6106017 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -185,11 +185,6 @@ int virtqueue_add_buf_gfp(struct virtqueue *_vq,
> if (vq->num_free < out + in) {
> pr_debug("Can't add buf len %i - avail = %i\n",
>  out + in, vq->num_free);
> -   /* FIXME: for historical reasons, we force a notify
> here if
> -* there are outgoing parts to the buffer.  Presumably
> the
> -* host should service the ring ASAP. */
> -   if (out)
> -   vq->notify(&vq->vq);
> END_USE(vq);
> return -ENOSPC;
> }
> 

With simply removing the notify here, it does help the case when TX
overrun hits too often, for example for 1K message size, the single
TCP_STREAM performance improved from 2.xGb/s to 4.xGb/s.

Thanks
Shirley

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/6] PCI / Intel IOMMU: Use syscore_ops instead of sysdev class and sysdev

2011-03-22 Thread Rafael J. Wysocki
On Tuesday, March 22, 2011, Joerg Roedel wrote:
> On Mon, Mar 21, 2011 at 07:36:17PM -0400, Rafael J. Wysocki wrote:
> >  drivers/pci/intel-iommu.c |   38 +-
> >  1 file changed, 9 insertions(+), 29 deletions(-)
> 
> Looks good.

May I take that as an ACK?

> I prepare a patch to convert AMD IOMMU to syscore_ops too.

Already done.  :-)

It's a part of patch [1/6].

Thanks,
Rafael
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/6] Do not use sysdevs for implementing "core" PM operations on x86

2011-03-22 Thread Rafael J. Wysocki
On Tuesday, March 22, 2011, Ingo Molnar wrote:
> 
> * Rafael J. Wysocki  wrote:
> 
> > > If there are no objectsions, I'd like to push these patches through the 
> > > suspend
> > > tree.
> > 
> > [1/8] has been merged in the meantime and [3/8] has been included into the
> > ACPI tree.  if there are no objections, I'm going to push the following
> > patches to Linus this week through the suspend-2.6 tree:
> > 
> > [1/6] - Convert sysdev users in arch/x86 to using struct syscore_ops.
> > 
> > [2/6] - Make timekeeping use struct syscore_ops for suspend/resume.
> >  
> > [3/6] - Make Intel IOMMU use struct syscore_ops for suspend/resume.
> > 
> > [4/6] - Make KVM use struct syscore_ops for suspend/resume.
> > 
> > [5/6] - Make cpufreq use struct syscore_ops for boot CPU suspend/resume.
> > 
> > [6/6] - Introduce config switch allowing architectures to skip sysdev
> > suspend/resume/shutdown code.
> 
> The x86 bits look fine.
> 
> Acked-by: Ingo Molnar 

Thanks!

> The patches affect a lot of hardware so please make sure they are tested well 
> before pushing them to Linus :-)

I have tested the majority, but unfortunately I have no hardware to test
the Intel IOMMU patch on it.

Thanks,
Rafael
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


msix_unset_mask_notifier: Assertion `dev->msix_mask_notifier' failed.

2011-03-22 Thread Nikola Ciprich
Hello,
I wanted to give vhost_net a try, but I can't make it work.
When I try to start qemu-kvm with a vhost interface, it crashes with
msix_unset_mask_notifier: Assertion `dev->msix_mask_notifier' failed.

According to some threads I've dug up, it is related to missing
eventfd support (I'm using centos 5 for host).
host kernel is 2.6.37, arch x86_64, I'm using qemu-kvm-0.14.0
is there some workaround for this?
thanks in advance
nik


-- 
-
Ing. Nikola CIPRICH
LinuxBox.cz, s.r.o.
28. rijna 168, 709 01 Ostrava

tel.:   +420 596 603 142
fax:+420 596 621 273
mobil:  +420 777 093 799

www.linuxbox.cz

mobil servis: +420 737 238 656
email servis: ser...@linuxbox.cz
-
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: kvm, emulation hangs when using some usb device

2011-03-22 Thread David Ahern


On 03/22/11 12:20, slavik wrote:
> i suggest this is synonyms for single function, doesn't it?
> 

It's the legacy syntax - you are already using it with the tablet
option. If it cleared your problem then it is also synonymous for bug in
the -device route.


> On Tue, Mar 22, 2011 at 11:07 PM, David Ahern  wrote:
>>
>>
>> On 03/22/11 11:53, slavik wrote:
>>> hi, as usual
>>> kvm -boot d   -usbdevice tablet  -vnc :8 -monitor
>>> telnet:0.0.0.0:4008,server,nowait -drive
>>> file="/winxp.img",cache=unsafe,if=virtio,boot=on,aio=native -balloon
>>> virtio -name vmwin
>>> 1  -fda /virtio-win-1.1.16.vfd -enable-kvm -m 1024 -usb -smp 2 -device
>>> usb-host,hostbus=002,hostaddr=003,id=usbdev2  -device
>>> usb-host,hostbus=005,hostaddr=003,id=usbdev4
>>
>> try -usbdevice host:bus.addr instead of -device. So,
>> -usbdevice host:002.003 -usbdevice host:005.003
>>
>>>
>>> I was tried "-no-kvm-irqchip" and "-no-kvm-pit", nothing changes, with
>>> "-no-kvm" it does not work too, but with other symptoms.
>>>
>>> On Tue, Mar 22, 2011 at 9:57 PM, David Ahern  wrote:


 On 03/22/11 05:03, slavik wrote:
> I have some troubles with passing usb into kvm virtual machine.
> I trying to provide the usb device 2022:0008 (Amikon vpn key) or 19d2:2000
> (ZTE MF112 hsdpa modem)  into virtual machine with windows xp iax32.
> kvm virtual machine was hung completely, and stop responding to anything.
> Maybe I need there some trick to use this kind (switchable?) of usb 
> devices?

 As a command line option? If so what options did you use for the usb
 components?



>
> ps: Linux workdesk 2.6.38-gentoo #1 SMP Mon Mar 21 18:10:13 YEKT 2011 
> x86_64
> AMD Phenom(tm) II X6 1055T Processor AuthenticAMD GNU/Linux
>
> --
> wbr Slavik
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

>>>
>>>
>>>
>>
> 
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: kvm, emulation hangs when using some usb device

2011-03-22 Thread slavik
I suppose these are synonyms for the same function, aren't they?

On Tue, Mar 22, 2011 at 11:07 PM, David Ahern  wrote:
>
>
> On 03/22/11 11:53, slavik wrote:
>> hi, as usual
>> kvm -boot d   -usbdevice tablet  -vnc :8 -monitor
>> telnet:0.0.0.0:4008,server,nowait -drive
>> file="/winxp.img",cache=unsafe,if=virtio,boot=on,aio=native -balloon
>> virtio -name vmwin
>> 1  -fda /virtio-win-1.1.16.vfd -enable-kvm -m 1024 -usb -smp 2 -device
>> usb-host,hostbus=002,hostaddr=003,id=usbdev2  -device
>> usb-host,hostbus=005,hostaddr=003,id=usbdev4
>
> try -usbdevice host:bus.addr instead of -device. So,
> -usbdevice host:002.003 -usbdevice host:005.003
>
>>
>> I was tried "-no-kvm-irqchip" and "-no-kvm-pit", nothing changes, with
>> "-no-kvm" it does not work too, but with other symptoms.
>>
>> On Tue, Mar 22, 2011 at 9:57 PM, David Ahern  wrote:
>>>
>>>
>>> On 03/22/11 05:03, slavik wrote:
 I have some troubles with passing usb into kvm virtual machine.
 I trying to provide the usb device 2022:0008 (Amikon vpn key) or 19d2:2000
 (ZTE MF112 hsdpa modem)  into virtual machine with windows xp iax32.
 kvm virtual machine was hung completely, and stop responding to anything.
 Maybe I need there some trick to use this kind (switchable?) of usb 
 devices?
>>>
>>> As a command line option? If so what options did you use for the usb
>>> components?
>>>
>>>
>>>

 ps: Linux workdesk 2.6.38-gentoo #1 SMP Mon Mar 21 18:10:13 YEKT 2011 
 x86_64
 AMD Phenom(tm) II X6 1055T Processor AuthenticAMD GNU/Linux

 --
 wbr Slavik
 --
 To unsubscribe from this list: send the line "unsubscribe kvm" in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

>>>
>>
>>
>>
>



-- 
wbr Slavik
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


BUG: 0.14.0 -device usb-host supports only one device

2011-03-22 Thread Erik Rull
When using the -device usb-host option to automatically add USB devices
from the host to the guest, only one device gets detected.
It does not matter if it is added via the command line or via device_add on
the qemu console.


Curious: If a second device is plugged into the host, nothing happens in
qemu. But on the host, the device is detected. If the first device is
removed, the second device gets detected by qemu. If the first device is
then added again, it is not detected by qemu until the second device is
removed, and so on.


When adding the devices manually, everything is fine.
Confirmed with and without the ehci-patch on qemu-kvm 0.14.0.

Best regards,

Erik
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: kvm, emulation hangs when using some usb device

2011-03-22 Thread Erik Rull

slavik wrote:

I have some troubles with passing usb into kvm virtual machine.
I trying to provide the usb device 2022:0008 (Amikon vpn key) or 19d2:2000
(ZTE MF112 hsdpa modem)  into virtual machine with windows xp iax32.
kvm virtual machine was hung completely, and stop responding to anything.
Maybe I need there some trick to use this kind (switchable?) of usb devices?

ps: Linux workdesk 2.6.38-gentoo #1 SMP Mon Mar 21 18:10:13 YEKT 2011 x86_64
AMD Phenom(tm) II X6 1055T Processor AuthenticAMD GNU/Linux


Have you enabled USB 2.0 on your host system? If yes, that causes issues
(e.g. Windows bluescreens) with printers and sometimes other USB 2.0
devices. If you set it to USB 1.1, it should be fine, but the transfer rate
is extremely low.


I would propose the ehci-patch from David, this works quite nice and gives 
a good transfer performance and no bluescreens with printers and so on.


Best regards,

Erik
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: kvm, emulation hangs when using some usb device

2011-03-22 Thread David Ahern


On 03/22/11 05:03, slavik wrote:
> I have some troubles with passing usb into kvm virtual machine.
> I trying to provide the usb device 2022:0008 (Amikon vpn key) or 19d2:2000
> (ZTE MF112 hsdpa modem)  into virtual machine with windows xp iax32.
> kvm virtual machine was hung completely, and stop responding to anything.
> Maybe I need there some trick to use this kind (switchable?) of usb devices?

As a command line option? If so what options did you use for the usb
components?



> 
> ps: Linux workdesk 2.6.38-gentoo #1 SMP Mon Mar 21 18:10:13 YEKT 2011 x86_64
> AMD Phenom(tm) II X6 1055T Processor AuthenticAMD GNU/Linux
> 
> --
> wbr Slavik
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm: fix crash on irqfd deassign

2011-03-22 Thread Avi Kivity

On 03/17/2011 10:53 AM, Michael S. Tsirkin wrote:

irqfd in kvm used flush_work incorrectly:
it assumed that work scheduled previously can't run
after flush_work, but since kvm uses a non-reentrant
workqueue (by means of schedule_work)
we need flush_work_sync to get that guarantee.



Applied, and queued for 2.6.39.  Thanks.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/5] KVM: x86 emulator: Use single stage decoding for PUSHA and POPA instructions

2011-03-22 Thread Avi Kivity

On 03/22/2011 05:54 PM, Takuya Yoshikawa wrote:

I intentionally left emulate_* in this version because I thought
there might be some reason for introducing new em_* naming.



It's just that their signatures are all the same, and to conserve space 
in the decode tables.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/5] KVM: x86 emulator: Use single stage decoding for PUSHA and POPA instructions

2011-03-22 Thread Takuya Yoshikawa
On Tue, 22 Mar 2011 15:07:20 +0200
Avi Kivity  wrote:

> > +static int em_pusha(struct x86_emulate_ctxt *ctxt)
> > +{
> > +   return emulate_pusha(ctxt, ctxt->ops);
> > +}
> > +
> 
> You can simply rename/update emulate_pusha/emulate_popa, since they have 
> no other callers.
> 

I intentionally left emulate_* in this version because I thought
there might be some reason for introducing new em_* naming.

OK, I'll remove useless old functions.

Takuya
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/5] KVM: x86 emulator: Use single stage decoding for Group 1 instructions

2011-03-22 Thread Avi Kivity

On 03/22/2011 05:35 PM, Takuya Yoshikawa wrote:

On Tue, 22 Mar 2011 14:53:21 +0200
Avi Kivity  wrote:


I prefer to have the patchset fully updated, even if it takes a while.
Good luck with the recovery!

Things already got back as usual, thanks.
I had expected much longer time.



Good to hear.


BTW, is it better to wait until rc1 is released when we send
patches for the next merge window?



No need - we (the maintainers) buffer patches in kvm.git master.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/5] KVM: x86 emulator: Use single stage decoding for POP instructions

2011-03-22 Thread Takuya Yoshikawa
On Tue, 22 Mar 2011 15:06:33 +0200
Avi Kivity  wrote:

> > POP is converted.  RET will be converted later.
> 
> There is also POP r/m (8F /0); could be done later.
> 

OK, I'll recheck.
I want to put related things into one patch if possible.

Takuya
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/5] KVM: x86 emulator: Use single stage decoding for PUSH/POP XS instructions

2011-03-22 Thread Takuya Yoshikawa
On Tue, 22 Mar 2011 15:03:11 +0200
Avi Kivity  wrote:

> > +static int em_push_es(struct x86_emulate_ctxt *ctxt)
> > +{
> > +   emulate_push_sreg(ctxt, ctxt->ops, VCPU_SREG_ES);
> > +   return X86EMUL_CONTINUE;
> > +}
> 
> I thought of adding generic sreg decoding, so we can use 
> em_push()/em_pop() here.  It can be done later, though, and really 
> there's no huge advantage here.
> 

Then, I will drop this one from the next version.

Takuya
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/5] KVM: x86 emulator: Use single stage decoding for Group 1 instructions

2011-03-22 Thread Takuya Yoshikawa
On Tue, 22 Mar 2011 14:55:57 +0200
Avi Kivity  wrote:

> > @@ -2337,10 +2401,20 @@ static int em_mov(struct x86_emulate_ctxt *ctxt)
> >   #define D6ALU(_f) D2bv((_f) | DstMem | SrcReg | ModRM),   
> > \
> > D2bv(((_f) | DstReg | SrcMem | ModRM)&  ~Lock), \
> > D2bv(((_f)&  ~Lock) | DstAcc | SrcImm)
> > +#define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e),
> > \
> > +   I2bv(((_f) | DstReg | SrcMem | ModRM)&  ~Lock, _e), \
> > +   I2bv(((_f)&  ~Lock) | DstAcc | SrcImm, _e)
> >
> 
> I think you can remove D6ALU, no?

Yes, I can. I'll remove in the next version.

Takuya

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/5] KVM: x86 emulator: Use single stage decoding for Group 1 instructions

2011-03-22 Thread Takuya Yoshikawa
On Tue, 22 Mar 2011 14:53:21 +0200
Avi Kivity  wrote:

> 
> I prefer to have the patchset fully updated, even if it takes a while.  
> Good luck with the recovery!

Things already got back as usual, thanks.
I had expected much longer time.

BTW, is it better to wait until rc1 is released when we send
patches for the next merge window?

> > >  >  What is the difference of CMPS and SCAS?
> > >  >
> > >  >
> > >  One compares to memory locations and another memory with AX register.
> >
> > I wanted to know whether we should introduce em_cmps() or em_scas() later.
> >
> > Probably we can eliminate introducing em_scas() because it should be
> > completely same as em_cmp().
> 
> I agree.
> 
> > But em_cmps() will be needed for inserting
> >c->dst.type = OP_NONE;
> > before em_cmp().
> 
> I think we can put this line into em_cmp().  In fact, it looks like CMP 
> r/m, reg will now write back the data into memory, which is wrong.  So I 
> recommend a first patch to add c->dst.type = OP_NONE before the cmp: 
> label, so we have a fix patch followed by a refactoring patch.

I'll update like that!

> 
> Later we can have a ReadOnly opcode table bit, so we can disable 
> writeback from the opcode tables, not the code.

OK, then we can remove the line at this timing.

Takuya
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


KVM call minutes Mars 22

2011-03-22 Thread Juan Quintela

Minutes of today call:

- Patch integration
  * Anthony thinks that we need more reviews
  * chicken-and-egg problem to become a maintainer
  * more patch reviews
  * need for autotest to run faster
  * not everybody use autotest
  * use patches for infrastructure
  * we need to get better integrating changes
that touch lots of files
  * have to get better testing, both autotest and unit test


- QAPI integration
  * how to integrate
  * integrate at the same time that tests
  * do the generator and then the command conversion
(should be mechanical)

- GSOC
  * we have been accepted

Later, Juan.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM:PPC Issue in exit timing clearance

2011-03-22 Thread Alexander Graf

On 03/22/2011 01:43 PM, Avi Kivity wrote:

On 03/16/2011 06:37 PM, Bharat Bhushan wrote:
Following dump is observed on host when clearing the exit timing 
counters


[root@p1021mds kvm]# echo -n 'c'>  vm1200_vcpu0_timing
INFO: task echo:1276 blocked for more than 120 seconds.
"echo 0>  /proc/sys/kernel/hung_task_timeout_secs" disables this 
message.

echo  D 0ff5bf94 0  1276   1190 0x
Call Trace:
[c2157e40] [c0007908] __switch_to+0x9c/0xc4
[c2157e50] [c040293c] schedule+0x1b4/0x3bc
[c2157e90] [c04032dc] __mutex_lock_slowpath+0x74/0xc0
[c2157ec0] [c00369e4] kvmppc_init_timing_stats+0x20/0xb8
[c2157ed0] [c0036b00] kvmppc_exit_timing_write+0x84/0x98
[c2157ef0] [c00b9f90] vfs_write+0xc0/0x16c
[c2157f10] [c00ba284] sys_write+0x4c/0x90
[c2157f40] [c000e320] ret_from_syscall+0x0/0x3c

The vcpu->mutex is used by kvm_ioctl_* (KVM_RUN etc) and same was 
used when clearing the stats (in kvmppc_init_timing_stats()).
What happens is that when the guest is idle then it held the 
vcpu->mutex. While the exiting timing process waits for guest to 
release the vcpu->mutex and a hang state is reached.


Now using separate lock for exit timing stats.



Seems excessive to have a new lock just for timing.


The whole thing is only used for debugging, so being excessive doesn't 
hurt too much here. In normal configurations, it's #ifdef'ed out.




What about using vcpu->requests to have the statistics cleared in vcpu 
context?


What about dropping the whole thing and replacing it with tracepoints?


That should work. The question is if it's worth the effort. The current 
code is there after all :).



Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/5] KVM: x86 emulator: Use single stage decoding for PUSHA and POPA instructions

2011-03-22 Thread Avi Kivity

On 03/13/2011 05:21 PM, Takuya Yoshikawa wrote:

From: Takuya Yoshikawa

PUSHA and POPA are converted.

Signed-off-by: Takuya Yoshikawa
---
  arch/x86/kvm/emulate.c |   19 ---
  1 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 8295c50..4e16a55 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2224,6 +2224,11 @@ static int em_push_gs(struct x86_emulate_ctxt *ctxt)
return X86EMUL_CONTINUE;
  }

+static int em_pusha(struct x86_emulate_ctxt *ctxt)
+{
+   return emulate_pusha(ctxt, ctxt->ops);
+}
+


You can simply rename/update emulate_pusha/emulate_popa, since they have 
no other callers.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/5] KVM: x86 emulator: Use single stage decoding for POP instructions

2011-03-22 Thread Avi Kivity

On 03/13/2011 05:20 PM, Takuya Yoshikawa wrote:

From: Takuya Yoshikawa

POP is converted.  RET will be converted later.


There is also POP r/m (8F /0); could be done later.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/5] KVM: x86 emulator: Use single stage decoding for PUSH/POP XS instructions

2011-03-22 Thread Avi Kivity

On 03/13/2011 05:19 PM, Takuya Yoshikawa wrote:

From: Takuya Yoshikawa

PUSH ES/CS/SS/DS/FS/GS and POP ES/SS/DS/FS/GS are converted.


+static int em_push_es(struct x86_emulate_ctxt *ctxt)
+{
+   emulate_push_sreg(ctxt, ctxt->ops, VCPU_SREG_ES);
+   return X86EMUL_CONTINUE;
+}


I thought of adding generic sreg decoding, so we can use 
em_push()/em_pop() here.  It can be done later, though, and really 
there's no huge advantage here.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] KVM call agenda for Mars 21th

2011-03-22 Thread Luiz Capitulino
On Mon, 21 Mar 2011 13:58:35 +0100
Juan Quintela  wrote:

> 
> Please, send in any agenda items you are interested in covering.
> 
> - Merge patches speed.  I just "feel", that patches are not being
>   handled fast enough, so ... I looked how much patches have been
>   integrated since Mars 1st:

- QAPI speedup merge proposal
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/5] KVM: x86 emulator: Use single stage decoding for Group 1 instructions

2011-03-22 Thread Avi Kivity

On 03/13/2011 05:17 PM, Takuya Yoshikawa wrote:

From: Takuya Yoshikawa

ADD, OR, ADC, SBB, AND, SUB, XOR, CMP are converted using a new macro
I6ALU(_f, _e).

CMPS, SCAS will be converted later.



@@ -2337,10 +2401,20 @@ static int em_mov(struct x86_emulate_ctxt *ctxt)
  #define D6ALU(_f) D2bv((_f) | DstMem | SrcReg | ModRM),   
\
D2bv(((_f) | DstReg | SrcMem | ModRM)&  ~Lock), \
D2bv(((_f)&  ~Lock) | DstAcc | SrcImm)
+#define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e),
\
+   I2bv(((_f) | DstReg | SrcMem | ModRM)&  ~Lock, _e), \
+   I2bv(((_f)&  ~Lock) | DstAcc | SrcImm, _e)



I think you can remove D6ALU, no?

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [COMMIT] [WIN-GUEST-DRIVERS] Balloon - remove WMI usage. Remove wmi.c.

2011-03-22 Thread Vadim Rozenfeld
On Tue, 2011-03-22 at 06:06 -0400, Yan Vugenfirer wrote:
> Hello Vadim,
> 
> Can you check this issues?
> 
http://www.mail-archive.com/kvm@vger.kernel.org/msg51061.html

> Thanks,
> Yan.
> 
> > -Original Message-
> > From: ya su [mailto:suya94...@gmail.com]
> > Sent: Saturday, March 19, 2011 5:28 AM
> > To: Yan Vugenfirer
> > Cc: kvm@vger.kernel.org
> > Subject: Re: [COMMIT] [WIN-GUEST-DRIVERS] Balloon - remove WMI usage.
> > Remove wmi.c.
> >
> > Yan:
> >
> >  I have tested the newest balloon driver (from 1.1.16) on windows
> > server 2003, balloon.sys can not be installed successfully and return
> > error code 10. have you tested this or any updates? thanks.
> >
> > Regards.
> >
> > Green.
> >
> >
> > 2010/2/15 Yan Vugenfirer :
> > > repository: C:/dev/kvm-guest-drivers-windows
> > > branch: master
> > > commit 7ab588f373eda9d08a497e969739019d2075a6d2
> > > Author: Yan Vugenfirer 
> > > Date:   Mon Feb 15 15:01:36 2010 +0200
> > >
> > >[WIN-GUEST-DRIVERS] Balloon - remove WMI usage. Remove wmi.c.
> > >
> > >Signed-off-by: Vadim Rozenfeld
> > >
> > > diff --git a/Balloon/BalloonWDF/wmi.c b/Balloon/BalloonWDF/wmi.c
> > > deleted file mode 100644
> > > index 70a9270..000
> > > --- a/Balloon/BalloonWDF/wmi.c
> > > +++ /dev/null
> > > @@ -1,90 +0,0 @@
> > > -
> > /**
> > > - * Copyright (c) 2009  Red Hat, Inc.
> > > - *
> > > - * File: device.c
> > > - *
> > > - * Author(s):
> > > - *
> > > - * This file contains WMI support routines
> > > - *
> > > - * This work is licensed under the terms of the GNU GPL, version 2.
> >  See
> > > - * the COPYING file in the top-level directory.
> > > - *
> > > -
> > **/
> > > -#include "precomp.h"
> > > -
> > > -#if defined(EVENT_TRACING)
> > > -#include "wmi.tmh"
> > > -#endif
> > > -
> > > -
> > > -#define MOFRESOURCENAME L"MofResourceName"
> > > -
> > > -#ifdef ALLOC_PRAGMA
> > > -#pragma alloc_text(PAGE, WmiRegistration)
> > > -#pragma alloc_text(PAGE, EvtWmiDeviceInfoQueryInstance)
> > > -#endif
> > > -
> > > -NTSTATUS
> > > -WmiRegistration(
> > > -WDFDEVICE  Device
> > > -)
> > > -{
> > > -WDF_WMI_PROVIDER_CONFIG providerConfig;
> > > -WDF_WMI_INSTANCE_CONFIG instanceConfig;
> > > -NTSTATUSstatus;
> > > -DECLARE_CONST_UNICODE_STRING(mofRsrcName, MOFRESOURCENAME);
> > > -
> > > -PAGED_CODE();
> > > -
> > > -TraceEvents(TRACE_LEVEL_INFORMATION, DBG_PNP, "-->
> > WmiRegistration\n");
> > > -
> > > -status = WdfDeviceAssignMofResourceName(Device, &mofRsrcName);
> > > -if (!NT_SUCCESS(status)) {
> > > -TraceEvents(TRACE_LEVEL_ERROR, DBG_PNP,
> > > - "WdfDeviceAssignMofResourceName failed 0x%x",
> > status);
> > > -return status;
> > > -}
> > > -
> > > -WDF_WMI_PROVIDER_CONFIG_INIT(&providerConfig,
> > &GUID_DEV_WMI_BALLOON);
> > > -providerConfig.MinInstanceBufferSize = sizeof(ULONGLONG);
> > > -
> > > -WDF_WMI_INSTANCE_CONFIG_INIT_PROVIDER_CONFIG(&instanceConfig,
> > &providerConfig);
> > > -instanceConfig.Register = TRUE;
> > > -instanceConfig.EvtWmiInstanceQueryInstance =
> > EvtWmiDeviceInfoQueryInstance;
> > > -
> > > -status = WdfWmiInstanceCreate(Device,
> > > -  &instanceConfig,
> > > -  WDF_NO_OBJECT_ATTRIBUTES,
> > > -  WDF_NO_HANDLE);
> > > -if (!NT_SUCCESS(status)) {
> > > -TraceEvents(TRACE_LEVEL_ERROR, DBG_PNP,
> > > - "WdfWmiInstanceCreate failed 0x%x", status);
> > > -return status;
> > > -}
> > > -
> > > -TraceEvents(TRACE_LEVEL_INFORMATION, DBG_PNP, "<--
> > WmiRegistration\n");
> > > -return status;
> > > -}
> > > -
> > > -NTSTATUS
> > > -EvtWmiDeviceInfoQueryInstance(
> > > -__in  WDFWMIINSTANCE WmiInstance,
> > > -__in  ULONG OutBufferSize,
> > > -__out_bcount_part(OutBufferSize, *BufferUsed) PVOID OutBuffer,
> > > -__out PULONG BufferUsed
> > > -)
> > > -{
> > > -PDRIVER_CONTEXT drvCxt = GetDriverContext(WdfGetDriver());
> > > -
> > > -PAGED_CODE();
> > > -
> > > -TraceEvents(TRACE_LEVEL_VERBOSE, DBG_WMI, "-->
> > EvtWmiDeviceInfoQueryInstance\n");
> > > -
> > > -RtlZeroMemory(OutBuffer, sizeof(ULONGLONG));
> > > -*(ULONGLONG*) OutBuffer = (ULONGLONG)drvCxt->num_pages;
> > > -*BufferUsed = sizeof(ULONGLONG);
> > > -
> > > -TraceEvents(TRACE_LEVEL_VERBOSE, DBG_WMI, "<--
> > EvtWmiDeviceInfoQueryInstance\n");
> > > -return STATUS_SUCCESS;
> > > -}
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe kvm-
> > commits" in
> > > the body of a message to majord...@vger.kernel.org
> > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > >
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majord...@

Re: [PATCH 1/5] KVM: x86 emulator: Use single stage decoding for Group 1 instructions

2011-03-22 Thread Avi Kivity

On 03/15/2011 04:06 PM, Takuya Yoshikawa wrote:

>  >  So I just decided to treat CMPS and SCAS in another patch.
>  >  I mean I may introduce em_cmps or em_scas later if needed.
>  >
>  scas will likely just call em_cmp.
>
>  >  You prefer to treat these in this patch?
>  >
>  If there will be other patch for those instruction then it may be left
>  as is.

In my city, electric power supply may become restricted under control
from now, though only a few hours.  So please take the patch series as
is if possible!


I prefer to have the patchset fully updated, even if it takes a while.  
Good luck with the recovery!



>  >
>  >  What is the difference of CMPS and SCAS?
>  >
>  >
>  One compares to memory locations and another memory with AX register.

I wanted to know whether we should introduce em_cmps() or em_scas() later.

Probably we can eliminate introducing em_scas() because it should be
completely same as em_cmp().


I agree.


But em_cmps() will be needed for inserting
   c->dst.type = OP_NONE;
before em_cmp().


I think we can put this line into em_cmp().  In fact, it looks like CMP 
r/m, reg will now write back the data into memory, which is wrong.  So I 
recommend a first patch to add c->dst.type = OP_NONE before the cmp: 
label, so we have a fix patch followed by a refactoring patch.


Later we can have a ReadOnly opcode table bit, so we can disable 
writeback from the opcode tables, not the code.



--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm: fix crash on irqfd deassign

2011-03-22 Thread Michael S. Tsirkin
On Tue, Mar 22, 2011 at 02:37:27PM +0200, Avi Kivity wrote:
> On 03/17/2011 10:53 AM, Michael S. Tsirkin wrote:
> >irqfd in kvm used flush_work incorrectly:
> >it assumed that work scheduled previously can't run
> >after flush_work, but since kvm uses a non-reentrant
> >workqueue (by means of schedule_work)
> >we need flush_work_sync to get that guarantee.
> >
> >Signed-off-by: Michael S. Tsirkin
> >Reported-by: Jean-Philippe Menil
> >Tested-by: Jean-Philippe Menil
> >---
> >
> >Note: this is needed for kernel 2.6.39 and earlier.
> >
> 
> What about kvm.git master?

Sorry about the confusion.

Clarification: this is needed on  kvm.git master,
as well as 2.6.39 and earlier.

> -- 
> error compiling committee.c: too many arguments to function
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM:PPC Issue in exit timing clearance

2011-03-22 Thread Avi Kivity

On 03/16/2011 06:37 PM, Bharat Bhushan wrote:

Following dump is observed on host when clearing the exit timing counters

[root@p1021mds kvm]# echo -n 'c'>  vm1200_vcpu0_timing
INFO: task echo:1276 blocked for more than 120 seconds.
"echo 0>  /proc/sys/kernel/hung_task_timeout_secs" disables this message.
echo  D 0ff5bf94 0  1276   1190 0x
Call Trace:
[c2157e40] [c0007908] __switch_to+0x9c/0xc4
[c2157e50] [c040293c] schedule+0x1b4/0x3bc
[c2157e90] [c04032dc] __mutex_lock_slowpath+0x74/0xc0
[c2157ec0] [c00369e4] kvmppc_init_timing_stats+0x20/0xb8
[c2157ed0] [c0036b00] kvmppc_exit_timing_write+0x84/0x98
[c2157ef0] [c00b9f90] vfs_write+0xc0/0x16c
[c2157f10] [c00ba284] sys_write+0x4c/0x90
[c2157f40] [c000e320] ret_from_syscall+0x0/0x3c

The vcpu->mutex is used by kvm_ioctl_* (KVM_RUN etc) and same was used 
when clearing the stats (in kvmppc_init_timing_stats()).
What happens is that when the guest is idle then it held the vcpu->mutex. While the 
exiting timing process waits for guest to release the vcpu->mutex and a hang state 
is reached.

Now using separate lock for exit timing stats.



Seems excessive to have a new lock just for timing.

What about using vcpu->requests to have the statistics cleared in vcpu 
context?


What about dropping the whole thing and replacing it with tracepoints?

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm: fix crash on irqfd deassign

2011-03-22 Thread Avi Kivity

On 03/17/2011 10:53 AM, Michael S. Tsirkin wrote:

irqfd in kvm used flush_work incorrectly:
it assumed that work scheduled previously can't run
after flush_work, but since kvm uses a non-reentrant
workqueue (by means of schedule_work)
we need flush_work_sync to get that guarantee.

Signed-off-by: Michael S. Tsirkin
Reported-by: Jean-Philippe Menil
Tested-by: Jean-Philippe Menil
---

Note: this is needed for kernel 2.6.39 and earlier.



What about kvm.git master?

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop

2011-03-22 Thread Michael S. Tsirkin
On Mon, Mar 21, 2011 at 11:03:07AM -0700, Shirley Ma wrote:
> On Fri, 2011-03-18 at 18:41 -0700, Shirley Ma wrote:
> > > > +   /* Drop packet instead of stop queue for better
> > performance
> > > */
> > > 
> > > I would like to see some justification as to why this is the right
> > > way to go and not just papering over the real problem. 
> > 
> > Fair. KVM guest virtio_net TX queue stop/restart is pretty expensive,
> > which involves:
> > 
> > 1. Guest enable callback: one memory barrier, interrupt flag set
> 
> Missed this cost: for history reason, it also involves a guest exit from
> I/O write (PCI_QUEUE_NOTIFY).

OK, after some research, it looks like the reason was the tx timer that
qemu used to use. So the hack of avoiding the add_buf call will
avoid this kick and so break these hosts.
I guess we can add a feature bit to detect a new host
and so avoid the kick. We are running low on feature bits
unfortunately, but just for testing, could you quantify the difference
that this makes using the following patch:


diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index cc2f73e..6106017 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -185,11 +185,6 @@ int virtqueue_add_buf_gfp(struct virtqueue *_vq,
if (vq->num_free < out + in) {
pr_debug("Can't add buf len %i - avail = %i\n",
 out + in, vq->num_free);
-   /* FIXME: for historical reasons, we force a notify here if
-* there are outgoing parts to the buffer.  Presumably the
-* host should service the ring ASAP. */
-   if (out)
-   vq->notify(&vq->vq);
END_USE(vq);
return -ENOSPC;
}

-- 
MST
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Pull request (QMP enhancements)

2011-03-22 Thread Avi Kivity

On 03/16/2011 12:26 PM, Dmitry Konishchev wrote:

Hi! I use QEMU via QMP and discovered that for some tasks there is no
proper way to do them via QMP. I've written few patches:
* One of them modifies "pci_add" command to return pci address of the
added device, when user hasn't specified it (to be able to delete it
via "pci_del" in the future).
* The second one adds INCOMING_FINISHED QMP event which is emitted
when QEMU finished incoming migration (when started with -incoming
command line option). It is needed because now there is no way to
determine, whether it finished or not, and QMP "cont" command just
returns error.

Here is the patches:
https://github.com/KonishchevDmitry/qemu-kvm/commit/f7e7119fecbce280e7ee45364260fb6e4d58d49a
https://github.com/KonishchevDmitry/qemu-kvm/commit/97250c2a7eeb1506e0a1517b416046dd02720025

It will be awesome if you include this patches to the upstream.



Please post the patches to qemu-de...@nongnu.org (please post patches, 
not URLs to patches).


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


kvm, emulation hangs when using some usb device

2011-03-22 Thread slavik
I have some troubles with passing usb into kvm virtual machine.
I trying to provide the usb device 2022:0008 (Amikon vpn key) or 19d2:2000
(ZTE MF112 hsdpa modem)  into virtual machine with windows xp iax32.
kvm virtual machine was hung completely, and stop responding to anything.
Maybe I need there some trick to use this kind (switchable?) of usb devices?

ps: Linux workdesk 2.6.38-gentoo #1 SMP Mon Mar 21 18:10:13 YEKT 2011 x86_64
AMD Phenom(tm) II X6 1055T Processor AuthenticAMD GNU/Linux

--
wbr Slavik
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/6] PCI / Intel IOMMU: Use syscore_ops instead of sysdev class and sysdev

2011-03-22 Thread Joerg Roedel
On Mon, Mar 21, 2011 at 07:36:17PM -0400, Rafael J. Wysocki wrote:
>  drivers/pci/intel-iommu.c |   38 +-
>  1 file changed, 9 insertions(+), 29 deletions(-)

Looks good. I prepare a patch to convert AMD IOMMU to syscore_ops too.

Joerg

-- 
AMD Operating System Research Center

Advanced Micro Devices GmbH Einsteinring 24 85609 Dornach
General Managers: Alberto Bozzo, Andrew Bowd
Registration: Dornach, Landkr. Muenchen; Registerger. Muenchen, HRB Nr. 43632

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: "KVM internal error. Suberror: 1" with ancient 2.4 kernel as guest

2011-03-22 Thread Jiri Kosina
On Mon, 21 Mar 2011, Wei Xu wrote:

> Avi and Jiri:
> 
> I implemented emulation of movq(64bit) and movdqa(128 bit). If you guys 
> still need it let me know and I can post somewhere...

I found a way around it, so I don't need it any more in the setup that has 
been affected.

But it is definitely worth having merged in my opinion. Could you please 
post the patches for review/merge?

Thanks,

-- 
Jiri Kosina
SUSE Labs, Novell Inc.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] posix-timers: RCU conversion

2011-03-22 Thread Avi Kivity

On 03/22/2011 10:59 AM, Ben Nagy wrote:

On Tue, Mar 22, 2011 at 12:54 PM, Eric Dumazet  wrote:
>  Ben Nagy reported a scalability problem with KVM/QEMU that hit very hard
>  a single spinlock (idr_lock) in posix-timers code, on its 48 core
>  machine.

Hi all,

Thanks a lot for all the help so far. We've tested with Eric's patch.

First up, here's our version of the patch for the current ubuntu
kernel from git:
http://paste.ubuntu.com/583668/

Here's top with 96 idle guests running:
top - 16:47:53 up  1:09,  3 users,  load average: 0.00, 0.01, 0.05
Tasks: 499 total,   3 running, 496 sleeping,   0 stopped,   0 zombie
Cpu(s):  1.9%us,  3.2%sy,  0.0%ni, 95.0%id,  0.0%wa,  0.0%hi,  0.0%si,  0.0%st
Mem:  99068656k total, 13121096k used, 85947560k free,22192k buffers
Swap:  2438140k total,0k used,  2438140k free,  3597860k cached
  (much better!)

Start of perf top:

--
PerfTop:   10318 irqs/sec  kernel:97.4%  exact:  0.0% [1000Hz
cycles],  (all, 48 CPUs)
--

  samples  pcnt functionDSO
  ___ _ ___
___

 95444.00 59.3% __ticket_spin_lock
[kernel.kallsyms]
 12937.00  8.0% native_safe_halt
[kernel.kallsyms]
  6149.00  3.8% kvm_get_cs_db_l_bits
/lib/modules/2.6.38-7-server/kernel/arch/x86/kvm/kvm.ko
  5105.00  3.2% tg_load_down
[kernel.kallsyms]
  5088.00  3.2% svm_vcpu_run
/lib/modules/2.6.38-7-server/kernel/arch/x86/kvm/kvm-amd.ko
  4807.00  3.0% kvm_set_pfn_dirty
/lib/modules/2.6.38-7-server/kernel/arch/x86/kvm/kvm.ko
  2855.00  1.8% ktime_get
[kernel.kallsyms]
  1535.00  1.0% find_busiest_group
[kernel.kallsyms]
  1386.00  0.9% find_next_bit
[kernel.kallsyms]


Start of perf report -g
 55.26%kvm  [kernel.kallsyms] [k] __ticket_spin_lock
   |
   --- __ticket_spin_lock
  |
  |--94.68%-- _raw_spin_lock
  |  |
  |  |--97.55%-- double_rq_lock
  |  |  load_balance
  |  |  idle_balance
  |  |  schedule
  |  |  |
  |  |  |--60.56%--
schedule_hrtimeout_range_clock
  |  |  |
schedule_hrtimeout_range
  |  |  |  poll_schedule_timeout
  |  |  |  do_select
  |  |  |  core_sys_select
  |  |  |  sys_select
  |  |  |  system_call_fastpath




Looks like the posix-timer issue is completely gone, to be replaced by 
the load balancer.


Copying peterz.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/6] Do not use sysdevs for implementing "core" PM operations on x86

2011-03-22 Thread Ingo Molnar

* Rafael J. Wysocki  wrote:

> > If there are no objections, I'd like to push these patches through the 
> > suspend
> > tree.
> 
> [1/8] has been merged in the meantime and [3/8] has been included into the
> ACPI tree.  if there are no objections, I'm going to push the following
> patches to Linus this week through the suspend-2.6 tree:
> 
> [1/6] - Convert sysdev users in arch/x86 to using struct syscore_ops.
> 
> [2/6] - Make timekeeping use struct syscore_ops for suspend/resume.
>  
> [3/6] - Make Intel IOMMU use struct syscore_ops for suspend/resume.
> 
> [4/6] - Make KVM use struct syscore_ops for suspend/resume.
> 
> [5/6] - Make cpufreq use struct syscore_ops for boot CPU suspend/resume.
> 
> [6/6] - Introduce config switch allowing architectures to skip sysdev
> suspend/resume/shutdown code.

The x86 bits look fine.

Acked-by: Ingo Molnar 

The patches affect a lot of hardware so please make sure they are tested well 
before pushing them to Linus :-)

Ingo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/3] alleviate time drift with HPET periodic timers

2011-03-22 Thread Jan Kiszka
On 2011-03-22 11:03, Ulrich Obergfell wrote:
> 
>>> Part 3 of the patch implements the following options for the
>>> 'configure' script.
>>>
>>> --disable-hpet-driftfix
>>> --enable-hpet-driftfix
>>
>> I see no benefit in this configurability. Just make the driftfix
>> unconditionally available, runtime-disabled by default for now until it
>> matured and there is no downside in enabling it all the time.
> 
> 
> Many Thanks Jan,
> 
> I enclosed the code in '#ifdef CONFIG_HPET_DRIFTFIX ... #endif'
> so that it can be easily identified (and removed if the generic API
> would be implemented some day). Since the ifdef's are already there
> I added the configuration option for convenience. As you don't see
> any benefit in this option, I can remove that part of the patch.
> However, I'd suggest to keep the ifdef's and do the following:
> 
> - Rename to '#ifdef HPET_DRIFTFIX ... #endif' to make it clear that
>   this is not controlled via a configuration option.
> 
> - Add '#define HPET_DRIFTFIX' to hw/hpet_emul.h.
> 
> Do you agree ?

Thanks to version control and feature-oriented commits, it's not very
hard to identify what code changes relate to which feature additions. So
I still don't see a need for that.

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/3] alleviate time drift with HPET periodic timers

2011-03-22 Thread Avi Kivity

On 03/18/2011 05:54 PM, Ulrich Obergfell wrote:

> Please review and please comment.


Some procedural remarks:

- put all patches in the same thread.  'git send-email' can do that 
automatically for you.  This really helps reviewers, at least with some 
email readers
- give individual patches meaningful subjects (not 'alleviate time drift 
with HPET periodic timers' for all patches + subject letter)

- reduce the amount of ifdefs in your patches

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [COMMIT] [WIN-GUEST-DRIVERS] Balloon - remove WMI usage. Remove wmi.c.

2011-03-22 Thread Yan Vugenfirer
Hello Vadim,

Can you check this issue?

Thanks,
Yan.

> -----Original Message-----
> From: ya su [mailto:suya94...@gmail.com]
> Sent: Saturday, March 19, 2011 5:28 AM
> To: Yan Vugenfirer
> Cc: kvm@vger.kernel.org
> Subject: Re: [COMMIT] [WIN-GUEST-DRIVERS] Balloon - remove WMI usage.
> Remove wmi.c.
>
> Yan:
>
>  I have tested the newest balloon driver (from 1.1.16) on Windows
> Server 2003; balloon.sys cannot be installed successfully and returns
> error code 10. Have you tested this or any updates? Thanks.
>
> Regards.
>
> Green.
>
>
> 2010/2/15 Yan Vugenfirer :
> > repository: C:/dev/kvm-guest-drivers-windows
> > branch: master
> > commit 7ab588f373eda9d08a497e969739019d2075a6d2
> > Author: Yan Vugenfirer 
> > Date:   Mon Feb 15 15:01:36 2010 +0200
> >
> >    [WIN-GUEST-DRIVERS] Balloon - remove WMI usage. Remove wmi.c.
> >
> >        Signed-off-by: Vadim Rozenfeld
> >
> > diff --git a/Balloon/BalloonWDF/wmi.c b/Balloon/BalloonWDF/wmi.c
> > deleted file mode 100644
> > index 70a9270..000
> > --- a/Balloon/BalloonWDF/wmi.c
> > +++ /dev/null
> > @@ -1,90 +0,0 @@
> > -
> /**
> > - * Copyright (c) 2009  Red Hat, Inc.
> > - *
> > - * File: device.c
> > - *
> > - * Author(s):
> > - *
> > - * This file contains WMI support routines
> > - *
> > - * This work is licensed under the terms of the GNU GPL, version 2.
>  See
> > - * the COPYING file in the top-level directory.
> > - *
> > -
> **/
> > -#include "precomp.h"
> > -
> > -#if defined(EVENT_TRACING)
> > -#include "wmi.tmh"
> > -#endif
> > -
> > -
> > -#define MOFRESOURCENAME L"MofResourceName"
> > -
> > -#ifdef ALLOC_PRAGMA
> > -#pragma alloc_text(PAGE, WmiRegistration)
> > -#pragma alloc_text(PAGE, EvtWmiDeviceInfoQueryInstance)
> > -#endif
> > -
> > -NTSTATUS
> > -WmiRegistration(
> > -    WDFDEVICE      Device
> > -    )
> > -{
> > -    WDF_WMI_PROVIDER_CONFIG providerConfig;
> > -    WDF_WMI_INSTANCE_CONFIG instanceConfig;
> > -    NTSTATUS        status;
> > -    DECLARE_CONST_UNICODE_STRING(mofRsrcName, MOFRESOURCENAME);
> > -
> > -    PAGED_CODE();
> > -
> > -    TraceEvents(TRACE_LEVEL_INFORMATION, DBG_PNP, "-->
> WmiRegistration\n");
> > -
> > -    status = WdfDeviceAssignMofResourceName(Device, &mofRsrcName);
> > -    if (!NT_SUCCESS(status)) {
> > -        TraceEvents(TRACE_LEVEL_ERROR, DBG_PNP,
> > -                     "WdfDeviceAssignMofResourceName failed 0x%x",
> status);
> > -        return status;
> > -    }
> > -
> > -    WDF_WMI_PROVIDER_CONFIG_INIT(&providerConfig,
> &GUID_DEV_WMI_BALLOON);
> > -    providerConfig.MinInstanceBufferSize = sizeof(ULONGLONG);
> > -
> > -    WDF_WMI_INSTANCE_CONFIG_INIT_PROVIDER_CONFIG(&instanceConfig,
> &providerConfig);
> > -    instanceConfig.Register = TRUE;
> > -    instanceConfig.EvtWmiInstanceQueryInstance =
> EvtWmiDeviceInfoQueryInstance;
> > -
> > -    status = WdfWmiInstanceCreate(Device,
> > -                                  &instanceConfig,
> > -                                  WDF_NO_OBJECT_ATTRIBUTES,
> > -                                  WDF_NO_HANDLE);
> > -    if (!NT_SUCCESS(status)) {
> > -        TraceEvents(TRACE_LEVEL_ERROR, DBG_PNP,
> > -                     "WdfWmiInstanceCreate failed 0x%x", status);
> > -        return status;
> > -    }
> > -
> > -    TraceEvents(TRACE_LEVEL_INFORMATION, DBG_PNP, "<--
> WmiRegistration\n");
> > -    return status;
> > -}
> > -
> > -NTSTATUS
> > -EvtWmiDeviceInfoQueryInstance(
> > -    __in  WDFWMIINSTANCE WmiInstance,
> > -    __in  ULONG OutBufferSize,
> > -    __out_bcount_part(OutBufferSize, *BufferUsed) PVOID OutBuffer,
> > -    __out PULONG BufferUsed
> > -    )
> > -{
> > -    PDRIVER_CONTEXT drvCxt = GetDriverContext(WdfGetDriver());
> > -
> > -    PAGED_CODE();
> > -
> > -    TraceEvents(TRACE_LEVEL_VERBOSE, DBG_WMI, "-->
> EvtWmiDeviceInfoQueryInstance\n");
> > -
> > -    RtlZeroMemory(OutBuffer, sizeof(ULONGLONG));
> > -    *(ULONGLONG*) OutBuffer = (ULONGLONG)drvCxt->num_pages;
> > -    *BufferUsed = sizeof(ULONGLONG);
> > -
> > -    TraceEvents(TRACE_LEVEL_VERBOSE, DBG_WMI, "<--
> EvtWmiDeviceInfoQueryInstance\n");
> > -    return STATUS_SUCCESS;
> > -}
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm-
> commits" in
> > the body of a message to majord...@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> >
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/3] alleviate time drift with HPET periodic timers

2011-03-22 Thread Ulrich Obergfell

>> Part 3 of the patch implements the following options for the
>> 'configure' script.
>>
>> --disable-hpet-driftfix
>> --enable-hpet-driftfix
> 
> I see no benefit in this configurability. Just make the driftfix
> unconditionally available, runtime-disabled by default for now until it
> matured and there is no downside in enabling it all the time.


Many Thanks Jan,

I enclosed the code in '#ifdef CONFIG_HPET_DRIFTFIX ... #endif'
so that it can be easily identified (and removed if the generic API
would be implemented some day). Since the ifdef's are already there
I added the configuration option for convenience. As you don't see
any benefit in this option, I can remove that part of the patch.
However, I'd suggest to keep the ifdef's and do the following:

- Rename to '#ifdef HPET_DRIFTFIX ... #endif' to make it clear that
  this is not controlled via a configuration option.

- Add '#define HPET_DRIFTFIX' to hw/hpet_emul.h.

Do you agree ?


Regards,

Uli
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/3] alleviate time drift with HPET periodic timers

2011-03-22 Thread Ulrich Obergfell

>> Part 1 of the patch implements the following QEMU command line option.
>>
>> -hpet [device=none|present][,driftfix=none|slew]
> 
> Just define driftfix as property of the hpet device. That way it can be
> controlled both globally (-global hpet.driftfix=...) and per hpet block
> (once we support instantiating >1 of them).


Many Thanks Jan,

I started investigating code changes. I'm thinking of ...

- adding a new field to the HPETState structure.

uint32_t driftfix;

- adding the property 'driftfix' to the DeviceInfo structure.

DEFINE_PROP_BIT("driftfix", HPETState, driftfix, 0, false)

  Using a single bit so that the option syntax would be, e.g.:

-global hpet.driftfix=on (Default is 'off')

- Replace all 'if (hpet_driftfix ...)' by:

if ((HPETState)s->driftfix ...)


Regards,

Uli
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Biweekly KVM Test report, kernel a72e315c... qemu b73357ec...

2011-03-22 Thread Avi Kivity
On 03/21/2011 06:51 PM, Avi Kivity wrote:
> On 03/21/2011 06:26 PM, Avi Kivity wrote:
> > On 03/21/2011 02:35 PM, Avi Kivity wrote:
> > > On 03/21/2011 04:47 AM, Ren, Yongjie wrote:
> > > > Hi all,
> > > > This is KVM test result against kvm.git 
> > > > a72e315c509376bbd1e121219c3ad9f23973923f based on kernel 2.6.38-rc6+, 
> > > > and qemu-kvm.git b73357ecd2b14c057134cb71d29447b5b988c516.
> > > >
> > > > The VT-d bug 730441 concerning "nomsi NIC" has existed for two weeks.
> > > > We found another two issues: one about "Save/Restore" , the other about 
> > > > "Live Migration".
> > > >
> > > > New issues:
> > > > 1. [KVM-User] I/O errors after "Save/Restore"
> > > >  https://bugs.launchpad.net/qemu/+bug/739088.
> > >
> > > Probably the same as the next issue.
> > >
> > > > 2. [KVM-User] guest hangs when using network after live migration
> > > >   https://bugs.launchpad.net/qemu/+bug/739092
> > >
> > > Confirmed, autotest Fedora.32.migrate.tmp also fails. Looks like the
> > > recent qemu.git merge. I am bisecting to find out more.
> > >
> >
> > 82fa39b75181b730d6d4d09f443bd26bcfcd045c is the first bad commit
> > commit 82fa39b75181b730d6d4d09f443bd26bcfcd045c
> > Author: Juan Quintela 
> > Date: Thu Mar 10 12:33:49 2011 +0100
> >
> > vmstate: Fix varrays with uint8 indexes
>
> With this reverted, qemu-kvm.git master appears to work.

qemu-kvm.git master is fixed now.

-- 
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/6] KVM: Use syscore_ops instead of sysdev class and sysdev

2011-03-22 Thread Avi Kivity

On 03/22/2011 01:37 AM, Rafael J. Wysocki wrote:

> From: Rafael J. Wysocki
>
> KVM uses a sysdev class and a sysdev for executing kvm_suspend()
> after interrupts have been turned off on the boot CPU (during system
> suspend) and for executing kvm_resume() before turning on interrupts
> on the boot CPU (during system resume).  However, since both of these
> functions ignore their arguments, the entire mechanism may be
> replaced with a struct syscore_ops object which is simpler.



Acked-by: Avi Kivity 

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] posix-timers: RCU conversion

2011-03-22 Thread Ben Nagy
On Tue, Mar 22, 2011 at 12:54 PM, Eric Dumazet  wrote:
> Ben Nagy reported a scalability problem with KVM/QEMU that hit very hard
> a single spinlock (idr_lock) in posix-timers code, on its 48 core
> machine.

Hi all,

Thanks a lot for all the help so far. We've tested with Eric's patch.

First up, here's our version of the patch for the current ubuntu
kernel from git:
http://paste.ubuntu.com/583668/

Here's top with 96 idle guests running:
top - 16:47:53 up  1:09,  3 users,  load average: 0.00, 0.01, 0.05
Tasks: 499 total,   3 running, 496 sleeping,   0 stopped,   0 zombie
Cpu(s):  1.9%us,  3.2%sy,  0.0%ni, 95.0%id,  0.0%wa,  0.0%hi,  0.0%si,  0.0%st
Mem:  99068656k total, 13121096k used, 85947560k free,    22192k buffers
Swap:  2438140k total,        0k used,  2438140k free,  3597860k cached
 (much better!)

Start of perf top:

--
   PerfTop:   10318 irqs/sec  kernel:97.4%  exact:  0.0% [1000Hz cycles],  (all, 48 CPUs)
--

 samples  pcnt function              DSO
 _______ _____ _____________________ ___________________________________________________________

95444.00 59.3% __ticket_spin_lock    [kernel.kallsyms]
12937.00  8.0% native_safe_halt      [kernel.kallsyms]
 6149.00  3.8% kvm_get_cs_db_l_bits  /lib/modules/2.6.38-7-server/kernel/arch/x86/kvm/kvm.ko
 5105.00  3.2% tg_load_down          [kernel.kallsyms]
 5088.00  3.2% svm_vcpu_run          /lib/modules/2.6.38-7-server/kernel/arch/x86/kvm/kvm-amd.ko
 4807.00  3.0% kvm_set_pfn_dirty     /lib/modules/2.6.38-7-server/kernel/arch/x86/kvm/kvm.ko
 2855.00  1.8% ktime_get             [kernel.kallsyms]
 1535.00  1.0% find_busiest_group    [kernel.kallsyms]
 1386.00  0.9% find_next_bit         [kernel.kallsyms]


Start of perf report -g
55.26%kvm  [kernel.kallsyms] [k] __ticket_spin_lock
  |
  --- __ticket_spin_lock
 |
 |--94.68%-- _raw_spin_lock
 |  |
 |  |--97.55%-- double_rq_lock
 |  |  load_balance
 |  |  idle_balance
 |  |  schedule
 |  |  |
 |  |  |--60.56%-- schedule_hrtimeout_range_clock
 |  |  |  schedule_hrtimeout_range
 |  |  |  poll_schedule_timeout
 |  |  |  do_select
 |  |  |  core_sys_select
 |  |  |  sys_select
 |  |  |  system_call_fastpath


Here is the perf.data from the unpatched (non debug) kernel
http://www.coseinc.com/woigbfwr32/perf.data

Here is the perf.data from the patched (non debug) kernel
http://www.coseinc.com/woigbfwr32/perf_patched.data

I think we're certainly in 'it's going to be useable' territory now,
but any further improvements or patches to test would of course be
gratefully received! Next step from my end is to test the guests under
load, unless there are any other suggestions.

I'm extremely impressed by the speed and professionalism of the
response to this problem, both from those on #kvm and the widening
circle of those on this email thread.

Many thanks!

Cheers,

ben
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] posix-timers: RCU conversion

2011-03-22 Thread Eric Dumazet
Ben Nagy reported a scalability problem with KVM/QEMU that hit very hard
a single spinlock (idr_lock) in posix-timers code, on its 48 core
machine.

Even on a 16 cpu machine (2x4x2), a single test can show 98% of cpu time
used in ticket_spin_lock, from lock_timer

Ref: http://www.spinics.net/lists/kvm/msg51526.html

Switching to RCU is quite easy, IDR being already RCU ready.

idr_lock should be locked only for an insert/delete, not a lookup.

Benchmark on a 2x4x2 machine, 16 processes calling timer_gettime().

Before :

real    1m18.669s
user    0m1.346s
sys     1m17.180s

After :

real    0m3.296s
user    0m1.366s
sys     0m1.926s


Reported-by: Ben Nagy 
Signed-off-by: Eric Dumazet 
Cc: Avi Kivity 
Cc: Thomas Gleixner 
Cc: John Stultz 
Cc: Richard Cochran 
Cc: Paul E. McKenney 
---
 include/linux/posix-timers.h |1 +
 kernel/posix-timers.c|   25 ++---
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index d51243a..5dc27ca 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -81,6 +81,7 @@ struct k_itimer {
unsigned long expires;
} mmtimer;
} it;
+   struct rcu_head rcu;
 };
 
 struct k_clock {
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 4c01249..acb9be9 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -491,6 +491,13 @@ static struct k_itimer * alloc_posix_timer(void)
return tmr;
 }
 
+static void k_itimer_rcu_free(struct rcu_head *head)
+{
+   struct k_itimer *tmr = container_of(head, struct k_itimer, rcu);
+
+   kmem_cache_free(posix_timers_cache, tmr);
+}
+
 #define IT_ID_SET  1
 #define IT_ID_NOT_SET  0
 static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
@@ -503,7 +510,7 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
}
put_pid(tmr->it_pid);
sigqueue_free(tmr->sigq);
-   kmem_cache_free(posix_timers_cache, tmr);
+   call_rcu(&tmr->rcu, k_itimer_rcu_free);
 }
 
 static struct k_clock *clockid_to_kclock(const clockid_t id)
@@ -631,22 +638,18 @@ out:
 static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags)
 {
struct k_itimer *timr;
-   /*
-* Watch out here.  We do a irqsave on the idr_lock and pass the
-* flags part over to the timer lock.  Must not let interrupts in
-* while we are moving the lock.
-*/
-   spin_lock_irqsave(&idr_lock, *flags);
+
+   rcu_read_lock();
timr = idr_find(&posix_timers_id, (int)timer_id);
if (timr) {
-   spin_lock(&timr->it_lock);
+   spin_lock_irqsave(&timr->it_lock, *flags);
if (timr->it_signal == current->signal) {
-   spin_unlock(&idr_lock);
+   rcu_read_unlock();
return timr;
}
-   spin_unlock(&timr->it_lock);
+   spin_unlock_irqrestore(&timr->it_lock, *flags);
}
-   spin_unlock_irqrestore(&idr_lock, *flags);
+   rcu_read_unlock();
 
return NULL;
 }


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html