[PATCH 09/18] Introduce event-tap.

2011-04-25 Thread OHMURA Kei
event-tap controls when to start an FT transaction, and provides proxy
functions to be called from net/block devices.  During an FT transaction, it
queues up net/block requests, and flushes them when the transaction
completes.

Signed-off-by: OHMURA Kei <ohmura@lab.ntt.co.jp>
Signed-off-by: Yoshiaki Tamura <tamura.yoshi...@lab.ntt.co.jp>
---
 Makefile.target |1 +
 event-tap.c |  940 +++
 event-tap.h |   44 +++
 qemu-tool.c |   28 ++
 trace-events|   10 +
 5 files changed, 1023 insertions(+), 0 deletions(-)
 create mode 100644 event-tap.c
 create mode 100644 event-tap.h

diff --git a/Makefile.target b/Makefile.target
index 0e0ef36..e489df4 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -199,6 +199,7 @@ obj-y += rwhandler.o
 obj-$(CONFIG_KVM) += kvm.o kvm-all.o
 obj-$(CONFIG_NO_KVM) += kvm-stub.o
 LIBS+=-lz
+obj-y += event-tap.o
 
 QEMU_CFLAGS += $(VNC_TLS_CFLAGS)
 QEMU_CFLAGS += $(VNC_SASL_CFLAGS)
diff --git a/event-tap.c b/event-tap.c
new file mode 100644
index 000..95c147a
--- /dev/null
+++ b/event-tap.c
@@ -0,0 +1,940 @@
+/*
+ * Event Tap functions for QEMU
+ *
+ * Copyright (c) 2010 Nippon Telegraph and Telephone Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include qemu-common.h
+#include qemu-error.h
+#include block.h
+#include block_int.h
+#include ioport.h
+#include osdep.h
+#include sysemu.h
+#include hw/hw.h
+#include net.h
+#include event-tap.h
+#include trace.h
+
+enum EVENT_TAP_STATE {
+EVENT_TAP_OFF,
+EVENT_TAP_ON,
+EVENT_TAP_SUSPEND,
+EVENT_TAP_FLUSH,
+EVENT_TAP_LOAD,
+EVENT_TAP_REPLAY,
+};
+
+static enum EVENT_TAP_STATE event_tap_state = EVENT_TAP_OFF;
+
+typedef struct EventTapIOport {
+uint32_t address;
+uint32_t data;
+int  index;
+} EventTapIOport;
+
+#define MMIO_BUF_SIZE 8
+
+typedef struct EventTapMMIO {
+uint64_t address;
+uint8_t  buf[MMIO_BUF_SIZE];
+int  len;
+} EventTapMMIO;
+
+typedef struct EventTapNetReq {
+char *device_name;
+int iovcnt;
+int vlan_id;
+bool vlan_needed;
+bool async;
+struct iovec *iov;
+NetPacketSent *sent_cb;
+} EventTapNetReq;
+
+#define MAX_BLOCK_REQUEST 32
+
+typedef struct EventTapAIOCB EventTapAIOCB;
+
+typedef struct EventTapBlkReq {
+char *device_name;
+int num_reqs;
+int num_cbs;
+bool is_flush;
+BlockRequest reqs[MAX_BLOCK_REQUEST];
+EventTapAIOCB *acb[MAX_BLOCK_REQUEST];
+} EventTapBlkReq;
+
+#define EVENT_TAP_IOPORT (1 << 0)
+#define EVENT_TAP_MMIO   (1 << 1)
+#define EVENT_TAP_NET    (1 << 2)
+#define EVENT_TAP_BLK    (1 << 3)
+
+#define EVENT_TAP_TYPE_MASK (EVENT_TAP_NET - 1)
+
+typedef struct EventTapLog {
+int mode;
+union {
+EventTapIOport ioport;
+EventTapMMIO mmio;
+};
+union {
+EventTapNetReq net_req;
+EventTapBlkReq blk_req;
+};
+QTAILQ_ENTRY(EventTapLog) node;
+} EventTapLog;
+
+struct EventTapAIOCB {
+BlockDriverAIOCB common;
+BlockDriverAIOCB *acb;
+bool is_canceled;
+};
+
+static EventTapLog *last_event_tap;
+
+static QTAILQ_HEAD(, EventTapLog) event_list;
+static QTAILQ_HEAD(, EventTapLog) event_pool;
+
+static int (*event_tap_cb)(void);
+static QEMUBH *event_tap_bh;
+static VMChangeStateEntry *vmstate;
+
+static void event_tap_bh_cb(void *p)
+{
+if (event_tap_cb) {
+event_tap_cb();
+}
+
+qemu_bh_delete(event_tap_bh);
+event_tap_bh = NULL;
+}
+
+static void event_tap_schedule_bh(void)
+{
+trace_event_tap_ignore_bh(!!event_tap_bh);
+
+/* if bh is already set, we ignore it for now */
+if (event_tap_bh) {
+return;
+}
+
+event_tap_bh = qemu_bh_new(event_tap_bh_cb, NULL);
+qemu_bh_schedule(event_tap_bh);
+
+return;
+}
+
+static void *event_tap_alloc_log(void)
+{
+EventTapLog *log;
+
+if (QTAILQ_EMPTY(event_pool)) {
+log = qemu_mallocz(sizeof(EventTapLog));
+} else {
+log = QTAILQ_FIRST(event_pool);
+QTAILQ_REMOVE(event_pool, log, node);
+}
+
+return log;
+}
+
+static void event_tap_free_net_req(EventTapNetReq *net_req);
+static void event_tap_free_blk_req(EventTapBlkReq *blk_req);
+
+static void event_tap_free_log(EventTapLog *log)
+{
+int mode = log-mode  ~EVENT_TAP_TYPE_MASK;
+
+if (mode == EVENT_TAP_NET) {
+event_tap_free_net_req(log-net_req);
+} else if (mode == EVENT_TAP_BLK) {
+event_tap_free_blk_req(log-blk_req);
+}
+
+log-mode = 0;
+
+/* return the log to event_pool */
+QTAILQ_INSERT_HEAD(event_pool, log, node);
+}
+
+static void event_tap_free_pool(void)
+{
+EventTapLog *log, *next;
+
+QTAILQ_FOREACH_SAFE(log, event_pool, node, next) {
+QTAILQ_REMOVE(event_pool, log, node);
+qemu_free(log);
+}
+}
+
+static void event_tap_free_net_req(EventTapNetReq *net_req)
+{
+int i;
+
+if (!net_req-async) {
+for 

[PATCH 09/18] Introduce event-tap.

2011-03-22 Thread Yoshiaki Tamura
event-tap controls when to start an FT transaction, and provides proxy
functions to be called from net/block devices.  During an FT transaction, it
queues up net/block requests, and flushes them when the transaction
completes.

Signed-off-by: Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp
Signed-off-by: OHMURA Kei ohmura@lab.ntt.co.jp
---
 Makefile.target |1 +
 event-tap.c |  940 +++
 event-tap.h |   44 +++
 qemu-tool.c |   27 ++
 trace-events|   10 +
 5 files changed, 1022 insertions(+), 0 deletions(-)
 create mode 100644 event-tap.c
 create mode 100644 event-tap.h

diff --git a/Makefile.target b/Makefile.target
index 62b102a..f088121 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -199,6 +199,7 @@ obj-y += rwhandler.o
 obj-$(CONFIG_KVM) += kvm.o kvm-all.o
 obj-$(CONFIG_NO_KVM) += kvm-stub.o
 LIBS+=-lz
+obj-y += event-tap.o
 
 QEMU_CFLAGS += $(VNC_TLS_CFLAGS)
 QEMU_CFLAGS += $(VNC_SASL_CFLAGS)
diff --git a/event-tap.c b/event-tap.c
new file mode 100644
index 000..95c147a
--- /dev/null
+++ b/event-tap.c
@@ -0,0 +1,940 @@
+/*
+ * Event Tap functions for QEMU
+ *
+ * Copyright (c) 2010 Nippon Telegraph and Telephone Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include qemu-common.h
+#include qemu-error.h
+#include block.h
+#include block_int.h
+#include ioport.h
+#include osdep.h
+#include sysemu.h
+#include hw/hw.h
+#include net.h
+#include event-tap.h
+#include trace.h
+
+enum EVENT_TAP_STATE {
+EVENT_TAP_OFF,
+EVENT_TAP_ON,
+EVENT_TAP_SUSPEND,
+EVENT_TAP_FLUSH,
+EVENT_TAP_LOAD,
+EVENT_TAP_REPLAY,
+};
+
+static enum EVENT_TAP_STATE event_tap_state = EVENT_TAP_OFF;
+
+typedef struct EventTapIOport {
+uint32_t address;
+uint32_t data;
+int  index;
+} EventTapIOport;
+
+#define MMIO_BUF_SIZE 8
+
+typedef struct EventTapMMIO {
+uint64_t address;
+uint8_t  buf[MMIO_BUF_SIZE];
+int  len;
+} EventTapMMIO;
+
+typedef struct EventTapNetReq {
+char *device_name;
+int iovcnt;
+int vlan_id;
+bool vlan_needed;
+bool async;
+struct iovec *iov;
+NetPacketSent *sent_cb;
+} EventTapNetReq;
+
+#define MAX_BLOCK_REQUEST 32
+
+typedef struct EventTapAIOCB EventTapAIOCB;
+
+typedef struct EventTapBlkReq {
+char *device_name;
+int num_reqs;
+int num_cbs;
+bool is_flush;
+BlockRequest reqs[MAX_BLOCK_REQUEST];
+EventTapAIOCB *acb[MAX_BLOCK_REQUEST];
+} EventTapBlkReq;
+
+#define EVENT_TAP_IOPORT (1  0)
+#define EVENT_TAP_MMIO   (1  1)
+#define EVENT_TAP_NET(1  2)
+#define EVENT_TAP_BLK(1  3)
+
+#define EVENT_TAP_TYPE_MASK (EVENT_TAP_NET - 1)
+
+typedef struct EventTapLog {
+int mode;
+union {
+EventTapIOport ioport;
+EventTapMMIO mmio;
+};
+union {
+EventTapNetReq net_req;
+EventTapBlkReq blk_req;
+};
+QTAILQ_ENTRY(EventTapLog) node;
+} EventTapLog;
+
+struct EventTapAIOCB {
+BlockDriverAIOCB common;
+BlockDriverAIOCB *acb;
+bool is_canceled;
+};
+
+static EventTapLog *last_event_tap;
+
+static QTAILQ_HEAD(, EventTapLog) event_list;
+static QTAILQ_HEAD(, EventTapLog) event_pool;
+
+static int (*event_tap_cb)(void);
+static QEMUBH *event_tap_bh;
+static VMChangeStateEntry *vmstate;
+
+static void event_tap_bh_cb(void *p)
+{
+if (event_tap_cb) {
+event_tap_cb();
+}
+
+qemu_bh_delete(event_tap_bh);
+event_tap_bh = NULL;
+}
+
+static void event_tap_schedule_bh(void)
+{
+trace_event_tap_ignore_bh(!!event_tap_bh);
+
+/* if bh is already set, we ignore it for now */
+if (event_tap_bh) {
+return;
+}
+
+event_tap_bh = qemu_bh_new(event_tap_bh_cb, NULL);
+qemu_bh_schedule(event_tap_bh);
+
+return;
+}
+
+static void *event_tap_alloc_log(void)
+{
+EventTapLog *log;
+
+if (QTAILQ_EMPTY(event_pool)) {
+log = qemu_mallocz(sizeof(EventTapLog));
+} else {
+log = QTAILQ_FIRST(event_pool);
+QTAILQ_REMOVE(event_pool, log, node);
+}
+
+return log;
+}
+
+static void event_tap_free_net_req(EventTapNetReq *net_req);
+static void event_tap_free_blk_req(EventTapBlkReq *blk_req);
+
+static void event_tap_free_log(EventTapLog *log)
+{
+int mode = log-mode  ~EVENT_TAP_TYPE_MASK;
+
+if (mode == EVENT_TAP_NET) {
+event_tap_free_net_req(log-net_req);
+} else if (mode == EVENT_TAP_BLK) {
+event_tap_free_blk_req(log-blk_req);
+}
+
+log-mode = 0;
+
+/* return the log to event_pool */
+QTAILQ_INSERT_HEAD(event_pool, log, node);
+}
+
+static void event_tap_free_pool(void)
+{
+EventTapLog *log, *next;
+
+QTAILQ_FOREACH_SAFE(log, event_pool, node, next) {
+QTAILQ_REMOVE(event_pool, log, node);
+qemu_free(log);
+}
+}
+
+static void event_tap_free_net_req(EventTapNetReq *net_req)
+{
+int i;
+
+if (!net_req-async) {
+for 

Re: [PATCH 09/18] Introduce event-tap.

2011-03-09 Thread ya su
Yoshi:

I hit one problem: if I kill an FT source VM, the destination FT VM
returns errors like the following:

qemu-system-x86_64: fill buffer failed, Resource temporarily unavailable
qemu-system-x86_64: recv header failed

The problem is that the destination VM cannot continue to run, because it was
interrupted in the middle of a transaction: some of the RAM has been
updated, but the rest has not.  Do you have any plan for rolling back to
cancel the interrupted transaction?  Thanks.


Green.



2011/3/9 Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp:
 ya su wrote:

 Yoshi:

     I think event-tap is a great idea, it remove the reading from disk
 which will increase ft effiency much better as your plan in later
 series.

     one question: IO read/write may dirty rams, but it is difficute to
 differ them from other dirty pages like caused by  running of
 softwares,  whether that means you need change all the emulated device
 realization?  actually I think it will not send too much rams caused
 by IO Read/Write in ram_save_live, but if It can event-tap IO
 read/write and replay on the other side, Does that means we don't need
 call qemu_savevm_state_full in ft transactoins?

 I'm not expecting to remove qemu_savevm_state_full in the transaction.  Just
 reduce the number of pages to be transfered as a result.

 Thanks,

 Yoshi


 Green.


 2011/3/9 Yoshiaki Tamuratamura.yoshi...@lab.ntt.co.jp:

 ya su wrote:

 2011/3/8 Yoshiaki Tamuratamura.yoshi...@lab.ntt.co.jp:

 ya su wrote:

 Yokshiaki:

     event-tap record block and io wirte events, and replay these on
 the other side, so block_save_live is useless during the latter ft
 phase, right? if so, I think it need to process the following code in
 block_save_live function:

 Actually no.  It just replays the last events only.  We do have patches
 that
 enable block replication without using block live migration, like the
 way
 you described above.  In that case, we disable block live migration
 when
  we
 go into ft mode.  We're thinking to propose it after this series get
 settled.

 so event-tap's objective is to initial a ft transaction, to start the
 sync. of ram/block/device states? if so, it need not change
 bdrv_aio_writev/bdrv_aio_flush normal process, on the other side it
 need not invokde bdrv_aio_writev either, right?

 Mostly yes, but because event-tap is queuing requests from block/net, it
 needs to flush queued requests after the transaction on the primary side.
  On the secondary, it currently doesn't have to invoke bdrv_aio_writev as
 you mentioned.  But will change soon to enable block replication with
 event-tap.




     if (stage == 1) {
         init_blk_migration(mon, f);

         /* start track dirty blocks */
         set_dirty_tracking(1);
     }
 --
 the following code will send block to the other side, as this will
 also be done by event-tap replay. I think it should placed in stage 3,
 before the assert line. (this may affect some stage 2 rate-limit
 then, so this can be placed in stage 2, though it looks ugly), another
 choice is to avoid the invocation of block_save_live, right?
 ---
     flush_blks(f);

     if (qemu_file_has_error(f)) {
         blk_mig_cleanup(mon);
         return 0;
     }

     blk_mig_reset_dirty_cursor();
 
     if (stage == 2) {


     another question is: since you event-tap io write(I think IO READ
 should also be event-tapped, as read may cause io chip state to
 change),  you then need not invoke qemu_savevm_state_full in
 qemu_savevm_trans_complete, right? thanks.

 It's not necessary to tap IO READ, but you can if you like.  We also
 have
 experimental patches for this to reduce rams to be transfered.  But I
 don't
 understand why we don't have to invoke qemu_savevm_state_full although
 I
 think we may reduce number of rams by replaying IO READ on the
 secondary.


 I first think the objective of io-Write event-tap is to reproduce the
 same device state on the other side, though I doubt this,  so I think
 IO-Read also should be recorded and replayed. since event-tap is only
 to initial a ft transaction, the sync. of states still depend on
 qemu_save_vm_live/full,  I understand the design now, thanks.

 but I don't understand why io-write event-tap can reduce transfered
 rams as you mentioned, the amount of rams only depend on dirty pages,
 IO write don't change the normal process unlike block write, right?

 The point is, if we can assure that IO read retrieves the same data on
 both
 sides, instead of dirtying the ram by read, meaning we have to transfer
 in
 the transaction, just replay the operation and get the same data on the
 otherside. Anyway, that's just a plan :)

 Thanks,

 Yoshi


 Thanks,

 Yoshi



 Green.



 2011/2/24 Yoshiaki Tamuratamura.yoshi...@lab.ntt.co.jp:

 event-tap controls when to start FT transaction, and provides proxy
 functions to called from net/block devices.  While FT 

Re: [PATCH 09/18] Introduce event-tap.

2011-03-09 Thread Yoshiaki Tamura

ya su wrote:

Yoshi:

 I meet one problem if I killed a ft source VM, the dest ft VM will
return errors as the following:

qemu-system-x86_64: fill buffer failed, Resource temporarily unavailable
qemu-system-x86_64: recv header failed

 the problem is that the dest VM can not continue to run, as it is
interrupted in the middle of a transaction, some of rams have been
updated, but the others not, do you have any plan for rolling back to
cancel the interrupted transaction? thanks.


No, it's not a problem.  This is one of the FAQs I get; just enter cont or c in
the secondary qemu, and it should run.


Thanks,

Yoshi




Green.



2011/3/9 Yoshiaki Tamuratamura.yoshi...@lab.ntt.co.jp:

ya su wrote:


Yoshi:

 I think event-tap is a great idea, it remove the reading from disk
which will increase ft effiency much better as your plan in later
series.

 one question: IO read/write may dirty rams, but it is difficute to
differ them from other dirty pages like caused by  running of
softwares,  whether that means you need change all the emulated device
realization?  actually I think it will not send too much rams caused
by IO Read/Write in ram_save_live, but if It can event-tap IO
read/write and replay on the other side, Does that means we don't need
call qemu_savevm_state_full in ft transactoins?


I'm not expecting to remove qemu_savevm_state_full in the transaction.  Just
reduce the number of pages to be transfered as a result.

Thanks,

Yoshi



Green.


2011/3/9 Yoshiaki Tamuratamura.yoshi...@lab.ntt.co.jp:


ya su wrote:


2011/3/8 Yoshiaki Tamuratamura.yoshi...@lab.ntt.co.jp:


ya su wrote:


Yokshiaki:

 event-tap record block and io wirte events, and replay these on
the other side, so block_save_live is useless during the latter ft
phase, right? if so, I think it need to process the following code in
block_save_live function:


Actually no.  It just replays the last events only.  We do have patches
that
enable block replication without using block live migration, like the
way
you described above.  In that case, we disable block live migration
when
  we
go into ft mode.  We're thinking to propose it after this series get
settled.


so event-tap's objective is to initial a ft transaction, to start the
sync. of ram/block/device states? if so, it need not change
bdrv_aio_writev/bdrv_aio_flush normal process, on the other side it
need not invokde bdrv_aio_writev either, right?


Mostly yes, but because event-tap is queuing requests from block/net, it
needs to flush queued requests after the transaction on the primary side.
  On the secondary, it currently doesn't have to invoke bdrv_aio_writev as
you mentioned.  But will change soon to enable block replication with
event-tap.







 if (stage == 1) {
 init_blk_migration(mon, f);

 /* start track dirty blocks */
 set_dirty_tracking(1);
 }
--
the following code will send block to the other side, as this will
also be done by event-tap replay. I think it should placed in stage 3,
before the assert line. (this may affect some stage 2 rate-limit
then, so this can be placed in stage 2, though it looks ugly), another
choice is to avoid the invocation of block_save_live, right?
---
 flush_blks(f);

 if (qemu_file_has_error(f)) {
 blk_mig_cleanup(mon);
 return 0;
 }

 blk_mig_reset_dirty_cursor();

 if (stage == 2) {


 another question is: since you event-tap io write(I think IO READ
should also be event-tapped, as read may cause io chip state to
change),  you then need not invoke qemu_savevm_state_full in
qemu_savevm_trans_complete, right? thanks.


It's not necessary to tap IO READ, but you can if you like.  We also
have
experimental patches for this to reduce rams to be transfered.  But I
don't
understand why we don't have to invoke qemu_savevm_state_full although
I
think we may reduce number of rams by replaying IO READ on the
secondary.



I first think the objective of io-Write event-tap is to reproduce the
same device state on the other side, though I doubt this,  so I think
IO-Read also should be recorded and replayed. since event-tap is only
to initial a ft transaction, the sync. of states still depend on
qemu_save_vm_live/full,  I understand the design now, thanks.

but I don't understand why io-write event-tap can reduce transfered
rams as you mentioned, the amount of rams only depend on dirty pages,
IO write don't change the normal process unlike block write, right?


The point is, if we can assure that IO read retrieves the same data on
both
sides, instead of dirtying the ram by read, meaning we have to transfer
in
the transaction, just replay the operation and get the same data on the
otherside. Anyway, that's just a plan :)

Thanks,

Yoshi




Thanks,

Yoshi




Green.



2011/2/24 Yoshiaki Tamuratamura.yoshi...@lab.ntt.co.jp:


event-tap controls when to 

Re: [PATCH 09/18] Introduce event-tap.

2011-03-08 Thread Yoshiaki Tamura

ya su wrote:

Yokshiaki:

 event-tap records block and I/O write events, and replays these on
the other side, so block_save_live is useless during the later FT
phase, right? If so, I think the following code in the
block_save_live function needs to be dealt with:


Actually, no.  It replays only the last events.  We do have patches that
enable block replication without using block live migration, in the way you
described above.  In that case, we disable block live migration when we go into
FT mode.  We're planning to propose them after this series gets settled.




 if (stage == 1) {
 init_blk_migration(mon, f);

 /* start track dirty blocks */
 set_dirty_tracking(1);
 }
--
the following code will send block to the other side, as this will
also be done by event-tap replay. I think it should placed in stage 3,
before the assert line. (this may affect some stage 2 rate-limit
then, so this can be placed in stage 2, though it looks ugly), another
choice is to avoid the invocation of block_save_live, right?
---
 flush_blks(f);

 if (qemu_file_has_error(f)) {
 blk_mig_cleanup(mon);
 return 0;
 }

 blk_mig_reset_dirty_cursor();

 if (stage == 2) {


 another question is: since you event-tap io write(I think IO READ
should also be event-tapped, as read may cause io chip state to
change),  you then need not invoke qemu_savevm_state_full in
qemu_savevm_trans_complete, right? thanks.


It's not necessary to tap IO READ, but you can if you like.  We also have
experimental patches for this to reduce the amount of RAM to be transferred.  But I don't
understand why we wouldn't have to invoke qemu_savevm_state_full, although I think
we may reduce the amount of RAM transferred by replaying IO READ on the secondary.


Thanks,

Yoshi




Green.



2011/2/24 Yoshiaki Tamuratamura.yoshi...@lab.ntt.co.jp:

event-tap controls when to start an FT transaction, and provides proxy
functions to be called from net/block devices.  During an FT transaction, it
queues up net/block requests, and flushes them when the transaction
completes.

Signed-off-by: Yoshiaki Tamuratamura.yoshi...@lab.ntt.co.jp
Signed-off-by: OHMURA Keiohmura@lab.ntt.co.jp
---
  Makefile.target |1 +
  event-tap.c |  940 +++
  event-tap.h |   44 +++
  qemu-tool.c |   28 ++
  trace-events|   10 +
  5 files changed, 1023 insertions(+), 0 deletions(-)
  create mode 100644 event-tap.c
  create mode 100644 event-tap.h

diff --git a/Makefile.target b/Makefile.target
index 220589e..da57efe 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -199,6 +199,7 @@ obj-y += rwhandler.o
  obj-$(CONFIG_KVM) += kvm.o kvm-all.o
  obj-$(CONFIG_NO_KVM) += kvm-stub.o
  LIBS+=-lz
+obj-y += event-tap.o

  QEMU_CFLAGS += $(VNC_TLS_CFLAGS)
  QEMU_CFLAGS += $(VNC_SASL_CFLAGS)
diff --git a/event-tap.c b/event-tap.c
new file mode 100644
index 000..95c147a
--- /dev/null
+++ b/event-tap.c
@@ -0,0 +1,940 @@
+/*
+ * Event Tap functions for QEMU
+ *
+ * Copyright (c) 2010 Nippon Telegraph and Telephone Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include qemu-common.h
+#include qemu-error.h
+#include block.h
+#include block_int.h
+#include ioport.h
+#include osdep.h
+#include sysemu.h
+#include hw/hw.h
+#include net.h
+#include event-tap.h
+#include trace.h
+
+enum EVENT_TAP_STATE {
+EVENT_TAP_OFF,
+EVENT_TAP_ON,
+EVENT_TAP_SUSPEND,
+EVENT_TAP_FLUSH,
+EVENT_TAP_LOAD,
+EVENT_TAP_REPLAY,
+};
+
+static enum EVENT_TAP_STATE event_tap_state = EVENT_TAP_OFF;
+
+typedef struct EventTapIOport {
+uint32_t address;
+uint32_t data;
+int  index;
+} EventTapIOport;
+
+#define MMIO_BUF_SIZE 8
+
+typedef struct EventTapMMIO {
+uint64_t address;
+uint8_t  buf[MMIO_BUF_SIZE];
+int  len;
+} EventTapMMIO;
+
+typedef struct EventTapNetReq {
+char *device_name;
+int iovcnt;
+int vlan_id;
+bool vlan_needed;
+bool async;
+struct iovec *iov;
+NetPacketSent *sent_cb;
+} EventTapNetReq;
+
+#define MAX_BLOCK_REQUEST 32
+
+typedef struct EventTapAIOCB EventTapAIOCB;
+
+typedef struct EventTapBlkReq {
+char *device_name;
+int num_reqs;
+int num_cbs;
+bool is_flush;
+BlockRequest reqs[MAX_BLOCK_REQUEST];
+EventTapAIOCB *acb[MAX_BLOCK_REQUEST];
+} EventTapBlkReq;
+
+#define EVENT_TAP_IOPORT (1  0)
+#define EVENT_TAP_MMIO   (1  1)
+#define EVENT_TAP_NET(1  2)
+#define EVENT_TAP_BLK(1  3)
+
+#define EVENT_TAP_TYPE_MASK (EVENT_TAP_NET - 1)
+
+typedef struct EventTapLog {
+int mode;
+union {
+EventTapIOport ioport;
+EventTapMMIO mmio;
+};
+union {
+EventTapNetReq net_req;
+EventTapBlkReq blk_req;
+};
+QTAILQ_ENTRY(EventTapLog) node;
+} 

Re: [PATCH 09/18] Introduce event-tap.

2011-03-08 Thread ya su
2011/3/8 Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp:
 ya su wrote:

 Yokshiaki:

     event-tap record block and io wirte events, and replay these on
 the other side, so block_save_live is useless during the latter ft
 phase, right? if so, I think it need to process the following code in
 block_save_live function:

 Actually no.  It just replays the last events only.  We do have patches that
 enable block replication without using block live migration, like the way
 you described above.  In that case, we disable block live migration when  we
 go into ft mode.  We're thinking to propose it after this series get
 settled.

So event-tap's objective is to initiate an FT transaction, to start the
sync of RAM/block/device states? If so, it need not change the normal
bdrv_aio_writev/bdrv_aio_flush process, and on the other side it
need not invoke bdrv_aio_writev either, right?



     if (stage == 1) {
         init_blk_migration(mon, f);

         /* start track dirty blocks */
         set_dirty_tracking(1);
     }
 --
 the following code will send block to the other side, as this will
 also be done by event-tap replay. I think it should placed in stage 3,
 before the assert line. (this may affect some stage 2 rate-limit
 then, so this can be placed in stage 2, though it looks ugly), another
 choice is to avoid the invocation of block_save_live, right?
 ---
     flush_blks(f);

     if (qemu_file_has_error(f)) {
         blk_mig_cleanup(mon);
         return 0;
     }

     blk_mig_reset_dirty_cursor();
 
     if (stage == 2) {


     another question is: since you event-tap io write(I think IO READ
 should also be event-tapped, as read may cause io chip state to
 change),  you then need not invoke qemu_savevm_state_full in
 qemu_savevm_trans_complete, right? thanks.

 It's not necessary to tap IO READ, but you can if you like.  We also have
 experimental patches for this to reduce rams to be transfered.  But I don't
 understand why we don't have to invoke qemu_savevm_state_full although I
 think we may reduce number of rams by replaying IO READ on the secondary.


At first I thought the objective of IO-write event-tap was to reproduce the
same device state on the other side (though I doubted this), so I thought
IO-read should also be recorded and replayed.  Since event-tap only serves
to initiate an FT transaction, the sync of states still depends on
qemu_save_vm_live/full.  I understand the design now, thanks.

But I don't understand why IO-write event-tap can reduce the transferred
RAM as you mentioned: the amount of RAM only depends on dirty pages, and
IO write doesn't change the normal process, unlike block write, right?

 Thanks,

 Yoshi



 Green.



 2011/2/24 Yoshiaki Tamuratamura.yoshi...@lab.ntt.co.jp:

 event-tap controls when to start FT transaction, and provides proxy
 functions to called from net/block devices.  While FT transaction, it
 queues up net/block requests, and flush them when the transaction gets
 completed.

 Signed-off-by: Yoshiaki Tamuratamura.yoshi...@lab.ntt.co.jp
 Signed-off-by: OHMURA Keiohmura@lab.ntt.co.jp
 ---
  Makefile.target |    1 +
  event-tap.c     |  940
 +++
  event-tap.h     |   44 +++
  qemu-tool.c     |   28 ++
  trace-events    |   10 +
  5 files changed, 1023 insertions(+), 0 deletions(-)
  create mode 100644 event-tap.c
  create mode 100644 event-tap.h

 diff --git a/Makefile.target b/Makefile.target
 index 220589e..da57efe 100644
 --- a/Makefile.target
 +++ b/Makefile.target
 @@ -199,6 +199,7 @@ obj-y += rwhandler.o
  obj-$(CONFIG_KVM) += kvm.o kvm-all.o
  obj-$(CONFIG_NO_KVM) += kvm-stub.o
  LIBS+=-lz
 +obj-y += event-tap.o

  QEMU_CFLAGS += $(VNC_TLS_CFLAGS)
  QEMU_CFLAGS += $(VNC_SASL_CFLAGS)
 diff --git a/event-tap.c b/event-tap.c
 new file mode 100644
 index 000..95c147a
 --- /dev/null
 +++ b/event-tap.c
 @@ -0,0 +1,940 @@
 +/*
 + * Event Tap functions for QEMU
 + *
 + * Copyright (c) 2010 Nippon Telegraph and Telephone Corporation.
 + *
 + * This work is licensed under the terms of the GNU GPL, version 2.  See
 + * the COPYING file in the top-level directory.
 + */
 +
 +#include qemu-common.h
 +#include qemu-error.h
 +#include block.h
 +#include block_int.h
 +#include ioport.h
 +#include osdep.h
 +#include sysemu.h
 +#include hw/hw.h
 +#include net.h
 +#include event-tap.h
 +#include trace.h
 +
 +enum EVENT_TAP_STATE {
 +    EVENT_TAP_OFF,
 +    EVENT_TAP_ON,
 +    EVENT_TAP_SUSPEND,
 +    EVENT_TAP_FLUSH,
 +    EVENT_TAP_LOAD,
 +    EVENT_TAP_REPLAY,
 +};
 +
 +static enum EVENT_TAP_STATE event_tap_state = EVENT_TAP_OFF;
 +
 +typedef struct EventTapIOport {
 +    uint32_t address;
 +    uint32_t data;
 +    int      index;
 +} EventTapIOport;
 +
 +#define MMIO_BUF_SIZE 8
 +
 +typedef struct EventTapMMIO {
 +    uint64_t address;
 +    uint8_t  buf[MMIO_BUF_SIZE];
 +    int      len;
 +} EventTapMMIO;
 +
 +typedef 

Re: [PATCH 09/18] Introduce event-tap.

2011-03-08 Thread ya su
Yoshi:

I think event-tap is a great idea; it removes the reading from disk,
which will increase FT efficiency considerably, as you plan in a later
series.

One question: IO read/write may dirty RAM, but it is difficult to
distinguish those pages from other dirty pages, such as those caused by running
software.  Does that mean you need to change all the emulated device
implementations?  Actually, I think not much RAM dirtied
by IO read/write will be sent in ram_save_live, but if it can event-tap IO
read/write and replay them on the other side, does that mean we don't need to
call qemu_savevm_state_full in FT transactions?

Green.


2011/3/9 Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp:
 ya su wrote:

 2011/3/8 Yoshiaki Tamuratamura.yoshi...@lab.ntt.co.jp:

 ya su wrote:

 Yokshiaki:

     event-tap record block and io wirte events, and replay these on
 the other side, so block_save_live is useless during the latter ft
 phase, right? if so, I think it need to process the following code in
 block_save_live function:

 Actually no.  It just replays the last events only.  We do have patches
 that
 enable block replication without using block live migration, like the way
 you described above.  In that case, we disable block live migration when
  we
 go into ft mode.  We're thinking to propose it after this series get
 settled.

 so event-tap's objective is to initial a ft transaction, to start the
 sync. of ram/block/device states? if so, it need not change
 bdrv_aio_writev/bdrv_aio_flush normal process, on the other side it
 need not invokde bdrv_aio_writev either, right?

 Mostly yes, but because event-tap is queuing requests from block/net, it
 needs to flush queued requests after the transaction on the primary side.
  On the secondary, it currently doesn't have to invoke bdrv_aio_writev as
 you mentioned.  But will change soon to enable block replication with
 event-tap.




     if (stage == 1) {
         init_blk_migration(mon, f);

         /* start track dirty blocks */
         set_dirty_tracking(1);
     }
 --
 the following code will send block to the other side, as this will
 also be done by event-tap replay. I think it should placed in stage 3,
 before the assert line. (this may affect some stage 2 rate-limit
 then, so this can be placed in stage 2, though it looks ugly), another
 choice is to avoid the invocation of block_save_live, right?
 ---
     flush_blks(f);

     if (qemu_file_has_error(f)) {
         blk_mig_cleanup(mon);
         return 0;
     }

     blk_mig_reset_dirty_cursor();
 
     if (stage == 2) {


     another question is: since you event-tap io write(I think IO READ
 should also be event-tapped, as read may cause io chip state to
 change),  you then need not invoke qemu_savevm_state_full in
 qemu_savevm_trans_complete, right? thanks.

 It's not necessary to tap IO READ, but you can if you like.  We also have
 experimental patches for this to reduce rams to be transfered.  But I
 don't
 understand why we don't have to invoke qemu_savevm_state_full although I
 think we may reduce number of rams by replaying IO READ on the secondary.


 I first think the objective of io-Write event-tap is to reproduce the
 same device state on the other side, though I doubt this,  so I think
 IO-Read also should be recorded and replayed. since event-tap is only
 to initial a ft transaction, the sync. of states still depend on
 qemu_save_vm_live/full,  I understand the design now, thanks.

 But I don't understand why io-write event-tap can reduce the transferred
 RAM as you mentioned; the amount of RAM only depends on dirty pages, and
 IO write doesn't change the normal process unlike block write, right?

 The point is, if we can ensure that IO read retrieves the same data on both
 sides, then instead of dirtying the RAM by the read (meaning we have to
 transfer it in the transaction), we just replay the operation and get the
 same data on the other side. Anyway, that's just a plan :)

 Thanks,

 Yoshi


 Thanks,

 Yoshi



 Green.



 2011/2/24 Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp:

 event-tap controls when to start FT transaction, and provides proxy
 functions to called from net/block devices.  While FT transaction, it
 queues up net/block requests, and flush them when the transaction gets
 completed.

 Signed-off-by: Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp
 Signed-off-by: OHMURA Kei ohmura@lab.ntt.co.jp
 ---
  Makefile.target |    1 +
  event-tap.c     |  940
 +++
  event-tap.h     |   44 +++
  qemu-tool.c     |   28 ++
  trace-events    |   10 +
  5 files changed, 1023 insertions(+), 0 deletions(-)
  create mode 100644 event-tap.c
  create mode 100644 event-tap.h

 diff --git a/Makefile.target b/Makefile.target
 index 220589e..da57efe 100644
 --- a/Makefile.target
 +++ b/Makefile.target
 @@ -199,6 +199,7 @@ obj-y += rwhandler.o
  obj-$(CONFIG_KVM) += kvm.o 

Re: [PATCH 09/18] Introduce event-tap.

2011-03-08 Thread Yoshiaki Tamura

ya su wrote:

Yoshi:

 I think event-tap is a great idea; it removes the reading from disk,
which will improve ft efficiency much more, as you plan in a later
series.

 One question: IO read/write may dirty RAM, but it is difficult to
distinguish those pages from other dirty pages, such as those caused by
running software.  Does that mean you need to change all the emulated device
implementations?  Actually, I think not much RAM dirtied by IO read/write
will be sent in ram_save_live, but if it can event-tap IO
read/write and replay on the other side, does that mean we don't need to
call qemu_savevm_state_full in ft transactions?


I'm not expecting to remove qemu_savevm_state_full in the transaction.  Just 
reduce the number of pages to be transferred as a result.


Thanks,

Yoshi



Green.


2011/3/9 Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp:

ya su wrote:


2011/3/8 Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp:


ya su wrote:


Yoshiaki:

 event-tap records block and io write events, and replays these on
the other side, so block_save_live is useless during the latter ft
phase, right? If so, I think it needs to process the following code in
the block_save_live function:


Actually no.  It just replays the last events only.  We do have patches
that enable block replication without using block live migration, like the way
you described above.  In that case, we disable block live migration when
we go into ft mode.  We're thinking of proposing it after this series gets
settled.


So event-tap's objective is to initiate an ft transaction, to start the
sync. of ram/block/device states? If so, it need not change the
bdrv_aio_writev/bdrv_aio_flush normal process, and on the other side it
need not invoke bdrv_aio_writev either, right?


Mostly yes, but because event-tap is queuing requests from block/net, it
needs to flush queued requests after the transaction on the primary side.
  On the secondary, it currently doesn't have to invoke bdrv_aio_writev as
you mentioned.  But will change soon to enable block replication with
event-tap.







 if (stage == 1) {
 init_blk_migration(mon, f);

 /* start track dirty blocks */
 set_dirty_tracking(1);
 }
--
the following code will send block to the other side, as this will
also be done by event-tap replay. I think it should be placed in stage 3,
before the assert line. (this may affect some stage 2 rate-limit
then, so this can be placed in stage 2, though it looks ugly), another
choice is to avoid the invocation of block_save_live, right?
---
 flush_blks(f);

 if (qemu_file_has_error(f)) {
 blk_mig_cleanup(mon);
 return 0;
 }

 blk_mig_reset_dirty_cursor();

 if (stage == 2) {


 another question is: since you event-tap io write(I think IO READ
should also be event-tapped, as read may cause io chip state to
change),  you then need not invoke qemu_savevm_state_full in
qemu_savevm_trans_complete, right? thanks.


It's not necessary to tap IO READ, but you can if you like.  We also have
experimental patches for this to reduce the RAM to be transferred.  But I
don't understand why we don't have to invoke qemu_savevm_state_full, although
I think we may reduce the amount of RAM by replaying IO READ on the secondary.



At first I thought the objective of io-write event-tap was to reproduce the
same device state on the other side (though I doubted this), so I thought
IO-Read should also be recorded and replayed. Since event-tap is only
there to initiate an ft transaction, the sync. of states still depends on
qemu_save_vm_live/full.  I understand the design now, thanks.

But I don't understand why io-write event-tap can reduce the transferred
RAM as you mentioned; the amount of RAM only depends on dirty pages, and
IO write doesn't change the normal process unlike block write, right?


The point is, if we can ensure that IO read retrieves the same data on both
sides, then instead of dirtying the RAM by the read (meaning we have to
transfer it in the transaction), we just replay the operation and get the
same data on the other side. Anyway, that's just a plan :)

Thanks,

Yoshi




Thanks,

Yoshi




Green.



2011/2/24 Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp:


event-tap controls when to start FT transaction, and provides proxy
functions to called from net/block devices.  While FT transaction, it
queues up net/block requests, and flush them when the transaction gets
completed.

Signed-off-by: Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp
Signed-off-by: OHMURA Kei ohmura@lab.ntt.co.jp
---
  Makefile.target |1 +
  event-tap.c |  940
+++
  event-tap.h |   44 +++
  qemu-tool.c |   28 ++
  trace-events|   10 +
  5 files changed, 1023 insertions(+), 0 deletions(-)
  create mode 100644 event-tap.c
  create mode 100644 event-tap.h

diff --git a/Makefile.target b/Makefile.target
index 220589e..da57efe 100644

Re: [PATCH 09/18] Introduce event-tap.

2011-03-03 Thread ya su
Yoshiaki:

event-tap records block and io write events, and replays these on
the other side, so block_save_live is useless during the latter ft
phase, right? If so, I think it needs to process the following code in
the block_save_live function:

if (stage == 1) {
init_blk_migration(mon, f);

/* start track dirty blocks */
set_dirty_tracking(1);
}
--
the following code will send block to the other side, as this will
also be done by event-tap replay. I think it should be placed in stage 3,
before the assert line. (this may affect some stage 2 rate-limit
then, so this can be placed in stage 2, though it looks ugly), another
choice is to avoid the invocation of block_save_live, right?
---
flush_blks(f);

if (qemu_file_has_error(f)) {
blk_mig_cleanup(mon);
return 0;
}

blk_mig_reset_dirty_cursor();

if (stage == 2) {


another question is: since you event-tap io write(I think IO READ
should also be event-tapped, as read may cause io chip state to
change),  you then need not invoke qemu_savevm_state_full in
qemu_savevm_trans_complete, right? thanks.


Green.



2011/2/24 Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp:
 event-tap controls when to start FT transaction, and provides proxy
 functions to called from net/block devices.  While FT transaction, it
 queues up net/block requests, and flush them when the transaction gets
 completed.

 Signed-off-by: Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp
 Signed-off-by: OHMURA Kei ohmura@lab.ntt.co.jp
 ---
  Makefile.target |    1 +
  event-tap.c     |  940 
 +++
  event-tap.h     |   44 +++
  qemu-tool.c     |   28 ++
  trace-events    |   10 +
  5 files changed, 1023 insertions(+), 0 deletions(-)
  create mode 100644 event-tap.c
  create mode 100644 event-tap.h

 diff --git a/Makefile.target b/Makefile.target
 index 220589e..da57efe 100644
 --- a/Makefile.target
 +++ b/Makefile.target
 @@ -199,6 +199,7 @@ obj-y += rwhandler.o
  obj-$(CONFIG_KVM) += kvm.o kvm-all.o
  obj-$(CONFIG_NO_KVM) += kvm-stub.o
  LIBS+=-lz
 +obj-y += event-tap.o

  QEMU_CFLAGS += $(VNC_TLS_CFLAGS)
  QEMU_CFLAGS += $(VNC_SASL_CFLAGS)
 diff --git a/event-tap.c b/event-tap.c
 new file mode 100644
 index 000..95c147a
 --- /dev/null
 +++ b/event-tap.c
 @@ -0,0 +1,940 @@
 +/*
 + * Event Tap functions for QEMU
 + *
 + * Copyright (c) 2010 Nippon Telegraph and Telephone Corporation.
 + *
 + * This work is licensed under the terms of the GNU GPL, version 2.  See
 + * the COPYING file in the top-level directory.
 + */
 +
 +#include "qemu-common.h"
 +#include "qemu-error.h"
 +#include "block.h"
 +#include "block_int.h"
 +#include "ioport.h"
 +#include "osdep.h"
 +#include "sysemu.h"
 +#include "hw/hw.h"
 +#include "net.h"
 +#include "event-tap.h"
 +#include "trace.h"
 +
 +enum EVENT_TAP_STATE {
 +    EVENT_TAP_OFF,
 +    EVENT_TAP_ON,
 +    EVENT_TAP_SUSPEND,
 +    EVENT_TAP_FLUSH,
 +    EVENT_TAP_LOAD,
 +    EVENT_TAP_REPLAY,
 +};
 +
 +static enum EVENT_TAP_STATE event_tap_state = EVENT_TAP_OFF;
 +
 +typedef struct EventTapIOport {
 +    uint32_t address;
 +    uint32_t data;
 +    int      index;
 +} EventTapIOport;
 +
 +#define MMIO_BUF_SIZE 8
 +
 +typedef struct EventTapMMIO {
 +    uint64_t address;
 +    uint8_t  buf[MMIO_BUF_SIZE];
 +    int      len;
 +} EventTapMMIO;
 +
 +typedef struct EventTapNetReq {
 +    char *device_name;
 +    int iovcnt;
 +    int vlan_id;
 +    bool vlan_needed;
 +    bool async;
 +    struct iovec *iov;
 +    NetPacketSent *sent_cb;
 +} EventTapNetReq;
 +
 +#define MAX_BLOCK_REQUEST 32
 +
 +typedef struct EventTapAIOCB EventTapAIOCB;
 +
 +typedef struct EventTapBlkReq {
 +    char *device_name;
 +    int num_reqs;
 +    int num_cbs;
 +    bool is_flush;
 +    BlockRequest reqs[MAX_BLOCK_REQUEST];
 +    EventTapAIOCB *acb[MAX_BLOCK_REQUEST];
 +} EventTapBlkReq;
 +
 +#define EVENT_TAP_IOPORT (1 << 0)
 +#define EVENT_TAP_MMIO   (1 << 1)
 +#define EVENT_TAP_NET    (1 << 2)
 +#define EVENT_TAP_BLK    (1 << 3)
 +
 +#define EVENT_TAP_TYPE_MASK (EVENT_TAP_NET - 1)
 +
 +typedef struct EventTapLog {
 +    int mode;
 +    union {
 +        EventTapIOport ioport;
 +        EventTapMMIO mmio;
 +    };
 +    union {
 +        EventTapNetReq net_req;
 +        EventTapBlkReq blk_req;
 +    };
 +    QTAILQ_ENTRY(EventTapLog) node;
 +} EventTapLog;
 +
 +struct EventTapAIOCB {
 +    BlockDriverAIOCB common;
 +    BlockDriverAIOCB *acb;
 +    bool is_canceled;
 +};
 +
 +static EventTapLog *last_event_tap;
 +
 +static QTAILQ_HEAD(, EventTapLog) event_list;
 +static QTAILQ_HEAD(, EventTapLog) event_pool;
 +
 +static int (*event_tap_cb)(void);
 +static QEMUBH *event_tap_bh;
 +static VMChangeStateEntry *vmstate;
 +
 +static void event_tap_bh_cb(void *p)
 +{
 +    if (event_tap_cb) {
 +        event_tap_cb();
 +    }
 +
 +    qemu_bh_delete(event_tap_bh);
 +    event_tap_bh = NULL;
 +}
 

[PATCH 09/18] Introduce event-tap.

2011-02-23 Thread Yoshiaki Tamura
event-tap controls when to start FT transaction, and provides proxy
functions to called from net/block devices.  While FT transaction, it
queues up net/block requests, and flush them when the transaction gets
completed.

Signed-off-by: Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp
Signed-off-by: OHMURA Kei ohmura@lab.ntt.co.jp
---
 Makefile.target |1 +
 event-tap.c |  940 +++
 event-tap.h |   44 +++
 qemu-tool.c |   28 ++
 trace-events|   10 +
 5 files changed, 1023 insertions(+), 0 deletions(-)
 create mode 100644 event-tap.c
 create mode 100644 event-tap.h

diff --git a/Makefile.target b/Makefile.target
index 220589e..da57efe 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -199,6 +199,7 @@ obj-y += rwhandler.o
 obj-$(CONFIG_KVM) += kvm.o kvm-all.o
 obj-$(CONFIG_NO_KVM) += kvm-stub.o
 LIBS+=-lz
+obj-y += event-tap.o
 
 QEMU_CFLAGS += $(VNC_TLS_CFLAGS)
 QEMU_CFLAGS += $(VNC_SASL_CFLAGS)
diff --git a/event-tap.c b/event-tap.c
new file mode 100644
index 000..95c147a
--- /dev/null
+++ b/event-tap.c
@@ -0,0 +1,940 @@
+/*
+ * Event Tap functions for QEMU
+ *
+ * Copyright (c) 2010 Nippon Telegraph and Telephone Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include "qemu-common.h"
+#include "qemu-error.h"
+#include "block.h"
+#include "block_int.h"
+#include "ioport.h"
+#include "osdep.h"
+#include "sysemu.h"
+#include "hw/hw.h"
+#include "net.h"
+#include "event-tap.h"
+#include "trace.h"
+
+enum EVENT_TAP_STATE {
+EVENT_TAP_OFF,
+EVENT_TAP_ON,
+EVENT_TAP_SUSPEND,
+EVENT_TAP_FLUSH,
+EVENT_TAP_LOAD,
+EVENT_TAP_REPLAY,
+};
+
+static enum EVENT_TAP_STATE event_tap_state = EVENT_TAP_OFF;
+
+typedef struct EventTapIOport {
+uint32_t address;
+uint32_t data;
+int  index;
+} EventTapIOport;
+
+#define MMIO_BUF_SIZE 8
+
+typedef struct EventTapMMIO {
+uint64_t address;
+uint8_t  buf[MMIO_BUF_SIZE];
+int  len;
+} EventTapMMIO;
+
+typedef struct EventTapNetReq {
+char *device_name;
+int iovcnt;
+int vlan_id;
+bool vlan_needed;
+bool async;
+struct iovec *iov;
+NetPacketSent *sent_cb;
+} EventTapNetReq;
+
+#define MAX_BLOCK_REQUEST 32
+
+typedef struct EventTapAIOCB EventTapAIOCB;
+
+typedef struct EventTapBlkReq {
+char *device_name;
+int num_reqs;
+int num_cbs;
+bool is_flush;
+BlockRequest reqs[MAX_BLOCK_REQUEST];
+EventTapAIOCB *acb[MAX_BLOCK_REQUEST];
+} EventTapBlkReq;
+
+#define EVENT_TAP_IOPORT (1 << 0)
+#define EVENT_TAP_MMIO   (1 << 1)
+#define EVENT_TAP_NET    (1 << 2)
+#define EVENT_TAP_BLK    (1 << 3)
+
+#define EVENT_TAP_TYPE_MASK (EVENT_TAP_NET - 1)
+
+typedef struct EventTapLog {
+int mode;
+union {
+EventTapIOport ioport;
+EventTapMMIO mmio;
+};
+union {
+EventTapNetReq net_req;
+EventTapBlkReq blk_req;
+};
+QTAILQ_ENTRY(EventTapLog) node;
+} EventTapLog;
+
+struct EventTapAIOCB {
+BlockDriverAIOCB common;
+BlockDriverAIOCB *acb;
+bool is_canceled;
+};
+
+static EventTapLog *last_event_tap;
+
+static QTAILQ_HEAD(, EventTapLog) event_list;
+static QTAILQ_HEAD(, EventTapLog) event_pool;
+
+static int (*event_tap_cb)(void);
+static QEMUBH *event_tap_bh;
+static VMChangeStateEntry *vmstate;
+
+static void event_tap_bh_cb(void *p)
+{
+if (event_tap_cb) {
+event_tap_cb();
+}
+
+qemu_bh_delete(event_tap_bh);
+event_tap_bh = NULL;
+}
+
+static void event_tap_schedule_bh(void)
+{
+trace_event_tap_ignore_bh(!!event_tap_bh);
+
+/* if bh is already set, we ignore it for now */
+if (event_tap_bh) {
+return;
+}
+
+event_tap_bh = qemu_bh_new(event_tap_bh_cb, NULL);
+qemu_bh_schedule(event_tap_bh);
+
+return;
+}
+
+static void *event_tap_alloc_log(void)
+{
+EventTapLog *log;
+
+if (QTAILQ_EMPTY(event_pool)) {
+log = qemu_mallocz(sizeof(EventTapLog));
+} else {
+log = QTAILQ_FIRST(event_pool);
+QTAILQ_REMOVE(event_pool, log, node);
+}
+
+return log;
+}
+
+static void event_tap_free_net_req(EventTapNetReq *net_req);
+static void event_tap_free_blk_req(EventTapBlkReq *blk_req);
+
+static void event_tap_free_log(EventTapLog *log)
+{
+int mode = log-mode  ~EVENT_TAP_TYPE_MASK;
+
+if (mode == EVENT_TAP_NET) {
+event_tap_free_net_req(log-net_req);
+} else if (mode == EVENT_TAP_BLK) {
+event_tap_free_blk_req(log-blk_req);
+}
+
+log-mode = 0;
+
+/* return the log to event_pool */
+QTAILQ_INSERT_HEAD(event_pool, log, node);
+}
+
+static void event_tap_free_pool(void)
+{
+EventTapLog *log, *next;
+
+QTAILQ_FOREACH_SAFE(log, event_pool, node, next) {
+QTAILQ_REMOVE(event_pool, log, node);
+qemu_free(log);
+}
+}
+
+static void event_tap_free_net_req(EventTapNetReq *net_req)
+{
+int i;
+
+if (!net_req-async) {
+for 

[PATCH 09/18] Introduce event-tap.

2011-02-23 Thread Yoshiaki Tamura
event-tap controls when to start FT transaction, and provides proxy
functions to called from net/block devices.  While FT transaction, it
queues up net/block requests, and flush them when the transaction gets
completed.

Signed-off-by: Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp
Signed-off-by: OHMURA Kei ohmura@lab.ntt.co.jp
---
 Makefile.target |1 +
 event-tap.c |  940 +++
 event-tap.h |   44 +++
 qemu-tool.c |   28 ++
 trace-events|   10 +
 5 files changed, 1023 insertions(+), 0 deletions(-)
 create mode 100644 event-tap.c
 create mode 100644 event-tap.h

diff --git a/Makefile.target b/Makefile.target
index 220589e..da57efe 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -199,6 +199,7 @@ obj-y += rwhandler.o
 obj-$(CONFIG_KVM) += kvm.o kvm-all.o
 obj-$(CONFIG_NO_KVM) += kvm-stub.o
 LIBS+=-lz
+obj-y += event-tap.o
 
 QEMU_CFLAGS += $(VNC_TLS_CFLAGS)
 QEMU_CFLAGS += $(VNC_SASL_CFLAGS)
diff --git a/event-tap.c b/event-tap.c
new file mode 100644
index 000..95c147a
--- /dev/null
+++ b/event-tap.c
@@ -0,0 +1,940 @@
+/*
+ * Event Tap functions for QEMU
+ *
+ * Copyright (c) 2010 Nippon Telegraph and Telephone Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include "qemu-common.h"
+#include "qemu-error.h"
+#include "block.h"
+#include "block_int.h"
+#include "ioport.h"
+#include "osdep.h"
+#include "sysemu.h"
+#include "hw/hw.h"
+#include "net.h"
+#include "event-tap.h"
+#include "trace.h"
+
+enum EVENT_TAP_STATE {
+EVENT_TAP_OFF,
+EVENT_TAP_ON,
+EVENT_TAP_SUSPEND,
+EVENT_TAP_FLUSH,
+EVENT_TAP_LOAD,
+EVENT_TAP_REPLAY,
+};
+
+static enum EVENT_TAP_STATE event_tap_state = EVENT_TAP_OFF;
+
+typedef struct EventTapIOport {
+uint32_t address;
+uint32_t data;
+int  index;
+} EventTapIOport;
+
+#define MMIO_BUF_SIZE 8
+
+typedef struct EventTapMMIO {
+uint64_t address;
+uint8_t  buf[MMIO_BUF_SIZE];
+int  len;
+} EventTapMMIO;
+
+typedef struct EventTapNetReq {
+char *device_name;
+int iovcnt;
+int vlan_id;
+bool vlan_needed;
+bool async;
+struct iovec *iov;
+NetPacketSent *sent_cb;
+} EventTapNetReq;
+
+#define MAX_BLOCK_REQUEST 32
+
+typedef struct EventTapAIOCB EventTapAIOCB;
+
+typedef struct EventTapBlkReq {
+char *device_name;
+int num_reqs;
+int num_cbs;
+bool is_flush;
+BlockRequest reqs[MAX_BLOCK_REQUEST];
+EventTapAIOCB *acb[MAX_BLOCK_REQUEST];
+} EventTapBlkReq;
+
+#define EVENT_TAP_IOPORT (1 << 0)
+#define EVENT_TAP_MMIO   (1 << 1)
+#define EVENT_TAP_NET    (1 << 2)
+#define EVENT_TAP_BLK    (1 << 3)
+
+#define EVENT_TAP_TYPE_MASK (EVENT_TAP_NET - 1)
+
+typedef struct EventTapLog {
+int mode;
+union {
+EventTapIOport ioport;
+EventTapMMIO mmio;
+};
+union {
+EventTapNetReq net_req;
+EventTapBlkReq blk_req;
+};
+QTAILQ_ENTRY(EventTapLog) node;
+} EventTapLog;
+
+struct EventTapAIOCB {
+BlockDriverAIOCB common;
+BlockDriverAIOCB *acb;
+bool is_canceled;
+};
+
+static EventTapLog *last_event_tap;
+
+static QTAILQ_HEAD(, EventTapLog) event_list;
+static QTAILQ_HEAD(, EventTapLog) event_pool;
+
+static int (*event_tap_cb)(void);
+static QEMUBH *event_tap_bh;
+static VMChangeStateEntry *vmstate;
+
+static void event_tap_bh_cb(void *p)
+{
+if (event_tap_cb) {
+event_tap_cb();
+}
+
+qemu_bh_delete(event_tap_bh);
+event_tap_bh = NULL;
+}
+
+static void event_tap_schedule_bh(void)
+{
+trace_event_tap_ignore_bh(!!event_tap_bh);
+
+/* if bh is already set, we ignore it for now */
+if (event_tap_bh) {
+return;
+}
+
+event_tap_bh = qemu_bh_new(event_tap_bh_cb, NULL);
+qemu_bh_schedule(event_tap_bh);
+
+return;
+}
+
+static void *event_tap_alloc_log(void)
+{
+EventTapLog *log;
+
+if (QTAILQ_EMPTY(event_pool)) {
+log = qemu_mallocz(sizeof(EventTapLog));
+} else {
+log = QTAILQ_FIRST(event_pool);
+QTAILQ_REMOVE(event_pool, log, node);
+}
+
+return log;
+}
+
+static void event_tap_free_net_req(EventTapNetReq *net_req);
+static void event_tap_free_blk_req(EventTapBlkReq *blk_req);
+
+static void event_tap_free_log(EventTapLog *log)
+{
+int mode = log-mode  ~EVENT_TAP_TYPE_MASK;
+
+if (mode == EVENT_TAP_NET) {
+event_tap_free_net_req(log-net_req);
+} else if (mode == EVENT_TAP_BLK) {
+event_tap_free_blk_req(log-blk_req);
+}
+
+log-mode = 0;
+
+/* return the log to event_pool */
+QTAILQ_INSERT_HEAD(event_pool, log, node);
+}
+
+static void event_tap_free_pool(void)
+{
+EventTapLog *log, *next;
+
+QTAILQ_FOREACH_SAFE(log, event_pool, node, next) {
+QTAILQ_REMOVE(event_pool, log, node);
+qemu_free(log);
+}
+}
+
+static void event_tap_free_net_req(EventTapNetReq *net_req)
+{
+int i;
+
+if (!net_req-async) {
+for 

[PATCH 09/18] Introduce event-tap.

2011-02-10 Thread Yoshiaki Tamura
event-tap controls when to start FT transaction, and provides proxy
functions to called from net/block devices.  While FT transaction, it
queues up net/block requests, and flush them when the transaction gets
completed.

Signed-off-by: Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp
Signed-off-by: OHMURA Kei ohmura@lab.ntt.co.jp
---
 Makefile.target |1 +
 event-tap.c |  939 +++
 event-tap.h |   44 +++
 qemu-tool.c |   28 ++
 trace-events|   10 +
 5 files changed, 1022 insertions(+), 0 deletions(-)
 create mode 100644 event-tap.c
 create mode 100644 event-tap.h

diff --git a/Makefile.target b/Makefile.target
index b0ba95f..edbdbee 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -199,6 +199,7 @@ obj-y += rwhandler.o
 obj-$(CONFIG_KVM) += kvm.o kvm-all.o
 obj-$(CONFIG_NO_KVM) += kvm-stub.o
 LIBS+=-lz
+obj-y += event-tap.o
 
 QEMU_CFLAGS += $(VNC_TLS_CFLAGS)
 QEMU_CFLAGS += $(VNC_SASL_CFLAGS)
diff --git a/event-tap.c b/event-tap.c
new file mode 100644
index 000..f44d835
--- /dev/null
+++ b/event-tap.c
@@ -0,0 +1,939 @@
+/*
+ * Event Tap functions for QEMU
+ *
+ * Copyright (c) 2010 Nippon Telegraph and Telephone Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include "qemu-common.h"
+#include "qemu-error.h"
+#include "block.h"
+#include "block_int.h"
+#include "ioport.h"
+#include "osdep.h"
+#include "sysemu.h"
+#include "hw/hw.h"
+#include "net.h"
+#include "event-tap.h"
+#include "trace.h"
+
+enum EVENT_TAP_STATE {
+EVENT_TAP_OFF,
+EVENT_TAP_ON,
+EVENT_TAP_SUSPEND,
+EVENT_TAP_FLUSH,
+EVENT_TAP_LOAD,
+EVENT_TAP_REPLAY,
+};
+
+static enum EVENT_TAP_STATE event_tap_state = EVENT_TAP_OFF;
+
+typedef struct EventTapIOport {
+uint32_t address;
+uint32_t data;
+int  index;
+} EventTapIOport;
+
+#define MMIO_BUF_SIZE 8
+
+typedef struct EventTapMMIO {
+uint64_t address;
+uint8_t  buf[MMIO_BUF_SIZE];
+int  len;
+} EventTapMMIO;
+
+typedef struct EventTapNetReq {
+char *device_name;
+int iovcnt;
+int vlan_id;
+bool vlan_needed;
+bool async;
+struct iovec *iov;
+NetPacketSent *sent_cb;
+} EventTapNetReq;
+
+#define MAX_BLOCK_REQUEST 32
+
+typedef struct EventTapAIOCB EventTapAIOCB;
+
+typedef struct EventTapBlkReq {
+char *device_name;
+int num_reqs;
+int num_cbs;
+bool is_flush;
+BlockRequest reqs[MAX_BLOCK_REQUEST];
+EventTapAIOCB *acb[MAX_BLOCK_REQUEST];
+} EventTapBlkReq;
+
+#define EVENT_TAP_IOPORT (1 << 0)
+#define EVENT_TAP_MMIO   (1 << 1)
+#define EVENT_TAP_NET    (1 << 2)
+#define EVENT_TAP_BLK    (1 << 3)
+
+#define EVENT_TAP_TYPE_MASK (EVENT_TAP_NET - 1)
+
+typedef struct EventTapLog {
+int mode;
+union {
+EventTapIOport ioport;
+EventTapMMIO mmio;
+};
+union {
+EventTapNetReq net_req;
+EventTapBlkReq blk_req;
+};
+QTAILQ_ENTRY(EventTapLog) node;
+} EventTapLog;
+
+struct EventTapAIOCB {
+BlockDriverAIOCB common;
+BlockDriverAIOCB *acb;
+bool is_canceled;
+};
+
+static EventTapLog *last_event_tap;
+
+static QTAILQ_HEAD(, EventTapLog) event_list;
+static QTAILQ_HEAD(, EventTapLog) event_pool;
+
+static int (*event_tap_cb)(void);
+static QEMUBH *event_tap_bh;
+static VMChangeStateEntry *vmstate;
+
+static void event_tap_bh_cb(void *p)
+{
+if (event_tap_cb) {
+event_tap_cb();
+}
+
+qemu_bh_delete(event_tap_bh);
+event_tap_bh = NULL;
+}
+
+static void event_tap_schedule_bh(void)
+{
+trace_event_tap_ignore_bh(!!event_tap_bh);
+
+/* if bh is already set, we ignore it for now */
+if (event_tap_bh) {
+return;
+}
+
+event_tap_bh = qemu_bh_new(event_tap_bh_cb, NULL);
+qemu_bh_schedule(event_tap_bh);
+
+return;
+}
+
+static void *event_tap_alloc_log(void)
+{
+EventTapLog *log;
+
+if (QTAILQ_EMPTY(event_pool)) {
+log = qemu_mallocz(sizeof(EventTapLog));
+} else {
+log = QTAILQ_FIRST(event_pool);
+QTAILQ_REMOVE(event_pool, log, node);
+}
+
+return log;
+}
+
+static void event_tap_free_net_req(EventTapNetReq *net_req);
+static void event_tap_free_blk_req(EventTapBlkReq *blk_req);
+
+static void event_tap_free_log(EventTapLog *log)
+{
+int mode = log-mode  ~EVENT_TAP_TYPE_MASK;
+
+if (mode == EVENT_TAP_NET) {
+event_tap_free_net_req(log-net_req);
+} else if (mode == EVENT_TAP_BLK) {
+event_tap_free_blk_req(log-blk_req);
+}
+
+log-mode = 0;
+
+/* return the log to event_pool */
+QTAILQ_INSERT_HEAD(event_pool, log, node);
+}
+
+static void event_tap_free_pool(void)
+{
+EventTapLog *log, *next;
+
+QTAILQ_FOREACH_SAFE(log, event_pool, node, next) {
+QTAILQ_REMOVE(event_pool, log, node);
+qemu_free(log);
+}
+}
+
+static void event_tap_free_net_req(EventTapNetReq *net_req)
+{
+int i;
+
+if (!net_req-async) {
+for