[PATCH 09/18] Introduce event-tap.
event-tap controls when to start an FT transaction, and provides proxy functions to be called from net/block devices. During an FT transaction, it queues up net/block requests, and flushes them when the transaction is completed. Signed-off-by: OHMURA Kei <ohmura@lab.ntt.co.jp> Signed-off-by: Yoshiaki Tamura <tamura.yoshi...@lab.ntt.co.jp> --- Makefile.target | 1 + event-tap.c | 940 +++ event-tap.h | 44 +++ qemu-tool.c | 28 ++ trace-events | 10 + 5 files changed, 1023 insertions(+), 0 deletions(-) create mode 100644 event-tap.c create mode 100644 event-tap.h diff --git a/Makefile.target b/Makefile.target index 0e0ef36..e489df4 100644 --- a/Makefile.target +++ b/Makefile.target @@ -199,6 +199,7 @@ obj-y += rwhandler.o obj-$(CONFIG_KVM) += kvm.o kvm-all.o obj-$(CONFIG_NO_KVM) += kvm-stub.o LIBS+=-lz +obj-y += event-tap.o QEMU_CFLAGS += $(VNC_TLS_CFLAGS) QEMU_CFLAGS += $(VNC_SASL_CFLAGS) diff --git a/event-tap.c b/event-tap.c new file mode 100644 index 0000000..95c147a --- /dev/null +++ b/event-tap.c @@ -0,0 +1,940 @@ +/* + * Event Tap functions for QEMU + * + * Copyright (c) 2010 Nippon Telegraph and Telephone Corporation. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
+ */ + +#include "qemu-common.h" +#include "qemu-error.h" +#include "block.h" +#include "block_int.h" +#include "ioport.h" +#include "osdep.h" +#include "sysemu.h" +#include "hw/hw.h" +#include "net.h" +#include "event-tap.h" +#include "trace.h" + +enum EVENT_TAP_STATE { +EVENT_TAP_OFF, +EVENT_TAP_ON, +EVENT_TAP_SUSPEND, +EVENT_TAP_FLUSH, +EVENT_TAP_LOAD, +EVENT_TAP_REPLAY, +}; + +static enum EVENT_TAP_STATE event_tap_state = EVENT_TAP_OFF; + +typedef struct EventTapIOport { +uint32_t address; +uint32_t data; +int index; +} EventTapIOport; + +#define MMIO_BUF_SIZE 8 + +typedef struct EventTapMMIO { +uint64_t address; +uint8_t buf[MMIO_BUF_SIZE]; +int len; +} EventTapMMIO; + +typedef struct EventTapNetReq { +char *device_name; +int iovcnt; +int vlan_id; +bool vlan_needed; +bool async; +struct iovec *iov; +NetPacketSent *sent_cb; +} EventTapNetReq; + +#define MAX_BLOCK_REQUEST 32 + +typedef struct EventTapAIOCB EventTapAIOCB; + +typedef struct EventTapBlkReq { +char *device_name; +int num_reqs; +int num_cbs; +bool is_flush; +BlockRequest reqs[MAX_BLOCK_REQUEST]; +EventTapAIOCB *acb[MAX_BLOCK_REQUEST]; +} EventTapBlkReq; + +#define EVENT_TAP_IOPORT (1 << 0) +#define EVENT_TAP_MMIO (1 << 1) +#define EVENT_TAP_NET (1 << 2) +#define EVENT_TAP_BLK (1 << 3) + +#define EVENT_TAP_TYPE_MASK (EVENT_TAP_NET - 1) + +typedef struct EventTapLog { +int mode; +union { +EventTapIOport ioport; +EventTapMMIO mmio; +}; +union { +EventTapNetReq net_req; +EventTapBlkReq blk_req; +}; +QTAILQ_ENTRY(EventTapLog) node; +} EventTapLog; + +struct EventTapAIOCB { +BlockDriverAIOCB common; +BlockDriverAIOCB *acb; +bool is_canceled; +}; + +static EventTapLog *last_event_tap; + +static QTAILQ_HEAD(, EventTapLog) event_list; +static QTAILQ_HEAD(, EventTapLog) event_pool; + +static int (*event_tap_cb)(void); +static QEMUBH *event_tap_bh; +static VMChangeStateEntry *vmstate; + +static void event_tap_bh_cb(void *p) +{ +if (event_tap_cb) { +event_tap_cb(); +} + +qemu_bh_delete(event_tap_bh); +event_tap_bh = NULL; +} + +static void 
event_tap_schedule_bh(void) +{ +trace_event_tap_ignore_bh(!!event_tap_bh); + +/* if bh is already set, we ignore it for now */ +if (event_tap_bh) { +return; +} + +event_tap_bh = qemu_bh_new(event_tap_bh_cb, NULL); +qemu_bh_schedule(event_tap_bh); + +return; +} + +static void *event_tap_alloc_log(void) +{ +EventTapLog *log; + +if (QTAILQ_EMPTY(&event_pool)) { +log = qemu_mallocz(sizeof(EventTapLog)); +} else { +log = QTAILQ_FIRST(&event_pool); +QTAILQ_REMOVE(&event_pool, log, node); +} + +return log; +} + +static void event_tap_free_net_req(EventTapNetReq *net_req); +static void event_tap_free_blk_req(EventTapBlkReq *blk_req); + +static void event_tap_free_log(EventTapLog *log) +{ +int mode = log->mode & ~EVENT_TAP_TYPE_MASK; + +if (mode == EVENT_TAP_NET) { +event_tap_free_net_req(&log->net_req); +} else if (mode == EVENT_TAP_BLK) { +event_tap_free_blk_req(&log->blk_req); +} + +log->mode = 0; + +/* return the log to event_pool */ +QTAILQ_INSERT_HEAD(&event_pool, log, node); +} + +static void event_tap_free_pool(void) +{ +EventTapLog *log, *next; + +QTAILQ_FOREACH_SAFE(log, &event_pool, node, next) { +QTAILQ_REMOVE(&event_pool, log, node); +qemu_free(log); +} +} + +static void event_tap_free_net_req(EventTapNetReq *net_req) +{ +int i; + +if (!net_req->async) { +for
[PATCH 09/18] Introduce event-tap.
event-tap controls when to start FT transaction, and provides proxy functions to called from net/block devices. While FT transaction, it queues up net/block requests, and flush them when the transaction gets completed. Signed-off-by: Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp Signed-off-by: OHMURA Kei ohmura@lab.ntt.co.jp --- Makefile.target |1 + event-tap.c | 940 +++ event-tap.h | 44 +++ qemu-tool.c | 27 ++ trace-events| 10 + 5 files changed, 1022 insertions(+), 0 deletions(-) create mode 100644 event-tap.c create mode 100644 event-tap.h diff --git a/Makefile.target b/Makefile.target index 62b102a..f088121 100644 --- a/Makefile.target +++ b/Makefile.target @@ -199,6 +199,7 @@ obj-y += rwhandler.o obj-$(CONFIG_KVM) += kvm.o kvm-all.o obj-$(CONFIG_NO_KVM) += kvm-stub.o LIBS+=-lz +obj-y += event-tap.o QEMU_CFLAGS += $(VNC_TLS_CFLAGS) QEMU_CFLAGS += $(VNC_SASL_CFLAGS) diff --git a/event-tap.c b/event-tap.c new file mode 100644 index 000..95c147a --- /dev/null +++ b/event-tap.c @@ -0,0 +1,940 @@ +/* + * Event Tap functions for QEMU + * + * Copyright (c) 2010 Nippon Telegraph and Telephone Corporation. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
+ */ + +#include qemu-common.h +#include qemu-error.h +#include block.h +#include block_int.h +#include ioport.h +#include osdep.h +#include sysemu.h +#include hw/hw.h +#include net.h +#include event-tap.h +#include trace.h + +enum EVENT_TAP_STATE { +EVENT_TAP_OFF, +EVENT_TAP_ON, +EVENT_TAP_SUSPEND, +EVENT_TAP_FLUSH, +EVENT_TAP_LOAD, +EVENT_TAP_REPLAY, +}; + +static enum EVENT_TAP_STATE event_tap_state = EVENT_TAP_OFF; + +typedef struct EventTapIOport { +uint32_t address; +uint32_t data; +int index; +} EventTapIOport; + +#define MMIO_BUF_SIZE 8 + +typedef struct EventTapMMIO { +uint64_t address; +uint8_t buf[MMIO_BUF_SIZE]; +int len; +} EventTapMMIO; + +typedef struct EventTapNetReq { +char *device_name; +int iovcnt; +int vlan_id; +bool vlan_needed; +bool async; +struct iovec *iov; +NetPacketSent *sent_cb; +} EventTapNetReq; + +#define MAX_BLOCK_REQUEST 32 + +typedef struct EventTapAIOCB EventTapAIOCB; + +typedef struct EventTapBlkReq { +char *device_name; +int num_reqs; +int num_cbs; +bool is_flush; +BlockRequest reqs[MAX_BLOCK_REQUEST]; +EventTapAIOCB *acb[MAX_BLOCK_REQUEST]; +} EventTapBlkReq; + +#define EVENT_TAP_IOPORT (1 0) +#define EVENT_TAP_MMIO (1 1) +#define EVENT_TAP_NET(1 2) +#define EVENT_TAP_BLK(1 3) + +#define EVENT_TAP_TYPE_MASK (EVENT_TAP_NET - 1) + +typedef struct EventTapLog { +int mode; +union { +EventTapIOport ioport; +EventTapMMIO mmio; +}; +union { +EventTapNetReq net_req; +EventTapBlkReq blk_req; +}; +QTAILQ_ENTRY(EventTapLog) node; +} EventTapLog; + +struct EventTapAIOCB { +BlockDriverAIOCB common; +BlockDriverAIOCB *acb; +bool is_canceled; +}; + +static EventTapLog *last_event_tap; + +static QTAILQ_HEAD(, EventTapLog) event_list; +static QTAILQ_HEAD(, EventTapLog) event_pool; + +static int (*event_tap_cb)(void); +static QEMUBH *event_tap_bh; +static VMChangeStateEntry *vmstate; + +static void event_tap_bh_cb(void *p) +{ +if (event_tap_cb) { +event_tap_cb(); +} + +qemu_bh_delete(event_tap_bh); +event_tap_bh = NULL; +} + +static void 
event_tap_schedule_bh(void) +{ +trace_event_tap_ignore_bh(!!event_tap_bh); + +/* if bh is already set, we ignore it for now */ +if (event_tap_bh) { +return; +} + +event_tap_bh = qemu_bh_new(event_tap_bh_cb, NULL); +qemu_bh_schedule(event_tap_bh); + +return; +} + +static void *event_tap_alloc_log(void) +{ +EventTapLog *log; + +if (QTAILQ_EMPTY(event_pool)) { +log = qemu_mallocz(sizeof(EventTapLog)); +} else { +log = QTAILQ_FIRST(event_pool); +QTAILQ_REMOVE(event_pool, log, node); +} + +return log; +} + +static void event_tap_free_net_req(EventTapNetReq *net_req); +static void event_tap_free_blk_req(EventTapBlkReq *blk_req); + +static void event_tap_free_log(EventTapLog *log) +{ +int mode = log-mode ~EVENT_TAP_TYPE_MASK; + +if (mode == EVENT_TAP_NET) { +event_tap_free_net_req(log-net_req); +} else if (mode == EVENT_TAP_BLK) { +event_tap_free_blk_req(log-blk_req); +} + +log-mode = 0; + +/* return the log to event_pool */ +QTAILQ_INSERT_HEAD(event_pool, log, node); +} + +static void event_tap_free_pool(void) +{ +EventTapLog *log, *next; + +QTAILQ_FOREACH_SAFE(log, event_pool, node, next) { +QTAILQ_REMOVE(event_pool, log, node); +qemu_free(log); +} +} + +static void event_tap_free_net_req(EventTapNetReq *net_req) +{ +int i; + +if (!net_req-async) { +for
Re: [PATCH 09/18] Introduce event-tap.
Yoshi: I ran into a problem: if I kill an FT source VM, the destination FT VM returns the following errors: qemu-system-x86_64: fill buffer failed, Resource temporarily unavailable qemu-system-x86_64: recv header failed The problem is that the destination VM cannot continue to run, because it was interrupted in the middle of a transaction: some of the RAM has been updated, but the rest has not. Do you have any plan for rolling back to cancel the interrupted transaction? Thanks. Green. 2011/3/9 Yoshiaki Tamura <tamura.yoshi...@lab.ntt.co.jp>: ya su wrote: Yoshi: I think event-tap is a great idea; it removes the reading from disk, which will improve FT efficiency considerably, as you plan in a later series. One question: IO read/write may dirty RAM, but it is difficult to distinguish those pages from other dirty pages, such as those caused by running software. Does that mean you need to change the implementation of all the emulated devices? Actually, I think IO read/write will not cause too much RAM to be sent in ram_save_live, but if event-tap can record IO read/write and replay it on the other side, does that mean we don't need to call qemu_savevm_state_full in FT transactions? I'm not expecting to remove qemu_savevm_state_full in the transaction. Just reduce the number of pages to be transferred as a result. Thanks, Yoshi Green. 2011/3/9 Yoshiaki Tamura <tamura.yoshi...@lab.ntt.co.jp>: ya su wrote: 2011/3/8 Yoshiaki Tamura <tamura.yoshi...@lab.ntt.co.jp>: ya su wrote: Yoshiaki: event-tap records block and IO write events, and replays these on the other side, so block_save_live is useless during the later FT phase, right? If so, I think the following code in the block_save_live function needs to be handled: Actually no. It just replays the last events only. We do have patches that enable block replication without using block live migration, like the way you described above. In that case, we disable block live migration when we go into ft mode. We're thinking of proposing it after this series gets settled. 
so event-tap's objective is to initial a ft transaction, to start the sync. of ram/block/device states? if so, it need not change bdrv_aio_writev/bdrv_aio_flush normal process, on the other side it need not invokde bdrv_aio_writev either, right? Mostly yes, but because event-tap is queuing requests from block/net, it needs to flush queued requests after the transaction on the primary side. On the secondary, it currently doesn't have to invoke bdrv_aio_writev as you mentioned. But will change soon to enable block replication with event-tap. if (stage == 1) { init_blk_migration(mon, f); /* start track dirty blocks */ set_dirty_tracking(1); } -- the following code will send block to the other side, as this will also be done by event-tap replay. I think it should placed in stage 3, before the assert line. (this may affect some stage 2 rate-limit then, so this can be placed in stage 2, though it looks ugly), another choice is to avoid the invocation of block_save_live, right? --- flush_blks(f); if (qemu_file_has_error(f)) { blk_mig_cleanup(mon); return 0; } blk_mig_reset_dirty_cursor(); if (stage == 2) { another question is: since you event-tap io write(I think IO READ should also be event-tapped, as read may cause io chip state to change), you then need not invoke qemu_savevm_state_full in qemu_savevm_trans_complete, right? thanks. It's not necessary to tap IO READ, but you can if you like. We also have experimental patches for this to reduce rams to be transfered. But I don't understand why we don't have to invoke qemu_savevm_state_full although I think we may reduce number of rams by replaying IO READ on the secondary. I first think the objective of io-Write event-tap is to reproduce the same device state on the other side, though I doubt this, so I think IO-Read also should be recorded and replayed. since event-tap is only to initial a ft transaction, the sync. of states still depend on qemu_save_vm_live/full, I understand the design now, thanks. 
but I don't understand why io-write event-tap can reduce transfered rams as you mentioned, the amount of rams only depend on dirty pages, IO write don't change the normal process unlike block write, right? The point is, if we can assure that IO read retrieves the same data on both sides, instead of dirtying the ram by read, meaning we have to transfer in the transaction, just replay the operation and get the same data on the otherside. Anyway, that's just a plan :) Thanks, Yoshi Thanks, Yoshi Green. 2011/2/24 Yoshiaki Tamuratamura.yoshi...@lab.ntt.co.jp: event-tap controls when to start FT transaction, and provides proxy functions to called from net/block devices. While FT
Re: [PATCH 09/18] Introduce event-tap.
ya su wrote: Yoshi: I meet one problem if I killed a ft source VM, the dest ft VM will return errors as the following: qemu-system-x86_64: fill buffer failed, Resource temporarily unavailable qemu-system-x86_64: recv header failed the problem is that the dest VM can not continue to run, as it is interrupted in the middle of a transaction, some of rams have been updated, but the others not, do you have any plan for rolling back to cancel the interrupted transaction? thanks. No it's not a problem. This is one of FAQs I get, but just press cont or c in the secondary qemu, it should run. Thanks, Yoshi Green. 2011/3/9 Yoshiaki Tamuratamura.yoshi...@lab.ntt.co.jp: ya su wrote: Yoshi: I think event-tap is a great idea, it remove the reading from disk which will increase ft effiency much better as your plan in later series. one question: IO read/write may dirty rams, but it is difficute to differ them from other dirty pages like caused by running of softwares, whether that means you need change all the emulated device realization? actually I think it will not send too much rams caused by IO Read/Write in ram_save_live, but if It can event-tap IO read/write and replay on the other side, Does that means we don't need call qemu_savevm_state_full in ft transactoins? I'm not expecting to remove qemu_savevm_state_full in the transaction. Just reduce the number of pages to be transfered as a result. Thanks, Yoshi Green. 2011/3/9 Yoshiaki Tamuratamura.yoshi...@lab.ntt.co.jp: ya su wrote: 2011/3/8 Yoshiaki Tamuratamura.yoshi...@lab.ntt.co.jp: ya su wrote: Yokshiaki: event-tap record block and io wirte events, and replay these on the other side, so block_save_live is useless during the latter ft phase, right? if so, I think it need to process the following code in block_save_live function: Actually no. It just replays the last events only. We do have patches that enable block replication without using block live migration, like the way you described above. 
In that case, we disable block live migration when we go into ft mode. We're thinking to propose it after this series get settled. so event-tap's objective is to initial a ft transaction, to start the sync. of ram/block/device states? if so, it need not change bdrv_aio_writev/bdrv_aio_flush normal process, on the other side it need not invokde bdrv_aio_writev either, right? Mostly yes, but because event-tap is queuing requests from block/net, it needs to flush queued requests after the transaction on the primary side. On the secondary, it currently doesn't have to invoke bdrv_aio_writev as you mentioned. But will change soon to enable block replication with event-tap. if (stage == 1) { init_blk_migration(mon, f); /* start track dirty blocks */ set_dirty_tracking(1); } -- the following code will send block to the other side, as this will also be done by event-tap replay. I think it should placed in stage 3, before the assert line. (this may affect some stage 2 rate-limit then, so this can be placed in stage 2, though it looks ugly), another choice is to avoid the invocation of block_save_live, right? --- flush_blks(f); if (qemu_file_has_error(f)) { blk_mig_cleanup(mon); return 0; } blk_mig_reset_dirty_cursor(); if (stage == 2) { another question is: since you event-tap io write(I think IO READ should also be event-tapped, as read may cause io chip state to change), you then need not invoke qemu_savevm_state_full in qemu_savevm_trans_complete, right? thanks. It's not necessary to tap IO READ, but you can if you like. We also have experimental patches for this to reduce rams to be transfered. But I don't understand why we don't have to invoke qemu_savevm_state_full although I think we may reduce number of rams by replaying IO READ on the secondary. I first think the objective of io-Write event-tap is to reproduce the same device state on the other side, though I doubt this, so I think IO-Read also should be recorded and replayed. 
since event-tap is only to initial a ft transaction, the sync. of states still depend on qemu_save_vm_live/full, I understand the design now, thanks. but I don't understand why io-write event-tap can reduce transfered rams as you mentioned, the amount of rams only depend on dirty pages, IO write don't change the normal process unlike block write, right? The point is, if we can assure that IO read retrieves the same data on both sides, instead of dirtying the ram by read, meaning we have to transfer in the transaction, just replay the operation and get the same data on the otherside. Anyway, that's just a plan :) Thanks, Yoshi Thanks, Yoshi Green. 2011/2/24 Yoshiaki Tamuratamura.yoshi...@lab.ntt.co.jp: event-tap controls when to
Re: [PATCH 09/18] Introduce event-tap.
ya su wrote: Yokshiaki: event-tap record block and io wirte events, and replay these on the other side, so block_save_live is useless during the latter ft phase, right? if so, I think it need to process the following code in block_save_live function: Actually no. It just replays the last events only. We do have patches that enable block replication without using block live migration, like the way you described above. In that case, we disable block live migration when we go into ft mode. We're thinking to propose it after this series get settled. if (stage == 1) { init_blk_migration(mon, f); /* start track dirty blocks */ set_dirty_tracking(1); } -- the following code will send block to the other side, as this will also be done by event-tap replay. I think it should placed in stage 3, before the assert line. (this may affect some stage 2 rate-limit then, so this can be placed in stage 2, though it looks ugly), another choice is to avoid the invocation of block_save_live, right? --- flush_blks(f); if (qemu_file_has_error(f)) { blk_mig_cleanup(mon); return 0; } blk_mig_reset_dirty_cursor(); if (stage == 2) { another question is: since you event-tap io write(I think IO READ should also be event-tapped, as read may cause io chip state to change), you then need not invoke qemu_savevm_state_full in qemu_savevm_trans_complete, right? thanks. It's not necessary to tap IO READ, but you can if you like. We also have experimental patches for this to reduce rams to be transfered. But I don't understand why we don't have to invoke qemu_savevm_state_full although I think we may reduce number of rams by replaying IO READ on the secondary. Thanks, Yoshi Green. 2011/2/24 Yoshiaki Tamuratamura.yoshi...@lab.ntt.co.jp: event-tap controls when to start FT transaction, and provides proxy functions to called from net/block devices. While FT transaction, it queues up net/block requests, and flush them when the transaction gets completed. 
Signed-off-by: Yoshiaki Tamuratamura.yoshi...@lab.ntt.co.jp Signed-off-by: OHMURA Keiohmura@lab.ntt.co.jp --- Makefile.target |1 + event-tap.c | 940 +++ event-tap.h | 44 +++ qemu-tool.c | 28 ++ trace-events| 10 + 5 files changed, 1023 insertions(+), 0 deletions(-) create mode 100644 event-tap.c create mode 100644 event-tap.h diff --git a/Makefile.target b/Makefile.target index 220589e..da57efe 100644 --- a/Makefile.target +++ b/Makefile.target @@ -199,6 +199,7 @@ obj-y += rwhandler.o obj-$(CONFIG_KVM) += kvm.o kvm-all.o obj-$(CONFIG_NO_KVM) += kvm-stub.o LIBS+=-lz +obj-y += event-tap.o QEMU_CFLAGS += $(VNC_TLS_CFLAGS) QEMU_CFLAGS += $(VNC_SASL_CFLAGS) diff --git a/event-tap.c b/event-tap.c new file mode 100644 index 000..95c147a --- /dev/null +++ b/event-tap.c @@ -0,0 +1,940 @@ +/* + * Event Tap functions for QEMU + * + * Copyright (c) 2010 Nippon Telegraph and Telephone Corporation. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
+ */ + +#include qemu-common.h +#include qemu-error.h +#include block.h +#include block_int.h +#include ioport.h +#include osdep.h +#include sysemu.h +#include hw/hw.h +#include net.h +#include event-tap.h +#include trace.h + +enum EVENT_TAP_STATE { +EVENT_TAP_OFF, +EVENT_TAP_ON, +EVENT_TAP_SUSPEND, +EVENT_TAP_FLUSH, +EVENT_TAP_LOAD, +EVENT_TAP_REPLAY, +}; + +static enum EVENT_TAP_STATE event_tap_state = EVENT_TAP_OFF; + +typedef struct EventTapIOport { +uint32_t address; +uint32_t data; +int index; +} EventTapIOport; + +#define MMIO_BUF_SIZE 8 + +typedef struct EventTapMMIO { +uint64_t address; +uint8_t buf[MMIO_BUF_SIZE]; +int len; +} EventTapMMIO; + +typedef struct EventTapNetReq { +char *device_name; +int iovcnt; +int vlan_id; +bool vlan_needed; +bool async; +struct iovec *iov; +NetPacketSent *sent_cb; +} EventTapNetReq; + +#define MAX_BLOCK_REQUEST 32 + +typedef struct EventTapAIOCB EventTapAIOCB; + +typedef struct EventTapBlkReq { +char *device_name; +int num_reqs; +int num_cbs; +bool is_flush; +BlockRequest reqs[MAX_BLOCK_REQUEST]; +EventTapAIOCB *acb[MAX_BLOCK_REQUEST]; +} EventTapBlkReq; + +#define EVENT_TAP_IOPORT (1 0) +#define EVENT_TAP_MMIO (1 1) +#define EVENT_TAP_NET(1 2) +#define EVENT_TAP_BLK(1 3) + +#define EVENT_TAP_TYPE_MASK (EVENT_TAP_NET - 1) + +typedef struct EventTapLog { +int mode; +union { +EventTapIOport ioport; +EventTapMMIO mmio; +}; +union { +EventTapNetReq net_req; +EventTapBlkReq blk_req; +}; +QTAILQ_ENTRY(EventTapLog) node; +}
Re: [PATCH 09/18] Introduce event-tap.
2011/3/8 Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp: ya su wrote: Yokshiaki: event-tap record block and io wirte events, and replay these on the other side, so block_save_live is useless during the latter ft phase, right? if so, I think it need to process the following code in block_save_live function: Actually no. It just replays the last events only. We do have patches that enable block replication without using block live migration, like the way you described above. In that case, we disable block live migration when we go into ft mode. We're thinking to propose it after this series get settled. so event-tap's objective is to initial a ft transaction, to start the sync. of ram/block/device states? if so, it need not change bdrv_aio_writev/bdrv_aio_flush normal process, on the other side it need not invokde bdrv_aio_writev either, right? if (stage == 1) { init_blk_migration(mon, f); /* start track dirty blocks */ set_dirty_tracking(1); } -- the following code will send block to the other side, as this will also be done by event-tap replay. I think it should placed in stage 3, before the assert line. (this may affect some stage 2 rate-limit then, so this can be placed in stage 2, though it looks ugly), another choice is to avoid the invocation of block_save_live, right? --- flush_blks(f); if (qemu_file_has_error(f)) { blk_mig_cleanup(mon); return 0; } blk_mig_reset_dirty_cursor(); if (stage == 2) { another question is: since you event-tap io write(I think IO READ should also be event-tapped, as read may cause io chip state to change), you then need not invoke qemu_savevm_state_full in qemu_savevm_trans_complete, right? thanks. It's not necessary to tap IO READ, but you can if you like. We also have experimental patches for this to reduce rams to be transfered. But I don't understand why we don't have to invoke qemu_savevm_state_full although I think we may reduce number of rams by replaying IO READ on the secondary. 
I first think the objective of io-Write event-tap is to reproduce the same device state on the other side, though I doubt this, so I think IO-Read also should be recorded and replayed. since event-tap is only to initial a ft transaction, the sync. of states still depend on qemu_save_vm_live/full, I understand the design now, thanks. but I don't understand why io-write event-tap can reduce transfered rams as you mentioned, the amount of rams only depend on dirty pages, IO write don't change the normal process unlike block write, right? Thanks, Yoshi Green. 2011/2/24 Yoshiaki Tamuratamura.yoshi...@lab.ntt.co.jp: event-tap controls when to start FT transaction, and provides proxy functions to called from net/block devices. While FT transaction, it queues up net/block requests, and flush them when the transaction gets completed. Signed-off-by: Yoshiaki Tamuratamura.yoshi...@lab.ntt.co.jp Signed-off-by: OHMURA Keiohmura@lab.ntt.co.jp --- Makefile.target | 1 + event-tap.c | 940 +++ event-tap.h | 44 +++ qemu-tool.c | 28 ++ trace-events | 10 + 5 files changed, 1023 insertions(+), 0 deletions(-) create mode 100644 event-tap.c create mode 100644 event-tap.h diff --git a/Makefile.target b/Makefile.target index 220589e..da57efe 100644 --- a/Makefile.target +++ b/Makefile.target @@ -199,6 +199,7 @@ obj-y += rwhandler.o obj-$(CONFIG_KVM) += kvm.o kvm-all.o obj-$(CONFIG_NO_KVM) += kvm-stub.o LIBS+=-lz +obj-y += event-tap.o QEMU_CFLAGS += $(VNC_TLS_CFLAGS) QEMU_CFLAGS += $(VNC_SASL_CFLAGS) diff --git a/event-tap.c b/event-tap.c new file mode 100644 index 000..95c147a --- /dev/null +++ b/event-tap.c @@ -0,0 +1,940 @@ +/* + * Event Tap functions for QEMU + * + * Copyright (c) 2010 Nippon Telegraph and Telephone Corporation. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
+ */ + +#include qemu-common.h +#include qemu-error.h +#include block.h +#include block_int.h +#include ioport.h +#include osdep.h +#include sysemu.h +#include hw/hw.h +#include net.h +#include event-tap.h +#include trace.h + +enum EVENT_TAP_STATE { + EVENT_TAP_OFF, + EVENT_TAP_ON, + EVENT_TAP_SUSPEND, + EVENT_TAP_FLUSH, + EVENT_TAP_LOAD, + EVENT_TAP_REPLAY, +}; + +static enum EVENT_TAP_STATE event_tap_state = EVENT_TAP_OFF; + +typedef struct EventTapIOport { + uint32_t address; + uint32_t data; + int index; +} EventTapIOport; + +#define MMIO_BUF_SIZE 8 + +typedef struct EventTapMMIO { + uint64_t address; + uint8_t buf[MMIO_BUF_SIZE]; + int len; +} EventTapMMIO; + +typedef
Re: [PATCH 09/18] Introduce event-tap.
Yoshi: I think event-tap is a great idea; it removes the reading from disk, which will improve FT efficiency considerably, as you plan in a later series. One question: IO read/write may dirty RAM, but it is difficult to distinguish those pages from other dirty pages, such as those caused by running software. Does that mean you need to change the implementation of all the emulated devices? Actually, I think IO read/write will not cause too much RAM to be sent in ram_save_live, but if event-tap can record IO read/write and replay it on the other side, does that mean we don't need to call qemu_savevm_state_full in FT transactions? Green. 2011/3/9 Yoshiaki Tamura <tamura.yoshi...@lab.ntt.co.jp>: ya su wrote: 2011/3/8 Yoshiaki Tamura <tamura.yoshi...@lab.ntt.co.jp>: ya su wrote: Yoshiaki: event-tap records block and IO write events, and replays these on the other side, so block_save_live is useless during the later FT phase, right? If so, I think the following code in the block_save_live function needs to be handled: Actually no. It just replays the last events only. We do have patches that enable block replication without using block live migration, like the way you described above. In that case, we disable block live migration when we go into ft mode. We're thinking of proposing it after this series gets settled. So event-tap's objective is to initiate an FT transaction, to start the sync of RAM/block/device states? If so, it need not change the normal bdrv_aio_writev/bdrv_aio_flush processing, and on the other side it need not invoke bdrv_aio_writev either, right? Mostly yes, but because event-tap is queuing requests from block/net, it needs to flush queued requests after the transaction on the primary side. On the secondary, it currently doesn't have to invoke bdrv_aio_writev as you mentioned. But that will change soon to enable block replication with event-tap. 
if (stage == 1) { init_blk_migration(mon, f); /* start track dirty blocks */ set_dirty_tracking(1); } -- the following code will send block to the other side, as this will also be done by event-tap replay. I think it should placed in stage 3, before the assert line. (this may affect some stage 2 rate-limit then, so this can be placed in stage 2, though it looks ugly), another choice is to avoid the invocation of block_save_live, right? --- flush_blks(f); if (qemu_file_has_error(f)) { blk_mig_cleanup(mon); return 0; } blk_mig_reset_dirty_cursor(); if (stage == 2) { another question is: since you event-tap io write(I think IO READ should also be event-tapped, as read may cause io chip state to change), you then need not invoke qemu_savevm_state_full in qemu_savevm_trans_complete, right? thanks. It's not necessary to tap IO READ, but you can if you like. We also have experimental patches for this to reduce rams to be transfered. But I don't understand why we don't have to invoke qemu_savevm_state_full although I think we may reduce number of rams by replaying IO READ on the secondary. I first think the objective of io-Write event-tap is to reproduce the same device state on the other side, though I doubt this, so I think IO-Read also should be recorded and replayed. since event-tap is only to initial a ft transaction, the sync. of states still depend on qemu_save_vm_live/full, I understand the design now, thanks. but I don't understand why io-write event-tap can reduce transfered rams as you mentioned, the amount of rams only depend on dirty pages, IO write don't change the normal process unlike block write, right? The point is, if we can assure that IO read retrieves the same data on both sides, instead of dirtying the ram by read, meaning we have to transfer in the transaction, just replay the operation and get the same data on the otherside. Anyway, that's just a plan :) Thanks, Yoshi Thanks, Yoshi Green. 
2011/2/24 Yoshiaki Tamuratamura.yoshi...@lab.ntt.co.jp: event-tap controls when to start FT transaction, and provides proxy functions to called from net/block devices. While FT transaction, it queues up net/block requests, and flush them when the transaction gets completed. Signed-off-by: Yoshiaki Tamuratamura.yoshi...@lab.ntt.co.jp Signed-off-by: OHMURA Keiohmura@lab.ntt.co.jp --- Makefile.target | 1 + event-tap.c | 940 +++ event-tap.h | 44 +++ qemu-tool.c | 28 ++ trace-events | 10 + 5 files changed, 1023 insertions(+), 0 deletions(-) create mode 100644 event-tap.c create mode 100644 event-tap.h diff --git a/Makefile.target b/Makefile.target index 220589e..da57efe 100644 --- a/Makefile.target +++ b/Makefile.target @@ -199,6 +199,7 @@ obj-y += rwhandler.o obj-$(CONFIG_KVM) += kvm.o
Re: [PATCH 09/18] Introduce event-tap.
ya su wrote: Yoshi: I think event-tap is a great idea, it removes the reading from disk which will increase ft efficiency much better as your plan in later series. one question: IO read/write may dirty rams, but it is difficult to distinguish them from other dirty pages like caused by running of software, whether that means you need change all the emulated device realization? actually I think it will not send too much rams caused by IO Read/Write in ram_save_live, but if It can event-tap IO read/write and replay on the other side, Does that means we don't need call qemu_savevm_state_full in ft transactions? I'm not expecting to remove qemu_savevm_state_full in the transaction. Just reduce the number of pages to be transferred as a result. Thanks, Yoshi Green. 2011/3/9 Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp: ya su wrote: 2011/3/8 Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp: ya su wrote: Yoshiaki: event-tap record block and io write events, and replay these on the other side, so block_save_live is useless during the latter ft phase, right? if so, I think it need to process the following code in block_save_live function: Actually no. It just replays the last events only. We do have patches that enable block replication without using block live migration, like the way you described above. In that case, we disable block live migration when we go into ft mode. We're thinking to propose it after this series get settled. so event-tap's objective is to initiate a ft transaction, to start the sync. of ram/block/device states? if so, it need not change bdrv_aio_writev/bdrv_aio_flush normal process, on the other side it need not invoke bdrv_aio_writev either, right? Mostly yes, but because event-tap is queuing requests from block/net, it needs to flush queued requests after the transaction on the primary side. On the secondary, it currently doesn't have to invoke bdrv_aio_writev as you mentioned. But will change soon to enable block replication with event-tap. 
if (stage == 1) { init_blk_migration(mon, f); /* start track dirty blocks */ set_dirty_tracking(1); } -- the following code will send block to the other side, as this will also be done by event-tap replay. I think it should be placed in stage 3, before the assert line. (this may affect some stage 2 rate-limit then, so this can be placed in stage 2, though it looks ugly), another choice is to avoid the invocation of block_save_live, right? --- flush_blks(f); if (qemu_file_has_error(f)) { blk_mig_cleanup(mon); return 0; } blk_mig_reset_dirty_cursor(); if (stage == 2) { another question is: since you event-tap io write (I think IO READ should also be event-tapped, as read may cause io chip state to change), you then need not invoke qemu_savevm_state_full in qemu_savevm_trans_complete, right? thanks. It's not necessary to tap IO READ, but you can if you like. We also have experimental patches for this to reduce rams to be transferred. But I don't understand why we don't have to invoke qemu_savevm_state_full although I think we may reduce number of rams by replaying IO READ on the secondary. I first think the objective of io-Write event-tap is to reproduce the same device state on the other side, though I doubt this, so I think IO-Read also should be recorded and replayed. since event-tap is only to initiate a ft transaction, the sync. of states still depend on qemu_save_vm_live/full, I understand the design now, thanks. but I don't understand why io-write event-tap can reduce transferred rams as you mentioned, the amount of rams only depend on dirty pages, IO write don't change the normal process unlike block write, right? The point is, if we can assure that IO read retrieves the same data on both sides, instead of dirtying the ram by read, meaning we have to transfer in the transaction, just replay the operation and get the same data on the other side. Anyway, that's just a plan :) Thanks, Yoshi Thanks, Yoshi Green. 
2011/2/24 Yoshiaki Tamuratamura.yoshi...@lab.ntt.co.jp: event-tap controls when to start FT transaction, and provides proxy functions to called from net/block devices. While FT transaction, it queues up net/block requests, and flush them when the transaction gets completed. Signed-off-by: Yoshiaki Tamuratamura.yoshi...@lab.ntt.co.jp Signed-off-by: OHMURA Keiohmura@lab.ntt.co.jp --- Makefile.target |1 + event-tap.c | 940 +++ event-tap.h | 44 +++ qemu-tool.c | 28 ++ trace-events| 10 + 5 files changed, 1023 insertions(+), 0 deletions(-) create mode 100644 event-tap.c create mode 100644 event-tap.h diff --git a/Makefile.target b/Makefile.target index 220589e..da57efe 100644
Re: [PATCH 09/18] Introduce event-tap.
Yoshiaki: event-tap record block and io write events, and replay these on the other side, so block_save_live is useless during the latter ft phase, right? if so, I think it need to process the following code in block_save_live function: if (stage == 1) { init_blk_migration(mon, f); /* start track dirty blocks */ set_dirty_tracking(1); } -- the following code will send block to the other side, as this will also be done by event-tap replay. I think it should be placed in stage 3, before the assert line. (this may affect some stage 2 rate-limit then, so this can be placed in stage 2, though it looks ugly), another choice is to avoid the invocation of block_save_live, right? --- flush_blks(f); if (qemu_file_has_error(f)) { blk_mig_cleanup(mon); return 0; } blk_mig_reset_dirty_cursor(); if (stage == 2) { another question is: since you event-tap io write (I think IO READ should also be event-tapped, as read may cause io chip state to change), you then need not invoke qemu_savevm_state_full in qemu_savevm_trans_complete, right? thanks. Green. 2011/2/24 Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp: event-tap controls when to start FT transaction, and provides proxy functions to called from net/block devices. While FT transaction, it queues up net/block requests, and flush them when the transaction gets completed. 
Signed-off-by: Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp Signed-off-by: OHMURA Kei ohmura@lab.ntt.co.jp --- Makefile.target | 1 + event-tap.c | 940 +++ event-tap.h | 44 +++ qemu-tool.c | 28 ++ trace-events | 10 + 5 files changed, 1023 insertions(+), 0 deletions(-) create mode 100644 event-tap.c create mode 100644 event-tap.h diff --git a/Makefile.target b/Makefile.target index 220589e..da57efe 100644 --- a/Makefile.target +++ b/Makefile.target @@ -199,6 +199,7 @@ obj-y += rwhandler.o obj-$(CONFIG_KVM) += kvm.o kvm-all.o obj-$(CONFIG_NO_KVM) += kvm-stub.o LIBS+=-lz +obj-y += event-tap.o QEMU_CFLAGS += $(VNC_TLS_CFLAGS) QEMU_CFLAGS += $(VNC_SASL_CFLAGS) diff --git a/event-tap.c b/event-tap.c new file mode 100644 index 000..95c147a --- /dev/null +++ b/event-tap.c @@ -0,0 +1,940 @@ +/* + * Event Tap functions for QEMU + * + * Copyright (c) 2010 Nippon Telegraph and Telephone Corporation. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
+ */ + +#include "qemu-common.h" +#include "qemu-error.h" +#include "block.h" +#include "block_int.h" +#include "ioport.h" +#include "osdep.h" +#include "sysemu.h" +#include "hw/hw.h" +#include "net.h" +#include "event-tap.h" +#include "trace.h" + +enum EVENT_TAP_STATE { + EVENT_TAP_OFF, + EVENT_TAP_ON, + EVENT_TAP_SUSPEND, + EVENT_TAP_FLUSH, + EVENT_TAP_LOAD, + EVENT_TAP_REPLAY, +}; + +static enum EVENT_TAP_STATE event_tap_state = EVENT_TAP_OFF; + +typedef struct EventTapIOport { + uint32_t address; + uint32_t data; + int index; +} EventTapIOport; + +#define MMIO_BUF_SIZE 8 + +typedef struct EventTapMMIO { + uint64_t address; + uint8_t buf[MMIO_BUF_SIZE]; + int len; +} EventTapMMIO; + +typedef struct EventTapNetReq { + char *device_name; + int iovcnt; + int vlan_id; + bool vlan_needed; + bool async; + struct iovec *iov; + NetPacketSent *sent_cb; +} EventTapNetReq; + +#define MAX_BLOCK_REQUEST 32 + +typedef struct EventTapAIOCB EventTapAIOCB; + +typedef struct EventTapBlkReq { + char *device_name; + int num_reqs; + int num_cbs; + bool is_flush; + BlockRequest reqs[MAX_BLOCK_REQUEST]; + EventTapAIOCB *acb[MAX_BLOCK_REQUEST]; +} EventTapBlkReq; + +#define EVENT_TAP_IOPORT (1 << 0) +#define EVENT_TAP_MMIO (1 << 1) +#define EVENT_TAP_NET (1 << 2) +#define EVENT_TAP_BLK (1 << 3) + +#define EVENT_TAP_TYPE_MASK (EVENT_TAP_NET - 1) + +typedef struct EventTapLog { + int mode; + union { + EventTapIOport ioport; + EventTapMMIO mmio; + }; + union { + EventTapNetReq net_req; + EventTapBlkReq blk_req; + }; + QTAILQ_ENTRY(EventTapLog) node; +} EventTapLog; + +struct EventTapAIOCB { + BlockDriverAIOCB common; + BlockDriverAIOCB *acb; + bool is_canceled; +}; + +static EventTapLog *last_event_tap; + +static QTAILQ_HEAD(, EventTapLog) event_list; +static QTAILQ_HEAD(, EventTapLog) event_pool; + +static int (*event_tap_cb)(void); +static QEMUBH *event_tap_bh; +static VMChangeStateEntry *vmstate; + +static void event_tap_bh_cb(void *p) +{ + if (event_tap_cb) { + event_tap_cb(); + } + + qemu_bh_delete(event_tap_bh); + 
event_tap_bh = NULL; +}
[PATCH 09/18] Introduce event-tap.
event-tap controls when to start FT transaction, and provides proxy functions to called from net/block devices. While FT transaction, it queues up net/block requests, and flush them when the transaction gets completed. Signed-off-by: Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp Signed-off-by: OHMURA Kei ohmura@lab.ntt.co.jp --- Makefile.target |1 + event-tap.c | 940 +++ event-tap.h | 44 +++ qemu-tool.c | 28 ++ trace-events| 10 + 5 files changed, 1023 insertions(+), 0 deletions(-) create mode 100644 event-tap.c create mode 100644 event-tap.h diff --git a/Makefile.target b/Makefile.target index 220589e..da57efe 100644 --- a/Makefile.target +++ b/Makefile.target @@ -199,6 +199,7 @@ obj-y += rwhandler.o obj-$(CONFIG_KVM) += kvm.o kvm-all.o obj-$(CONFIG_NO_KVM) += kvm-stub.o LIBS+=-lz +obj-y += event-tap.o QEMU_CFLAGS += $(VNC_TLS_CFLAGS) QEMU_CFLAGS += $(VNC_SASL_CFLAGS) diff --git a/event-tap.c b/event-tap.c new file mode 100644 index 000..95c147a --- /dev/null +++ b/event-tap.c @@ -0,0 +1,940 @@ +/* + * Event Tap functions for QEMU + * + * Copyright (c) 2010 Nippon Telegraph and Telephone Corporation. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
+ */ + +#include "qemu-common.h" +#include "qemu-error.h" +#include "block.h" +#include "block_int.h" +#include "ioport.h" +#include "osdep.h" +#include "sysemu.h" +#include "hw/hw.h" +#include "net.h" +#include "event-tap.h" +#include "trace.h" + +enum EVENT_TAP_STATE { +EVENT_TAP_OFF, +EVENT_TAP_ON, +EVENT_TAP_SUSPEND, +EVENT_TAP_FLUSH, +EVENT_TAP_LOAD, +EVENT_TAP_REPLAY, +}; + +static enum EVENT_TAP_STATE event_tap_state = EVENT_TAP_OFF; + +typedef struct EventTapIOport { +uint32_t address; +uint32_t data; +int index; +} EventTapIOport; + +#define MMIO_BUF_SIZE 8 + +typedef struct EventTapMMIO { +uint64_t address; +uint8_t buf[MMIO_BUF_SIZE]; +int len; +} EventTapMMIO; + +typedef struct EventTapNetReq { +char *device_name; +int iovcnt; +int vlan_id; +bool vlan_needed; +bool async; +struct iovec *iov; +NetPacketSent *sent_cb; +} EventTapNetReq; + +#define MAX_BLOCK_REQUEST 32 + +typedef struct EventTapAIOCB EventTapAIOCB; + +typedef struct EventTapBlkReq { +char *device_name; +int num_reqs; +int num_cbs; +bool is_flush; +BlockRequest reqs[MAX_BLOCK_REQUEST]; +EventTapAIOCB *acb[MAX_BLOCK_REQUEST]; +} EventTapBlkReq; + +#define EVENT_TAP_IOPORT (1 << 0) +#define EVENT_TAP_MMIO (1 << 1) +#define EVENT_TAP_NET (1 << 2) +#define EVENT_TAP_BLK (1 << 3) + +#define EVENT_TAP_TYPE_MASK (EVENT_TAP_NET - 1) + +typedef struct EventTapLog { +int mode; +union { +EventTapIOport ioport; +EventTapMMIO mmio; +}; +union { +EventTapNetReq net_req; +EventTapBlkReq blk_req; +}; +QTAILQ_ENTRY(EventTapLog) node; +} EventTapLog; + +struct EventTapAIOCB { +BlockDriverAIOCB common; +BlockDriverAIOCB *acb; +bool is_canceled; +}; + +static EventTapLog *last_event_tap; + +static QTAILQ_HEAD(, EventTapLog) event_list; +static QTAILQ_HEAD(, EventTapLog) event_pool; + +static int (*event_tap_cb)(void); +static QEMUBH *event_tap_bh; +static VMChangeStateEntry *vmstate; + +static void event_tap_bh_cb(void *p) +{ +if (event_tap_cb) { +event_tap_cb(); +} + +qemu_bh_delete(event_tap_bh); +event_tap_bh = NULL; +} + +static void 
event_tap_schedule_bh(void) +{ +trace_event_tap_ignore_bh(!!event_tap_bh); + +/* if bh is already set, we ignore it for now */ +if (event_tap_bh) { +return; +} + +event_tap_bh = qemu_bh_new(event_tap_bh_cb, NULL); +qemu_bh_schedule(event_tap_bh); + +return; +} + +static void *event_tap_alloc_log(void) +{ +EventTapLog *log; + +if (QTAILQ_EMPTY(&event_pool)) { +log = qemu_mallocz(sizeof(EventTapLog)); +} else { +log = QTAILQ_FIRST(&event_pool); +QTAILQ_REMOVE(&event_pool, log, node); +} + +return log; +} + +static void event_tap_free_net_req(EventTapNetReq *net_req); +static void event_tap_free_blk_req(EventTapBlkReq *blk_req); + +static void event_tap_free_log(EventTapLog *log) +{ +int mode = log->mode & ~EVENT_TAP_TYPE_MASK; + +if (mode == EVENT_TAP_NET) { +event_tap_free_net_req(&log->net_req); +} else if (mode == EVENT_TAP_BLK) { +event_tap_free_blk_req(&log->blk_req); +} + +log->mode = 0; + +/* return the log to event_pool */ +QTAILQ_INSERT_HEAD(&event_pool, log, node); +} + +static void event_tap_free_pool(void) +{ +EventTapLog *log, *next; + +QTAILQ_FOREACH_SAFE(log, &event_pool, node, next) { +QTAILQ_REMOVE(&event_pool, log, node); +qemu_free(log); +} +} + +static void event_tap_free_net_req(EventTapNetReq *net_req) +{ +int i; + +if (!net_req->async) { +for
[PATCH 09/18] Introduce event-tap.
event-tap controls when to start FT transaction, and provides proxy functions to called from net/block devices. While FT transaction, it queues up net/block requests, and flush them when the transaction gets completed. Signed-off-by: Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp Signed-off-by: OHMURA Kei ohmura@lab.ntt.co.jp --- Makefile.target |1 + event-tap.c | 940 +++ event-tap.h | 44 +++ qemu-tool.c | 28 ++ trace-events| 10 + 5 files changed, 1023 insertions(+), 0 deletions(-) create mode 100644 event-tap.c create mode 100644 event-tap.h diff --git a/Makefile.target b/Makefile.target index 220589e..da57efe 100644 --- a/Makefile.target +++ b/Makefile.target @@ -199,6 +199,7 @@ obj-y += rwhandler.o obj-$(CONFIG_KVM) += kvm.o kvm-all.o obj-$(CONFIG_NO_KVM) += kvm-stub.o LIBS+=-lz +obj-y += event-tap.o QEMU_CFLAGS += $(VNC_TLS_CFLAGS) QEMU_CFLAGS += $(VNC_SASL_CFLAGS) diff --git a/event-tap.c b/event-tap.c new file mode 100644 index 000..95c147a --- /dev/null +++ b/event-tap.c @@ -0,0 +1,940 @@ +/* + * Event Tap functions for QEMU + * + * Copyright (c) 2010 Nippon Telegraph and Telephone Corporation. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
+ */ + +#include "qemu-common.h" +#include "qemu-error.h" +#include "block.h" +#include "block_int.h" +#include "ioport.h" +#include "osdep.h" +#include "sysemu.h" +#include "hw/hw.h" +#include "net.h" +#include "event-tap.h" +#include "trace.h" + +enum EVENT_TAP_STATE { +EVENT_TAP_OFF, +EVENT_TAP_ON, +EVENT_TAP_SUSPEND, +EVENT_TAP_FLUSH, +EVENT_TAP_LOAD, +EVENT_TAP_REPLAY, +}; + +static enum EVENT_TAP_STATE event_tap_state = EVENT_TAP_OFF; + +typedef struct EventTapIOport { +uint32_t address; +uint32_t data; +int index; +} EventTapIOport; + +#define MMIO_BUF_SIZE 8 + +typedef struct EventTapMMIO { +uint64_t address; +uint8_t buf[MMIO_BUF_SIZE]; +int len; +} EventTapMMIO; + +typedef struct EventTapNetReq { +char *device_name; +int iovcnt; +int vlan_id; +bool vlan_needed; +bool async; +struct iovec *iov; +NetPacketSent *sent_cb; +} EventTapNetReq; + +#define MAX_BLOCK_REQUEST 32 + +typedef struct EventTapAIOCB EventTapAIOCB; + +typedef struct EventTapBlkReq { +char *device_name; +int num_reqs; +int num_cbs; +bool is_flush; +BlockRequest reqs[MAX_BLOCK_REQUEST]; +EventTapAIOCB *acb[MAX_BLOCK_REQUEST]; +} EventTapBlkReq; + +#define EVENT_TAP_IOPORT (1 << 0) +#define EVENT_TAP_MMIO (1 << 1) +#define EVENT_TAP_NET (1 << 2) +#define EVENT_TAP_BLK (1 << 3) + +#define EVENT_TAP_TYPE_MASK (EVENT_TAP_NET - 1) + +typedef struct EventTapLog { +int mode; +union { +EventTapIOport ioport; +EventTapMMIO mmio; +}; +union { +EventTapNetReq net_req; +EventTapBlkReq blk_req; +}; +QTAILQ_ENTRY(EventTapLog) node; +} EventTapLog; + +struct EventTapAIOCB { +BlockDriverAIOCB common; +BlockDriverAIOCB *acb; +bool is_canceled; +}; + +static EventTapLog *last_event_tap; + +static QTAILQ_HEAD(, EventTapLog) event_list; +static QTAILQ_HEAD(, EventTapLog) event_pool; + +static int (*event_tap_cb)(void); +static QEMUBH *event_tap_bh; +static VMChangeStateEntry *vmstate; + +static void event_tap_bh_cb(void *p) +{ +if (event_tap_cb) { +event_tap_cb(); +} + +qemu_bh_delete(event_tap_bh); +event_tap_bh = NULL; +} + +static void 
event_tap_schedule_bh(void) +{ +trace_event_tap_ignore_bh(!!event_tap_bh); + +/* if bh is already set, we ignore it for now */ +if (event_tap_bh) { +return; +} + +event_tap_bh = qemu_bh_new(event_tap_bh_cb, NULL); +qemu_bh_schedule(event_tap_bh); + +return; +} + +static void *event_tap_alloc_log(void) +{ +EventTapLog *log; + +if (QTAILQ_EMPTY(&event_pool)) { +log = qemu_mallocz(sizeof(EventTapLog)); +} else { +log = QTAILQ_FIRST(&event_pool); +QTAILQ_REMOVE(&event_pool, log, node); +} + +return log; +} + +static void event_tap_free_net_req(EventTapNetReq *net_req); +static void event_tap_free_blk_req(EventTapBlkReq *blk_req); + +static void event_tap_free_log(EventTapLog *log) +{ +int mode = log->mode & ~EVENT_TAP_TYPE_MASK; + +if (mode == EVENT_TAP_NET) { +event_tap_free_net_req(&log->net_req); +} else if (mode == EVENT_TAP_BLK) { +event_tap_free_blk_req(&log->blk_req); +} + +log->mode = 0; + +/* return the log to event_pool */ +QTAILQ_INSERT_HEAD(&event_pool, log, node); +} + +static void event_tap_free_pool(void) +{ +EventTapLog *log, *next; + +QTAILQ_FOREACH_SAFE(log, &event_pool, node, next) { +QTAILQ_REMOVE(&event_pool, log, node); +qemu_free(log); +} +} + +static void event_tap_free_net_req(EventTapNetReq *net_req) +{ +int i; + +if (!net_req->async) { +for
[PATCH 09/18] Introduce event-tap.
event-tap controls when to start FT transaction, and provides proxy functions to called from net/block devices. While FT transaction, it queues up net/block requests, and flush them when the transaction gets completed. Signed-off-by: Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp Signed-off-by: OHMURA Kei ohmura@lab.ntt.co.jp --- Makefile.target |1 + event-tap.c | 939 +++ event-tap.h | 44 +++ qemu-tool.c | 28 ++ trace-events| 10 + 5 files changed, 1022 insertions(+), 0 deletions(-) create mode 100644 event-tap.c create mode 100644 event-tap.h diff --git a/Makefile.target b/Makefile.target index b0ba95f..edbdbee 100644 --- a/Makefile.target +++ b/Makefile.target @@ -199,6 +199,7 @@ obj-y += rwhandler.o obj-$(CONFIG_KVM) += kvm.o kvm-all.o obj-$(CONFIG_NO_KVM) += kvm-stub.o LIBS+=-lz +obj-y += event-tap.o QEMU_CFLAGS += $(VNC_TLS_CFLAGS) QEMU_CFLAGS += $(VNC_SASL_CFLAGS) diff --git a/event-tap.c b/event-tap.c new file mode 100644 index 000..f44d835 --- /dev/null +++ b/event-tap.c @@ -0,0 +1,939 @@ +/* + * Event Tap functions for QEMU + * + * Copyright (c) 2010 Nippon Telegraph and Telephone Corporation. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
+ */ + +#include "qemu-common.h" +#include "qemu-error.h" +#include "block.h" +#include "block_int.h" +#include "ioport.h" +#include "osdep.h" +#include "sysemu.h" +#include "hw/hw.h" +#include "net.h" +#include "event-tap.h" +#include "trace.h" + +enum EVENT_TAP_STATE { +EVENT_TAP_OFF, +EVENT_TAP_ON, +EVENT_TAP_SUSPEND, +EVENT_TAP_FLUSH, +EVENT_TAP_LOAD, +EVENT_TAP_REPLAY, +}; + +static enum EVENT_TAP_STATE event_tap_state = EVENT_TAP_OFF; + +typedef struct EventTapIOport { +uint32_t address; +uint32_t data; +int index; +} EventTapIOport; + +#define MMIO_BUF_SIZE 8 + +typedef struct EventTapMMIO { +uint64_t address; +uint8_t buf[MMIO_BUF_SIZE]; +int len; +} EventTapMMIO; + +typedef struct EventTapNetReq { +char *device_name; +int iovcnt; +int vlan_id; +bool vlan_needed; +bool async; +struct iovec *iov; +NetPacketSent *sent_cb; +} EventTapNetReq; + +#define MAX_BLOCK_REQUEST 32 + +typedef struct EventTapAIOCB EventTapAIOCB; + +typedef struct EventTapBlkReq { +char *device_name; +int num_reqs; +int num_cbs; +bool is_flush; +BlockRequest reqs[MAX_BLOCK_REQUEST]; +EventTapAIOCB *acb[MAX_BLOCK_REQUEST]; +} EventTapBlkReq; + +#define EVENT_TAP_IOPORT (1 << 0) +#define EVENT_TAP_MMIO (1 << 1) +#define EVENT_TAP_NET (1 << 2) +#define EVENT_TAP_BLK (1 << 3) + +#define EVENT_TAP_TYPE_MASK (EVENT_TAP_NET - 1) + +typedef struct EventTapLog { +int mode; +union { +EventTapIOport ioport; +EventTapMMIO mmio; +}; +union { +EventTapNetReq net_req; +EventTapBlkReq blk_req; +}; +QTAILQ_ENTRY(EventTapLog) node; +} EventTapLog; + +struct EventTapAIOCB { +BlockDriverAIOCB common; +BlockDriverAIOCB *acb; +bool is_canceled; +}; + +static EventTapLog *last_event_tap; + +static QTAILQ_HEAD(, EventTapLog) event_list; +static QTAILQ_HEAD(, EventTapLog) event_pool; + +static int (*event_tap_cb)(void); +static QEMUBH *event_tap_bh; +static VMChangeStateEntry *vmstate; + +static void event_tap_bh_cb(void *p) +{ +if (event_tap_cb) { +event_tap_cb(); +} + +qemu_bh_delete(event_tap_bh); +event_tap_bh = NULL; +} + +static void 
event_tap_schedule_bh(void) +{ +trace_event_tap_ignore_bh(!!event_tap_bh); + +/* if bh is already set, we ignore it for now */ +if (event_tap_bh) { +return; +} + +event_tap_bh = qemu_bh_new(event_tap_bh_cb, NULL); +qemu_bh_schedule(event_tap_bh); + +return; +} + +static void *event_tap_alloc_log(void) +{ +EventTapLog *log; + +if (QTAILQ_EMPTY(&event_pool)) { +log = qemu_mallocz(sizeof(EventTapLog)); +} else { +log = QTAILQ_FIRST(&event_pool); +QTAILQ_REMOVE(&event_pool, log, node); +} + +return log; +} + +static void event_tap_free_net_req(EventTapNetReq *net_req); +static void event_tap_free_blk_req(EventTapBlkReq *blk_req); + +static void event_tap_free_log(EventTapLog *log) +{ +int mode = log->mode & ~EVENT_TAP_TYPE_MASK; + +if (mode == EVENT_TAP_NET) { +event_tap_free_net_req(&log->net_req); +} else if (mode == EVENT_TAP_BLK) { +event_tap_free_blk_req(&log->blk_req); +} + +log->mode = 0; + +/* return the log to event_pool */ +QTAILQ_INSERT_HEAD(&event_pool, log, node); +} + +static void event_tap_free_pool(void) +{ +EventTapLog *log, *next; + +QTAILQ_FOREACH_SAFE(log, &event_pool, node, next) { +QTAILQ_REMOVE(&event_pool, log, node); +qemu_free(log); +} +} + +static void event_tap_free_net_req(EventTapNetReq *net_req) +{ +int i; + +if (!net_req->async) { +for