date:20151110

[Qemu-devel] [PULL 39/44] xen_disk: Account for failed and invalid operations

2015-11-10 Thread Stefan Hajnoczi

From: Alberto Garcia 

Signed-off-by: Alberto Garcia 
Message-id: 
e0cbb96cb0e1f86c37c7ce332efdf02b57b9d365.1446044838.git.be...@igalia.com
Signed-off-by: Stefan Hajnoczi 
---
 hw/block/xen_disk.c | 23 ++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 4869518..02eda6e 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -537,7 +537,11 @@ static void qemu_aio_complete(void *opaque, int ret)
 break;
 }
 case BLKIF_OP_READ:
-block_acct_done(blk_get_stats(ioreq->blkdev->blk), >acct);
+if (ioreq->status == BLKIF_RSP_OKAY) {
+block_acct_done(blk_get_stats(ioreq->blkdev->blk), >acct);
+} else {
+block_acct_failed(blk_get_stats(ioreq->blkdev->blk), >acct);
+}
 break;
 case BLKIF_OP_DISCARD:
 default:
@@ -722,6 +726,23 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
 
 /* parse them */
 if (ioreq_parse(ioreq) != 0) {
+
+switch (ioreq->req.operation) {
+case BLKIF_OP_READ:
+block_acct_invalid(blk_get_stats(blkdev->blk),
+   BLOCK_ACCT_READ);
+break;
+case BLKIF_OP_WRITE:
+block_acct_invalid(blk_get_stats(blkdev->blk),
+   BLOCK_ACCT_WRITE);
+break;
+case BLKIF_OP_FLUSH_DISKCACHE:
+block_acct_invalid(blk_get_stats(blkdev->blk),
+   BLOCK_ACCT_FLUSH);
+default:
+break;
+};
+
 if (blk_send_response_one(ioreq)) {
 xen_be_send_notify(>xendev);
 }
-- 
2.5.0

[Qemu-devel] [PATCH for 2.5 v7 0/10] dataplane snapshot fixes

2015-11-10 Thread Denis V. Lunev

Running the test
while /bin/true ; do
virsh snapshot-create rhel7
sleep 10
virsh snapshot-delete rhel7 --current
done
with iothreads enabled on a running VM leads to a lot of trouble: hangs,
asserts, errors.

Anyway, I think that a construction like
assert(aio_context_is_locked(aio_context));
should be widely used to ensure proper locking.

Changes from v6:
- tricky part dropped from patch 7
- patch 5 reworked to process snapshot list differently in info commands
  and on savevm

Changes from v5:
- dropped already merged patch 11
- fixed spelling in patch 1
- changed order of condition in loops in all patches. Thank you Stefan
- dropped patch 9
- aio_context is not acquired any more in bdrv_all_find_vmstate_bs by request
  of Stefan
- patch 10 is implemented in completely different way

Changes from v4:
- only migration/savevm.c code and monitor is affected now. Generic block
  layer stuff will be sent separately to speed up merging. The approach
  in general was negotiated with Juan and Stefan.

Changes from v3:
- more places found
- new aio_poll concept, see patch 10

Changes from v2:
- dropped patch 5 as already merged
- changed locking scheme in patch 4 at Juan's suggestion

Changes from v1:
- aio-context locking added
- comment is rewritten

Signed-off-by: Denis V. Lunev 
CC: Stefan Hajnoczi 
CC: Juan Quintela 
CC: Kevin Wolf 

Denis V. Lunev (10):
  snapshot: create helper to test that block drivers supports snapshots
  snapshot: return error code from bdrv_snapshot_delete_by_id_or_name
  snapshot: create bdrv_all_delete_snapshot helper
  snapshot: create bdrv_all_goto_snapshot helper
  snapshot: create bdrv_all_find_snapshot helper
  migration: drop find_vmstate_bs check in hmp_delvm
  snapshot: create bdrv_all_create_snapshot helper
  migration: reorder processing in hmp_savevm
  migration: implement bdrv_all_find_vmstate_bs helper
  migration: normalize locking in migration/savevm.c

 block/snapshot.c | 135 ++-
 include/block/snapshot.h |  25 +-
 migration/savevm.c   | 207 +++
 3 files changed, 217 insertions(+), 150 deletions(-)

-- 
2.5.0

[Qemu-devel] [PULL 06/57] qemu_ram_block_by_name

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

Add a function to find a RAMBlock by name; use it in two
of the places that already open code that loop; we've
got another use later in postcopy.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 exec.c| 20 
 include/exec/cpu-common.h |  1 +
 migration/ram.c   | 35 +++
 3 files changed, 36 insertions(+), 20 deletions(-)

diff --git a/exec.c b/exec.c
index 53d3848..36886ee 100644
--- a/exec.c
+++ b/exec.c
@@ -1946,6 +1946,26 @@ found:
 return block;
 }

+/*
+ * Finds the named RAMBlock
+ *
+ * name: The name of RAMBlock to find
+ *
+ * Returns: RAMBlock (or NULL if not found)
+ */
+RAMBlock *qemu_ram_block_by_name(const char *name)
+{
+RAMBlock *block;
+
+QLIST_FOREACH_RCU(block, _list.blocks, next) {
+if (!strcmp(name, block->idstr)) {
+return block;
+}
+}
+
+return NULL;
+}
+
 /* Some of the softmmu routines need to translate from a host pointer
(typically a TLB entry) back to a ram offset.  */
 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 94d1f8a..85aa403 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -64,6 +64,7 @@ typedef uint32_t CPUReadMemoryFunc(void *opaque, hwaddr addr);
 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
 /* This should not be used by devices.  */
 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr);
+RAMBlock *qemu_ram_block_by_name(const char *name);
 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
ram_addr_t *ram_addr, ram_addr_t *offset);
 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev);
diff --git a/migration/ram.c b/migration/ram.c
index df3df9e..d654a73 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1420,14 +1420,12 @@ static inline void *host_from_stream_offset(QEMUFile *f,
 qemu_get_buffer(f, (uint8_t *)id, len);
 id[len] = 0;

-QLIST_FOREACH_RCU(block, _list.blocks, next) {
-if (!strncmp(id, block->idstr, sizeof(id)) &&
-block->max_length > offset) {
-return block->host + offset;
-}
+block = qemu_ram_block_by_name(id);
+if (block && block->max_length > offset) {
+return block->host + offset;
 }

-error_report("Can't find block %s!", id);
+error_report("Can't find block %s", id);
 return NULL;
 }

@@ -1576,23 +1574,20 @@ static int ram_load(QEMUFile *f, void *opaque, int 
version_id)
 id[len] = 0;
 length = qemu_get_be64(f);

-QLIST_FOREACH_RCU(block, _list.blocks, next) {
-if (!strncmp(id, block->idstr, sizeof(id))) {
-if (length != block->used_length) {
-Error *local_err = NULL;
+block = qemu_ram_block_by_name(id);
+if (block) {
+if (length != block->used_length) {
+Error *local_err = NULL;

-ret = qemu_ram_resize(block->offset, length, 
&local_err);
-if (local_err) {
-error_report_err(local_err);
-}
+ret = qemu_ram_resize(block->offset, length,
+  &local_err);
+if (local_err) {
+error_report_err(local_err);
 }
-ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
-  block->idstr);
-break;
 }
-}
-
-if (!block) {
+ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
+  block->idstr);
+} else {
 error_report("Unknown ramblock \"%s\", cannot "
  "accept migration", id);
 ret = -EINVAL;
-- 
2.5.0

[Qemu-devel] [PULL 38/44] virtio-blk: Account for failed and invalid operations

2015-11-10 Thread Stefan Hajnoczi

From: Alberto Garcia 

Signed-off-by: Alberto Garcia 
Message-id: 
4f623ce52c9d673d35a043fc2959526b41b685c6.1446044838.git.be...@igalia.com
Signed-off-by: Stefan Hajnoczi 
---
 hw/block/virtio-blk.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 093e475..e70fccf 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -76,7 +76,7 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, 
int error,
 s->rq = req;
 } else if (action == BLOCK_ERROR_ACTION_REPORT) {
 virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
-block_acct_done(blk_get_stats(s->blk), >acct);
+block_acct_failed(blk_get_stats(s->blk), >acct);
 virtio_blk_free_request(req);
 }
 
@@ -536,6 +536,8 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, 
MultiReqBuffer *mrb)
 if (!virtio_blk_sect_range_ok(req->dev, req->sector_num,
   req->qiov.size)) {
 virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
+block_acct_invalid(blk_get_stats(req->dev->blk),
+   is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);
 virtio_blk_free_request(req);
 return;
 }
-- 
2.5.0

[Qemu-devel] [PULL 11/57] ram_debug_dump_bitmap: Dump a migration bitmap as text

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

Useful for debugging the migration bitmap and other bitmaps
of the same format (including the sentmap in postcopy).

The bitmap is printed to stderr.
Lines that are all the expected value are excluded so the output
can be quite compact for many bitmaps.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Amit Shah 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 include/migration/migration.h |  1 +
 migration/ram.c   | 39 +++
 2 files changed, 40 insertions(+)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 83fba23..51bc348 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -145,6 +145,7 @@ uint64_t xbzrle_mig_pages_cache_miss(void);
 double xbzrle_mig_cache_miss_rate(void);

 void ram_handle_compressed(void *host, uint8_t ch, uint64_t size);
+void ram_debug_dump_bitmap(unsigned long *todump, bool expected);

 /**
  * @migrate_add_blocker - prevent migration from proceeding
diff --git a/migration/ram.c b/migration/ram.c
index d654a73..86bf657 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1160,6 +1160,45 @@ void migration_bitmap_extend(ram_addr_t old, ram_addr_t 
new)
 }
 }

+/*
+ * 'expected' is the value you expect the bitmap mostly to be full
+ * of; it won't bother printing lines that are all this value.
+ * If 'todump' is null the migration bitmap is dumped.
+ */
+void ram_debug_dump_bitmap(unsigned long *todump, bool expected)
+{
+int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
+
+int64_t cur;
+int64_t linelen = 128;
+char linebuf[129];
+
+if (!todump) {
+todump = atomic_rcu_read(_bitmap_rcu)->bmap;
+}
+
+for (cur = 0; cur < ram_pages; cur += linelen) {
+int64_t curb;
+bool found = false;
+/*
+ * Last line; catch the case where the line length
+ * is longer than remaining ram
+ */
+if (cur + linelen > ram_pages) {
+linelen = ram_pages - cur;
+}
+for (curb = 0; curb < linelen; curb++) {
+bool thisbit = test_bit(cur + curb, todump);
+linebuf[curb] = thisbit ? '1' : '.';
+found = found || (thisbit != expected);
+}
+if (found) {
+linebuf[curb] = '\0';
+fprintf(stderr,  "0x%08" PRIx64 " : %s\n", cur, linebuf);
+}
+}
+}
+
 /* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
  * long-running RCU critical section.  When rcu-reclaims in the code
  * start to become numerous it will be necessary to reduce the
-- 
2.5.0

[Qemu-devel] [PULL 33/57] Add qemu_savevm_state_complete_postcopy

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

Add qemu_savevm_state_complete_postcopy to complement
qemu_savevm_state_complete_precopy together with a new
save_live_complete_postcopy method on devices.

The save_live_complete_precopy method is called on
all devices during a precopy migration, and all non-postcopy
devices during a postcopy migration at the transition.

The save_live_complete_postcopy method is called at
the end of postcopy for all postcopiable devices.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Reviewed-by: Amit Shah 
Signed-off-by: Juan Quintela 
---
 include/migration/vmstate.h |  1 +
 include/sysemu/sysemu.h |  1 +
 migration/ram.c |  1 +
 migration/savevm.c  | 49 +++--
 4 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 7f16a38..7267e38 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -40,6 +40,7 @@ typedef struct SaveVMHandlers {
 SaveStateHandler *save_state;

 void (*cleanup)(void *opaque);
+int (*save_live_complete_postcopy)(QEMUFile *f, void *opaque);
 int (*save_live_complete_precopy)(QEMUFile *f, void *opaque);

 /* This runs both outside and inside the iothread lock.  */
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 70835d0..76a0b36 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -111,6 +111,7 @@ void qemu_savevm_state_begin(QEMUFile *f,
 void qemu_savevm_state_header(QEMUFile *f);
 int qemu_savevm_state_iterate(QEMUFile *f);
 void qemu_savevm_state_cleanup(void);
+void qemu_savevm_state_complete_postcopy(QEMUFile *f);
 void qemu_savevm_state_complete_precopy(QEMUFile *f);
 void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size,
uint64_t *res_non_postcopiable,
diff --git a/migration/ram.c b/migration/ram.c
index 0e53325..4053ca1 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1704,6 +1704,7 @@ static int ram_load(QEMUFile *f, void *opaque, int 
version_id)
 static SaveVMHandlers savevm_ram_handlers = {
 .save_live_setup = ram_save_setup,
 .save_live_iterate = ram_save_iterate,
+.save_live_complete_postcopy = ram_save_complete,
 .save_live_complete_precopy = ram_save_complete,
 .save_live_pending = ram_save_pending,
 .load_state = ram_load,
diff --git a/migration/savevm.c b/migration/savevm.c
index be61eb1..8e11877 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -978,19 +978,61 @@ static bool should_send_vmdesc(void)
 return !machine->suppress_vmdesc && !in_postcopy;
 }

+/*
+ * Calls the save_live_complete_postcopy methods
+ * causing the last few pages to be sent immediately and doing any associated
+ * cleanup.
+ * Note postcopy also calls qemu_savevm_state_complete_precopy to complete
+ * all the other devices, but that happens at the point we switch to postcopy.
+ */
+void qemu_savevm_state_complete_postcopy(QEMUFile *f)
+{
+SaveStateEntry *se;
+int ret;
+
+QTAILQ_FOREACH(se, _state.handlers, entry) {
+if (!se->ops || !se->ops->save_live_complete_postcopy) {
+continue;
+}
+if (se->ops && se->ops->is_active) {
+if (!se->ops->is_active(se->opaque)) {
+continue;
+}
+}
+trace_savevm_section_start(se->idstr, se->section_id);
+/* Section type */
+qemu_put_byte(f, QEMU_VM_SECTION_END);
+qemu_put_be32(f, se->section_id);
+
+ret = se->ops->save_live_complete_postcopy(f, se->opaque);
+trace_savevm_section_end(se->idstr, se->section_id, ret);
+save_section_footer(f, se);
+if (ret < 0) {
+qemu_file_set_error(f, ret);
+return;
+}
+}
+
+qemu_put_byte(f, QEMU_VM_EOF);
+qemu_fflush(f);
+}
+
 void qemu_savevm_state_complete_precopy(QEMUFile *f)
 {
 QJSON *vmdesc;
 int vmdesc_len;
 SaveStateEntry *se;
 int ret;
+bool in_postcopy = migration_in_postcopy(migrate_get_current());

 trace_savevm_state_complete_precopy();

 cpu_synchronize_all_states();

 QTAILQ_FOREACH(se, _state.handlers, entry) {
-if (!se->ops || !se->ops->save_live_complete_precopy) {
+if (!se->ops ||
+(in_postcopy && se->ops->save_live_complete_postcopy) ||
+!se->ops->save_live_complete_precopy) {
 continue;
 }
 if (se->ops && se->ops->is_active) {
@@ -1039,7 +1081,10 @@ void qemu_savevm_state_complete_precopy(QEMUFile *f)
 save_section_footer(f, se);
 }

-qemu_put_byte(f, QEMU_VM_EOF);
+if (!in_postcopy) {
+/* Postcopy stream will still be going */
+qemu_put_byte(f, QEMU_VM_EOF);
+}

 json_end_array(vmdesc);
 qjson_finish(vmdesc);
-- 
2.5.0

[Qemu-devel] [PULL 36/57] postcopy: Incoming initialisation

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: David Gibson 
Reviewed-by: Amit Shah 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 include/migration/migration.h|   3 ++
 include/migration/postcopy-ram.h |  12 +
 migration/postcopy-ram.c | 104 +++
 migration/ram.c  |  11 +
 migration/savevm.c   |   6 +++
 trace-events |   2 +
 6 files changed, 138 insertions(+)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index b382d77..6e42b58 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -86,6 +86,8 @@ struct MigrationIncomingState {
  */
 QemuEvent main_thread_load_event;

+/* For the kernel to send us notifications */
+int   userfault_fd;
 QEMUFile *to_src_file;
 QemuMutex rp_mutex;/* We send replies from multiple threads */

@@ -204,6 +206,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms);
 /* For incoming postcopy discard */
 int ram_discard_range(MigrationIncomingState *mis, const char *block_name,
   uint64_t start, size_t length);
+int ram_postcopy_incoming_init(MigrationIncomingState *mis);

 /**
  * @migrate_add_blocker - prevent migration from proceeding
diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h
index de79fa7..f87020c 100644
--- a/include/migration/postcopy-ram.h
+++ b/include/migration/postcopy-ram.h
@@ -17,6 +17,18 @@
 bool postcopy_ram_supported_by_host(void);

 /*
+ * Initialise postcopy-ram, setting the RAM to a state where we can go into
+ * postcopy later; must be called prior to any precopy.
+ * called from ram.c's similarly named ram_postcopy_incoming_init
+ */
+int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages);
+
+/*
+ * At the end of a migration where postcopy_ram_incoming_init was called.
+ */
+int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis);
+
+/*
  * Discard the contents of 'length' bytes from 'start'
  * We can assume that if we've been called postcopy_ram_hosttest returned true
  */
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 261feda..8478bfd 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -184,6 +184,97 @@ int postcopy_ram_discard_range(MigrationIncomingState 
*mis, uint8_t *start,
 return 0;
 }

+/*
+ * Setup an area of RAM so that it *can* be used for postcopy later; this
+ * must be done right at the start prior to pre-copy.
+ * opaque should be the MIS.
+ */
+static int init_range(const char *block_name, void *host_addr,
+  ram_addr_t offset, ram_addr_t length, void *opaque)
+{
+MigrationIncomingState *mis = opaque;
+
+trace_postcopy_init_range(block_name, host_addr, offset, length);
+
+/*
+ * We need the whole of RAM to be truly empty for postcopy, so things
+ * like ROMs and any data tables built during init must be zero'd
+ * - we're going to get the copy from the source anyway.
+ * (Precopy will just overwrite this data, so doesn't need the discard)
+ */
+if (postcopy_ram_discard_range(mis, host_addr, length)) {
+return -1;
+}
+
+return 0;
+}
+
+/*
+ * At the end of migration, undo the effects of init_range
+ * opaque should be the MIS.
+ */
+static int cleanup_range(const char *block_name, void *host_addr,
+ram_addr_t offset, ram_addr_t length, void *opaque)
+{
+MigrationIncomingState *mis = opaque;
+struct uffdio_range range_struct;
+trace_postcopy_cleanup_range(block_name, host_addr, offset, length);
+
+/*
+ * We turned off hugepage for the precopy stage with postcopy enabled
+ * we can turn it back on now.
+ */
+#ifdef MADV_HUGEPAGE
+if (madvise(host_addr, length, MADV_HUGEPAGE)) {
+error_report("%s HUGEPAGE: %s", __func__, strerror(errno));
+return -1;
+}
+#endif
+
+/*
+ * We can also turn off userfault now since we should have all the
+ * pages.   It can be useful to leave it on to debug postcopy
+ * if you're not sure it's always getting every page.
+ */
+range_struct.start = (uintptr_t)host_addr;
+range_struct.len = length;
+
+if (ioctl(mis->userfault_fd, UFFDIO_UNREGISTER, &range_struct)) {
+error_report("%s: userfault unregister %s", __func__, strerror(errno));
+
+return -1;
+}
+
+return 0;
+}
+
+/*
+ * Initialise postcopy-ram, setting the RAM to a state where we can go into
+ * postcopy later; must be called prior to any precopy.
+ * called from arch_init's similarly named ram_postcopy_incoming_init
+ */
+int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
+{
+if (qemu_ram_foreach_block(init_range,

[Qemu-devel] [PULL 27/57] Modify save_live_pending for postcopy

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

Modify save_live_pending to return separate postcopiable and
non-postcopiable counts.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Reviewed-by: Amit Shah 
Signed-off-by: Juan Quintela 
---
 include/migration/vmstate.h |  5 +++--
 include/sysemu/sysemu.h |  4 +++-
 migration/block.c   |  7 +--
 migration/migration.c   |  9 +++--
 migration/ram.c |  8 ++--
 migration/savevm.c  | 17 +
 trace-events|  2 +-
 7 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 9986ccc..7f16a38 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -54,8 +54,9 @@ typedef struct SaveVMHandlers {

 /* This runs outside the iothread lock!  */
 int (*save_live_setup)(QEMUFile *f, void *opaque);
-uint64_t (*save_live_pending)(QEMUFile *f, void *opaque, uint64_t 
max_size);
-
+void (*save_live_pending)(QEMUFile *f, void *opaque, uint64_t max_size,
+  uint64_t *non_postcopiable_pending,
+  uint64_t *postcopiable_pending);
 LoadStateHandler *load_state;
 } SaveVMHandlers;

diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index c27b926..70835d0 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -112,7 +112,9 @@ void qemu_savevm_state_header(QEMUFile *f);
 int qemu_savevm_state_iterate(QEMUFile *f);
 void qemu_savevm_state_cleanup(void);
 void qemu_savevm_state_complete_precopy(QEMUFile *f);
-uint64_t qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size);
+void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size,
+   uint64_t *res_non_postcopiable,
+   uint64_t *res_postcopiable);
 void qemu_savevm_command_send(QEMUFile *f, enum qemu_vm_cmd command,
   uint16_t len, uint8_t *data);
 void qemu_savevm_send_ping(QEMUFile *f, uint32_t value);
diff --git a/migration/block.c b/migration/block.c
index 4fb9b7c..310e2b3 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -748,7 +748,9 @@ static int block_save_complete(QEMUFile *f, void *opaque)
 return 0;
 }

-static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t 
max_size)
+static void block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
+   uint64_t *non_postcopiable_pending,
+   uint64_t *postcopiable_pending)
 {
 /* Estimate pending number of bytes to send */
 uint64_t pending;
@@ -767,7 +769,8 @@ static uint64_t block_save_pending(QEMUFile *f, void 
*opaque, uint64_t max_size)
 qemu_mutex_unlock_iothread();

 DPRINTF("Enter save live pending  %" PRIu64 "\n", pending);
-return pending;
+/* We don't do postcopy */
+*non_postcopiable_pending += pending;
 }

 static int block_load(QEMUFile *f, void *opaque, int version_id)
diff --git a/migration/migration.c b/migration/migration.c
index 7097e5b..bb4c92e 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1288,8 +1288,13 @@ static void *migration_thread(void *opaque)
 uint64_t pending_size;

 if (!qemu_file_rate_limit(s->file)) {
-pending_size = qemu_savevm_state_pending(s->file, max_size);
-trace_migrate_pending(pending_size, max_size);
+uint64_t pend_post, pend_nonpost;
+
+qemu_savevm_state_pending(s->file, max_size, &pend_nonpost,
+  &pend_post);
+pending_size = pend_nonpost + pend_post;
+trace_migrate_pending(pending_size, max_size,
+  pend_post, pend_nonpost);
 if (pending_size && pending_size >= max_size) {
 qemu_savevm_state_iterate(s->file);
 } else {
diff --git a/migration/ram.c b/migration/ram.c
index 4fa44a7..0e53325 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1383,7 +1383,9 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
 return 0;
 }

-static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
+static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
+ uint64_t *non_postcopiable_pending,
+ uint64_t *postcopiable_pending)
 {
 uint64_t remaining_size;

@@ -1397,7 +1399,9 @@ static uint64_t ram_save_pending(QEMUFile *f, void 
*opaque, uint64_t max_size)
 qemu_mutex_unlock_iothread();
 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
 }
-return remaining_size;
+
+/* We can do postcopy, and all the data is postcopiable */
+*postcopiable_pending += remaining_size;
 }

 static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void

Re: [Qemu-devel] [PATCH] qga: fix append file open modes for win32

2015-11-10 Thread Michael Roth

Quoting Kirk Allan (2015-11-09 15:49:05)
> For append file open modes, use FILE_APPEND_DATA for the desired access for 
> writing at the end of the file.
> 
> Signed-off-by: Kirk Allan 
> ---
>  qga/commands-win32.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/qga/commands-win32.c b/qga/commands-win32.c
> index a5306e7..0a23b9b 100644
> --- a/qga/commands-win32.c
> +++ b/qga/commands-win32.c
> @@ -70,16 +70,16 @@ static OpenFlags guest_file_open_modes[] = {
>  {"rb",  GENERIC_READ,   OPEN_EXISTING},
>  {"w",   GENERIC_WRITE,  CREATE_ALWAYS},
>  {"wb",  GENERIC_WRITE,  CREATE_ALWAYS},
> -{"a",   GENERIC_WRITE,  OPEN_ALWAYS  },
> +{"a",   FILE_APPEND_DATA,   OPEN_ALWAYS  },
>  {"r+",  GENERIC_WRITE|GENERIC_READ, OPEN_EXISTING},
>  {"rb+", GENERIC_WRITE|GENERIC_READ, OPEN_EXISTING},
>  {"r+b", GENERIC_WRITE|GENERIC_READ, OPEN_EXISTING},
>  {"w+",  GENERIC_WRITE|GENERIC_READ, CREATE_ALWAYS},
>  {"wb+", GENERIC_WRITE|GENERIC_READ, CREATE_ALWAYS},
>  {"w+b", GENERIC_WRITE|GENERIC_READ, CREATE_ALWAYS},
> -{"a+",  GENERIC_WRITE|GENERIC_READ, OPEN_ALWAYS  },
> -{"ab+", GENERIC_WRITE|GENERIC_READ, OPEN_ALWAYS  },
> -{"a+b", GENERIC_WRITE|GENERIC_READ, OPEN_ALWAYS  }
> +{"a+",  FILE_APPEND_DATA,   OPEN_ALWAYS  },
> +{"ab+", FILE_APPEND_DATA,   OPEN_ALWAYS  },
> +{"a+b", FILE_APPEND_DATA,   OPEN_ALWAYS  }

Cc'ing qemu-sta...@nongnu.org

Thanks for catching this, accidentally truncating files is
certainly not good...

I hit an issue testing this though, this does fix the append
case, but a+, ab+, a+b all imply append+read, while
FILE_APPEND_DATA only grants append access.

FILE_APPEND_DATA|GENERIC_READ seems to work, but I'm not
finding much official documentation on what's valid with
FILE_APPEND_DATA. Do you have a reference that might
confirm this is valid usage? The only reference to
FILE_APPEND_DATA I saw was a single comment in:

https://msdn.microsoft.com/en-us/library/windows/desktop/aa363858(v=vs.85).aspx

I'd like to get this in soon for 2.5 (hard freeze / rc0 is thursday).

>  };
> 
>  static OpenFlags *find_open_flag(const char *mode_str)
> -- 
> 1.8.5.6
>

Re: [Qemu-devel] [PATCH 03/16] tlb_set_page_with_attrs: Take argument specifying AddressSpace to use

2015-11-10 Thread Peter Maydell

On 9 November 2015 at 10:49, Peter Maydell  wrote:
> On 9 November 2015 at 10:44, Paolo Bonzini  wrote:
>>
>>
>> On 05/11/2015 19:15, Peter Maydell wrote:
>>> Add an argument to tlb_set_page_with_attrs which allows the target CPU code
>>> to tell the core code which AddressSpace to use.
>>>
>>> The AddressSpace is specified by the index into the array of ASes which
>>> were registered with cpu_address_space_init().
>>>
>>> Signed-off-by: Peter Maydell 
>>
>> Can it be deduced from the attributes instead?  Basically, you would have
>>
>>int cpu_get_asidx(MemTxAttrs attrs);
>>
>> in cpu.h, which is called by tlb_set_page_with_attrs.
>> cpu_get_phys_page_asidx_debug could also be replaced by
>> cpu_get_phys_page_attrs_debug.
>
> For ARM it could, certainly (and as you say, if we go that
> way then some of the extra passing around of asidxes collapses,
> and in particular we don't need to keep them separately in the
> iotlb entries). I wasn't convinced that this would be true for
> all possible uses of AddressSpaces.

Having thought a bit more about this, I think you're right. At
any rate, our current planned multi-as use will certainly work
with the existing attrs. And we could always steal a few bits
in the MemTxAttrs to indicate the asidx for some hypothetical
future usage even if they wouldn't otherwise have to be
exposed to the rest of the system outside the CPU.

thanks
-- PMM

[Qemu-devel] [PATCH 0/2] Clean up 32bit compilation for Migration

2015-11-10 Thread Juan Quintela

Hi

We had two warnings on 32bit targets.  With this change I am able to
compile with -Werror on:
- linux 64bit
- linux 32bit
- windows 32bit (cross-compile with Fedora mingw64)
- windows 64bit (cross-compile with Fedora mingw64)
  I am lying about this one: my cross-compiler for Windows 64-bit actually barfs at

   CXX   disas/libvixl/a64/disasm-a64.o
/mnt/kvm/qemu/next/disas/libvixl/a64/disasm-a64.cc: In member function 'virtual 
void vixl::Disassembler::AppendPCRelativeOffsetToOutput(const 
vixl::Instruction*, int64_t)':
/mnt/kvm/qemu/next/disas/libvixl/a64/disasm-a64.cc:1365:57: error: unknown 
conversion type character 'l' in format [-Werror=format=]
   AppendToOutput("#%c0x%" PRIx64, sign, std::abs(offset));
 ^
/mnt/kvm/qemu/next/disas/libvixl/a64/disasm-a64.cc:1365:57: error: too many 
arguments for format [-Werror=format-extra-args]
/mnt/kvm/qemu/next/disas/libvixl/a64/disasm-a64.cc: In member function 'virtual 
void vixl::Disassembler::AppendAddressToOutput(const vixl::Instruction*, const 
void*)':
/mnt/kvm/qemu/next/disas/libvixl/a64/disasm-a64.cc:1372:76: error: unknown 
conversion type character 'l' in format [-Werror=format=]
   AppendToOutput("(addr 0x%" PRIxPTR ")", reinterpret_cast(addr));
^
/mnt/kvm/qemu/next/disas/libvixl/a64/disasm-a64.cc:1372:76: error: too many 
arguments for format [-Werror=format-extra-args]
/mnt/kvm/qemu/next/disas/libvixl/a64/disasm-a64.cc: In member function 'virtual 
void vixl::Disassembler::AppendCodeRelativeAddressToOutput(const 
vixl::Instruction*, const void*)':
/mnt/kvm/qemu/next/disas/libvixl/a64/disasm-a64.cc:1393:52: error: unknown 
conversion type character 'l' in format [-Werror=format=]
 AppendToOutput("(addr 0x%" PRIx64 ")", rel_addr);
^
/mnt/kvm/qemu/next/disas/libvixl/a64/disasm-a64.cc:1393:52: error: too many 
arguments for format [-Werror=format-extra-args]
/mnt/kvm/qemu/next/disas/libvixl/a64/disasm-a64.cc:1395:54: error: unknown 
conversion type character 'l' in format [-Werror=format=]
 AppendToOutput("(addr -0x%" PRIx64 ")", -rel_addr);
  ^
.

I am wondering if it really understands PRI*64 at all.

Please, review.

Juan.


Juan Quintela (2):
  migration: print ram_addr_t as RAM_ADDR_FMT not %zx
  migration: Make 32bit linux compile with RDMA

 migration/ram.c  | 5 +++--
 migration/rdma.c | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

-- 
2.5.0

[Qemu-devel] [PATCH 2/2] migration: Make 32bit linux compile with RDMA

2015-11-10 Thread Juan Quintela

The rest of the file already uses that trick. 64-bit offsets make no sense on
32-bit archs, but that is ram_addr_t for you.

Signed-off-by: Juan Quintela 
---
 migration/rdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/migration/rdma.c b/migration/rdma.c
index 553fbd7..dcabb91 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -577,7 +577,7 @@ static int rdma_add_block(RDMAContext *rdma, const char 
*block_name,
 block->is_ram_block = local->init ? false : true;

 if (rdma->blockmap) {
-g_hash_table_insert(rdma->blockmap, (void *) block_offset, block);
+g_hash_table_insert(rdma->blockmap, (void *)(uintptr_t)block_offset, 
block);
 }

 trace_rdma_add_block(block_name, local->nb_blocks,
-- 
2.5.0

[Qemu-devel] [PATCH 1/2] migration: print ram_addr_t as RAM_ADDR_FMT not %zx

2015-11-10 Thread Juan Quintela

Not all the world is 64 bits (yet).

Signed-off-by: Juan Quintela 
---
 migration/ram.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/migration/ram.c b/migration/ram.c
index 62cf42b..d8d5a50 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1184,7 +1184,8 @@ int ram_save_queue_pages(MigrationState *ms, const char 
*rbname,
 }
 trace_ram_save_queue_pages(ramblock->idstr, start, len);
 if (start+len > ramblock->used_length) {
-error_report("%s request overrun start=%zx len=%zx blocklen=%zx",
+error_report("%s request overrun start=" RAM_ADDR_FMT " len="
+ RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
  __func__, start, len, ramblock->used_length);
 goto err;
 }
@@ -1845,7 +1846,7 @@ int ram_discard_range(MigrationIncomingState *mis,
 ret = postcopy_ram_discard_range(mis, host_startaddr, length);
 } else {
 error_report("ram_discard_range: Overrun block '%s' (%" PRIu64
- "/%zu/%zu)",
+ "/%zx/" RAM_ADDR_FMT")",
  block_name, start, length, rb->used_length);
 }

-- 
2.5.0

Re: [Qemu-devel] [PATCH v3 2/3] target-i386: calculate vcpu's TSC rate to be migrated

2015-11-10 Thread Haozhong Zhang

On 11/09/15 14:01, Eduardo Habkost wrote:
> On Mon, Nov 09, 2015 at 08:33:55AM +0800, haozhong.zh...@intel.com wrote:
> > On 11/06/15 13:12, Eduardo Habkost wrote:
> > > On Fri, Nov 06, 2015 at 10:32:24AM +0800, haozhong.zh...@intel.com wrote:
> > > > On 11/05/15 14:05, Eduardo Habkost wrote:
> > > > > On Thu, Nov 05, 2015 at 09:30:51AM +0800, Haozhong Zhang wrote:
> > > > > > On 11/04/15 19:42, Eduardo Habkost wrote:
> > > [...]
> > > > > > > > +env->tsc_khz_saved = r;
> > > > > > > > +}
> > > > > > > 
> > > > > > > Why do you need a separate tsc_khz_saved field, and don't simply 
> > > > > > > use
> > > > > > > tsc_khz? It would have the additional feature of letting QMP 
> > > > > > > clients
> > > > > > > query the current TSC rate by asking for the tsc-freq property on 
> > > > > > > CPU
> > > > > > > objects.
> > > > > > >
> > > > > > 
> > > > > > It's to avoid overriding env->tsc_khz on the destination in the
> > > > > > migration. I can change this line to
> > > > > >  env->tsc_khz = env->tsc_khz_saved = r;
> > > > > 
> > > > > You are already avoiding overriding env->tsc_khz, because you use
> > > > > KVM_GET_TSC_KHZ only if tsc_khz is not set yet. I still don't see why
> > > > > you need a tsc_khz_saved field that requires duplicating the 
> > > > > SET_TSC_KHZ
> > > > > code, if you could just do this:
> > > > > 
> > > > > if (!env->tsc_khz) {
> > > > > env->tsc_khz = kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ);
> > > > > }
> > > > >
> > > > 
> > > > Consider an example that we migrate a VM from machine A to machine B
> > > > and then to machine C, and QEMU on machine B is launched with the cpu
> > > > option 'tsc-freq' (i.e. env->tsc_khz on B is non-zero at the
> > > > beginning):
> > > >  1) In the migration from B to C, the user-specified TSC frequency by
> > > > 'tsc-freq' on B is expected to be migrated to C. That is, the
> > > > value of env->tsc_khz on B is migrated.
> > > >  2) If TSC frequency is migrated through env->tsc_khz, then
> > > > env->tsc_khz on B will be overrode in the migration from A to B
> > > > before kvm_arch_setup_tsc_khz(). If the guest TSC frequency is
> > > > different than the user-specified TSC frequency on B, the
> > > > expectation in 1) will not be satisfied anymore.
> > > 
> > > Setting tsc-freq on B when tsc-freq was not used on A is invalid usage.
> > > This is not different from changing the CPU model and adding or removing
> > > CPU flags when migrating, which is also incorrect. The command-line
> > > parameters defining the VM must be the same when you migrate.
> > >
> > 
> > Good to know it's an invalid usage. Then the question is what QEMU is
> > expected to do for this invalid usage?
> > 
> >  1) Abort the migration? But I find that the current QEMU does not
> > abort the migration between different CPU models (e.g. Nehalem and
> > Haswell).
> > 
> >  2) Or do not abort the migration and ignore tsc-freq option? If so,
> > tsc_khz_saved will be not needed.
> 
> My first choice is to abort migration. If we decide to abort today and
> find it to cause problems, we can easily fix it. If we decide to
> continue without aborting, it is difficult to change that behavior
> without breaking existing setups.
>

Two additional questions:

 1) Existing QEMU allows 'tsc-freq' on the destination in the
migration. If we decided to abort when both 'tsc-freq' and
migrated TSC were present on the destination, it would break some
existing usages. Considering backward compatibility, would above
choice 2) be better?

 2) If we do decide to abort, could I use abort()? Or are there other
clean approaches to abort?

Haozhong

> -- 
> Eduardo
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Qemu-devel] [PULL v2 32/40] qemu-io: Check for trailing chars

2015-11-10 Thread Kevin Wolf

From: John Snow 

Make sure there's no trailing garbage, e.g.
"64k-whatever-i-want-here"

Reported-by: Max Reitz 
Signed-off-by: John Snow 
Reviewed-by: Eric Blake 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 qemu-io-cmds.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index 20605f2..238b1da 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -136,7 +136,14 @@ static char **breakline(char *input, int *count)
 static int64_t cvtnum(const char *s)
 {
 char *end;
-return qemu_strtosz_suffix(s, &end, QEMU_STRTOSZ_DEFSUFFIX_B);
+int64_t ret;
+
+ret = qemu_strtosz_suffix(s, &end, QEMU_STRTOSZ_DEFSUFFIX_B);
+if (*end != '\0') {
+/* Detritus at the end of the string */
+return -EINVAL;
+}
+return ret;
 }
 
 #define EXABYTES(x) ((long long)(x) << 60)
-- 
1.8.3.1

[Qemu-devel] [PULL 00/57] Migration pull (take 2)

2015-11-10 Thread Juan Quintela



Hi

Now with Peter's changes to make it compile on his Windows cross-compiler.

thanks, Peter.


two items:

- Postcopy pull request (dave)
- fix for qemu_completion (Denis)

Please apply.

Thanks, Juan.


The following changes since commit a8b4f9585a0bf5186fca793ce2c5d754cd8ec49a:

  Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2015-11-10' into 
staging (2015-11-10 09:39:24 +0000)

are available in the git repository at:

  git://github.com/juanquintela/qemu.git tags/migration/20151110

for you to fetch changes up to 15b3b8eaae8dbcc903bb164311ea0066c77536a7:

  migration: qemu_savevm_state_cleanup becomes mandatory operation (2015-11-10 
15:00:28 +0100)


migration/next for 20151110


Denis V. Lunev (1):
  migration: qemu_savevm_state_cleanup becomes mandatory operation

Dr. David Alan Gilbert (56):
  Add postcopy documentation
  Provide runtime Target page information
  Move configuration section writing
  Move page_size_init earlier
  qemu_ram_block_from_host
  qemu_ram_block_by_name
  Rename mis->file to from_src_file
  Add qemu_get_buffer_in_place to avoid copies some of the time
  Add wrapper for setting blocking status on a QEMUFile
  Add QEMU_MADV_NOHUGEPAGE
  ram_debug_dump_bitmap: Dump a migration bitmap as text
  ram_load: Factor out host_from_stream_offset call and check
  migrate_init: Call from savevm
  Rename save_live_complete to save_live_complete_precopy
  Add Linux userfaultfd.h header
  Return path: Open a return path on QEMUFile for sockets
  Return path: socket_writev_buffer: Block even on non-blocking fd's
  Migration commands
  Return path: Control commands
  Return path: Send responses from destination to source
  migration_is_setup_or_active
  Return path: Source handling of return path
  Rework loadvm path for subloops
  Add migration-capability boolean for postcopy-ram.
  Add wrappers and handlers for sending/receiving the postcopy-ram 
migration messages.
  MIG_CMD_PACKAGED: Send a packaged chunk of migration stream
  Modify save_live_pending for postcopy
  postcopy: OS support test
  migrate_start_postcopy: Command to trigger transition to postcopy
  migration_completion: Take current state
  MIGRATION_STATUS_POSTCOPY_ACTIVE: Add new migration state
  Avoid sending vmdescription during postcopy
  Add qemu_savevm_state_complete_postcopy
  Postcopy: Maintain unsentmap
  migration_completion: Take current state
  postcopy: Incoming initialisation
  postcopy: ram_enable_notify to switch on userfault
  Postcopy: Postcopy startup in migration thread
  Postcopy: End of iteration
  Page request: Add MIG_RP_MSG_REQ_PAGES reverse command
  Page request: Process incoming page request
  Page request: Consume pages off the post-copy queue
  postcopy_ram.c: place_page and helpers
  Postcopy: Use helpers to map pages during migration
  postcopy: Check order of received target pages
  Don't sync dirty bitmaps in postcopy
  Don't iterate on precopy-only devices during postcopy
  Host page!=target page: Cleanup bitmaps
  Round up RAMBlock sizes to host page sizes
  Postcopy; Handle userfault requests
  Start up a postcopy/listener thread ready for incoming page data
  postcopy: Wire up loadvm_postcopy_handle_ commands
  Postcopy: Mark nohugepage before discard
  End of migration for postcopy
  Disable mlock around incoming postcopy
  Inhibit ballooning during postcopy

 balloon.c |  11 +
 docs/migration.txt| 191 
 exec.c|  92 +++-
 hmp-commands.hx   |  15 +
 hmp.c |   7 +
 hmp.h |   1 +
 hw/ppc/spapr.c|   2 +-
 hw/virtio/virtio-balloon.c|   4 +-
 include/exec/cpu-common.h |   4 +
 include/exec/exec-all.h   |   1 -
 include/exec/ram_addr.h   |   2 -
 include/migration/migration.h | 121 -
 include/migration/postcopy-ram.h  |  99 
 include/migration/qemu-file.h |  10 +
 include/migration/vmstate.h   |   8 +-
 include/qemu-common.h |   1 +
 include/qemu/osdep.h  |   9 +
 include/qemu/typedefs.h   |   3 +
 include/sysemu/balloon.h  |   2 +
 include/sysemu/sysemu.h   |  46 +-
 kvm-all.c |   1 -
 linux-headers/linux/userfaultfd.h | 167 +++
 migration/Makefile.objs   |   2 +-
 migration/block.c |   9 +-
 migration/migration.c | 723 +--
 migration/postcopy-ram.c  | 767 +
 migration/qemu-file-unix.c| 111 -
 migration/qemu-fil

[Qemu-devel] [PATCH 03/10] snapshot: create bdrv_all_delete_snapshot helper

2015-11-10 Thread Denis V. Lunev

to delete snapshots from all loaded block drivers.

The patch also ensures proper locking.

Signed-off-by: Denis V. Lunev 
CC: Juan Quintela 
CC: Stefan Hajnoczi 
CC: Kevin Wolf 
---
 block/snapshot.c | 22 
 include/block/snapshot.h |  2 ++
 migration/savevm.c   | 54 +---
 3 files changed, 34 insertions(+), 44 deletions(-)

diff --git a/block/snapshot.c b/block/snapshot.c
index ed0422d..61a6ad1 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -381,3 +381,25 @@ bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs)
 *first_bad_bs = bs;
 return ok;
 }
+
+int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs,
+ Error **err)
+{
+int ret = 0;
+BlockDriverState *bs = NULL;
+QEMUSnapshotInfo sn1, *snapshot = &sn1;
+
+while (ret == 0 && (bs = bdrv_next(bs))) {
+AioContext *ctx = bdrv_get_aio_context(bs);
+
+aio_context_acquire(ctx);
+if (bdrv_can_snapshot(bs) &&
+bdrv_snapshot_find(bs, snapshot, name) >= 0) {
+ret = bdrv_snapshot_delete_by_id_or_name(bs, name, err);
+}
+aio_context_release(ctx);
+}
+
+*first_bad_bs = bs;
+return ret;
+}
diff --git a/include/block/snapshot.h b/include/block/snapshot.h
index 9ddfd42..d02d2b1 100644
--- a/include/block/snapshot.h
+++ b/include/block/snapshot.h
@@ -82,5 +82,7 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
  * when appropriate for appropriate block drivers */
 
 bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs);
+int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bsd_bs,
+ Error **err);
 
 #endif
diff --git a/migration/savevm.c b/migration/savevm.c
index c212288..1157a6f 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1248,35 +1248,6 @@ static BlockDriverState *find_vmstate_bs(void)
 return NULL;
 }
 
-/*
- * Deletes snapshots of a given name in all opened images.
- */
-static int del_existing_snapshots(Monitor *mon, const char *name)
-{
-BlockDriverState *bs;
-QEMUSnapshotInfo sn1, *snapshot = &sn1;
-Error *err = NULL;
-
-bs = NULL;
-while ((bs = bdrv_next(bs))) {
-if (bdrv_can_snapshot(bs) &&
-bdrv_snapshot_find(bs, snapshot, name) >= 0) {
-bdrv_snapshot_delete_by_id_or_name(bs, name, &err);
-if (err) {
-monitor_printf(mon,
-   "Error while deleting snapshot on device '%s':"
-   " %s\n",
-   bdrv_get_device_name(bs),
-   error_get_pretty(err));
-error_free(err);
-return -1;
-}
-}
-}
-
-return 0;
-}
-
 void hmp_savevm(Monitor *mon, const QDict *qdict)
 {
 BlockDriverState *bs, *bs1;
@@ -1334,7 +1305,11 @@ void hmp_savevm(Monitor *mon, const QDict *qdict)
 }
 
 /* Delete old snapshots of the same name */
-if (name && del_existing_snapshots(mon, name) < 0) {
+if (name && bdrv_all_delete_snapshot(name, &bs1, &local_err) < 0) {
+monitor_printf(mon,
+   "Error while deleting snapshot on device '%s': %s\n",
+   bdrv_get_device_name(bs1), error_get_pretty(local_err));
+error_free(local_err);
 goto the_end;
 }
 
@@ -1494,20 +1469,11 @@ void hmp_delvm(Monitor *mon, const QDict *qdict)
 return;
 }
 
-bs = NULL;
-while ((bs = bdrv_next(bs))) {
-if (bdrv_can_snapshot(bs)) {
-err = NULL;
-bdrv_snapshot_delete_by_id_or_name(bs, name, &err);
-if (err) {
-monitor_printf(mon,
-   "Error while deleting snapshot on device '%s':"
-   " %s\n",
-   bdrv_get_device_name(bs),
-   error_get_pretty(err));
-error_free(err);
-}
-}
+if (bdrv_all_delete_snapshot(name, &bs, &err) < 0) {
+monitor_printf(mon,
+   "Error while deleting snapshot on device '%s': %s\n",
+   bdrv_get_device_name(bs), error_get_pretty(err));
+error_free(err);
 }
 }
 
-- 
2.5.0

[Qemu-devel] [PULL 16/57] Return path: Open a return path on QEMUFile for sockets

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

Postcopy needs a method to send messages from the destination back to
the source, this is the 'return path'.

Wire it up for 'socket' QEMUFile's.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Reviewed-by: Amit Shah 
Signed-off-by: Juan Quintela 
---
 include/migration/qemu-file.h |  7 +
 migration/qemu-file-unix.c| 69 +--
 migration/qemu-file.c | 12 
 3 files changed, 79 insertions(+), 9 deletions(-)

diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
index 66e741f..b5d08d2 100644
--- a/include/migration/qemu-file.h
+++ b/include/migration/qemu-file.h
@@ -89,6 +89,11 @@ typedef size_t (QEMURamSaveFunc)(QEMUFile *f, void *opaque,
uint64_t *bytes_sent);

 /*
+ * Return a QEMUFile for comms in the opposite direction
+ */
+typedef QEMUFile *(QEMURetPathFunc)(void *opaque);
+
+/*
  * Stop any read or write (depending on flags) on the underlying
  * transport on the QEMUFile.
  * Existing blocking reads/writes must be woken
@@ -106,6 +111,7 @@ typedef struct QEMUFileOps {
 QEMURamHookFunc *after_ram_iterate;
 QEMURamHookFunc *hook_ram_load;
 QEMURamSaveFunc *save_page;
+QEMURetPathFunc *get_return_path;
 QEMUFileShutdownFunc *shut_down;
 } QEMUFileOps;

@@ -196,6 +202,7 @@ int64_t qemu_file_get_rate_limit(QEMUFile *f);
 int qemu_file_get_error(QEMUFile *f);
 void qemu_file_set_error(QEMUFile *f, int ret);
 int qemu_file_shutdown(QEMUFile *f);
+QEMUFile *qemu_file_get_return_path(QEMUFile *f);
 void qemu_fflush(QEMUFile *f);
 void qemu_file_set_blocking(QEMUFile *f, bool block);

diff --git a/migration/qemu-file-unix.c b/migration/qemu-file-unix.c
index 809bf07..bcb744b 100644
--- a/migration/qemu-file-unix.c
+++ b/migration/qemu-file-unix.c
@@ -97,6 +97,56 @@ static int socket_shutdown(void *opaque, bool rd, bool wr)
 }
 }

+static int socket_return_close(void *opaque)
+{
+QEMUFileSocket *s = opaque;
+/*
+ * Note: We don't close the socket, that should be done by the forward
+ * path.
+ */
+g_free(s);
+return 0;
+}
+
+static const QEMUFileOps socket_return_read_ops = {
+.get_fd  = socket_get_fd,
+.get_buffer  = socket_get_buffer,
+.close   = socket_return_close,
+.shut_down   = socket_shutdown,
+};
+
+static const QEMUFileOps socket_return_write_ops = {
+.get_fd  = socket_get_fd,
+.writev_buffer   = socket_writev_buffer,
+.close   = socket_return_close,
+.shut_down   = socket_shutdown,
+};
+
+/*
+ * Give a QEMUFile* off the same socket but data in the opposite
+ * direction.
+ */
+static QEMUFile *socket_get_return_path(void *opaque)
+{
+QEMUFileSocket *forward = opaque;
+QEMUFileSocket *reverse;
+
+if (qemu_file_get_error(forward->file)) {
+/* If the forward file is in error, don't try and open a return */
+return NULL;
+}
+
+reverse = g_malloc0(sizeof(QEMUFileSocket));
+reverse->fd = forward->fd;
+/* I don't think there's a better way to tell which direction 'this' is */
+if (forward->file->ops->get_buffer != NULL) {
+/* being called from the read side, so we need to be able to write */
+return qemu_fopen_ops(reverse, &socket_return_write_ops);
+} else {
+return qemu_fopen_ops(reverse, &socket_return_read_ops);
+}
+}
+
 static ssize_t unix_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
   int64_t pos)
 {
@@ -206,18 +256,19 @@ QEMUFile *qemu_fdopen(int fd, const char *mode)
 }

 static const QEMUFileOps socket_read_ops = {
-.get_fd = socket_get_fd,
-.get_buffer = socket_get_buffer,
-.close  = socket_close,
-.shut_down  = socket_shutdown
-
+.get_fd  = socket_get_fd,
+.get_buffer  = socket_get_buffer,
+.close   = socket_close,
+.shut_down   = socket_shutdown,
+.get_return_path = socket_get_return_path
 };

 static const QEMUFileOps socket_write_ops = {
-.get_fd= socket_get_fd,
-.writev_buffer = socket_writev_buffer,
-.close = socket_close,
-.shut_down = socket_shutdown
+.get_fd  = socket_get_fd,
+.writev_buffer   = socket_writev_buffer,
+.close   = socket_close,
+.shut_down   = socket_shutdown,
+.get_return_path = socket_get_return_path
 };

 QEMUFile *qemu_fopen_socket(int fd, const char *mode)
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 9ec2267..0bbd257 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -44,6 +44,18 @@ int qemu_file_shutdown(QEMUFile *f)
 return f->ops->shut_down(f->opaque, true, true);
 }

+/*
+ * Result: QEMUFile* for a 'return path' for comms in the opposite direction
+ * NULL if not available

[Qemu-devel] What to do about QAPI naming convention violations (was: [PATCH RFC 3/5] qapi: Use common name mangling for enumeration constants)

2015-11-10 Thread Markus Armbruster

Markus Armbruster  writes:

> Markus Armbruster  writes:
>
>> Eric Blake  writes:
>>
>>> On 11/05/2015 09:01 AM, Daniel P. Berrange wrote:
 On Thu, Nov 05, 2015 at 04:30:00PM +0100, Markus Armbruster wrote:
> QAPI names needn't be valid C identifiers, so we mangle them with
> c_name().  Except for enumeration constants, which we mangle with
> camel_to_upper().
>
> c_name() is easy enough to understand: replace '.' and '-' by '_',
> prefix certain ticklish identifiers with 'q_'.
>
> camel_to_upper() is a hairball of heuristics, and guessing how it'll
> mangle interesting input could serve as a (nerdy) game.  Despite some
> tweaking (commit 5d371f4), it's still inadqeuate for some QAPI names
> (commit 351d36e).
>>>
>>> One of the issues at hand is whether we want to (eventually) teach QMP
>>> to be case-insensitive.  Right now, our c_name() mangling preserves case
>>> (you can have a struct with members 'a' and 'A'), although (hopefully)
>>> no one is relying on it.  But camel_to_upper() is case-insensitive ('a'
>>> and 'A' would result in the same enum constant).
>>>
>>> In order to (later) support case-insensitive QMP, we need to decide up
>>> front that we will not allow any qapi member names to collide
>>> case-insensitively (outlaw 'a' and 'A' in the same struct; although the
>>> C code is still case-preserving); and now that this series is adding a
>>> single check_clash() function, it's very easy to do.  In fact, I'll add
>>> that to my series for 2.5 (it's always easier to reserve something now,
>>> especially if no one was using it, and then relax later; than it is to
>>> try and restrict things later but run into counter-cases).
>>
>> I doubt QMP should be made case-insensitive.  JSON isn't, C isn't.  Our
>> use of case is actually fairly consistent: event names are ALL_CAPS,
>> everything else is in lower case.  Complete list of exceptions found in
>> result of query-qmp-schema:
>>
>> * struct UuidInfo member UUID
>> * struct CpuInfo members CPU and PC
>> * enum ACPISlotType member DIMM
>> * enum InputButton members Left, Middle, Right, WheelUp, WheelDown
>> * enum InputAxis members X, Y
>>
>> That said, an interface where names differ only in case is a badly
>> designed interface.  I'd be fine with rejecting such abuse.
>>
>> Oddballs not related to case:
>>
>> * enum BlkdebugEvent uses '.' in member names
>> * enum QKeyCode uses member names starting with a digit
>>
>> For me, the one argument for some kind of insensitivity is our idiotic
>> habit to sometimes string words together with '_' instead of '-', which
>> has led to an unholy mess.  The offenders are
>>
>> * commands block_passwd, block_resize, block_set_io_throttle,
>>   client_migrate_info, device_del, expire_password, migrate_cancel,
>>   migrate_set_downtime, migrate_set_speed, netdev_add, netdev_del,
>>   set_link, set_password, system_powerdown, system_reset, system_wakeup

Missing: add_client.

>> * enum types BlkdebugEvent, BlockdevDriver, QKeyCode
>> * object types BlkdebugSetStateOptions, BlockDeviceInfo,
>>   BlockDeviceInfo, BlockDeviceStats, BlockInfo, CpuInfo, PciBusInfo,
>>   PciDeviceInfo, PciMemoryRegion, VncClientInfo
>
> I can think of a few ways to clean up the '_' vs. '-' mess:
>
> 1. Fix the offenders, keep the unfixed names as aliases.
>
>Requires an alias mechanism.
>
>If we do it in 2.5, we can keep the aliases out of QMP introspection.
>
> 2. Fix the offenders, map '_' to '-' in QMP input, but only in object
>keys and values of enumeration type, not other strings.
>
>Distinguishing the two kinds of strings might be non-trivial, dunno.
>
> 3. Compare '_' and '-' equal in all the necessary places.
>
>Need to find these places.
>
>The mess remains visible in QMP introspection unless we also fix the
>offenders.

Of course, there's a big difference between QMP input and output.

On input, we can accept a nicer name in addition to the ugly name, and
that's compatible.

On output, we can only duplicate data with an ugly name under a nicer
one.  Duplicating members or events doesn't feel like an improvement.
That leaves deprecating commands.

I had a closer look at how the screwy names are used in QMP to see how
much of the mess is fixable within reason.

Command names are all fixable.

InputButton and InputAxis are input for x-input-send-event.  Fixable.
May not even need backward compatibility.

QKeyCode is input for x-input-send-event and send-key.  Fixable.
However, I think we want to bend the rules instead.

ACPISlotType, BlockDeviceInfo, BlockDeviceStats, BlockInfo, CpuInfo,
PciBusInfo, PciDeviceInfo, PciMemoryRegion, UuidInfo, VncClientInfo are
all output of some query command.  The only fix is deprecating the query
commands, which is a big hammer.

BlkdebugEvent is related to the external blkdebug interface described by
docs/blkdebug.txt.  The two are actually decoupled, i.e.

[Qemu-devel] [PULL 43/57] postcopy_ram.c: place_page and helpers

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

postcopy_place_page (etc) provide a way for postcopy to place a page
into guests memory atomically (using the copy ioctl on the ufd).

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Amit Shah 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 include/migration/migration.h|   1 +
 include/migration/postcopy-ram.h |  21 
 migration/postcopy-ram.c | 100 +++
 trace-events |   2 +
 4 files changed, 124 insertions(+)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 1491bf3..a48471e 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -96,6 +96,7 @@ struct MigrationIncomingState {
 int   userfault_fd;
 QEMUFile *to_src_file;
 QemuMutex rp_mutex;/* We send replies from multiple threads */
+void *postcopy_tmp_page;

 /* See savevm.c */
 LoadStateEntry_Head loadvm_handlers;
diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h
index b10c03d..d7c292f 100644
--- a/include/migration/postcopy-ram.h
+++ b/include/migration/postcopy-ram.h
@@ -69,4 +69,25 @@ void postcopy_discard_send_range(MigrationState *ms, 
PostcopyDiscardState *pds,
 void postcopy_discard_send_finish(MigrationState *ms,
   PostcopyDiscardState *pds);

+/*
+ * Place a page (from) at (host) efficiently
+ *There are restrictions on how 'from' must be mapped, in general best
+ *to use other postcopy_ routines to allocate.
+ * returns 0 on success
+ */
+int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from);
+
+/*
+ * Place a zero page at (host) atomically
+ * returns 0 on success
+ */
+int postcopy_place_page_zero(MigrationIncomingState *mis, void *host);
+
+/*
+ * Allocate a page of memory that can be mapped at a later point in time
+ * using postcopy_place_page
+ * Returns: Pointer to allocated page
+ */
+void *postcopy_get_tmp_page(MigrationIncomingState *mis);
+
 #endif
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 3110b2a..58492c0 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -272,6 +272,10 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState 
*mis)
 return -1;
 }

+if (mis->postcopy_tmp_page) {
+munmap(mis->postcopy_tmp_page, getpagesize());
+mis->postcopy_tmp_page = NULL;
+}
 return 0;
 }

@@ -338,6 +342,83 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 return 0;
 }

+/*
+ * Place a host page (from) at (host) atomically
+ * returns 0 on success
+ */
+int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
+{
+struct uffdio_copy copy_struct;
+
+copy_struct.dst = (uint64_t)(uintptr_t)host;
+copy_struct.src = (uint64_t)(uintptr_t)from;
+copy_struct.len = getpagesize();
+copy_struct.mode = 0;
+
+/* copy also acks to the kernel waking the stalled thread up
+ * TODO: We can inhibit that ack and only do it if it was requested
+ * which would be slightly cheaper, but we'd have to be careful
+ * of the order of updating our page state.
+ */
+if (ioctl(mis->userfault_fd, UFFDIO_COPY, &copy_struct)) {
+int e = errno;
+error_report("%s: %s copy host: %p from: %p",
+ __func__, strerror(e), host, from);
+
+return -e;
+}
+
+trace_postcopy_place_page(host);
+return 0;
+}
+
+/*
+ * Place a zero page at (host) atomically
+ * returns 0 on success
+ */
+int postcopy_place_page_zero(MigrationIncomingState *mis, void *host)
+{
+struct uffdio_zeropage zero_struct;
+
+zero_struct.range.start = (uint64_t)(uintptr_t)host;
+zero_struct.range.len = getpagesize();
+zero_struct.mode = 0;
+
+if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) {
+int e = errno;
+error_report("%s: %s zero host: %p",
+ __func__, strerror(e), host);
+
+return -e;
+}
+
+trace_postcopy_place_page_zero(host);
+return 0;
+}
+
+/*
+ * Returns a target page of memory that can be mapped at a later point in time
+ * using postcopy_place_page
+ * The same address is used repeatedly, postcopy_place_page just takes the
+ * backing page away.
+ * Returns: Pointer to allocated page
+ *
+ */
+void *postcopy_get_tmp_page(MigrationIncomingState *mis)
+{
+if (!mis->postcopy_tmp_page) {
+mis->postcopy_tmp_page = mmap(NULL, getpagesize(),
+ PROT_READ | PROT_WRITE, MAP_PRIVATE |
+ MAP_ANONYMOUS, -1, 0);
+if (!mis->postcopy_tmp_page) {
+error_report("%s: %s", __func__, strerror(errno));
+return NULL;
+}
+}
+
+return mis->postcopy_tmp_page;
+}
+
 #else
 /* No target OS support, stubs

[Qemu-devel] [PULL 30/57] migration_completion: Take current state

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

Soon we'll be in either ACTIVE or POSTCOPY_ACTIVE when we
complete migration, and we need to know which we expect to be
in to change state safely.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 migration/migration.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 9c46472..85e68bc 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1243,10 +1243,12 @@ static int 
await_return_path_close_on_source(MigrationState *ms)
  *   The caller 'breaks' the loop when this returns.
  *
  * @s: Current migration state
+ * @current_active_state: The migration state we expect to be in
  * @*old_vm_running: Pointer to old_vm_running flag
  * @*start_time: Pointer to time to update
  */
-static void migration_completion(MigrationState *s, bool *old_vm_running,
+static void migration_completion(MigrationState *s, int current_active_state,
+ bool *old_vm_running,
  int64_t *start_time)
 {
 int ret;
@@ -1275,11 +1277,11 @@ static void migration_completion(MigrationState *s, 
bool *old_vm_running,
 goto fail;
 }

-migrate_set_state(s, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_COMPLETED);
+migrate_set_state(s, current_active_state, MIGRATION_STATUS_COMPLETED);
 return;

 fail:
-migrate_set_state(s, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_FAILED);
+migrate_set_state(s, current_active_state, MIGRATION_STATUS_FAILED);
 }

 /*
@@ -1321,7 +1323,8 @@ static void *migration_thread(void *opaque)
 qemu_savevm_state_iterate(s->file);
 } else {
 trace_migration_thread_low_pending(pending_size);
-migration_completion(s, &old_vm_running, &start_time);
+migration_completion(s, MIGRATION_STATUS_ACTIVE,
+ &old_vm_running, &start_time);
 break;
 }
 }
-- 
2.5.0

Re: [Qemu-devel] [PATCH v3 0/2] Fix compilation of netmap backend

2015-11-10 Thread Markus Armbruster

Vincenzo Maffione  writes:

> This patch series adds some fixes to the netmap net backend. It contains
> two changes:
> (1) Fix compilation issue of netmap.c introduced by the reorganization
> of struct NetClientOptions
> (2) Address the FIXME comment that was asking to use error_setg()
> variants in place of error_report()

Series
Reviewed-by: Markus Armbruster

[Qemu-devel] [PATCH 3/3] disas/arm: avoid clang shifting negative signed warning

2015-11-10 Thread Stefan Hajnoczi

clang 3.7.0 on x86_64 warns about the following:

  disas/arm.c:1782:17: warning: shifting a negative signed value is undefined 
[-Wshift-negative-value]
imm |= (-1 << 7);
~~ ^

Note that this patch preserves the tab indent in this source file
because the surrounding code still uses tabs.

Signed-off-by: Stefan Hajnoczi 
---
 disas/arm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/disas/arm.c b/disas/arm.c
index 6165246..7a7354b 100644
--- a/disas/arm.c
+++ b/disas/arm.c
@@ -1779,7 +1779,7 @@ print_insn_coprocessor (bfd_vma pc, struct 
disassemble_info *info, long given,
 
/* Is ``imm'' a negative number?  */
if (imm & 0x40)
- imm |= (-1 << 7);
+ imm |= (~0u << 7);
 
func (stream, "%d", imm);
  }
-- 
2.5.0

[Qemu-devel] [PULL 44/44] block: Update copyright of the accounting code

2015-11-10 Thread Stefan Hajnoczi

From: Alberto Garcia 

Signed-off-by: Alberto Garcia 
Message-id: 
80a2278e3ec2dafd5daab20a7cb2d6a9b83371e4.1446044838.git.be...@igalia.com
Signed-off-by: Stefan Hajnoczi 
---
 block/accounting.c | 1 +
 include/block/accounting.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/block/accounting.c b/block/accounting.c
index 05a5c5f..185025e 100644
--- a/block/accounting.c
+++ b/block/accounting.c
@@ -2,6 +2,7 @@
  * QEMU System Emulator block accounting
  *
  * Copyright (c) 2011 Christoph Hellwig
+ * Copyright (c) 2015 Igalia, S.L.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to 
deal
diff --git a/include/block/accounting.h b/include/block/accounting.h
index 482926b..0f46cb4 100644
--- a/include/block/accounting.h
+++ b/include/block/accounting.h
@@ -2,6 +2,7 @@
  * QEMU System Emulator block accounting
  *
  * Copyright (c) 2011 Christoph Hellwig
+ * Copyright (c) 2015 Igalia, S.L.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to 
deal
-- 
2.5.0

[Qemu-devel] [PULL 03/57] Move configuration section writing

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

The vmstate_configuration is currently written
in 'qemu_savevm_state_begin', move it to
'qemu_savevm_state_header' since it's got a hard
requirement that it must be the 1st thing after
the header.
(In postcopy some 'command' sections get sent
early before the saving of the main sections
and hence before qemu_savevm_state_begin).

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Amit Shah 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 migration/savevm.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/migration/savevm.c b/migration/savevm.c
index e05158d..638aa1f 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -713,6 +713,12 @@ void qemu_savevm_state_header(QEMUFile *f)
 trace_savevm_state_header();
 qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
 qemu_put_be32(f, QEMU_VM_FILE_VERSION);
+
+if (!savevm_state.skip_configuration) {
+qemu_put_byte(f, QEMU_VM_CONFIGURATION);
+vmstate_save_state(f, &vmstate_configuration, &savevm_state, 0);
+}
+
 }

 void qemu_savevm_state_begin(QEMUFile *f,
@@ -729,11 +735,6 @@ void qemu_savevm_state_begin(QEMUFile *f,
 se->ops->set_params(params, se->opaque);
 }

-if (!savevm_state.skip_configuration) {
-qemu_put_byte(f, QEMU_VM_CONFIGURATION);
-vmstate_save_state(f, &vmstate_configuration, &savevm_state, 0);
-}
-
 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
 if (!se->ops || !se->ops->save_live_setup) {
 continue;
-- 
2.5.0

[Qemu-devel] [PULL 05/57] qemu_ram_block_from_host

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

Postcopy sends RAMBlock names and offsets over the wire (since it can't
rely on the order of ramaddr being the same), and it starts out with
HVA fault addresses from the kernel.

qemu_ram_block_from_host translates a HVA into a RAMBlock, an offset
in the RAMBlock and the global ram_addr_t value.

Rewrite qemu_ram_addr_from_host to use qemu_ram_block_from_host.

Provide qemu_ram_get_idstr since it's the actual name text sent on the
wire.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: David Gibson 
Reviewed-by: Juan Quintela 
Reviewed-by: Amit Shah 
Signed-off-by: Juan Quintela 
---
 exec.c| 54 +++
 include/exec/cpu-common.h |  3 +++
 include/exec/ram_addr.h   |  2 --
 include/qemu/typedefs.h   |  1 +
 4 files changed, 49 insertions(+), 11 deletions(-)

diff --git a/exec.c b/exec.c
index 4ced1a6..53d3848 100644
--- a/exec.c
+++ b/exec.c
@@ -1377,6 +1377,11 @@ static RAMBlock *find_ram_block(ram_addr_t addr)
 return NULL;
 }

+const char *qemu_ram_get_idstr(RAMBlock *rb)
+{
+return rb->idstr;
+}
+
 /* Called with iothread lock held.  */
 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
 {
@@ -1877,8 +1882,16 @@ static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr 
*size)
 }
 }

-/* Some of the softmmu routines need to translate from a host pointer
- * (typically a TLB entry) back to a ram offset.
+/*
+ * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
+ * in that RAMBlock.
+ *
+ * ptr: Host pointer to look up
+ * round_offset: If true round the result offset down to a page boundary
+ * *ram_addr: set to result ram_addr
+ * *offset: set to result offset within the RAMBlock
+ *
+ * Returns: RAMBlock (or NULL if not found)
  *
  * By the time this function returns, the returned pointer is not protected
  * by RCU anymore.  If the caller is not within an RCU critical section and
@@ -1886,18 +1899,22 @@ static void *qemu_ram_ptr_length(ram_addr_t addr, 
hwaddr *size)
  * pointer, such as a reference to the region that includes the incoming
  * ram_addr_t.
  */
-MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
+RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
+   ram_addr_t *ram_addr,
+   ram_addr_t *offset)
 {
 RAMBlock *block;
 uint8_t *host = ptr;
-MemoryRegion *mr;

 if (xen_enabled()) {
 rcu_read_lock();
 *ram_addr = xen_ram_addr_from_mapcache(ptr);
-mr = qemu_get_ram_block(*ram_addr)->mr;
+block = qemu_get_ram_block(*ram_addr);
+if (block) {
+*offset = (host - block->host);
+}
 rcu_read_unlock();
-return mr;
+return block;
 }

 rcu_read_lock();
@@ -1920,10 +1937,29 @@ MemoryRegion *qemu_ram_addr_from_host(void *ptr, 
ram_addr_t *ram_addr)
 return NULL;

 found:
-*ram_addr = block->offset + (host - block->host);
-mr = block->mr;
+*offset = (host - block->host);
+if (round_offset) {
+*offset &= TARGET_PAGE_MASK;
+}
+*ram_addr = block->offset + *offset;
 rcu_read_unlock();
-return mr;
+return block;
+}
+
+/* Some of the softmmu routines need to translate from a host pointer
+   (typically a TLB entry) back to a ram offset.  */
+MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
+{
+RAMBlock *block;
+ram_addr_t offset; /* Not used */
+
+block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);
+
+if (!block) {
+return NULL;
+}
+
+return block->mr;
 }

 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 9fb1d54..94d1f8a 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -64,8 +64,11 @@ typedef uint32_t CPUReadMemoryFunc(void *opaque, hwaddr 
addr);
 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
 /* This should not be used by devices.  */
 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr);
+RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
+   ram_addr_t *ram_addr, ram_addr_t *offset);
 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev);
 void qemu_ram_unset_idstr(ram_addr_t addr);
+const char *qemu_ram_get_idstr(RAMBlock *rb);

 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
 int len, int is_write);
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 3360ac5..7115154 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -22,8 +22,6 @@
 #ifndef CONFIG_USER_ONLY
 #include "hw/xen/xen.h"

-typedef struct RAMBlock RAMBlock;
-
 struct RAMBlock

[Qemu-devel] [PULL 20/57] Return path: Send responses from destination to source

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

Add migrate_send_rp_message to send a message from destination to source along 
the return path.
  (It uses a mutex to let it be called from multiple threads)
Add migrate_send_rp_shut to send a 'shut' message to indicate
  the destination is finished with the RP.
Add migrate_send_rp_pong to send a 'PONG' message in response to a PING
  Use it in the MSG_RP_PING handler

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Amit Shah 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 include/migration/migration.h | 19 ++
 migration/migration.c | 45 +++
 migration/savevm.c|  2 +-
 trace-events  |  1 +
 4 files changed, 66 insertions(+), 1 deletion(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 98a6d07..3ce3fda 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -43,12 +43,22 @@ struct MigrationParams {
 bool shared;
 };

+/* Messages sent on the return path from destination to source */
+enum mig_rp_message_type {
+MIG_RP_MSG_INVALID = 0,  /* Must be 0 */
+MIG_RP_MSG_SHUT, /* sibling will not send any more RP messages */
+MIG_RP_MSG_PONG, /* Response to a PING; data (seq: be32 ) */
+
+MIG_RP_MSG_MAX
+};
+
 typedef QLIST_HEAD(, LoadStateEntry) LoadStateEntry_Head;
 /* State for the incoming migration */
 struct MigrationIncomingState {
 QEMUFile *from_src_file;

 QEMUFile *to_src_file;
+QemuMutex rp_mutex;/* We send replies from multiple threads */

 /* See savevm.c */
 LoadStateEntry_Head loadvm_handlers;
@@ -181,6 +191,15 @@ int migrate_compress_threads(void);
 int migrate_decompress_threads(void);
 bool migrate_use_events(void);

+/* Sending on the return path - generic and then for each message type */
+void migrate_send_rp_message(MigrationIncomingState *mis,
+ enum mig_rp_message_type message_type,
+ uint16_t len, void *data);
+void migrate_send_rp_shut(MigrationIncomingState *mis,
+  uint32_t value);
+void migrate_send_rp_pong(MigrationIncomingState *mis,
+  uint32_t value);
+
 void ram_control_before_iterate(QEMUFile *f, uint64_t flags);
 void ram_control_after_iterate(QEMUFile *f, uint64_t flags);
 void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data);
diff --git a/migration/migration.c b/migration/migration.c
index bb7dcb9..8380e2f 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -97,6 +97,7 @@ MigrationIncomingState 
*migration_incoming_state_new(QEMUFile* f)
 mis_current = g_new0(MigrationIncomingState, 1);
 mis_current->from_src_file = f;
 QLIST_INIT(&mis_current->loadvm_handlers);
+qemu_mutex_init(&mis_current->rp_mutex);

 return mis_current;
 }
@@ -344,6 +345,50 @@ void process_incoming_migration(QEMUFile *f)
 qemu_coroutine_enter(co, f);
 }

+/*
+ * Send a message on the return channel back to the source
+ * of the migration.
+ */
+void migrate_send_rp_message(MigrationIncomingState *mis,
+ enum mig_rp_message_type message_type,
+ uint16_t len, void *data)
+{
+trace_migrate_send_rp_message((int)message_type, len);
+qemu_mutex_lock(&mis->rp_mutex);
+qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
+qemu_put_be16(mis->to_src_file, len);
+qemu_put_buffer(mis->to_src_file, data, len);
+qemu_fflush(mis->to_src_file);
+qemu_mutex_unlock(&mis->rp_mutex);
+}
+
+/*
+ * Send a 'SHUT' message on the return channel with the given value
+ * to indicate that we've finished with the RP.  Non-0 value indicates
+ * error.
+ */
+void migrate_send_rp_shut(MigrationIncomingState *mis,
+  uint32_t value)
+{
+uint32_t buf;
+
+buf = cpu_to_be32(value);
+migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf);
+}
+
+/*
+ * Send a 'PONG' message on the return channel with the given value
+ * (normally in response to a 'PING')
+ */
+void migrate_send_rp_pong(MigrationIncomingState *mis,
+  uint32_t value)
+{
+uint32_t buf;
+
+buf = cpu_to_be32(value);
+migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf);
+}
+
 /* amount of nanoseconds we are willing to wait for migration to be down.
  * the choice of nanoseconds is because it is the maximum resolution that
  * get_clock() can achieve. It is an internal measure. All user-visible
diff --git a/migration/savevm.c b/migration/savevm.c
index d47c55b..1ffe7b0 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1103,7 +1103,7 @@ static int loadvm_process_command(QEMUFile *f)
  tmp32);
 return -1;
 }
-/* migrate_send_rp_pong(mis, tmp32);

[Qemu-devel] Contribute to abstracting device address allocation

2015-11-10 Thread priyanshu jain

Hello,

I am an undergraduate computer science student in India. I am new to
open source and I want to contribute to the project on abstracting device
address allocation in QEMU. I have good knowledge of C. Please suggest how I
can contribute to this.

Regards
Priyanshu Jain

[Qemu-devel] [PULL 25/57] Add wrappers and handlers for sending/receiving the postcopy-ram migration messages.

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

The state of the postcopy process is managed via a series of messages;
   * Add wrappers and handlers for sending/receiving these messages
   * Add a state variable that tracks the current state of postcopy

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Amit Shah 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 include/migration/migration.h |  27 +
 include/sysemu/sysemu.h   |  19 
 migration/migration.c |  20 
 migration/savevm.c| 254 ++
 trace-events  |  10 ++
 5 files changed, 330 insertions(+)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 4ed7931..2e9fa3c 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -53,6 +53,29 @@ enum mig_rp_message_type {
 };

 typedef QLIST_HEAD(, LoadStateEntry) LoadStateEntry_Head;
+
+/* The current postcopy state is read/set by postcopy_state_get/set
+ * which update it atomically.
+ * The state is updated as postcopy messages are received, and
+ * in general only one thread should be writing to the state at any one
+ * time, initially the main thread and then the listen thread;
+ * Corner cases are where either thread finishes early and/or errors.
+ * The state is checked as messages are received to ensure that
+ * the source is sending us messages in the correct order.
+ * The state is also used by the RAM reception code to know if it
+ * has to place pages atomically, and the cleanup code at the end of
+ * the main thread to know if it has to delay cleanup until the end
+ * of postcopy.
+ */
+typedef enum {
+POSTCOPY_INCOMING_NONE = 0,  /* Initial state - no postcopy */
+POSTCOPY_INCOMING_ADVISE,
+POSTCOPY_INCOMING_DISCARD,
+POSTCOPY_INCOMING_LISTENING,
+POSTCOPY_INCOMING_RUNNING,
+POSTCOPY_INCOMING_END
+} PostcopyState;
+
 /* State for the incoming migration */
 struct MigrationIncomingState {
 QEMUFile *from_src_file;
@@ -240,4 +263,8 @@ void global_state_set_optional(void);
 void savevm_skip_configuration(void);
 int global_state_store(void);
 void global_state_store_running(void);
+
+PostcopyState postcopy_state_get(void);
+/* Set the state and return the old state */
+PostcopyState postcopy_state_set(PostcopyState new_state);
 #endif
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 70473f4..6225e00 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -89,6 +89,16 @@ enum qemu_vm_cmd {
 MIG_CMD_INVALID = 0,   /* Must be 0 */
 MIG_CMD_OPEN_RETURN_PATH,  /* Tell the dest to open the Return path */
 MIG_CMD_PING,  /* Request a PONG on the RP */
+
+MIG_CMD_POSTCOPY_ADVISE,   /* Prior to any page transfers, just
+  warn we might want to do PC */
+MIG_CMD_POSTCOPY_LISTEN,   /* Start listening for incoming
+  pages as it's running. */
+MIG_CMD_POSTCOPY_RUN,  /* Start execution */
+
+MIG_CMD_POSTCOPY_RAM_DISCARD,  /* A list of pages to discard that
+  were previously sent during
+  precopy but are dirty. */
 MIG_CMD_MAX
 };

@@ -104,6 +114,15 @@ void qemu_savevm_command_send(QEMUFile *f, enum 
qemu_vm_cmd command,
   uint16_t len, uint8_t *data);
 void qemu_savevm_send_ping(QEMUFile *f, uint32_t value);
 void qemu_savevm_send_open_return_path(QEMUFile *f);
+void qemu_savevm_send_postcopy_advise(QEMUFile *f);
+void qemu_savevm_send_postcopy_listen(QEMUFile *f);
+void qemu_savevm_send_postcopy_run(QEMUFile *f);
+
+void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
+   uint16_t len,
+   uint64_t *start_list,
+   uint64_t *length_list);
+
 int qemu_loadvm_state(QEMUFile *f);

 typedef enum DisplayType
diff --git a/migration/migration.c b/migration/migration.c
index f849f89..7097e5b 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -57,6 +57,13 @@ static NotifierList migration_state_notifiers =

 static bool deferred_incoming;

+/*
+ * Current state of incoming postcopy; note this is not part of
+ * MigrationIncomingState since its state is used during cleanup
+ * at the end as MIS is being freed.
+ */
+static PostcopyState incoming_postcopy_state;
+
 /* When we add fault tolerance, we could have several
migrations at once.  For now we don't need to add
dynamic creation of migration */
@@ -284,6 +291,7 @@ static void process_incoming_migration_co(void *opaque)
 int ret;

 migration_incoming_state_new(f);
+postcopy_state_set(POSTCOPY_INCOMING_NONE);

[Qemu-devel] [PULL 56/57] Inhibit ballooning during postcopy

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

Postcopy detects accesses to pages that haven't been transferred yet
using userfaultfd, and it causes exceptions on pages that are 'not
present'.
Ballooning also causes pages to be marked as 'not present' when the
guest inflates the balloon.
Potentially a balloon could be inflated to discard pages that are
currently inflight during postcopy and that may be arriving at about
the same time.

To avoid this confusion, disable ballooning during postcopy.

When disabled we drop balloon requests from the guest.  Since ballooning
is generally initiated by the host, the management system should avoid
initiating any balloon instructions to the guest during migration,
although it's not possible to know how long it would take a guest to
process a request made prior to the start of migration.
Guest initiated ballooning will not know if it's really freed a page
of host memory or not.

Queueing the requests until after migration would be nice, but is
non-trivial, since the set of inflate/deflate requests have to
be compared with the state of the page to know what the final
outcome is allowed to be.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Reviewed-by: Amit Shah 
Signed-off-by: Juan Quintela 
---
 balloon.c  | 11 +++
 hw/virtio/virtio-balloon.c |  4 +++-
 include/sysemu/balloon.h   |  2 ++
 migration/postcopy-ram.c   |  9 +
 4 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/balloon.c b/balloon.c
index 5d69e8a..0f45d1b 100644
--- a/balloon.c
+++ b/balloon.c
@@ -36,6 +36,17 @@
 static QEMUBalloonEvent *balloon_event_fn;
 static QEMUBalloonStatus *balloon_stat_fn;
 static void *balloon_opaque;
+static bool balloon_inhibited;
+
+bool qemu_balloon_is_inhibited(void)
+{
+return balloon_inhibited;
+}
+
+void qemu_balloon_inhibit(bool state)
+{
+balloon_inhibited = state;
+}

 static bool have_balloon(Error **errp)
 {
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index c419b17..9671635 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -37,9 +37,11 @@
 static void balloon_page(void *addr, int deflate)
 {
 #if defined(__linux__)
-if (!kvm_enabled() || kvm_has_sync_mmu())
+if (!qemu_balloon_is_inhibited() && (!kvm_enabled() ||
+ kvm_has_sync_mmu())) {
 qemu_madvise(addr, TARGET_PAGE_SIZE,
 deflate ? QEMU_MADV_WILLNEED : QEMU_MADV_DONTNEED);
+}
 #endif
 }

diff --git a/include/sysemu/balloon.h b/include/sysemu/balloon.h
index 17fe300..3f976b4 100644
--- a/include/sysemu/balloon.h
+++ b/include/sysemu/balloon.h
@@ -22,5 +22,7 @@ typedef void (QEMUBalloonStatus)(void *opaque, BalloonInfo 
*info);
 int qemu_add_balloon_handler(QEMUBalloonEvent *event_func,
 QEMUBalloonStatus *stat_func, void *opaque);
 void qemu_remove_balloon_handler(void *opaque);
+bool qemu_balloon_is_inhibited(void);
+void qemu_balloon_inhibit(bool state);

 #endif
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 1a24b09..22d6b18 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -24,6 +24,7 @@
 #include "migration/migration.h"
 #include "migration/postcopy-ram.h"
 #include "sysemu/sysemu.h"
+#include "sysemu/balloon.h"
 #include "qemu/error-report.h"
 #include "trace.h"

@@ -308,6 +309,8 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState 
*mis)
 mis->have_fault_thread = false;
 }

+qemu_balloon_inhibit(false);
+
 if (enable_mlock) {
 if (os_mlock() < 0) {
 error_report("mlock: %s", strerror(errno));
@@ -533,6 +536,12 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 return -1;
 }

+/*
+ * Ballooning can mark pages as absent while we're postcopying
+ * that would cause false userfaults.
+ */
+qemu_balloon_inhibit(true);
+
 trace_postcopy_ram_enable_notify();

 return 0;
-- 
2.5.0

[Qemu-devel] [PULL 51/57] Start up a postcopy/listener thread ready for incoming page data

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

The loading of a device state (during postcopy) may access guest
memory that's still on the source machine and thus might need
a page fill; split off a separate thread that handles the incoming
page data so that the original incoming migration code can finish
off the device data.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 include/migration/migration.h |  4 +++
 migration/migration.c |  6 
 migration/savevm.c| 79 ++-
 trace-events  |  2 ++
 4 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 329d535..fd018b7 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -93,6 +93,10 @@ struct MigrationIncomingState {
 QemuThread fault_thread;
 QemuSemaphore  fault_thread_sem;

+bool   have_listen_thread;
+QemuThread listen_thread;
+QemuSemaphore  listen_thread_sem;
+
 /* For the kernel to send us notifications */
 int   userfault_fd;
 /* To tell the fault_thread to quit */
diff --git a/migration/migration.c b/migration/migration.c
index 38d64ea..db3d2dd 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1441,6 +1441,12 @@ static int postcopy_start(MigrationState *ms, bool 
*old_vm_running)
 goto fail;
 }

+/*
+ * Make sure the receiver can get incoming pages before we send the rest
+ * of the state
+ */
+qemu_savevm_send_postcopy_listen(fb);
+
 qemu_savevm_state_complete_precopy(fb);
 qemu_savevm_send_ping(fb, 3);

diff --git a/migration/savevm.c b/migration/savevm.c
index 308b7d1..6ef9e62 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1380,6 +1380,65 @@ static int 
loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
 return 0;
 }

+/*
+ * Triggered by a postcopy_listen command; this thread takes over reading
+ * the input stream, leaving the main thread free to carry on loading the rest
+ * of the device state (from RAM).
+ * (TODO:This could do with being in a postcopy file - but there again it's
+ * just another input loop, not that postcopy specific)
+ */
+static void *postcopy_ram_listen_thread(void *opaque)
+{
+QEMUFile *f = opaque;
+MigrationIncomingState *mis = migration_incoming_get_current();
+int load_res;
+
+qemu_sem_post(&mis->listen_thread_sem);
+trace_postcopy_ram_listen_thread_start();
+
+/*
+ * Because we're a thread and not a coroutine we can't yield
+ * in qemu_file, and thus we must be blocking now.
+ */
+qemu_file_set_blocking(f, true);
+load_res = qemu_loadvm_state_main(f, mis);
+/* And non-blocking again so we don't block in any cleanup */
+qemu_file_set_blocking(f, false);
+
+trace_postcopy_ram_listen_thread_exit();
+if (load_res < 0) {
+error_report("%s: loadvm failed: %d", __func__, load_res);
+qemu_file_set_error(f, load_res);
+} else {
+/*
+ * This looks good, but it's possible that the device loading in the
+ * main thread hasn't finished yet, and so we might not be in 'RUN'
+ * state yet; wait for the end of the main thread.
+ */
+qemu_event_wait(&mis->main_thread_load_event);
+}
+postcopy_ram_incoming_cleanup(mis);
+/*
+ * If everything has worked fine, then the main thread has waited
+ * for us to start, and we're the last use of the mis.
+ * (If something broke then qemu will have to exit anyway since it's
+ * got a bad migration state).
+ */
+migration_incoming_state_destroy();
+
+if (load_res < 0) {
+/*
+ * If something went wrong then we have a bad state so exit;
+ * depending how far we got it might be possible at this point
+ * to leave the guest running and fire MCEs for pages that never
+ * arrived as a desperate recovery step.
+ */
+exit(EXIT_FAILURE);
+}
+
+return NULL;
+}
+
 /* After this message we must be able to immediately receive postcopy data */
 static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
 {
@@ -1399,7 +1458,20 @@ static int 
loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
 return -1;
 }

-/* TODO start up the postcopy listening thread */
+if (mis->have_listen_thread) {
+error_report("CMD_POSTCOPY_RAM_LISTEN already has a listen thread");
+return -1;
+}
+
+mis->have_listen_thread = true;
+/* Start up the listening thread and wait for it to signal ready */
+qemu_sem_init(&mis->listen_thread_sem, 0);
+qemu_thread_create(&mis->listen_thread, "postcopy/listen",
+   postcopy_ram_listen_thread, mis->from_src_file,
+   QEMU_THREAD_JOINABLE);

Re: [Qemu-devel] [PATCH v4 00/21] Extended I/O accounting

2015-11-10 Thread Stefan Hajnoczi

On Wed, Oct 28, 2015 at 05:32:57PM +0200, Alberto Garcia wrote:
> Here's v4 of the series that implements extended I/O accounting for
> block devices.
> 
> Since part of Max's BlockBackend series has already been merged, this
> series can now be applied cleanly on top of the master branch without
> additional dependencies.
> 
> Here's the summary of what this series provides:
> 
>  - New block_acct_failed() and block_acct_invalid() calls.
>We keep track now of the number of successful, failed and invalid
>operations (each one separated into read, write and flush). So from
>the API point of view, BlockDeviceStats contains 6 new fields for
>those.
> 
>  - idle_time_ns: time since the last I/O operation.
> 
>  - New BlockDeviceTimedStats struct: it has statistics for the I/O
>during a given interval of time. It keeps minimum, maximum and
>average latencies for read, write and flush operations.
> 
>It also keeps the average read and write queue depths.
> 
>  - New 'stats-intervals' option that allows the user to define the
>intervals used to keep the aforementioned statistics. An arbitrary
>number of intervals can be specified, the length of each one is in
>seconds.
> 
>For the API I opted for a colon-separated list of numbers,
> 
>   stats-intervals=60:3600:86400
> 
>I also considered a different syntax,
> 
>   stats-intervals.0.length=60,
>   stats-intervals.1.length=3600,
>   stats-intervals.2.length=86400
> 
>This one could be useful if we want to specify any other attribute
>for each interval, but I couldn't come up with any, so I chose the
>simpler solution.
> 
>  - Two new options, stats-account-invalid and stats-account-failed,
>which allow the user to decide whether to count invalid and failed
>operations when computing the idle time and total latency.
> 
> Regards,
> 
> Berto
> 
> v4:
> - Rebase on top of the current master. This series no longer depends
>   on any other.
> - patch 8: clarify that interval_length is in seconds [Stefan]
> - patch 9: rewrite timed_average_sum() so it does not call
>   qemu_clock_get_ns() twice [Stefan]
> 
> v3: https://lists.gnu.org/archive/html/qemu-block/2015-10/msg00785.html
> - Rebased on top of the current master and on Max's BlockBackend
>   series v7
> - patch 4: minor documentation fixes [Stefan]
> - patch 5: s/miliseconds/nanoseconds/ [Stefan]
> - patch 6: dropped, there's no "supports_stats" anymore [Stefan]
> - patch 7 (now 6): explain why block_acct_invalid() does not update
>   total_time_ns[] [Stefan]
> - patch 12 (now 11): don't initialize BlockAcctCookie to { 0 }, it's
>not needed.
> 
> v2: https://lists.gnu.org/archive/html/qemu-block/2015-10/msg00161.html
> - First complete implementation of the new statistics
> 
> v1: https://lists.gnu.org/archive/html/qemu-devel/2015-06/msg03321.html
> - Initial series containing only the timed average infrastructure.
> 
> Alberto Garcia (21):
>   xen_disk: Account for flush operations
>   ide: Account for write operations correctly
>   block: define 'clock_type' for the accounting code
>   util: Infrastructure for computing recent averages
>   block: Add idle_time_ns to BlockDeviceStats
>   block: Add statistics for failed and invalid I/O operations
>   block: Allow configuring whether to account failed and invalid ops
>   block: Compute minimum, maximum and average I/O latencies
>   block: Add average I/O queue depth to BlockDeviceTimedStats
>   block: New option to define the intervals for collecting I/O
> statistics
>   qemu-io: Account for failed, invalid and flush operations
>   block: Use QEMU_CLOCK_VIRTUAL for the accounting code in qtest mode
>   iotests: Add test for the block device statistics
>   nvme: Account for failed and invalid operations
>   virtio-blk: Account for failed and invalid operations
>   xen_disk: Account for failed and invalid operations
>   atapi: Account for failed and invalid operations
>   ide: Account for failed and invalid operations
>   macio: Account for failed operations
>   scsi-disk: Account for failed operations
>   block: Update copyright of the accounting code
> 
>  block/accounting.c   | 123 ++-
>  block/block-backend.c|   1 +
>  block/qapi.c |  51 +++
>  blockdev.c   |  53 +++
>  hmp.c|   4 +-
>  hw/block/nvme.c  |  11 +-
>  hw/block/virtio-blk.c|   4 +-
>  hw/block/xen_disk.c  |  27 +++-
>  hw/ide/atapi.c   |  31 ++--
>  hw/ide/core.c|  12 +-
>  hw/ide/macio.c   |  12 +-
>  hw/scsi/scsi-disk.c  |  46 --
>  include/block/accounting.h   |  28 
>  include/qemu/timed-average.h |  64 
>  qapi/block-core.json | 103 -
>  qemu-io-cmds.c   |   9 ++
>  qmp-commands.hx  |  80 +-
>  tests/Makefile   |   4 +
>

[Qemu-devel] [PULL 57/57] migration: qemu_savevm_state_cleanup becomes mandatory operation

2015-11-10 Thread Juan Quintela

From: "Denis V. Lunev" 

since commit
commit 94f5a43704129ca4995aa3385303c5ae225bde42
Author: Liang Li 
Date:   Mon Nov 2 15:37:00 2015 +0800

migration: defer migration_end & blk_mig_cleanup

when the actual calls to the .cleanup callbacks were removed from the complete operations.

The patch fixes a regression introduced by the commit above, which results in
a 100% reliable assert for a virtio-scsi VM with iothreads enabled during a
'virsh create-snapshot' operation:
assert(i != mr->ioeventfd_nb);
memory_region_del_eventfd
virtio_pci_set_host_notifier_internal
virtio_pci_set_host_notifier
virtio_scsi_dataplane_start
virtio_scsi_handle_cmd
virtio_queue_notify_vq
virtio_queue_host_notifier_read
aio_dispatch

Signed-off-by: Denis V. Lunev 
Reviewed-by: Liang Li 
Reviewed-by: Juan Quintela 
CC: Paolo Bonzini 
CC: Juan Quintela 
CC: Amit Shah 
Signed-off-by: Juan Quintela 
---
 migration/savevm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/migration/savevm.c b/migration/savevm.c
index 0596f7b..be52314 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1179,8 +1179,8 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
 qemu_savevm_state_complete_precopy(f);
 ret = qemu_file_get_error(f);
 }
+qemu_savevm_state_cleanup();
 if (ret != 0) {
-qemu_savevm_state_cleanup();
 error_setg_errno(errp, -ret, "Error while writing VM state");
 }
 return ret;
-- 
2.5.0

[Qemu-devel] [PATCH 1/3] monitor: avoid clang shifting negative signed warning

2015-11-10 Thread Stefan Hajnoczi

clang 3.7.0 on x86_64 warns about the following:

  target-i386/monitor.c:38:22: warning: shifting a negative signed value is 
undefined [-Wshift-negative-value]
addr |= -1LL << 48;
 ^

Signed-off-by: Stefan Hajnoczi 
---
 target-i386/monitor.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target-i386/monitor.c b/target-i386/monitor.c
index aac6b1b..6f5c280 100644
--- a/target-i386/monitor.c
+++ b/target-i386/monitor.c
@@ -35,7 +35,7 @@ static void print_pte(Monitor *mon, hwaddr addr,
 {
 #ifdef TARGET_X86_64
 if (addr & (1ULL << 47)) {
-addr |= -1LL << 48;
+addr |= ~0ULL << 48;
 }
 #endif
 monitor_printf(mon, TARGET_FMT_plx ": " TARGET_FMT_plx
-- 
2.5.0

Re: [Qemu-devel] [PATCH] kvm-all: PAGE_SIZE should be real host page size

2015-11-10 Thread Peter Maydell

On 10 November 2015 at 16:59, Andrew Jones  wrote:
> On Tue, Nov 10, 2015 at 04:29:31PM +0000, Peter Maydell wrote:
>> On 10 November 2015 at 00:23, Andrew Jones  wrote:
>> > -/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
>> > -#define PAGE_SIZE TARGET_PAGE_SIZE
>> > +/* KVM uses PAGE_SIZE in its definition of KVM_COALESCED_MMIO_MAX. We
>> > + * need to use the real host PAGE_SIZE, as that's what KVM will use.
>> > + */
>> > +#define PAGE_SIZE getpagesize()
>>
>> Rather than defining PAGE_SIZE here (a confusing macro given
>> we have several page sizes to deal with), why not just use
>> getpagesize() in the one and only location where we currently
>> use this macro?
>
> The macro is used by kernel headers that we import and include in
> kvm-all.c. It's ugly, I agree, but that's how this cookie crumbled.

Oh, I see. That's pretty horrible.

thanks
-- PMM

[Qemu-devel] [PATCH 01/10] snapshot: create helper to test that block drivers supports snapshots

2015-11-10 Thread Denis V. Lunev

The patch enforces proper locking for this operation.

Signed-off-by: Denis V. Lunev 
Reviewed-by: Greg Kurz 
CC: Juan Quintela 
CC: Stefan Hajnoczi 
CC: Kevin Wolf 
---
 block/snapshot.c | 24 
 include/block/snapshot.h |  8 
 migration/savevm.c   | 17 -
 3 files changed, 36 insertions(+), 13 deletions(-)

diff --git a/block/snapshot.c b/block/snapshot.c
index 89500f2..d929d08 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -356,3 +356,27 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState 
*bs,
 
 return ret;
 }
+
+
+/* Group operations. All block drivers are involved.
+ * These functions will properly handle dataplane (take aio_context_acquire
+ * when appropriate for appropriate block drivers) */
+
+bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs)
+{
+bool ok = true;
+BlockDriverState *bs = NULL;
+
+while (ok && (bs = bdrv_next(bs))) {
+AioContext *ctx = bdrv_get_aio_context(bs);
+
+aio_context_acquire(ctx);
+if (bdrv_is_inserted(bs) && !bdrv_is_read_only(bs)) {
+ok = bdrv_can_snapshot(bs);
+}
+aio_context_release(ctx);
+}
+
+*first_bad_bs = bs;
+return ok;
+}
diff --git a/include/block/snapshot.h b/include/block/snapshot.h
index 770d9bb..6195c9c 100644
--- a/include/block/snapshot.h
+++ b/include/block/snapshot.h
@@ -75,4 +75,12 @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs,
 int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
  const char *id_or_name,
  Error **errp);
+
+
+/* Group operations. All block drivers are involved.
+ * These functions will properly handle dataplane (take aio_context_acquire
+ * when appropriate for appropriate block drivers) */
+
+bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs);
+
 #endif
diff --git a/migration/savevm.c b/migration/savevm.c
index 9f2230f..c212288 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1290,19 +1290,10 @@ void hmp_savevm(Monitor *mon, const QDict *qdict)
 const char *name = qdict_get_try_str(qdict, "name");
 Error *local_err = NULL;
 
-/* Verify if there is a device that doesn't support snapshots and is 
writable */
-bs = NULL;
-while ((bs = bdrv_next(bs))) {
-
-if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
-continue;
-}
-
-if (!bdrv_can_snapshot(bs)) {
-monitor_printf(mon, "Device '%s' is writable but does not support 
snapshots.\n",
-   bdrv_get_device_name(bs));
-return;
-}
+if (!bdrv_all_can_snapshot(&bs)) {
+monitor_printf(mon, "Device '%s' is writable but does not "
+   "support snapshots.\n", bdrv_get_device_name(bs));
+return;
 }
 
 bs = find_vmstate_bs();
-- 
2.5.0

[Qemu-devel] [PULL 36/44] iotests: Add test for the block device statistics

2015-11-10 Thread Stefan Hajnoczi

From: Alberto Garcia 

Signed-off-by: Alberto Garcia 
Message-id: 
0fb8501bbf3666b3d5d3f67fa899729c88f21baf.1446044838.git.be...@igalia.com
Signed-off-by: Stefan Hajnoczi 
---
 tests/qemu-iotests/136 | 349 +
 tests/qemu-iotests/136.out |   5 +
 tests/qemu-iotests/group   |   1 +
 3 files changed, 355 insertions(+)
 create mode 100644 tests/qemu-iotests/136
 create mode 100644 tests/qemu-iotests/136.out

diff --git a/tests/qemu-iotests/136 b/tests/qemu-iotests/136
new file mode 100644
index 0000000..f574d83
--- /dev/null
+++ b/tests/qemu-iotests/136
@@ -0,0 +1,349 @@
+#!/usr/bin/env python
+#
+# Tests for block device statistics
+#
+# Copyright (C) 2015 Igalia, S.L.
+# Author: Alberto Garcia 
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import iotests
+import os
+
+interval_length = 10
+nsec_per_sec = 1000000000
+op_latency = nsec_per_sec / 1000 # See qtest_latency_ns in accounting.c
+bad_sector = 8192
+bad_offset = bad_sector * 512
+blkdebug_file = os.path.join(iotests.test_dir, 'blkdebug.conf')
+
+class BlockDeviceStatsTestCase(iotests.QMPTestCase):
+test_img = "null-aio://"
+total_rd_bytes = 0
+total_rd_ops = 0
+total_wr_bytes = 0
+total_wr_ops = 0
+total_wr_merged = 0
+total_flush_ops = 0
+failed_rd_ops = 0
+failed_wr_ops = 0
+invalid_rd_ops = 0
+invalid_wr_ops = 0
+wr_highest_offset = 0
+account_invalid = False
+account_failed = False
+
+def blockstats(self, device):
+result = self.vm.qmp("query-blockstats")
+for r in result['return']:
+if r['device'] == device:
+return r['stats']
+raise Exception("Device not found for blockstats: %s" % device)
+
+def create_blkdebug_file(self):
+file = open(blkdebug_file, 'w')
+file.write('''
+[inject-error]
+event = "read_aio"
+errno = "5"
+sector = "%d"
+
+[inject-error]
+event = "write_aio"
+errno = "5"
+sector = "%d"
+''' % (bad_sector, bad_sector))
+file.close()
+
+def setUp(self):
+drive_args = []
+drive_args.append("stats-intervals=%d" % interval_length)
+drive_args.append("stats-account-invalid=%s" %
+  (self.account_invalid and "on" or "off"))
+drive_args.append("stats-account-failed=%s" %
+  (self.account_failed and "on" or "off"))
+self.create_blkdebug_file()
+self.vm = iotests.VM().add_drive('blkdebug:%s:%s ' %
+ (blkdebug_file, self.test_img),
+ ','.join(drive_args))
+self.vm.launch()
+# Set an initial value for the clock
+self.vm.qtest("clock_step %d" % nsec_per_sec)
+
+def tearDown(self):
+self.vm.shutdown()
+os.remove(blkdebug_file)
+
+def accounted_ops(self, read = False, write = False, flush = False):
+ops = 0
+if write:
+ops += self.total_wr_ops
+if self.account_failed:
+ops += self.failed_wr_ops
+if self.account_invalid:
+ops += self.invalid_wr_ops
+if read:
+ops += self.total_rd_ops
+if self.account_failed:
+ops += self.failed_rd_ops
+if self.account_invalid:
+ops += self.invalid_rd_ops
+if flush:
+ops += self.total_flush_ops
+return ops
+
+def accounted_latency(self, read = False, write = False, flush = False):
+latency = 0
+if write:
+latency += self.total_wr_ops * op_latency
+if self.account_failed:
+latency += self.failed_wr_ops * op_latency
+if read:
+latency += self.total_rd_ops * op_latency
+if self.account_failed:
+latency += self.failed_rd_ops * op_latency
+if flush:
+latency += self.total_flush_ops * op_latency
+return latency
+
+def check_values(self):
+stats = self.blockstats('drive0')
+
+# Check that the totals match with what we have calculated
+self.assertEqual(self.total_rd_bytes, stats['rd_bytes'])
+self.assertEqual(self.total_wr_bytes, stats['wr_bytes'])
+

[Qemu-devel] [PULL 25/44] ide: Account for write operations correctly

2015-11-10 Thread Stefan Hajnoczi

From: Alberto Garcia 

Signed-off-by: Alberto Garcia 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
2e71323c0875c2b66a8ae9545e0c013af8d4.1446044837.git.be...@igalia.com
Signed-off-by: Stefan Hajnoczi 
---
 hw/ide/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/ide/core.c b/hw/ide/core.c
index 364ba21..35ba1ad 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -895,7 +895,7 @@ static void ide_sector_write(IDEState *s)
 qemu_iovec_init_external(&s->qiov, &s->iov, 1);
 
 block_acct_start(blk_get_stats(s->blk), &s->acct,
- n * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
+ n * BDRV_SECTOR_SIZE, BLOCK_ACCT_WRITE);
 s->pio_aiocb = blk_aio_writev(s->blk, sector_num, &s->qiov, n,
   ide_sector_write_cb, s);
 }
-- 
2.5.0

[Qemu-devel] [PATCH 08/10] migration: reorder processing in hmp_savevm

2015-11-10 Thread Denis V. Lunev

State deletion can be performed on a running VM, which reduces VM downtime.
This approach looks a bit more natural.

Signed-off-by: Denis V. Lunev 
CC: Juan Quintela 
---
 migration/savevm.c | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/migration/savevm.c b/migration/savevm.c
index c2d677d..f4da064 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1267,6 +1267,15 @@ void hmp_savevm(Monitor *mon, const QDict *qdict)
 return;
 }
 
+/* Delete old snapshots of the same name */
+if (name && bdrv_all_delete_snapshot(name, &bs1, &local_err) < 0) {
+monitor_printf(mon,
+   "Error while deleting snapshot on device '%s': %s\n",
+   bdrv_get_device_name(bs1), error_get_pretty(local_err));
+error_free(local_err);
+return;
+}
+
 bs = find_vmstate_bs();
 if (!bs) {
 monitor_printf(mon, "No block device can accept snapshots\n");
@@ -1304,15 +1313,6 @@ void hmp_savevm(Monitor *mon, const QDict *qdict)
 strftime(sn->name, sizeof(sn->name), "vm-%Y%m%d%H%M%S", &tm);
 }
 
-/* Delete old snapshots of the same name */
-if (name && bdrv_all_delete_snapshot(name, &bs1, &local_err) < 0) {
-monitor_printf(mon,
-   "Error while deleting snapshot on device '%s': %s\n",
-   bdrv_get_device_name(bs1), error_get_pretty(local_err));
-error_free(local_err);
-goto the_end;
-}
-
 /* save the VM state */
 f = qemu_fopen_bdrv(bs, 1);
 if (!f) {
-- 
2.5.0

[Qemu-devel] [PULL 33/44] block: New option to define the intervals for collecting I/O statistics

2015-11-10 Thread Stefan Hajnoczi

From: Alberto Garcia 

The BlockAcctStats structure contains a list of BlockAcctTimedStats.
Each one of these collects statistics about the minimum, maximum and
average latencies of all I/O operations in a certain interval of time.

This patch adds a new "stats-intervals" option that allows defining
these intervals.

Signed-off-by: Alberto Garcia 
Message-id: 
41cbcd334a61c6157f0f495cdfd21eff6c156f2a.1446044837.git.be...@igalia.com
Signed-off-by: Stefan Hajnoczi 
---
 blockdev.c   | 37 +
 qapi/block-core.json |  4 
 2 files changed, 41 insertions(+)

diff --git a/blockdev.c b/blockdev.c
index 5b7aac3..769859c 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -442,6 +442,7 @@ static BlockBackend *blockdev_init(const char *file, QDict 
*bs_opts,
 int bdrv_flags = 0;
 int on_read_error, on_write_error;
 bool account_invalid, account_failed;
+const char *stats_intervals;
 BlockBackend *blk;
 BlockDriverState *bs;
 ThrottleConfig cfg;
@@ -481,6 +482,8 @@ static BlockBackend *blockdev_init(const char *file, QDict 
*bs_opts,
 account_invalid = qemu_opt_get_bool(opts, "stats-account-invalid", true);
 account_failed = qemu_opt_get_bool(opts, "stats-account-failed", true);
 
+stats_intervals = qemu_opt_get(opts, "stats-intervals");
+
 extract_common_blockdev_options(opts, &bdrv_flags, &throttling_group, &cfg,
 &detect_zeroes, &error);
 if (error) {
@@ -579,6 +582,35 @@ static BlockBackend *blockdev_init(const char *file, QDict 
*bs_opts,
 }
 
 block_acct_init(blk_get_stats(blk), account_invalid, account_failed);
+
+if (stats_intervals) {
+char **intervals = g_strsplit(stats_intervals, ":", 0);
+unsigned i;
+
+if (*stats_intervals == '\0') {
+error_setg(&error, "stats-intervals can't have an empty value");
+}
+
+for (i = 0; !error && intervals[i] != NULL; i++) {
+unsigned long long val;
+if (parse_uint_full(intervals[i], &val, 10) == 0 &&
+val > 0 && val <= UINT_MAX) {
+block_acct_add_interval(blk_get_stats(blk), val);
+} else {
+error_setg(&error, "Invalid interval length: '%s'",
+   intervals[i]);
+}
+}
+
+g_strfreev(intervals);
+
+if (error) {
+error_propagate(errp, error);
+blk_unref(blk);
+blk = NULL;
+goto err_no_bs_opts;
+}
+}
 }
 
 blk_set_on_error(blk, on_read_error, on_write_error);
@@ -3655,6 +3687,11 @@ QemuOptsList qemu_common_drive_opts = {
 .type = QEMU_OPT_BOOL,
 .help = "whether to account for failed I/O operations "
 "in the statistics",
+},{
+.name = "stats-intervals",
+.type = QEMU_OPT_STRING,
+.help = "colon-separated list of intervals "
+"for collecting I/O statistics, in seconds",
 },
 { /* end of list */ }
 },
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 0742794..273d073 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1503,6 +1503,9 @@
 # @stats-account-failed: #optional whether to include failed
 # operations when computing latency and last
 # access statistics (default: true) (Since 2.5)
+# @stats-intervals: #optional colon-separated list of intervals for
+#   collecting I/O statistics, in seconds (default: none)
+#   (Since 2.5)
 # @detect-zeroes: #optional detect and optimize zero writes (Since 2.1)
 # (default: off)
 #
@@ -1520,6 +1523,7 @@
 '*read-only': 'bool',
 '*stats-account-invalid': 'bool',
 '*stats-account-failed': 'bool',
+'*stats-intervals': 'str',
 '*detect-zeroes': 'BlockdevDetectZeroesOptions' } }
 
 ##
-- 
2.5.0

[Qemu-devel] [PULL 35/57] migration_completion: Take current state

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

Soon we'll be in either ACTIVE or POSTCOPY_ACTIVE when we
complete migration, and we need to know which we expect to be
in to change state safely.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 include/migration/migration.h|   5 ++
 include/migration/postcopy-ram.h |  35 
 include/qemu/typedefs.h  |   1 +
 migration/migration.c|   1 +
 migration/postcopy-ram.c | 126 +++
 migration/ram.c  | 181 ++-
 migration/savevm.c   |   2 -
 trace-events |   6 ++
 8 files changed, 354 insertions(+), 3 deletions(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 219032d..b382d77 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -199,6 +199,11 @@ double xbzrle_mig_cache_miss_rate(void);

 void ram_handle_compressed(void *host, uint8_t ch, uint64_t size);
 void ram_debug_dump_bitmap(unsigned long *todump, bool expected);
+/* For outgoing discard bitmap */
+int ram_postcopy_send_discard_bitmap(MigrationState *ms);
+/* For incoming postcopy discard */
+int ram_discard_range(MigrationIncomingState *mis, const char *block_name,
+  uint64_t start, size_t length);

 /**
  * @migrate_add_blocker - prevent migration from proceeding
diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h
index d81934f..de79fa7 100644
--- a/include/migration/postcopy-ram.h
+++ b/include/migration/postcopy-ram.h
@@ -16,4 +16,39 @@
 /* Return true if the host supports everything we need to do postcopy-ram */
 bool postcopy_ram_supported_by_host(void);

+/*
+ * Discard the contents of 'length' bytes from 'start'
+ * We can assume that if we've been called postcopy_ram_hosttest returned true
+ */
+int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
+   size_t length);
+
+
+/*
+ * Called at the start of each RAMBlock by the bitmap code.
+ * 'offset' is the bitmap offset of the named RAMBlock in the migration
+ * bitmap.
+ * Returns a new PDS
+ */
+PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms,
+ unsigned long offset,
+ const char *name);
+
+/*
+ * Called by the bitmap code for each chunk to discard.
+ * May send a discard message, may just leave it queued to
+ * be sent later.
+ * @start,@length: a range of pages in the migration bitmap in the
+ *  RAM block passed to postcopy_discard_send_init() (length=1 is one page)
+ */
+void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds,
+ unsigned long start, unsigned long length);
+
+/*
+ * Called at the end of each RAMBlock by the bitmap code.
+ * Sends any outstanding discard messages, frees the PDS.
+ */
+void postcopy_discard_send_finish(MigrationState *ms,
+  PostcopyDiscardState *pds);
+
 #endif
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index f7e0ed0..6b1093d 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -67,6 +67,7 @@ typedef struct PCMachineState PCMachineState;
 typedef struct PCMachineClass PCMachineClass;
 typedef struct PCMCIACardState PCMCIACardState;
 typedef struct PixelFormat PixelFormat;
+typedef struct PostcopyDiscardState PostcopyDiscardState;
 typedef struct PropertyInfo PropertyInfo;
 typedef struct Property Property;
 typedef struct QEMUBH QEMUBH;
diff --git a/migration/migration.c b/migration/migration.c
index 2acd0e6..afc863a 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -24,6 +24,7 @@
 #include "qemu/sockets.h"
 #include "qemu/rcu.h"
 #include "migration/block.h"
+#include "migration/postcopy-ram.h"
 #include "qemu/thread.h"
 #include "qmp-commands.h"
 #include "trace.h"
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index cdd0168..261feda 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -27,6 +27,24 @@
 #include "qemu/error-report.h"
 #include "trace.h"

+/* Arbitrary limit on size of each discard command,
+ * keeps them around ~200 bytes
+ */
+#define MAX_DISCARDS_PER_COMMAND 12
+
+struct PostcopyDiscardState {
+const char *ramblock_name;
+uint64_t offset; /* Bitmap entry for the 1st bit of this RAMBlock */
+uint16_t cur_entry;
+/*
+ * Start and length of a discard range (bytes)
+ */
+uint64_t start_list[MAX_DISCARDS_PER_COMMAND];
+uint64_t length_list[MAX_DISCARDS_PER_COMMAND];
+unsigned int nsentwords;
+unsigned int nsentcmds;
+};
+
 /* Postcopy needs to detect accesses to pages that haven't yet been copied
  * across, and efficiently map new pages in, the

[Qemu-devel] [PULL 31/57] MIGRATION_STATUS_POSTCOPY_ACTIVE: Add new migration state

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

'MIGRATION_STATUS_POSTCOPY_ACTIVE' is entered after migrate_start_postcopy

'migration_in_postcopy' is provided for other sections to know if
they're in postcopy.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: David Gibson 
Reviewed-by: Eric Blake 
Reviewed-by: Juan Quintela 
Reviewed-by: Amit Shah 
Signed-off-by: Juan Quintela 
---
 include/migration/migration.h |  2 ++
 migration/migration.c | 47 +--
 qapi-schema.json  |  4 +++-
 trace-events  |  1 +
 4 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 217..219032d 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -170,6 +170,8 @@ MigrationState *migrate_init(const MigrationParams *params);
 bool migration_in_setup(MigrationState *);
 bool migration_has_finished(MigrationState *);
 bool migration_has_failed(MigrationState *);
+/* True if outgoing migration has entered postcopy phase */
+bool migration_in_postcopy(MigrationState *);
 MigrationState *migrate_get_current(void);

 void migrate_compress_threads_create(void);
diff --git a/migration/migration.c b/migration/migration.c
index 85e68bc..2acd0e6 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -462,6 +462,7 @@ static bool migration_is_setup_or_active(int state)
 {
 switch (state) {
 case MIGRATION_STATUS_ACTIVE:
+case MIGRATION_STATUS_POSTCOPY_ACTIVE:
 case MIGRATION_STATUS_SETUP:
 return true;

@@ -537,6 +538,39 @@ MigrationInfo *qmp_query_migrate(Error **errp)

 get_xbzrle_cache_stats(info);
 break;
+case MIGRATION_STATUS_POSTCOPY_ACTIVE:
+/* Mostly the same as active; TODO add some postcopy stats */
+info->has_status = true;
+info->has_total_time = true;
+info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
+- s->total_time;
+info->has_expected_downtime = true;
+info->expected_downtime = s->expected_downtime;
+info->has_setup_time = true;
+info->setup_time = s->setup_time;
+
+info->has_ram = true;
+info->ram = g_malloc0(sizeof(*info->ram));
+info->ram->transferred = ram_bytes_transferred();
+info->ram->remaining = ram_bytes_remaining();
+info->ram->total = ram_bytes_total();
+info->ram->duplicate = dup_mig_pages_transferred();
+info->ram->skipped = skipped_mig_pages_transferred();
+info->ram->normal = norm_mig_pages_transferred();
+info->ram->normal_bytes = norm_mig_bytes_transferred();
+info->ram->dirty_pages_rate = s->dirty_pages_rate;
+info->ram->mbps = s->mbps;
+
+if (blk_mig_active()) {
+info->has_disk = true;
+info->disk = g_malloc0(sizeof(*info->disk));
+info->disk->transferred = blk_mig_bytes_transferred();
+info->disk->remaining = blk_mig_bytes_remaining();
+info->disk->total = blk_mig_bytes_total();
+}
+
+get_xbzrle_cache_stats(info);
+break;
 case MIGRATION_STATUS_COMPLETED:
 get_xbzrle_cache_stats(info);

@@ -718,7 +752,8 @@ static void migrate_fd_cleanup(void *opaque)
 s->file = NULL;
 }

-assert(s->state != MIGRATION_STATUS_ACTIVE);
+assert((s->state != MIGRATION_STATUS_ACTIVE) &&
+   (s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE));

 if (s->state == MIGRATION_STATUS_CANCELLING) {
 migrate_set_state(s, MIGRATION_STATUS_CANCELLING,
@@ -793,6 +828,11 @@ bool migration_has_failed(MigrationState *s)
 s->state == MIGRATION_STATUS_FAILED);
 }

+bool migration_in_postcopy(MigrationState *s)
+{
+return (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
+}
+
 MigrationState *migrate_init(const MigrationParams *params)
 {
 MigrationState *s = migrate_get_current();
@@ -1307,7 +1347,10 @@ static void *migration_thread(void *opaque)
 s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
 migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_ACTIVE);

-while (s->state == MIGRATION_STATUS_ACTIVE) {
+trace_migration_thread_setup_complete();
+
+while (s->state == MIGRATION_STATUS_ACTIVE ||
+   s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
 int64_t current_time;
 uint64_t pending_size;

diff --git a/qapi-schema.json b/qapi-schema.json
index d25df93..8c3a42a 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -430,6 +430,8 @@
 #
 # @active: in the process of doing migration.
 #
+# @postcopy-active: like active, but now in postcopy mode. (since 2.5)
+#
 # @completed: migration is finished.
 #
 # @failed: some error occurred during migration process.
@@ -439,7 +441,7 @@

Re: [Qemu-devel] [Qemu-block] [PULL v2 39/40] iotests: Add tests for the x-blockdev-del command

2015-11-10 Thread Kevin Wolf

Am 10.11.2015 um 15:59 hat Stefan Hajnoczi geschrieben:
> On Tue, Nov 10, 2015 at 2:09 PM, Kevin Wolf  wrote:
> > From: Alberto Garcia 
> >
> > Signed-off-by: Alberto Garcia 
> > Message-id: 
> > 57c3b0d4d0c73ddadd19e5bded9492c359cc4568.1446475331.git.be...@igalia.com
> > Reviewed-by: Max Reitz 
> > Signed-off-by: Max Reitz 
> > ---
> >  tests/qemu-iotests/139 | 414 
> > +
> >  tests/qemu-iotests/139.out |   5 +
> >  tests/qemu-iotests/group   |   1 +
> >  3 files changed, 420 insertions(+)
> >  create mode 100644 tests/qemu-iotests/139
> >  create mode 100644 tests/qemu-iotests/139.out
> 
> I'm seeing the following failure:
> 
>  ./check -qcow2 139
> QEMU  -- "./qemu" -nodefaults
> QEMU_IMG  -- "./qemu-img"
> QEMU_IO   -- "./qemu-io"  -f qcow2 --cache writeback
> QEMU_NBD  -- "./qemu-nbd"
> IMGFMT-- qcow2 (compat=1.1)
> IMGPROTO  -- file
> PLATFORM  -- Linux/x86_64 stefanha-x1 4.2.5-300.fc23.x86_64
> TEST_DIR  -- /home/stefanha/qemu/tests/qemu-iotests/scratch
> SOCKET_SCM_HELPER -- /home/stefanha/qemu/tests/qemu-iotests/socket_scm_helper
> 
> 139 [failed, exit status 1] - output mismatch (see 139.out.bad)
> --- /home/stefanha/qemu/tests/qemu-iotests/139.out2015-11-10
> 14:24:03.728322694 +0000
> +++ 139.out.bad2015-11-10 14:54:51.617899443 +0000
> @@ -1,5 +1,19 @@
> -
> +F...
> +==
> +FAIL: testQuorum (__main__.TestBlockdevDel)
> +--
> +Traceback (most recent call last):
> +  File "139", line 403, in testQuorum
> +self.addQuorum('quorum0', 'node0', 'node1')
> +  File "139", line 291, in addQuorum
> +self.assert_qmp(result, 'return', {})
> +  File "/home/stefanha/qemu/tests/qemu-iotests/iotests.py", line 314,
> in assert_qmp
> +result = self.dictpath(d, path)
> +  File "/home/stefanha/qemu/tests/qemu-iotests/iotests.py", line 293,
> in dictpath
> +self.fail('failed path traversal for "%s" in "%s"' % (path, str(d)))
> +AssertionError: failed path traversal for "return" in "{u'error':
> {u'class': u'GenericError', u'desc': u"Unknown driver 'quorum'"}}"
> +

I guess we need a follow-up patch for -rc1 that simply skips the test
case if quorum isn't available.

Kevin

Re: [Qemu-devel] [PATCH] kvm-all: PAGE_SIZE should be real host page size

2015-11-10 Thread Andrew Jones

On Tue, Nov 10, 2015 at 04:41:16PM +0100, Paolo Bonzini wrote:
> 
> 
> On 10/11/2015 01:23, Andrew Jones wrote:
> > Just noticed this while grepping TARGET_PAGE_SIZE for an unrelated
> > reason. I didn't use qemu_real_host_page_size as kvm_set_phys_mem()
> > does, because we'd need to make sure page_size_init() has run first.
> > 
> > Signed-off-by: Andrew Jones 
> > ---
> >  kvm-all.c | 6 --
> >  1 file changed, 4 insertions(+), 2 deletions(-)
> > 
> > diff --git a/kvm-all.c b/kvm-all.c
> > index 1bc12737723c3..de9ff5971fb3b 100644
> > --- a/kvm-all.c
> > +++ b/kvm-all.c
> > @@ -45,8 +45,10 @@
> >  #include 
> >  #endif
> >  
> > -/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
> > -#define PAGE_SIZE TARGET_PAGE_SIZE
> > +/* KVM uses PAGE_SIZE in its definition of KVM_COALESCED_MMIO_MAX. We
> > + * need to use the real host PAGE_SIZE, as that's what KVM will use.
> > + */
> > +#define PAGE_SIZE getpagesize()
> >  
> >  //#define DEBUG_KVM
> >  
> > 
> 
> Is this a bugfix or just a cleanup?  If the former, on which targets?

It's a bugfix for any targets that have a TARGET_PAGE_SIZE !=
real-host-page-size. For example ARM has TARGET_PAGE_SIZE set to 1024,
even when the host is using 4k or 64k pages. However, I didn't find this
due to a bug, because on ARM I'm not using emulated devices that make
use of the coalesced-mmio feature at this time.

Thanks,
drew

> 
> Paolo
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Qemu-devel] [PATCH 0/3] fix clang negative signed bit shift warning

2015-11-10 Thread Stefan Hajnoczi

LLVM's clang 3.7.0 compiler warns about bit shifting negative numbers because
the result is undefined.  This series includes 3 small fixes to appease clang.

Stefan Hajnoczi (3):
  monitor: avoid clang shifting negative signed warning
  tpm: avoid clang shifting negative signed warning
  disas/arm: avoid clang shifting negative signed warning

 disas/arm.c   | 2 +-
 hw/tpm/tpm_tis.c  | 2 +-
 target-i386/monitor.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

-- 
2.5.0

Re: [Qemu-devel] [PATCH v13 3/3] block/gluster: add support for multiple gluster servers

2015-11-10 Thread Eric Blake

On 11/10/2015 02:09 AM, Prasanna Kumar Kalever wrote:
> This patch adds a way to specify multiple volfile servers to the gluster
> block backend of QEMU with tcp|rdma transport types and their port numbers.
> 

[...]

> 2.
>  'json:{"driver":"qcow2","file":{"driver":"gluster","volume":"testvol",
>  "path":"/path/a.qcow2","servers":
>  [{"host":"1.2.3.4","port":"24007","transport":"tcp"},
>   {"host":"4.5.6.7","port":"24008","transport":"rdma"}] } }'
> 
> This patch gives a mechanism to provide all the server addresses, which are in
> replica set, so in case host1 is down VM can still boot from any of the
> active hosts.
> 
> This is equivalent to the backup-volfile-servers option supported by
> mount.glusterfs (FUSE way of mounting gluster volume)
> 
> Credits: Sincere thanks to Kevin Wolf  and
> "Deepak C Shetty"  for inputs and all their support
> 
> Signed-off-by: Prasanna Kumar Kalever 
> ---

> v10:
> fix mem-leak as per Peter Krempa  review comments
> 
> v11:
> using qapi-types* defined structures as per "Eric Blake" 
> review comments.
> 
> v12:
> fix crash caused in qapi_free_BlockdevOptionsGluster
> 
> v13:
> address comments from "Jeff Cody" 

I had some other comments against v10 that I don't see addressed yet:
https://lists.gnu.org/archive/html/qemu-devel/2015-10/msg06377.html

> ---
>  block/gluster.c  | 468 
> ---
>  qapi/block-core.json |  60 ++-
>  2 files changed, 461 insertions(+), 67 deletions(-)
> 
> diff --git a/block/gluster.c b/block/gluster.c
> index ededda2..8939072 100644
> --- a/block/gluster.c
> +++ b/block/gluster.c
> @@ -11,6 +11,19 @@
>  #include "block/block_int.h"
>  #include "qemu/uri.h"
>  
> +#define GLUSTER_OPT_FILENAME"filename"
> +#define GLUSTER_OPT_VOLUME  "volume"
> +#define GLUSTER_OPT_PATH"path"
> +#define GLUSTER_OPT_HOST"host"
> +#define GLUSTER_OPT_PORT"port"
> +#define GLUSTER_OPT_TRANSPORT   "transport"
> +#define GLUSTER_OPT_SERVERS_PATTERN "servers."
> +
> +#define GLUSTER_DEFAULT_PORT24007
> +
> +#define MAX_SERVERS "1"

Why is this a string rather than an integer?

> +
> +
>  typedef struct GlusterAIOCB {
>  int64_t size;
>  int ret;
> @@ -29,15 +42,6 @@ typedef struct BDRVGlusterReopenState {
>  struct glfs_fd *fd;
>  } BDRVGlusterReopenState;
>  
> -typedef struct GlusterConf {
> -char *host;
> -int port;
> -char *volume;
> -char *path;
> -char *transport;
> -} GlusterConf;
> -

This patch feels pretty big. It may be smarter to break it into two
pieces - one that adds GlusterConf to qapi/block-core.json and replaces
existing uses of this definition to the qapi type but with no changes in
semantics; and the other that then extends things to add support for
multiple servers (so that we aren't trying to do too much in one patch).

> @@ -143,8 +176,11 @@ static int parse_volume_options(GlusterConf *gconf, char 
> *path)
>   * file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket
>   * file=gluster+rdma://1.2.3.4:24007/testvol/a.img
>   */
> -static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
> +static int qemu_gluster_parseuri(BlockdevOptionsGluster **pgconf,
> + const char *filename)
>  {
> +BlockdevOptionsGluster *gconf;
> +GlusterServer *gsconf;
>  URI *uri;
>  QueryParams *qp = NULL;
>  bool is_unix = false;
> @@ -155,20 +191,24 @@ static int qemu_gluster_parseuri(GlusterConf *gconf, 
> const char *filename)
>  return -EINVAL;
>  }
>  
> +gconf = g_new0(BlockdevOptionsGluster, 1);
> +gsconf = g_new0(GlusterServer, 1);

gconf and gsconf are both allocated here...

> +
>  /* transport */
>  if (!uri->scheme || !strcmp(uri->scheme, "gluster")) {
> -gconf->transport = g_strdup("tcp");
> +gsconf->transport = GLUSTER_TRANSPORT_TCP;
>  } else if (!strcmp(uri->scheme, "gluster+tcp")) {
> -gconf->transport = g_strdup("tcp");
> +gsconf->transport = GLUSTER_TRANSPORT_TCP;
>  } else if (!strcmp(uri->scheme, "gluster+unix")) {
> -gconf->transport = g_strdup("unix");
> +gsconf->transport = GLUSTER_TRANSPORT_UNIX;
>  is_unix = true;
>  } else if (!strcmp(uri->scheme, "gluster+rdma")) {
> -gconf->transport = g_strdup("rdma");
> +gsconf->transport = GLUSTER_TRANSPORT_RDMA;
>  } else {
>  ret = -EINVAL;
>  goto out;

...but you can error here...

>  }
> +gsconf->has_transport = true;
>  
>  ret = parse_volume_options(gconf, uri->path);
>  if (ret < 0) {
> @@ -190,13 +230,27 @@ static int qemu_gluster_parseuri(GlusterConf *gconf, 
> const char *filename)
>  ret = -EINVAL;
>  goto out;
>  }
>

[Qemu-devel] [PULL 24/44] xen_disk: Account for flush operations

2015-11-10 Thread Stefan Hajnoczi

From: Alberto Garcia 

Currently both BLKIF_OP_WRITE and BLKIF_OP_FLUSH_DISKCACHE are being
accounted as write operations.

Signed-off-by: Alberto Garcia 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
7a2a14e3ac62027aa6267a6c02abc70717be9c0a.1446044837.git.be...@igalia.com
Signed-off-by: Stefan Hajnoczi 
---
 hw/block/xen_disk.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 1bbc111..4869518 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -576,7 +576,9 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
 }
 
 block_acct_start(blk_get_stats(blkdev->blk), &ioreq->acct,
- ioreq->v.size, BLOCK_ACCT_WRITE);
+ ioreq->v.size,
+ ioreq->req.operation == BLKIF_OP_WRITE ?
+ BLOCK_ACCT_WRITE : BLOCK_ACCT_FLUSH);
 ioreq->aio_inflight++;
 blk_aio_writev(blkdev->blk, ioreq->start / BLOCK_SIZE,
 &ioreq->v, ioreq->v.size / BLOCK_SIZE,
-- 
2.5.0

[Qemu-devel] [PATCH 05/10] snapshot: create bdrv_all_find_snapshot helper

2015-11-10 Thread Denis V. Lunev

to check that snapshot is available for all loaded block drivers. The
ability to switch to snapshot is verified separately using
bdrv_all_can_snapshot.

The patch also ensures proper locking.

Signed-off-by: Denis V. Lunev 
CC: Juan Quintela 
CC: Stefan Hajnoczi 
CC: Kevin Wolf 
---
 block/snapshot.c | 21 ++
 include/block/snapshot.h |  2 ++
 migration/savevm.c   | 55 +---
 3 files changed, 38 insertions(+), 40 deletions(-)

diff --git a/block/snapshot.c b/block/snapshot.c
index 9f07a63..97dc315 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -423,3 +423,24 @@ int bdrv_all_goto_snapshot(const char *name, 
BlockDriverState **first_bad_bs)
 *first_bad_bs = bs;
 return err;
 }
+
+int bdrv_all_find_snapshot(const char *name, bool read_only,
+   BlockDriverState **first_bad_bs)
+{
+QEMUSnapshotInfo sn;
+int err = 0;
+BlockDriverState *bs = NULL;
+
+while (err == 0 && (bs = bdrv_next(bs))) {
+AioContext *ctx = bdrv_get_aio_context(bs);
+
+aio_context_acquire(ctx);
+if (read_only || (bdrv_is_inserted(bs) && !bdrv_is_read_only(bs))) {
+err = bdrv_snapshot_find(bs, &sn, name);
+}
+aio_context_release(ctx);
+}
+
+*first_bad_bs = bs;
+return err;
+}
diff --git a/include/block/snapshot.h b/include/block/snapshot.h
index 0a176c7..0fae32b 100644
--- a/include/block/snapshot.h
+++ b/include/block/snapshot.h
@@ -85,5 +85,7 @@ bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs);
 int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bsd_bs,
  Error **err);
 int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bsd_bs);
+int bdrv_all_find_snapshot(const char *name, bool read_only,
+   BlockDriverState **first_bad_bs);
 
 #endif
diff --git a/migration/savevm.c b/migration/savevm.c
index d18ff13..90aa565 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1383,6 +1383,18 @@ int load_vmstate(const char *name)
 QEMUFile *f;
 int ret;
 
+if (!bdrv_all_can_snapshot(&bs)) {
+error_report("Device '%s' is writable but does not support snapshots.",
+ bdrv_get_device_name(bs));
+return -ENOTSUP;
+}
+ret = bdrv_all_find_snapshot(name, false, &bs);
+if (ret < 0) {
+error_report("Device '%s' does not have the requested snapshot '%s'",
+ bdrv_get_device_name(bs), name);
+return ret;
+}
+
 bs_vm_state = find_vmstate_bs();
 if (!bs_vm_state) {
 error_report("No block device supports snapshots");
@@ -1399,29 +1411,6 @@ int load_vmstate(const char *name)
 return -EINVAL;
 }
 
-/* Verify if there is any device that doesn't support snapshots and is
-writable and check if the requested snapshot is available too. */
-bs = NULL;
-while ((bs = bdrv_next(bs))) {
-
-if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
-continue;
-}
-
-if (!bdrv_can_snapshot(bs)) {
-error_report("Device '%s' is writable but does not support 
snapshots.",
-   bdrv_get_device_name(bs));
-return -ENOTSUP;
-}
-
-ret = bdrv_snapshot_find(bs, &sn, name);
-if (ret < 0) {
-error_report("Device '%s' does not have the requested snapshot 
'%s'",
-   bdrv_get_device_name(bs), name);
-return ret;
-}
-}
-
 /* Flush all IO requests so they don't interfere with the new state.  */
 bdrv_drain_all();
 
@@ -1475,8 +1464,8 @@ void hmp_delvm(Monitor *mon, const QDict *qdict)
 void hmp_info_snapshots(Monitor *mon, const QDict *qdict)
 {
 BlockDriverState *bs, *bs1;
-QEMUSnapshotInfo *sn_tab, *sn, s, *sn_info = &s;
-int nb_sns, i, ret, available;
+QEMUSnapshotInfo *sn_tab, *sn;
+int nb_sns, i;
 int total;
 int *available_snapshots;
 
@@ -1500,21 +1489,7 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict)
 available_snapshots = g_new0(int, nb_sns);
 total = 0;
 for (i = 0; i < nb_sns; i++) {
-sn = &sn_tab[i];
-available = 1;
-bs1 = NULL;
-
-while ((bs1 = bdrv_next(bs1))) {
-if (bdrv_can_snapshot(bs1) && bs1 != bs) {
-ret = bdrv_snapshot_find(bs1, sn_info, sn->id_str);
-if (ret < 0) {
-available = 0;
-break;
-}
-}
-}
-
-if (available) {
+if (bdrv_all_find_snapshot(sn_tab[i].id_str, true, &bs1) == 0) {
 available_snapshots[total] = i;
 total++;
 }
-- 
2.5.0

[Qemu-devel] [PULL 14/57] Rename save_live_complete to save_live_complete_precopy

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

In postcopy we're going to need to perform the complete phase
for postcopiable devices at a different point; start out by
renaming all of the 'complete's to make the difference obvious.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Amit Shah 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 hw/ppc/spapr.c  |  2 +-
 include/migration/vmstate.h |  2 +-
 include/sysemu/sysemu.h |  2 +-
 migration/block.c   |  2 +-
 migration/migration.c   |  2 +-
 migration/ram.c |  2 +-
 migration/savevm.c  | 10 +-
 trace-events|  2 +-
 8 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 0ed8527..37d071e 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1588,7 +1588,7 @@ static int htab_load(QEMUFile *f, void *opaque, int 
version_id)
 static SaveVMHandlers savevm_htab_handlers = {
 .save_live_setup = htab_save_setup,
 .save_live_iterate = htab_save_iterate,
-.save_live_complete = htab_save_complete,
+.save_live_complete_precopy = htab_save_complete,
 .load_state = htab_load,
 };

diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index d173b56..9986ccc 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -40,7 +40,7 @@ typedef struct SaveVMHandlers {
 SaveStateHandler *save_state;

 void (*cleanup)(void *opaque);
-int (*save_live_complete)(QEMUFile *f, void *opaque);
+int (*save_live_complete_precopy)(QEMUFile *f, void *opaque);

 /* This runs both outside and inside the iothread lock.  */
 bool (*is_active)(void *opaque);
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 8dc2add..470445f 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -89,8 +89,8 @@ void qemu_savevm_state_begin(QEMUFile *f,
  const MigrationParams *params);
 void qemu_savevm_state_header(QEMUFile *f);
 int qemu_savevm_state_iterate(QEMUFile *f);
-void qemu_savevm_state_complete(QEMUFile *f);
 void qemu_savevm_state_cleanup(void);
+void qemu_savevm_state_complete_precopy(QEMUFile *f);
 uint64_t qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size);
 int qemu_loadvm_state(QEMUFile *f);

diff --git a/migration/block.c b/migration/block.c
index cf9d9f8..4fb9b7c 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -876,7 +876,7 @@ static SaveVMHandlers savevm_block_handlers = {
 .set_params = block_set_params,
 .save_live_setup = block_save_setup,
 .save_live_iterate = block_save_iterate,
-.save_live_complete = block_save_complete,
+.save_live_complete_precopy = block_save_complete,
 .save_live_pending = block_save_pending,
 .load_state = block_load,
 .cleanup = block_migration_cleanup,
diff --git a/migration/migration.c b/migration/migration.c
index 82efbeb..bb7dcb9 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -994,7 +994,7 @@ static void migration_completion(MigrationState *s, bool 
*old_vm_running,
 ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
 if (ret >= 0) {
 qemu_file_set_rate_limit(s->file, INT64_MAX);
-qemu_savevm_state_complete(s->file);
+qemu_savevm_state_complete_precopy(s->file);
 }
 }
 qemu_mutex_unlock_iothread();
diff --git a/migration/ram.c b/migration/ram.c
index 298332c..4fa44a7 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1700,7 +1700,7 @@ static int ram_load(QEMUFile *f, void *opaque, int 
version_id)
 static SaveVMHandlers savevm_ram_handlers = {
 .save_live_setup = ram_save_setup,
 .save_live_iterate = ram_save_iterate,
-.save_live_complete = ram_save_complete,
+.save_live_complete_precopy = ram_save_complete,
 .save_live_pending = ram_save_pending,
 .load_state = ram_load,
 .cleanup = ram_migration_cleanup,
diff --git a/migration/savevm.c b/migration/savevm.c
index 9dc57d3..fed41e6 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -807,19 +807,19 @@ static bool should_send_vmdesc(void)
 return !machine->suppress_vmdesc;
 }

-void qemu_savevm_state_complete(QEMUFile *f)
+void qemu_savevm_state_complete_precopy(QEMUFile *f)
 {
 QJSON *vmdesc;
 int vmdesc_len;
 SaveStateEntry *se;
 int ret;

-trace_savevm_state_complete();
+trace_savevm_state_complete_precopy();

 cpu_synchronize_all_states();

 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
-if (!se->ops || !se->ops->save_live_complete) {
+if (!se->ops || !se->ops->save_live_complete_precopy) {
 continue;
 }
 if (se->ops && se->ops->is_active) {
@@ -831,7 +831,7 @@ void qemu_savevm_state_complete(QEMUFile *f)

 save_section_header(f, se, QEMU_VM_SECTION_END);

-ret =

[Qemu-devel] [PULL 37/57] postcopy: ram_enable_notify to switch on userfault

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

Mark the area of RAM as 'userfault'
Start up a fault-thread to handle any userfaults we might receive
from it (to be filled in later)

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: David Gibson 
Reviewed-by: Juan Quintela 
Reviewed-by: Amit Shah 
Signed-off-by: Juan Quintela 
---
 include/migration/migration.h|  3 ++
 include/migration/postcopy-ram.h |  6 
 migration/postcopy-ram.c | 69 
 migration/savevm.c   |  9 ++
 4 files changed, 87 insertions(+)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 6e42b58..2ad0d2b 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -86,6 +86,9 @@ struct MigrationIncomingState {
  */
 QemuEvent main_thread_load_event;

+QemuThread fault_thread;
+QemuSemaphore  fault_thread_sem;
+
 /* For the kernel to send us notifications */
 int   userfault_fd;
 QEMUFile *to_src_file;
diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h
index f87020c..b10c03d 100644
--- a/include/migration/postcopy-ram.h
+++ b/include/migration/postcopy-ram.h
@@ -17,6 +17,12 @@
 bool postcopy_ram_supported_by_host(void);

 /*
+ * Make all of RAM sensitive to accesses to areas that haven't yet been written
+ * and wire up anything necessary to deal with it.
+ */
+int postcopy_ram_enable_notify(MigrationIncomingState *mis);
+
+/*
  * Initialise postcopy-ram, setting the RAM to a state where we can go into
  * postcopy later; must be called prior to any precopy.
  * called from ram.c's similarly named ram_postcopy_incoming_init
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 8478bfd..3110b2a 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -275,6 +275,69 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState 
*mis)
 return 0;
 }

+/*
+ * Mark the given area of RAM as requiring notification to unwritten areas
+ * Used as a  callback on qemu_ram_foreach_block.
+ *   host_addr: Base of area to mark
+ *   offset: Offset in the whole ram arena
+ *   length: Length of the section
+ *   opaque: MigrationIncomingState pointer
+ * Returns 0 on success
+ */
+static int ram_block_enable_notify(const char *block_name, void *host_addr,
+   ram_addr_t offset, ram_addr_t length,
+   void *opaque)
+{
+MigrationIncomingState *mis = opaque;
+struct uffdio_register reg_struct;
+
+reg_struct.range.start = (uintptr_t)host_addr;
+reg_struct.range.len = length;
+reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
+
+/* Now tell our userfault_fd that it's responsible for this area */
+if (ioctl(mis->userfault_fd, UFFDIO_REGISTER, &reg_struct)) {
+error_report("%s userfault register: %s", __func__, strerror(errno));
+return -1;
+}
+
+return 0;
+}
+
+/*
+ * Handle faults detected by the USERFAULT markings
+ */
+static void *postcopy_ram_fault_thread(void *opaque)
+{
+MigrationIncomingState *mis = opaque;
+
+fprintf(stderr, "postcopy_ram_fault_thread\n");
+/* TODO: In later patch */
+qemu_sem_post(&mis->fault_thread_sem);
+while (1) {
+/* TODO: In later patch */
+}
+
+return NULL;
+}
+
+int postcopy_ram_enable_notify(MigrationIncomingState *mis)
+{
+/* Create the fault handler thread and wait for it to be ready */
+qemu_sem_init(&mis->fault_thread_sem, 0);
+qemu_thread_create(&mis->fault_thread, "postcopy/fault",
+   postcopy_ram_fault_thread, mis, QEMU_THREAD_JOINABLE);
+qemu_sem_wait(&mis->fault_thread_sem);
+qemu_sem_destroy(&mis->fault_thread_sem);
+
+/* Mark so that we get notified of accesses to unwritten areas */
+if (qemu_ram_foreach_block(ram_block_enable_notify, mis)) {
+return -1;
+}
+
+return 0;
+}
+
 #else
 /* No target OS support, stubs just fail */
 bool postcopy_ram_supported_by_host(void)
@@ -301,6 +364,12 @@ int postcopy_ram_discard_range(MigrationIncomingState 
*mis, uint8_t *start,
 assert(0);
 return -1;
 }
+
+int postcopy_ram_enable_notify(MigrationIncomingState *mis)
+{
+assert(0);
+return -1;
+}
 #endif

 /* - */
diff --git a/migration/savevm.c b/migration/savevm.c
index 674f0fb..a7210a2 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1381,6 +1381,15 @@ static int 
loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
 return -1;
 }

+/*
+ * Sensitise RAM - can now generate requests for blocks that don't exist
+ * However, at this point the CPU shouldn't be running, and the IO
+ * shouldn't be doing anything yet so don't actually expect requests
+ */
+if

[Qemu-devel] [PULL 44/57] Postcopy: Use helpers to map pages during migration

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

In postcopy, the destination guest is running at the same time
as it's receiving pages; as we receive new pages we must put
them into the guest's address space atomically to avoid a running
CPU accessing a partially written page.

Use the helpers in postcopy-ram.c to map these pages.

qemu_get_buffer_in_place is used to avoid a copy out of qemu_file
in the case that postcopy is going to do a copy anyway.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 migration/ram.c | 130 +++-
 trace-events|   1 +
 2 files changed, 130 insertions(+), 1 deletion(-)

diff --git a/migration/ram.c b/migration/ram.c
index d09d5ab..af5f369 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1932,6 +1932,14 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, 
void *host)
 /* Must be called from within a rcu critical section.
  * Returns a pointer from within the RCU-protected ram_list.
  */
+/*
+ * Read a RAMBlock ID from the stream f, find the host address of the
+ * start of that block and add on 'offset'
+ *
+ * f: Stream to read from
+ * offset: Offset within the block
+ * flags: Page flags (mostly to see if it's a continuation of previous block)
+ */
 static inline void *host_from_stream_offset(QEMUFile *f,
 ram_addr_t offset,
 int flags)
@@ -2077,11 +2085,126 @@ int ram_postcopy_incoming_init(MigrationIncomingState 
*mis)
 return postcopy_ram_incoming_init(mis, ram_pages);
 }

+/*
+ * Called in postcopy mode by ram_load().
+ * rcu_read_lock is taken prior to this being called.
+ */
+static int ram_load_postcopy(QEMUFile *f)
+{
+int flags = 0, ret = 0;
+bool place_needed = false;
+bool matching_page_sizes = qemu_host_page_size == TARGET_PAGE_SIZE;
+MigrationIncomingState *mis = migration_incoming_get_current();
+/* Temporary page that is later 'placed' */
+void *postcopy_host_page = postcopy_get_tmp_page(mis);
+
+while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
+ram_addr_t addr;
+void *host = NULL;
+void *page_buffer = NULL;
+void *place_source = NULL;
+uint8_t ch;
+bool all_zero = false;
+
+addr = qemu_get_be64(f);
+flags = addr & ~TARGET_PAGE_MASK;
+addr &= TARGET_PAGE_MASK;
+
+trace_ram_load_postcopy_loop((uint64_t)addr, flags);
+place_needed = false;
+if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
+host = host_from_stream_offset(f, addr, flags);
+if (!host) {
+error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
+ret = -EINVAL;
+break;
+}
+page_buffer = host;
+/*
+ * Postcopy requires that we place whole host pages atomically.
+ * To make it atomic, the data is read into a temporary page
+ * that's moved into place later.
+ * The migration protocol uses,  possibly smaller, target-pages
+ * however the source ensures it always sends all the components
+ * of a host page in order.
+ */
+page_buffer = postcopy_host_page +
+  ((uintptr_t)host & ~qemu_host_page_mask);
+/* If all TP are zero then we can optimise the place */
+if (!((uintptr_t)host & ~qemu_host_page_mask)) {
+all_zero = true;
+}
+
+/*
+ * If it's the last part of a host page then we place the host
+ * page
+ */
+place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
+ ~qemu_host_page_mask) == 0;
+place_source = postcopy_host_page;
+}
+
+switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
+case RAM_SAVE_FLAG_COMPRESS:
+ch = qemu_get_byte(f);
+memset(page_buffer, ch, TARGET_PAGE_SIZE);
+if (ch) {
+all_zero = false;
+}
+break;
+
+case RAM_SAVE_FLAG_PAGE:
+all_zero = false;
+if (!place_needed || !matching_page_sizes) {
+qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
+} else {
+/* Avoids the qemu_file copy during postcopy, which is
+ * going to do a copy later; can only do it when we
+ * do this read in one go (matching page sizes)
+ */
+qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
+ TARGET_PAGE_SIZE);
+}
+break;
+case RAM_SAVE_FLAG_EOS:
+/* normal exit */
+break;
+default:
+

Re: [Qemu-devel] [PATCH for-2.5 v3 0/2] Fix compilation of netmap backend

2015-11-10 Thread Eric Blake

On 11/10/2015 02:47 AM, Vincenzo Maffione wrote:
> This patch series adds some fixes to the netmap net backend. It contains
> two changes:
> (1) Fix compilation issue of netmap.c introduced by the reorganization
> of struct NetClientOptions
> (2) Address the FIXME comment that was asking to use error_setg()
> variants in place of error_report()
> 
> CHANGELOG:
> - removed dead return and use error_setg_file_open() in place
>   of error_setg_errno()
> - I noticed that net_init_netmap() has to return int, so I restored
>   the return statements in that function
> 
> Vincenzo Maffione (2):
>   net: netmap: Fix compilation issue
>   net: netmap: use error_setg() helpers in place of error_report()

Series belongs in 2.5.
Reviewed-by: Eric Blake 

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

[Qemu-devel] [PULL 40/57] Page request: Add MIG_RP_MSG_REQ_PAGES reverse command

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

Add MIG_RP_MSG_REQ_PAGES command on Return path for the postcopy
destination to request a page from the source.

Two versions exist:
   MIG_RP_MSG_REQ_PAGES_ID that includes a RAMBlock name and start/len
   MIG_RP_MSG_REQ_PAGES that just has start/len for use with the same
RAMBlock as a previous MIG_RP_MSG_REQ_PAGES_ID

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Reviewed-by: Amit Shah 
Signed-off-by: Juan Quintela 
---
 include/migration/migration.h |  5 
 migration/migration.c | 70 +++
 trace-events  |  1 +
 3 files changed, 76 insertions(+)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index ff13ff2..1046d4e 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -49,6 +49,9 @@ enum mig_rp_message_type {
 MIG_RP_MSG_SHUT, /* sibling will not send any more RP messages */
 MIG_RP_MSG_PONG, /* Response to a PING; data (seq: be32 ) */

+MIG_RP_MSG_REQ_PAGES_ID, /* data (start: be64, len: be32, id: string) */
+MIG_RP_MSG_REQ_PAGES,/* data (start: be64, len: be32) */
+
 MIG_RP_MSG_MAX
 };

@@ -256,6 +259,8 @@ void migrate_send_rp_shut(MigrationIncomingState *mis,
   uint32_t value);
 void migrate_send_rp_pong(MigrationIncomingState *mis,
   uint32_t value);
+void migrate_send_rp_req_pages(MigrationIncomingState *mis, const char* rbname,
+  ram_addr_t start, size_t len);

 void ram_control_before_iterate(QEMUFile *f, uint64_t flags);
 void ram_control_after_iterate(QEMUFile *f, uint64_t flags);
diff --git a/migration/migration.c b/migration/migration.c
index 3cdb4f7..6ccdeb8 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -259,6 +259,35 @@ static void deferred_incoming_migration(Error **errp)
 deferred_incoming = true;
 }

+/* Request a range of pages from the source VM at the given
+ * start address.
+ *   rbname: Name of the RAMBlock to request the page in, if NULL it's the same
+ *   as the last request (a name must have been given previously)
+ *   Start: Address offset within the RB
+ *   Len: Length in bytes required - must be a multiple of pagesize
+ */
+void migrate_send_rp_req_pages(MigrationIncomingState *mis, const char *rbname,
+   ram_addr_t start, size_t len)
+{
+uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname upto 256 */
+size_t msglen = 12; /* start + len */
+
+*(uint64_t *)bufc = cpu_to_be64((uint64_t)start);
+*(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len);
+
+if (rbname) {
+int rbname_len = strlen(rbname);
+assert(rbname_len < 256);
+
+bufc[msglen++] = rbname_len;
+memcpy(bufc + msglen, rbname, rbname_len);
+msglen += rbname_len;
+migrate_send_rp_message(mis, MIG_RP_MSG_REQ_PAGES_ID, msglen, bufc);
+} else {
+migrate_send_rp_message(mis, MIG_RP_MSG_REQ_PAGES, msglen, bufc);
+}
+}
+
 void qemu_start_incoming_migration(const char *uri, Error **errp)
 {
 const char *p;
@@ -1151,10 +1180,23 @@ static struct rp_cmd_args {
 [MIG_RP_MSG_INVALID]= { .len = -1, .name = "INVALID" },
 [MIG_RP_MSG_SHUT]   = { .len =  4, .name = "SHUT" },
 [MIG_RP_MSG_PONG]   = { .len =  4, .name = "PONG" },
+[MIG_RP_MSG_REQ_PAGES]  = { .len = 12, .name = "REQ_PAGES" },
+[MIG_RP_MSG_REQ_PAGES_ID]   = { .len = -1, .name = "REQ_PAGES_ID" },
 [MIG_RP_MSG_MAX]= { .len = -1, .name = "MAX" },
 };

 /*
+ * Process a request for pages received on the return path,
+ * We're allowed to send more than requested (e.g. to round to our page size)
+ * and we don't need to send pages that have already been sent.
+ */
+static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname,
+   ram_addr_t start, size_t len)
+{
+trace_migrate_handle_rp_req_pages(rbname, start, len);
+}
+
+/*
  * Handles messages sent on the return path towards the source VM
  *
  */
@@ -1166,6 +1208,8 @@ static void *source_return_path_thread(void *opaque)
 const int max_len = 512;
 uint8_t buf[max_len];
 uint32_t tmp32, sibling_error;
+ram_addr_t start = 0; /* =0 to silence warning */
+size_t  len = 0, expected_len;
 int res;

 trace_source_return_path_thread_entry();
@@ -1225,6 +1269,32 @@ static void *source_return_path_thread(void *opaque)
 trace_source_return_path_thread_pong(tmp32);
 break;

+case MIG_RP_MSG_REQ_PAGES:
+start = be64_to_cpup((uint64_t *)buf);
+len = be32_to_cpup((uint32_t *)(buf + 8));
+migrate_handle_rp_req_pages(ms, NULL, start, len);
+

[Qemu-devel] [PATCH] nand: fix address overflow

2015-11-10 Thread Rabin Vincent

The address mask and the value are shifted beyond 32 bits when there
are 5 address cycles, overflowing the 32-bit types used for them.

Signed-off-by: Rabin Vincent 
---
 hw/block/nand.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/block/nand.c b/hw/block/nand.c
index 61d2cec..a68266f 100644
--- a/hw/block/nand.c
+++ b/hw/block/nand.c
@@ -522,8 +522,8 @@ void nand_setio(DeviceState *dev, uint32_t value)
 
 if (s->ale) {
 unsigned int shift = s->addrlen * 8;
-unsigned int mask = ~(0xff << shift);
-unsigned int v = value << shift;
+uint64_t mask = ~(0xffull << shift);
+uint64_t v = (uint64_t)value << shift;
 
 s->addr = (s->addr & mask) | v;
 s->addrlen ++;
-- 
1.7.10.4

[Qemu-devel] [PULL 55/57] Disable mlock around incoming postcopy

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

Userfault doesn't work with mlock; mlock is designed to nail down pages
so they don't move, userfault is designed to tell you when they're not
there.

munlock the pages we userfault protect before postcopy.
mlock everything again at the end if mlock is enabled.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: David Gibson 
Reviewed-by: Amit Shah 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 include/sysemu/sysemu.h  |  1 +
 migration/postcopy-ram.c | 24 
 2 files changed, 25 insertions(+)

diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 05d1982..f992494 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -172,6 +172,7 @@ extern int boot_menu;
 extern bool boot_strict;
 extern uint8_t *boot_splash_filedata;
 extern size_t boot_splash_filedata_size;
+extern bool enable_mlock;
 extern uint8_t qemu_extra_params_fw[2];
 extern QEMUClockType rtc_clock;
 extern const char *mem_path;
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 8e107fe..1a24b09 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -87,6 +87,11 @@ static bool ufd_version_check(int ufd)
 return true;
 }

+/*
+ * Note: This has the side effect of munlock'ing all of RAM, that's
+ * normally fine since if the postcopy succeeds it gets turned back on at the
+ * end.
+ */
 bool postcopy_ram_supported_by_host(void)
 {
 long pagesize = getpagesize();
@@ -115,6 +120,15 @@ bool postcopy_ram_supported_by_host(void)
 }

 /*
+ * userfault and mlock don't go together; we'll put it back later if
+ * it was enabled.
+ */
+if (munlockall()) {
+error_report("%s: munlockall: %s", __func__,  strerror(errno));
+return -1;
+}
+
+/*
  *  We need to check that the ops we need are supported on anon memory
  *  To do that we need to register a chunk and see the flags that
  *  are returned.
@@ -294,6 +308,16 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState 
*mis)
 mis->have_fault_thread = false;
 }

+if (enable_mlock) {
+if (os_mlock() < 0) {
+error_report("mlock: %s", strerror(errno));
+/*
+ * It doesn't feel right to fail at this point, we have a valid
+ * VM state.
+ */
+}
+}
+
 postcopy_state_set(POSTCOPY_INCOMING_END);
 migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);

-- 
2.5.0

[Qemu-devel] [PULL 42/57] Page request: Consume pages off the post-copy queue

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

When transmitting RAM pages, consume pages that have been queued by
MIG_RP_MSG_REQ_PAGES commands and send them ahead of normal page scanning.

Note:
  a) After a queued page the linear walk carries on from after the
unqueued page; there is a reasonable chance that the destination
was about to ask for other closeby pages anyway.

  b) We have to be careful of any assumptions that the page walking
code makes, in particular it does some short cuts on its first linear
walk that break as soon as we do a queued page.

  c) We have to be careful to not break up host-page size chunks, since
this makes it harder to place the pages on the destination.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 migration/ram.c | 249 +---
 trace-events|   2 +
 2 files changed, 220 insertions(+), 31 deletions(-)

diff --git a/migration/ram.c b/migration/ram.c
index 8302d09..d09d5ab 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -548,9 +548,9 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t 
**current_data,
  * Returns: byte offset within memory region of the start of a dirty page
  */
 static inline
-ram_addr_t migration_bitmap_find_and_reset_dirty(RAMBlock *rb,
- ram_addr_t start,
- ram_addr_t *ram_addr_abs)
+ram_addr_t migration_bitmap_find_dirty(RAMBlock *rb,
+   ram_addr_t start,
+   ram_addr_t *ram_addr_abs)
 {
 unsigned long base = rb->offset >> TARGET_PAGE_BITS;
 unsigned long nr = base + (start >> TARGET_PAGE_BITS);
@@ -567,15 +567,24 @@ ram_addr_t migration_bitmap_find_and_reset_dirty(RAMBlock 
*rb,
 next = find_next_bit(bitmap, size, nr);
 }

-if (next < size) {
-clear_bit(next, bitmap);
-migration_dirty_pages--;
-}
 *ram_addr_abs = next << TARGET_PAGE_BITS;
 return (next - base) << TARGET_PAGE_BITS;
 }

-/* Called with rcu_read_lock() to protect migration_bitmap */
+static inline bool migration_bitmap_clear_dirty(ram_addr_t addr)
+{
+bool ret;
+int nr = addr >> TARGET_PAGE_BITS;
+unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
+
+ret = test_and_clear_bit(nr, bitmap);
+
+if (ret) {
+migration_dirty_pages--;
+}
+return ret;
+}
+
 static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
 {
 unsigned long *bitmap;
@@ -974,9 +983,8 @@ static int ram_save_compressed_page(QEMUFile *f, RAMBlock 
*block,
 static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss,
  bool *again, ram_addr_t *ram_addr_abs)
 {
-pss->offset = migration_bitmap_find_and_reset_dirty(pss->block,
-   pss->offset,
-   ram_addr_abs);
+pss->offset = migration_bitmap_find_dirty(pss->block, pss->offset,
+  ram_addr_abs);
 if (pss->complete_round && pss->block == last_seen_block &&
 pss->offset >= last_offset) {
 /*
@@ -1015,6 +1023,107 @@ static bool find_dirty_block(QEMUFile *f, 
PageSearchStatus *pss,
 }
 }

+/*
+ * Helper for 'get_queued_page' - gets a page off the queue
+ *  ms:  MigrationState in
+ * *offset:  Used to return the offset within the RAMBlock
+ * ram_addr_abs: global offset in the dirty/sent bitmaps
+ *
+ * Returns:  block (or NULL if none available)
+ */
+static RAMBlock *unqueue_page(MigrationState *ms, ram_addr_t *offset,
+  ram_addr_t *ram_addr_abs)
+{
+RAMBlock *block = NULL;
+
+qemu_mutex_lock(&ms->src_page_req_mutex);
+if (!QSIMPLEQ_EMPTY(&ms->src_page_requests)) {
+struct MigrationSrcPageRequest *entry =
+QSIMPLEQ_FIRST(&ms->src_page_requests);
+block = entry->rb;
+*offset = entry->offset;
+*ram_addr_abs = (entry->offset + entry->rb->offset) &
+TARGET_PAGE_MASK;
+
+if (entry->len > TARGET_PAGE_SIZE) {
+entry->len -= TARGET_PAGE_SIZE;
+entry->offset += TARGET_PAGE_SIZE;
+} else {
+memory_region_unref(block->mr);
+QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
+g_free(entry);
+}
+}
+qemu_mutex_unlock(&ms->src_page_req_mutex);
+
+return block;
+}
+
+/*
+ * Unqueue a page from the queue fed by postcopy page requests; skips pages
+ * that are already sent (!dirty)
+ *
+ *  ms:  MigrationState in
+ * pss:  PageSearchStatus structure updated with found block/offset
+ * ram_addr_abs: global offset in the dirty/sent bitmaps
+ *
+ * Returns:  true if a queued page is

[Qemu-devel] [PULL 50/57] Postcopy; Handle userfault requests

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

userfaultfd is a Linux syscall that gives an fd that receives a stream
of notifications of accesses to pages registered with it and allows
the program to acknowledge those stalls and tell the accessing
thread to carry on.

We convert the requests from the kernel into messages back to the
source asking for the pages.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Reviewed-by: Amit Shah 
Signed-off-by: Juan Quintela 
---
 include/migration/migration.h |   3 +
 migration/postcopy-ram.c  | 155 +++---
 trace-events  |   9 +++
 3 files changed, 158 insertions(+), 9 deletions(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index a48471e..329d535 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -89,11 +89,14 @@ struct MigrationIncomingState {
  */
 QemuEvent main_thread_load_event;

+bool   have_fault_thread;
 QemuThread fault_thread;
 QemuSemaphore  fault_thread_sem;

 /* For the kernel to send us notifications */
 int   userfault_fd;
+/* To tell the fault_thread to quit */
+int   userfault_quit_fd;
 QEMUFile *to_src_file;
 QemuMutex rp_mutex;/* We send replies from multiple threads */
 void *postcopy_tmp_page;
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 58492c0..4f1e329 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -51,6 +51,8 @@ struct PostcopyDiscardState {
  */
 #if defined(__linux__)

+#include 
+#include 
 #include 
 #include 
 #include 
@@ -267,15 +269,41 @@ int postcopy_ram_incoming_init(MigrationIncomingState 
*mis, size_t ram_pages)
  */
 int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
 {
-/* TODO: Join the fault thread once we're sure it will exit */
-if (qemu_ram_foreach_block(cleanup_range, mis)) {
-return -1;
+trace_postcopy_ram_incoming_cleanup_entry();
+
+if (mis->have_fault_thread) {
+uint64_t tmp64;
+
+if (qemu_ram_foreach_block(cleanup_range, mis)) {
+return -1;
+}
+/*
+ * Tell the fault_thread to exit, it's an eventfd that should
+ * currently be at 0, we're going to increment it to 1
+ */
+tmp64 = 1;
+if (write(mis->userfault_quit_fd, &tmp64, 8) == 8) {
+trace_postcopy_ram_incoming_cleanup_join();
+qemu_thread_join(&mis->fault_thread);
+} else {
+/* Not much we can do here, but may as well report it */
+error_report("%s: incrementing userfault_quit_fd: %s", __func__,
+ strerror(errno));
+}
+trace_postcopy_ram_incoming_cleanup_closeuf();
+close(mis->userfault_fd);
+close(mis->userfault_quit_fd);
+mis->have_fault_thread = false;
 }

+postcopy_state_set(POSTCOPY_INCOMING_END);
+migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
+
 if (mis->postcopy_tmp_page) {
 munmap(mis->postcopy_tmp_page, getpagesize());
 mis->postcopy_tmp_page = NULL;
 }
+trace_postcopy_ram_incoming_cleanup_exit();
 return 0;
 }

@@ -314,31 +342,140 @@ static int ram_block_enable_notify(const char 
*block_name, void *host_addr,
 static void *postcopy_ram_fault_thread(void *opaque)
 {
 MigrationIncomingState *mis = opaque;
+struct uffd_msg msg;
+int ret;
+size_t hostpagesize = getpagesize();
+RAMBlock *rb = NULL;
+RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */

-fprintf(stderr, "postcopy_ram_fault_thread\n");
-/* TODO: In later patch */
+trace_postcopy_ram_fault_thread_entry();
 qemu_sem_post(&mis->fault_thread_sem);
-while (1) {
-/* TODO: In later patch */
+
+while (true) {
+ram_addr_t rb_offset;
+ram_addr_t in_raspace;
+struct pollfd pfd[2];
+
+/*
+ * We're mainly waiting for the kernel to give us a faulting HVA,
+ * however we can be told to quit via userfault_quit_fd which is
+ * an eventfd
+ */
+pfd[0].fd = mis->userfault_fd;
+pfd[0].events = POLLIN;
+pfd[0].revents = 0;
+pfd[1].fd = mis->userfault_quit_fd;
+pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */
+pfd[1].revents = 0;
+
+if (poll(pfd, 2, -1 /* Wait forever */) == -1) {
+error_report("%s: userfault poll: %s", __func__, strerror(errno));
+break;
+}
+
+if (pfd[1].revents) {
+trace_postcopy_ram_fault_thread_quit();
+break;
+}
+
+ret = read(mis->userfault_fd, &msg, sizeof(msg));
+if (ret != sizeof(msg)) {
+if (errno == EAGAIN) {
+/*
+ * if a wake up

Re: [Qemu-devel] [PATCH v10 24/30] qapi: Factor out QAPISchemaObjectType.check_clash()

2015-11-10 Thread Markus Armbruster

Eric Blake  writes:

> On 11/10/2015 02:15 AM, Markus Armbruster wrote:
>
>>> On the other hand, we've been arguing that check() should populate
>>> everything after construction prior to anything else being run; and not
>>> running Variant.type.check() during Variants.check() of flat unions
>>> feels like we may have a hole (a flat union will have to inline its
>>> types to the overall JSON object, and inlining types requires access to
>>> type.members - but as written, we aren't populating them until
>>> Variants.check_clash()).  I can play with hoisting the type.check() out
>>> of type.check_clash() and instead keep base.check() in type.check(), and
>>> add variant.type.check() in Variants.check() (but only for unions, not
>>> for alternates), if you are interested.
>> 
>> My "qapi: Factor out QAPISchemaObjectTypeMember.check_clash()" adds
>> QAPISchemaObjectTypeMember.check_clash() without changing the common
>> protocol.  The new QAPISchemaObjectTypeMember.check_clash() is merely a
>> helper for QAPISchemaObjectType.check().
>> 
>> The two .check_clash() you add (one in this patch, one in the previous
>> one) are different: both contain calls of QAPISchemaObjectType.check().
>> 
>> I feel the .check() calls are too important to be buried deep like that.
>> I'd stick to prior practice and put the .check() calls right into
>> .check().  Obviously, the .check_clash() methods may only be called after
>> .check() then, but that's nothing new.
>> 
>> Fixup for your previous patch:
>> 
>> diff --git a/scripts/qapi.py b/scripts/qapi.py
>> index 4c56935..357127d 100644
>> --- a/scripts/qapi.py
>> +++ b/scripts/qapi.py
>> @@ -1065,7 +1065,6 @@ class QAPISchemaObjectTypeVariants(object):
>>  vseen = dict(seen)
>>  assert isinstance(v.type, QAPISchemaObjectType)
>>  assert not v.type.variants   # not implemented
>> -v.type.check(schema)
>>  for m in v.type.members:
>>  m.check_clash(vseen)
>>  
>> @@ -1077,6 +1076,7 @@ class 
>> QAPISchemaObjectTypeVariant(QAPISchemaObjectTypeMember):
>>  def check(self, schema, tag_type):
>>  QAPISchemaObjectTypeMember.check(self, schema)
>>  assert self.name in tag_type.values
>> +self.type.check(schema)
>>  
>
> Won't quite work.  You are right that we must call
> self.type.check(schema) for variants used by a union; but calling it for
> ALL variants used by an alternate is wrong, because self.type for at
> least one branch of an alternate will not be an instance of
> QAPISchemaObjectType.  However, I'm currently testing whether it is safe
> to just blindly check an object branch of an alternate, if
> present (and that should not lead to cycles, since alternates have no
> base class and since we don't allow one alternate type as a variant of
> another alternate), in which case the fixup for 23/30 is more like:
>
> diff --git i/scripts/qapi.py w/scripts/qapi.py
> index a005c87..25fa642 100644
> --- i/scripts/qapi.py
> +++ w/scripts/qapi.py
> @@ -1065,7 +1065,6 @@ class QAPISchemaObjectTypeVariants(object):
>  vseen = dict(seen)
>  assert isinstance(v.type, QAPISchemaObjectType)
>  assert not v.type.variants   # not implemented
> -v.type.check(schema)
>  for m in v.type.members:
>  m.check_clash(vseen)
>
> @@ -1077,6 +1076,8 @@ class
> QAPISchemaObjectTypeVariant(QAPISchemaObjectTypeMember):
>  def check(self, schema, tag_type):
>  QAPISchemaObjectTypeMember.check(self, schema)
>  assert self.name in tag_type.values
> +if isinstance(self.type, QAPISchemaObjectType):
> +self.type.check(schema)
>
>  # This function exists to support ugly simple union special cases
>  # TODO get rid of them, and drop the function
> @@ -1098,6 +1099,8 @@ class QAPISchemaAlternateType(QAPISchemaType):
>
>  def check(self, schema):
>  self.variants.tag_member.check(schema)
> +# Not calling self.variants.check_clash(), because there's
> +# nothing to clash with
>  self.variants.check(schema, {})
>
>  def json_type(self):

Makes sense to me.

[Qemu-devel] [PULL v2 for-2.5] Block pull request

2015-11-10 Thread Stefan Hajnoczi

v2:
 * Rebase onto Kevin's block pull request

The following changes since commit c400bddb916268394e352f82809eb4728424a5b1:

  Merge remote-tracking branch 'mreitz/tags/pull-block-for-kevin-2015-11-10' 
into queue-block (2015-11-10 14:59:26 +0100)

are available in the git repository at:

  git://github.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to 7651e1c68acdaf60954668540c9df778397631d6:

  block: Update copyright of the accounting code (2015-11-10 14:48:26 +)




Alberto Garcia (21):
  xen_disk: Account for flush operations
  ide: Account for write operations correctly
  block: define 'clock_type' for the accounting code
  util: Infrastructure for computing recent averages
  block: Add idle_time_ns to BlockDeviceStats
  block: Add statistics for failed and invalid I/O operations
  block: Allow configuring whether to account failed and invalid ops
  block: Compute minimum, maximum and average I/O latencies
  block: Add average I/O queue depth to BlockDeviceTimedStats
  block: New option to define the intervals for collecting I/O statistics
  qemu-io: Account for failed, invalid and flush operations
  block: Use QEMU_CLOCK_VIRTUAL for the accounting code in qtest mode
  iotests: Add test for the block device statistics
  nvme: Account for failed and invalid operations
  virtio-blk: Account for failed and invalid operations
  xen_disk: Account for failed and invalid operations
  atapi: Account for failed and invalid operations
  ide: Account for failed and invalid operations
  macio: Account for failed operations
  scsi-disk: Account for failed operations
  block: Update copyright of the accounting code

Fam Zheng (15):
  block: Add more types for tracked request
  block: Track flush requests
  block: Track discard requests
  iscsi: Emulate commands in iscsi_aio_ioctl as iscsi_ioctl
  block: Add ioctl parameter fields to BlockRequest
  block: Emulate bdrv_ioctl with bdrv_aio_ioctl and track both
  block: Drop BlockDriver.bdrv_ioctl
  block: Introduce BlockDriver.bdrv_drain callback
  qed: Implement .bdrv_drain
  backup: Extract dirty bitmap handling as a separate function
  blockjob: Introduce reference count and fix reference to job->bs
  blockjob: Add .commit and .abort block job actions
  blockjob: Add "completed" and "ret" in BlockJob
  blockjob: Simplify block_job_finish_sync
  block: Add block job transactions

John Snow (6):
  iotests: add transactional incremental backup test
  block: rename BlkTransactionState and BdrvActionOps
  block/backup: Rely on commit/abort for cleanup
  block: Add BlockJobTxn support to backup_run
  block: add transactional properties
  iotests: 124 - transactional failure test

Stefan Hajnoczi (1):
  tests: add BlockJobTxn unit test

 block.c  |  19 ++-
 block/accounting.c   | 123 ++--
 block/backup.c   |  50 +-
 block/block-backend.c|   1 +
 block/io.c   | 150 
++
 block/iscsi.c|  73 ++-
 block/mirror.c   |   2 +-
 block/qapi.c |  51 +++
 block/qed.c  |  13 +
 block/raw-posix.c|   8 ---
 block/raw_bsd.c  |   6 ---
 blockdev.c   | 485 
--
 blockjob.c   | 189 

 docs/bitmaps.md  |   6 +--
 hmp.c|   4 +-
 hw/block/nvme.c  |  11 ++--
 hw/block/virtio-blk.c|   4 +-
 hw/block/xen_disk.c  |  27 +-
 hw/ide/atapi.c   |  31 +++-
 hw/ide/core.c|  12 +++--
 hw/ide/macio.c   |  12 -
 hw/scsi/scsi-disk.c  |  46 +++--
 include/block/accounting.h   |  28 ++
 include/block/block.h|  18 +--
 include/block/block_int.h|  23 +++--
 include/block/blockjob.h |  85 +--
 include/qemu/timed-average.h |  64 +++
 qapi-schema.json |  56 ++--
 qapi/block-core.json | 103 -
 qemu-img.c   |   3 --
 qemu-io-cmds.c   |   9 
 qmp-commands.hx  |  82 +++---
 tests/Makefile   |   7 +++
 tests/qemu-iotests/124   | 182

Re: [Qemu-devel] [PATCH] kvm-all: PAGE_SIZE should be real host page size

2015-11-10 Thread Paolo Bonzini



On 10/11/2015 01:23, Andrew Jones wrote:
> Just noticed this while grepping TARGET_PAGE_SIZE for an unrelated
> reason. I didn't use qemu_real_host_page_size as kvm_set_phys_mem()
> does, because we'd need to make sure page_size_init() has run first.
> 
> Signed-off-by: Andrew Jones 
> ---
>  kvm-all.c | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/kvm-all.c b/kvm-all.c
> index 1bc12737723c3..de9ff5971fb3b 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -45,8 +45,10 @@
>  #include 
>  #endif
>  
> -/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
> -#define PAGE_SIZE TARGET_PAGE_SIZE
> +/* KVM uses PAGE_SIZE in its definition of KVM_COALESCED_MMIO_MAX. We
> + * need to use the real host PAGE_SIZE, as that's what KVM will use.
> + */
> +#define PAGE_SIZE getpagesize()
>  
>  //#define DEBUG_KVM
>  
> 

Is this a bugfix or just a cleanup?  If the former, on which targets?

Paolo

Re: [Qemu-devel] [PATCH 0/3] fix clang negative signed bit shift warning

2015-11-10 Thread Peter Maydell

On 10 November 2015 at 15:57, Stefan Hajnoczi  wrote:
> LLVM's clang 3.7.0 compiler warns about bit shifting negative numbers because
> the result is undefined.  This series includes 3 small fixes to appease clang.
>
> Stefan Hajnoczi (3):
>   monitor: avoid clang shifting negative signed warning
>   tpm: avoid clang shifting negative signed warning
>   disas/arm: avoid clang shifting negative signed warning
>
>  disas/arm.c   | 2 +-
>  hw/tpm/tpm_tis.c  | 2 +-
>  target-i386/monitor.c | 2 +-
>  3 files changed, 3 insertions(+), 3 deletions(-)

Whole series
Reviewed-by: Peter Maydell 

thanks
-- PMM

[Qemu-devel] [PATCH] iotests: Check for quorum support in test 139

2015-11-10 Thread Alberto Garcia

The quorum driver is always built in, but it is disabled during
run-time if there's no SHA256 support available (see commit e94867e).

This patch skips the quorum test in iotest 139 in that case.

Signed-off-by: Alberto Garcia 
---
 tests/qemu-iotests/139 | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/qemu-iotests/139 b/tests/qemu-iotests/139
index b5470f7..42f78c7 100644
--- a/tests/qemu-iotests/139
+++ b/tests/qemu-iotests/139
@@ -400,6 +400,8 @@ class TestBlockdevDel(iotests.QMPTestCase):
 self.checkBlockDriverState('node1', False)
 
 def testQuorum(self):
+if not 'quorum' in iotests.qemu_img_pipe('--help'):
+return
 self.addQuorum('quorum0', 'node0', 'node1')
 # We cannot remove the children of a Quorum device
 self.delBlockDriverState('node0', expect_error = True)
-- 
2.6.2

Re: [Qemu-devel] [PATCH] kvm-all: PAGE_SIZE should be real host page size

2015-11-10 Thread Peter Maydell

On 10 November 2015 at 00:23, Andrew Jones  wrote:
> Just noticed this while grepping TARGET_PAGE_SIZE for an unrelated
> reason. I didn't use qemu_real_host_page_size as kvm_set_phys_mem()
> does, because we'd need to make sure page_size_init() has run first.
>
> Signed-off-by: Andrew Jones 
> ---
>  kvm-all.c | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/kvm-all.c b/kvm-all.c
> index 1bc12737723c3..de9ff5971fb3b 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -45,8 +45,10 @@
>  #include 
>  #endif
>
> -/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
> -#define PAGE_SIZE TARGET_PAGE_SIZE
> +/* KVM uses PAGE_SIZE in its definition of KVM_COALESCED_MMIO_MAX. We
> + * need to use the real host PAGE_SIZE, as that's what KVM will use.
> + */
> +#define PAGE_SIZE getpagesize()

Rather than defining PAGE_SIZE here (a confusing macro given
we have several page sizes to deal with), why not just use
getpagesize() in the one and only location where we currently
use this macro?

Also, you're guaranteed that page_size_init() has been run, because
we call that from kvm_init(), and you can't call kvm_vcpu_init()
before kvm_init().

thanks
-- PMM

Re: [Qemu-devel] [PULL 0/7] target-arm queue

2015-11-10 Thread Peter Maydell

On 10 November 2015 at 13:51, Peter Maydell <peter.mayd...@linaro.org> wrote:
> A small set of ARM patches, notably fixing bugs in breakpoint
> and singlestep code, and repairing the long-broken highbank model.
>
> The only other ARM thing I have on my radar for 2.5 is the Zynq
> ADC controller, which I'll send separately if it makes it before
> the freeze deadline.
>
> thanks
> -- PMM
>
> The following changes since commit a8b4f9585a0bf5186fca793ce2c5d754cd8ec49a:
>
>   Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2015-11-10' 
> into staging (2015-11-10 09:39:24 +)
>
> are available in the git repository at:
>
>
>   git://git.linaro.org/people/pmaydell/qemu-arm.git 
> tags/pull-target-arm-20151110
>
> for you to fetch changes up to 577bf808958d06497928c639efaa473bf8c5e099:
>
>   target-arm: Clean up DISAS_UPDATE usage in AArch32 translation code 
> (2015-11-10 13:37:33 +)
>
> 
> target-arm queue:
>  * fix bugs in gdb singlestep handling and breakpoints
>  * minor code cleanup in arm_gic
>  * clean up error messages in hw/arm/virt
>  * fix highbank kernel booting by adding a board-setup blob
>

Applied, thanks.

-- PMM

[Qemu-devel] [PULL 08/44] block: Introduce BlockDriver.bdrv_drain callback

2015-11-10 Thread Stefan Hajnoczi

From: Fam Zheng 

Drivers can have internal request sources that generate IO, like the
need_check_timer in QED. Since we want quiesced periods that contain
nested event loops in block layer, we need to have a way to disable such
event sources.

Block drivers must implement the "bdrv_drain" callback if they have any
internal sources that can generate I/O activity, like a timer or a
worker thread (even in a library) that can schedule QEMUBH in an
asynchronous callback.

Update the comments of bdrv_drain and bdrv_drained_begin accordingly.

Like bdrv_requests_pending(), we should consider all the children of bs.
Before, the while loop just works, as bdrv_requests_pending() already
tracks its children; now we mustn't miss the callback, so recurse down
explicitly.

Signed-off-by: Fam Zheng 
Reviewed-by: Paolo Bonzini 
Message-id: 1447064214-29930-9-git-send-email-f...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 block/io.c| 16 +++-
 include/block/block_int.h |  6 ++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/block/io.c b/block/io.c
index 4ecb171..adc1eab 100644
--- a/block/io.c
+++ b/block/io.c
@@ -237,8 +237,21 @@ bool bdrv_requests_pending(BlockDriverState *bs)
 return false;
 }
 
+static void bdrv_drain_recurse(BlockDriverState *bs)
+{
+BdrvChild *child;
+
+if (bs->drv && bs->drv->bdrv_drain) {
+bs->drv->bdrv_drain(bs);
+}
+QLIST_FOREACH(child, &bs->children, next) {
+bdrv_drain_recurse(child->bs);
+}
+}
+
 /*
- * Wait for pending requests to complete on a single BlockDriverState subtree
+ * Wait for pending requests to complete on a single BlockDriverState subtree,
+ * and suspend block driver's internal I/O until next request arrives.
  *
  * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
  * AioContext.
@@ -251,6 +264,7 @@ void bdrv_drain(BlockDriverState *bs)
 {
 bool busy = true;
 
+bdrv_drain_recurse(bs);
 while (busy) {
 /* Keep iterating */
  bdrv_flush_io_queue(bs);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 550ce18..4a9f8ff 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -295,6 +295,12 @@ struct BlockDriver {
  */
 int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo);
 
+/**
+ * Drain and stop any internal sources of requests in the driver, and
+ * remain so until next I/O callback (e.g. bdrv_co_writev) is called.
+ */
+void (*bdrv_drain)(BlockDriverState *bs);
+
 QLIST_ENTRY(BlockDriver) list;
 };
 
-- 
2.5.0

[Qemu-devel] [PULL 22/57] Return path: Source handling of return path

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

Open a return path, and handle messages that are received upon it.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 include/migration/migration.h |   8 +++
 migration/migration.c | 159 +-
 trace-events  |  10 +++
 3 files changed, 175 insertions(+), 2 deletions(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 3ce3fda..571466b 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -80,6 +80,14 @@ struct MigrationState

 int state;
 MigrationParams params;
+
+/* State related to return path */
+struct {
+QEMUFile *from_dst_file;
+QemuThreadrp_thread;
+bool  error;
+} rp_state;
+
 double mbps;
 int64_t total_time;
 int64_t downtime;
diff --git a/migration/migration.c b/migration/migration.c
index 4317bab..295deb8 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -696,6 +696,11 @@ static void migrate_fd_cancel(MigrationState *s)
 QEMUFile *f = migrate_get_current()->file;
 trace_migrate_fd_cancel();

+if (s->rp_state.from_dst_file) {
+/* shutdown the rp socket, so causing the rp thread to shutdown */
+qemu_file_shutdown(s->rp_state.from_dst_file);
+}
+
 do {
 old_state = s->state;
 if (!migration_is_setup_or_active(old_state)) {
@@ -1030,6 +1035,154 @@ int64_t migrate_xbzrle_cache_size(void)
 return s->xbzrle_cache_size;
 }

+/* migration thread support */
+/*
+ * Something bad happened to the RP stream, mark an error
+ * The caller shall print or trace something to indicate why
+ */
+static void mark_source_rp_bad(MigrationState *s)
+{
+s->rp_state.error = true;
+}
+
+static struct rp_cmd_args {
+ssize_t len; /* -1 = variable */
+const char *name;
+} rp_cmd_args[] = {
+[MIG_RP_MSG_INVALID]= { .len = -1, .name = "INVALID" },
+[MIG_RP_MSG_SHUT]   = { .len =  4, .name = "SHUT" },
+[MIG_RP_MSG_PONG]   = { .len =  4, .name = "PONG" },
+[MIG_RP_MSG_MAX]= { .len = -1, .name = "MAX" },
+};
+
+/*
+ * Handles messages sent on the return path towards the source VM
+ *
+ */
+static void *source_return_path_thread(void *opaque)
+{
+MigrationState *ms = opaque;
+QEMUFile *rp = ms->rp_state.from_dst_file;
+uint16_t header_len, header_type;
+const int max_len = 512;
+uint8_t buf[max_len];
+uint32_t tmp32, sibling_error;
+int res;
+
+trace_source_return_path_thread_entry();
+while (!ms->rp_state.error && !qemu_file_get_error(rp) &&
+   migration_is_setup_or_active(ms->state)) {
+trace_source_return_path_thread_loop_top();
+header_type = qemu_get_be16(rp);
+header_len = qemu_get_be16(rp);
+
+if (header_type >= MIG_RP_MSG_MAX ||
+header_type == MIG_RP_MSG_INVALID) {
+error_report("RP: Received invalid message 0x%04x length 0x%04x",
+header_type, header_len);
+mark_source_rp_bad(ms);
+goto out;
+}
+
+if ((rp_cmd_args[header_type].len != -1 &&
+header_len != rp_cmd_args[header_type].len) ||
+header_len > max_len) {
+error_report("RP: Received '%s' message (0x%04x) with"
+"incorrect length %d expecting %zu",
+rp_cmd_args[header_type].name, header_type, header_len,
+(size_t)rp_cmd_args[header_type].len);
+mark_source_rp_bad(ms);
+goto out;
+}
+
+/* We know we've got a valid header by this point */
+res = qemu_get_buffer(rp, buf, header_len);
+if (res != header_len) {
+error_report("RP: Failed reading data for message 0x%04x"
+ " read %d expected %d",
+ header_type, res, header_len);
+mark_source_rp_bad(ms);
+goto out;
+}
+
+/* OK, we have the message and the data */
+switch (header_type) {
+case MIG_RP_MSG_SHUT:
+sibling_error = be32_to_cpup((uint32_t *)buf);
+trace_source_return_path_thread_shut(sibling_error);
+if (sibling_error) {
+error_report("RP: Sibling indicated error %d", sibling_error);
+mark_source_rp_bad(ms);
+}
+/*
+ * We'll let the main thread deal with closing the RP
+ * we could do a shutdown(2) on it, but we're the only user
+ * anyway, so there's nothing gained.
+ */
+goto out;
+
+case MIG_RP_MSG_PONG:
+tmp32 = be32_to_cpup((uint32_t *)buf);
+trace_source_return_path_thread_pong(tmp32);
+break;
+
+

[Qemu-devel] [PULL 10/57] Add QEMU_MADV_NOHUGEPAGE

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

Add QEMU_MADV_NOHUGEPAGE as an OS-independent version of
MADV_NOHUGEPAGE.

We include sys/mman.h before making the test to ensure
that we pick up the system defines.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Reviewed-by: Amit Shah 
Signed-off-by: Juan Quintela 
---
 include/qemu/osdep.h | 9 +
 1 file changed, 9 insertions(+)

diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index ab2d5d9..861d84b 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -139,6 +139,8 @@ void qemu_anon_ram_free(void *ptr, size_t size);

 #if defined(CONFIG_MADVISE)

+#include 
+
 #define QEMU_MADV_WILLNEED  MADV_WILLNEED
 #define QEMU_MADV_DONTNEED  MADV_DONTNEED
 #ifdef MADV_DONTFORK
@@ -171,6 +173,11 @@ void qemu_anon_ram_free(void *ptr, size_t size);
 #else
 #define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
 #endif
+#ifdef MADV_NOHUGEPAGE
+#define QEMU_MADV_NOHUGEPAGE MADV_NOHUGEPAGE
+#else
+#define QEMU_MADV_NOHUGEPAGE QEMU_MADV_INVALID
+#endif

 #elif defined(CONFIG_POSIX_MADVISE)

@@ -182,6 +189,7 @@ void qemu_anon_ram_free(void *ptr, size_t size);
 #define QEMU_MADV_DODUMP QEMU_MADV_INVALID
 #define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
 #define QEMU_MADV_HUGEPAGE  QEMU_MADV_INVALID
+#define QEMU_MADV_NOHUGEPAGE  QEMU_MADV_INVALID

 #else /* no-op */

@@ -193,6 +201,7 @@ void qemu_anon_ram_free(void *ptr, size_t size);
 #define QEMU_MADV_DODUMP QEMU_MADV_INVALID
 #define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
 #define QEMU_MADV_HUGEPAGE  QEMU_MADV_INVALID
+#define QEMU_MADV_NOHUGEPAGE  QEMU_MADV_INVALID

 #endif

-- 
2.5.0

[Qemu-devel] [PULL 19/57] Return path: Control commands

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

Add two src->dest commands:
   * OPEN_RETURN_PATH - To request that the destination open the return path
   * PING - Request an acknowledge from the destination

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Amit Shah 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 include/migration/migration.h |  2 ++
 include/sysemu/sysemu.h   |  4 
 migration/savevm.c| 43 ++-
 trace-events  |  3 +++
 4 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 0bb4383..98a6d07 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -48,6 +48,8 @@ typedef QLIST_HEAD(, LoadStateEntry) LoadStateEntry_Head;
 struct MigrationIncomingState {
 QEMUFile *from_src_file;

+QEMUFile *to_src_file;
+
 /* See savevm.c */
 LoadStateEntry_Head loadvm_handlers;
 };
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 0a8790e..70473f4 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -87,6 +87,8 @@ void qemu_announce_self(void);
 /* Subcommands for QEMU_VM_COMMAND */
 enum qemu_vm_cmd {
 MIG_CMD_INVALID = 0,   /* Must be 0 */
+MIG_CMD_OPEN_RETURN_PATH,  /* Tell the dest to open the Return path */
+MIG_CMD_PING,  /* Request a PONG on the RP */
 MIG_CMD_MAX
 };

@@ -100,6 +102,8 @@ void qemu_savevm_state_complete_precopy(QEMUFile *f);
 uint64_t qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size);
 void qemu_savevm_command_send(QEMUFile *f, enum qemu_vm_cmd command,
   uint16_t len, uint8_t *data);
+void qemu_savevm_send_ping(QEMUFile *f, uint32_t value);
+void qemu_savevm_send_open_return_path(QEMUFile *f);
 int qemu_loadvm_state(QEMUFile *f);

 typedef enum DisplayType
diff --git a/migration/savevm.c b/migration/savevm.c
index a6829e1..d47c55b 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -64,6 +64,8 @@ static struct mig_cmd_args {
 const char *name;
 } mig_cmd_args[] = {
 [MIG_CMD_INVALID]  = { .len = -1, .name = "INVALID" },
+[MIG_CMD_OPEN_RETURN_PATH] = { .len =  0, .name = "OPEN_RETURN_PATH" },
+[MIG_CMD_PING] = { .len = sizeof(uint32_t), .name = "PING" },
 [MIG_CMD_MAX]  = { .len = -1, .name = "MAX" },
 };

@@ -724,6 +726,21 @@ void qemu_savevm_command_send(QEMUFile *f,
 qemu_fflush(f);
 }

+void qemu_savevm_send_ping(QEMUFile *f, uint32_t value)
+{
+uint32_t buf;
+
+trace_savevm_send_ping(value);
+buf = cpu_to_be32(value);
+qemu_savevm_command_send(f, MIG_CMD_PING, sizeof(value), (uint8_t *)&buf);
+}
+
+void qemu_savevm_send_open_return_path(QEMUFile *f)
+{
+trace_savevm_send_open_return_path();
+qemu_savevm_command_send(f, MIG_CMD_OPEN_RETURN_PATH, 0, NULL);
+}
+
 bool qemu_savevm_state_blocked(Error **errp)
 {
 SaveStateEntry *se;
@@ -1043,8 +1060,10 @@ static SaveStateEntry *find_se(const char *idstr, int 
instance_id)
  */
 static int loadvm_process_command(QEMUFile *f)
 {
+MigrationIncomingState *mis = migration_incoming_get_current();
 uint16_t cmd;
 uint16_t len;
+uint32_t tmp32;

 cmd = qemu_get_be16(f);
 len = qemu_get_be16(f);
@@ -1063,7 +1082,29 @@ static int loadvm_process_command(QEMUFile *f)
 }

 switch (cmd) {
-/* Filling added in next patch */
+case MIG_CMD_OPEN_RETURN_PATH:
+if (mis->to_src_file) {
+error_report("CMD_OPEN_RETURN_PATH called when RP already open");
+/* Not really a problem, so don't give up */
+return 0;
+}
+mis->to_src_file = qemu_file_get_return_path(f);
+if (!mis->to_src_file) {
+error_report("CMD_OPEN_RETURN_PATH failed");
+return -1;
+}
+break;
+
+case MIG_CMD_PING:
+tmp32 = qemu_get_be32(f);
+trace_loadvm_process_command_ping(tmp32);
+if (!mis->to_src_file) {
+error_report("CMD_PING (0x%x) received with no return path",
+ tmp32);
+return -1;
+}
+/* migrate_send_rp_pong(mis, tmp32); TODO: gets added later */
+break;
 }

 return 0;
diff --git a/trace-events b/trace-events
index f077c23..500f1e3 100644
--- a/trace-events
+++ b/trace-events
@@ -1205,10 +1205,13 @@ qemu_loadvm_state_section(unsigned int section_type) 
"%d"
 qemu_loadvm_state_section_partend(uint32_t section_id) "%u"
 qemu_loadvm_state_section_startfull(uint32_t section_id, const char *idstr, 
uint32_t instance_id, uint32_t version_id) "%u(%s) %u %u"
 loadvm_process_command(uint16_t com, uint16_t len) "com=0x%x len=%d"
+loadvm_process_command_ping(uint32_t val) "%x"
 savevm_command_send(uint16_t command, uint16_t len) "com=0x%x len=%d"

[Qemu-devel] [PULL 46/57] Don't sync dirty bitmaps in postcopy

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

Once we're in postcopy the source processors are stopped and memory
shouldn't change any more, so there's no need to look at the dirty
map.

There are two notes to this:
  1) If we do resync and a page had changed then the page would get
 sent again, which the destination wouldn't allow (since it might
 have also modified the page)
  2) Before disabling this I'd seen very rare cases where a page had been
 marked dirtied although the memory contents are apparently identical

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: David Gibson 
Reviewed-by: Juan Quintela 
Reviewed-by: Amit Shah 
Signed-off-by: Juan Quintela 
---
 migration/ram.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/migration/ram.c b/migration/ram.c
index 8bf0841..339b001 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1846,7 +1846,9 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
 {
 rcu_read_lock();

-migration_bitmap_sync();
+if (!migration_in_postcopy(migrate_get_current())) {
+migration_bitmap_sync();
+}

 ram_control_before_iterate(f, RAM_CONTROL_FINISH);

@@ -1881,7 +1883,8 @@ static void ram_save_pending(QEMUFile *f, void *opaque, 
uint64_t max_size,

 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;

-if (remaining_size < max_size) {
+if (!migration_in_postcopy(migrate_get_current()) &&
+remaining_size < max_size) {
 qemu_mutex_lock_iothread();
 rcu_read_lock();
 migration_bitmap_sync();
-- 
2.5.0

[Qemu-devel] [PULL 24/57] Add migration-capability boolean for postcopy-ram.

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

The 'postcopy ram' capability allows postcopy migration of RAM;
note that the migration starts off in precopy mode until
postcopy mode is triggered (see the migrate_start_postcopy
patch later in the series).

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Reviewed-by: Amit Shah 
Signed-off-by: Juan Quintela 
---
 include/migration/migration.h |  1 +
 migration/migration.c | 23 +++
 qapi-schema.json  |  6 +-
 3 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 3dc95f4..4ed7931 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -186,6 +186,7 @@ void migrate_add_blocker(Error *reason);
  */
 void migrate_del_blocker(Error *reason);

+bool migrate_postcopy_ram(void);
 bool migrate_zero_blocks(void);

 bool migrate_auto_converge(void);
diff --git a/migration/migration.c b/migration/migration.c
index e4f91a9..f849f89 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -578,6 +578,20 @@ void 
qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
 for (cap = params; cap; cap = cap->next) {
 s->enabled_capabilities[cap->value->capability] = cap->value->state;
 }
+
+if (migrate_postcopy_ram()) {
+if (migrate_use_compression()) {
+/* The decompression threads asynchronously write into RAM
+ * rather than use the atomic copies needed to avoid
+ * userfaulting.  It should be possible to fix the decompression
+ * threads for compatibility in future.
+ */
+error_report("Postcopy is not currently compatible with "
+ "compression");
+s->enabled_capabilities[MIGRATION_CAPABILITY_X_POSTCOPY_RAM] =
+false;
+}
+}
 }

 void qmp_migrate_set_parameters(bool has_compress_level,
@@ -956,6 +970,15 @@ void qmp_migrate_set_downtime(double value, Error **errp)
 max_downtime = (uint64_t)value;
 }

+bool migrate_postcopy_ram(void)
+{
+MigrationState *s;
+
+s = migrate_get_current();
+
+return s->enabled_capabilities[MIGRATION_CAPABILITY_X_POSTCOPY_RAM];
+}
+
 bool migrate_auto_converge(void)
 {
 MigrationState *s;
diff --git a/qapi-schema.json b/qapi-schema.json
index e18f14c..8638d42 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -540,11 +540,15 @@
 # @auto-converge: If enabled, QEMU will automatically throttle down the guest
 #  to speed up convergence of RAM migration. (since 1.6)
 #
+# @x-postcopy-ram: Start executing on the migration target before all of RAM 
has
+#  been migrated, pulling the remaining pages along as needed. NOTE: If
+#  the migration fails during postcopy the VM will fail.  (since 2.5)
+#
 # Since: 1.2
 ##
 { 'enum': 'MigrationCapability',
   'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
-   'compress', 'events'] }
+   'compress', 'events', 'x-postcopy-ram'] }

 ##
 # @MigrationCapabilityStatus
-- 
2.5.0

[Qemu-devel] [PULL 48/57] Host page!=target page: Cleanup bitmaps

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

Prior to the start of postcopy, ensure that everything that will
be transferred later is a whole host-page in size.

This is accomplished by discarding partially transferred host pages
and marking any that are partially dirty as fully dirty.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 migration/ram.c | 172 
 1 file changed, 172 insertions(+)

diff --git a/migration/ram.c b/migration/ram.c
index 339b001..62cf42b 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1584,6 +1584,171 @@ static int 
postcopy_each_ram_send_discard(MigrationState *ms)
 }

 /*
+ * Helper for postcopy_chunk_hostpages; it's called twice to cleanup
+ *   the two bitmaps, that are similar, but one is inverted.
+ *
+ * We search for runs of target-pages that don't start or end on a
+ * host page boundary;
+ * unsent_pass=true: Cleans up partially unsent host pages by searching
+ * the unsentmap
+ * unsent_pass=false: Cleans up partially dirty host pages by searching
+ * the main migration bitmap
+ *
+ */
+static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
+  RAMBlock *block,
+  PostcopyDiscardState *pds)
+{
+unsigned long *bitmap;
+unsigned long *unsentmap;
+unsigned int host_ratio = qemu_host_page_size / TARGET_PAGE_SIZE;
+unsigned long first = block->offset >> TARGET_PAGE_BITS;
+unsigned long len = block->used_length >> TARGET_PAGE_BITS;
+unsigned long last = first + (len - 1);
+unsigned long run_start;
+
+bitmap = atomic_rcu_read(_bitmap_rcu)->bmap;
+unsentmap = atomic_rcu_read(_bitmap_rcu)->unsentmap;
+
+if (unsent_pass) {
+/* Find a sent page */
+run_start = find_next_zero_bit(unsentmap, last + 1, first);
+} else {
+/* Find a dirty page */
+run_start = find_next_bit(bitmap, last + 1, first);
+}
+
+while (run_start <= last) {
+bool do_fixup = false;
+unsigned long fixup_start_addr;
+unsigned long host_offset;
+
+/*
+ * If the start of this run of pages is in the middle of a host
+ * page, then we need to fixup this host page.
+ */
+host_offset = run_start % host_ratio;
+if (host_offset) {
+do_fixup = true;
+run_start -= host_offset;
+fixup_start_addr = run_start;
+/* For the next pass */
+run_start = run_start + host_ratio;
+} else {
+/* Find the end of this run */
+unsigned long run_end;
+if (unsent_pass) {
+run_end = find_next_bit(unsentmap, last + 1, run_start + 1);
+} else {
+run_end = find_next_zero_bit(bitmap, last + 1, run_start + 1);
+}
+/*
+ * If the end isn't at the start of a host page, then the
+ * run doesn't finish at the end of a host page
+ * and we need to discard.
+ */
+host_offset = run_end % host_ratio;
+if (host_offset) {
+do_fixup = true;
+fixup_start_addr = run_end - host_offset;
+/*
+ * This host page has gone, the next loop iteration starts
+ * from after the fixup
+ */
+run_start = fixup_start_addr + host_ratio;
+} else {
+/*
+ * No discards on this iteration, next loop starts from
+ * next sent/dirty page
+ */
+run_start = run_end + 1;
+}
+}
+
+if (do_fixup) {
+unsigned long page;
+
+/* Tell the destination to discard this page */
+if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
+/* For the unsent_pass we:
+ * discard partially sent pages
+ * For the !unsent_pass (dirty) we:
+ * discard partially dirty pages that were sent
+ * (any partially sent pages were already discarded
+ * by the previous unsent_pass)
+ */
+postcopy_discard_send_range(ms, pds, fixup_start_addr,
+host_ratio);
+}
+
+/* Clean up the bitmap */
+for (page = fixup_start_addr;
+ page < fixup_start_addr + host_ratio; page++) {
+/* All pages in this host page are now not sent */
+set_bit(page, unsentmap);
+
+/*
+ * Remark them as dirty, updating the count for any pages
+ * that weren't

[Qemu-devel] [PULL 49/57] Round up RAMBlock sizes to host page sizes

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

RAMBlocks that are not a multiple of host pages in length
cause problems for postcopy (I've seen an ACPI table on aarch64
be 5k in length - i.e. 5x target-page), so round RAMBlock sizes
up to a host-page.

This potentially breaks migration compatibility due to changes
in RAMBlock sizes; however:
   1) x86 and s390 I think always have host=target page size
   2) When I've tried on Power the block sizes already seem aligned.
   3) I don't think there's anything else that maintains per-version
  machine-types for compatibility.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 exec.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/exec.c b/exec.c
index 36886ee..b09f18b 100644
--- a/exec.c
+++ b/exec.c
@@ -1452,7 +1452,7 @@ int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, 
Error **errp)

 assert(block);

-newsize = TARGET_PAGE_ALIGN(newsize);
+newsize = HOST_PAGE_ALIGN(newsize);

 if (block->used_length == newsize) {
 return 0;
@@ -1596,7 +1596,7 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, 
MemoryRegion *mr,
 return -1;
 }

-size = TARGET_PAGE_ALIGN(size);
+size = HOST_PAGE_ALIGN(size);
 new_block = g_malloc0(sizeof(*new_block));
 new_block->mr = mr;
 new_block->used_length = size;
@@ -1632,8 +1632,8 @@ ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, 
ram_addr_t max_size,
 ram_addr_t addr;
 Error *local_err = NULL;

-size = TARGET_PAGE_ALIGN(size);
-max_size = TARGET_PAGE_ALIGN(max_size);
+size = HOST_PAGE_ALIGN(size);
+max_size = HOST_PAGE_ALIGN(max_size);
 new_block = g_malloc0(sizeof(*new_block));
 new_block->mr = mr;
 new_block->resized = resized;
-- 
2.5.0

[Qemu-devel] [PULL 53/57] Postcopy: Mark nohugepage before discard

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

Prior to servicing userfault requests we must ensure we've not got
huge pages in the area that might include non-transferred memory,
since a hugepage could incorrectly mark the whole huge page as present.

We mark the area as non-huge page (nhp) just before we perform
discards; the discard code now tells us to discard any areas
that haven't been sent (as well as any that are redirtied);
any already formed transparent-huge-pages get fragmented
by this discard process if they contain any discards.

Transparent huge pages that have been entirely transferred
and don't contain any discards are not broken by this mechanism;
they stay as huge pages.

By starting postcopy after a full precopy pass, many of the pages
then stay as huge pages; this is important for maintaining performance
after the end of the migration.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 include/migration/postcopy-ram.h |  6 +
 migration/postcopy-ram.c | 47 +---
 migration/savevm.c   |  9 +++-
 trace-events |  1 +
 4 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h
index d7c292f..b6a7491 100644
--- a/include/migration/postcopy-ram.h
+++ b/include/migration/postcopy-ram.h
@@ -41,6 +41,12 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState 
*mis);
 int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
size_t length);

+/*
+ * Userfault requires us to mark RAM as NOHUGEPAGE prior to discard
+ * however leaving it until after precopy means that most of the precopy
+ * data is still THPd
+ */
+int postcopy_ram_prepare_discard(MigrationIncomingState *mis);

 /*
  * Called at the start of each RAMBlock by the bitmap code.
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 4f1e329..8e107fe 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -226,12 +226,10 @@ static int cleanup_range(const char *block_name, void 
*host_addr,
  * We turned off hugepage for the precopy stage with postcopy enabled
  * we can turn it back on now.
  */
-#ifdef MADV_HUGEPAGE
-if (madvise(host_addr, length, MADV_HUGEPAGE)) {
+if (qemu_madvise(host_addr, length, QEMU_MADV_HUGEPAGE)) {
 error_report("%s HUGEPAGE: %s", __func__, strerror(errno));
 return -1;
 }
-#endif

 /*
  * We can also turn off userfault now since we should have all the
@@ -308,6 +306,43 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState 
*mis)
 }

 /*
+ * Disable huge pages on an area
+ */
+static int nhp_range(const char *block_name, void *host_addr,
+ram_addr_t offset, ram_addr_t length, void *opaque)
+{
+trace_postcopy_nhp_range(block_name, host_addr, offset, length);
+
+/*
+ * Before we do discards we need to ensure those discards really
+ * do delete areas of the page, even if THP thinks a hugepage would
+ * be a good idea, so force hugepages off.
+ */
+if (qemu_madvise(host_addr, length, QEMU_MADV_NOHUGEPAGE)) {
+error_report("%s: NOHUGEPAGE: %s", __func__, strerror(errno));
+return -1;
+}
+
+return 0;
+}
+
+/*
+ * Userfault requires us to mark RAM as NOHUGEPAGE prior to discard
+ * however leaving it until after precopy means that most of the precopy
+ * data is still THPd
+ */
+int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
+{
+if (qemu_ram_foreach_block(nhp_range, mis)) {
+return -1;
+}
+
+postcopy_state_set(POSTCOPY_INCOMING_DISCARD);
+
+return 0;
+}
+
+/*
  * Mark the given area of RAM as requiring notification to unwritten areas
  * Used as a  callback on qemu_ram_foreach_block.
  *   host_addr: Base of area to mark
@@ -583,6 +618,12 @@ int postcopy_ram_discard_range(MigrationIncomingState 
*mis, uint8_t *start,
 return -1;
 }

+int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
+{
+assert(0);
+return -1;
+}
+
 int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 {
 assert(0);
diff --git a/migration/savevm.c b/migration/savevm.c
index eb32199..0596f7b 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1316,7 +1316,7 @@ static int 
loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
 switch (ps) {
 case POSTCOPY_INCOMING_ADVISE:
 /* 1st discard */
-tmp = 0; /* TODO: later patch postcopy_ram_prepare_discard(mis); */
+tmp = postcopy_ram_prepare_discard(mis);
 if (tmp) {
 return tmp;
 }
@@ -1448,6 +1448,13 @@ static int 
loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
 error_report("CMD_POSTCOPY_LISTEN in wrong postcopy state (%d)", ps);
 return -1;

Re: [Qemu-devel] [PATCH RESEND v2 0/3] pc: Set hw_version on all machine classes

2015-11-10 Thread Paolo Bonzini



On 09/11/2015 21:46, Michael S. Tsirkin wrote:
> On Mon, Nov 02, 2015 at 12:55:07PM +0100, Paolo Bonzini wrote:
>> On 30/10/2015 20:36, Eduardo Habkost wrote:
>>> In 2012, QEMU had a bug where it exposed QEMU version information
>>> to the guest, meaning a QEMU upgrade would expose different
>>> hardware to the guest OS even if the same machine-type is being
>>> used.
>>>
>>> The bug was fixed by commit 93bfef4c6e4b23caea9d51e1099d06433d8835a4,
>>> on all machines up to pc-1.0. But we kept introducing the same
>>> bug on all newer machines since then. That means we are breaking
>>> guest ABI every time QEMU was upgraded.
>>>
>>> Fix this by setting the hw_version on all PC machines, making
>>> sure the hardware won't change when upgrading QEMU.
>>>
>>> Eduardo Habkost (3):
>>>   pc: Set hw_version on all machine classes
>>>   osdep: Rename qemu_{get,set}_version() to qemu_{,set_}hw_version()
>>>   megasas: Use qemu_hw_version() instead of QEMU_VERSION
>>>
>>>  hw/arm/nseries.c |  2 +-
>>>  hw/i386/pc_piix.c| 13 +
>>>  hw/i386/pc_q35.c | 10 ++
>>>  hw/ide/core.c|  2 +-
>>>  hw/scsi/megasas.c|  2 +-
>>>  hw/scsi/scsi-bus.c   |  2 +-
>>>  hw/scsi/scsi-disk.c  |  2 +-
>>>  include/qemu/osdep.h |  4 ++--
>>>  target-i386/cpu.c|  2 +-
>>>  util/osdep.c | 10 +-
>>>  vl.c |  2 +-
>>>  11 files changed, 37 insertions(+), 14 deletions(-)
>>>
>>
>> Michael, is it okay for you if I merge this patch series?
> 
> Sorry about missing this the 1st time around.
> Let's discuss the right thing to do here -
> it's a bugfix so we can merge it after hard freeze.

It's already in...

Paolo

Re: [Qemu-devel] [PATCH 2/2] migration: Make 32bit linux compile with RDMA

2015-11-10 Thread Dr. David Alan Gilbert

* Juan Quintela (quint...@redhat.com) wrote:
> Rest of the file already use that trick. 64bit offsets make no sense in
> 32bit archs, but that is ram_addr_t for you.
> 
> Signed-off-by: Juan Quintela 
> ---
>  migration/rdma.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/migration/rdma.c b/migration/rdma.c
> index 553fbd7..dcabb91 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -577,7 +577,7 @@ static int rdma_add_block(RDMAContext *rdma, const char 
> *block_name,
>  block->is_ram_block = local->init ? false : true;
> 
>  if (rdma->blockmap) {
> -g_hash_table_insert(rdma->blockmap, (void *) block_offset, block);
> +g_hash_table_insert(rdma->blockmap, (void *)(uintptr_t)block_offset, 
> block);

Reviewed-by: Dr. David Alan Gilbert 

Dave

>  }
> 
>  trace_rdma_add_block(block_name, local->nb_blocks,
> -- 
> 2.5.0
> 
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

[Qemu-devel] [PULL 37/44] nvme: Account for failed and invalid operations

2015-11-10 Thread Stefan Hajnoczi

From: Alberto Garcia 

Signed-off-by: Alberto Garcia 
Message-id: 
678dc67da229759d404b44f7cc2bf5ed8bf8ad14.1446044838.git.be...@igalia.com
Signed-off-by: Stefan Hajnoczi 
---
 hw/block/nvme.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 5da41b2..169e4fa 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -201,10 +201,11 @@ static void nvme_rw_cb(void *opaque, int ret)
 NvmeCtrl *n = sq->ctrl;
 NvmeCQueue *cq = n->cq[sq->cqid];
 
-block_acct_done(blk_get_stats(n->conf.blk), >acct);
 if (!ret) {
+block_acct_done(blk_get_stats(n->conf.blk), >acct);
 req->status = NVME_SUCCESS;
 } else {
+block_acct_failed(blk_get_stats(n->conf.blk), >acct);
 req->status = NVME_INTERNAL_DEV_ERROR;
 }
 if (req->has_sg) {
@@ -238,18 +239,22 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, 
NvmeCmd *cmd,
 uint64_t data_size = (uint64_t)nlb << data_shift;
 uint64_t aio_slba  = slba << (data_shift - BDRV_SECTOR_BITS);
 int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0;
+enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ;
 
 if ((slba + nlb) > ns->id_ns.nsze) {
+block_acct_invalid(blk_get_stats(n->conf.blk), acct);
 return NVME_LBA_RANGE | NVME_DNR;
 }
+
 if (nvme_map_prp(>qsg, prp1, prp2, data_size, n)) {
+block_acct_invalid(blk_get_stats(n->conf.blk), acct);
 return NVME_INVALID_FIELD | NVME_DNR;
 }
+
 assert((nlb << data_shift) == req->qsg.size);
 
 req->has_sg = true;
-dma_acct_start(n->conf.blk, >acct, >qsg,
-   is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);
+dma_acct_start(n->conf.blk, >acct, >qsg, acct);
 req->aiocb = is_write ?
 dma_blk_write(n->conf.blk, >qsg, aio_slba, nvme_rw_cb, req) :
 dma_blk_read(n->conf.blk, >qsg, aio_slba, nvme_rw_cb, req);
-- 
2.5.0

[Qemu-devel] [PULL 43/44] scsi-disk: Account for failed operations

2015-11-10 Thread Stefan Hajnoczi

From: Alberto Garcia 

Signed-off-by: Alberto Garcia 
Message-id: 
0ead7b0e59c22926e033ca12725e3a31985ec46b.1446044838.git.be...@igalia.com
Signed-off-by: Stefan Hajnoczi 
---
 hw/scsi/scsi-disk.c | 46 +++---
 1 file changed, 31 insertions(+), 15 deletions(-)

diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 707e734..4797d83 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -90,7 +90,7 @@ struct SCSIDiskState
 bool tray_locked;
 };
 
-static int scsi_handle_rw_error(SCSIDiskReq *r, int error);
+static int scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed);
 
 static void scsi_free_request(SCSIRequest *req)
 {
@@ -169,18 +169,18 @@ static void scsi_aio_complete(void *opaque, int ret)
 
 assert(r->req.aiocb != NULL);
 r->req.aiocb = NULL;
-block_acct_done(blk_get_stats(s->qdev.conf.blk), >acct);
 if (r->req.io_canceled) {
 scsi_req_cancel_complete(>req);
 goto done;
 }
 
 if (ret < 0) {
-if (scsi_handle_rw_error(r, -ret)) {
+if (scsi_handle_rw_error(r, -ret, true)) {
 goto done;
 }
 }
 
+block_acct_done(blk_get_stats(s->qdev.conf.blk), >acct);
 scsi_req_complete(>req, GOOD);
 
 done:
@@ -247,7 +247,7 @@ static void scsi_dma_complete_noio(SCSIDiskReq *r, int ret)
 }
 
 if (ret < 0) {
-if (scsi_handle_rw_error(r, -ret)) {
+if (scsi_handle_rw_error(r, -ret, false)) {
 goto done;
 }
 }
@@ -273,7 +273,11 @@ static void scsi_dma_complete(void *opaque, int ret)
 assert(r->req.aiocb != NULL);
 r->req.aiocb = NULL;
 
-block_acct_done(blk_get_stats(s->qdev.conf.blk), >acct);
+if (ret < 0) {
+block_acct_failed(blk_get_stats(s->qdev.conf.blk), >acct);
+} else {
+block_acct_done(blk_get_stats(s->qdev.conf.blk), >acct);
+}
 scsi_dma_complete_noio(r, ret);
 }
 
@@ -285,18 +289,18 @@ static void scsi_read_complete(void * opaque, int ret)
 
 assert(r->req.aiocb != NULL);
 r->req.aiocb = NULL;
-block_acct_done(blk_get_stats(s->qdev.conf.blk), >acct);
 if (r->req.io_canceled) {
 scsi_req_cancel_complete(>req);
 goto done;
 }
 
 if (ret < 0) {
-if (scsi_handle_rw_error(r, -ret)) {
+if (scsi_handle_rw_error(r, -ret, true)) {
 goto done;
 }
 }
 
+block_acct_done(blk_get_stats(s->qdev.conf.blk), >acct);
 DPRINTF("Data ready tag=0x%x len=%zd\n", r->req.tag, r->qiov.size);
 
 n = r->qiov.size / 512;
@@ -322,7 +326,7 @@ static void scsi_do_read(SCSIDiskReq *r, int ret)
 }
 
 if (ret < 0) {
-if (scsi_handle_rw_error(r, -ret)) {
+if (scsi_handle_rw_error(r, -ret, false)) {
 goto done;
 }
 }
@@ -355,7 +359,11 @@ static void scsi_do_read_cb(void *opaque, int ret)
 assert (r->req.aiocb != NULL);
 r->req.aiocb = NULL;
 
-block_acct_done(blk_get_stats(s->qdev.conf.blk), >acct);
+if (ret < 0) {
+block_acct_failed(blk_get_stats(s->qdev.conf.blk), >acct);
+} else {
+block_acct_done(blk_get_stats(s->qdev.conf.blk), >acct);
+}
 scsi_do_read(opaque, ret);
 }
 
@@ -407,7 +415,7 @@ static void scsi_read_data(SCSIRequest *req)
  * scsi_handle_rw_error always manages its reference counts, independent
  * of the return value.
  */
-static int scsi_handle_rw_error(SCSIDiskReq *r, int error)
+static int scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed)
 {
 bool is_read = (r->req.cmd.mode == SCSI_XFER_FROM_DEV);
 SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
@@ -415,6 +423,9 @@ static int scsi_handle_rw_error(SCSIDiskReq *r, int error)
is_read, error);
 
 if (action == BLOCK_ERROR_ACTION_REPORT) {
+if (acct_failed) {
+block_acct_failed(blk_get_stats(s->qdev.conf.blk), >acct);
+}
 switch (error) {
 case ENOMEDIUM:
 scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
@@ -452,7 +463,7 @@ static void scsi_write_complete_noio(SCSIDiskReq *r, int 
ret)
 }
 
 if (ret < 0) {
-if (scsi_handle_rw_error(r, -ret)) {
+if (scsi_handle_rw_error(r, -ret, false)) {
 goto done;
 }
 }
@@ -481,7 +492,11 @@ static void scsi_write_complete(void * opaque, int ret)
 assert (r->req.aiocb != NULL);
 r->req.aiocb = NULL;
 
-block_acct_done(blk_get_stats(s->qdev.conf.blk), >acct);
+if (ret < 0) {
+block_acct_failed(blk_get_stats(s->qdev.conf.blk), >acct);
+} else {
+block_acct_done(blk_get_stats(s->qdev.conf.blk), >acct);
+}
 scsi_write_complete_noio(r, ret);
 }
 
@@ -1592,7 +1607,7 @@ static void scsi_unmap_complete_noio(UnmapCBData *data, 
int ret)
 }
 
 if (ret < 0) {
-if (scsi_handle_rw_error(r, -ret)) {
+if

[Qemu-devel] [PATCH 02/10] snapshot: return error code from bdrv_snapshot_delete_by_id_or_name

2015-11-10 Thread Denis V. Lunev

this will make code better in the next patch

Signed-off-by: Denis V. Lunev 
CC: Juan Quintela 
CC: Stefan Hajnoczi 
CC: Kevin Wolf 
---
 block/snapshot.c | 7 ---
 include/block/snapshot.h | 6 +++---
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/block/snapshot.c b/block/snapshot.c
index d929d08..ed0422d 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -253,9 +253,9 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
 return -ENOTSUP;
 }
 
-void bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs,
-const char *id_or_name,
-Error **errp)
+int bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs,
+   const char *id_or_name,
+   Error **errp)
 {
 int ret;
 Error *local_err = NULL;
@@ -270,6 +270,7 @@ void bdrv_snapshot_delete_by_id_or_name(BlockDriverState 
*bs,
 if (ret < 0) {
 error_propagate(errp, local_err);
 }
+return ret;
 }
 
 int bdrv_snapshot_list(BlockDriverState *bs,
diff --git a/include/block/snapshot.h b/include/block/snapshot.h
index 6195c9c..9ddfd42 100644
--- a/include/block/snapshot.h
+++ b/include/block/snapshot.h
@@ -63,9 +63,9 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
  const char *snapshot_id,
  const char *name,
  Error **errp);
-void bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs,
-const char *id_or_name,
-Error **errp);
+int bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs,
+   const char *id_or_name,
+   Error **errp);
 int bdrv_snapshot_list(BlockDriverState *bs,
QEMUSnapshotInfo **psn_info);
 int bdrv_snapshot_load_tmp(BlockDriverState *bs,
-- 
2.5.0

[Qemu-devel] [PULL 01/57] Add postcopy documentation

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Amit Shah 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 docs/migration.txt | 191 +
 1 file changed, 191 insertions(+)

diff --git a/docs/migration.txt b/docs/migration.txt
index f6df4be..fda8d61 100644
--- a/docs/migration.txt
+++ b/docs/migration.txt
@@ -291,3 +291,194 @@ save/send this state when we are in the middle of a pio 
operation
 (that is what ide_drive_pio_state_needed() checks).  If DRQ_STAT is
 not enabled, the values on that fields are garbage and don't need to
 be sent.
+
+= Return path =
+
+In most migration scenarios there is only a single data path that runs
+from the source VM to the destination, typically along a single fd (although
+possibly with another fd or similar for some fast way of throwing pages across).
+
+However, some uses need two way communication; in particular the Postcopy
+destination needs to be able to request pages on demand from the source.
+
+For these scenarios there is a 'return path' from the destination to the source;
+qemu_file_get_return_path(QEMUFile* fwdpath) gives the QEMUFile* for the return
+path.
+
+  Source side
+ Forward path - written by migration thread
+ Return path  - opened by main thread, read by return-path thread
+
+  Destination side
+ Forward path - read by main thread
+ Return path  - opened by main thread, written by main thread AND postcopy
+thread (protected by rp_mutex)
+
+= Postcopy =
+'Postcopy' migration is a way to deal with migrations that refuse to converge
+(or take too long to converge). Its plus side is that there is an upper bound on
+the amount of migration traffic and time it takes; the down side is that during
+the postcopy phase, a failure of *either* side or the network connection causes
+the guest to be lost.
+
+In postcopy the destination CPUs are started before all the memory has been
+transferred, and accesses to pages that are yet to be transferred cause
+a fault that's translated by QEMU into a request to the source QEMU.
+
+Postcopy can be combined with precopy (i.e. normal migration) so that if precopy
+doesn't finish in a given time the switch is made to postcopy.
+
+=== Enabling postcopy ===
+
+To enable postcopy, issue this command on the monitor prior to the
+start of migration:
+
+migrate_set_capability x-postcopy-ram on
+
+The normal commands are then used to start a migration, which is still
+started in precopy mode.  Issuing:
+
+migrate_start_postcopy
+
+will now cause the transition from precopy to postcopy.
+It can be issued immediately after migration is started or any
+time later on.  Issuing it after the end of a migration is harmless.
+
+Note: During the postcopy phase, the bandwidth limits set using
+migrate_set_speed are ignored (to avoid delaying requested pages that
+the destination is waiting for).
+
+=== Postcopy device transfer ===
+
+Loading of device data may cause the device emulation to access guest RAM
+that may trigger faults that have to be resolved by the source, as such
+the migration stream has to be able to respond with page data *during* the
+device load, and hence the device data has to be read from the stream completely
+before the device load begins to free the stream up.  This is achieved by
+'packaging' the device data into a blob that's read in one go.
+
+Source behaviour
+
+Until postcopy is entered the migration stream is identical to normal
+precopy, except for the addition of a 'postcopy advise' command at
+the beginning, to tell the destination that postcopy might happen.
+When postcopy starts the source sends the page discard data and then
+forms the 'package' containing:
+
+   Command: 'postcopy listen'
+   The device state
+  A series of sections, identical to the precopy streams device state stream
+  containing everything except postcopiable devices (i.e. RAM)
+   Command: 'postcopy run'
+
+The 'package' is sent as the data part of a Command: 'CMD_PACKAGED', and the
+contents are formatted in the same way as the main migration stream.
+
+During postcopy the source scans the list of dirty pages and sends them
+to the destination without being requested (in much the same way as precopy),
+however when a page request is received from the destination, the dirty page
+scanning restarts from the requested location.  This causes requested pages
+to be sent quickly, and also causes pages directly after the requested page
+to be sent quickly in the hope that those pages are likely to be used
+by the destination soon.
+
+Destination behaviour
+
+Initially the destination looks the same as precopy, with a single thread
+reading the migration stream; the 'postcopy advise' and 'discard' commands
+are processed to change the way RAM is managed, but don't affect the stream

[Qemu-devel] [PULL 28/44] block: Add idle_time_ns to BlockDeviceStats

2015-11-10 Thread Stefan Hajnoczi

From: Alberto Garcia 

This patch adds the new field 'idle_time_ns' to the BlockDeviceStats
structure, indicating the time that has passed since the previous I/O
operation.

It also adds the block_acct_idle_time_ns() call, to ensure that all
references to the clock type used for accounting are in the same
place. This will later allow us to use a different clock for iotests.

Signed-off-by: Alberto Garcia 
Message-id: 
7d8cfcf931453e1a2443e6626e8c1edc347c7c8a.1446044837.git.be...@igalia.com
Signed-off-by: Stefan Hajnoczi 
---
 block/accounting.c | 12 ++--
 block/qapi.c   |  5 +
 hmp.c  |  4 +++-
 include/block/accounting.h |  2 ++
 qapi/block-core.json   |  6 +-
 qmp-commands.hx| 10 --
 6 files changed, 33 insertions(+), 6 deletions(-)

diff --git a/block/accounting.c b/block/accounting.c
index 6f4c0f1..d427fa8 100644
--- a/block/accounting.c
+++ b/block/accounting.c
@@ -40,12 +40,15 @@ void block_acct_start(BlockAcctStats *stats, 
BlockAcctCookie *cookie,
 
 void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
 {
+int64_t time_ns = qemu_clock_get_ns(clock_type);
+int64_t latency_ns = time_ns - cookie->start_time_ns;
+
 assert(cookie->type < BLOCK_MAX_IOTYPE);
 
 stats->nr_bytes[cookie->type] += cookie->bytes;
 stats->nr_ops[cookie->type]++;
-stats->total_time_ns[cookie->type] +=
-qemu_clock_get_ns(clock_type) - cookie->start_time_ns;
+stats->total_time_ns[cookie->type] += latency_ns;
+stats->last_access_time_ns = time_ns;
 }
 
 
@@ -55,3 +58,8 @@ void block_acct_merge_done(BlockAcctStats *stats, enum 
BlockAcctType type,
 assert(type < BLOCK_MAX_IOTYPE);
 stats->merged[type] += num_requests;
 }
+
+int64_t block_acct_idle_time_ns(BlockAcctStats *stats)
+{
+return qemu_clock_get_ns(clock_type) - stats->last_access_time_ns;
+}
diff --git a/block/qapi.c b/block/qapi.c
index ec0f513..539c2e3 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -357,6 +357,11 @@ static BlockStats *bdrv_query_stats(const BlockDriverState 
*bs,
 s->stats->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE];
 s->stats->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ];
 s->stats->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH];
+
+s->stats->has_idle_time_ns = stats->last_access_time_ns > 0;
+if (s->stats->has_idle_time_ns) {
+s->stats->idle_time_ns = block_acct_idle_time_ns(stats);
+}
 }
 
 s->stats->wr_highest_offset = bs->wr_highest_offset;
diff --git a/hmp.c b/hmp.c
index a15d00c..754e447 100644
--- a/hmp.c
+++ b/hmp.c
@@ -521,6 +521,7 @@ void hmp_info_blockstats(Monitor *mon, const QDict *qdict)
" flush_total_time_ns=%" PRId64
" rd_merged=%" PRId64
" wr_merged=%" PRId64
+   " idle_time_ns=%" PRId64
"\n",
stats->value->stats->rd_bytes,
stats->value->stats->wr_bytes,
@@ -531,7 +532,8 @@ void hmp_info_blockstats(Monitor *mon, const QDict *qdict)
stats->value->stats->rd_total_time_ns,
stats->value->stats->flush_total_time_ns,
stats->value->stats->rd_merged,
-   stats->value->stats->wr_merged);
+   stats->value->stats->wr_merged,
+   stats->value->stats->idle_time_ns);
 }
 
 qapi_free_BlockStatsList(stats_list);
diff --git a/include/block/accounting.h b/include/block/accounting.h
index 66637cd..4b2b999 100644
--- a/include/block/accounting.h
+++ b/include/block/accounting.h
@@ -40,6 +40,7 @@ typedef struct BlockAcctStats {
 uint64_t nr_ops[BLOCK_MAX_IOTYPE];
 uint64_t total_time_ns[BLOCK_MAX_IOTYPE];
 uint64_t merged[BLOCK_MAX_IOTYPE];
+int64_t last_access_time_ns;
 } BlockAcctStats;
 
 typedef struct BlockAcctCookie {
@@ -53,5 +54,6 @@ void block_acct_start(BlockAcctStats *stats, BlockAcctCookie 
*cookie,
 void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie);
 void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type,
int num_requests);
+int64_t block_acct_idle_time_ns(BlockAcctStats *stats);
 
 #endif
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 425fdab..b00be46 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -448,6 +448,10 @@
 # @wr_merged: Number of write requests that have been merged into another
 # request (Since 2.3).
 #
+# @idle_time_ns: #optional Time since the last I/O operation, in
+#nanoseconds. If the field is absent it means that
+#there haven't been any operations yet (Since 2.5).
+#
 # Since: 0.14.0
 ##
 { 'struct': 'BlockDeviceStats',
@@ -455,7 +459,7 @@

[Qemu-devel] [PATCH 10/10] migration: normalize locking in migration/savevm.c

2015-11-10 Thread Denis V. Lunev

basically all bdrv_* operations must be called under aio_context_acquire
except ones with bdrv_all prefix.

Signed-off-by: Denis V. Lunev 
CC: Juan Quintela 
CC: Stefan Hajnoczi 
CC: Kevin Wolf 
---
 migration/savevm.c | 19 ++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/migration/savevm.c b/migration/savevm.c
index 20c95b2..01110a4 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1249,6 +1249,7 @@ void hmp_savevm(Monitor *mon, const QDict *qdict)
 struct tm tm;
 const char *name = qdict_get_try_str(qdict, "name");
 Error *local_err = NULL;
+AioContext *aio_context;
 
 if (!bdrv_all_can_snapshot()) {
 monitor_printf(mon, "Device '%s' is writable but does not "
@@ -1270,6 +1271,7 @@ void hmp_savevm(Monitor *mon, const QDict *qdict)
 monitor_printf(mon, "No block device can accept snapshots\n");
 return;
 }
+aio_context = bdrv_get_aio_context(bs);
 
 saved_vm_running = runstate_is_running();
 
@@ -1280,6 +1282,8 @@ void hmp_savevm(Monitor *mon, const QDict *qdict)
 }
 vm_stop(RUN_STATE_SAVE_VM);
 
+aio_context_acquire(aio_context);
+
 memset(sn, 0, sizeof(*sn));
 
 /* fill auxiliary fields */
@@ -1324,6 +1328,7 @@ void hmp_savevm(Monitor *mon, const QDict *qdict)
 }
 
  the_end:
+aio_context_release(aio_context);
 if (saved_vm_running) {
 vm_start();
 }
@@ -1362,6 +1367,7 @@ int load_vmstate(const char *name)
 QEMUSnapshotInfo sn;
 QEMUFile *f;
 int ret;
+AioContext *aio_context;
 
 if (!bdrv_all_can_snapshot()) {
 error_report("Device '%s' is writable but does not support snapshots.",
@@ -1380,9 +1386,12 @@ int load_vmstate(const char *name)
 error_report("No block device supports snapshots");
 return -ENOTSUP;
 }
+aio_context = bdrv_get_aio_context(bs);
 
 /* Don't even try to load empty VM states */
+aio_context_acquire(aio_context);
 ret = bdrv_snapshot_find(bs_vm_state, , name);
+aio_context_release(aio_context);
 if (ret < 0) {
 return ret;
 } else if (sn.vm_state_size == 0) {
@@ -1410,9 +1419,12 @@ int load_vmstate(const char *name)
 
 qemu_system_reset(VMRESET_SILENT);
 migration_incoming_state_new(f);
-ret = qemu_loadvm_state(f);
 
+aio_context_acquire(aio_context);
+ret = qemu_loadvm_state(f);
 qemu_fclose(f);
+aio_context_release(aio_context);
+
 migration_incoming_state_destroy();
 if (ret < 0) {
 error_report("Error %d while loading VM state", ret);
@@ -1443,14 +1455,19 @@ void hmp_info_snapshots(Monitor *mon, const QDict 
*qdict)
 int nb_sns, i;
 int total;
 int *available_snapshots;
+AioContext *aio_context;
 
 bs = bdrv_all_find_vmstate_bs();
 if (!bs) {
 monitor_printf(mon, "No available block device supports snapshots\n");
 return;
 }
+aio_context = bdrv_get_aio_context(bs);
 
+aio_context_acquire(aio_context);
 nb_sns = bdrv_snapshot_list(bs, _tab);
+aio_context_release(aio_context);
+
 if (nb_sns < 0) {
 monitor_printf(mon, "bdrv_snapshot_list: error %d\n", nb_sns);
 return;
-- 
2.5.0

[Qemu-devel] [PULL 41/44] ide: Account for failed and invalid operations

2015-11-10 Thread Stefan Hajnoczi

From: Alberto Garcia 

Signed-off-by: Alberto Garcia 
Message-id: 
bf4d6c9c563877e699b0bf42e7eaf8b096c4a35e.1446044838.git.be...@igalia.com
Signed-off-by: Stefan Hajnoczi 
---
 hw/ide/core.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/hw/ide/core.c b/hw/ide/core.c
index 35ba1ad..2725dd3 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -574,7 +574,6 @@ static void ide_sector_read_cb(void *opaque, int ret)
 if (ret == -ECANCELED) {
 return;
 }
-block_acct_done(blk_get_stats(s->blk), >acct);
 if (ret != 0) {
 if (ide_handle_rw_error(s, -ret, IDE_RETRY_PIO |
 IDE_RETRY_READ)) {
@@ -582,6 +581,8 @@ static void ide_sector_read_cb(void *opaque, int ret)
 }
 }
 
+block_acct_done(blk_get_stats(s->blk), >acct);
+
 n = s->nsector;
 if (n > s->req_nb_sectors) {
 n = s->req_nb_sectors;
@@ -621,6 +622,7 @@ static void ide_sector_read(IDEState *s)
 
 if (!ide_sect_range_ok(s, sector_num, n)) {
 ide_rw_error(s);
+block_acct_invalid(blk_get_stats(s->blk), BLOCK_ACCT_READ);
 return;
 }
 
@@ -672,6 +674,7 @@ static int ide_handle_rw_error(IDEState *s, int error, int 
op)
 assert(s->bus->retry_unit == s->unit);
 s->bus->error_status = op;
 } else if (action == BLOCK_ERROR_ACTION_REPORT) {
+block_acct_failed(blk_get_stats(s->blk), >acct);
 if (op & IDE_RETRY_DMA) {
 ide_dma_error(s);
 } else {
@@ -750,6 +753,7 @@ static void ide_dma_cb(void *opaque, int ret)
 if ((s->dma_cmd == IDE_DMA_READ || s->dma_cmd == IDE_DMA_WRITE) &&
 !ide_sect_range_ok(s, sector_num, n)) {
 ide_dma_error(s);
+block_acct_invalid(blk_get_stats(s->blk), s->acct.type);
 return;
 }
 
@@ -826,7 +830,6 @@ static void ide_sector_write_cb(void *opaque, int ret)
 if (ret == -ECANCELED) {
 return;
 }
-block_acct_done(blk_get_stats(s->blk), >acct);
 
 s->pio_aiocb = NULL;
 s->status &= ~BUSY_STAT;
@@ -837,6 +840,8 @@ static void ide_sector_write_cb(void *opaque, int ret)
 }
 }
 
+block_acct_done(blk_get_stats(s->blk), >acct);
+
 n = s->nsector;
 if (n > s->req_nb_sectors) {
 n = s->req_nb_sectors;
@@ -887,6 +892,7 @@ static void ide_sector_write(IDEState *s)
 
 if (!ide_sect_range_ok(s, sector_num, n)) {
 ide_rw_error(s);
+block_acct_invalid(blk_get_stats(s->blk), BLOCK_ACCT_WRITE);
 return;
 }
 
-- 
2.5.0

[Qemu-devel] [PULL 13/57] migrate_init: Call from savevm

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

Suspend to file is very much like a migrate, and it makes life
easier if we have the Migration state available, so initialise it
in the savevm.c code for suspending.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: David Gibson 
Reviewed-by: Juan Quintela 
Reviewed-by: Amit Shah 
Signed-off-by: Juan Quintela 
---
 include/migration/migration.h | 4 +---
 include/qemu/typedefs.h   | 1 +
 migration/migration.c | 2 +-
 migration/savevm.c| 2 ++
 4 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 51bc348..82cc3a6 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -42,10 +42,7 @@ struct MigrationParams {
 bool shared;
 };

-typedef struct MigrationState MigrationState;
-
 typedef QLIST_HEAD(, LoadStateEntry) LoadStateEntry_Head;
-
 /* State for the incoming migration */
 struct MigrationIncomingState {
 QEMUFile *from_src_file;
@@ -116,6 +113,7 @@ int migrate_fd_close(MigrationState *s);

 void add_migration_state_change_notifier(Notifier *notify);
 void remove_migration_state_change_notifier(Notifier *notify);
+MigrationState *migrate_init(const MigrationParams *params);
 bool migration_in_setup(MigrationState *);
 bool migration_has_finished(MigrationState *);
 bool migration_has_failed(MigrationState *);
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index 4338767..f7e0ed0 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -44,6 +44,7 @@ typedef struct MemoryRegion MemoryRegion;
 typedef struct MemoryRegionSection MemoryRegionSection;
 typedef struct MigrationIncomingState MigrationIncomingState;
 typedef struct MigrationParams MigrationParams;
+typedef struct MigrationState MigrationState;
 typedef struct Monitor Monitor;
 typedef struct MouseTransformInfo MouseTransformInfo;
 typedef struct MSIMessage MSIMessage;
diff --git a/migration/migration.c b/migration/migration.c
index 6fef544..82efbeb 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -682,7 +682,7 @@ bool migration_has_failed(MigrationState *s)
 s->state == MIGRATION_STATUS_FAILED);
 }

-static MigrationState *migrate_init(const MigrationParams *params)
+MigrationState *migrate_init(const MigrationParams *params)
 {
 MigrationState *s = migrate_get_current();
 int64_t bandwidth_limit = s->bandwidth_limit;
diff --git a/migration/savevm.c b/migration/savevm.c
index 638aa1f..9dc57d3 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -922,6 +922,8 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
 .blk = 0,
 .shared = 0
 };
+MigrationState *ms = migrate_init();
+ms->file = f;

 if (qemu_savevm_state_blocked(errp)) {
 return -EINVAL;
-- 
2.5.0

[Qemu-devel] [PULL 04/57] Move page_size_init earlier

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

The HOST_PAGE_ALIGN macros don't work until the page size variables
have been set up; later in postcopy I use those macros in the RAM
code, and it can be triggered using -object.

Fix this by initialising page_size_init() earlier - it's currently
initialised inside the accelerators, move it up into vl.c.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 include/exec/exec-all.h | 1 -
 include/qemu-common.h   | 1 +
 kvm-all.c   | 1 -
 qtest.c | 1 -
 vl.c| 1 +
 5 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index b07de10..d900b0d 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -72,7 +72,6 @@ void restore_state_to_opc(CPUArchState *env, struct 
TranslationBlock *tb,

 void cpu_gen_init(void);
 bool cpu_restore_state(CPUState *cpu, uintptr_t searched_pc);
-void page_size_init(void);

 void QEMU_NORETURN cpu_resume_from_signal(CPUState *cpu, void *puc);
 void QEMU_NORETURN cpu_io_recompile(CPUState *cpu, uintptr_t retaddr);
diff --git a/include/qemu-common.h b/include/qemu-common.h
index 2f74540..405364f 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -499,5 +499,6 @@ size_t buffer_find_nonzero_offset(const void *buf, size_t 
len);
 int parse_debug_env(const char *name, int max, int initial);

 const char *qemu_ether_ntoa(const MACAddr *mac);
+void page_size_init(void);

 #endif
diff --git a/kvm-all.c b/kvm-all.c
index 1bc1273..de3c8c4 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1461,7 +1461,6 @@ static int kvm_init(MachineState *ms)
  * page size for the system though.
  */
 assert(TARGET_PAGE_SIZE <= getpagesize());
-page_size_init();

 s->sigmask_len = 8;

diff --git a/qtest.c b/qtest.c
index 8e10340..05cefd2 100644
--- a/qtest.c
+++ b/qtest.c
@@ -657,7 +657,6 @@ void qtest_init(const char *qtest_chrdev, const char 
*qtest_log, Error **errp)

 inbuf = g_string_new("");
 qtest_chr = chr;
-page_size_init();
 }

 bool qtest_driver(void)
diff --git a/vl.c b/vl.c
index 21e8876..7d993a5 100644
--- a/vl.c
+++ b/vl.c
@@ -4285,6 +4285,7 @@ int main(int argc, char **argv, char **envp)
 exit(1);
 }

+page_size_init();
 socket_init();

 if (qemu_opts_foreach(qemu_find_opts("object"),
-- 
2.5.0

[Qemu-devel] [PULL 29/57] migrate_start_postcopy: Command to trigger transition to postcopy

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

Once postcopy is enabled (with migrate_set_capability), the migration
will still start in precopy mode.  To cause a transition into postcopy,
the:

  migrate_start_postcopy

command must be issued.  Postcopy will start sometime after this
(when it's next checked in the migration loop).

Issuing the command before migration has started will error,
and issuing after it has finished is ignored.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Eric Blake 
Reviewed-by: Juan Quintela 
Reviewed-by: Amit Shah 
Signed-off-by: Juan Quintela 
---
 hmp-commands.hx   | 15 +++
 hmp.c |  7 +++
 hmp.h |  1 +
 include/migration/migration.h |  3 +++
 migration/migration.c | 22 ++
 qapi-schema.json  |  8 
 qmp-commands.hx   | 19 +++
 7 files changed, 75 insertions(+)

diff --git a/hmp-commands.hx b/hmp-commands.hx
index 3a4ae39..8939b98 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1008,6 +1008,21 @@ Set the parameter @var{parameter} for migration.
 ETEXI

 {
+.name   = "migrate_start_postcopy",
+.args_type  = "",
+.params = "",
+.help   = "Switch migration to postcopy mode",
+.mhandler.cmd = hmp_migrate_start_postcopy,
+},
+
+STEXI
+@item migrate_start_postcopy
+@findex migrate_start_postcopy
+Switch in-progress migration to postcopy mode. Ignored after the end of
+migration (or once already in postcopy).
+ETEXI
+
+{
 .name   = "client_migrate_info",
 .args_type  = 
"protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?",
 .params = "protocol hostname port tls-port cert-subject",
diff --git a/hmp.c b/hmp.c
index a15d00c..e1f854a 100644
--- a/hmp.c
+++ b/hmp.c
@@ -1293,6 +1293,13 @@ void hmp_client_migrate_info(Monitor *mon, const QDict 
*qdict)
 hmp_handle_error(mon, );
 }

+void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict)
+{
+Error *err = NULL;
+qmp_migrate_start_postcopy(&err);
+hmp_handle_error(mon, &err);
+}
+
 void hmp_set_password(Monitor *mon, const QDict *qdict)
 {
 const char *protocol  = qdict_get_str(qdict, "protocol");
diff --git a/hmp.h b/hmp.h
index 81656c3..a8c5b5a 100644
--- a/hmp.h
+++ b/hmp.h
@@ -69,6 +69,7 @@ void hmp_migrate_set_capability(Monitor *mon, const QDict 
*qdict);
 void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict);
 void hmp_migrate_set_cache_size(Monitor *mon, const QDict *qdict);
 void hmp_client_migrate_info(Monitor *mon, const QDict *qdict);
+void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict);
 void hmp_set_password(Monitor *mon, const QDict *qdict);
 void hmp_expire_password(Monitor *mon, const QDict *qdict);
 void hmp_eject(Monitor *mon, const QDict *qdict);
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 2e9fa3c..217 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -127,6 +127,9 @@ struct MigrationState
 int64_t xbzrle_cache_size;
 int64_t setup_time;
 int64_t dirty_sync_count;
+
+/* Flag set once the migration has been asked to enter postcopy */
+bool start_postcopy;
 };

 void process_incoming_migration(QEMUFile *f);
diff --git a/migration/migration.c b/migration/migration.c
index bb4c92e..9c46472 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -668,6 +668,28 @@ void qmp_migrate_set_parameters(bool has_compress_level,
 }
 }

+void qmp_migrate_start_postcopy(Error **errp)
+{
+MigrationState *s = migrate_get_current();
+
+if (!migrate_postcopy_ram()) {
+error_setg(errp, "Enable postcopy with migration_set_capability before"
+ " the start of migration");
+return;
+}
+
+if (s->state == MIGRATION_STATUS_NONE) {
+error_setg(errp, "Postcopy must be started after migration has been"
+ " started");
+return;
+}
+/*
+ * we don't error if migration has finished since that would be racy
+ * with issuing this command.
+ */
+atomic_set(>start_postcopy, true);
+}
+
 /* shared migration helpers */

 static void migrate_set_state(MigrationState *s, int old_state, int new_state)
diff --git a/qapi-schema.json b/qapi-schema.json
index 8638d42..d25df93 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -702,6 +702,14 @@
 '*tls-port': 'int', '*cert-subject': 'str' } }

 ##
+# @migrate-start-postcopy
+#
+# Switch migration to postcopy mode
+#
+# Since: 2.5
+{ 'command': 'migrate-start-postcopy' }
+
+##
 # @MouseInfo:
 #
 # Information about a mouse device.
diff --git a/qmp-commands.hx b/qmp-commands.hx
index d7cf0ff..7f85d40 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -718,6

[Qemu-devel] [PULL 38/57] Postcopy: Postcopy startup in migration thread

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

Rework the migration thread to setup and start postcopy.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Amit Shah 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 include/migration/migration.h |   3 +
 migration/migration.c | 174 --
 trace-events  |   4 +
 3 files changed, 174 insertions(+), 7 deletions(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 2ad0d2b..ff13ff2 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -135,6 +135,9 @@ struct MigrationState

 /* Flag set once the migration has been asked to enter postcopy */
 bool start_postcopy;
+
+/* Flag set once the migration thread is running (and needs joining) */
+bool migration_thread_running;
 };

 void process_incoming_migration(QEMUFile *f);
diff --git a/migration/migration.c b/migration/migration.c
index afc863a..064986b 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -745,7 +745,10 @@ static void migrate_fd_cleanup(void *opaque)
 if (s->file) {
 trace_migrate_fd_cleanup();
 qemu_mutex_unlock_iothread();
-qemu_thread_join(>thread);
+if (s->migration_thread_running) {
+qemu_thread_join(>thread);
+s->migration_thread_running = false;
+}
 qemu_mutex_lock_iothread();

 migrate_compress_threads_join();
@@ -1238,7 +1241,6 @@ out:
 return NULL;
 }

-__attribute__ (( unused )) /* Until later in patch series */
 static int open_return_path_on_source(MigrationState *ms)
 {

@@ -1279,6 +1281,109 @@ static int 
await_return_path_close_on_source(MigrationState *ms)
 return ms->rp_state.error;
 }

+/*
+ * Switch from normal iteration to postcopy
+ * Returns non-0 on error
+ */
+static int postcopy_start(MigrationState *ms, bool *old_vm_running)
+{
+int ret;
+const QEMUSizedBuffer *qsb;
+int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+migrate_set_state(ms, MIGRATION_STATUS_ACTIVE,
+  MIGRATION_STATUS_POSTCOPY_ACTIVE);
+
+trace_postcopy_start();
+qemu_mutex_lock_iothread();
+trace_postcopy_start_set_run();
+
+qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
+*old_vm_running = runstate_is_running();
+global_state_store();
+ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
+
+if (ret < 0) {
+goto fail;
+}
+
+/*
+ * in Finish migrate and with the io-lock held everything should
+ * be quiet, but we've potentially still got dirty pages and we
+ * need to tell the destination to throw any pages it's already received
+ * that are dirty
+ */
+if (ram_postcopy_send_discard_bitmap(ms)) {
+error_report("postcopy send discard bitmap failed");
+goto fail;
+}
+
+/*
+ * send rest of state - note things that are doing postcopy
+ * will notice we're in POSTCOPY_ACTIVE and not actually
+ * wrap their state up here
+ */
+qemu_file_set_rate_limit(ms->file, INT64_MAX);
+/* Ping just for debugging, helps line traces up */
+qemu_savevm_send_ping(ms->file, 2);
+
+/*
+ * While loading the device state we may trigger page transfer
+ * requests and the fd must be free to process those, and thus
+ * the destination must read the whole device state off the fd before
+ * it starts processing it.  Unfortunately the ad-hoc migration format
+ * doesn't allow the destination to know the size to read without fully
+ * parsing it through each devices load-state code (especially the open
+ * coded devices that use get/put).
+ * So we wrap the device state up in a package with a length at the start;
+ * to do this we use a qemu_buf to hold the whole of the device state.
+ */
+QEMUFile *fb = qemu_bufopen("w", NULL);
+if (!fb) {
+error_report("Failed to create buffered file");
+goto fail;
+}
+
+qemu_savevm_state_complete_precopy(fb);
+qemu_savevm_send_ping(fb, 3);
+
+qemu_savevm_send_postcopy_run(fb);
+
+/* <><> end of stuff going into the package */
+qsb = qemu_buf_get(fb);
+
+/* Now send that blob */
+if (qemu_savevm_send_packaged(ms->file, qsb)) {
+goto fail_closefb;
+}
+qemu_fclose(fb);
+ms->downtime =  qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop;
+
+qemu_mutex_unlock_iothread();
+
+/*
+ * Although this ping is just for debug, it could potentially be
+ * used for getting a better measurement of downtime at the source.
+ */
+qemu_savevm_send_ping(ms->file, 4);
+
+ret = qemu_file_get_error(ms->file);
+if (ret) {
+error_report("postcopy_start: Migration stream errored");
+migrate_set_state(ms, MIGRATION_STATUS_POSTCOPY_ACTIVE,
+

[Qemu-devel] [PULL 54/57] End of migration for postcopy

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

Tweak the end of migration cleanup; we don't want to close stuff down
at the end of the main stream, since the postcopy is still sending pages
on the other thread.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Reviewed-by: Amit Shah 
Signed-off-by: Juan Quintela 
---
 migration/migration.c | 26 +-
 trace-events  |  6 --
 2 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index db3d2dd..c5c977e 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -325,13 +325,37 @@ static void process_incoming_migration_co(void *opaque)
 {
 QEMUFile *f = opaque;
 Error *local_err = NULL;
+MigrationIncomingState *mis;
+PostcopyState ps;
 int ret;

-migration_incoming_state_new(f);
+mis = migration_incoming_state_new(f);
 postcopy_state_set(POSTCOPY_INCOMING_NONE);
 migrate_generate_event(MIGRATION_STATUS_ACTIVE);
+
 ret = qemu_loadvm_state(f);

+ps = postcopy_state_get();
+trace_process_incoming_migration_co_end(ret, ps);
+if (ps != POSTCOPY_INCOMING_NONE) {
+if (ps == POSTCOPY_INCOMING_ADVISE) {
+/*
+ * Where a migration had postcopy enabled (and thus went to advise)
+ * but managed to complete within the precopy period, we can use
+ * the normal exit.
+ */
+postcopy_ram_incoming_cleanup(mis);
+} else if (ret >= 0) {
+/*
+ * Postcopy was started, cleanup should happen at the end of the
+ * postcopy thread.
+ */
+trace_process_incoming_migration_co_postcopy_end_main();
+return;
+}
+/* Else if something went wrong then just fall out of the normal exit 
*/
+}
+
 qemu_fclose(f);
 free_xbzrle_decoded_buf();
 migration_incoming_state_destroy();
diff --git a/trace-events b/trace-events
index 452435d..ef6bc41 100644
--- a/trace-events
+++ b/trace-events
@@ -1480,11 +1480,13 @@ source_return_path_thread_entry(void) ""
 source_return_path_thread_loop_top(void) ""
 source_return_path_thread_pong(uint32_t val) "%x"
 source_return_path_thread_shut(uint32_t val) "%x"
-migrate_transferred(uint64_t tranferred, uint64_t time_spent, double 
bandwidth, uint64_t size) "transferred %" PRIu64 " time_spent %" PRIu64 " 
bandwidth %g max_size %" PRId64
-migrate_state_too_big(void) ""
 migrate_global_state_post_load(const char *state) "loaded state: %s"
 migrate_global_state_pre_save(const char *state) "saved state: %s"
 migration_thread_low_pending(uint64_t pending) "%" PRIu64
+migrate_state_too_big(void) ""
+migrate_transferred(uint64_t tranferred, uint64_t time_spent, double 
bandwidth, uint64_t size) "transferred %" PRIu64 " time_spent %" PRIu64 " 
bandwidth %g max_size %" PRId64
+process_incoming_migration_co_end(int ret, int ps) "ret=%d postcopy-state=%d"
+process_incoming_migration_co_postcopy_end_main(void) ""

 # migration/rdma.c
 qemu_rdma_accept_incoming_migration(void) ""
-- 
2.5.0

Re: [Qemu-devel] [Qemu-block] [PULL v2 39/40] iotests: Add tests for the x-blockdev-del command

2015-11-10 Thread Stefan Hajnoczi

On Tue, Nov 10, 2015 at 2:09 PM, Kevin Wolf  wrote:
> From: Alberto Garcia 
>
> Signed-off-by: Alberto Garcia 
> Message-id: 
> 57c3b0d4d0c73ddadd19e5bded9492c359cc4568.1446475331.git.be...@igalia.com
> Reviewed-by: Max Reitz 
> Signed-off-by: Max Reitz 
> ---
>  tests/qemu-iotests/139 | 414 
> +
>  tests/qemu-iotests/139.out |   5 +
>  tests/qemu-iotests/group   |   1 +
>  3 files changed, 420 insertions(+)
>  create mode 100644 tests/qemu-iotests/139
>  create mode 100644 tests/qemu-iotests/139.out

I'm seeing the following failure:

 ./check -qcow2 139
QEMU  -- "./qemu" -nodefaults
QEMU_IMG  -- "./qemu-img"
QEMU_IO   -- "./qemu-io"  -f qcow2 --cache writeback
QEMU_NBD  -- "./qemu-nbd"
IMGFMT-- qcow2 (compat=1.1)
IMGPROTO  -- file
PLATFORM  -- Linux/x86_64 stefanha-x1 4.2.5-300.fc23.x86_64
TEST_DIR  -- /home/stefanha/qemu/tests/qemu-iotests/scratch
SOCKET_SCM_HELPER -- /home/stefanha/qemu/tests/qemu-iotests/socket_scm_helper

139 [failed, exit status 1] - output mismatch (see 139.out.bad)
--- /home/stefanha/qemu/tests/qemu-iotests/139.out2015-11-10
14:24:03.728322694 +
+++ 139.out.bad2015-11-10 14:54:51.617899443 +
@@ -1,5 +1,19 @@
-
+F...
+==
+FAIL: testQuorum (__main__.TestBlockdevDel)
+--
+Traceback (most recent call last):
+  File "139", line 403, in testQuorum
+self.addQuorum('quorum0', 'node0', 'node1')
+  File "139", line 291, in addQuorum
+self.assert_qmp(result, 'return', {})
+  File "/home/stefanha/qemu/tests/qemu-iotests/iotests.py", line 314,
in assert_qmp
+result = self.dictpath(d, path)
+  File "/home/stefanha/qemu/tests/qemu-iotests/iotests.py", line 293,
in dictpath
+self.fail('failed path traversal for "%s" in "%s"' % (path, str(d)))
+AssertionError: failed path traversal for "return" in "{u'error':
{u'class': u'GenericError', u'desc': u"Unknown driver 'quorum'"}}"
+
 --
 Ran 12 tests

-OK
+FAILED (failures=1)
Failures: 139
Failed 1 of 1 tests

[Qemu-devel] [PULL 34/57] Postcopy: Maintain unsentmap

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

Maintain an 'unsentmap' of pages that have yet to be sent.
This is used in the following patches to discard some set of
the pages already sent as we enter postcopy mode.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 migration/ram.c | 51 +--
 1 file changed, 45 insertions(+), 6 deletions(-)

diff --git a/migration/ram.c b/migration/ram.c
index 4053ca1..c703176 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -237,7 +237,14 @@ typedef struct PageSearchStatus PageSearchStatus;

 static struct BitmapRcu {
 struct rcu_head rcu;
+/* Main migration bitmap */
 unsigned long *bmap;
+/* bitmap of pages that haven't been sent even once
+ * only maintained and used in postcopy at the moment
+ * where it's used to send the dirtymap at the start
+ * of the postcopy phase
+ */
+unsigned long *unsentmap;
 } *migration_bitmap_rcu;

 struct CompressParam {
@@ -531,10 +538,18 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t 
**current_data,
 return 1;
 }

-/* Called with rcu_read_lock() to protect migration_bitmap */
+/* Called with rcu_read_lock() to protect migration_bitmap
+ * rb: The RAMBlock  to search for dirty pages in
+ * start: Start address (typically so we can continue from previous page)
+ * ram_addr_abs: Pointer into which to store the address of the dirty page
+ *   within the global ram_addr space
+ *
+ * Returns: byte offset within memory region of the start of a dirty page
+ */
 static inline
 ram_addr_t migration_bitmap_find_and_reset_dirty(RAMBlock *rb,
- ram_addr_t start)
+ ram_addr_t start,
+ ram_addr_t *ram_addr_abs)
 {
 unsigned long base = rb->offset >> TARGET_PAGE_BITS;
 unsigned long nr = base + (start >> TARGET_PAGE_BITS);
@@ -555,6 +570,7 @@ ram_addr_t migration_bitmap_find_and_reset_dirty(RAMBlock 
*rb,
 clear_bit(next, bitmap);
 migration_dirty_pages--;
 }
+*ram_addr_abs = next << TARGET_PAGE_BITS;
 return (next - base) << TARGET_PAGE_BITS;
 }

@@ -953,10 +969,11 @@ static int ram_save_compressed_page(QEMUFile *f, RAMBlock 
*block,
  * @*again: Set to false if the search has scanned the whole of RAM
  */
 static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss,
- bool *again)
+ bool *again, ram_addr_t *ram_addr_abs)
 {
 pss->offset = migration_bitmap_find_and_reset_dirty(pss->block,
-   pss->offset);
+   pss->offset,
+   ram_addr_abs);
 if (pss->complete_round && pss->block == last_seen_block &&
 pss->offset >= last_offset) {
 /*
@@ -1014,6 +1031,8 @@ static int ram_find_and_save_block(QEMUFile *f, bool 
last_stage,
 PageSearchStatus pss;
 int pages = 0;
 bool again, found;
+ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
+ ram_addr_t space */

 pss.block = last_seen_block;
 pss.offset = last_offset;
@@ -1024,7 +1043,7 @@ static int ram_find_and_save_block(QEMUFile *f, bool 
last_stage,
 }

 do {
-found = find_dirty_block(f, , );
+found = find_dirty_block(f, , , _ram_abs);

 if (found) {
 if (compression_switch && migrate_use_compression()) {
@@ -1038,7 +1057,14 @@ static int ram_find_and_save_block(QEMUFile *f, bool 
last_stage,

 /* if page is unmodified, continue to the next */
 if (pages > 0) {
+unsigned long *unsentmap;
+
+unsentmap = atomic_rcu_read(_bitmap_rcu)->unsentmap;
 last_sent_block = pss.block;
+if (unsentmap) {
+clear_bit(dirty_ram_abs >> TARGET_PAGE_BITS, unsentmap);
+}
+
 }
 }
 } while (!pages && again);
@@ -1097,6 +1123,7 @@ void free_xbzrle_decoded_buf(void)
 static void migration_bitmap_free(struct BitmapRcu *bmap)
 {
 g_free(bmap->bmap);
+g_free(bmap->unsentmap);
 g_free(bmap);
 }

@@ -1153,6 +1180,13 @@ void migration_bitmap_extend(ram_addr_t old, ram_addr_t 
new)
 qemu_mutex_lock(_bitmap_mutex);
 bitmap_copy(bitmap->bmap, old_bitmap->bmap, old);
 bitmap_set(bitmap->bmap, old, new - old);
+
+/* We don't have a way to safely extend the unsentmap
+ * with RCU; so mark it as missing, entry to postcopy
+ * will fail.
+ */
+bitmap->unsentmap = NULL;
+
 atomic_rcu_set(_bitmap_rcu, bitmap);
 qemu_mutex_unlock(_bitmap_mutex);

Re: [Qemu-devel] [PATCH 0/3] block/gluster: add support for multiple gluster servers

2015-11-10 Thread Eric Blake

On 11/10/2015 02:09 AM, Prasanna Kumar Kalever wrote:
> This release is rebased on qemu master branch.
> In this series of patches 1/3 and 2/3 are unchanged.

It's still nice to send the _entire_ series with v13 in the subject line
(cover letter included), rather than mixing and matching (no version in
cover letter, v2 in patches 1 and 2, and v13 in patch 3).  'git
send-email -v13' makes this easy.

> 
> Prasanna Kumar Kalever (3):
>   block/gluster: rename [server, volname, image] -> [host, volume, path]
>   block/gluster: code cleanup
>   block/gluster: add support for multiple gluster servers
> 
>  block/gluster.c  | 597 
> ---
>  qapi/block-core.json |  60 +-
>  2 files changed, 529 insertions(+), 128 deletions(-)
> 

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

[Qemu-devel] [PULL 09/44] qed: Implement .bdrv_drain

2015-11-10 Thread Stefan Hajnoczi

From: Fam Zheng 

The "need_check_timer" is used to clear the "NEED_CHECK" flag in the
image header after a grace period once metadata update has finished. In
compliance to the bdrv_drain semantics we should make sure it remains
deleted once .bdrv_drain is called.

We cannot reuse qed_need_check_timer_cb because here it doesn't satisfy
the assertion.  Do the "plug" and "flush" calls manually.

Signed-off-by: Fam Zheng 
Reviewed-by: Kevin Wolf 
Message-id: 1447064214-29930-10-git-send-email-f...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 block/qed.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/block/qed.c b/block/qed.c
index 5ea05d4..9b88895 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -375,6 +375,18 @@ static void bdrv_qed_attach_aio_context(BlockDriverState 
*bs,
 }
 }
 
+static void bdrv_qed_drain(BlockDriverState *bs)
+{
+BDRVQEDState *s = bs->opaque;
+
+/* Cancel timer and start doing I/O that were meant to happen as if it
+ * fired, that way we get bdrv_drain() taking care of the ongoing requests
+ * correctly. */
+qed_cancel_need_check_timer(s);
+qed_plug_allocating_write_reqs(s);
+bdrv_aio_flush(s->bs, qed_clear_need_check, s);
+}
+
 static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
  Error **errp)
 {
@@ -1676,6 +1688,7 @@ static BlockDriver bdrv_qed = {
 .bdrv_check   = bdrv_qed_check,
 .bdrv_detach_aio_context  = bdrv_qed_detach_aio_context,
 .bdrv_attach_aio_context  = bdrv_qed_attach_aio_context,
+.bdrv_drain   = bdrv_qed_drain,
 };
 
 static void bdrv_qed_init(void)
-- 
2.5.0

[Qemu-devel] [PULL 40/44] atapi: Account for failed and invalid operations

2015-11-10 Thread Stefan Hajnoczi

From: Alberto Garcia 

Signed-off-by: Alberto Garcia 
Message-id: 
59dee4e2921b0c79d41c49b67dfb93d32db9f7f9.1446044838.git.be...@igalia.com
Signed-off-by: Stefan Hajnoczi 
---
 hw/ide/atapi.c | 31 +++
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
index 747f466..cf0b78e 100644
--- a/hw/ide/atapi.c
+++ b/hw/ide/atapi.c
@@ -108,27 +108,30 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
 static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int sector_size)
 {
 int ret;
+block_acct_start(blk_get_stats(s->blk), >acct,
+ 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
 
 switch(sector_size) {
 case 2048:
-block_acct_start(blk_get_stats(s->blk), >acct,
- 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
 ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4);
-block_acct_done(blk_get_stats(s->blk), >acct);
 break;
 case 2352:
-block_acct_start(blk_get_stats(s->blk), >acct,
- 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
 ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4);
-block_acct_done(blk_get_stats(s->blk), >acct);
-if (ret < 0)
-return ret;
-cd_data_to_raw(buf, lba);
+if (ret >= 0) {
+cd_data_to_raw(buf, lba);
+}
 break;
 default:
-ret = -EIO;
-break;
+block_acct_invalid(blk_get_stats(s->blk), BLOCK_ACCT_READ);
+return -EIO;
 }
+
+if (ret < 0) {
+block_acct_failed(blk_get_stats(s->blk), >acct);
+} else {
+block_acct_done(blk_get_stats(s->blk), >acct);
+}
+
 return ret;
 }
 
@@ -357,7 +360,11 @@ static void ide_atapi_cmd_read_dma_cb(void *opaque, int 
ret)
 return;
 
 eot:
-block_acct_done(blk_get_stats(s->blk), >acct);
+if (ret < 0) {
+block_acct_failed(blk_get_stats(s->blk), >acct);
+} else {
+block_acct_done(blk_get_stats(s->blk), >acct);
+}
 ide_set_inactive(s, false);
 }
 
-- 
2.5.0

[Qemu-devel] [PATCH 06/10] migration: drop find_vmstate_bs check in hmp_delvm

2015-11-10 Thread Denis V. Lunev

There is not much sense in doing the check and printing a warning.

Signed-off-by: Denis V. Lunev 
CC: Juan Quintela 
---
 migration/savevm.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/migration/savevm.c b/migration/savevm.c
index 90aa565..4c652f3 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1448,11 +1448,6 @@ void hmp_delvm(Monitor *mon, const QDict *qdict)
 Error *err;
 const char *name = qdict_get_str(qdict, "name");
 
-if (!find_vmstate_bs()) {
-monitor_printf(mon, "No block device supports snapshots\n");
-return;
-}
-
 if (bdrv_all_delete_snapshot(name, , ) < 0) {
 monitor_printf(mon,
"Error while deleting snapshot on device '%s': %s\n",
-- 
2.5.0

[Qemu-devel] [PULL 15/57] Add Linux userfaultfd.h header

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

Postcopy uses the userfaultfd.h feature in the Linux kernel; include
the header.

(In early versions of the patch series we had this, and then we dropped
this by only including it if the kernel headers defined the syscall
number; however 1842bdfd added the syscall definition to our
headers, which means we can't tell if the kernel has it or not)

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 linux-headers/linux/userfaultfd.h | 167 ++
 1 file changed, 167 insertions(+)
 create mode 100644 linux-headers/linux/userfaultfd.h

diff --git a/linux-headers/linux/userfaultfd.h 
b/linux-headers/linux/userfaultfd.h
new file mode 100644
index 000..9057d7a
--- /dev/null
+++ b/linux-headers/linux/userfaultfd.h
@@ -0,0 +1,167 @@
+/*
+ *  include/linux/userfaultfd.h
+ *
+ *  Copyright (C) 2007  Davide Libenzi 
+ *  Copyright (C) 2015  Red Hat, Inc.
+ *
+ */
+
+#ifndef _LINUX_USERFAULTFD_H
+#define _LINUX_USERFAULTFD_H
+
+#include 
+
+#define UFFD_API ((__u64)0xAA)
+/*
+ * After implementing the respective features it will become:
+ * #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \
+ *   UFFD_FEATURE_EVENT_FORK)
+ */
+#define UFFD_API_FEATURES (0)
+#define UFFD_API_IOCTLS\
+   ((__u64)1 << _UFFDIO_REGISTER | \
+(__u64)1 << _UFFDIO_UNREGISTER |   \
+(__u64)1 << _UFFDIO_API)
+#define UFFD_API_RANGE_IOCTLS  \
+   ((__u64)1 << _UFFDIO_WAKE | \
+(__u64)1 << _UFFDIO_COPY | \
+(__u64)1 << _UFFDIO_ZEROPAGE)
+
+/*
+ * Valid ioctl command number range with this API is from 0x00 to
+ * 0x3F.  UFFDIO_API is the fixed number, everything else can be
+ * changed by implementing a different UFFD_API. If sticking to the
+ * same UFFD_API more ioctl can be added and userland will be aware of
+ * which ioctl the running kernel implements through the ioctl command
+ * bitmask written by the UFFDIO_API.
+ */
+#define _UFFDIO_REGISTER   (0x00)
+#define _UFFDIO_UNREGISTER (0x01)
+#define _UFFDIO_WAKE   (0x02)
+#define _UFFDIO_COPY   (0x03)
+#define _UFFDIO_ZEROPAGE   (0x04)
+#define _UFFDIO_API(0x3F)
+
+/* userfaultfd ioctl ids */
+#define UFFDIO 0xAA
+#define UFFDIO_API _IOWR(UFFDIO, _UFFDIO_API,  \
+ struct uffdio_api)
+#define UFFDIO_REGISTER_IOWR(UFFDIO, _UFFDIO_REGISTER, \
+ struct uffdio_register)
+#define UFFDIO_UNREGISTER  _IOR(UFFDIO, _UFFDIO_UNREGISTER,\
+struct uffdio_range)
+#define UFFDIO_WAKE_IOR(UFFDIO, _UFFDIO_WAKE,  \
+struct uffdio_range)
+#define UFFDIO_COPY_IOWR(UFFDIO, _UFFDIO_COPY, \
+ struct uffdio_copy)
+#define UFFDIO_ZEROPAGE_IOWR(UFFDIO, _UFFDIO_ZEROPAGE, \
+ struct uffdio_zeropage)
+
+/* read() structure */
+struct uffd_msg {
+   __u8event;
+
+   __u8reserved1;
+   __u16   reserved2;
+   __u32   reserved3;
+
+   union {
+   struct {
+   __u64   flags;
+   __u64   address;
+   } pagefault;
+
+   struct {
+   /* unused reserved fields */
+   __u64   reserved1;
+   __u64   reserved2;
+   __u64   reserved3;
+   } reserved;
+   } arg;
+} __packed;
+
+/*
+ * Start at 0x12 and not at 0 to be more strict against bugs.
+ */
+#define UFFD_EVENT_PAGEFAULT   0x12
+#if 0 /* not available yet */
+#define UFFD_EVENT_FORK0x13
+#endif
+
+/* flags for UFFD_EVENT_PAGEFAULT */
+#define UFFD_PAGEFAULT_FLAG_WRITE  (1<<0)  /* If this was a write fault */
+#define UFFD_PAGEFAULT_FLAG_WP (1<<1)  /* If reason is VM_UFFD_WP */
+
+struct uffdio_api {
+   /* userland asks for an API number and the features to enable */
+   __u64 api;
+   /*
+* Kernel answers below with the all available features for
+* the API, this notifies userland of which events and/or
+* which flags for each event are enabled in the current
+* kernel.
+*
+* Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE
+* are to be considered implicitly always enabled in all kernels as
+* long as the uffdio_api.api requested matches UFFD_API.
+*/
+#if 0 /* not available yet */
+#define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0)
+#define UFFD_FEATURE_EVENT_FORK(1<<1)

[Qemu-devel] [PATCH 07/10] snapshot: create bdrv_all_create_snapshot helper

2015-11-10 Thread Denis V. Lunev

to create a snapshot for all loaded block drivers.

The patch also ensures proper locking.

Signed-off-by: Denis V. Lunev 
CC: Juan Quintela 
CC: Stefan Hajnoczi 
CC: Kevin Wolf 
---
 block/snapshot.c | 26 ++
 include/block/snapshot.h |  4 
 migration/savevm.c   | 17 -
 3 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/block/snapshot.c b/block/snapshot.c
index 97dc315..6de53cb 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -444,3 +444,29 @@ int bdrv_all_find_snapshot(const char *name, bool 
read_only,
 *first_bad_bs = bs;
 return err;
 }
+
+int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn,
+ BlockDriverState *vm_state_bs,
+ uint64_t vm_state_size,
+ BlockDriverState **first_bad_bs)
+{
+int err = 0;
+BlockDriverState *bs = NULL;
+
+while (err == 0 && (bs = bdrv_next(bs))) {
+AioContext *ctx = bdrv_get_aio_context(bs);
+
+aio_context_acquire(ctx);
+if (bs == vm_state_bs) {
+sn->vm_state_size = vm_state_size;
+err = bdrv_snapshot_create(bs, sn);
+} else if (bdrv_can_snapshot(bs)) {
+sn->vm_state_size = 0;
+err = bdrv_snapshot_create(bs, sn);
+}
+aio_context_release(ctx);
+}
+
+*first_bad_bs = bs;
+return err;
+}
diff --git a/include/block/snapshot.h b/include/block/snapshot.h
index 0fae32b..5f43c0b 100644
--- a/include/block/snapshot.h
+++ b/include/block/snapshot.h
@@ -87,5 +87,9 @@ int bdrv_all_delete_snapshot(const char *name, 
BlockDriverState **first_bsd_bs,
 int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bsd_bs);
 int bdrv_all_find_snapshot(const char *name, bool read_only,
BlockDriverState **first_bad_bs);
+int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn,
+ BlockDriverState *vm_state_bs,
+ uint64_t vm_state_size,
+ BlockDriverState **first_bad_bs);
 
 #endif
diff --git a/migration/savevm.c b/migration/savevm.c
index 4c652f3..c2d677d 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1328,19 +1328,10 @@ void hmp_savevm(Monitor *mon, const QDict *qdict)
 goto the_end;
 }
 
-/* create the snapshots */
-
-bs1 = NULL;
-while ((bs1 = bdrv_next(bs1))) {
-if (bdrv_can_snapshot(bs1)) {
-/* Write VM state size only to the image that contains the state */
-sn->vm_state_size = (bs == bs1 ? vm_state_size : 0);
-ret = bdrv_snapshot_create(bs1, sn);
-if (ret < 0) {
-monitor_printf(mon, "Error while creating snapshot on '%s'\n",
-   bdrv_get_device_name(bs1));
-}
-}
+ret = bdrv_all_create_snapshot(sn, bs, vm_state_size, );
+if (ret < 0) {
+monitor_printf(mon, "Error while creating snapshot on '%s'\n",
+   bdrv_get_device_name(bs));
 }
 
  the_end:
-- 
2.5.0

[Qemu-devel] [PULL 28/57] postcopy: OS support test

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

Provide a check to see if the OS we're running on has all the bits
needed for postcopy.

Creates postcopy-ram.c which will get most of the other helpers we need.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Amit Shah 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 include/migration/postcopy-ram.h |  19 +
 migration/Makefile.objs  |   2 +-
 migration/postcopy-ram.c | 157 +++
 migration/savevm.c   |   5 ++
 4 files changed, 182 insertions(+), 1 deletion(-)
 create mode 100644 include/migration/postcopy-ram.h
 create mode 100644 migration/postcopy-ram.c

diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h
new file mode 100644
index 000..d81934f
--- /dev/null
+++ b/include/migration/postcopy-ram.h
@@ -0,0 +1,19 @@
+/*
+ * Postcopy migration for RAM
+ *
+ * Copyright 2013 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ *  Dave Gilbert  
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+#ifndef QEMU_POSTCOPY_RAM_H
+#define QEMU_POSTCOPY_RAM_H
+
+/* Return true if the host supports everything we need to do postcopy-ram */
+bool postcopy_ram_supported_by_host(void);
+
+#endif
diff --git a/migration/Makefile.objs b/migration/Makefile.objs
index d929e96..0cac6d7 100644
--- a/migration/Makefile.objs
+++ b/migration/Makefile.objs
@@ -1,7 +1,7 @@
 common-obj-y += migration.o tcp.o
 common-obj-y += vmstate.o
 common-obj-y += qemu-file.o qemu-file-buf.o qemu-file-unix.o qemu-file-stdio.o
-common-obj-y += xbzrle.o
+common-obj-y += xbzrle.o postcopy-ram.o

 common-obj-$(CONFIG_RDMA) += rdma.o
 common-obj-$(CONFIG_POSIX) += exec.o unix.o fd.o
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
new file mode 100644
index 000..cdd0168
--- /dev/null
+++ b/migration/postcopy-ram.c
@@ -0,0 +1,157 @@
+/*
+ * Postcopy migration for RAM
+ *
+ * Copyright 2013-2015 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ *  Dave Gilbert  
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+/*
+ * Postcopy is a migration technique where the execution flips from the
+ * source to the destination before all the data has been copied.
+ */
+
+#include 
+#include 
+#include 
+
+#include "qemu-common.h"
+#include "migration/migration.h"
+#include "migration/postcopy-ram.h"
+#include "sysemu/sysemu.h"
+#include "qemu/error-report.h"
+#include "trace.h"
+
+/* Postcopy needs to detect accesses to pages that haven't yet been copied
+ * across, and efficiently map new pages in, the techniques for doing this
+ * are target OS specific.
+ */
+#if defined(__linux__)
+
+#include 
+#include 
+#include 
+#include 
+#include  /* for __u64 */
+#endif
+
+#if defined(__linux__) && defined(__NR_userfaultfd)
+#include 
+
+static bool ufd_version_check(int ufd)
+{
+struct uffdio_api api_struct;
+uint64_t ioctl_mask;
+
+api_struct.api = UFFD_API;
+api_struct.features = 0;
+if (ioctl(ufd, UFFDIO_API, _struct)) {
+error_report("postcopy_ram_supported_by_host: UFFDIO_API failed: %s",
+ strerror(errno));
+return false;
+}
+
+ioctl_mask = (__u64)1 << _UFFDIO_REGISTER |
+ (__u64)1 << _UFFDIO_UNREGISTER;
+if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) {
+error_report("Missing userfault features: %" PRIx64,
+ (uint64_t)(~api_struct.ioctls & ioctl_mask));
+return false;
+}
+
+return true;
+}
+
+bool postcopy_ram_supported_by_host(void)
+{
+long pagesize = getpagesize();
+int ufd = -1;
+bool ret = false; /* Error unless we change it */
+void *testarea = NULL;
+struct uffdio_register reg_struct;
+struct uffdio_range range_struct;
+uint64_t feature_mask;
+
+if ((1ul << qemu_target_page_bits()) > pagesize) {
+error_report("Target page size bigger than host page size");
+goto out;
+}
+
+ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
+if (ufd == -1) {
+error_report("%s: userfaultfd not available: %s", __func__,
+ strerror(errno));
+goto out;
+}
+
+/* Version and features check */
+if (!ufd_version_check(ufd)) {
+goto out;
+}
+
+/*
+ *  We need to check that the ops we need are supported on anon memory
+ *  To do that we need to register a chunk and see the flags that
+ *  are returned.
+ */
+testarea = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE |
+MAP_ANONYMOUS, -1, 0);
+if (testarea == MAP_FAILED) {
+error_report("%s: Failed to map

[Qemu-devel] [PULL 09/57] Add wrapper for setting blocking status on a QEMUFile

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

Add a wrapper to change the blocking status on a QEMUFile
rather than having to use qemu_set_block(qemu_get_fd(f));
it seems best to avoid exposing the fd since not all QEMUFile's
really have one.  With this wrapper we could move the implementation
down to be different on different transports.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Amit Shah 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 include/migration/qemu-file.h |  1 +
 migration/qemu-file.c | 15 +++
 2 files changed, 16 insertions(+)

diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
index 86bb972..66e741f 100644
--- a/include/migration/qemu-file.h
+++ b/include/migration/qemu-file.h
@@ -197,6 +197,7 @@ int qemu_file_get_error(QEMUFile *f);
 void qemu_file_set_error(QEMUFile *f, int ret);
 int qemu_file_shutdown(QEMUFile *f);
 void qemu_fflush(QEMUFile *f);
+void qemu_file_set_blocking(QEMUFile *f, bool block);

 static inline void qemu_put_be64s(QEMUFile *f, const uint64_t *pv)
 {
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index e41a677..9ec2267 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -648,3 +648,18 @@ size_t qemu_get_counted_string(QEMUFile *f, char buf[256])

 return res == len ? res : 0;
 }
+
+/*
+ * Set the blocking state of the QEMUFile.
+ * Note: On some transports the OS only keeps a single blocking state for
+ *   both directions, and thus changing the blocking on the main
+ *   QEMUFile can also affect the return path.
+ */
+void qemu_file_set_blocking(QEMUFile *f, bool block)
+{
+if (block) {
+qemu_set_block(qemu_get_fd(f));
+} else {
+qemu_set_nonblock(qemu_get_fd(f));
+}
+}
-- 
2.5.0

[Qemu-devel] [PULL 26/57] MIG_CMD_PACKAGED: Send a packaged chunk of migration stream

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

MIG_CMD_PACKAGED is a migration command that wraps a chunk of migration
stream inside a package whose length can be determined purely by reading
its header.  The destination guarantees that the whole MIG_CMD_PACKAGED
is read off the stream prior to parsing the contents.

This is used by postcopy to load device state (from the package)
while leaving the main stream free to receive memory pages.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Amit Shah 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 include/sysemu/sysemu.h |   4 ++
 migration/savevm.c  | 104 +---
 trace-events|   4 ++
 3 files changed, 107 insertions(+), 5 deletions(-)

diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 6225e00..c27b926 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -99,9 +99,12 @@ enum qemu_vm_cmd {
 MIG_CMD_POSTCOPY_RAM_DISCARD,  /* A list of pages to discard that
   were previously sent during
   precopy but are dirty. */
+MIG_CMD_PACKAGED,  /* Send a wrapped stream within this stream */
 MIG_CMD_MAX
 };

+#define MAX_VM_CMD_PACKAGED_SIZE (1ul << 24)
+
 bool qemu_savevm_state_blocked(Error **errp);
 void qemu_savevm_state_begin(QEMUFile *f,
  const MigrationParams *params);
@@ -114,6 +117,7 @@ void qemu_savevm_command_send(QEMUFile *f, enum qemu_vm_cmd 
command,
   uint16_t len, uint8_t *data);
 void qemu_savevm_send_ping(QEMUFile *f, uint32_t value);
 void qemu_savevm_send_open_return_path(QEMUFile *f);
+int qemu_savevm_send_packaged(QEMUFile *f, const QEMUSizedBuffer *qsb);
 void qemu_savevm_send_postcopy_advise(QEMUFile *f);
 void qemu_savevm_send_postcopy_listen(QEMUFile *f);
 void qemu_savevm_send_postcopy_run(QEMUFile *f);
diff --git a/migration/savevm.c b/migration/savevm.c
index 1ce022a..f499dfa 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -74,6 +74,7 @@ static struct mig_cmd_args {
 [MIG_CMD_POSTCOPY_RUN] = { .len =  0, .name = "POSTCOPY_RUN" },
 [MIG_CMD_POSTCOPY_RAM_DISCARD] = {
.len = -1, .name = "POSTCOPY_RAM_DISCARD" },
+[MIG_CMD_PACKAGED] = { .len =  4, .name = "PACKAGED" },
 [MIG_CMD_MAX]  = { .len = -1, .name = "MAX" },
 };

@@ -749,6 +750,48 @@ void qemu_savevm_send_open_return_path(QEMUFile *f)
 qemu_savevm_command_send(f, MIG_CMD_OPEN_RETURN_PATH, 0, NULL);
 }

+/* We have a buffer of data to send; we don't want that all to be loaded
+ * by the command itself, so the command contains just the length of the
+ * extra buffer that we then send straight after it.
+ * TODO: Must be a better way to organise that
+ *
+ * Returns:
+ *0 on success
+ *-ve on error
+ */
+int qemu_savevm_send_packaged(QEMUFile *f, const QEMUSizedBuffer *qsb)
+{
+size_t cur_iov;
+size_t len = qsb_get_length(qsb);
+uint32_t tmp;
+
+if (len > MAX_VM_CMD_PACKAGED_SIZE) {
+error_report("%s: Unreasonably large packaged state: %zu",
+ __func__, len);
+return -1;
+}
+
+tmp = cpu_to_be32(len);
+
+trace_qemu_savevm_send_packaged();
+qemu_savevm_command_send(f, MIG_CMD_PACKAGED, 4, (uint8_t *));
+
+/* all the data follows (concatinating the iov's) */
+for (cur_iov = 0; cur_iov < qsb->n_iov; cur_iov++) {
+/* The iov entries are partially filled */
+size_t towrite = MIN(qsb->iov[cur_iov].iov_len, len);
+len -= towrite;
+
+if (!towrite) {
+break;
+}
+
+qemu_put_buffer(f, qsb->iov[cur_iov].iov_base, towrite);
+}
+
+return 0;
+}
+
 /* Send prior to any postcopy transfer */
 void qemu_savevm_send_postcopy_advise(QEMUFile *f)
 {
@@ -1300,12 +1343,60 @@ static int 
loadvm_postcopy_handle_run(MigrationIncomingState *mis)
 }

 /**
- * loadvm_process_command: Process an incoming 'QEMU_VM_COMMAND'
+ * Immediately following this command is a blob of data containing an embedded
+ * chunk of migration stream; read it and load it.
  *
- * Returns: 0 on just a normal return
- *  LOADVM_QUIT All good, but exit the loop
- *  <0 error (in which case it will issue an error message).
- * @f: The stream to read the command data from.
+ * @mis: Incoming state
+ * @length: Length of packaged data to read
+ *
+ * Returns: Negative values on error
+ *
+ */
+static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis)
+{
+int ret;
+uint8_t *buffer;
+uint32_t length;
+QEMUSizedBuffer *qsb;
+
+length = qemu_get_be32(mis->from_src_file);
+trace_loadvm_handle_cmd_packaged(length);
+
+if (length > MAX_VM_CMD_PACKAGED_SIZE) {
+error_report("Unreasonably large packaged

[Qemu-devel] [PULL 02/57] Provide runtime Target page information

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

The migration code generally is built target-independent, however
there are a few places where knowing the target page size would
avoid artificially moving stuff into migration/ram.c.

Provide 'qemu_target_page_bits()' that returns TARGET_PAGE_BITS
to other bits of code so that they can stay target-independent.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Amit Shah 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 exec.c  | 10 ++
 include/sysemu/sysemu.h |  1 +
 2 files changed, 11 insertions(+)

diff --git a/exec.c b/exec.c
index a028961..4ced1a6 100644
--- a/exec.c
+++ b/exec.c
@@ -3502,6 +3502,16 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
 }
 return 0;
 }
+
+/*
+ * Allows code that needs to deal with migration bitmaps etc to still be built
+ * target independent.
+ */
+size_t qemu_target_page_bits(void)
+{
+return TARGET_PAGE_BITS;
+}
+
 #endif

 /*
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 5cb0f05..8dc2add 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -70,6 +70,7 @@ void qemu_system_killed(int signal, pid_t pid);
 void qemu_devices_reset(void);
 void qemu_system_reset(bool report);
 void qemu_system_guest_panicked(void);
+size_t qemu_target_page_bits(void);

 void qemu_add_exit_notifier(Notifier *notify);
 void qemu_remove_exit_notifier(Notifier *notify);
-- 
2.5.0

[Qemu-devel] [PULL 12/57] ram_load: Factor out host_from_stream_offset call and check

2015-11-10 Thread Juan Quintela

From: "Dr. David Alan Gilbert" 

The main RAM load loop has a call to host_from_stream_offset for
each page type that actually loads data with the same test;
factor it out before the switch.

The host = NULL is to silence a bogus gcc warning about an
uninitialised variable in the RAM_SAVE_COMPRESS_PAGE case; gcc
doesn't seem to realise that host is always initialised by the if at
the top in the cases the switch takes.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 migration/ram.c | 41 +++--
 1 file changed, 15 insertions(+), 26 deletions(-)

diff --git a/migration/ram.c b/migration/ram.c
index 86bf657..298332c 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1592,13 +1592,23 @@ static int ram_load(QEMUFile *f, void *opaque, int 
version_id)
 rcu_read_lock();
 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
 ram_addr_t addr, total_ram_bytes;
-void *host;
+void *host = NULL;
 uint8_t ch;

 addr = qemu_get_be64(f);
 flags = addr & ~TARGET_PAGE_MASK;
 addr &= TARGET_PAGE_MASK;

+if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE |
+ RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
+host = host_from_stream_offset(f, addr, flags);
+if (!host) {
+error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
+ret = -EINVAL;
+break;
+}
+}
+
 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
 case RAM_SAVE_FLAG_MEM_SIZE:
 /* Synchronize RAM block list */
@@ -1635,33 +1645,17 @@ static int ram_load(QEMUFile *f, void *opaque, int 
version_id)
 total_ram_bytes -= length;
 }
 break;
+
 case RAM_SAVE_FLAG_COMPRESS:
-host = host_from_stream_offset(f, addr, flags);
-if (!host) {
-error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
-ret = -EINVAL;
-break;
-}
 ch = qemu_get_byte(f);
 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
 break;
+
 case RAM_SAVE_FLAG_PAGE:
-host = host_from_stream_offset(f, addr, flags);
-if (!host) {
-error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
-ret = -EINVAL;
-break;
-}
 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
 break;
+
 case RAM_SAVE_FLAG_COMPRESS_PAGE:
-host = host_from_stream_offset(f, addr, flags);
-if (!host) {
-error_report("Invalid RAM offset " RAM_ADDR_FMT, addr);
-ret = -EINVAL;
-break;
-}
-
 len = qemu_get_be32(f);
 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
 error_report("Invalid compressed data length: %d", len);
@@ -1671,13 +1665,8 @@ static int ram_load(QEMUFile *f, void *opaque, int 
version_id)
 qemu_get_buffer(f, compressed_data_buf, len);
 decompress_data_with_multi_threads(compressed_data_buf, host, len);
 break;
+
 case RAM_SAVE_FLAG_XBZRLE:
-host = host_from_stream_offset(f, addr, flags);
-if (!host) {
-error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
-ret = -EINVAL;
-break;
-}
 if (load_xbzrle(f, addr, host) < 0) {
 error_report("Failed to decompress XBZRLE page at "
  RAM_ADDR_FMT, addr);
-- 
2.5.0

1 2 3 4 5 6 >

1 - 100 of 520 matches

Mail list logo