[Qemu-devel] [PATCH 3/5] migration: implement the get_return_path for RDMA iochannel

2018-04-07 Thread Lidong Chen
the default get_return_path function does not work for RDMA live
migration, the patch implement the get_return_path for RDMA iochannel.

Signed-off-by: Lidong Chen 
---
 migration/rdma.c | 38 ++
 1 file changed, 38 insertions(+)

diff --git a/migration/rdma.c b/migration/rdma.c
index 230bd97..53773c7 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -3638,6 +3638,40 @@ err:
 return ret;
 }
 
+static QEMUFile *qio_channel_rdma_get_return_path(void *opaque, int input)
+{
+QIOChannel *ioc = opaque;
+QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
+RDMAContext *rdma = rioc->rdma;
+
+QIOChannelRDMA *rioc_return_path;
+RDMAContext *rdma_return_path = rdma->return_path;
+
+if (!rdma_return_path) {
+return NULL;
+}
+
+rioc_return_path = QIO_CHANNEL_RDMA(object_new(TYPE_QIO_CHANNEL_RDMA));
+rioc_return_path->rdma = rdma_return_path;
+if (input) {
+rioc_return_path->file = qemu_fopen_channel_output(
+ QIO_CHANNEL(rioc_return_path));
+} else {
+rioc_return_path->file = qemu_fopen_channel_input(
+ QIO_CHANNEL(rioc_return_path));
+}
+return rioc_return_path->file;
+}
+
+static QEMUFile *qio_channel_rdma_get_output_return_path(void *opaque)
+{
+   return qio_channel_rdma_get_return_path(opaque, 0);
+}
+
+static QEMUFile *qio_channel_rdma_get_input_return_path(void *opaque)
+{
+   return qio_channel_rdma_get_return_path(opaque, 1);
+}
 static const QEMUFileHooks rdma_read_hooks = {
 .hook_ram_load = rdma_load_hook,
 };
@@ -3700,9 +3734,13 @@ static QEMUFile *qemu_fopen_rdma(RDMAContext *rdma, 
const char *mode)
 if (mode[0] == 'w') {
 rioc->file = qemu_fopen_channel_output(QIO_CHANNEL(rioc));
 qemu_file_set_hooks(rioc->file, &rdma_write_hooks);
+qemu_file_set_return_path(rioc->file,
+  qio_channel_rdma_get_output_return_path);
 } else {
 rioc->file = qemu_fopen_channel_input(QIO_CHANNEL(rioc));
 qemu_file_set_hooks(rioc->file, &rdma_read_hooks);
+qemu_file_set_return_path(rioc->file,
+  qio_channel_rdma_get_input_return_path);
 }
 
 return rioc->file;
-- 
1.8.3.1




[Qemu-devel] [PATCH 0/5] Enable postcopy RDMA live migration

2018-04-07 Thread Lidong Chen
Current Qemu RDMA communication does not support send and receive
data at the same time, so when RDMA live migration with postcopy
enabled, the source qemu return path thread get qemu file error.

Those patch add the postcopy support for RDMA live migration.

Lidong Chen (5):
  migration: create a dedicated connection for rdma return path
  migration: add the interface to set get_return_path
  migration: implement the get_return_path for RDMA iochannel
  migration: fix qemu carsh when RDMA live migration
  migration: disable RDMA WRITR after postcopy started.

 migration/qemu-file-channel.c |  12 ++--
 migration/qemu-file.c |  13 +++-
 migration/qemu-file.h |   2 +-
 migration/rdma.c  | 148 --
 4 files changed, 163 insertions(+), 12 deletions(-)

-- 
1.8.3.1




[Qemu-devel] [PATCH 1/5] migration: create a dedicated connection for rdma return path

2018-04-07 Thread Lidong Chen
If start a RDMA migration with postcopy enabled, the source qemu
establish a dedicated connection for return path.

Signed-off-by: Lidong Chen 
---
 migration/rdma.c | 94 ++--
 1 file changed, 91 insertions(+), 3 deletions(-)

diff --git a/migration/rdma.c b/migration/rdma.c
index da474fc..230bd97 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -387,6 +387,10 @@ typedef struct RDMAContext {
 uint64_t unregistrations[RDMA_SIGNALED_SEND_MAX];
 
 GHashTable *blockmap;
+
+/* the RDMAContext for return path */
+struct RDMAContext *return_path;
+bool is_return_path;
 } RDMAContext;
 
 #define TYPE_QIO_CHANNEL_RDMA "qio-channel-rdma"
@@ -2329,10 +2333,22 @@ static void qemu_rdma_cleanup(RDMAContext *rdma)
 rdma_destroy_id(rdma->cm_id);
 rdma->cm_id = NULL;
 }
+
+/* the destination side, listen_id and channel is shared */
 if (rdma->listen_id) {
-rdma_destroy_id(rdma->listen_id);
+if (!rdma->is_return_path) {
+rdma_destroy_id(rdma->listen_id);
+}
 rdma->listen_id = NULL;
+
+if (rdma->channel) {
+if (!rdma->is_return_path) {
+rdma_destroy_event_channel(rdma->channel);
+}
+rdma->channel = NULL;
+}
 }
+
 if (rdma->channel) {
 rdma_destroy_event_channel(rdma->channel);
 rdma->channel = NULL;
@@ -2561,6 +2577,25 @@ err_dest_init_create_listen_id:
 
 }
 
+static void qemu_rdma_return_path_dest_init(RDMAContext *rdma_return_path,
+RDMAContext *rdma)
+{
+int idx;
+
+for (idx = 0; idx < RDMA_WRID_MAX; idx++) {
+rdma_return_path->wr_data[idx].control_len = 0;
+rdma_return_path->wr_data[idx].control_curr = NULL;
+}
+
+/*the CM channel and CM id is shared*/
+rdma_return_path->channel = rdma->channel;
+rdma_return_path->listen_id = rdma->listen_id;
+
+rdma->return_path = rdma_return_path;
+rdma_return_path->return_path = rdma;
+rdma_return_path->is_return_path = true;
+}
+
 static void *qemu_rdma_data_init(const char *host_port, Error **errp)
 {
 RDMAContext *rdma = NULL;
@@ -3014,6 +3049,8 @@ err:
 return ret;
 }
 
+static void rdma_accept_incoming_migration(void *opaque);
+
 static int qemu_rdma_accept(RDMAContext *rdma)
 {
 RDMACapabilities cap;
@@ -3108,7 +3145,14 @@ static int qemu_rdma_accept(RDMAContext *rdma)
 }
 }
 
-qemu_set_fd_handler(rdma->channel->fd, NULL, NULL, NULL);
+/* Accept the second connection request for return path */
+if (migrate_postcopy() && !rdma->is_return_path) {
+qemu_set_fd_handler(rdma->channel->fd, rdma_accept_incoming_migration,
+NULL,
+(void *)(intptr_t)rdma->return_path);
+} else {
+qemu_set_fd_handler(rdma->channel->fd, NULL, NULL, NULL);
+}
 
 ret = rdma_accept(rdma->cm_id, &conn_param);
 if (ret) {
@@ -3681,6 +3725,10 @@ static void rdma_accept_incoming_migration(void *opaque)
 
 trace_qemu_rdma_accept_incoming_migration_accepted();
 
+if (rdma->is_return_path) {
+return;
+}
+
 f = qemu_fopen_rdma(rdma, "rb");
 if (f == NULL) {
 ERROR(errp, "could not qemu_fopen_rdma!");
@@ -3695,7 +3743,7 @@ static void rdma_accept_incoming_migration(void *opaque)
 void rdma_start_incoming_migration(const char *host_port, Error **errp)
 {
 int ret;
-RDMAContext *rdma;
+RDMAContext *rdma, *rdma_return_path;
 Error *local_err = NULL;
 
 trace_rdma_start_incoming_migration();
@@ -3722,12 +3770,24 @@ void rdma_start_incoming_migration(const char 
*host_port, Error **errp)
 
 trace_rdma_start_incoming_migration_after_rdma_listen();
 
+/* initialize the RDMAContext for return path */
+if (migrate_postcopy()) {
+rdma_return_path = qemu_rdma_data_init(host_port, &local_err);
+
+if (rdma_return_path == NULL) {
+goto err;
+}
+
+qemu_rdma_return_path_dest_init(rdma_return_path, rdma);
+}
+
 qemu_set_fd_handler(rdma->channel->fd, rdma_accept_incoming_migration,
 NULL, (void *)(intptr_t)rdma);
 return;
 err:
 error_propagate(errp, local_err);
 g_free(rdma);
+g_free(rdma_return_path);
 }
 
 void rdma_start_outgoing_migration(void *opaque,
@@ -3735,6 +3795,7 @@ void rdma_start_outgoing_migration(void *opaque,
 {
 MigrationState *s = opaque;
 RDMAContext *rdma = qemu_rdma_data_init(host_port, errp);
+RDMAContext *rdma_return_path = NULL;
 int ret = 0;
 
 if (rdma == NULL) {
@@ -3755,6 +3816,32 @@ void rdma_start_outgoing_migration(void *opaque,
 goto err;
 }
 
+/* RDMA postcopy need a seprate queue pair for return path */
+if (migrate_postcopy()) {
+rdma_return_path = qemu_rdma_data_init(host_port, errp);
+
+if (rdma_return_path == NULL) {
+   

[Qemu-devel] [PATCH 2/5] migration: add the interface to set get_return_path

2018-04-07 Thread Lidong Chen
The default get_return_path function of iochannel does not work for
RDMA live migration. So add the interface to set get_return_path.

Signed-off-by: Lidong Chen 
---
 migration/qemu-file-channel.c | 12 
 migration/qemu-file.c | 10 --
 migration/qemu-file.h |  2 +-
 3 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/migration/qemu-file-channel.c b/migration/qemu-file-channel.c
index e202d73..d4dd8c4 100644
--- a/migration/qemu-file-channel.c
+++ b/migration/qemu-file-channel.c
@@ -156,7 +156,6 @@ static const QEMUFileOps channel_input_ops = {
 .close = channel_close,
 .shut_down = channel_shutdown,
 .set_blocking = channel_set_blocking,
-.get_return_path = channel_get_input_return_path,
 };
 
 
@@ -165,18 +164,23 @@ static const QEMUFileOps channel_output_ops = {
 .close = channel_close,
 .shut_down = channel_shutdown,
 .set_blocking = channel_set_blocking,
-.get_return_path = channel_get_output_return_path,
 };
 
 
 QEMUFile *qemu_fopen_channel_input(QIOChannel *ioc)
 {
+QEMUFile *f;
 object_ref(OBJECT(ioc));
-return qemu_fopen_ops(ioc, &channel_input_ops);
+f = qemu_fopen_ops(ioc, &channel_input_ops);
+qemu_file_set_return_path(f, channel_get_input_return_path);
+return f;
 }
 
 QEMUFile *qemu_fopen_channel_output(QIOChannel *ioc)
 {
+QEMUFile *f;
 object_ref(OBJECT(ioc));
-return qemu_fopen_ops(ioc, &channel_output_ops);
+f = qemu_fopen_ops(ioc, &channel_output_ops);
+qemu_file_set_return_path(f, channel_get_output_return_path);
+return f;
 }
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index bb63c77..8acb574 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -36,6 +36,7 @@
 struct QEMUFile {
 const QEMUFileOps *ops;
 const QEMUFileHooks *hooks;
+QEMURetPathFunc *get_return_path;
 void *opaque;
 
 int64_t bytes_xfer;
@@ -72,10 +73,15 @@ int qemu_file_shutdown(QEMUFile *f)
  */
 QEMUFile *qemu_file_get_return_path(QEMUFile *f)
 {
-if (!f->ops->get_return_path) {
+if (!f->get_return_path) {
 return NULL;
 }
-return f->ops->get_return_path(f->opaque);
+return f->get_return_path(f->opaque);
+}
+
+void qemu_file_set_return_path(QEMUFile *f, QEMURetPathFunc *get_return_path)
+{
+f->get_return_path = get_return_path;
 }
 
 bool qemu_file_mode_is_not_valid(const char *mode)
diff --git a/migration/qemu-file.h b/migration/qemu-file.h
index f4f356a..74210b7 100644
--- a/migration/qemu-file.h
+++ b/migration/qemu-file.h
@@ -102,7 +102,6 @@ typedef struct QEMUFileOps {
 QEMUFileCloseFunc *close;
 QEMUFileSetBlocking *set_blocking;
 QEMUFileWritevBufferFunc *writev_buffer;
-QEMURetPathFunc *get_return_path;
 QEMUFileShutdownFunc *shut_down;
 } QEMUFileOps;
 
@@ -114,6 +113,7 @@ typedef struct QEMUFileHooks {
 } QEMUFileHooks;
 
 QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops);
+void qemu_file_set_return_path(QEMUFile *f, QEMURetPathFunc *get_return_path);
 void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks);
 int qemu_get_fd(QEMUFile *f);
 int qemu_fclose(QEMUFile *f);
-- 
1.8.3.1




[Qemu-devel] [PATCH 4/5] migration: fix qemu carsh when RDMA live migration

2018-04-07 Thread Lidong Chen
After postcopy, the destination qemu work in the dedicated
thread, so only invoke yield_until_fd_readable before postcopy
migration.

Signed-off-by: Lidong Chen 
---
 migration/rdma.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/migration/rdma.c b/migration/rdma.c
index 53773c7..81be482 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -1489,11 +1489,13 @@ static int qemu_rdma_wait_comp_channel(RDMAContext 
*rdma)
  * Coroutine doesn't start until migration_fd_process_incoming()
  * so don't yield unless we know we're running inside of a coroutine.
  */
-if (rdma->migration_started_on_destination) {
+if (rdma->migration_started_on_destination &&
+migration_incoming_get_current()->state == MIGRATION_STATUS_ACTIVE) {
 yield_until_fd_readable(rdma->comp_channel->fd);
 } else {
 /* This is the source side, we're in a separate thread
  * or destination prior to migration_fd_process_incoming()
+ * after postcopy, the destination also in a seprate thread.
  * we can't yield; so we have to poll the fd.
  * But we need to be able to handle 'cancel' or an error
  * without hanging forever.
-- 
1.8.3.1




[Qemu-devel] [PATCH 5/5] migration: disable RDMA WRITR after postcopy started.

2018-04-07 Thread Lidong Chen
RDMA write operations are performed with no notification to the destination
qemu, then the destination qemu can not wakeup. So disable RDMA WRITE after
postcopy started.

Signed-off-by: Lidong Chen 
---
 migration/qemu-file.c |  3 ++-
 migration/rdma.c  | 12 
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 8acb574..a64ac3a 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -260,7 +260,8 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t 
block_offset,
 int ret = f->hooks->save_page(f, f->opaque, block_offset,
   offset, size, bytes_sent);
 f->bytes_xfer += size;
-if (ret != RAM_SAVE_CONTROL_DELAYED) {
+if (ret != RAM_SAVE_CONTROL_DELAYED &&
+ret != RAM_SAVE_CONTROL_NOT_SUPP) {
 if (bytes_sent && *bytes_sent > 0) {
 qemu_update_position(f, *bytes_sent);
 } else if (ret < 0) {
diff --git a/migration/rdma.c b/migration/rdma.c
index 81be482..8529ddd 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -2964,6 +2964,10 @@ static size_t qemu_rdma_save_page(QEMUFile *f, void 
*opaque,
 
 CHECK_ERROR_STATE();
 
+if (migrate_get_current()->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
+return RAM_SAVE_CONTROL_NOT_SUPP;
+}
+
 qemu_fflush(f);
 
 if (size > 0) {
@@ -3528,6 +3532,10 @@ static int qemu_rdma_registration_start(QEMUFile *f, 
void *opaque,
 
 CHECK_ERROR_STATE();
 
+if (migrate_get_current()->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
+return 0;
+}
+
 trace_qemu_rdma_registration_start(flags);
 qemu_put_be64(f, RAM_SAVE_FLAG_HOOK);
 qemu_fflush(f);
@@ -3550,6 +3558,10 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void 
*opaque,
 
 CHECK_ERROR_STATE();
 
+if (migrate_get_current()->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
+return 0;
+}
+
 qemu_fflush(f);
 ret = qemu_rdma_drain_cq(f, rdma);
 
-- 
1.8.3.1




[Qemu-devel] [PATCH v2 1/2] vfio-ccw: fix memory leaks in vfio_ccw_realize()

2018-04-07 Thread Greg Kurz
If the subchannel is already attached or if vfio_get_device() fails, the
code jumps to the 'out_device_err' label and doesn't free the string it
has just allocated.

The code should be reworked so that vcdev->vdev.name only gets set when
the device has been attached, and freed when it is about to be detached.
This could be achieved  with the addition of a vfio_ccw_get_device()
function that would be the counterpart of vfio_put_device(). But this is
a more elaborate cleanup that should be done in a follow-up. For now,
let's just add calls to g_free() on the buggy error paths.

Signed-off-by: Greg Kurz 
---
 hw/vfio/ccw.c |2 ++
 1 file changed, 2 insertions(+)

diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index 4e5855741a64..fe34b507699f 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -357,11 +357,13 @@ static void vfio_ccw_realize(DeviceState *dev, Error 
**errp)
 if (strcmp(vbasedev->name, vcdev->vdev.name) == 0) {
 error_setg(&err, "vfio: subchannel %s has already been attached",
vcdev->vdev.name);
+g_free(vcdev->vdev.name);
 goto out_device_err;
 }
 }
 
 if (vfio_get_device(group, cdev->mdevid, &vcdev->vdev, &err)) {
+g_free(vcdev->vdev.name);
 goto out_device_err;
 }
 




[Qemu-devel] [PATCH v2 0/2] vfio-ccw: fix memory leak + cleanup

2018-04-07 Thread Greg Kurz
This series fixes two leaks in vfio_ccw_realize(). The v2 is basically
the same as the initial series, split in two patches: a trivial bugfix
suitable for 2.12 and stable, and a follow-up with more cleanup for
2.13.

--
Greg

---

Greg Kurz (2):
  vfio-ccw: fix memory leaks in vfio_ccw_realize()
  vfio-ccw: introduce vfio_ccw_get_device()


 hw/vfio/ccw.c |   54 --
 1 file changed, 36 insertions(+), 18 deletions(-)




[Qemu-devel] [PATCH v2 2/2] vfio-ccw: introduce vfio_ccw_get_device()

2018-04-07 Thread Greg Kurz
A recent patch fixed leaks of the dynamically allocated vcdev->vdev.name
field in vfio_ccw_realize(), but we now have three freeing sites for it.
This is unfortunate and seems to indicate something is wrong with its
life cycle.

The root issue is that vcdev->vdev.name is set before vfio_get_device()
is called, which theoretically prevents to call vfio_put_device() to
do the freeing. Well actually, we could call it anyway  because
vfio_put_base_device() is a nop if the device isn't attached, but this
would be confusing.

This patch hence moves all the logic of attaching the device, including
the "already attached" check, to a separate vfio_ccw_get_device() function,
counterpart of vfio_put_device(). While here, vfio_put_device() is renamed
to vfio_ccw_put_device() for consistency.

Signed-off-by: Greg Kurz 
---
 hw/vfio/ccw.c |   56 
 1 file changed, 36 insertions(+), 20 deletions(-)

diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index fe34b507699f..49ae986d288d 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -292,12 +292,44 @@ static void vfio_ccw_put_region(VFIOCCWDevice *vcdev)
 g_free(vcdev->io_region);
 }
 
-static void vfio_put_device(VFIOCCWDevice *vcdev)
+static void vfio_ccw_put_device(VFIOCCWDevice *vcdev)
 {
 g_free(vcdev->vdev.name);
 vfio_put_base_device(&vcdev->vdev);
 }
 
+static int vfio_ccw_get_device(VFIOGroup *group, VFIOCCWDevice *vcdev,
+   Error **errp)
+{
+char *name = g_strdup_printf("%x.%x.%04x", vcdev->cdev.hostid.cssid,
+ vcdev->cdev.hostid.ssid,
+ vcdev->cdev.hostid.devid);
+VFIODevice *vbasedev;
+
+QLIST_FOREACH(vbasedev, &group->device_list, next) {
+if (strcmp(vbasedev->name, name) == 0) {
+error_setg(errp, "vfio: subchannel %s has already been attached",
+   name);
+goto out_err;
+}
+}
+
+if (vfio_get_device(group, vcdev->cdev.mdevid, &vcdev->vdev, errp)) {
+goto out_err;
+}
+
+vcdev->vdev.ops = &vfio_ccw_ops;
+vcdev->vdev.type = VFIO_DEVICE_TYPE_CCW;
+vcdev->vdev.name = name;
+vcdev->vdev.dev = &vcdev->cdev.parent_obj.parent_obj;
+
+return 0;
+
+out_err:
+g_free(name);
+return -1;
+}
+
 static VFIOGroup *vfio_ccw_get_group(S390CCWDevice *cdev, Error **errp)
 {
 char *tmp, group_path[PATH_MAX];
@@ -327,7 +359,6 @@ static VFIOGroup *vfio_ccw_get_group(S390CCWDevice *cdev, 
Error **errp)
 
 static void vfio_ccw_realize(DeviceState *dev, Error **errp)
 {
-VFIODevice *vbasedev;
 VFIOGroup *group;
 CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev);
 S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev);
@@ -348,22 +379,7 @@ static void vfio_ccw_realize(DeviceState *dev, Error 
**errp)
 goto out_group_err;
 }
 
-vcdev->vdev.ops = &vfio_ccw_ops;
-vcdev->vdev.type = VFIO_DEVICE_TYPE_CCW;
-vcdev->vdev.name = g_strdup_printf("%x.%x.%04x", cdev->hostid.cssid,
-   cdev->hostid.ssid, cdev->hostid.devid);
-vcdev->vdev.dev = dev;
-QLIST_FOREACH(vbasedev, &group->device_list, next) {
-if (strcmp(vbasedev->name, vcdev->vdev.name) == 0) {
-error_setg(&err, "vfio: subchannel %s has already been attached",
-   vcdev->vdev.name);
-g_free(vcdev->vdev.name);
-goto out_device_err;
-}
-}
-
-if (vfio_get_device(group, cdev->mdevid, &vcdev->vdev, &err)) {
-g_free(vcdev->vdev.name);
+if (vfio_ccw_get_device(group, vcdev, &err)) {
 goto out_device_err;
 }
 
@@ -382,7 +398,7 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp)
 out_notifier_err:
 vfio_ccw_put_region(vcdev);
 out_region_err:
-vfio_put_device(vcdev);
+vfio_ccw_put_device(vcdev);
 out_device_err:
 vfio_put_group(group);
 out_group_err:
@@ -403,7 +419,7 @@ static void vfio_ccw_unrealize(DeviceState *dev, Error 
**errp)
 
 vfio_ccw_unregister_io_notifier(vcdev);
 vfio_ccw_put_region(vcdev);
-vfio_put_device(vcdev);
+vfio_ccw_put_device(vcdev);
 vfio_put_group(group);
 
 if (cdc->unrealize) {




Re: [Qemu-devel] [Qemu-arm] [PATCH v2 2/2] arm_gicv3_kvm: kvm_dist_get/put: skip the registers banked by GICR

2018-04-07 Thread Shannon Zhao


On 2018/4/6 17:36, Peter Maydell wrote:
> On 5 April 2018 at 15:22, Peter Maydell  wrote:
>> > On 29 March 2018 at 11:54, Peter Maydell  wrote:
>>> >> On 23 March 2018 at 12:08, Peter Maydell  
>>> >> wrote:
 >>> On 21 March 2018 at 08:00, Shannon Zhao  
 >>> wrote:
>  On 2018/3/20 19:54, Peter Maydell wrote:
>> > Can you still successfully migrate a VM from a QEMU version
>> > without this bugfix to one with the bugfix ?
>> >
>  I've tested this case. I can migrate a VM between these two versions.
 >>>
 >>> Hmm. Looking at the code I can't see how that would work,
 >>> except by accident. Let me see if I understand what's happening
 >>> here:
>> >
>>> >> I was thinking a bit more about how to handle this, and
>>> >> my best idea was:
>>> >>
>>> >> (1) send something in the migration stream that says
>>> >> "I don't have this bug" (version number change?
>>> >> vmstate field that's just a "no bug" flag? subsection
>>> >> with no contents?)
>>> >>
>>> >> (2) on the destination, if the source doesn't tell us
>>> >> it doesn't have this bug, and we are running KVM, then
>>> >> shift all the data in the arrays down to fix it up
>>> >> [Strictly what we want to know is if the source is
>>> >> running KVM, not if the destination is, but I don't
>>> >> know of a way to find that out, and in practice TCG->KVM
>>> >> migrations don't work anyway, so it's not a big deal.]
>> >
>> > Shannon, are you planning to look at this for 2.12, or should
>> > we postpone it to 2.13? (It's not a regression, right? So
>> > we don't necessarily have to urgently fix it for 2.12.)
> On reflection, I think I'd aim for 2.13 for this, since:
>  * it's not a regression
>  * it doesn't actually affect any of our boards, because
>none of them define enough interrupt lines that they
>would actually be using the top 32 that we fail to migrate
>  * getting the migration compat right is a bit tricky and
>will benefit from having the time for careful review and testing
> 
> Let me know if I'm wrong with any of those assumptions.
Yes, it's no need to merge this for 2.12. I'll respin this patch later.

Thanks,
-- 
Shannon




[Qemu-devel] [PATCH v3 1/2] Implement .hex file loader

2018-04-07 Thread Su Hang
This patch adds Intel Hexadecimal Object File format support to
the loader.  The file format specification is available here:
http://www.piclist.com/techref/fileext/hex/intel.htm

The file format is mainly intended for embedded systems
and microcontrollers, such as Arduino, ARM, STM32, etc.

Suggested-by: Stefan Hajnoczi 
Signed-off-by: Su Hang 
---
 hw/arm/boot.c   |   9 +-
 hw/core/loader.c| 280 
 include/hw/loader.h |  17 
 3 files changed, 305 insertions(+), 1 deletion(-)

diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index 9319b12fcd2a..07ce54e5936d 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -1060,8 +1060,15 @@ static void arm_load_kernel_notify(Notifier *notifier, 
void *data)
 kernel_size = load_aarch64_image(info->kernel_filename,
  info->loader_start, &entry, as);
 is_linux = 1;
+} else if (kernel_size < 0 && strstr(info->kernel_filename, ".hex")) {
+/* 32-bit ARM .hex file */
+entry = info->loader_start + KERNEL_LOAD_ADDR;
+kernel_size = load_targphys_hex_as(info->kernel_filename, entry,
+   info->ram_size - KERNEL_LOAD_ADDR,
+   as);
+is_linux = 1;
 } else if (kernel_size < 0) {
-/* 32-bit ARM */
+/* 32-bit ARM binary file */
 entry = info->loader_start + KERNEL_LOAD_ADDR;
 kernel_size = load_image_targphys_as(info->kernel_filename, entry,
  info->ram_size - KERNEL_LOAD_ADDR,
diff --git a/hw/core/loader.c b/hw/core/loader.c
index 06bdbca53709..41d714520be4 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -1286,3 +1286,283 @@ void hmp_info_roms(Monitor *mon, const QDict *qdict)
 }
 }
 }
+
+typedef enum HexRecord HexRecord;
+enum HexRecord {
+DATA_RECORD = 0,
+EOF_RECORD,
+EXT_SEG_ADDR_RECORD,
+START_SEG_ADDR_RECORD,
+EXT_LINEAR_ADDR_RECORD,
+START_LINEAR_ADDR_RECORD,
+};
+
+typedef union HexLine HexLine;
+union HexLine {
+uint8_t buf[0x25];
+struct __attribute__((packed)) {
+uint8_t byte_count;
+uint16_t address;
+uint8_t record_type;
+uint8_t data[0x25 - 0x5];
+uint8_t checksum;
+};
+};
+
+static uint8_t ctoh(char c)
+{
+return (c & 0x10) ? /*0-9*/ c & 0xf : /*A-F, a-f*/ (c & 0xf) + 9;
+}
+
+static uint8_t validate_checksum(HexLine *record)
+{
+uint8_t result = 0, i = 0;
+
+for (; i < (record->byte_count + 5); ++i) {
+result += record->buf[i];
+}
+
+return result == 0;
+}
+
+/* return pointer of bin_blob or NULL if error */
+static uint8_t *parse_hex_blob(char *filename, size_t *p_size)
+{
+int fd;
+off_t hex_blob_size;
+uint8_t *p_data = NULL;
+uint8_t *hex_blob;
+uint8_t *hex_blob_ori; /* used to free temporary memory */
+uint8_t *bin_buf;
+uint8_t *end;
+uint8_t idx = 0;
+uint8_t in_process = 0;/* avoid re-enter */
+uint8_t low_nibble = 0;/* process two hex char into 8-bits */
+uint8_t ext_linear_record = 0; /* record non-constitutes block */
+uint32_t next_address_to_write = 0;
+uint32_t current_address = 0;
+uint32_t last_address = 0;
+HexLine line = {0};
+
+fd = open(filename, O_RDONLY);
+if (fd < 0) {
+return NULL;
+}
+hex_blob_size = lseek(fd, 0, SEEK_END);
+if (hex_blob_size < 0) {
+close(fd);
+return NULL;
+}
+hex_blob = g_malloc(hex_blob_size);
+hex_blob_ori = hex_blob;
+bin_buf = g_malloc(hex_blob_size * 2);
+lseek(fd, 0, SEEK_SET);
+if (read(fd, hex_blob, hex_blob_size) != hex_blob_size) {
+close(fd);
+goto hex_parser_exit;
+}
+close(fd);
+
+memset(line.buf, 0, sizeof(HexLine));
+end = (uint8_t *)hex_blob + hex_blob_size;
+
+for (; hex_blob != end; ++hex_blob) {
+switch ((uint8_t)(*hex_blob)) {
+case '\r':
+case '\n':
+if (!in_process) {
+break;
+}
+
+in_process = 0;
+if (validate_checksum(&line) == 0) {
+p_data = NULL;
+goto hex_parser_exit;
+}
+
+line.address = bswap16(line.address);
+switch (line.record_type) {
+case DATA_RECORD:
+current_address =
+(next_address_to_write & 0x) | line.address;
+/* verify this is a continous block of memory */
+if (current_address != next_address_to_write ||
+ext_linear_record) {
+if (!ext_linear_record) {
+/* Store next address to write */
+last_address = next_address_to_write;
+next_address_to_write = current_address;
+}
+ext_linear_recor

[Qemu-devel] [PATCH v3 RFC 0/2] Implement Hex file loader and add test case

2018-04-07 Thread Su Hang
These series of patchs implement Intel Hexadecimal File loader and
add QTest testcase to verify the correctness of Loader.

v1: Basic version.
v2: Replace `do{}while(cond);` block with `for(;;)` block.
v3: Add two new files information in MAINTAINERS.

Su Hang (2):
  Implement .hex file loader
  Add QTest testcase for the Intel Hexadecimal Object File Loader.

 MAINTAINERS  |   6 +
 hw/arm/boot.c|   9 +-
 hw/core/loader.c | 280 +++
 include/hw/loader.h  |  17 +++
 tests/Makefile.include   |   2 +
 tests/hex-loader-check-data/test.hex |  11 ++
 tests/hexloader-test.c   |  56 +++
 7 files changed, 380 insertions(+), 1 deletion(-)
 create mode 100644 tests/hex-loader-check-data/test.hex
 create mode 100644 tests/hexloader-test.c

-- 
2.7.4




[Qemu-devel] [PATCH v3 2/2] Add QTest testcase for the Intel Hexadecimal Object File Loader.

2018-04-07 Thread Su Hang
'test.hex' file is a bare metal ARM software stored in Hexadecimal
Object Format. When it's loaded by QEMU, it will print "Hello world!\n"
on console.

`pre_store` array in 'hexloader-test.c' file, stores the binary format
of 'test.hex' file, which is used to verify correctness.

Signed-off-by: Su Hang 
---
 MAINTAINERS  |  6 
 tests/Makefile.include   |  2 ++
 tests/hex-loader-check-data/test.hex | 11 +++
 tests/hexloader-test.c   | 56 
 4 files changed, 75 insertions(+)
 create mode 100644 tests/hex-loader-check-data/test.hex
 create mode 100644 tests/hexloader-test.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 24b70169bc37..3d37d04c3345 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1291,6 +1291,12 @@ F: hw/core/generic-loader.c
 F: include/hw/core/generic-loader.h
 F: docs/generic-loader.txt
 
+Intel Hexadecimal Object File Loader
+M: Su Hang 
+S: Maintained
+F: tests/hexloader-test.c
+F: tests/hex-loader-check-data/test.hex
+
 CHRP NVRAM
 M: Thomas Huth 
 S: Maintained
diff --git a/tests/Makefile.include b/tests/Makefile.include
index 3b9a5e31a2c2..f4a3e71f34ee 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -380,6 +380,7 @@ check-qtest-arm-y += tests/test-arm-mptimer$(EXESUF)
 gcov-files-arm-y += hw/timer/arm_mptimer.c
 check-qtest-arm-y += tests/boot-serial-test$(EXESUF)
 check-qtest-arm-y += tests/sdhci-test$(EXESUF)
+check-qtest-arm-y += tests/hexloader-test$(EXESUF)
 
 check-qtest-aarch64-y = tests/numa-test$(EXESUF)
 check-qtest-aarch64-y += tests/sdhci-test$(EXESUF)
@@ -755,6 +756,7 @@ tests/qmp-test$(EXESUF): tests/qmp-test.o
 tests/device-introspect-test$(EXESUF): tests/device-introspect-test.o
 tests/rtc-test$(EXESUF): tests/rtc-test.o
 tests/m48t59-test$(EXESUF): tests/m48t59-test.o
+tests/hexloader-test$(EXESUF): tests/hexloader-test.o
 tests/endianness-test$(EXESUF): tests/endianness-test.o
 tests/spapr-phb-test$(EXESUF): tests/spapr-phb-test.o $(libqos-obj-y)
 tests/prom-env-test$(EXESUF): tests/prom-env-test.o $(libqos-obj-y)
diff --git a/tests/hex-loader-check-data/test.hex 
b/tests/hex-loader-check-data/test.hex
new file mode 100644
index ..59b96e3e6fa7
--- /dev/null
+++ b/tests/hex-loader-check-data/test.hex
@@ -0,0 +1,11 @@
+:14D09FE516EBFEEA9810010008
+:100014B02DE500B08DE20CD04DE208000BE5F8
+:100026EA08301BE50020D3E52C309FE5F0
+:1000302083E508301BE5013083E208300BE542
+:100048301BE50030D3E553E3F41A4E
+:10005000A0E100D08BE204B09DE41EFF2FE180
+:100060101F1000482DE904B08DE208009FE544
+:10007000E6EBA0E10088BDE8840001007E
+:100080101F1048656C6C6F20776F726C6421D4
+:02009A0064
+:0001FF
diff --git a/tests/hexloader-test.c b/tests/hexloader-test.c
new file mode 100644
index ..b1a491dcd9de
--- /dev/null
+++ b/tests/hexloader-test.c
@@ -0,0 +1,56 @@
+/*
+ * QTest testcase for the Intel Hexadecimal Object File Loader
+ *
+ * Authors:
+ *  Su Hang  2018
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "libqtest.h"
+
+#define BIN_SIZE 146
+
+static unsigned char pre_store[BIN_SIZE] = {
+4,   208, 159, 229, 22,  0,   0,   235, 254, 255, 255, 234, 152, 16,  1,
+0,   4,   176, 45,  229, 0,   176, 141, 226, 12,  208, 77,  226, 8,   0,
+11,  229, 6,   0,   0,   234, 8,   48,  27,  229, 0,   32,  211, 229, 44,
+48,  159, 229, 0,   32,  131, 229, 8,   48,  27,  229, 1,   48,  131, 226,
+8,   48,  11,  229, 8,   48,  27,  229, 0,   48,  211, 229, 0,   0,   83,
+227, 244, 255, 255, 26,  0,   0,   160, 225, 0,   208, 139, 226, 4,   176,
+157, 228, 30,  255, 47,  225, 0,   16,  31,  16,  0,   72,  45,  233, 4,
+176, 141, 226, 8,   0,   159, 229, 230, 255, 255, 235, 0,   0,   160, 225,
+0,   136, 189, 232, 132, 0,   1,   0,   0,   16,  31,  16,  72,  101, 108,
+108, 111, 32,  119, 111, 114, 108, 100, 33,  10,  0};
+
+/* success if no crash or abort */
+static void hex_loader_test(void)
+{
+unsigned int i;
+unsigned char memory_content[BIN_SIZE];
+const unsigned int base_addr = 0x0001;
+
+QTestState *s = qtest_startf(
+"-M versatilepb -m 128M -nographic -kernel ../tests/test.hex");
+
+for (i = 0; i < BIN_SIZE; ++i) {
+memory_content[i] = qtest_readb(s, base_addr + i);
+g_assert_cmpuint(memory_content[i], ==, pre_store[i]);
+}
+qtest_quit(s);
+}
+
+int main(int argc, char **argv)
+{
+int ret;
+
+g_test_init(&argc, &argv, NULL);
+
+qtest_add_func("/tmp/hex_loader", hex_loader_test);
+ret = g_test_run();
+
+return ret;
+}
-- 
2.7.4




[Qemu-devel] [PATCH v3 0/2] Implement Hex file loader and add test case

2018-04-07 Thread Su Hang
These series of patchs implement Intel Hexadecimal File loader and
add QTest testcase to verify the correctness of Loader.

v1: Basic version.
v2: Replace `do{}while(cond);` block with `for(;;)` block.
v3: Add two new files information in MAINTAINERS.

Su Hang (2):
  Implement .hex file loader
  Add QTest testcase for the Intel Hexadecimal Object File Loader.

 MAINTAINERS  |   6 +
 hw/arm/boot.c|   9 +-
 hw/core/loader.c | 280 +++
 include/hw/loader.h  |  17 +++
 tests/Makefile.include   |   2 +
 tests/hex-loader-check-data/test.hex |  11 ++
 tests/hexloader-test.c   |  56 +++
 7 files changed, 380 insertions(+), 1 deletion(-)
 create mode 100644 tests/hex-loader-check-data/test.hex
 create mode 100644 tests/hexloader-test.c

-- 
2.7.4




[Qemu-devel] [PATCH v3 1/2] Implement .hex file loader

2018-04-07 Thread Su Hang
This patch adds Intel Hexadecimal Object File format support to
the loader.  The file format specification is available here:
http://www.piclist.com/techref/fileext/hex/intel.htm

The file format is mainly intended for embedded systems
and microcontrollers, such as Arduino, ARM, STM32, etc.

Suggested-by: Stefan Hajnoczi 
Signed-off-by: Su Hang 
---
 hw/arm/boot.c   |   9 +-
 hw/core/loader.c| 280 
 include/hw/loader.h |  17 
 3 files changed, 305 insertions(+), 1 deletion(-)

diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index 9319b12fcd2a..07ce54e5936d 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -1060,8 +1060,15 @@ static void arm_load_kernel_notify(Notifier *notifier, 
void *data)
 kernel_size = load_aarch64_image(info->kernel_filename,
  info->loader_start, &entry, as);
 is_linux = 1;
+} else if (kernel_size < 0 && strstr(info->kernel_filename, ".hex")) {
+/* 32-bit ARM .hex file */
+entry = info->loader_start + KERNEL_LOAD_ADDR;
+kernel_size = load_targphys_hex_as(info->kernel_filename, entry,
+   info->ram_size - KERNEL_LOAD_ADDR,
+   as);
+is_linux = 1;
 } else if (kernel_size < 0) {
-/* 32-bit ARM */
+/* 32-bit ARM binary file */
 entry = info->loader_start + KERNEL_LOAD_ADDR;
 kernel_size = load_image_targphys_as(info->kernel_filename, entry,
  info->ram_size - KERNEL_LOAD_ADDR,
diff --git a/hw/core/loader.c b/hw/core/loader.c
index 06bdbca53709..41d714520be4 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -1286,3 +1286,283 @@ void hmp_info_roms(Monitor *mon, const QDict *qdict)
 }
 }
 }
+
+typedef enum HexRecord HexRecord;
+enum HexRecord {
+DATA_RECORD = 0,
+EOF_RECORD,
+EXT_SEG_ADDR_RECORD,
+START_SEG_ADDR_RECORD,
+EXT_LINEAR_ADDR_RECORD,
+START_LINEAR_ADDR_RECORD,
+};
+
+typedef union HexLine HexLine;
+union HexLine {
+uint8_t buf[0x25];
+struct __attribute__((packed)) {
+uint8_t byte_count;
+uint16_t address;
+uint8_t record_type;
+uint8_t data[0x25 - 0x5];
+uint8_t checksum;
+};
+};
+
+static uint8_t ctoh(char c)
+{
+return (c & 0x10) ? /*0-9*/ c & 0xf : /*A-F, a-f*/ (c & 0xf) + 9;
+}
+
+static uint8_t validate_checksum(HexLine *record)
+{
+uint8_t result = 0, i = 0;
+
+for (; i < (record->byte_count + 5); ++i) {
+result += record->buf[i];
+}
+
+return result == 0;
+}
+
+/* return pointer of bin_blob or NULL if error */
+static uint8_t *parse_hex_blob(char *filename, size_t *p_size)
+{
+int fd;
+off_t hex_blob_size;
+uint8_t *p_data = NULL;
+uint8_t *hex_blob;
+uint8_t *hex_blob_ori; /* used to free temporary memory */
+uint8_t *bin_buf;
+uint8_t *end;
+uint8_t idx = 0;
+uint8_t in_process = 0;/* avoid re-enter */
+uint8_t low_nibble = 0;/* process two hex char into 8-bits */
+uint8_t ext_linear_record = 0; /* record non-constitutes block */
+uint32_t next_address_to_write = 0;
+uint32_t current_address = 0;
+uint32_t last_address = 0;
+HexLine line = {0};
+
+fd = open(filename, O_RDONLY);
+if (fd < 0) {
+return NULL;
+}
+hex_blob_size = lseek(fd, 0, SEEK_END);
+if (hex_blob_size < 0) {
+close(fd);
+return NULL;
+}
+hex_blob = g_malloc(hex_blob_size);
+hex_blob_ori = hex_blob;
+bin_buf = g_malloc(hex_blob_size * 2);
+lseek(fd, 0, SEEK_SET);
+if (read(fd, hex_blob, hex_blob_size) != hex_blob_size) {
+close(fd);
+goto hex_parser_exit;
+}
+close(fd);
+
+memset(line.buf, 0, sizeof(HexLine));
+end = (uint8_t *)hex_blob + hex_blob_size;
+
+for (; hex_blob != end; ++hex_blob) {
+switch ((uint8_t)(*hex_blob)) {
+case '\r':
+case '\n':
+if (!in_process) {
+break;
+}
+
+in_process = 0;
+if (validate_checksum(&line) == 0) {
+p_data = NULL;
+goto hex_parser_exit;
+}
+
+line.address = bswap16(line.address);
+switch (line.record_type) {
+case DATA_RECORD:
+current_address =
+(next_address_to_write & 0x) | line.address;
+/* verify this is a continous block of memory */
+if (current_address != next_address_to_write ||
+ext_linear_record) {
+if (!ext_linear_record) {
+/* Store next address to write */
+last_address = next_address_to_write;
+next_address_to_write = current_address;
+}
+ext_linear_recor

[Qemu-devel] [PATCH v3 2/2] Add QTest testcase for the Intel Hexadecimal Object File Loader.

2018-04-07 Thread Su Hang
'test.hex' file is a bare metal ARM software stored in Hexadecimal
Object Format. When it's loaded by QEMU, it will print "Hello world!\n"
on console.

`pre_store` array in 'hexloader-test.c' file, stores the binary format
of 'test.hex' file, which is used to verify correctness.

Signed-off-by: Su Hang 
---
 MAINTAINERS  |  6 
 tests/Makefile.include   |  2 ++
 tests/hex-loader-check-data/test.hex | 11 +++
 tests/hexloader-test.c   | 56 
 4 files changed, 75 insertions(+)
 create mode 100644 tests/hex-loader-check-data/test.hex
 create mode 100644 tests/hexloader-test.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 24b70169bc37..3d37d04c3345 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1291,6 +1291,12 @@ F: hw/core/generic-loader.c
 F: include/hw/core/generic-loader.h
 F: docs/generic-loader.txt
 
+Intel Hexadecimal Object File Loader
+M: Su Hang 
+S: Maintained
+F: tests/hexloader-test.c
+F: tests/hex-loader-check-data/test.hex
+
 CHRP NVRAM
 M: Thomas Huth 
 S: Maintained
diff --git a/tests/Makefile.include b/tests/Makefile.include
index 3b9a5e31a2c2..f4a3e71f34ee 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -380,6 +380,7 @@ check-qtest-arm-y += tests/test-arm-mptimer$(EXESUF)
 gcov-files-arm-y += hw/timer/arm_mptimer.c
 check-qtest-arm-y += tests/boot-serial-test$(EXESUF)
 check-qtest-arm-y += tests/sdhci-test$(EXESUF)
+check-qtest-arm-y += tests/hexloader-test$(EXESUF)
 
 check-qtest-aarch64-y = tests/numa-test$(EXESUF)
 check-qtest-aarch64-y += tests/sdhci-test$(EXESUF)
@@ -755,6 +756,7 @@ tests/qmp-test$(EXESUF): tests/qmp-test.o
 tests/device-introspect-test$(EXESUF): tests/device-introspect-test.o
 tests/rtc-test$(EXESUF): tests/rtc-test.o
 tests/m48t59-test$(EXESUF): tests/m48t59-test.o
+tests/hexloader-test$(EXESUF): tests/hexloader-test.o
 tests/endianness-test$(EXESUF): tests/endianness-test.o
 tests/spapr-phb-test$(EXESUF): tests/spapr-phb-test.o $(libqos-obj-y)
 tests/prom-env-test$(EXESUF): tests/prom-env-test.o $(libqos-obj-y)
diff --git a/tests/hex-loader-check-data/test.hex 
b/tests/hex-loader-check-data/test.hex
new file mode 100644
index ..59b96e3e6fa7
--- /dev/null
+++ b/tests/hex-loader-check-data/test.hex
@@ -0,0 +1,11 @@
+:14D09FE516EBFEEA9810010008
+:100014B02DE500B08DE20CD04DE208000BE5F8
+:100026EA08301BE50020D3E52C309FE5F0
+:1000302083E508301BE5013083E208300BE542
+:100048301BE50030D3E553E3F41A4E
+:10005000A0E100D08BE204B09DE41EFF2FE180
+:100060101F1000482DE904B08DE208009FE544
+:10007000E6EBA0E10088BDE8840001007E
+:100080101F1048656C6C6F20776F726C6421D4
+:02009A0064
+:0001FF
diff --git a/tests/hexloader-test.c b/tests/hexloader-test.c
new file mode 100644
index ..b1a491dcd9de
--- /dev/null
+++ b/tests/hexloader-test.c
@@ -0,0 +1,56 @@
+/*
+ * QTest testcase for the Intel Hexadecimal Object File Loader
+ *
+ * Authors:
+ *  Su Hang  2018
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "libqtest.h"
+
+#define BIN_SIZE 146
+
+static unsigned char pre_store[BIN_SIZE] = {
+4,   208, 159, 229, 22,  0,   0,   235, 254, 255, 255, 234, 152, 16,  1,
+0,   4,   176, 45,  229, 0,   176, 141, 226, 12,  208, 77,  226, 8,   0,
+11,  229, 6,   0,   0,   234, 8,   48,  27,  229, 0,   32,  211, 229, 44,
+48,  159, 229, 0,   32,  131, 229, 8,   48,  27,  229, 1,   48,  131, 226,
+8,   48,  11,  229, 8,   48,  27,  229, 0,   48,  211, 229, 0,   0,   83,
+227, 244, 255, 255, 26,  0,   0,   160, 225, 0,   208, 139, 226, 4,   176,
+157, 228, 30,  255, 47,  225, 0,   16,  31,  16,  0,   72,  45,  233, 4,
+176, 141, 226, 8,   0,   159, 229, 230, 255, 255, 235, 0,   0,   160, 225,
+0,   136, 189, 232, 132, 0,   1,   0,   0,   16,  31,  16,  72,  101, 108,
+108, 111, 32,  119, 111, 114, 108, 100, 33,  10,  0};
+
+/* success if no crash or abort */
+static void hex_loader_test(void)
+{
+unsigned int i;
+unsigned char memory_content[BIN_SIZE];
+const unsigned int base_addr = 0x0001;
+
+QTestState *s = qtest_startf(
+"-M versatilepb -m 128M -nographic -kernel ../tests/test.hex");
+
+for (i = 0; i < BIN_SIZE; ++i) {
+memory_content[i] = qtest_readb(s, base_addr + i);
+g_assert_cmpuint(memory_content[i], ==, pre_store[i]);
+}
+qtest_quit(s);
+}
+
+int main(int argc, char **argv)
+{
+int ret;
+
+g_test_init(&argc, &argv, NULL);
+
+qtest_add_func("/tmp/hex_loader", hex_loader_test);
+ret = g_test_run();
+
+return ret;
+}
-- 
2.7.4




Re: [Qemu-devel] [PATCH v8 14/23] monitor: separate QMP parser and dispatcher

2018-04-07 Thread Peter Xu
On Wed, Apr 04, 2018 at 03:58:56PM +0200, Marc-André Lureau wrote:
> Hi Peter
> 
> On Wed, Mar 28, 2018 at 6:02 AM, Peter Xu  wrote:
> > On Mon, Mar 26, 2018 at 11:46:13AM +0200, Marc-André Lureau wrote:
> >> Hi
> >>
> >> On Mon, Mar 26, 2018 at 11:08 AM, Peter Xu  wrote:
> >> > On Mon, Mar 26, 2018 at 10:33:27AM +0200, Marc-André Lureau wrote:
> >> >> Hi
> >> >>
> >> >> On Mon, Mar 26, 2018 at 10:07 AM, Peter Xu  wrote:
> >> >> > On Fri, Mar 23, 2018 at 05:18:53PM +0100, Marc-André Lureau wrote:
> >> >> >
> >> >> > [...]
> >> >> >
> >> >> >> > +/*
> >> >> >> > + * Dispatch one single QMP request. The function will free the 
> >> >> >> > req_obj
> >> >> >> > + * and objects inside it before return.
> >> >> >> > + */
> >> >> >> > +static void monitor_qmp_dispatch_one(QMPRequest *req_obj)
> >> >> >> >  {
> >> >> >> > -QObject *req, *rsp = NULL, *id = NULL;
> >> >> >> > +Monitor *mon, *old_mon;
> >> >> >> > +QObject *req, *rsp = NULL, *id;
> >> >> >> >  QDict *qdict = NULL;
> >> >> >> > -MonitorQMP *mon_qmp = container_of(parser, MonitorQMP, 
> >> >> >> > parser);
> >> >> >> > -Monitor *old_mon, *mon = container_of(mon_qmp, Monitor, qmp);
> >> >> >> > -
> >> >> >> > -Error *err = NULL;
> >> >> >> > +bool need_resume;
> >> >> >> >
> >> >> >> > -req = json_parser_parse_err(tokens, NULL, &err);
> >> >> >> > -if (!req && !err) {
> >> >> >> > -/* json_parser_parse_err() sucks: can fail without 
> >> >> >> > setting @err */
> >> >> >> > -error_setg(&err, QERR_JSON_PARSING);
> >> >> >> > -}
> >> >> >> > -if (err) {
> >> >> >> > -goto err_out;
> >> >> >> > -}
> >> >> >> > +req = req_obj->req;
> >> >> >> > +mon = req_obj->mon;
> >> >> >> > +id = req_obj->id;
> >> >> >> > +need_resume = req_obj->need_resume;
> >> >> >> >
> >> >> >> > -qdict = qobject_to_qdict(req);
> >> >> >> > -if (qdict) {
> >> >> >> > -id = qdict_get(qdict, "id");
> >> >> >> > -qobject_incref(id);
> >> >> >> > -qdict_del(qdict, "id");
> >> >> >> > -} /* else will fail qmp_dispatch() */
> >> >> >> > +g_free(req_obj);
> >> >> >> >
> >> >> >> >  if (trace_event_get_state_backends(TRACE_HANDLE_QMP_COMMAND)) 
> >> >> >> > {
> >> >> >> >  QString *req_json = qobject_to_json(req);
> >> >> >> > @@ -3900,7 +3932,7 @@ static void 
> >> >> >> > handle_qmp_command(JSONMessageParser *parser, GQueue *tokens)
> >> >> >> >  old_mon = cur_mon;
> >> >> >> >  cur_mon = mon;
> >> >> >>
> >> >> >> There is another issue with this series, since cur_mon is global (and
> >> >> >> not protected), an oob command may change the cur_mon while another
> >> >> >> command is running in the main thread with unexpected consequences. I
> >> >> >> don't have a clear idea what is the best way to solve it. Making the
> >> >> >> variable per-thread, or going all the way to get rid of cur_mon (my
> >> >> >> preference, but much harder)
> >> >> >
> >> >> > IMHO it is fine too.
> >> >> >
> >> >> > Note that this cur_mon operation is in monitor_qmp_dispatch_one() now,
> >> >> > which is still running in main thread.  So AFAICT all the cur_mon
> >> >> > references are in main thread, and monitor IOThread does not modify
> >> >> > that variable at all.  Then we should probably be safe.
> >> >>
> >> >> But monitor_qmp_dispatch_one() is called from iothread if the command
> >> >> is oob, so cur_mon may be updated while another command is running in
> >> >> main thread, or am I wrong?
> >> >
> >> > You are right. I missed that, sorry...
> >> >
> >> > Would this be a simple workaround (but hopefully efficient) solution?
> >> >
> >> > diff --git a/monitor.c b/monitor.c
> >> > index 77f4c41cfa..99641c0c6d 100644
> >> > --- a/monitor.c
> >> > +++ b/monitor.c
> >> > @@ -4023,7 +4023,7 @@ typedef struct QMPRequest QMPRequest;
> >> >   * Dispatch one single QMP request. The function will free the req_obj
> >> >   * and objects inside it before return.
> >> >   */
> >> > -static void monitor_qmp_dispatch_one(QMPRequest *req_obj)
> >> > +static void monitor_qmp_dispatch_one(QMPRequest *req_obj, bool 
> >> > hack_curmon)
> >> >  {
> >> >  Monitor *mon, *old_mon;
> >> >  QObject *req, *rsp = NULL, *id;
> >> > @@ -4043,12 +4043,16 @@ static void monitor_qmp_dispatch_one(QMPRequest 
> >> > *req_obj)
> >> >  QDECREF(req_json);
> >> >  }
> >> >
> >> > -old_mon = cur_mon;
> >> > -cur_mon = mon;
> >> > +if (hack_curmon) {
> >> > +old_mon = cur_mon;
> >> > +cur_mon = mon;
> >> > +}
> >> >
> >> >  rsp = qmp_dispatch(mon->qmp.commands, req);
> >> >
> >> > -cur_mon = old_mon;
> >> > +if (hack_curmon) {
> >> > +cur_mon = old_mon;
> >> > +}
> >> >
> >> >  if (mon->qmp.commands == &qmp_cap_negotiation_commands) {
> >> >  qdict = qdict_get_qdict(qobject_to(QDict, rsp), "error");
> >> > @@ -4116,7 +4120,7 @@ static void monitor_qmp_bh_dispatcher(void *data)
> >> >
> >> >  if (req_o

[Qemu-devel] [PATCH v2] iotests: fix wait_until_completed()

2018-04-07 Thread Peter Xu
If there are more than one events, wait_until_completed() might return
the 2nd event even if the 1st event is JOB_COMPLETED, since the for loop
will continue to run even if completed is set to True.

It never happened before, but it can be triggered when OOB is enabled
due to the RESUME startup message. Fix that up.

Signed-off-by: Peter Xu 
---
 tests/qemu-iotests/iotests.py | 9 +++--
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index b5d7945af8..119c8e270a 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -470,18 +470,15 @@ class QMPTestCase(unittest.TestCase):
 
 def wait_until_completed(self, drive='drive0', check_offset=True):
 '''Wait for a block job to finish, returning the event'''
-completed = False
-while not completed:
+while True:
 for event in self.vm.get_qmp_events(wait=True):
 if event['event'] == 'BLOCK_JOB_COMPLETED':
 self.assert_qmp(event, 'data/device', drive)
 self.assert_qmp_absent(event, 'data/error')
 if check_offset:
 self.assert_qmp(event, 'data/offset', 
event['data']['len'])
-completed = True
-
-self.assert_no_active_block_jobs()
-return event
+self.assert_no_active_block_jobs()
+return event
 
 def wait_ready(self, drive='drive0'):
 '''Wait until a block job BLOCK_JOB_READY event'''
-- 
2.14.3




Re: [Qemu-devel] [PATCH v3 00/10] migration: improve and cleanup compression

2018-04-07 Thread Xiao Guangrong


Hi Paolo, Michael, Stefan and others,

Could anyone merge this patchset if it is okay to you guys?

On 03/30/2018 03:51 PM, guangrong.x...@gmail.com wrote:

From: Xiao Guangrong 

Changelog in v3:
Following changes are from Peter's review:
1) use comp_param[i].file and decomp_param[i].compbuf to indicate if
the thread is properly init'd or not
2) save the file which is used by ram loader to the global variable
instead it is cached per decompression thread

Changelog in v2:
Thanks to the review from Dave, Peter, Wei and Jiang Biao, the changes
in this version are:
1) include the performance number in the cover letter
2)add some comments to explain how to use z_stream->opaque in the
patchset
3) allocate a internal buffer for per thread to store the data to
be compressed
4) add a new patch that moves some code to ram_save_host_page() so
that 'goto' can be omitted gracefully
5) split the optimization of compression and decompress into two
separated patches
6) refine and correct code styles


This is the first part of our work to improve compression to make it
be more useful in the production.

The first patch resolves the problem that the migration thread spends
too much CPU resource to compression memory if it jumps to a new block
that causes the network is used very deficient.

The second patch fixes the performance issue that too many VM-exits
happen during live migration if compression is being used, it is caused
by huge memory returned to kernel frequently as the memory is allocated
and freed for every signal call to compress2()

The remaining patches clean the code up dramatically

Performance numbers:
We have tested it on my desktop, i7-4790 + 16G, by locally live migrate
the VM which has 8 vCPUs + 6G memory and the max-bandwidth is limited to
350. During the migration, a workload which has 8 threads repeatedly
written total 6G memory in the VM.

Before this patchset, its bandwidth is ~25 mbps, after applying, the
bandwidth is ~50 mbp.

We also collected the perf data for patch 2 and 3 on our production,
before the patchset:
+  57.88%  kqemu  [kernel.kallsyms][k] queued_spin_lock_slowpath
+  10.55%  kqemu  [kernel.kallsyms][k] __lock_acquire
+   4.83%  kqemu  [kernel.kallsyms][k] flush_tlb_func_common

-   1.16%  kqemu  [kernel.kallsyms][k] lock_acquire 
  ▒
- lock_acquire  
   ▒
   - 15.68% _raw_spin_lock  
   ▒
  + 29.42% __schedule   
   ▒
  + 29.14% perf_event_context_sched_out 
   ▒
  + 23.60% tdp_page_fault   
   ▒
  + 10.54% do_anonymous_page
   ▒
  + 2.07% kvm_mmu_notifier_invalidate_range_start   
   ▒
  + 1.83% zap_pte_range 
   ▒
  + 1.44% kvm_mmu_notifier_invalidate_range_end


apply our work:
+  51.92%  kqemu  [kernel.kallsyms][k] queued_spin_lock_slowpath
+  14.82%  kqemu  [kernel.kallsyms][k] __lock_acquire
+   1.47%  kqemu  [kernel.kallsyms][k] mark_lock.clone.0
+   1.46%  kqemu  [kernel.kallsyms][k] native_sched_clock
+   1.31%  kqemu  [kernel.kallsyms][k] lock_acquire
+   1.24%  kqemu  libc-2.12.so [.] __memset_sse2

-  14.82%  kqemu  [kernel.kallsyms][k] __lock_acquire   
  ▒
- __lock_acquire
   ▒
   - 99.75% lock_acquire
   ▒
  - 18.38% _raw_spin_lock   
   ▒
 + 39.62% tdp_page_fault
   ▒
 + 31.32% __schedule
   ▒
 + 27.53% perf_event_context_sched_out  
   ▒
 + 0.58% hrtimer_interrupt


We can see the TLB flush and mmu-lock contention have gone.

Xiao Guangrong (10):
   migration: stop compressing page in migration thread
   migration: stop compression to allocate and free memory frequently
   migration: stop decompression to allocate and free memory frequently
   migration: detect compression and decompression errors
   migration: introduce control_save_page()
   migration: move some code to ram_save_host_page
   migration: move calling control_save_page to the common place
   migration: move calling save_zero_page to the common place
   migratio

Re: [Qemu-devel] [PATCH v2 for-2.12] tap: set vhostfd passed from qemu cli to non-blocking

2018-04-07 Thread Michael S. Tsirkin
On Fri, Apr 06, 2018 at 01:51:25PM -0500, Brijesh Singh wrote:
> A guest boot hangs while probing the network interface when
> iommu_platform=on is used.
> 
> The following qemu cli hangs without this patch:
> 
> # $QEMU \
>   -netdev tap,fd=3,id=hostnet0,vhost=on,vhostfd=4 3<>/dev/tap67 
> 4<>/dev/host-net \
>   -device 
> virtio-net-pci,netdev=hostnet0,id=net0,iommu_platform=on,disable-legacy=on \
>   ...
> 
> Commit: c471ad0e9bd46 (vhost_net: device IOTLB support) took care of
> setting vhostfd to non-blocking when QEMU opens /dev/host-net but if
> the fd is passed from qemu cli then we need to ensure that fd is set
> to non-blocking.
> 
> Fixes: c471ad0e9bd46 "vhost_net: device IOTLB support"
> Cc: Michael S. Tsirkin 
> Cc: Jason Wang 
> Signed-off-by: Brijesh Singh 

Reviewed-by: Michael S. Tsirkin 

> ---
> 
> Changes since v1:
>  - use qemu_set_nonblock() instead of fcntl(..)
> 
>  net/tap.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/net/tap.c b/net/tap.c
> index 2b3a36f9b50d..89c4e19162a2 100644
> --- a/net/tap.c
> +++ b/net/tap.c
> @@ -40,6 +40,7 @@
>  #include "qemu-common.h"
>  #include "qemu/cutils.h"
>  #include "qemu/error-report.h"
> +#include "qemu/sockets.h"
>  
>  #include "net/tap.h"
>  
> @@ -693,6 +694,7 @@ static void net_init_tap_one(const NetdevTapOptions *tap, 
> NetClientState *peer,
>  }
>  return;
>  }
> +qemu_set_nonblock(vhostfd);
>  } else {
>  vhostfd = open("/dev/vhost-net", O_RDWR);
>  if (vhostfd < 0) {
> -- 
> 2.14.3



[Qemu-devel] [Bug 1416246] Re: create guest fail when compile qemu with parameter "--disable-gtk"

2018-04-07 Thread Launchpad Bug Tracker
[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1416246

Title:
  create guest fail when compile qemu with parameter "--disable-gtk"

Status in QEMU:
  Expired

Bug description:
  Environment:
  
  Host OS (ia32/ia32e/IA64):ia32e
  Guest OS (ia32/ia32e/IA64):ia32e
  Guest OS Type (Linux/Windows):Linux
  kvm.git Commit:8fff5e374a2f6047d1bb52288af7da119bc75765
  qemu.kvm Commit:16017c48547960539fcadb1f91d252124f442482
  Host Kernel Version:3.19.0-rc3
  Hardware:Ivytown_EP, Haswell_EP

  
  Bug detailed description:
  --
  compile the qemu with disable gtk, the create guest , the guest create fail

  note:
  1.qemu.git: 699eae17b841e6784dc3864bf357e26bff1e9dfe
  when compile the qemu with enable gtk or disable gtk, the guest create pass

  2. this should be a qemu bug
  kvm.git   +  qemu.git   = result
  8fff5e37  +  16017c48   = bad
  8fff5e37  +  699eae17   = good

  Reproduce steps:
  
  1. git clone git://vt-sync/qemu.git qemu.git
  2. cd qemu.git
  3. ./configure --target-list=x86_64-softmmu --disable-sdl --disable-gtk
  4. make -j16
  5. ./x86_64-softmmu/qemu-system-x86_64 -enable-kvm -m 4G -smp 2 -net none 
/root/rhel6u5.qcow

  Current result:
  
  create gust fail when compile qemu with disable gtk

  Expected result:
  
  create guest pass when compile qemu with disable or enable gtk

  Basic root-causing log:
  --
  [root@vt-ivt2 qemu.git]# ./x86_64-softmmu/qemu-system-x86_64 -enable-kvm -m 
4G -smp 2 -net none /root/rhel6u5-1.qcow 
  qemu-system-x86_64: Invalid parameter 'to'
  Segmentation fault (core dumped)

  some dmesg message:
  qemu-system-x86[96364]: segfault at 24 ip 7fe6d9636a69 sp 
7fffc03cf970 error 4 in qemu-system-x86_64[7fe6d933+4ba000]

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1416246/+subscriptions



Re: [Qemu-devel] [PATCH for-2.12] monitor: bind dispatch bh to iohandler context

2018-04-07 Thread Peter Xu
On Wed, Apr 04, 2018 at 03:22:20PM +0100, Stefan Hajnoczi wrote:
> On Tue, Apr 03, 2018 at 01:01:15PM +0800, Peter Xu wrote:
> > Eric Auger reported the problem days ago that OOB broke ARM when running
> > with libvirt:
> > 
> > http://lists.gnu.org/archive/html/qemu-devel/2018-03/msg06231.html

[1]

> > 
> > This patch fixes the problem.
> > 
> > It's not really needed now since we have turned OOB off now, but it's
> > still a bug fix, and it'll start to work when we turn OOB on for ARM.
> > 
> > The problem was that the monitor dispatcher bottom half was bound to
> > qemu_aio_context, but that context seems to be for block only.
> 
> No, it is not block-only.  iohandler_ctx is for the legacy
> qemu_set_fd_handler() API only and modern code should use
> qemu_aio_context.
> 
> The difference between qemu_aio_context and iohandler_ctx is that
> aio_poll(qemu_aio_context) does not process iohandler_ctx (since it's a
> difference context).  That is the legacy behavior that
> qemu_set_fd_handler() expects and it's implemented by keeping a separate
> iohandler_ctx.
> 
> > For the
> > rest of the QEMU world we should be using iohandler context.  So
> > assigning monitor dispatcher bottom half to that context.
> 
> This patch relies on the side-effect that iohandler_ctx is only called
> later by the main loop, which seems to prevent the crash below.

Actually I thought that's why we need that iohandler_ctx, no?

Any better suggestion would be welcomed...

> 
> What is the actual crash/problem?  You mentioned the GIC, but what does
> that have to do with monitor code crashing?

The actually crash is mentioned above [1].  Please have a look on that
thread for details.

Basic idea is that the QMP command "cont" is run earlier than before,
which breaks the ordering of past, so vcpu threads are running
earlier, even if they are not really fully setup.  Strange things
happen when we want to run un-prepared vcpus.

> 
> > 
> > If without this change, QMP dispatcher might be run even before reaching
> > main loop in block IO path, for example, in a stack like:
> > 
> > #0  qmp_cont ()
> > #1  0x006bd210 in qmp_marshal_cont ()
> > #2  0x00ac05c4 in do_qmp_dispatch ()
> > #3  0x00ac07a0 in qmp_dispatch ()
> > #4  0x00472d60 in monitor_qmp_dispatch_one ()
> > #5  0x0047302c in monitor_qmp_bh_dispatcher ()
> > #6  0x00acf374 in aio_bh_call ()
> > #7  0x00acf428 in aio_bh_poll ()
> > #8  0x00ad5110 in aio_poll ()
> > #9  0x00a08ab8 in blk_prw ()
> > #10 0x00a091c4 in blk_pread ()
> > #11 0x00734f94 in pflash_cfi01_realize ()
> > #12 0x0075a3a4 in device_set_realized ()
> > #13 0x009a26cc in property_set_bool ()
> > #14 0x009a0a40 in object_property_set ()
> > #15 0x009a3a08 in object_property_set_qobject ()
> > #16 0x009a0c8c in object_property_set_bool ()
> > #17 0x00758f94 in qdev_init_nofail ()
> > #18 0x0058e190 in create_one_flash ()
> > #19 0x0058e2f4 in create_flash ()
> > #20 0x005902f0 in machvirt_init ()
> > #21 0x007635cc in machine_run_board_init ()
> > #22 0x006b135c in main ()
> > 
> > This can cause ARM to crash when used with both OOB capability enabled
> > and libvirt as upper layer, since libvirt will start QEMU with "-S" and
> > the first "cont" command will arrive very early if the context is not
> > correct (which is what above stack shows).  Then, the vcpu threads will
> > start to run right after the qmp_cont() call, even when GICs have not
> > been setup correctly yet (which is done in kvm_arm_machine_init_done()).
> >
> > My sincere thanks to Eric Auger who offered great help during both
> > debugging and verifying the problem.  The ARM test was carried out by
> > applying this patch upon QEMU 2.12.0-rc0 and problem is gone after the
> > patch.
> > 
> > A quick test of mine shows that after this patch applied we can pass all
> > raw iotests even with OOB on by default.
> > 
> > CC: Eric Blake 
> > CC: Markus Armbruster 
> > CC: Stefan Hajnoczi 
> > CC: Fam Zheng 
> > Reported-by: Eric Auger 
> > Tested-by: Eric Auger 
> > Signed-off-by: Peter Xu 
> > ---
> > 
> > This patch will fix all known OOB breakages I know so far, but I think
> > for better safety I'll still keep OOB off, and I'll send another patch
> > to turn default OOB on after 2.12 release.
> > ---
> >  monitor.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/monitor.c b/monitor.c
> > index 51f4cf480f..39f8ee17ba 100644
> > --- a/monitor.c
> > +++ b/monitor.c
> > @@ -4467,7 +4467,7 @@ static void monitor_iothread_init(void)
> >   * have assumption to be run on main loop thread.  It would be
> >   * nice that one day we can remove this assumption 

Re: [Qemu-devel] [PATCH for-2.12] monitor: bind dispatch bh to iohandler context

2018-04-07 Thread Peter Xu
On Thu, Apr 05, 2018 at 01:34:31PM +0100, Stefan Hajnoczi wrote:
> On Wed, Apr 04, 2018 at 10:07:34AM -0500, Eric Blake wrote:
> > On 04/04/2018 09:22 AM, Stefan Hajnoczi wrote:
> > > On Tue, Apr 03, 2018 at 01:01:15PM +0800, Peter Xu wrote:
> > >> Eric Auger reported the problem days ago that OOB broke ARM when running
> > >> with libvirt:
> > >>
> > >> http://lists.gnu.org/archive/html/qemu-devel/2018-03/msg06231.html
> > >>
> > >> This patch fixes the problem.
> > >>
> > >> It's not really needed now since we have turned OOB off now, but it's
> > >> still a bug fix, and it'll start to work when we turn OOB on for ARM.
> > >>
> > >> The problem was that the monitor dispatcher bottom half was bound to
> > >> qemu_aio_context, but that context seems to be for block only.
> > > 
> > > No, it is not block-only.  iohandler_ctx is for the legacy
> > > qemu_set_fd_handler() API only and modern code should use
> > > qemu_aio_context.
> > > 
> > > The difference between qemu_aio_context and iohandler_ctx is that
> > > aio_poll(qemu_aio_context) does not process iohandler_ctx (since it's a
> > > difference context).  That is the legacy behavior that
> > > qemu_set_fd_handler() expects and it's implemented by keeping a separate
> > > iohandler_ctx.
> > 
> > Do I need to put a hold on my pull request while we come to a better
> > understanding of root cause, or is this patch still okay to include?
> 
> The cover letter says this patch is not critical for 2.12:
> 
> "It's not really needed now since we have turned OOB off now, but it's
> still a bug fix, and it'll start to work when we turn OOB on for ARM."
> 
> Please hold off until the nature of the bug is understood.

But frankly speaking I would still prefer the fix be ready for 2.12
since that's still a bugfix and after all we have x-oob=on parameter
to enable that (that's why I mentioned for-2.12 in subject).  The fix
can be something better than this patch, but still it'll be good 2.12
has that.

Let's first discuss which's the best way to fix this up, then we'll
see whether this can be fixed for 2.12, or later.

Thanks,

-- 
Peter Xu



Re: [Qemu-devel] [PATCH for-2.12] iothread: workaround glib bug which hangs qmp-test

2018-04-07 Thread Peter Xu
On Wed, Apr 04, 2018 at 03:53:05PM +0100, Stefan Hajnoczi wrote:
> On Wed, Apr 04, 2018 at 02:53:46PM +0800, Peter Xu wrote:
> > Free the AIO context earlier than the GMainContext (if we have) to
> > workaround a possible Glib bug.  No functional change at all.
> > 
> > We encountered a qmp-test hang with oob:
> > 
> >   #0  0x7f35ffe45334 in __lll_lock_wait () from /lib64/libpthread.so.0
> >   #1  0x7f35ffe405d8 in _L_lock_854 () from /lib64/libpthread.so.0
> >   #2  0x7f35ffe404a7 in pthread_mutex_lock () from 
> > /lib64/libpthread.so.0
> >   #3  0x7f35fc5b9c9d in g_source_unref_internal (source=0x24f0600, 
> > context=0x7f35f960, have_lock=0) at gmain.c:1685
> >   #4  0x00aa6672 in aio_context_unref (ctx=0x24f0600) at 
> > /root/qemu/util/async.c:497
> >   #5  0x0065851c in iothread_instance_finalize (obj=0x24f0380) at 
> > /root/qemu/iothread.c:129
> >   #6  0x00962d79 in object_deinit (obj=0x24f0380, type=0x242e960) 
> > at /root/qemu/qom/object.c:462
> >   #7  0x00962e0d in object_finalize (data=0x24f0380) at 
> > /root/qemu/qom/object.c:476
> >   #8  0x00964146 in object_unref (obj=0x24f0380) at 
> > /root/qemu/qom/object.c:924
> >   #9  0x00965880 in object_finalize_child_property (obj=0x24ec640, 
> > name=0x24efca0 "mon_iothread", opaque=0x24f0380) at 
> > /root/qemu/qom/object.c:1436
> >   #10 0x00962c33 in object_property_del_child (obj=0x24ec640, 
> > child=0x24f0380, errp=0x0) at /root/qemu/qom/object.c:436
> >   #11 0x00962d26 in object_unparent (obj=0x24f0380) at 
> > /root/qemu/qom/object.c:455
> >   #12 0x00658f00 in iothread_destroy (iothread=0x24f0380) at 
> > /root/qemu/iothread.c:365
> >   #13 0x004c67a8 in monitor_cleanup () at /root/qemu/monitor.c:4663
> >   #14 0x00669e27 in main (argc=16, argv=0x7ffc8b1ae2f8, 
> > envp=0x7ffc8b1ae380) at /root/qemu/vl.c:4749
> > 
> > With glib version 2.28.8-9 (current default version on centos6) we might
> > encounter above with the old code. It is verified that glib version
> > 2.50.3-3 won't trigger that bug again, but since we are still supporting
> > glib 2.28.8-9, we may want this workaround.
> 
> This patch does not contain enough information to explain what this
> "possible Glib bug" is.  Please provide information on the root cause.
> 
> Without understanding the problem, it's hard for anyone to review this
> patch and for other developers to avoid regressions in the future.

I suspect this can be the fix of the problem (commit ID of glib
repository, https://github.com/GNOME/glib):

commit 26056558be4656ee6e891a4fae5d4198de7519cf
Author: Dan Winship 
Date:   Mon Jul 30 08:06:57 2012 -0400

gmain: allow g_source_get_context() on destroyed sources

The thing is that before this commit glib won't zero the
source->context fields of bound gsources when a context is
unreferenced, and after this patch it did.  Here I suspect the old
glib will try to take a lock of context which has already freed, hence
hanged death.

The commit is included in glib 2.33.10 or later, which seems
reasonable (the bad version I tried was 2.28.8, and I also verified
one of the good versions is 2.50.3, and 2.33.10 lies in them). I
didn't further verify the commit and rebuild glib/qemu, hopefully this
can be a valid explanation already.

If this is correct, then the rule of thumb would be: let's destroy all
the gsources that bound to the context before destroying the context
itself, until we drop support for glib 2.33.10.

I understand your worry about not having everything clear in the
commit message. Indeed we'd better know why the bug happened and then
we can avoid that and even use that information when we want to
increase the minimum version of glib we support.  However IMHO that's
something extra, it's not a reason to not merge the patch, it's not
the reason to refuse to fix QEMU which can at least let patchew run
nicely with QEMU 2.12 on centos6.

After all, the thing unclear is out of QEMU, we can't force every QEMU
developer to be fluent with internals of every library that QEMU uses
(glib is only one of them)...  And it may take a lot of time to dig
the real thing out for a QEMU developer.  So, if the patch can well
explain itself (IMHO this patch does - it only switches which object
to destroy first, nothing else is changed) and the patch is tested,
and can prove that it fixes something wrong has happened beneath QEMU,
then IMHO we should merge it.

So I would still like that we merge this patch for 2.12 (we can for
sure enhance the commit message a bit, though). In all cases, thanks
for your review.

-- 
Peter Xu