date:20150331

[Xen-devel] [RFC PATCH COLO v5 29/29] cmdline switches and config vars to control colo-proxy

2015-03-31 Thread Yang Hongyang

Add cmdline switches to the 'xl migrate-receive' command to specify
a domain-specific hotplug script to set up the COLO proxy.

Add a new config var 'colo.default.agentscript' to xl.conf, which
allows the user to override the default global script used to
set up the COLO proxy.

Signed-off-by: Yang Hongyang 
Signed-off-by: Wen Congyang 
---
 docs/man/xl.conf.pod.5  |  6 ++
 docs/man/xl.pod.1   |  1 -
 tools/libxl/libxl.c | 12 +++
 tools/libxl/libxl_create.c  | 14 +++--
 tools/libxl/libxl_types.idl |  1 +
 tools/libxl/xl.c|  3 +++
 tools/libxl/xl.h|  1 +
 tools/libxl/xl_cmdimpl.c| 51 ++---
 8 files changed, 74 insertions(+), 15 deletions(-)

diff --git a/docs/man/xl.conf.pod.5 b/docs/man/xl.conf.pod.5
index 8ae19bb..8f7fd28 100644
--- a/docs/man/xl.conf.pod.5
+++ b/docs/man/xl.conf.pod.5
@@ -111,6 +111,12 @@ Configures the default script used by Remus to setup 
network buffering.
 
 Default: C
 
+=item B<colo.default.agentscript="PATH">
+
+Configures the default script used by COLO to setup colo-proxy.
+
+Default: C</etc/xen/scripts/colo-proxy-setup>
+
 =item B
 
 Configures the default output format used by xl when printing "machine
diff --git a/docs/man/xl.pod.1 b/docs/man/xl.pod.1
index 431ef5e..47d58da 100644
--- a/docs/man/xl.pod.1
+++ b/docs/man/xl.pod.1
@@ -447,7 +447,6 @@ N.B: Remus support in xl is still in experimental 
(proof-of-concept) phase.
  Disk replication support is limited to DRBD disks.
 
  COLO support in xl is still in experimental (proof-of-concept) phase.
- There is no support for network at the moment.
 
 B
 
diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index 08d68df..f4079ee 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -3398,6 +3398,16 @@ void libxl__device_nic_add(libxl__egc *egc, uint32_t 
domid,
 flexarray_append(back, nic->ifname);
 }
 
+if (nic->forwarddev) {
+flexarray_append(back, "forwarddev");
+flexarray_append(back, nic->forwarddev);
+}
+
+if (nic->forwardbr) {
+flexarray_append(back, "forwardbr");
+flexarray_append(back, nic->forwardbr);
+}
+
 flexarray_append(back, "mac");
 flexarray_append(back,libxl__sprintf(gc,
 LIBXL_MAC_FMT, LIBXL_MAC_BYTES(nic->mac)));
@@ -3521,6 +3531,8 @@ static int libxl__device_nic_from_xs_be(libxl__gc *gc,
 nic->ip = READ_BACKEND(NOGC, "ip");
 nic->bridge = READ_BACKEND(NOGC, "bridge");
 nic->script = READ_BACKEND(NOGC, "script");
+nic->forwarddev = READ_BACKEND(NOGC, "forwarddev");
+nic->forwardbr = READ_BACKEND(NOGC, "forwardbr");
 
 /* vif_ioemu nics use the same xenstore entries as vif interfaces */
 tmp = READ_BACKEND(gc, "type");
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index 1fae0a4..b1e9372 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -1132,6 +1132,11 @@ static void domcreate_bootloader_done(libxl__egc *egc,
 crs->superpages = superpages;
 crs->pae = pae;
 crs->callback = libxl__colo_restore_setup_done;
+if (dcs->colo_proxy_script)
+crs->colo_proxy_script = libxl__strdup(gc, dcs->colo_proxy_script);
+else
+crs->colo_proxy_script = GCSPRINTF("%s/colo-proxy-setup",
+   libxl__xen_script_dir_path());
 libxl__colo_restore_setup(egc, crs);
 } else
 libxl__xc_domain_restore(egc, dcs,
@@ -1628,6 +1633,7 @@ static void domain_create_cb(libxl__egc *egc,
 static int do_domain_create(libxl_ctx *ctx, libxl_domain_config *d_config,
 uint32_t *domid, int restore_fd,
 int send_fd, int checkpointed_stream,
+const char *colo_proxy_script,
 const libxl_asyncop_how *ao_how,
 const libxl_asyncprogress_how *aop_console_how)
 {
@@ -1643,6 +1649,7 @@ static int do_domain_create(libxl_ctx *ctx, 
libxl_domain_config *d_config,
 cdcs->dcs.send_fd = send_fd;
 cdcs->dcs.callback = domain_create_cb;
 cdcs->dcs.checkpointed_stream = checkpointed_stream;
+cdcs->dcs.colo_proxy_script = colo_proxy_script;
 libxl__ao_progress_gethow(&cdcs->dcs.aop_console_how, aop_console_how);
 cdcs->domid_out = domid;
 
@@ -1686,7 +1693,7 @@ int libxl_domain_create_new(libxl_ctx *ctx, 
libxl_domain_config *d_config,
 const libxl_asyncprogress_how *aop_console_how)
 {
 unset_disk_colo_restore(d_config);
-return do_domain_create(ctx, d_config, domid, -1, -1, 0,
+return do_domain_create(ctx, d_config, domid, -1, -1, 0, NULL,
 ao_how, aop_console_how);
 }
 
@@ -1697,16 +1704,19 @@ int libxl_domain_create_restore(libxl_ctx *ctx, 
libxl_domain_config *d_config,
 const libxl_asyncprogress_how *aop_console_how)
 {
 int send_fd = -1;
+char *colo_pr

[Xen-devel] [RFC PATCH COLO v5 27/29] setup and control colo proxy on primary side

2015-03-31 Thread Yang Hongyang

Set up and control the COLO proxy on the primary side.

Signed-off-by: Yang Hongyang 
---
 tools/libxl/libxl_colo_save.c | 124 +++---
 tools/libxl/libxl_internal.h  |   4 ++
 2 files changed, 120 insertions(+), 8 deletions(-)

diff --git a/tools/libxl/libxl_colo_save.c b/tools/libxl/libxl_colo_save.c
index de270e2..e325cff 100644
--- a/tools/libxl/libxl_colo_save.c
+++ b/tools/libxl/libxl_colo_save.c
@@ -18,9 +18,11 @@
 #include "libxl_internal.h"
 #include "libxl_colo.h"
 
+extern const libxl__checkpoint_device_instance_ops colo_save_device_nic;
 extern const libxl__checkpoint_device_instance_ops colo_save_device_qdisk;
 
 static const libxl__checkpoint_device_instance_ops *colo_ops[] = {
+&colo_save_device_nic,
 &colo_save_device_qdisk,
 NULL,
 };
@@ -32,9 +34,15 @@ static int 
init_device_subkind(libxl__checkpoint_devices_state *cds)
 int rc;
 STATE_AO_GC(cds->ao);
 
-rc = init_subkind_qdisk(cds);
+rc = init_subkind_colo_nic(cds);
 if (rc) goto out;
 
+rc = init_subkind_qdisk(cds);
+if (rc) {
+cleanup_subkind_colo_nic(cds);
+goto out;
+}
+
 rc = 0;
 out:
 return rc;
@@ -45,6 +53,7 @@ static void 
cleanup_device_subkind(libxl__checkpoint_devices_state *cds)
 /* cleanup device subkind-specific state in the libxl ctx */
 STATE_AO_GC(cds->ao);
 
+cleanup_subkind_colo_nic(cds);
 cleanup_subkind_qdisk(cds);
 }
 
@@ -75,9 +84,16 @@ void libxl__colo_save_setup(libxl__egc *egc, 
libxl__colo_save_state *css)
 css->svm_running = false;
 css->paused = true;
 css->qdisk_setuped = false;
+libxl__ev_child_init(&css->child);
 
-/* TODO: nic support */
-cds->device_kind_flags = (1 << LIBXL__DEVICE_KIND_VBD);
+if (dss->remus->netbufscript)
+css->colo_proxy_script = libxl__strdup(gc, dss->remus->netbufscript);
+else
+css->colo_proxy_script = GCSPRINTF("%s/colo-proxy-setup",
+   libxl__xen_script_dir_path());
+
+cds->device_kind_flags = (1 << LIBXL__DEVICE_KIND_VIF) |
+ (1 << LIBXL__DEVICE_KIND_VBD);
 cds->ops = colo_ops;
 cds->callback = colo_save_setup_done;
 cds->ao = ao;
@@ -103,12 +119,18 @@ static void colo_save_setup_done(libxl__egc *egc,
 STATE_AO_GC(cds->ao);
 
 if (!rc) {
+css->cps.ao = ao;
+rc = colo_proxy_setup(&css->cps);
+if (rc)
+goto failed;
 libxl__domain_suspend(egc, dss);
 return;
 }
 
 LOG(ERROR, "COLO: failed to setup device for guest with domid %u",
 dss->domid);
+
+failed:
 css->cds.callback = colo_save_setup_failed;
 libxl__checkpoint_devices_teardown(egc, &css->cds);
 }
@@ -156,6 +178,7 @@ static void colo_teardown_done(libxl__egc *egc,
 libxl__domain_suspend_state *dss = CONTAINER_OF(css, *dss, css);
 
 cleanup_device_subkind(cds);
+colo_proxy_teardown(&css->cps);
 dss->callback(egc, dss, rc);
 }
 
@@ -436,6 +459,8 @@ static void colo_read_svm_ready_done(libxl__egc *egc,
 goto out;
 }
 
+colo_proxy_preresume(&css->cps);
+
 css->svm_running = true;
 css->cds.callback = colo_preresume_cb;
 libxl__checkpoint_devices_preresume(egc, &css->cds);
@@ -529,6 +554,8 @@ static void colo_read_svm_resumed_done(libxl__egc *egc,
 goto out;
 }
 
+colo_proxy_postresume(&css->cps);
+
 ok = 1;
 
 out:
@@ -537,6 +564,91 @@ out:
 
 
 /* = colo: wait new checkpoint = */
+
+static void colo_start_new_checkpoint(libxl__egc *egc,
+  libxl__checkpoint_devices_state *cds,
+  int rc);
+static void colo_proxy_async_wait_for_checkpoint(libxl__colo_save_state *css);
+static void colo_proxy_async_call_done(libxl__egc *egc,
+   libxl__ev_child *child,
+   int pid,
+   int status);
+
+static void colo_proxy_async_call(libxl__egc *egc,
+  libxl__colo_save_state *css,
+  void func(libxl__colo_save_state *),
+  libxl__ev_child_callback callback)
+{
+int pid = -1, rc;
+
+STATE_AO_GC(css->cds.ao);
+
+/* Fork and call */
+pid = libxl__ev_child_fork(gc, &css->child, callback);
+if (pid == -1) {
+LOG(ERROR, "unable to fork");
+rc = ERROR_FAIL;
+goto out;
+}
+
+if (!pid) {
+/* child */
+func(css);
+/* notreached */
+abort();
+}
+
+return;
+
+out:
+callback(egc, &css->child, -1, 1);
+}
+
+static void colo_proxy_wait_for_checkpoint(libxl__egc *egc,
+   libxl__colo_save_state *css)
+{
+colo_proxy_async_call(egc, css,
+  colo_proxy_async_wait_for_checkpoint,
+  colo_

[Xen-devel] [RFC PATCH COLO v5 25/29] COLO proxy: preresume, postresume and checkpoint

2015-03-31 Thread Yang Hongyang

Implement the preresume, postresume and checkpoint operations of the COLO proxy.

Signed-off-by: Yang Hongyang 
---
 tools/libxl/libxl_colo.h   |  3 +++
 tools/libxl/libxl_colo_proxy.c | 57 ++
 2 files changed, 60 insertions(+)

diff --git a/tools/libxl/libxl_colo.h b/tools/libxl/libxl_colo.h
index 5983aa0..872c652 100644
--- a/tools/libxl/libxl_colo.h
+++ b/tools/libxl/libxl_colo.h
@@ -47,4 +47,7 @@ extern void libxl__colo_save_teardown(libxl__egc *egc,
 
 extern int colo_proxy_setup(libxl__colo_proxy_state *cps);
 extern void colo_proxy_teardown(libxl__colo_proxy_state *cps);
+extern void colo_proxy_preresume(libxl__colo_proxy_state *cps);
+extern void colo_proxy_postresume(libxl__colo_proxy_state *cps);
+extern int colo_proxy_checkpoint(libxl__colo_proxy_state *cps);
 #endif
diff --git a/tools/libxl/libxl_colo_proxy.c b/tools/libxl/libxl_colo_proxy.c
index 486ed73..2483be3 100644
--- a/tools/libxl/libxl_colo_proxy.c
+++ b/tools/libxl/libxl_colo_proxy.c
@@ -208,3 +208,60 @@ void colo_proxy_teardown(libxl__colo_proxy_state *cps)
 cps->sock_fd = -1;
 }
 }
+
+/* = colo-proxy: preresume, postresume and checkpoint == */
+
+void colo_proxy_preresume(libxl__colo_proxy_state *cps)
+{
+colo_proxy_send(cps, NULL, 0, COLO_CHECKPOINT);
+/* TODO: need to handle if the call fails... */
+}
+
+void colo_proxy_postresume(libxl__colo_proxy_state *cps)
+{
+/* nothing to do... */
+}
+
+
+typedef struct colo_msg {
+bool is_checkpoint;
+} colo_msg;
+
+/*
+ * Return 1 if a checkpoint should be done,
+ * 0 if no checkpoint is needed (or no message was received),
+ * -1 on error.
+ */
+int colo_proxy_checkpoint(libxl__colo_proxy_state *cps)
+{
+uint8_t *buff;
+int64_t size;
+struct nlmsghdr *h;
+struct colo_msg *m;
+int ret = -1;
+
+size = colo_proxy_recv(cps, &buff, MSG_DONTWAIT);
+
+/* timeout, return no checkpoint message. */
+if (size <= 0) {
+return 0;
+}
+
+h = (struct nlmsghdr *) buff;
+
+if (h->nlmsg_type == NLMSG_ERROR) {
+goto out;
+}
+
+if (h->nlmsg_len < NLMSG_LENGTH(sizeof(*m))) {
+goto out;
+}
+
+m = NLMSG_DATA(h);
+
+ret = m->is_checkpoint ? 1 : 0;
+
+out:
+free(buff);
+return ret;
+}
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [RFC PATCH COLO v5 28/29] setup and control colo proxy on secondary side

2015-03-31 Thread Yang Hongyang

Set up and control the COLO proxy on the secondary side.

Signed-off-by: Yang Hongyang 
---
 tools/libxl/libxl_colo_restore.c | 26 +++---
 tools/libxl/libxl_internal.h |  3 +++
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/tools/libxl/libxl_colo_restore.c b/tools/libxl/libxl_colo_restore.c
index 28eb8ab..151e7a5 100644
--- a/tools/libxl/libxl_colo_restore.c
+++ b/tools/libxl/libxl_colo_restore.c
@@ -64,9 +64,11 @@ static void libxl__colo_restore_domain_resume_callback(void 
*data);
 static void libxl__colo_restore_domain_checkpoint_callback(void *data);
 static void libxl__colo_restore_domain_suspend_callback(void *data);
 
+extern const libxl__checkpoint_device_instance_ops colo_restore_device_nic;
 extern const libxl__checkpoint_device_instance_ops colo_restore_device_qdisk;
 
 static const libxl__checkpoint_device_instance_ops *colo_restore_ops[] = {
+&colo_restore_device_nic,
 &colo_restore_device_qdisk,
 NULL,
 };
@@ -166,8 +168,14 @@ static int 
init_device_subkind(libxl__checkpoint_devices_state *cds)
 int rc;
 STATE_AO_GC(cds->ao);
 
+rc = init_subkind_colo_nic(cds);
+if (rc) goto out;
+
 rc = init_subkind_qdisk(cds);
-if (rc)  goto out;
+if (rc) {
+cleanup_subkind_colo_nic(cds);
+goto out;
+}
 
 rc = 0;
 out:
@@ -179,6 +187,7 @@ static void 
cleanup_device_subkind(libxl__checkpoint_devices_state *cds)
 /* cleanup device subkind-specific state in the libxl ctx */
 STATE_AO_GC(cds->ao);
 
+cleanup_subkind_colo_nic(cds);
 cleanup_subkind_qdisk(cds);
 }
 
@@ -293,6 +302,11 @@ void libxl__colo_restore_setup(libxl__egc *egc,
 
 crs->qdisk_setuped = false;
 
+crs->cps.ao = ao;
+rc = colo_proxy_setup(&crs->cps);
+if (rc)
+goto err_init_dss2;
+
 rc = 0;
 
 out:
@@ -398,6 +412,8 @@ static void colo_restore_teardown_done(libxl__egc *egc,
 if (crcs->teardown_devices)
 cleanup_device_subkind(cds);
 
+colo_proxy_teardown(&crs->cps);
+
 rc = crcs->saved_rc;
 if (!rc) {
 crcs->callback = do_failover_done;
@@ -607,6 +623,8 @@ static void colo_restore_preresume_cb(libxl__egc *egc,
 goto out;
 }
 
+colo_proxy_preresume(&crs->cps);
+
 colo_restore_resume_vm(egc, crcs);
 
 return;
@@ -643,6 +661,8 @@ static void colo_resume_vm_done(libxl__egc *egc,
 
 crcs->status = LIBXL_COLO_RESUMED;
 
+colo_proxy_postresume(&crs->cps);
+
 /* avoid calling libxl__xc_domain_restore_done() more than once */
 if (crs->saved_cb) {
 dcs->callback = crs->saved_cb;
@@ -792,8 +812,8 @@ static void colo_setup_checkpoint_devices(libxl__egc *egc,
 
 STATE_AO_GC(crs->ao);
 
-/* TODO: nic support */
-cds->device_kind_flags = (1 << LIBXL__DEVICE_KIND_VBD);
+cds->device_kind_flags = (1 << LIBXL__DEVICE_KIND_VIF) |
+ (1 << LIBXL__DEVICE_KIND_VBD);
 cds->callback = colo_restore_setup_cds_done;
 cds->ao = ao;
 cds->domid = crs->domid;
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index a64efdc..bd3c9e3 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -3213,6 +3213,9 @@ struct libxl__colo_restore_state {
 
 /* private, used by qdisk block replication */
 bool qdisk_setuped;
+
+/* private, used by colo proxy */
+libxl__colo_proxy_state cps;
 };
 
 struct libxl__domain_create_state {
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [RFC PATCH COLO v5 26/29] COLO nic: implement COLO nic subkind

2015-03-31 Thread Yang Hongyang

implement COLO nic subkind.

Signed-off-by: Yang Hongyang 
Signed-off-by: Wen Congyang 
---
 tools/hotplug/Linux/Makefile |   1 +
 tools/hotplug/Linux/colo-proxy-setup | 128 ++
 tools/libxl/Makefile |   1 +
 tools/libxl/libxl_colo_nic.c | 313 +++
 tools/libxl/libxl_internal.h |   5 +
 tools/libxl/libxl_types.idl  |   2 +
 6 files changed, 450 insertions(+)
 create mode 100755 tools/hotplug/Linux/colo-proxy-setup
 create mode 100644 tools/libxl/libxl_colo_nic.c

diff --git a/tools/hotplug/Linux/Makefile b/tools/hotplug/Linux/Makefile
index d94a9cb..1c28bea 100644
--- a/tools/hotplug/Linux/Makefile
+++ b/tools/hotplug/Linux/Makefile
@@ -25,6 +25,7 @@ XEN_SCRIPTS += vscsi
 XEN_SCRIPTS += block-iscsi
 XEN_SCRIPTS += block-drbd-probe
 XEN_SCRIPTS += $(XEN_SCRIPTS-y)
+XEN_SCRIPTS += colo-proxy-setup
 
 SUBDIRS-$(CONFIG_SYSTEMD) += systemd
 
diff --git a/tools/hotplug/Linux/colo-proxy-setup 
b/tools/hotplug/Linux/colo-proxy-setup
new file mode 100755
index 000..850f672
--- /dev/null
+++ b/tools/hotplug/Linux/colo-proxy-setup
@@ -0,0 +1,128 @@
+#! /bin/bash
+
+dir=$(dirname "$0")
+. "$dir/xen-hotplug-common.sh"
+. "$dir/hotplugpath.sh"
+. "$dir/xen-network-ft.sh"
+
+findCommand "$@"
+
+if [ "$command" != "setup" -a  "$command" != "teardown" ]
+then
+echo "Invalid command: $command"
+log err "Invalid command: $command"
+exit 1
+fi
+
+evalVariables "$@"
+
+: ${vifname:?}
+: ${forwarddev:?}
+: ${mode:?}
+: ${forwardbr:?}
+: ${index:?}
+: ${bridge:?}
+
+if [ "$mode" != "primary" -a "$mode" != "secondary" ]
+then
+echo "Invalid mode: $mode"
+log err "Invalid mode: $mode"
+exit 1
+fi
+
+if [ $index -lt 0 ] || [ $index -gt 100 ]; then
+echo "index overflow"
+exit 1
+fi
+
+function setup_primary()
+{
+do_without_error tc qdisc add dev $vifname root handle 1: prio
+do_without_error tc filter add dev $vifname parent 1: protocol ip prio 10 \
+u32 match u32 0 0 flowid 1:2 action mirred egress mirror dev 
$forwarddev
+do_without_error tc filter add dev $vifname parent 1: protocol arp prio 11 
\
+u32 match u32 0 0 flowid 1:2 action mirred egress mirror dev 
$forwarddev
+do_without_error tc filter add dev $vifname parent 1: protocol ipv6 prio \
+12 u32 match u32 0 0 flowid 1:2 action mirred egress mirror \
+dev $forwarddev
+
+do_without_error modprobe nf_conntrack_ipv4
+do_without_error modprobe xt_PMYCOLO sec_dev=$forwarddev
+
+do_without_error /usr/local/sbin/iptables -t mangle -I PREROUTING -m 
physdev --physdev-in \
+$vifname -j PMYCOLO --index $index
+do_without_error /usr/local/sbin/ip6tables -t mangle -I PREROUTING -m 
physdev --physdev-in \
+$vifname -j PMYCOLO --index $index
+do_without_error /usr/local/sbin/arptables -I INPUT -i $forwarddev -j MARK 
--set-mark $index
+}
+
+function teardown_primary()
+{
+do_without_error tc filter del dev $vifname parent 1: protocol ip prio 10 
u32 match u32 \
+0 0 flowid 1:2 action mirred egress mirror dev $forwarddev
+do_without_error tc filter del dev $vifname parent 1: protocol arp prio 11 
u32 match u32 \
+0 0 flowid 1:2 action mirred egress mirror dev $forwarddev
+do_without_error tc filter del dev $vifname parent 1: protocol ipv6 prio 
12 u32 match u32 \
+0 0 flowid 1:2 action mirred egress mirror dev $forwarddev
+do_without_error tc qdisc del dev $vifname root handle 1: prio
+
+do_without_error /usr/local/sbin/iptables -t mangle -F
+do_without_error /usr/local/sbin/ip6tables -t mangle -F
+do_without_error /usr/local/sbin/arptables -F
+do_without_error rmmod xt_PMYCOLO
+}
+
+function setup_secondary()
+{
+do_without_error brctl delif $bridge $vifname
+do_without_error brctl addif $forwardbr $vifname
+do_without_error brctl addif $forwardbr $forwarddev
+do_without_error modprobe xt_SECCOLO
+
+do_without_error /usr/local/sbin/iptables -t mangle -I PREROUTING -m 
physdev --physdev-in \
+$vifname -j SECCOLO --index $index
+do_without_error /usr/local/sbin/ip6tables -t mangle -I PREROUTING -m 
physdev --physdev-in \
+$vifname -j SECCOLO --index $index
+}
+
+function teardown_secondary()
+{
+do_without_error brctl delif $forwardbr $forwarddev
+do_without_error brctl delif $forwardbr $vifname
+do_without_error brctl addif $bridge $vifname
+
+do_without_error /usr/local/sbin/iptables -t mangle -F
+do_without_error /usr/local/sbin/ip6tables -t mangle -F
+do_without_error rmmod xt_SECCOLO
+}
+
+case "$command" in
+setup)
+if [ "$mode" = "primary" ]
+then
+setup_primary
+else
+setup_secondary
+fi
+
+success
+;;
+teardown)
+if [ "$mode" = "primary" ]
+then
+teardown_primary
+else
+teardown_secondary
+fi
+;;
+esac
+
+if [ "$

[Xen-devel] [RFC PATCH COLO v5 23/29] COLO: use qemu block replication

2015-03-31 Thread Yang Hongyang

From: Wen Congyang 

The guest should be paused before doing COLO!!!

Signed-off-by: Wen Congyang 
---
 tools/libxl/Makefile |   1 +
 tools/libxl/libxl_colo_qdisk.c   | 209 +++
 tools/libxl/libxl_colo_restore.c |  21 +++-
 tools/libxl/libxl_colo_save.c|  36 ++-
 tools/libxl/libxl_internal.h |  18 
 tools/libxl/libxl_qmp.c  |  31 ++
 6 files changed, 312 insertions(+), 4 deletions(-)
 create mode 100644 tools/libxl/libxl_colo_qdisk.c

diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index b2eaf14..12caf4c 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -58,6 +58,7 @@ endif
 
 LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o
 LIBXL_OBJS-y += libxl_colo_restore.o libxl_colo_save.o
+LIBXL_OBJS-y += libxl_colo_qdisk.o
 
 LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o
 LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o
diff --git a/tools/libxl/libxl_colo_qdisk.c b/tools/libxl/libxl_colo_qdisk.c
new file mode 100644
index 000..d73572e
--- /dev/null
+++ b/tools/libxl/libxl_colo_qdisk.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright (C) 2015 FUJITSU LIMITED
+ * Author: Wen Congyang 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+
+typedef struct libxl__colo_qdisk {
+libxl__checkpoint_device *dev;
+} libxl__colo_qdisk;
+
+/* == init() and cleanup() == */
+int init_subkind_qdisk(libxl__checkpoint_devices_state *cds)
+{
+/*
+ * We don't know if we use qemu block replication, so
+ * we cannot start block replication here.
+ */
+return 0;
+}
+
+void cleanup_subkind_qdisk(libxl__checkpoint_devices_state *cds)
+{
+}
+
+/* == setup() and teardown() == */
+static void colo_qdisk_setup(libxl__egc *egc, libxl__checkpoint_device *dev,
+ bool primary)
+{
+const libxl_device_disk *disk = dev->backend_dev;
+const char *addr = NULL;
+const char *export_name;
+int ret, rc = 0;
+
+/* Convenience aliases */
+libxl__checkpoint_devices_state *const cds = dev->cds;
+const char *colo_params = disk->colo_params;
+const int domid = cds->domid;
+
+EGC_GC;
+
+if (disk->backend != LIBXL_DISK_BACKEND_QDISK ||
+!libxl_defbool_val(disk->colo_enable)) {
+rc = ERROR_CHECKPOINT_DEVOPS_DOES_NOT_MATCH;
+goto out;
+}
+
+export_name = strstr(colo_params, ":exportname=");
+if (!export_name) {
+rc = ERROR_CHECKPOINT_DEVOPS_DOES_NOT_MATCH;
+goto out;
+}
+export_name += strlen(":exportname=");
+if (export_name[0] == 0) {
+rc = ERROR_CHECKPOINT_DEVOPS_DOES_NOT_MATCH;
+goto out;
+}
+
+dev->matched = 1;
+
+if (primary) {
+/* NBD server is not ready, so we cannot start block replication now */
+goto out;
+} else {
+libxl__colo_restore_state *crs = CONTAINER_OF(cds, *crs, cds);
+int len;
+
+if (crs->qdisk_setuped)
+goto out;
+
+crs->qdisk_setuped = true;
+
+len = export_name - strlen(":exportname=") - colo_params;
+addr = libxl__strndup(gc, colo_params, len);
+}
+
+ret = libxl__qmp_block_start_replication(gc, domid, primary, addr);
+if (ret)
+rc = ERROR_FAIL;
+
+out:
+dev->aodev.rc = rc;
+dev->aodev.callback(egc, &dev->aodev);
+}
+
+static void colo_qdisk_teardown(libxl__egc *egc, libxl__checkpoint_device *dev,
+bool primary)
+{
+int ret, rc = 0;
+
+/* Convenience aliases */
+libxl__checkpoint_devices_state *const cds = dev->cds;
+const int domid = cds->domid;
+
+EGC_GC;
+
+if (primary) {
+libxl__colo_save_state *css = CONTAINER_OF(cds, *css, cds);
+
+if (!css->qdisk_setuped)
+goto out;
+
+css->qdisk_setuped = false;
+} else {
+libxl__colo_restore_state *crs = CONTAINER_OF(cds, *crs, cds);
+
+if (!crs->qdisk_setuped)
+goto out;
+
+crs->qdisk_setuped = false;
+}
+
+ret = libxl__qmp_block_stop_replication(gc, domid, primary);
+if (ret)
+rc = ERROR_FAIL;
+
+out:
+dev->aodev.rc = rc;
+dev->aodev.callback(egc, &dev->aodev);
+}
+
+/* == checkpointing APIs == */
+/* should be called after libxl__checkpoint_device_instance_ops.preresume */
+int c

[Xen-devel] [RFC PATCH COLO v5 24/29] COLO proxy: implement setup/teardown of COLO proxy module

2015-03-31 Thread Yang Hongyang

Implement setup and teardown of the COLO proxy module.
We use netlink to communicate with the proxy module.

Signed-off-by: Yang Hongyang 
---
 tools/libxl/Makefile   |   1 +
 tools/libxl/libxl_colo.h   |   2 +
 tools/libxl/libxl_colo_proxy.c | 210 +
 tools/libxl/libxl_internal.h   |   9 ++
 4 files changed, 222 insertions(+)
 create mode 100644 tools/libxl/libxl_colo_proxy.c

diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 12caf4c..c74ba79 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -59,6 +59,7 @@ endif
 LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o
 LIBXL_OBJS-y += libxl_colo_restore.o libxl_colo_save.o
 LIBXL_OBJS-y += libxl_colo_qdisk.o
+LIBXL_OBJS-y += libxl_colo_proxy.o
 
 LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o
 LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o
diff --git a/tools/libxl/libxl_colo.h b/tools/libxl/libxl_colo.h
index 26a2563..5983aa0 100644
--- a/tools/libxl/libxl_colo.h
+++ b/tools/libxl/libxl_colo.h
@@ -45,4 +45,6 @@ extern void libxl__colo_save_teardown(libxl__egc *egc,
   libxl__colo_save_state *css,
   int rc);
 
+extern int colo_proxy_setup(libxl__colo_proxy_state *cps);
+extern void colo_proxy_teardown(libxl__colo_proxy_state *cps);
 #endif
diff --git a/tools/libxl/libxl_colo_proxy.c b/tools/libxl/libxl_colo_proxy.c
new file mode 100644
index 000..486ed73
--- /dev/null
+++ b/tools/libxl/libxl_colo_proxy.c
@@ -0,0 +1,210 @@
+/*
+ * Copyright (C) 2015 FUJITSU LIMITED
+ * Author: Yang Hongyang 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+#include "libxl_colo.h"
+#include 
+
+#define NETLINK_COLO 28
+
+enum colo_netlink_op {
+COLO_QUERY_CHECKPOINT = (NLMSG_MIN_TYPE + 1),
+COLO_CHECKPOINT,
+COLO_FAILOVER,
+COLO_PROXY_INIT,
+COLO_PROXY_RESET, /* UNUSED, will be used for continuous FT */
+};
+
+/* = colo-proxy: helper functions == */
+
+static int colo_proxy_send(libxl__colo_proxy_state *cps, uint8_t *buff, 
uint64_t size, int type)
+{
+struct sockaddr_nl sa;
+struct nlmsghdr msg;
+struct iovec iov;
+struct msghdr mh;
+int ret;
+
+STATE_AO_GC(cps->ao);
+
+memset(&sa, 0, sizeof(sa));
+sa.nl_family = AF_NETLINK;
+sa.nl_pid = 0;
+sa.nl_groups = 0;
+
+msg.nlmsg_len = NLMSG_SPACE(0);
+msg.nlmsg_flags = NLM_F_REQUEST;
+if (type == COLO_PROXY_INIT) {
+msg.nlmsg_flags |= NLM_F_ACK;
+}
+msg.nlmsg_seq = 0;
+/* This is untrusty */
+msg.nlmsg_pid = cps->index;
+msg.nlmsg_type = type;
+
+iov.iov_base = &msg;
+iov.iov_len = msg.nlmsg_len;
+
+mh.msg_name = &sa;
+mh.msg_namelen = sizeof(sa);
+mh.msg_iov = &iov;
+mh.msg_iovlen = 1;
+mh.msg_control = NULL;
+mh.msg_controllen = 0;
+mh.msg_flags = 0;
+
+ret = sendmsg(cps->sock_fd, &mh, 0);
+if (ret <= 0) {
+LOG(ERROR, "can't send msg to kernel by netlink: %s",
+strerror(errno));
+}
+
+return ret;
+}
+
+/* return the number of bytes received, or recvmsg()'s result (<= 0) on failure */
+static int64_t colo_proxy_recv(libxl__colo_proxy_state *cps, uint8_t **buff, 
int flags)
+{
+struct sockaddr_nl sa;
+struct iovec iov;
+struct msghdr mh = {
+.msg_name = &sa,
+.msg_namelen = sizeof(sa),
+.msg_iov = &iov,
+.msg_iovlen = 1,
+};
+uint32_t size = 16384;
+int64_t len = 0;
+int ret;
+
+STATE_AO_GC(cps->ao);
+uint8_t *tmp = libxl__malloc(gc, size);
+
+iov.iov_base = tmp;
+iov.iov_len = size;
+next:
+   ret = recvmsg(cps->sock_fd, &mh, flags);
+if (ret <= 0) {
+goto out;
+}
+
+len += ret;
+if (mh.msg_flags & MSG_TRUNC) {
+size += 16384;
+tmp = libxl__realloc(gc, tmp, size);
+iov.iov_base = tmp + len;
+iov.iov_len = size - len;
+goto next;
+}
+
+*buff = tmp;
+return len;
+
+out:
+free(tmp);
+*buff = NULL;
+return ret;
+}
+
+/* = colo-proxy: setup and teardown == */
+
+int colo_proxy_setup(libxl__colo_proxy_state *cps)
+{
+int skfd = 0;
+struct sockaddr_nl sa;
+struct nlmsghdr *h;
+struct timeval tv = {0, 50}; /* timeout for recvmsg from kernel */
+int i = 1;
+int ret = ERROR_FAIL;
+uint8_t *buff = NULL;
+int64_t size

[Xen-devel] [RFC PATCH COLO v5 22/29] Support colo mode for qemu disk

2015-03-31 Thread Yang Hongyang

From: Wen Congyang 

Usage: disk = ['...,colo,colo-params=xxx,active-disk=xxx,hidden-disk=xxx...']
The format of colo-params: host:port:exportname=xx

Signed-off-by: Wen Congyang 
Signed-off-by: Yang Hongyang 
---
 docs/man/xl.pod.1   |   2 +-
 tools/libxl/libxl.c |  42 ++-
 tools/libxl/libxl_create.c  |  25 -
 tools/libxl/libxl_device.c  |  38 +++
 tools/libxl/libxl_dm.c  | 262 ++--
 tools/libxl/libxl_types.idl |   5 +
 tools/libxl/libxlu_disk_l.l |   5 +
 7 files changed, 367 insertions(+), 12 deletions(-)

diff --git a/docs/man/xl.pod.1 b/docs/man/xl.pod.1
index adcbe37..431ef5e 100644
--- a/docs/man/xl.pod.1
+++ b/docs/man/xl.pod.1
@@ -447,7 +447,7 @@ N.B: Remus support in xl is still in experimental 
(proof-of-concept) phase.
  Disk replication support is limited to DRBD disks.
 
  COLO support in xl is still in experimental (proof-of-concept) phase.
- There is no support for network or disk at the moment.
+ There is no support for network at the moment.
 
 B
 
diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index afe0cc9..08d68df 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -2296,6 +2296,8 @@ int libxl__device_disk_setdefault(libxl__gc *gc, 
libxl_device_disk *disk)
 int rc;
 
 libxl_defbool_setdefault(&disk->discard_enable, !!disk->readwrite);
+libxl_defbool_setdefault(&disk->colo_enable, false);
+libxl_defbool_setdefault(&disk->colo_restore_enable, false);
 
 rc = libxl__resolve_domid(gc, disk->backend_domname, &disk->backend_domid);
 if (rc < 0) return rc;
@@ -2496,6 +2498,14 @@ static void device_disk_add(libxl__egc *egc, uint32_t 
domid,
 flexarray_append(back, "params");
 flexarray_append(back, libxl__sprintf(gc, "%s:%s",
   
libxl__device_disk_string_of_format(disk->format), disk->pdev_path));
+if (libxl_defbool_val(disk->colo_enable)) {
+flexarray_append(back, "colo-params");
+flexarray_append(back, libxl__sprintf(gc, "%s", 
disk->colo_params));
+flexarray_append(back, "active-disk");
+flexarray_append(back, libxl__sprintf(gc, "%s", 
disk->active_disk));
+flexarray_append(back, "hidden-disk");
+flexarray_append(back, libxl__sprintf(gc, "%s", 
disk->hidden_disk));
+}
 assert(device->backend_kind == LIBXL__DEVICE_KIND_QDISK);
 break;
 default:
@@ -2610,7 +2620,10 @@ static int libxl__device_disk_from_xs_be(libxl__gc *gc,
 goto cleanup;
 }
 
-/* "params" may not be present; but everything else must be. */
+/*
+ * "params" and "colo-params" may not be present; but everything
+ * else must be.
+ */
 tmp = xs_read(ctx->xsh, XBT_NULL,
   libxl__sprintf(gc, "%s/params", be_path), &len);
 if (tmp && strchr(tmp, ':')) {
@@ -2620,6 +2633,33 @@ static int libxl__device_disk_from_xs_be(libxl__gc *gc,
 disk->pdev_path = tmp;
 }
 
+tmp = xs_read(ctx->xsh, XBT_NULL,
+  libxl__sprintf(gc, "%s/colo-params", be_path), &len);
+if (tmp) {
+libxl_defbool_set(&disk->colo_enable, true);
+disk->colo_params = tmp;
+} else {
+libxl_defbool_set(&disk->colo_enable, false);
+}
+
+if (libxl_defbool_val(disk->colo_enable)) {
+tmp = xs_read(ctx->xsh, XBT_NULL,
+  libxl__sprintf(gc, "%s/active-disk", be_path), &len);
+if (!tmp) {
+LOG(ERROR, "Missing xenstore node %s/active-disk", be_path);
+goto cleanup;
+}
+disk->active_disk = tmp;
+
+tmp = xs_read(ctx->xsh, XBT_NULL,
+  libxl__sprintf(gc, "%s/hidden-disk", be_path), &len);
+if (!tmp) {
+LOG(ERROR, "Missing xenstore node %s/hidden-disk", be_path);
+goto cleanup;
+}
+disk->hidden_disk = tmp;
+}
+
 
 tmp = libxl__xs_read(gc, XBT_NULL,
  libxl__sprintf(gc, "%s/type", be_path));
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index 89c18dc..1fae0a4 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -1663,12 +1663,29 @@ static void domain_create_cb(libxl__egc *egc,
 
 libxl__ao_complete(egc, ao, rc);
 }
-
+
+static void set_disk_colo_restore(libxl_domain_config *d_config)
+{
+int i;
+
+for (i = 0; i < d_config->num_disks; i++)
+libxl_defbool_set(&d_config->disks[i].colo_restore_enable, true);
+}
+
+static void unset_disk_colo_restore(libxl_domain_config *d_config)
+{
+int i;
+
+for (i = 0; i < d_config->num_disks; i++)
+libxl_defbool_set(&d_config->disks[i].colo_restore_enable, false);
+}
+
 int libxl_domain_create_new(libxl_ctx *ctx, libxl_domain_config *d_config,
 uint32_t *

[Xen-devel] [RFC PATCH COLO v5 16/29] primary vm suspend/get_dirty_pfn/resume/checkpoint code

2015-03-31 Thread Yang Hongyang

From: Wen Congyang 

We will do the following things again and again:
1. Suspend primary vm
   a. Suspend primary vm
   b. do postsuspend
   c. Read LIBXL_COLO_SVM_SUSPENDED from slave
   d. Read secondary vm's dirty page information from slave (count + pfn list)
2. Get dirty pfn list
   a. Return secondary vm's dirty pfn list
3. Resume primary vm
   a. Read LIBXL_COLO_SVM_READY from slave
   b. Do preresume
   c. Resume primary vm
   d. Read LIBXL_COLO_SVM_RESUMED from slave
4. Wait for a new checkpoint
   a. Wait for a new checkpoint (not implemented)
   b. Send LIBXL_COLO_NEW_CHECKPOINT to slave

Signed-off-by: Wen Congyang 
---
 tools/libxc/include/xenguest.h |  12 +
 tools/libxl/Makefile   |   2 +-
 tools/libxl/libxl.c|   6 +-
 tools/libxl/libxl_colo.h   |  10 +
 tools/libxl/libxl_colo_save.c  | 642 +
 tools/libxl/libxl_dom.c|  13 +-
 tools/libxl/libxl_internal.h   |  31 +-
 tools/libxl/libxl_save_msgs_gen.pl |   1 +
 tools/libxl/libxl_types.idl|   1 +
 9 files changed, 710 insertions(+), 8 deletions(-)
 create mode 100644 tools/libxl/libxl_colo_save.c

diff --git a/tools/libxc/include/xenguest.h b/tools/libxc/include/xenguest.h
index 6e621a6..266d96b 100644
--- a/tools/libxc/include/xenguest.h
+++ b/tools/libxc/include/xenguest.h
@@ -74,6 +74,18 @@ struct save_callbacks {
  */
 int (*toolstack_save)(uint32_t domid, uint8_t **buf, uint32_t *len, void 
*data);
 
+/* Called after the guest is suspended.
+ *
+ * returns the list of dirty pfn:
+ *  struct {
+ *  uint64_t count;
+ *  uint64_t pfn[];
+ *  };
+ *
+ *  Note: the caller must free the return value.
+ */
+uint8_t *(*get_dirty_pfn)(void *data);
+
 /* to be provided as the last argument to each callback function */
 void* data;
 };
diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 8acfd5d..b2eaf14 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -57,7 +57,7 @@ LIBXL_OBJS-y += libxl_nonetbuffer.o
 endif
 
 LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o
-LIBXL_OBJS-y += libxl_colo_restore.o
+LIBXL_OBJS-y += libxl_colo_restore.o libxl_colo_save.o
 
 LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o
 LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o
diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index 40a49c7..b6c5429 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -18,6 +18,7 @@
 
 #include "libxl_internal.h"
 #include "libxl_remus.h"
+#include "libxl_colo.h"
 
 #define PAGE_TO_MEMKB(pages) ((pages) * 4)
 #define BACKEND_STRING_SIZE 5
@@ -892,7 +893,10 @@ int libxl_domain_remus_start(libxl_ctx *ctx, 
libxl_domain_remus_info *info,
 assert(info);
 
 /* Point of no return */
-libxl__remus_setup(egc, &dss->rs);
+if (libxl_defbool_val(info->colo))
+libxl__colo_save_setup(egc, &dss->css);
+else
+libxl__remus_setup(egc, &dss->rs);
 return AO_INPROGRESS;
 
  out:
diff --git a/tools/libxl/libxl_colo.h b/tools/libxl/libxl_colo.h
index 91df275..26a2563 100644
--- a/tools/libxl/libxl_colo.h
+++ b/tools/libxl/libxl_colo.h
@@ -35,4 +35,14 @@ extern void libxl__colo_restore_teardown(libxl__egc *egc,
  libxl__colo_restore_state *crs,
  int rc);
 
+extern void libxl__colo_save_domain_suspend_callback(void *data);
+extern void libxl__colo_save_domain_resume_callback(void *data);
+extern void libxl__colo_save_domain_checkpoint_callback(void *data);
+extern void libxl__colo_save_get_dirty_pfn_callback(void *data);
+extern void libxl__colo_save_setup(libxl__egc *egc,
+   libxl__colo_save_state *css);
+extern void libxl__colo_save_teardown(libxl__egc *egc,
+  libxl__colo_save_state *css,
+  int rc);
+
 #endif
diff --git a/tools/libxl/libxl_colo_save.c b/tools/libxl/libxl_colo_save.c
new file mode 100644
index 000..bb5b434
--- /dev/null
+++ b/tools/libxl/libxl_colo_save.c
@@ -0,0 +1,642 @@
+/*
+ * Copyright (C) 2014 FUJITSU LIMITED
+ * Author: Wen Congyang 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+#include "libxl_colo.h"
+
+static const libxl__checkpoint_device_instance_ops *colo_ops[] = {
+

[Xen-devel] [RFC PATCH COLO v5 15/29] secondary vm suspend/resume/checkpoint code

2015-03-31 Thread Yang Hongyang

From: Wen Congyang 

Secondary vm is running in colo mode. So we will do
the following things again and again:
1. Resume secondary vm
   a. Send LIBXL_COLO_SVM_READY to master.
   b. If it is not resumed the first time, call 
libxl__checkpoint_devices_preresume().
   c. If it is resumed the first time, call libxl__xc_domain_restore_done()
  to build the secondary vm. We should also enable secondary vm's logdirty.
  Otherwise, call libxl__domain_resume() to resume secondary vm.
   d. If it is resumed the first time, call libxl__checkpoint_devices_setup()
  to setup checkpoint devices.
   e. Send LIBXL_COLO_SVM_RESUMED to master.
2. Wait for a new checkpoint
   a. Call libxl__checkpoint_devices_commit().
   b. Read LIBXL_COLO_NEW_CHECKPOINT from master.
3. Suspend secondary vm
   a. Suspend secondary vm.
   b. Call libxl__checkpoint_devices_postsuspend().
   c. Get secondary vm's dirty page information.
   d. Send LIBXL_COLO_SVM_SUSPENDED to master.
   e. Send secondary vm's dirty page information to master(count + pfn list).

Signed-off-by: Wen Congyang 
---
 tools/libxc/include/xenguest.h |   20 +
 tools/libxl/Makefile   |1 +
 tools/libxl/libxl_colo.h   |   38 ++
 tools/libxl/libxl_colo_restore.c   | 1158 
 tools/libxl/libxl_create.c |  116 +++-
 tools/libxl/libxl_dom.c|2 +-
 tools/libxl/libxl_internal.h   |   23 +
 tools/libxl/libxl_save_callout.c   |6 +-
 tools/libxl/libxl_save_msgs_gen.pl |6 +-
 9 files changed, 1363 insertions(+), 7 deletions(-)
 create mode 100644 tools/libxl/libxl_colo.h
 create mode 100644 tools/libxl/libxl_colo_restore.c

diff --git a/tools/libxc/include/xenguest.h b/tools/libxc/include/xenguest.h
index 601b108..6e621a6 100644
--- a/tools/libxc/include/xenguest.h
+++ b/tools/libxc/include/xenguest.h
@@ -93,6 +93,26 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t 
dom, uint32_t max_iter
 
 /* callbacks provided by xc_domain_restore */
 struct restore_callbacks {
+/* Called after a new checkpoint to suspend the guest.
+ */
+int (*suspend)(void* data);
+
+/* Called after the secondary vm is ready to resume.
+ * Callback function resumes the guest & the device model,
+ *  returns to xc_domain_restore.
+ */
+int (*postcopy)(void* data);
+
+/* callback to wait a new checkpoint
+ *
+ * returns:
+ * 0: terminate checkpointing gracefully
+ * 1: take another checkpoint */
+int (*checkpoint)(void* data);
+
+/* Enable qemu-dm logging dirty pages to xen */
+int (*switch_qemu_logdirty)(int domid, unsigned enable, void *data); /* 
HVM only */
+
 /* callback to restore toolstack specific data */
 int (*toolstack_restore)(uint32_t domid, const uint8_t *buf,
 uint32_t size, void* data);
diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 1e27754..8acfd5d 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -57,6 +57,7 @@ LIBXL_OBJS-y += libxl_nonetbuffer.o
 endif
 
 LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o
+LIBXL_OBJS-y += libxl_colo_restore.o
 
 LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o
 LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o
diff --git a/tools/libxl/libxl_colo.h b/tools/libxl/libxl_colo.h
new file mode 100644
index 000..91df275
--- /dev/null
+++ b/tools/libxl/libxl_colo.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2014 FUJITSU LIMITED
+ * Author: Wen Congyang 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#ifndef LIBXL_COLO_H
+#define LIBXL_COLO_H
+
+/*
+ * values to control suspend/resume primary vm and secondary vm
+ * at the same time
+ */
+enum {
+LIBXL_COLO_NEW_CHECKPOINT = 1,
+LIBXL_COLO_SVM_SUSPENDED,
+LIBXL_COLO_SVM_READY,
+LIBXL_COLO_SVM_RESUMED,
+};
+
+extern void libxl__colo_restore_done(libxl__egc *egc, void *dcs_void,
+ int ret, int retval, int errnoval);
+extern void libxl__colo_restore_setup(libxl__egc *egc,
+  libxl__colo_restore_state *crs);
+extern void libxl__colo_restore_teardown(libxl__egc *egc,
+ libxl__colo_restore_state *crs,
+ int rc);
+
+#endif
diff --git a/tools/libxl/libxl_colo_restore.c b/tools/libxl/libxl_colo_restore.c
new file mode 100644
index 000..7b825d4
--- /dev/null
+++ b/tools/libxl/libxl_colo_resto

[Xen-devel] [RFC PATCH COLO v5 19/29] send store mfn and console mfn to xl before resuming secondary vm

2015-03-31 Thread Yang Hongyang

From: Wen Congyang 

We will call libxl__xc_domain_restore_done() to rebuild the secondary vm, but
we need the store mfn and console mfn when rebuilding it. So make
restore_results a function pointer in the callbacks struct and in struct
{save,restore}_callbacks, and use this callback to send the store mfn and
console mfn to xl.

Signed-off-by: Wen Congyang 
---
 tools/libxc/include/xenguest.h | 8 
 tools/libxc/xc_domain_restore.c| 2 +-
 tools/libxl/libxl_colo_restore.c   | 5 -
 tools/libxl/libxl_create.c | 1 +
 tools/libxl/libxl_save_msgs_gen.pl | 2 +-
 5 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/tools/libxc/include/xenguest.h b/tools/libxc/include/xenguest.h
index 266d96b..597f515 100644
--- a/tools/libxc/include/xenguest.h
+++ b/tools/libxc/include/xenguest.h
@@ -125,6 +125,14 @@ struct restore_callbacks {
 /* Enable qemu-dm logging dirty pages to xen */
 int (*switch_qemu_logdirty)(int domid, unsigned enable, void *data); /* 
HVM only */
 
+/*
+ * callback to send store mfn and console mfn to xl
+ * if we want to resume vm before xc_domain_save()
+ * exits.
+ */
+void (*restore_results)(unsigned long store_mfn, unsigned long console_mfn,
+void *data);
+
 /* callback to restore toolstack specific data */
 int (*toolstack_restore)(uint32_t domid, const uint8_t *buf,
 uint32_t size, void* data);
diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c
index 5cad21c..cc5c1ad 100644
--- a/tools/libxc/xc_domain_restore.c
+++ b/tools/libxc/xc_domain_restore.c
@@ -2408,7 +2408,7 @@ new_checkpoint:
 } while (0)
 /* COLO */
 
-/* TODO: call restore_results */
+callbacks->restore_results(*store_mfn, *console_mfn, callbacks->data);
 
 /* Resume secondary vm */
 frc = callbacks->postcopy(callbacks->data);
diff --git a/tools/libxl/libxl_colo_restore.c b/tools/libxl/libxl_colo_restore.c
index 7b825d4..554474d 100644
--- a/tools/libxl/libxl_colo_restore.c
+++ b/tools/libxl/libxl_colo_restore.c
@@ -152,11 +152,6 @@ static void colo_resume_vm(libxl__egc *egc,
 return;
 }
 
-/*
- * TODO: get store mfn and console mfn
- *  We should call the callback restore_results in
- *  xc_domain_restore() before resuming the guest.
- */
 libxl__xc_domain_restore_done(egc, dcs, 0, 0, 0);
 
 return;
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index ba6e1fe..89c18dc 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -1121,6 +1121,7 @@ static void domcreate_bootloader_done(libxl__egc *egc,
 rc = ERROR_INVAL;
 goto out;
 }
+callbacks->restore_results = libxl__srm_callout_callback_restore_results;
 
 if (checkpointed_stream == LIBXL_CHECKPOINTED_STREAM_COLO) {
 crs->ao = ao;
diff --git a/tools/libxl/libxl_save_msgs_gen.pl 
b/tools/libxl/libxl_save_msgs_gen.pl
index fbb2d67..2ecd25d 100755
--- a/tools/libxl/libxl_save_msgs_gen.pl
+++ b/tools/libxl/libxl_save_msgs_gen.pl
@@ -32,7 +32,7 @@ our @msgs = (
 #toolstack_save  done entirely `by hand'
 [  7, 'rcxW',   "toolstack_restore", [qw(uint32_t domid
 BLOCK tsdata)] ],
-[  8, 'r',  "restore_results",   ['unsigned long', 'store_mfn',
+[  8, 'rcx',"restore_results",   ['unsigned long', 'store_mfn',
   'unsigned long', 'console_mfn'] 
],
 [  9, 'srW',"complete",  [qw(int retval
  int errnoval)] ],
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [RFC PATCH COLO v5 21/29] tools: xc_domain_restore: zero ioreq page only one time

2015-03-31 Thread Yang Hongyang

From: Wen Congyang 

ioreq page contains evtchn which will be set when we resume the
secondary vm the first time. The hypervisor will check if the
evtchn is corrupted, so we cannot zero the ioreq page more
than one time.

The ioreq->state is always STATE_IOREQ_NONE after the vm is
suspended, so it is OK if we only zero it one time.
---
 tools/libxc/xc_domain_restore.c | 24 +---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c
index cc5c1ad..276db37 100644
--- a/tools/libxc/xc_domain_restore.c
+++ b/tools/libxc/xc_domain_restore.c
@@ -1501,6 +1501,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, 
uint32_t dom,
 struct restore_ctx _ctx;
 struct restore_ctx *ctx = &_ctx;
 struct domain_info_context *dinfo = &ctx->dinfo;
+int skip_clear_ioreq_page = 0;
 
 DPRINTF("%s: starting restore of new domid %u", __func__, dom);
 
@@ -2331,13 +2332,30 @@ new_checkpoint:
 }
 
 /* These comms pages need to be zeroed at the start of day */
-if ( xc_clear_domain_page(xch, dom, tailbuf.u.hvm.magicpfns[0]) ||
- xc_clear_domain_page(xch, dom, tailbuf.u.hvm.magicpfns[1]) ||
- xc_clear_domain_page(xch, dom, tailbuf.u.hvm.magicpfns[2]) )
+if ( xc_clear_domain_page(xch, dom, tailbuf.u.hvm.magicpfns[2]) )
 {
 PERROR("error zeroing magic pages");
 goto out;
 }
+if ( !skip_clear_ioreq_page )
+{
+if ( xc_clear_domain_page(xch, dom, tailbuf.u.hvm.magicpfns[0]) ||
+ xc_clear_domain_page(xch, dom, tailbuf.u.hvm.magicpfns[1]) )
+{
+PERROR("error zeroing magic pages");
+goto out;
+}
+/*
+ * ioreq page contains evtchn which will be set when we resume the
+ * secondary vm the first time. The hypervisor will check if the
+ * evtchn is corrupted, so we cannot clear the ioreq page more
+ * than one time.
+ *
+ * The ioreq->state is always STATE_IOREQ_NONE after the vm is
+ * suspended, so it is OK if we only clear it one time.
+ */
+skip_clear_ioreq_page = 1;
+}
 
 if ( (frc = xc_hvm_param_set(xch, dom,
  HVM_PARAM_IOREQ_PFN, 
tailbuf.u.hvm.magicpfns[0]))
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [RFC PATCH COLO v5 18/29] COLO: xc related codes

2015-03-31 Thread Yang Hongyang

From: Wen Congyang 

Save:
1. send XC_SAVE_ID_LAST_CHECKPOINT, so secondary vm can be resumed
2. call callbacks->get_dirty_pfn() after suspend primary vm if we
   are doing checkpoint.

Restore:
1. call the callbacks resume/checkpoint/suspend if secondary vm's
   status is the same as primary vm's status.
2. zero out tdata because we will use it to zero out pagebuf.tdata.
3. don't apply the secondary vm's state when we failed to get new
   secondary vm's state, because we have applied it every checkpoint.

Signed-off-by: Wen Congyang 
---
 tools/libxc/xc_domain_restore.c | 82 +++--
 tools/libxc/xc_domain_save.c| 57 +++-
 2 files changed, 125 insertions(+), 14 deletions(-)

diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c
index a382701..5cad21c 100644
--- a/tools/libxc/xc_domain_restore.c
+++ b/tools/libxc/xc_domain_restore.c
@@ -1454,7 +1454,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, 
uint32_t dom,
 int nraces = 0;
 
 /* The new domain's shared-info frame number. */
-unsigned long shared_info_frame;
+unsigned long shared_info_frame = 0;
 unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */
 shared_info_any_t *old_shared_info = 
 (shared_info_any_t *)shared_info_page;
@@ -1504,6 +1504,8 @@ int xc_domain_restore(xc_interface *xch, int io_fd, 
uint32_t dom,
 
 DPRINTF("%s: starting restore of new domid %u", __func__, dom);
 
+n = m = 0;
+
 pagebuf_init(&pagebuf);
 memset(&tailbuf, 0, sizeof(tailbuf));
 tailbuf.ishvm = hvm;
@@ -1629,7 +1631,6 @@ int xc_domain_restore(xc_interface *xch, int io_fd, 
uint32_t dom,
  * We uncanonicalise page tables as we go.
  */
 
-n = m = 0;
  loadpages:
 for ( ; ; )
 {
@@ -1793,26 +1794,45 @@ int xc_domain_restore(xc_interface *xch, int io_fd, 
uint32_t dom,
 goto finish;
 }
 
+new_checkpoint:
 // DPRINTF("Buffered checkpoint\n");
 
 if ( pagebuf_get(xch, ctx, &pagebuf, io_fd, dom) ) {
 PERROR("error when buffering batch, finishing");
-/*
- * Remus: discard the current incomplete checkpoint and restore
- * backup from the last complete checkpoint.
- */
-goto finish;
+if ( callbacks && callbacks->checkpoint )
+{
+/* COLO: discard the current incomplete checkpoint */
+rc = 0;
+goto failover;
+}
+else
+{
+/*
+ * Remus: discard the current incomplete checkpoint and restore
+ * backup from the last complete checkpoint.
+ */
+goto finish;
+}
 }
 memset(&tmptail, 0, sizeof(tmptail));
 tmptail.ishvm = hvm;
 if ( buffer_tail(xch, ctx, &tmptail, io_fd, max_vcpu_id, vcpumap,
  ext_vcpucontext, vcpuextstate_size) < 0 ) {
 ERROR ("error buffering image tail, finishing");
-/*
- * Remus: discard the current incomplete checkpoint and restore
- * backup from the last complete checkpoint.
- */
-goto finish;
+if ( callbacks && callbacks->checkpoint )
+{
+/* COLO: discard the current incomplete checkpoint */
+rc = 0;
+goto failover;
+}
+else
+{
+/*
+ * Remus: discard the current incomplete checkpoint and restore
+ * backup from the last complete checkpoint.
+ */
+goto finish;
+}
 }
 tailbuf_free(&tailbuf);
 memcpy(&tailbuf, &tmptail, sizeof(tailbuf));
@@ -2301,6 +2321,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, 
uint32_t dom,
 free(tdata.data);
 goto out;
 }
+memset(&tdata, 0, sizeof(tdata));
 }
 
 /* Dump the QEMU state to a state file for QEMU to load */
@@ -2368,6 +2389,43 @@ int xc_domain_restore(xc_interface *xch, int io_fd, 
uint32_t dom,
 rc = 0;
 
  out:
+if ( !rc && callbacks && callbacks->checkpoint )
+{
+#define HANDLE_CALLBACK_RETURN_VALUE(frc)   \
+do {\
+if ( frc == 0 ) \
+{   \
+/* Some internal error happens */   \
+rc = 1; \
+goto out;   \
+}   \
+else if ( frc == 2 )\
+{   \
+/* Reading/writing error, do failover */\
+rc = 0; \
+goto failover;  \
+}   \
+} wh

[Xen-devel] [RFC PATCH COLO v5 14/29] Allow slave sends data to master

2015-03-31 Thread Yang Hongyang

From: Wen Congyang 

In colo mode, the slave needs to send data to the master, but the io_fd
can only be written in the master, and can only be read in the slave.
Save recv_fd in domain_suspend_state, and send_fd in
domain_create_state.

Signed-off-by: Wen Congyang 
---
 tools/libxl/libxl.c  |  2 +-
 tools/libxl/libxl_create.c   | 14 ++
 tools/libxl/libxl_internal.h |  2 ++
 tools/libxl/libxl_types.idl  |  7 +++
 tools/libxl/xl_cmdimpl.c |  7 +++
 5 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index 7bc4fc4..40a49c7 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -883,7 +883,7 @@ int libxl_domain_remus_start(libxl_ctx *ctx, 
libxl_domain_remus_info *info,
 dss->callback = remus_failover_cb;
 dss->domid = domid;
 dss->fd = send_fd;
-/* TODO do something with recv_fd */
+dss->recv_fd = recv_fd;
 dss->type = type;
 dss->live = 1;
 dss->debug = 0;
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index af04248..392420f 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -1513,8 +1513,8 @@ static void domain_create_cb(libxl__egc *egc,
  int rc, uint32_t domid);
 
 static int do_domain_create(libxl_ctx *ctx, libxl_domain_config *d_config,
-uint32_t *domid,
-int restore_fd, int checkpointed_stream,
+uint32_t *domid, int restore_fd,
+int send_fd, int checkpointed_stream,
 const libxl_asyncop_how *ao_how,
 const libxl_asyncprogress_how *aop_console_how)
 {
@@ -1527,6 +1527,7 @@ static int do_domain_create(libxl_ctx *ctx, 
libxl_domain_config *d_config,
 libxl_domain_config_init(&cdcs->dcs.guest_config_saved);
 libxl_domain_config_copy(ctx, &cdcs->dcs.guest_config_saved, d_config);
 cdcs->dcs.restore_fd = restore_fd;
+cdcs->dcs.send_fd = send_fd;
 cdcs->dcs.callback = domain_create_cb;
 cdcs->dcs.checkpointed_stream = checkpointed_stream;
 libxl__ao_progress_gethow(&cdcs->dcs.aop_console_how, aop_console_how);
@@ -1555,7 +1556,7 @@ int libxl_domain_create_new(libxl_ctx *ctx, 
libxl_domain_config *d_config,
 const libxl_asyncop_how *ao_how,
 const libxl_asyncprogress_how *aop_console_how)
 {
-return do_domain_create(ctx, d_config, domid, -1, 0,
+return do_domain_create(ctx, d_config, domid, -1, -1, 0,
 ao_how, aop_console_how);
 }
 
@@ -1565,7 +1566,12 @@ int libxl_domain_create_restore(libxl_ctx *ctx, 
libxl_domain_config *d_config,
 const libxl_asyncop_how *ao_how,
 const libxl_asyncprogress_how *aop_console_how)
 {
-return do_domain_create(ctx, d_config, domid, restore_fd,
+int send_fd = -1;
+
+if (params->checkpointed_stream == LIBXL_CHECKPOINTED_STREAM_COLO)
+send_fd = params->send_fd;
+
+return do_domain_create(ctx, d_config, domid, restore_fd, send_fd,
 params->checkpointed_stream, ao_how, 
aop_console_how);
 }
 
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 7bfabd8..971d975 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2874,6 +2874,7 @@ struct libxl__domain_suspend_state {
 
 uint32_t domid;
 int fd;
+int recv_fd;
 libxl_domain_type type;
 int live;
 int debug;
@@ -3140,6 +3141,7 @@ struct libxl__domain_create_state {
 libxl_domain_config *guest_config;
 libxl_domain_config guest_config_saved; /* vanilla config */
 int restore_fd;
+int send_fd;
 libxl__domain_create_cb *callback;
 libxl_asyncprogress_how aop_console_how;
 /* private to domain_create */
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index fa85e5b..292d754 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -196,6 +196,12 @@ libxl_viridian_enlightenment = 
Enumeration("viridian_enlightenment", [
 (3, "reference_tsc"),
 ])
 
+libxl_checkpointed_stream = Enumeration("checkpointed_stream", [
+(0, "NONE"),
+(1, "REMUS"),
+(2, "COLO"),
+], init_val = 0)
+
 #
 # Complex libxl types
 #
@@ -344,6 +350,7 @@ libxl_domain_create_info = Struct("domain_create_info",[
 
 libxl_domain_restore_params = Struct("domain_restore_params", [
 ("checkpointed_stream", integer),
+("send_fd", integer),
 ])
 
 libxl_domain_sched_params = Struct("domain_sched_params",[
diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
index 394b55d..4574d05 100644
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -154,6 +154,7 @@ struct domain_create {
 const char *extra_config; /* extra config string */
 const char *restore_file;
 int migrate_fd; /* -1 means n

[Xen-devel] [RFC PATCH COLO v5 20/29] implement the cmdline for COLO

2015-03-31 Thread Yang Hongyang

From: Wen Congyang 

Add a new option -c to the command 'xl remus'. If you want
to use COLO HA instead of Remus HA, please use -c option.

Update man pages to reflect the addition of a new option to
'xl remus' command.

Also add a new option -c to the internal command 'xl migrate-receive'.

Signed-off-by: Wen Congyang 
---
 docs/man/xl.pod.1 | 12 +--
 tools/libxl/libxl.c   | 16 ++
 tools/libxl/xl_cmdimpl.c  | 53 +++
 tools/libxl/xl_cmdtable.c |  4 +++-
 4 files changed, 73 insertions(+), 12 deletions(-)

diff --git a/docs/man/xl.pod.1 b/docs/man/xl.pod.1
index 16783c8..adcbe37 100644
--- a/docs/man/xl.pod.1
+++ b/docs/man/xl.pod.1
@@ -440,12 +440,15 @@ Print huge (!) amount of debug during the migration 
process.
 
 =item B [I] I I
 
-Enable Remus HA for domain. By default B relies on ssh as a transport
-mechanism between the two hosts.
+Enable Remus HA or COLO HA for domain. By default B relies on ssh as a
+transport mechanism between the two hosts.
 
 N.B: Remus support in xl is still in experimental (proof-of-concept) phase.
  Disk replication support is limited to DRBD disks.
 
+ COLO support in xl is still in experimental (proof-of-concept) phase.
+ There is no support for network or disk at the moment.
+
 B
 
 =over 4
@@ -491,6 +494,11 @@ Disable network output buffering. Requires enabling unsafe 
mode.
 
 Disable disk replication. Requires enabling unsafe mode.
 
+=item B<-c>
+
+Enable COLO HA. It conflicts with B<-i> and B<-b>, and memory
+checkpoint compression must be disabled.
+
 =back
 
 =item B I
diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index b6c5429..afe0cc9 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -862,6 +862,22 @@ int libxl_domain_remus_start(libxl_ctx *ctx, 
libxl_domain_remus_info *info,
 goto out;
 }
 
+/* The caller must set this defbool */
+if (libxl_defbool_is_default(info->colo)) {
+LOG(ERROR, "colo mode must be enabled/disabled");
+rc = ERROR_FAIL;
+goto out;
+}
+
+if (libxl_defbool_val(info->colo)) {
+libxl_defbool_setdefault(&info->compression, false);
+if (libxl_defbool_val(info->compression)) {
+LOG(ERROR, "cannot use memory checkpoint compression in COLO 
mode");
+rc = ERROR_FAIL;
+goto out;
+}
+}
+
 libxl_defbool_setdefault(&info->allow_unsafe, false);
 libxl_defbool_setdefault(&info->blackhole, false);
 libxl_defbool_setdefault(&info->compression, true);
diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
index 4574d05..6c5b792 100644
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -4250,6 +4250,9 @@ static void migrate_receive(int debug, int daemonize, int 
monitor,
 dom_info.send_fd = send_fd;
 dom_info.migration_domname_r = &migration_domname;
 dom_info.checkpointed_stream = remus;
+if (remus == LIBXL_CHECKPOINTED_STREAM_COLO)
+/* COLO uses stdout to send control message to master */
+dom_info.quiet = 1;
 
 rc = create_domain(&dom_info);
 if (rc < 0) {
@@ -4264,7 +4267,8 @@ static void migrate_receive(int debug, int daemonize, int 
monitor,
 /* If we are here, it means that the sender (primary) has crashed.
  * TODO: Split-Brain Check.
  */
-fprintf(stderr, "migration target: Remus Failover for domain %u\n",
+fprintf(stderr, "migration target: %s Failover for domain %u\n",
+remus == LIBXL_CHECKPOINTED_STREAM_COLO ? "COLO" : "Remus",
 domid);
 
 /*
@@ -4281,15 +4285,21 @@ static void migrate_receive(int debug, int daemonize, 
int monitor,
 rc = libxl_domain_rename(ctx, domid, migration_domname,
  common_domname);
 if (rc)
-fprintf(stderr, "migration target (Remus): "
+fprintf(stderr, "migration target (%s): "
 "Failed to rename domain from %s to %s:%d\n",
+remus == LIBXL_CHECKPOINTED_STREAM_COLO ? "COLO" : 
"Remus",
 migration_domname, common_domname, rc);
 }
 
+if (remus == LIBXL_CHECKPOINTED_STREAM_COLO)
+/* The guest is running after failover in COLO mode */
+exit(rc ? -ERROR_FAIL: 0);
+
 rc = libxl_domain_unpause(ctx, domid);
 if (rc)
-fprintf(stderr, "migration target (Remus): "
+fprintf(stderr, "migration target (%s): "
 "Failed to unpause domain %s (id: %u):%d\n",
+remus == LIBXL_CHECKPOINTED_STREAM_COLO ? "COLO" : "Remus",
 common_domname, domid, rc);
 
 exit(rc ? -ERROR_FAIL: 0);
@@ -4435,7 +4445,7 @@ int main_migrate_receive(int argc, char **argv)
 int debug = 0, daemonize = 1, monitor = 1, remus = 0;
 int opt;
 
-SWITCH_FOREACH_OPT(opt, "Fedr", NULL,

[Xen-devel] [RFC PATCH COLO v5 17/29] xc_domain_save: flush cache before calling callbacks->postcopy() in colo mode

2015-03-31 Thread Yang Hongyang

From: Wen Congyang 

In colo mode, secondary vm is running. We will use the io_fd to
ensure that both primary vm and secondary vm are resumed
at the same time. So we should call postcopy later.

Signed-off-by: Wen Congyang 
---
 tools/libxc/xc_domain_save.c | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/tools/libxc/xc_domain_save.c b/tools/libxc/xc_domain_save.c
index cef6995..045a050 100644
--- a/tools/libxc/xc_domain_save.c
+++ b/tools/libxc/xc_domain_save.c
@@ -2082,10 +2082,15 @@ int xc_domain_save(xc_interface *xch, int io_fd, 
uint32_t dom, uint32_t max_iter
  out_rc:
 completed = 1;
 
-if ( !rc && callbacks->postcopy )
+/*
+ * COLO: secondary vm is running. We will use the io_fd to
+ * ensure that both primary vm and secondary vm are resumed
+ * at the same time. So we should call postcopy later.
+ */
+if ( !rc && callbacks->postcopy && !callbacks->get_dirty_pfn )
 callbacks->postcopy(callbacks->data);
 
-/* guest has been resumed. Now we can compress data
+/* Remus: guest has been resumed. Now we can compress data
  * at our own pace.
  */
 if (!rc && compressing)
@@ -2113,6 +2118,13 @@ int xc_domain_save(xc_interface *xch, int io_fd, 
uint32_t dom, uint32_t max_iter
 
 discard_file_cache(xch, io_fd, 1 /* flush */);
 
+/*
+ * COLO: send qemu device state and resume both
+ * primary vm and secondary vm now.
+ */
+if ( !rc && callbacks->postcopy && callbacks->get_dirty_pfn )
+callbacks->postcopy(callbacks->data);
+
 /* Enable compression now, finally */
 compressing = (flags & XCFLAGS_CHECKPOINT_COMPRESS);
 
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [RFC PATCH COLO v5 11/29] adjust the indentation

2015-03-31 Thread Yang Hongyang

From: Wen Congyang 

Signed-off-by: Wen Congyang 
---
 tools/libxl/libxl_checkpoint_device.c | 23 ---
 tools/libxl/libxl_internal.h  | 21 -
 tools/libxl/libxl_remus.c | 12 
 3 files changed, 32 insertions(+), 24 deletions(-)

diff --git a/tools/libxl/libxl_checkpoint_device.c 
b/tools/libxl/libxl_checkpoint_device.c
index 109cd23..0cfabc3 100644
--- a/tools/libxl/libxl_checkpoint_device.c
+++ b/tools/libxl/libxl_checkpoint_device.c
@@ -73,9 +73,9 @@ static void devices_teardown_cb(libxl__egc *egc,
 /* checkpoint device setup and teardown */
 
 static libxl__checkpoint_device* checkpoint_device_init(libxl__egc *egc,
-  libxl__checkpoint_devices_state 
*cds,
-  libxl__device_kind kind,
-  void *libxl_dev)
+libxl__checkpoint_devices_state *cds,
+libxl__device_kind kind,
+void *libxl_dev)
 {
 libxl__checkpoint_device *dev = NULL;
 
@@ -89,9 +89,10 @@ static libxl__checkpoint_device* 
checkpoint_device_init(libxl__egc *egc,
 }
 
 static void checkpoint_devices_setup(libxl__egc *egc,
-libxl__checkpoint_devices_state *cds);
+ libxl__checkpoint_devices_state *cds);
 
-void libxl__checkpoint_devices_setup(libxl__egc *egc, 
libxl__checkpoint_devices_state *cds)
+void libxl__checkpoint_devices_setup(libxl__egc *egc,
+ libxl__checkpoint_devices_state *cds)
 {
 int i, rc;
 
@@ -137,7 +138,7 @@ out:
 }
 
 static void checkpoint_devices_setup(libxl__egc *egc,
-libxl__checkpoint_devices_state *cds)
+ libxl__checkpoint_devices_state *cds)
 {
 int i, rc;
 
@@ -223,7 +224,7 @@ static void all_devices_setup_cb(libxl__egc *egc,
 }
 
 void libxl__checkpoint_devices_teardown(libxl__egc *egc,
-   libxl__checkpoint_devices_state *cds)
+libxl__checkpoint_devices_state *cds)
 {
 int i;
 libxl__checkpoint_device *dev;
@@ -285,12 +286,12 @@ static void devices_checkpoint_cb(libxl__egc *egc,
 
 /* API implementations */
 
-#define define_checkpoint_api(api)\
-void libxl__checkpoint_devices_##api(libxl__egc *egc,\
-libxl__checkpoint_devices_state *cds)\
+#define define_checkpoint_api(api)  \
+void libxl__checkpoint_devices_##api(libxl__egc *egc,   \
+libxl__checkpoint_devices_state *cds)   \
 {   \
 int i;  \
-libxl__checkpoint_device *dev;   \
+libxl__checkpoint_device *dev;  \
 \
 STATE_AO_GC(cds->ao);   \
 \
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 7e7c3b3..4b8590c 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2652,7 +2652,8 @@ typedef struct libxl__save_helper_state {
  * Each device type needs to implement the interfaces specified in
  * the libxl__checkpoint_device_instance_ops if it wishes to support Remus.
  *
- * The high-level control flow through the checkpoint device layer is shown 
below:
+ * The high-level control flow through the checkpoint device layer is shown
+ * below:
  *
  * xl remus
  *  |->  libxl_domain_remus_start
@@ -2713,7 +2714,8 @@ int 
init_subkind_drbd_disk(libxl__checkpoint_devices_state *cds);
 void cleanup_subkind_drbd_disk(libxl__checkpoint_devices_state *cds);
 
 typedef void libxl__checkpoint_callback(libxl__egc *,
-   libxl__checkpoint_devices_state *, int rc);
+libxl__checkpoint_devices_state *,
+int rc);
 
 /*
  * State associated with a checkpoint invocation, including parameters
@@ -2721,7 +2723,7 @@ typedef void libxl__checkpoint_callback(libxl__egc *,
  * save/restore machinery.
  */
 struct libxl__checkpoint_devices_state {
-/* must be set by caller of libxl__checkpoint_device_(setup|teardown) 
*/
+/*-- must be set by caller of libxl__checkpoint_device_(setup|teardown) 
--*/
 
 libxl__ao *ao;
 uint32_t domid;
@@ -2734,7 +2736,8 @@ struct libxl__checkpoint_devices_state {
 /*
  * this array is allocated before setup the checkpoint devices by the
  * checkpoint abstract layer.
- * devs may b

[Xen-devel] [RFC PATCH COLO v5 10/29] rename remus device to checkpoint device

2015-03-31 Thread Yang Hongyang

From: Wen Congyang 

This patch is auto generated by the following commands:
 1. git mv tools/libxl/libxl_remus_device.c 
tools/libxl/libxl_checkpoint_device.c
 2. perl -pi -e 's/libxl_remus_device/libxl_checkpoint_device/g' 
tools/libxl/Makefile
 3. perl -pi -e 's/\blibxl__remus_devices/libxl__checkpoint_devices/g' 
tools/libxl/*.[ch]
 4. perl -pi -e 's/\blibxl__remus_device\b/libxl__checkpoint_device/g' 
tools/libxl/*.[ch]
 5. perl -pi -e 
's/\blibxl__remus_device_instance_ops\b/libxl__checkpoint_device_instance_ops/g'
 tools/libxl/*.[ch]
 6. perl -pi -e 's/\blibxl__remus_callback\b/libxl__checkpoint_callback/g' 
tools/libxl/*.[ch]
 7. perl -pi -e 's/\bremus_device_init\b/checkpoint_device_init/g' 
tools/libxl/*.[ch]
 8. perl -pi -e 's/\bremus_devices_setup\b/checkpoint_devices_setup/g' 
tools/libxl/*.[ch]
 9. perl -pi -e 's/\bdefine_remus_checkpoint_api\b/define_checkpoint_api/g' 
tools/libxl/*.[ch]
10. perl -pi -e 's/\brds\b/cds/g' tools/libxl/*.[ch]
11. perl -pi -e 's/REMUS_DEVICE/CHECKPOINT_DEVICE/g' tools/libxl/*.[ch] 
tools/libxl/*.idl
12. perl -pi -e 's/REMUS_DEVOPS/CHECKPOINT_DEVOPS/g' tools/libxl/*.[ch] 
tools/libxl/*.idl
13. perl -pi -e 's/\bremus\b/checkpoint/g' 
tools/libxl/libxl_checkpoint_device.[ch]
14. perl -pi -e 's/\bremus device/checkpoint device/g' 
tools/libxl/libxl_internal.h
15. perl -pi -e 's/\bRemus device/checkpoint device/g' 
tools/libxl/libxl_internal.h
16. perl -pi -e 's/\bremus abstract/checkpoint abstract/g' 
tools/libxl/libxl_internal.h
17. perl -pi -e 's/\bremus invocation/checkpoint invocation/g' 
tools/libxl/libxl_internal.h
18. perl -pi -e 's/\blibxl__remus_device_\(/libxl__checkpoint_device_(/g' 
tools/libxl/libxl_internal.h

Signed-off-by: Wen Congyang 
Cc: Shriram Rajagopalan 
---
 tools/libxl/Makefile  |   2 +-
 tools/libxl/libxl_checkpoint_device.c | 327 ++
 tools/libxl/libxl_internal.h  | 112 ++--
 tools/libxl/libxl_netbuffer.c | 108 +--
 tools/libxl/libxl_nonetbuffer.c   |  10 +-
 tools/libxl/libxl_remus.c |  78 
 tools/libxl/libxl_remus_device.c  | 327 --
 tools/libxl/libxl_remus_disk_drbd.c   |  52 +++---
 tools/libxl/libxl_types.idl   |   4 +-
 9 files changed, 510 insertions(+), 510 deletions(-)
 create mode 100644 tools/libxl/libxl_checkpoint_device.c
 delete mode 100644 tools/libxl/libxl_remus_device.c

diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 7eeda0e..1e27754 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -56,7 +56,7 @@ else
 LIBXL_OBJS-y += libxl_nonetbuffer.o
 endif
 
-LIBXL_OBJS-y += libxl_remus.o libxl_remus_device.o libxl_remus_disk_drbd.o
+LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o
 
 LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o
 LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o
diff --git a/tools/libxl/libxl_checkpoint_device.c 
b/tools/libxl/libxl_checkpoint_device.c
new file mode 100644
index 000..109cd23
--- /dev/null
+++ b/tools/libxl/libxl_checkpoint_device.c
@@ -0,0 +1,327 @@
+/*
+ * Copyright (C) 2014 FUJITSU LIMITED
+ * Author: Yang Hongyang 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+
+extern const libxl__checkpoint_device_instance_ops remus_device_nic;
+extern const libxl__checkpoint_device_instance_ops remus_device_drbd_disk;
+static const libxl__checkpoint_device_instance_ops *remus_ops[] = {
+&remus_device_nic,
+&remus_device_drbd_disk,
+NULL,
+};
+
+/*- helper functions -*/
+
+static int init_device_subkind(libxl__checkpoint_devices_state *cds)
+{
+/* init device subkind-specific state in the libxl ctx */
+int rc;
+STATE_AO_GC(cds->ao);
+
+if (libxl__netbuffer_enabled(gc)) {
+rc = init_subkind_nic(cds);
+if (rc) goto out;
+}
+
+rc = init_subkind_drbd_disk(cds);
+if (rc) goto out;
+
+rc = 0;
+out:
+return rc;
+}
+
+static void cleanup_device_subkind(libxl__checkpoint_devices_state *cds)
+{
+/* cleanup device subkind-specific state in the libxl ctx */
+STATE_AO_GC(cds->ao);
+
+if (libxl__netbuffer_enabled(gc))
+cleanup_subkind_nic(cds);
+
+cleanup_subkind_drbd_disk(cds);
+}
+
+/*- setup() and teardown() -*/
+
+/* callbacks */
+
+static void all_devices_setup_cb(libxl__egc *egc,
+

[Xen-devel] [RFC PATCH COLO v5 12/29] don't touch remus in checkpoint_device

2015-03-31 Thread Yang Hongyang

From: Wen Congyang 

The checkpoint device is an abstraction layer for checkpointing, and
COLO can use it as well. However, there is still some code in the
checkpoint device layer which touches Remus directly:
1. remus_ops: the checkpoint device uses the Remus ops directly.
   Store the ops in the checkpoint device state instead.
2. The concrete layer's private members: add a new structure,
   remus state, and move these members into it.
3. init/cleanup of device subkinds: the checkpoint device calls
   (init|cleanup)_subkind_nic and (init|cleanup)_subkind_drbd_disk
   directly. Instead, call the init functions before calling
   libxl__checkpoint_devices_setup() and the cleanup functions after
   calling libxl__checkpoint_devices_teardown().

Signed-off-by: Wen Congyang 
Cc: Shriram Rajagopalan 
---
 tools/libxl/libxl.c   |  2 +-
 tools/libxl/libxl_checkpoint_device.c | 52 ++--
 tools/libxl/libxl_dom.c   |  3 +-
 tools/libxl/libxl_internal.h  | 37 ++-
 tools/libxl/libxl_netbuffer.c | 51 +++-
 tools/libxl/libxl_remus.c | 89 +++
 tools/libxl/libxl_remus.h |  5 +-
 tools/libxl/libxl_remus_disk_drbd.c   |  9 ++--
 8 files changed, 136 insertions(+), 112 deletions(-)

diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index bcbd961..7bc4fc4 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -892,7 +892,7 @@ int libxl_domain_remus_start(libxl_ctx *ctx, 
libxl_domain_remus_info *info,
 assert(info);
 
 /* Point of no return */
-libxl__remus_setup(egc, dss);
+libxl__remus_setup(egc, &dss->rs);
 return AO_INPROGRESS;
 
  out:
diff --git a/tools/libxl/libxl_checkpoint_device.c 
b/tools/libxl/libxl_checkpoint_device.c
index 0cfabc3..2b7318f 100644
--- a/tools/libxl/libxl_checkpoint_device.c
+++ b/tools/libxl/libxl_checkpoint_device.c
@@ -17,46 +17,6 @@
 
 #include "libxl_internal.h"
 
-extern const libxl__checkpoint_device_instance_ops remus_device_nic;
-extern const libxl__checkpoint_device_instance_ops remus_device_drbd_disk;
-static const libxl__checkpoint_device_instance_ops *remus_ops[] = {
-&remus_device_nic,
-&remus_device_drbd_disk,
-NULL,
-};
-
-/*- helper functions -*/
-
-static int init_device_subkind(libxl__checkpoint_devices_state *cds)
-{
-/* init device subkind-specific state in the libxl ctx */
-int rc;
-STATE_AO_GC(cds->ao);
-
-if (libxl__netbuffer_enabled(gc)) {
-rc = init_subkind_nic(cds);
-if (rc) goto out;
-}
-
-rc = init_subkind_drbd_disk(cds);
-if (rc) goto out;
-
-rc = 0;
-out:
-return rc;
-}
-
-static void cleanup_device_subkind(libxl__checkpoint_devices_state *cds)
-{
-/* cleanup device subkind-specific state in the libxl ctx */
-STATE_AO_GC(cds->ao);
-
-if (libxl__netbuffer_enabled(gc))
-cleanup_subkind_nic(cds);
-
-cleanup_subkind_drbd_disk(cds);
-}
-
 /*- setup() and teardown() -*/
 
 /* callbacks */
@@ -94,14 +54,10 @@ static void checkpoint_devices_setup(libxl__egc *egc,
 void libxl__checkpoint_devices_setup(libxl__egc *egc,
  libxl__checkpoint_devices_state *cds)
 {
-int i, rc;
+int i;
 
 STATE_AO_GC(cds->ao);
 
-rc = init_device_subkind(cds);
-if (rc)
-goto out;
-
 cds->num_devices = 0;
 cds->num_nics = 0;
 cds->num_disks = 0;
@@ -134,7 +90,7 @@ void libxl__checkpoint_devices_setup(libxl__egc *egc,
 return;
 
 out:
-cds->callback(egc, cds, rc);
+cds->callback(egc, cds, 0);
 }
 
 static void checkpoint_devices_setup(libxl__egc *egc,
@@ -172,7 +128,7 @@ static void device_setup_iterate(libxl__egc *egc, 
libxl__ao_device *aodev)
 goto out;
 
 do {
-dev->ops = remus_ops[++dev->ops_index];
+dev->ops = dev->cds->ops[++dev->ops_index];
 if (!dev->ops) {
 libxl_device_nic * nic = NULL;
 libxl_device_disk * disk = NULL;
@@ -271,8 +227,6 @@ static void devices_teardown_cb(libxl__egc *egc,
 cds->disks = NULL;
 cds->num_disks = 0;
 
-cleanup_device_subkind(cds);
-
 cds->callback(egc, cds, rc);
 }
 
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 4693d32..e09a1eb 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -1865,7 +1865,6 @@ void libxl__domain_suspend(libxl__egc *egc, 
libxl__domain_suspend_state *dss)
 dss2->save_dm = 1;
 
 if (r_info != NULL) {
-dss->interval = r_info->interval;
 if (libxl_defbool_val(r_info->compression))
 dss->xcflags |= XCFLAGS_CHECKPOINT_COMPRESS;
 }
@@ -2051,7 +2050,7 @@ static void domain_suspend_done(libxl__egc *egc,
dss2->guest_evtchn.port, 
&dss2->guest_evtchn_lockfd);
 
 if (dss->remus) {
-libxl__remus_teardown(egc, dss, rc);
+libxl__remus_teardown(egc, &dss->rs, rc);
 return;
 }
 
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 4b8590c..e12c7b5 100644
---

[Xen-devel] [RFC PATCH COLO v5 13/29] Update libxl_save_msgs_gen.pl to support return data from xl to xc

2015-03-31 Thread Yang Hongyang

From: Wen Congyang 

Currently, all callbacks return either an integer value or void, so a
callback cannot return data to xc. Update libxl_save_msgs_gen.pl to
support callbacks that return data.

Signed-off-by: Wen Congyang 
---
 tools/libxl/libxl_internal.h   |  3 ++
 tools/libxl/libxl_save_callout.c   | 31 ++
 tools/libxl/libxl_save_helper.c| 17 ++
 tools/libxl/libxl_save_msgs_gen.pl | 65 ++
 4 files changed, 109 insertions(+), 7 deletions(-)

diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index e12c7b5..7bfabd8 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -3180,6 +3180,9 @@ _hidden void libxl__xc_domain_save_done(libxl__egc*, void 
*dss_void,
  * When they are ready to indicate completion, they call this. */
 void libxl__xc_domain_saverestore_async_callback_done(libxl__egc *egc,
libxl__save_helper_state *shs, int return_value);
+void libxl__xc_domain_saverestore_async_callback_done_with_data(libxl__egc 
*egc,
+   libxl__save_helper_state *shs,
+   const void *data, uint64_t size);
 
 
 _hidden void libxl__domain_suspend_common_switch_qemu_logdirty
diff --git a/tools/libxl/libxl_save_callout.c b/tools/libxl/libxl_save_callout.c
index 40b25e4..477e633 100644
--- a/tools/libxl/libxl_save_callout.c
+++ b/tools/libxl/libxl_save_callout.c
@@ -145,6 +145,15 @@ void 
libxl__xc_domain_saverestore_async_callback_done(libxl__egc *egc,
 shs->egc = 0;
 }
 
+void libxl__xc_domain_saverestore_async_callback_done_with_data(libxl__egc 
*egc,
+   libxl__save_helper_state *shs,
+   const void *data, uint64_t size)
+{
+shs->egc = egc;
+libxl__srm_callout_sendreply_data(data, size, shs);
+shs->egc = 0;
+}
+
 /*- helper execution -*/
 
 static void run_helper(libxl__egc *egc, libxl__save_helper_state *shs,
@@ -370,6 +379,28 @@ void libxl__srm_callout_sendreply(int r, void *user)
 helper_failed(egc, shs, ERROR_FAIL);
 }
 
+void libxl__srm_callout_sendreply_data(const void *data, uint64_t size, void 
*user)
+{
+libxl__save_helper_state *shs = user;
+libxl__egc *egc = shs->egc;
+STATE_AO_GC(shs->ao);
+int errnoval;
+
+errnoval = libxl_write_exactly(CTX, libxl__carefd_fd(shs->pipes[0]),
+   &size, sizeof(size), shs->stdin_what,
+   "callback return data length");
+if (errnoval)
+goto out;
+
+errnoval = libxl_write_exactly(CTX, libxl__carefd_fd(shs->pipes[0]),
+   data, size, shs->stdin_what,
+   "callback return data");
+
+out:
+if (errnoval)
+helper_failed(egc, shs, ERROR_FAIL);
+}
+
 void libxl__srm_callout_callback_log(uint32_t level, uint32_t errnoval,
   const char *context, const char *formatted, void *user)
 {
diff --git a/tools/libxl/libxl_save_helper.c b/tools/libxl/libxl_save_helper.c
index 74826a1..44c5807 100644
--- a/tools/libxl/libxl_save_helper.c
+++ b/tools/libxl/libxl_save_helper.c
@@ -155,6 +155,23 @@ int helper_getreply(void *user)
 return v;
 }
 
+uint8_t *helper_getreply_data(void *user)
+{
+uint64_t size;
+int r = read_exactly(0, &size, sizeof(size));
+uint8_t *data;
+
+if (r <= 0)
+exit(-2);
+
+data = helper_allocbuf(size, user);
+r = read_exactly(0, data, size);
+if (r <= 0)
+exit(-2);
+
+return data;
+}
+
 /*- other callbacks -*/
 
 static int toolstack_save_fd;
diff --git a/tools/libxl/libxl_save_msgs_gen.pl 
b/tools/libxl/libxl_save_msgs_gen.pl
index 6b4b65e..41ee000 100755
--- a/tools/libxl/libxl_save_msgs_gen.pl
+++ b/tools/libxl/libxl_save_msgs_gen.pl
@@ -15,6 +15,7 @@ our @msgs = (
 # and its null-ness needs to be passed through to the helper's xc
 #   W  - needs a return value; callback is synchronous
 #   A  - needs a return value; callback is asynchronous
+#   B  - return value is a pointer
 [  1, 'sr', "log",   [qw(uint32_t level
  uint32_t errnoval
  STRING context
@@ -99,23 +100,28 @@ our $libxl = "libxl__srm";
 our $callback = "${libxl}_callout_callback";
 our $receiveds = "${libxl}_callout_received";
 our $sendreply = "${libxl}_callout_sendreply";
+our $sendreply_data = "${libxl}_callout_sendreply_data";
 our $getcallbacks = "${libxl}_callout_get_callbacks";
 our $enumcallbacks = "${libxl}_callout_enumcallbacks";
 sub cbtype ($) { "${libxl}_".$_[0]."_autogen_callbacks"; };
 
 f_decl($sendreply, 'callout', 'void', "(int r, void *user)");
+f_decl($sendreply_data, 'callout', 'void',
+   "(const void *data, uint64_t size, void *user)");
 
 our $helper = "helper";
 our $encode = "${helper}_stub";
 our $allocbuf = "${helper}_allocbuf";

[Xen-devel] [RFC PATCH COLO v5 09/29] move remus related codes to libxl_remus.c

2015-03-31 Thread Yang Hongyang

From: Wen Congyang 

libxl_domain_remus_start() is an external API, so it is not moved.

Signed-off-by: Wen Congyang 
Cc: Shriram Rajagopalan 
---
 tools/libxl/Makefile  |   2 +-
 tools/libxl/libxl.c   |  57 +
 tools/libxl/libxl_dom.c   | 220 +---
 tools/libxl/libxl_remus.c | 318 ++
 tools/libxl/libxl_remus.h |  28 
 5 files changed, 352 insertions(+), 273 deletions(-)
 create mode 100644 tools/libxl/libxl_remus.c
 create mode 100644 tools/libxl/libxl_remus.h

diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 1b16598..7eeda0e 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -56,7 +56,7 @@ else
 LIBXL_OBJS-y += libxl_nonetbuffer.o
 endif
 
-LIBXL_OBJS-y += libxl_remus_device.o libxl_remus_disk_drbd.o
+LIBXL_OBJS-y += libxl_remus.o libxl_remus_device.o libxl_remus_disk_drbd.o
 
 LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o
 LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o
diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index 58629ed..bcbd961 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -17,6 +17,7 @@
 #include "libxl_osdeps.h"
 
 #include "libxl_internal.h"
+#include "libxl_remus.h"
 
 #define PAGE_TO_MEMKB(pages) ((pages) * 4)
 #define BACKEND_STRING_SIZE 5
@@ -842,11 +843,6 @@ out:
 GC_FREE;
 return ptr;
 }
-
-static void libxl__remus_setup_done(libxl__egc *egc,
-libxl__remus_devices_state *rds, int rc);
-static void libxl__remus_setup_failed(libxl__egc *egc,
-  libxl__remus_devices_state *rds, int rc);
 static void remus_failover_cb(libxl__egc *egc,
   libxl__domain_suspend_state *dss, int rc);
 
@@ -895,63 +891,14 @@ int libxl_domain_remus_start(libxl_ctx *ctx, 
libxl_domain_remus_info *info,
 
 assert(info);
 
-/* Convenience aliases */
-libxl__remus_devices_state *const rds = &dss->rds;
-
-if (libxl_defbool_val(info->netbuf)) {
-if (!libxl__netbuffer_enabled(gc)) {
-LOG(ERROR, "Remus: No support for network buffering");
-rc = ERROR_FAIL;
-goto out;
-}
-rds->device_kind_flags |= (1 << LIBXL__DEVICE_KIND_VIF);
-}
-
-if (libxl_defbool_val(info->diskbuf))
-rds->device_kind_flags |= (1 << LIBXL__DEVICE_KIND_VBD);
-
-rds->ao = ao;
-rds->domid = domid;
-rds->callback = libxl__remus_setup_done;
-
 /* Point of no return */
-libxl__remus_devices_setup(egc, rds);
+libxl__remus_setup(egc, dss);
 return AO_INPROGRESS;
 
  out:
 return AO_ABORT(rc);
 }
 
-static void libxl__remus_setup_done(libxl__egc *egc,
-libxl__remus_devices_state *rds, int rc)
-{
-libxl__domain_suspend_state *dss = CONTAINER_OF(rds, *dss, rds);
-STATE_AO_GC(dss->ao);
-
-if (!rc) {
-libxl__domain_suspend(egc, dss);
-return;
-}
-
-LOG(ERROR, "Remus: failed to setup device for guest with domid %u, rc %d",
-dss->domid, rc);
-rds->callback = libxl__remus_setup_failed;
-libxl__remus_devices_teardown(egc, rds);
-}
-
-static void libxl__remus_setup_failed(libxl__egc *egc,
-  libxl__remus_devices_state *rds, int rc)
-{
-libxl__domain_suspend_state *dss = CONTAINER_OF(rds, *dss, rds);
-STATE_AO_GC(dss->ao);
-
-if (rc)
-LOG(ERROR, "Remus: failed to teardown device after setup failed"
-" for guest with domid %u, rc %d", dss->domid, rc);
-
-dss->callback(egc, dss, rc);
-}
-
 static void remus_failover_cb(libxl__egc *egc,
   libxl__domain_suspend_state *dss, int rc)
 {
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index a3fce46..4693d32 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -19,6 +19,7 @@
 
 #include "libxl_internal.h"
 #include "libxl_arch.h"
+#include "libxl_remus.h"
 
 #include 
 #include 
@@ -1807,194 +1808,6 @@ static void 
domain_suspend_callback_common_done(libxl__egc *egc,
 libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, ok);
 }
 
-/*- remus callbacks -*/
-static void remus_domain_suspend_callback_common_done(libxl__egc *egc,
-libxl__domain_suspend_state2 *dss2, int ok);
-static void remus_devices_postsuspend_cb(libxl__egc *egc,
- libxl__remus_devices_state *rds,
- int rc);
-static void remus_devices_preresume_cb(libxl__egc *egc,
-   libxl__remus_devices_state *rds,
-   int rc);
-
-static void libxl__remus_domain_suspend_callback(void *data)
-{
-libxl__save_helper_state *shs = data;
-libxl__egc *egc = shs->egc;
-libxl__domain_suspend_state *dss = CONTAINER_OF(shs, *dss, shs);
-
-/* Convenience aliases

[Xen-devel] [RFC PATCH COLO v5 03/29] tools: libxl: introduce a new API libxl__domain_restore() to read qemu state

2015-03-31 Thread Yang Hongyang

From: Wen Congyang 

The secondary VM runs in COLO mode, so we repeat the following
steps again and again:
1. suspend both the primary VM and the secondary VM
2. sync the state
3. resume both the primary VM and the secondary VM
Qemu's state is sent each time in step 2, and the secondary's
qemu must read it each time before the secondary VM is resumed.
Introduce a new API, libxl__domain_restore(), to do this. This
API should be called before resuming the secondary VM.

Note: we should update qemu to support it.
Signed-off-by: Wen Congyang 
---
 tools/libxl/libxl.c  | 18 ++
 tools/libxl/libxl_dom.c  | 26 ++
 tools/libxl/libxl_internal.h |  4 
 tools/libxl/libxl_qmp.c  | 10 ++
 4 files changed, 58 insertions(+)

diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index 2a735b3..6e55afc 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -510,6 +510,24 @@ int libxl_domain_rename(libxl_ctx *ctx, uint32_t domid,
 return rc;
 }
 
+int libxl__domain_restore(libxl__gc *gc, uint32_t domid)
+{
+int rc = 0;
+
+libxl_domain_type type = libxl__domain_type(gc, domid);
+if (type != LIBXL_DOMAIN_TYPE_HVM) {
+rc = ERROR_FAIL;
+goto out;
+}
+
+rc = libxl__domain_restore_device_model(gc, domid);
+if (rc)
+LOG(ERROR, "failed to restore device mode for domain %u:%d",
+domid, rc);
+out:
+return rc;
+}
+
 int libxl__domain_resume(libxl__gc *gc, uint32_t domid, int suspend_cancel)
 {
 int rc = 0;
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index d286851..fd0c5c2 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -1343,6 +1343,32 @@ int libxl__domain_suspend_device_model(libxl__gc *gc,
 return ret;
 }
 
+int libxl__domain_restore_device_model(libxl__gc *gc, uint32_t domid)
+{
+char *state_file;
+int rc;
+
+switch (libxl__device_model_version_running(gc, domid)) {
+case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL:
+/* not supported now */
+return ERROR_FAIL;
+case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
+/*
+ * This function may be called too many times for the same gc,
+ * so we use NOGC, and free the memory before return to avoid
+ * OOM.
+ */
+state_file = libxl__sprintf(NOGC,
+XC_DEVICE_MODEL_RESTORE_FILE".%d",
+domid);
+rc = libxl__qmp_restore(gc, domid, state_file);
+free(state_file);
+return rc;
+default:
+return ERROR_INVAL;
+}
+}
+
 int libxl__domain_resume_device_model(libxl__gc *gc, uint32_t domid)
 {
 
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index c1ea498..3b4e6c4 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -1016,6 +1016,7 @@ _hidden int libxl__domain_rename(libxl__gc *gc, uint32_t 
domid,
 
 _hidden int libxl__toolstack_restore(uint32_t domid, const uint8_t *buf,
  uint32_t size, void *data);
+_hidden int libxl__domain_restore_device_model(libxl__gc *gc, uint32_t domid);
 _hidden int libxl__domain_resume_device_model(libxl__gc *gc, uint32_t domid);
 
 _hidden const char *libxl__userdata_path(libxl__gc *gc, uint32_t domid,
@@ -1033,6 +1034,7 @@ _hidden int libxl__userdata_store(libxl__gc *gc, uint32_t 
domid,
   const char *userdata_userid,
   const uint8_t *data, int datalen);
 
+_hidden int libxl__domain_restore(libxl__gc *gc, uint32_t domid);
 _hidden int libxl__domain_resume(libxl__gc *gc, uint32_t domid,
  int suspend_cancel);
 
@@ -1629,6 +1631,8 @@ _hidden int libxl__qmp_stop(libxl__gc *gc, int domid);
 _hidden int libxl__qmp_resume(libxl__gc *gc, int domid);
 /* Save current QEMU state into fd. */
 _hidden int libxl__qmp_save(libxl__gc *gc, int domid, const char *filename);
+/* Load current QEMU state from fd. */
+_hidden int libxl__qmp_restore(libxl__gc *gc, int domid, const char *filename);
 /* Set dirty bitmap logging status */
 _hidden int libxl__qmp_set_global_dirty_log(libxl__gc *gc, int domid, bool 
enable);
 _hidden int libxl__qmp_insert_cdrom(libxl__gc *gc, int domid, const 
libxl_device_disk *disk);
diff --git a/tools/libxl/libxl_qmp.c b/tools/libxl/libxl_qmp.c
index 9aa7e2e..1b66d55 100644
--- a/tools/libxl/libxl_qmp.c
+++ b/tools/libxl/libxl_qmp.c
@@ -892,6 +892,16 @@ int libxl__qmp_save(libxl__gc *gc, int domid, const char 
*filename)
NULL, NULL);
 }
 
+int libxl__qmp_restore(libxl__gc *gc, int domid, const char *state_file)
+{
+libxl__json_object *args = NULL;
+
+qmp_parameters_add_string(gc, &args, "filename", (char *)state_file);
+
+return qmp_run_command(gc, domid, "xen-load-devices-state", args,
+   NULL, NULL);
+}
+
 static int qmp_change(libxl__gc *gc, libxl__qmp_handler *qmp,

[Xen-devel] [RFC PATCH COLO v5 08/29] tools/libxl: Introduce bitops macros

2015-03-31 Thread Yang Hongyang

From: Wen Congyang 

This is the same set used by libxc.

Signed-off-by: Wen Congyang 
---
 tools/libxl/libxl_bitops.h | 79 ++
 1 file changed, 79 insertions(+)
 create mode 100644 tools/libxl/libxl_bitops.h

diff --git a/tools/libxl/libxl_bitops.h b/tools/libxl/libxl_bitops.h
new file mode 100644
index 000..c6ef8df
--- /dev/null
+++ b/tools/libxl/libxl_bitops.h
@@ -0,0 +1,79 @@
+#ifndef LIBXL_BITOPS_H
+#define LIBXL_BITOPS_H 1
+
+/* bitmap operations for single threaded access */
+
+#include 
+#include 
+
+#define BITS_PER_LONG (sizeof(unsigned long) * 8)
+#define ORDER_LONG (sizeof(unsigned long) == 4 ? 5 : 6)
+
+#define BITMAP_ENTRY(_nr,_bmap) ((_bmap))[(_nr)/BITS_PER_LONG]
+#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG)
+
+/* calculate required space for number of longs needed to hold nr_bits */
+static inline int bitmap_size(int nr_bits)
+{
+int nr_long, nr_bytes;
+nr_long = (nr_bits + BITS_PER_LONG - 1) >> ORDER_LONG;
+nr_bytes = nr_long * sizeof(unsigned long);
+return nr_bytes;
+}
+
+static inline unsigned long *bitmap_alloc(int nr_bits)
+{
+return calloc(1, bitmap_size(nr_bits));
+}
+
+static inline void bitmap_clear(unsigned long *addr, int nr_bits)
+{
+memset(addr, 0, bitmap_size(nr_bits));
+}
+
+static inline int test_bit(int nr, unsigned long *addr)
+{
+return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1;
+}
+
+static inline void clear_bit(int nr, unsigned long *addr)
+{
+BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr));
+}
+
+static inline void set_bit(int nr, unsigned long *addr)
+{
+BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr));
+}
+
+static inline int test_and_clear_bit(int nr, unsigned long *addr)
+{
+int oldbit = test_bit(nr, addr);
+clear_bit(nr, addr);
+return oldbit;
+}
+
+static inline int test_and_set_bit(int nr, unsigned long *addr)
+{
+int oldbit = test_bit(nr, addr);
+set_bit(nr, addr);
+return oldbit;
+}
+
+static inline void bitmap_or(unsigned long *dst, const unsigned long *other,
+ int nr_bits)
+{
+int i, nr_longs = (bitmap_size(nr_bits) / sizeof(unsigned long));
+for ( i = 0; i < nr_longs; ++i )
+dst[i] |= other[i];
+}
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [RFC PATCH COLO v5 06/29] Update libxl__domain_unpause() to support qemu-xen

2015-03-31 Thread Yang Hongyang

From: Wen Congyang 

Currently, libxl__domain_unpause() only supports
qemu-xen-traditional. Update it to support qemu-xen.

Signed-off-by: Wen Congyang 
---
 tools/libxl/libxl.c  | 13 +
 tools/libxl/libxl_dom.c  | 25 +
 tools/libxl/libxl_internal.h |  2 ++
 3 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index c3898ce..58629ed 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -1039,8 +1039,6 @@ out:
 
 int libxl__domain_unpause(libxl__gc *gc, uint32_t domid)
 {
-char *path;
-char *state;
 int ret, rc = 0;
 
 libxl_domain_type type = libxl__domain_type(gc, domid);
@@ -1050,12 +1048,11 @@ int libxl__domain_unpause(libxl__gc *gc, uint32_t domid)
 }
 
 if (type == LIBXL_DOMAIN_TYPE_HVM) {
-path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/state", 
domid);
-state = libxl__xs_read(gc, XBT_NULL, path);
-if (state != NULL && !strcmp(state, "paused")) {
-libxl__qemu_traditional_cmd(gc, domid, "continue");
-libxl__wait_for_device_model_deprecated(gc, domid, "running",
- NULL, NULL, NULL);
+rc = libxl__domain_unpause_device_model(gc, domid);
+if (rc < 0) {
+LOG(ERROR, "failed to unpause device model for domain %u:%d",
+domid, rc);
+goto out;
 }
 }
 
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index eb4ed94..a3fce46 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -2272,6 +2272,31 @@ static void remus_teardown_done(libxl__egc *egc,
 dss->callback(egc, dss, rc);
 }
 
+int libxl__domain_unpause_device_model(libxl__gc *gc, uint32_t domid)
+{
+char *path;
+char *state;
+
+switch (libxl__device_model_version_running(gc, domid)) {
+case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL:
+path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/state", 
domid);
+state = libxl__xs_read(gc, XBT_NULL, path);
+if (state != NULL && !strcmp(state, "paused")) {
+libxl__qemu_traditional_cmd(gc, domid, "continue");
+libxl__wait_for_device_model_deprecated(gc, domid, "running",
+ NULL, NULL, NULL);
+}
+case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
+if (libxl__qmp_resume(gc, domid))
+return ERROR_FAIL;
+break;
+default:
+return ERROR_FAIL;
+}
+
+return 0;
+}
+
 /* Miscellaneous */
 
 char *libxl__uuid2string(libxl__gc *gc, const libxl_uuid uuid)
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 538ac4b..8d229ac 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -1018,6 +1018,8 @@ _hidden int libxl__toolstack_restore(uint32_t domid, 
const uint8_t *buf,
  uint32_t size, void *data);
 _hidden int libxl__domain_restore_device_model(libxl__gc *gc, uint32_t domid);
 _hidden int libxl__domain_resume_device_model(libxl__gc *gc, uint32_t domid);
+_hidden int libxl__domain_unpause_device_model(libxl__gc *gc,
+   uint32_t domid);
 
 _hidden const char *libxl__userdata_path(libxl__gc *gc, uint32_t domid,
  const char *userdata_userid,
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [RFC PATCH COLO v5 04/29] Update libxl__domain_suspend_common_switch_qemu_logdirty() for colo

2015-03-31 Thread Yang Hongyang

From: Wen Congyang 

The secondary VM runs in COLO mode, so we need to send the
secondary VM's dirty page information to the master.
libxl__domain_suspend_common_switch_qemu_logdirty() enables
qemu logdirty, but it uses domain_suspend_state and calls
libxl__xc_domain_saverestore_async_callback_done()
before it returns.

Introduce a new API, libxl__domain_common_switch_qemu_logdirty().
This API only uses libxl__logdirty_switch, and calls
lds->callback before it returns.

Signed-off-by: Wen Congyang 
---
 tools/libxl/libxl_dom.c  | 79 +++-
 tools/libxl/libxl_internal.h | 12 +--
 2 files changed, 59 insertions(+), 32 deletions(-)

diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index fd0c5c2..eb4ed94 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -,7 +,7 @@ static void switch_logdirty_timeout(libxl__egc *egc, 
libxl__ev_time *ev,
 static void switch_logdirty_xswatch(libxl__egc *egc, libxl__ev_xswatch*,
 const char *watch_path, const char *event_path);
 static void switch_logdirty_done(libxl__egc *egc,
- libxl__domain_suspend_state *dss, int ok);
+ libxl__logdirty_switch *lds, int ok);
 
 static void logdirty_init(libxl__logdirty_switch *lds)
 {
@@ -1122,12 +1122,10 @@ static void logdirty_init(libxl__logdirty_switch *lds)
 
 static void domain_suspend_switch_qemu_xen_traditional_logdirty
(int domid, unsigned enable,
-libxl__save_helper_state *shs)
+libxl__logdirty_switch *lds,
+libxl__egc *egc)
 {
-libxl__egc *egc = shs->egc;
-libxl__domain_suspend_state *dss = CONTAINER_OF(shs, *dss, shs);
-libxl__logdirty_switch *lds = &dss->logdirty;
-STATE_AO_GC(dss->ao);
+STATE_AO_GC(lds->ao);
 int rc;
 xs_transaction_t t = 0;
 const char *got;
@@ -1188,64 +1186,85 @@ static void 
domain_suspend_switch_qemu_xen_traditional_logdirty
  out:
 LOG(ERROR,"logdirty switch failed (rc=%d), aborting suspend",rc);
 libxl__xs_transaction_abort(gc, &t);
-switch_logdirty_done(egc,dss,-1);
+switch_logdirty_done(egc,lds,-1);
 }
 
 static void domain_suspend_switch_qemu_xen_logdirty
(int domid, unsigned enable,
-libxl__save_helper_state *shs)
+libxl__logdirty_switch *lds,
+libxl__egc *egc)
 {
-libxl__egc *egc = shs->egc;
-libxl__domain_suspend_state *dss = CONTAINER_OF(shs, *dss, shs);
-STATE_AO_GC(dss->ao);
+STATE_AO_GC(lds->ao);
 int rc;
 
 rc = libxl__qmp_set_global_dirty_log(gc, domid, enable);
 if (!rc) {
-libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0);
+lds->callback(egc, lds, 0);
 } else {
 LOG(ERROR,"logdirty switch failed (rc=%d), aborting suspend",rc);
-libxl__xc_domain_saverestore_async_callback_done(egc, shs, -1);
+lds->callback(egc, lds, -1);
 }
 }
 
+static void libxl__domain_suspend_switch_qemu_logdirty_done
+(libxl__egc *egc,
+ libxl__logdirty_switch *lds,
+ int rc)
+{
+libxl__domain_suspend_state *dss = CONTAINER_OF(lds, *dss, logdirty);
+
+libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, rc);
+}
+
 void libxl__domain_suspend_common_switch_qemu_logdirty
(int domid, unsigned enable, void *user)
 {
 libxl__save_helper_state *shs = user;
 libxl__egc *egc = shs->egc;
 libxl__domain_suspend_state *dss = CONTAINER_OF(shs, *dss, shs);
-STATE_AO_GC(dss->ao);
+
+/* convenience aliases */
+libxl__logdirty_switch *const lds = &dss->logdirty;
+
+lds->callback = libxl__domain_suspend_switch_qemu_logdirty_done;
+
+libxl__domain_common_switch_qemu_logdirty(domid, enable, lds, egc);
+}
+
+void libxl__domain_common_switch_qemu_logdirty(int domid, unsigned enable,
+   libxl__logdirty_switch *lds,
+   libxl__egc *egc)
+{
+STATE_AO_GC(lds->ao);
 
 switch (libxl__device_model_version_running(gc, domid)) {
 case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL:
-domain_suspend_switch_qemu_xen_traditional_logdirty(domid, enable, 
shs);
+domain_suspend_switch_qemu_xen_traditional_logdirty(domid, enable,
+lds, egc);
 break;
 case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
-domain_suspend_switch_qemu_xen_logdirty(domid, enable, shs);
+domain_suspend_switch_qemu_xen_logdirty(domid, enable, lds, egc);
 break;
 default:
 LOG(ERROR,"logdirty switch failed"
 ", no valid device model ver

[Xen-devel] [RFC PATCH COLO v5 00/29] COarse-grain LOck-stepping Virtual Machines for Non-stop Service

2015-03-31 Thread Yang Hongyang

This patchset is for xen-4.6. The main differences from previous versions are:
1. Use qdisk block replication
http://wiki.qemu.org/Features/BlockReplication
2. Nic replication based on colo-proxy
http://wiki.qemu.org/Features/COLO#Components
Note that COLO feature is under active development, this version is not well
tested and has some known problems.
We post this early in order to give you a brief impression about how COLO
will be implemented and we request for your comments about the general idea
of COLO and of course the implementation, if you have any idea/suggestion
on COLO, please do not hesitate to give your comments, thanks in advance.

Virtual machine (VM) replication is a well known technique for providing
application-agnostic software-implemented hardware fault tolerance -
"non-stop service". Currently, remus provides this function, but it buffers
all output packets, and the latency is unacceptable.
At Xen Summit 2012, we introduced a new VM replication solution: COLO
(COarse-grain LOck-stepping virtual machine). The presentation is available at
the following URL:
http://www.slideshare.net/xen_com_mgr/colo-coarsegrain-lockstepping-virtual-machines-for-nonstop-service

Here is the summary of the solution:
From the client's point of view, as long as the client observes identical
responses from the primary and secondary VMs, according to the service
semantics, then the secondary vm is a valid replica of the primary
vm, and can successfully take over when a hardware failure of the
primary vm is detected.

This patchset is based on migration v1.
Only supports hvm guest now. The codes are also hosted on github:
https://github.com/macrosheep/xen/tree/COLO_RFC_v5

TODO list:
1. Code reviews and Bug fixes
2. Switch to migration v2
3. Support pvm

Known bugs:
1. Secondary vm may crash due to triple fault.

Wiki pages:
http://wiki.xen.org/wiki/COLO_-_Coarse_Grain_Lock_Stepping
http://wiki.qemu.org/Features/COLO

Patch 1: Add readme
Patch 2-8 : Some refactor and prepare work
Patch 9-12 : Update remus to reuse remus device codes
Patch 13-21: COLO framework related codes
Patch 22-23: implement disk replication
Patch 24-29: implement nic replication

Changelog from v4 to v5:
1. rebase to the latest xen upstream
2. disk replication: blktap2->qdisk
3. nic replication: colo-agent->colo-proxy

Changelog from v3 to v4:
1. rebase to newest xen
2. bug fix

Changelog from v2 to v3:
1. rebase to newest remus
2. add nic replication support

Changelog from v1 to v2:
1. rebase to newest remus
2. add disk replication support

Wen Congyang (23):
Add readme
Refactor domain_suspend_callback_common()
tools: libxl: introduce a new API libxl__domain_restore() to read qemu
state
Update libxl__domain_suspend_common_switch_qemu_logdirty() for colo
Introduce a new internal API libxl__domain_unpause()
Update libxl__domain_unpause() to support qemu-xen
support to resume uncooperative HVM guests
tools/libxl: Introduce bitops macros
move remus related codes to libxl_remus.c
rename remus device to checkpoint device
adjust the indentation
don't touch remus in checkpoint_device
Update libxl_save_msgs_gen.pl to support return data from xl to xc
Allow slave sends data to master
secondary vm suspend/resume/checkpoint code
primary vm suspend/get_dirty_pfn/resume/checkpoint code
xc_domain_save: flush cache before calling callbacks->postcopy() in
colo mode
COLO: xc related codes
send store mfn and console mfn to xl before resuming secondary vm
implement the cmdline for COLO
tools: xc_doamin_restore: zero ioreq page only one time
Support colo mode for qemu disk
COLO: use qemu block replication

Yang Hongyang (6):
COLO proxy: implement setup/teardown of COLO proxy module
COLO proxy: preresume, postresume and checkpoint
COLO nic: implement COLO nic subkind
setup and control colo proxy on primary side
setup and control colo proxy on secondary side
cmdline switches and config vars to control colo-proxy

[Xen-devel] [RFC PATCH COLO v5 01/29] Add readme

2015-03-31 Thread Yang Hongyang

From: Wen Congyang 

Signed-off-by: Wen Congyang 
Signed-off-by: Yang Hongyang 
---
 docs/README.colo | 92 
 1 file changed, 92 insertions(+)
 create mode 100644 docs/README.colo

diff --git a/docs/README.colo b/docs/README.colo
new file mode 100644
index 000..60f487d
--- /dev/null
+++ b/docs/README.colo
@@ -0,0 +1,92 @@
+COLO provides fault tolerance for virtual machines by sending continuous
+checkpoints to a backup, which will activate if the target VM fails. It
+only supports HVM guest(without pv extensions).
+
+Requirements:
+1. Hardware requirements
+   There is at least one directly connected nic to forward the nic from client
+   to secondary vm. The directly connected nic must not be used by any other
+   purpose. If your guest has more than one nic, you should have directly
+   connected nic for each guest nic. If you don't have enough directly connected
+   nic, you can use vlan.
+2. Dom0 requirements
+   - Support dom0
+   - kernel module:
+sch_ingress
+cls_basic
+cls_tcindex
+cls_u32
+act_mirred
+   - libnl-tools >= 3.0. This package provides the command nl-qdisc-list, and
+ colo need this command.
+   - If your host os has OEM-released xen tools, please uninstall it first.
+   - You can load the module which is not provided by OEM.
+3. Guest requirements
+   Only HVM guest(without pv extensions) is supported now. If you want to
+   use OEM released guest os, please use SUSE. REDHAT and Ubuntu is not
+   supported now because I don't find any way to disable pv extensions.
+   If you want to use REDHAT or Ubuntu, you need to build the newest
+   kernel which has the parameter xen_nopv.
+
+Network link topology
+   Please refer to: http://wiki.qemu.org/Features/COLO#Network_link_topology
+
+The steps to setup COLO environment:
+You need to recompile your host kernel because colo-proxy module need cooperate
+with linux kernel.
+Please refer to: http://wiki.qemu.org/Features/COLO#Test_environment_prepare
+1. Build and install xen
+2. Apply the patch for qemu xen, and rebuild xen tools:
+- cd tools/qemu-xen-dir
+- use git am to apply the patch:
+  
https://raw.githubusercontent.com/wencongyang/colo-files/master/patch_for_qemu/*.patch
+- make tools && make install-tools
+Note: You must use qemu-xen. qemu-xen-traditional is not supported.
+3. Install COLO proxy module:
+3.1 Download COLO proxy, compile and install it:
+https://github.com/gao-feng/colo-proxy.git
+3.2 Download iptables patch, it is based on v1.4.21 compile and install it:
+
https://github.com/gao-feng/colo-proxy/blob/master/colo-patch-for-kernel.patch
+4. Install the guest
+4.1 Add "xen_platform_pci=0" into the guest configfile
+4.2 If you use suse, please select physical machine
+4.3 copy the disk image to the secondary host
+5. Update your guest config file for COLO:
+5.1 disk
+disk = [
+
'format=raw,devtype=disk,access=w,vdev=hda,backendtype=qdisk,colo,colo-params=192.168.3.1:9000:exportname=qdisk1,active-disk=/mnt/ramfs/active_disk.img,hidden-disk=/mnt/ramfs/hidden_disk.img,target=/root/images/colo-hvm.img'
 ]
+5.2 nic
+vif = [ 'mac=00:16:4f:00:00:11, bridge=br0, model=e1000, 
forwarddev=eth0, forwardbr=br1' ]
+Note:
+a. The ip/port in colo-params is the secondary host's IP. Don't use the
+   directly connected nic's IP.
+b. forwarddev is the directly connected nic.
+c. If you have more than one disk, colo-params's host/port must be the same
+   and colo-param's exportname must be different.
+6. Run COLO:
+xl remus -c -u  
+Note: The ip must not be the directly connected nic's IP.
+Note:
+Secondary host only need to do step 1-3.
+
+The known problem:
+1. Secondary vm may crash due to triple fault.
+2. The heartbeat is not reliable. If you want to test the performance,
+   please disable the heartbeat(modify the xen codes). You can use the
+   branch colo-v4-noheartbeat.
+3. Suspending the vm fails, and the error message is:
+libxl: error: libxl_qmp.c:429:qmp_next: timeout
+
+Problem 1 and 3 don't happen every time. So you can run colo again to
+avoid this problem.
+
+Virtio-Net:
+1. If you want to get better performance, you can use virtio-net.
+
+Trouble shooting:
+If an error happens when starting COLO, you can do:
+1. Make sure you have all necessary modules that DOM0 needed on both side.
+2. Make sure you have followed all the instructions in this README.
+3. Try to reboot both primary and secondary host.
+4. If you still have problems, collect the error logs and contact
+   Wen Congyang(we...@cn.fujitsu.com)/Yang Hongyang(yan...@cn.fujitsu.com).
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [RFC PATCH COLO v5 07/29] support to resume uncooperative HVM guests

2015-03-31 Thread Yang Hongyang

From: Wen Congyang 

For PVHVM, the hypercall return code is 0, and it can be resumed
in a new domain context.

For HVM, do nothing.

Signed-off-by: Wen Congyang 
---
 tools/libxc/xc_resume.c | 20 
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/tools/libxc/xc_resume.c b/tools/libxc/xc_resume.c
index e67bebd..b862ce3 100644
--- a/tools/libxc/xc_resume.c
+++ b/tools/libxc/xc_resume.c
@@ -109,6 +109,21 @@ static int xc_domain_resume_cooperative(xc_interface *xch, 
uint32_t domid)
 return do_domctl(xch, &domctl);
 }
 
+static int xc_domain_resume_hvm(xc_interface *xch, uint32_t domid)
+{
+DECLARE_DOMCTL;
+
+/*
+ * If it is PVHVM, the hypercall return code is 0, and resume
+ * it in a new domain context.
+ *
+ * If it is a HVM, do nothing.
+ */
+domctl.cmd = XEN_DOMCTL_resumedomain;
+domctl.domain = domid;
+return do_domctl(xch, &domctl);
+}
+
 static int xc_domain_resume_any(xc_interface *xch, uint32_t domid)
 {
 DECLARE_DOMCTL;
@@ -138,10 +153,7 @@ static int xc_domain_resume_any(xc_interface *xch, 
uint32_t domid)
  */
 #if defined(__i386__) || defined(__x86_64__)
 if ( info.hvm )
-{
-ERROR("Cannot resume uncooperative HVM guests");
-return rc;
-}
+return xc_domain_resume_hvm(xch, domid);
 
 if ( xc_domain_get_guest_width(xch, domid, &dinfo->guest_width) != 0 )
 {
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [RFC PATCH COLO v5 05/29] Introduce a new internal API libxl__domain_unpause()

2015-03-31 Thread Yang Hongyang

From: Wen Congyang 

The guest is paused after libxl_domain_create_restore().
Secondary vm is running in colo mode. So we need to unpause
the guest. The current API libxl_domain_unpause() is
not an internal API. Introduce a new API to support it.

Signed-off-by: Wen Congyang 
---
 tools/libxl/libxl.c  | 21 +++--
 tools/libxl/libxl_internal.h |  1 +
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index 6e55afc..c3898ce 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -1037,9 +1037,8 @@ out:
 return AO_INPROGRESS;
 }
 
-int libxl_domain_unpause(libxl_ctx *ctx, uint32_t domid)
+int libxl__domain_unpause(libxl__gc *gc, uint32_t domid)
 {
-GC_INIT(ctx);
 char *path;
 char *state;
 int ret, rc = 0;
@@ -1059,12 +1058,22 @@ int libxl_domain_unpause(libxl_ctx *ctx, uint32_t domid)
  NULL, NULL, NULL);
 }
 }
-ret = xc_domain_unpause(ctx->xch, domid);
-if (ret<0) {
-LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "unpausing domain %d", domid);
+
+ret = xc_domain_unpause(CTX->xch, domid);
+if (ret < 0) {
+LOGE(ERROR, "unpausing domain %d", domid);
 rc = ERROR_FAIL;
 }
- out:
+
+out:
+return rc;
+}
+
+int libxl_domain_unpause(libxl_ctx *ctx, uint32_t domid)
+{
+GC_INIT(ctx);
+int rc = libxl__domain_unpause(gc, domid);
+
 GC_FREE;
 return rc;
 }
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 6470866..538ac4b 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -1037,6 +1037,7 @@ _hidden int libxl__userdata_store(libxl__gc *gc, uint32_t 
domid,
 _hidden int libxl__domain_restore(libxl__gc *gc, uint32_t domid);
 _hidden int libxl__domain_resume(libxl__gc *gc, uint32_t domid,
  int suspend_cancel);
+_hidden int libxl__domain_unpause(libxl__gc *gc, uint32_t domid);
 
 /* returns 0 or 1, or a libxl error code */
 _hidden int libxl__domain_pvcontrol_available(libxl__gc *gc, uint32_t domid);
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [RFC PATCH COLO v5 02/29] Refactor domain_suspend_callback_common()

2015-03-31 Thread Yang Hongyang

From: Wen Congyang 

libxl__domain_suspend() is to save the guest. I think
we should call it libxl__domain_save(), but I don't
rename it.

Secondary vm is running in colo mode. So we will do
the following things again and again:
1. suspend both primary vm and secondary vm
2. sync the state
3. resume both primary vm and secondary vm
To suspend secondary vm, we need an independent API to
suspend vm.

The core function to suspend vm is domain_suspend_callback_common().
So use a new structure, libxl__domain_suspend_state2, instead
of libxl__domain_suspend_state. The members of dss that are
used in domain_suspend_callback_common() are
moved to dss2.

We introduce a new API libxl__domain_suspend2() too.

Signed-off-by: Wen Congyang 
---
 tools/libxl/libxl_dom.c  | 235 ---
 tools/libxl/libxl_internal.h |  39 +--
 2 files changed, 159 insertions(+), 115 deletions(-)

diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 26a0382..d286851 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -1094,7 +1094,7 @@ int libxl__toolstack_restore(uint32_t domid, const 
uint8_t *buf,
 static void domain_suspend_done(libxl__egc *egc,
 libxl__domain_suspend_state *dss, int rc);
 static void domain_suspend_callback_common_done(libxl__egc *egc,
-libxl__domain_suspend_state *dss, int ok);
+libxl__domain_suspend_state2 *dss2, int ok);
 
 /*- complicated callback, called by xc_domain_save -*/
 
@@ -1312,16 +1312,17 @@ static void switch_logdirty_done(libxl__egc *egc,
 /*- callbacks, called by xc_domain_save -*/
 
 int libxl__domain_suspend_device_model(libxl__gc *gc,
-   libxl__domain_suspend_state *dss)
+   libxl__domain_suspend_state2 *dss2)
 {
 int ret = 0;
-uint32_t const domid = dss->domid;
-const char *const filename = dss->dm_savefile;
+uint32_t const domid = dss2->domid;
+const char *const filename = dss2->dm_savefile;
 
 switch (libxl__device_model_version_running(gc, domid)) {
 case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: {
 LOG(DEBUG, "Saving device model state to %s", filename);
-libxl__qemu_traditional_cmd(gc, domid, "save");
+if (dss2->save_dm)
+libxl__qemu_traditional_cmd(gc, domid, "save");
 libxl__wait_for_device_model_deprecated(gc, domid, "paused", NULL, 
NULL, NULL);
 break;
 }
@@ -1329,9 +1330,11 @@ int libxl__domain_suspend_device_model(libxl__gc *gc,
 if (libxl__qmp_stop(gc, domid))
 return ERROR_FAIL;
 /* Save DM state into filename */
-ret = libxl__qmp_save(gc, domid, filename);
-if (ret)
-unlink(filename);
+if (dss2->save_dm) {
+ret = libxl__qmp_save(gc, domid, filename);
+if (ret)
+unlink(filename);
+}
 break;
 default:
 return ERROR_INVAL;
@@ -1361,9 +1364,9 @@ int libxl__domain_resume_device_model(libxl__gc *gc, 
uint32_t domid)
 }
 
 static void domain_suspend_common_wait_guest(libxl__egc *egc,
- libxl__domain_suspend_state *dss);
+ libxl__domain_suspend_state2 
*dss2);
 static void domain_suspend_common_guest_suspended(libxl__egc *egc,
- libxl__domain_suspend_state *dss);
+ libxl__domain_suspend_state2 *dss2);
 
 static void domain_suspend_common_pvcontrol_suspending(libxl__egc *egc,
   libxl__xswait_state *xswa, int rc, const char *state);
@@ -1372,14 +1375,14 @@ static void 
domain_suspend_common_wait_guest_evtchn(libxl__egc *egc,
 static void suspend_common_wait_guest_watch(libxl__egc *egc,
   libxl__ev_xswatch *xsw, const char *watch_path, const char *event_path);
 static void suspend_common_wait_guest_check(libxl__egc *egc,
-libxl__domain_suspend_state *dss);
+libxl__domain_suspend_state2 
*dss2);
 static void suspend_common_wait_guest_timeout(libxl__egc *egc,
   libxl__ev_time *ev, const struct timeval *requested_abs);
 
 static void domain_suspend_common_failed(libxl__egc *egc,
- libxl__domain_suspend_state *dss);
+ libxl__domain_suspend_state2 *dss2);
 static void domain_suspend_common_done(libxl__egc *egc,
-   libxl__domain_suspend_state *dss,
+   libxl__domain_suspend_state2 *dss2,
bool ok);
 
 static bool domain_suspend_pvcontrol_acked(const char *state) {
@@ -1388,36 +1391,36 @@ static bool domain_suspend_pvcontrol_acked(const char 
*state) {
 return strcmp(state,"suspend");
 }
 
-/* calls dss->c

Re: [Xen-devel] [PATCH] xen-blkback: define pr_fmt macro to avoid the duplication of DRV_PFX

2015-03-31 Thread Chentao (Boby)


Thanks roger and joe.

I will adopt your suggestions in my v2 patch.

On 2015/3/31 22:57, Roger Pau Monné wrote:

El 31/03/15 a les 23.14, Tao Chen ha escrit:

Define pr_fmt macro with {xen-blkback: } prefix, then remove all use
of DRV_PFX in the pr and DPRINTK sentences. It will simplify the code.

And if the pr sentences miss a \n, add it in the end. If the DPRINTK
sentences have redundant \n, remove it. It will format the code.

These all make the readability of the code become better.


Thanks for the patch.


Signed-off-by: Tao Chen 
---
  drivers/block/xen-blkback/blkback.c | 62 ++---
  drivers/block/xen-blkback/common.h  |  6 
  drivers/block/xen-blkback/xenbus.c  | 18 ++-
  3 files changed, 42 insertions(+), 44 deletions(-)


[...]

diff --git a/drivers/block/xen-blkback/common.h 
b/drivers/block/xen-blkback/common.h
index 375d288..f620b5d 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -44,12 +44,6 @@
  #include 
  #include 

-#define DRV_PFX "xen-blkback:"
-#define DPRINTK(fmt, args...)  \
-   pr_debug(DRV_PFX "(%s:%d) " fmt ".\n",  \
-__func__, __LINE__, ##args)
-
-
  /*
   * This is the maximum number of segments that would be allowed in indirect
   * requests. This value will also be passed to the frontend.
diff --git a/drivers/block/xen-blkback/xenbus.c 
b/drivers/block/xen-blkback/xenbus.c
index b33083e..0dbbfeb 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -14,6 +14,11 @@

  */

+#define pr_fmt(fmt) "xen-blkback: " fmt
+#define DPRINTK(fmt, args...)  \
+   pr_debug("(%s:%d) " fmt ".\n",  \
+   __func__, __LINE__, ##args)
+
  #include 
  #include 
  #include 
@@ -426,14 +431,14 @@ static int xen_vbd_create(struct xen_blkif *blkif, 
blkif_vdev_t handle,
 FMODE_READ : FMODE_WRITE, NULL);

if (IS_ERR(bdev)) {
-   DPRINTK("xen_vbd_create: device %08x could not be opened.\n",
+   DPRINTK("xen_vbd_create: device %08x could not be opened",
vbd->pdevice);
return -ENOENT;
}

vbd->bdev = bdev;
if (vbd->bdev->bd_disk == NULL) {
-   DPRINTK("xen_vbd_create: device %08x doesn't exist.\n",
+   DPRINTK("xen_vbd_create: device %08x doesn't exist",
vbd->pdevice);


IMHO these two above should probably be turned into pr_warn...


xen_vbd_free(vbd);
return -ENOENT;
@@ -452,7 +457,7 @@ static int xen_vbd_create(struct xen_blkif *blkif, 
blkif_vdev_t handle,
if (q && blk_queue_secdiscard(q))
vbd->discard_secure = true;

-   DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
+   DPRINTK("Successful creation of handle=%04x (dom=%u)",
handle, blkif->domid);


...and this should be turned into a plain pr_debug. And with that we can
get rid of DPRINTK.

Roger.


.




___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [qemu-upstream-4.4-testing test] 50274: tolerable FAIL - PUSHED

2015-03-31 Thread osstest service user

flight 50274 qemu-upstream-4.4-testing real [real]
http://logs.test-lab.xenproject.org/osstest/logs/50274/

Failures :-/ but no regressions.

Tests which are failing intermittently (not blocking):
 test-amd64-i386-pair 17 guest-migrate/src_host/dst_host fail pass in 36769
 test-amd64-i386-xl-win7-amd64 13 guest-localmigrate/x10 fail pass in 36769
 test-amd64-amd64-xl-qemuu-winxpsp3 7 windows-install fail in 36769 pass in 
50274

Tests which did not succeed, but are not blocking:
 test-amd64-i386-libvirt  10 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt 10 migrate-support-checkfail   never pass
 test-amd64-amd64-xl-qemut-winxpsp3 14 guest-stop   fail never pass
 test-amd64-i386-xl-qemuu-winxpsp3-vcpus1 14 guest-stop fail never pass
 test-amd64-amd64-xl-winxpsp3 14 guest-stop   fail   never pass
 test-amd64-i386-xl-qemuu-win7-amd64 14 guest-stop  fail never pass
 test-amd64-i386-xend-winxpsp3 17 leak-check/check fail  never pass
 test-amd64-i386-xl-qemut-win7-amd64 14 guest-stop  fail never pass
 test-amd64-amd64-xl-win7-amd64 14 guest-stop   fail never pass
 test-amd64-amd64-xl-qemuu-win7-amd64 14 guest-stop fail never pass
 test-amd64-amd64-xl-qemut-win7-amd64 14 guest-stop fail never pass
 test-amd64-i386-xl-winxpsp3-vcpus1 14 guest-stop   fail never pass
 test-amd64-i386-xl-qemut-winxpsp3-vcpus1 14 guest-stop fail never pass
 test-amd64-i386-xend-qemut-winxpsp3 17 leak-check/checkfail never pass
 test-amd64-amd64-xl-qemuu-winxpsp3 14 guest-stop   fail never pass
 test-amd64-amd64-xl-pcipt-intel  9 guest-startfail in 36769 never pass
 test-amd64-i386-xl-win7-amd64 14 guest-stop   fail in 36769 never pass

version targeted for testing:
 qemuu                d173a0c20d7970c17fa593cf86abc1791a8a4a3a
baseline version:
 qemuu                b04df88d41f64fc6b56d193b6e90fb840cedb1d3


People who touched revisions under test:
  Benoit Canet 
  Benoît Canet 
  Dmitry Fleytman 
  Gerd Hoffmann 
  Jason Wang 
  Jeff Cody 
  Juan Quintela 
  Kevin Wolf 
  Laszlo Ersek 
  Michael Roth 
  Michael S. Tsirkin 
  Peter Maydell 
  Petr Matousek 
  Stefan Hajnoczi 
  Stefano Stabellini 


jobs:
 build-amd64-xend pass
 build-i386-xend  pass
 build-amd64  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-i386-libvirt   pass
 build-amd64-pvopspass
 build-i386-pvops pass
 test-amd64-amd64-xl  pass
 test-amd64-i386-xl   pass
 test-amd64-i386-rhel6hvm-amd pass
 test-amd64-i386-qemut-rhel6hvm-amd   pass
 test-amd64-i386-qemuu-rhel6hvm-amd   pass
 test-amd64-amd64-xl-qemut-debianhvm-amd64pass
 test-amd64-i386-xl-qemut-debianhvm-amd64 pass
 test-amd64-amd64-xl-qemuu-debianhvm-amd64pass
 test-amd64-i386-xl-qemuu-debianhvm-amd64 pass
 test-amd64-i386-freebsd10-amd64  pass
 test-amd64-amd64-xl-qemuu-ovmf-amd64 pass
 test-amd64-i386-xl-qemuu-ovmf-amd64  pass
 test-amd64-amd64-xl-qemut-win7-amd64 fail
 test-amd64-i386-xl-qemut-win7-amd64  fail
 test-amd64-amd64-xl-qemuu-win7-amd64 fail
 test-amd64-i386-xl-qemuu-win7-amd64  fail
 test-amd64-amd64-xl-win7-amd64   fail
 test-amd64-i386-xl-win7-amd64fail
 test-amd64-amd64-xl-credit2  pass
 test-amd64-i386-freebsd10-i386   pass
 test-amd64-i386-rhel6hvm-intel   pass
 test-amd64-i386-qemut-rhel6hvm-intel pass
 test-amd64-i386-qemuu-rhel6hvm-intel pass
 test-amd64-amd64-libvirt pass
 test-amd64-i386-libvirt  pass
 test-amd64-amd64-xl-multivcpupass
 test-amd64-amd64-pairpass
 test-amd64-i386-pair fail
 test-amd64-amd64-xl-sedf-pin pass
 test-amd64-amd64-pv  pass

Re: [Xen-devel] [OSSTEST Nested PATCH v7 2/6] Edit some testsupport APIs for nested test

2015-03-31 Thread Pang, LongtaoX



> -Original Message-
> From: Ian Campbell [mailto:ian.campb...@citrix.com]
> Sent: Tuesday, March 31, 2015 9:50 PM
> To: Pang, LongtaoX
> Cc: xen-devel@lists.xen.org; ian.jack...@eu.citrix.com; wei.l...@citrix.com;
> Hu, Robert
> Subject: Re: [OSSTEST Nested PATCH v7 2/6] Edit some testsupport APIs for
> nested test
> 
> On Fri, 2015-03-27 at 19:06 -0400, longtao.pang wrote:
> > 1. Designate vif model to 'e1000' by make-flight.
> 
> Strictly you could s/to 'e1000'// here since the make-flight changes are
> elsewhere and that would better describe the generic change.
> 
Do you mean that I should change the description from "Designate vif model to 
'e1000' by make-flight" to "Designate vif model by make-flight"?
> > 2. In L2 installation context, its host (L1) IP address is not queried
> > from DNS, but after running "ts-nested-setup + host + nested", L1 IP
> > is stored in runvar.
> >
> > Signed-off-by: longtao.pang 
> 
> Acked-by: Ian Campbell 
> 

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [xen-unstable bisection] complete test-amd64-amd64-xl-multivcpu

2015-03-31 Thread xen . org

branch xen-unstable
xen branch xen-unstable
job test-amd64-amd64-xl-multivcpu
test guest-localmigrate

Tree: linux git://xenbits.xen.org/linux-pvops.git
Tree: linuxfirmware git://xenbits.xen.org/osstest/linux-firmware.git
Tree: qemu git://xenbits.xen.org/staging/qemu-xen-unstable.git
Tree: qemuu git://xenbits.xen.org/staging/qemu-upstream-unstable.git
Tree: xen git://xenbits.xen.org/xen.git

*** Found and reproduced problem changeset ***

  Bug is in tree:  xen git://xenbits.xen.org/xen.git
  Bug introduced:  d639e6a05a0f8ee0e61c6cc4eebba78934ef3648
  Bug not present: 88a2372c6ba44dd42b915a95a823cf9d4d260e25


  commit d639e6a05a0f8ee0e61c6cc4eebba78934ef3648
  Author: Jan Beulich 
  Date:   Mon Mar 23 16:51:14 2015 +0100
  
  x86: allow 64-bit PV guest kernels to suppress user mode exposure of M2P
  
  Xen L4 entries being uniformly installed into any L4 table and 64-bit
  PV kernels running in ring 3 means that user mode was able to see the
  read-only M2P presented by Xen to the guests. While apparently not
  really representing an exploitable information leak, this still very
  certainly was never meant to be that way.
  
  Building on the fact that these guests already have separate kernel and
  user mode page tables we can allow guest kernels to tell Xen that they
  don't want user mode to see this table. We can't, however, do this by
  default: There is no ABI requirement that kernel and user mode page
  tables be separate. Therefore introduce a new VM-assist flag allowing
  the guest to control respective hypervisor behavior:
  - when not set, L4 tables get created with the respective slot blank,
and whenever the L4 table gets used as a kernel one the missing
mapping gets inserted,
  - when set, L4 tables get created with the respective slot initialized
as before, and whenever the L4 table gets used as a user one the
mapping gets zapped.
  
  Signed-off-by: Jan Beulich 
  Reviewed-by: Tim Deegan 


For bisection revision-tuple graph see:
   
http://www.chiark.greenend.org.uk/~xensrcts/results/bisect.xen-unstable.test-amd64-amd64-xl-multivcpu.guest-localmigrate.html
Revision IDs in each graph node refer, respectively, to the Trees above.


Searching for failure / basis pass:
 36772 fail [host=scape-moth] / 36622 [host=potato-beetle] 36540 
[host=fire-frog] 36514 [host=bush-cricket] 35957 [host=field-cricket] 35887 
[host=grain-weevil] 35810 [host=bush-cricket] 35556 ok.
Failure / basis pass flights: 36772 / 35556
(tree with no url: ovmf)
(tree with no url: seabios)
Tree: linux git://xenbits.xen.org/linux-pvops.git
Tree: linuxfirmware git://xenbits.xen.org/osstest/linux-firmware.git
Tree: qemu git://xenbits.xen.org/staging/qemu-xen-unstable.git
Tree: qemuu git://xenbits.xen.org/staging/qemu-upstream-unstable.git
Tree: xen git://xenbits.xen.org/xen.git
Latest 8a5f782c33c04ea5c9b3ca6fb32d6039e2e5c0c9 
c530a75c1e6a472b0eb9558310b518f0dfcd8860 
a4b276b4ce49c8d70dd841ff885b900ec652b994 
42ffdf360dd9df66b0a4a7ada059c02a3cf3a8de 
84066dd4ef4bb5983e246c629a26ef4f3394e5d5
Basis pass a74f1d1204a5c892466b52ac68ee6443c1e459d7 
c530a75c1e6a472b0eb9558310b518f0dfcd8860 
a4b276b4ce49c8d70dd841ff885b900ec652b994 
0d37748342e29854db7c9f6c47d7f58c6cfba6b2 
befe0a0da90d7ac063fd8b5891c7d0cafa5f
Generating revisions with ./adhoc-revtuple-generator  
git://xenbits.xen.org/linux-pvops.git#a74f1d1204a5c892466b52ac68ee6443c1e459d7-8a5f782c33c04ea5c9b3ca6fb32d6039e2e5c0c9
 
git://xenbits.xen.org/osstest/linux-firmware.git#c530a75c1e6a472b0eb9558310b518f0dfcd8860-c530a75c1e6a472b0eb9558310b518f0dfcd8860
 
git://xenbits.xen.org/staging/qemu-xen-unstable.git#a4b276b4ce49c8d70dd841ff885b900ec652b994-a4b276b4ce49c8d70dd841ff885b900ec652b994
 
git://xenbits.xen.org/staging/qemu-upstream-unstable.git#0d37748342e29854db7c9f6c47d7f58c6cfba6b2-42ffdf360dd9df66b0a4a7ada059c02a3cf3a8de
 
git://xenbits.xen.org/xen.git#befe0a0da90d7ac063fd8b5891c7d0cafa5f-84066dd4ef4bb5983e246c629a26ef4f3394e5d5
+ exec
+ sh -xe
+ cd /export/home/osstest/repos/linux-pvops
+ git remote set-url origin 
git://drall.uk.xensource.com:9419/git://xenbits.xen.org/linux-pvops.git
+ git fetch -p origin +refs/heads/*:refs/remotes/origin/*
+ exec
+ sh -xe
+ cd /export/home/osstest/repos/qemu-upstream-unstable
+ git remote set-url origin 
git://drall.uk.xensource.com:9419/git://xenbits.xen.org/staging/qemu-upstream-unstable.git
+ git fetch -p origin +refs/heads/*:refs/remotes/origin/*
+ exec
+ sh -xe
+ cd /export/home/osstest/repos/xen
+ git remote set-url origin 
git://drall.uk.xensource.com:9419/git://xenbits.xen.org/xen.git
+ git fetch -p origin +refs/heads/*:refs/remotes/origin/*
+ exec
+ sh -xe
+ cd /export/home/osstest/repos/linux-pvops
+ git remote set-url origin 
git://drall.uk.xensource.com:9419/git://xenbits.xen.org/linux-pvops.git
+ git fetch -p origin +refs/heads/*:refs/remotes/origin/*
+ exec
+ sh -xe
+

[Xen-devel] [xen-unstable test] 50273: regressions - FAIL

2015-03-31 Thread osstest service user

flight 50273 xen-unstable real [real]
http://logs.test-lab.xenproject.org/osstest/logs/50273/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-amd64-i386-freebsd10-i386 11 guest-localmigrate  fail REGR. vs. 36514

Regressions which are regarded as allowable (not blocking):
 test-armhf-armhf-libvirt  9 guest-start   fail REGR. vs. 36514
 test-amd64-i386-pair 17 guest-migrate/src_host/dst_host fail like 36514

Tests which did not succeed, but are not blocking:
 test-amd64-amd64-libvirt-xsm  1 build-check(1)   blocked  n/a
 test-amd64-i386-libvirt-xsm   1 build-check(1)   blocked  n/a
 test-amd64-amd64-xl-xsm   1 build-check(1)   blocked  n/a
 test-amd64-i386-xl-xsm 1 build-check(1)   blocked  n/a
 test-amd64-amd64-xl-qemuu-debianhvm-amd64-xsm  1 build-check(1)blocked n/a
 test-amd64-i386-xl-qemuu-debianhvm-amd64-xsm  1 build-check(1) blocked n/a
 test-amd64-amd64-xl-qemut-debianhvm-amd64-xsm  1 build-check(1)blocked n/a
 test-amd64-i386-xl-qemut-debianhvm-amd64-xsm  1 build-check(1) blocked n/a
 test-armhf-armhf-libvirt-xsm  1 build-check(1)   blocked  n/a
 test-armhf-armhf-xl-xsm   1 build-check(1)   blocked  n/a
 test-amd64-amd64-xl-pvh-intel  9 guest-start  fail  never pass
 test-amd64-i386-libvirt  10 migrate-support-checkfail   never pass
 test-amd64-amd64-xl-pvh-amd   9 guest-start  fail   never pass
 test-armhf-armhf-xl-sedf-pin 10 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-arndale  10 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt 10 migrate-support-checkfail   never pass
 build-armhf-xsm   5 xen-buildfail   never pass
 test-armhf-armhf-xl-cubietruck 10 migrate-support-checkfail never pass
 test-armhf-armhf-xl  10 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-sedf 10 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-multivcpu 10 migrate-support-checkfail  never pass
 test-armhf-armhf-xl-credit2  10 migrate-support-checkfail   never pass
 test-amd64-i386-xl-qemuu-winxpsp3-vcpus1 14 guest-stop fail never pass
 test-amd64-i386-xl-qemuu-winxpsp3 14 guest-stopfail never pass
 build-amd64-xsm   5 xen-buildfail   never pass
 test-amd64-i386-xl-qemuu-win7-amd64 14 guest-stop  fail never pass
 test-amd64-i386-xl-win7-amd64 14 guest-stop   fail  never pass
 test-amd64-amd64-xl-qemuu-win7-amd64 14 guest-stop fail never pass
 test-amd64-amd64-xl-win7-amd64 14 guest-stop   fail never pass
 test-amd64-amd64-xl-qemut-win7-amd64 14 guest-stop fail never pass
 test-amd64-i386-xl-qemut-win7-amd64 14 guest-stop  fail never pass
 test-amd64-i386-xl-qemut-winxpsp3-vcpus1 14 guest-stop fail never pass
 test-amd64-i386-xl-qemut-winxpsp3 14 guest-stopfail never pass
 test-amd64-amd64-xl-winxpsp3 14 guest-stop   fail   never pass
 test-amd64-i386-xl-winxpsp3-vcpus1 14 guest-stop   fail never pass
 test-amd64-i386-xl-winxpsp3  14 guest-stop   fail   never pass
 test-amd64-amd64-xl-qemut-winxpsp3 14 guest-stop   fail never pass
 test-amd64-amd64-xl-qemuu-winxpsp3 14 guest-stop   fail never pass

version targeted for testing:
 xen  71cba2a07bb541f25390cdd3546c9ee296a7257b
baseline version:
 xen  3a28f760508fb35c430edac17a9efde5aff6d1d5


People who touched revisions under test:
  Andrew Cooper 
  Boris Ostrovsky 
  Daniel De Graaf 
  Dario Faggioli 
  Don Slutz 
  George Dunlap 
  Ian Campbell 
  Ian Jackson 
  Jan Beulich 
  JeHyeon Yeon 
  Jim Fehlig 
  Juergen Gross 
  Kevin Tian 
  Konrad Rzeszutek Wilk 
  Koushik Chakravarty 
  Olaf Hering 
  Pramod Devendra 
  Quan Xu 
  Riku Voipio 
  Roger Pau Monné 
  Ross Lagerwall 
  Tim Deegan 
  Vijaya Kumar K 
  Vijaya Kumar K
  Wei Liu 
  Wen Congyang 
  Yang Hongyang 
  Yang Zhang 


jobs:
 build-amd64-xsm  fail
 build-armhf-xsm  fail
 build-i386-xsm   pass
 build-amd64  pass
 build-armhf  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-armhf-libvirt  pass
 build-i386-libvirt   pass
 build-amd64-oldkern

Re: [Xen-devel] [OSSTEST Nested PATCH v7 1/6] parsing grub which has 'submenu' primitive

2015-03-31 Thread Hu, Robert

> -Original Message-
> From: Ian Campbell [mailto:ian.campb...@citrix.com]
> Sent: Tuesday, March 31, 2015 9:44 PM
> To: Pang, LongtaoX
> Cc: xen-devel@lists.xen.org; ian.jack...@eu.citrix.com; wei.l...@citrix.com;
> Hu, Robert
> Subject: Re: [OSSTEST Nested PATCH v7 1/6] parsing grub which has 'submenu'
> primitive
> 
> On Fri, 2015-03-27 at 19:06 -0400, longtao.pang wrote:
> > From a hvm kernel build from Linux stable Kernel tree,
> > the auto generated grub2 menu will have 'submenu' primitive, upon the
> > 'menuentry' items. Xen boot entries will be grouped into a submenu. This
> > patch adds capability to support such grub formats.
> >
> > Signed-off-by: longtao.pang 
> > ---
> > Changes in v7:
> > Remove the reformatting change for Debian.pm and keep the original format.
> 
> Thank you.
> 
> > ---
> >  Osstest/Debian.pm |   21 -
> >  1 file changed, 16 insertions(+), 5 deletions(-)
> >
> > diff --git a/Osstest/Debian.pm b/Osstest/Debian.pm
> > index 6784024..35163a0 100644
> > --- a/Osstest/Debian.pm
> > +++ b/Osstest/Debian.pm
> > @@ -398,10 +398,18 @@ sub setupboot_grub2 () {
> >
> >  my $count= 0;
> >  my $entry;
> > +my $submenu;
> >  while (<$f>) {
> >  next if m/^\s*\#/ || !m/\S/;
> >  if (m/^\s*\}\s*$/) {
> > -die unless $entry;
> > +die unless $entry || $submenu;
> > +if(!defined $entry && defined $submenu){
> > +logm("Met end of a submenu starting from ".
> > +"$submenu->{StartLine}. ".
> > +"Our want kern is $want_kernver");
> > +$submenu=undef;
> > +next;
> > +}
> >  my (@missing) =
> >  grep { !defined $entry->{$_} }
> > (defined $xenhopt
> > @@ -432,21 +440,24 @@ sub setupboot_grub2 () {
> >  $entry= { Title => $1, StartLine => $., Number =>
> $count };
> >  $count++;
> >  }
> > -if (m/^\s*multiboot\s*\/(xen\-[0-9][-+.0-9a-z]*\S+)/) {
> > +if (m/^submenu\s+[\'\"](.*)[\'\"].*\{\s*$/) {
> > +$submenu={ StartLine =>$.};
> > +}
> 
> This looks reasonable enough to support a single nesting, I suppose we
> can leave more deeply nested submenus for another time.
> 
> So in that regard this patch looks ok to me.
> 
> > +if (m/^\s*multiboot\s*(?:\/boot)*\/(xen\S+)/) {
> >  die unless $entry;
> >  $entry->{Hv}= $1;
> >  }
> > -if (m/^\s*multiboot\s*\/(vmlinu[xz]-(\S+))/) {
> > +if (m/^\s*multiboot\s*(?:\/boot)*\/(vmlinu[xz]-(\S+))/) {
> 
> What are these changes all about? I think they must be unrelated to the
> use of submenu (perhaps relate to having a separate /boot or not?). If
> so then please do in a separate patch.
> 
You're right. This has nothing to do with submenu.
Going to separate it out in another patch.
> If this is somehow to do with submenu then please explain how/why in the
> commit log.
> 
> BTW, your regex as it stand will accept /boot/boot/boot/boot/vmlinuz. I
> think you maybe meant to add "(?:\/boot)?" to match zero or one
> occurrences?
Yes, this is a potential bug. Thanks for pointing it out!
> 
> Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [v3][PATCH 2/2] libxl: introduce gfx_passthru_kind

2015-03-31 Thread Chen, Tiejun


On 2015/3/30 17:19, Ian Campbell wrote:

On Mon, 2015-03-30 at 09:28 +0800, Chen, Tiejun wrote:

Sounds like it should be a legacy fix to qemu-xen-traditional :) So let's do
it now,

@@ -326,6 +326,10 @@ static char **
libxl__build_device_model_args_old(libxl__gc *gc,
   }
   if (libxl_defbool_val(b_info->u.hvm.gfx_passthru)) {
   flexarray_append(dm_args, "-gfx_passthru");
+if (b_info->u.hvm.gfx_passthru_kind >
+LIBXL_GFX_PASSTHRU_KIND_IGD)
+LOG(ERROR, "unsupported device type for
\"gfx_passthru\".\n");
+return NULL;


I'd rather not encode any ordering constraints if we don't have to. I
think this is preferable:

   if (libxl_defbool_val(b_info->u.hvm.gfx_passthru)) {
switch (b_info->u.hvm.gfx_passthru_kind) {
case LIBXL_GFX_PASSTHRU_KIND_DEFAULT:
case LIBXL_GFX_PASSTHRU_KIND_IGD:
flexarray_append(dm_args, "-gfx_passthru");
break;
default:
LOG(ERROR, "unsupported gfx_passthru_kind.\n");
return NULL;
}
  }

(notice that the error message above doesn't refer to the xl specific
option naming).



Sorry for the delayed response.

This looks reasonable and I regenerate this patch based on this comment:

 libxl: introduce gfx_passthru_kind

Although we already have 'gfx_passthru' in b_info, this doesn't suffice
after we want to handle IGD specifically. Now we define a new field of
type, gfx_passthru_kind, to indicate we're trying to pass IGD. Actually
this means we can benefit this to support other specific devices just
by extending gfx_passthru_kind. And then we can cooperate with
gfx_passthru to address IGD cases as follows:

gfx_passthru = 0=> sets build_info.u.gfx_passthru to false
gfx_passthru = 1=> sets build_info.u.gfx_passthru to true and
   build_info.u.gfx_passthru_kind to DEFAULT
gfx_passthru = "igd"=> sets build_info.u.gfx_passthru to true
   and build_info.u.gfx_passthru_kind to IGD

Here if gfx_passthru_kind = DEFAULT, we will call
libxl__is_igd_vga_passthru() to check if we're hitting that table to need
to pass that option to qemu. But if gfx_passthru_kind = "igd" we always
force to pass that.

And now "gfx_passthru" is supported both with the qemu-xen-traditional
device-model and upstream qemu-xen device-model. But when given as a
string this option describes the type of device to enable. Note this
behavior is only supported with the upstream qemu-xen device-model.

Signed-off-by: Tiejun Chen 
---
 docs/man/xl.cfg.pod.5   | 34 +
 tools/libxl/libxl.h |  6 ++
 tools/libxl/libxl_dm.c  | 46 
+

 tools/libxl/libxl_types.idl |  6 ++
 tools/libxl/xl_cmdimpl.c| 14 --
 5 files changed, 96 insertions(+), 10 deletions(-)

diff --git a/docs/man/xl.cfg.pod.5 b/docs/man/xl.cfg.pod.5
index 408653f..dfde92d 100644
--- a/docs/man/xl.cfg.pod.5
+++ b/docs/man/xl.cfg.pod.5
@@ -671,7 +671,7 @@ through to this VM. See L above.
 devices passed through to this VM. See L
 above.

-=item B
+=item B

 Enable graphics device PCI passthrough. This option makes an assigned
 PCI graphics card become primary graphics card in the VM. The QEMU
@@ -699,9 +699,35 @@ working graphics passthrough. See the 
XenVGAPassthroughTestedAdapters

 L wiki page
 for currently supported graphics cards for gfx_passthru.

-gfx_passthru is currently only supported with the qemu-xen-traditional
-device-model. Upstream qemu-xen device-model currently does not have
-support for gfx_passthru.
+gfx_passthru is currently supported both with the qemu-xen-traditional
+device-model and upstream qemu-xen device-model.
+
+When given as a boolean the B option either disables gfx
+passthru or enables autodetection.
+
+But when given as a string the B option describes the type
+of device to enable. Note this behavior is only supported with the upstream
+qemu-xen device-model.
+
+Currently, valid options are:
+
+=over 4
+
+=item B
+
+Disables graphics device PCI passthrough.
+
+=item B, B
+
+Enables graphics device PCI passthrough and autodetects the type of device
+which is being used.
+
+=item "igd"
+
+Enables graphics device PCI passthrough but forcing the type
+of device to Intel Graphics Device.
+
+=back

 Note that some graphics adapters (AMD/ATI cards, for example) do not
 necessarily require gfx_passthru option, so you can use the normal Xen
diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
index 5eec092..1144c5e 100644
--- a/tools/libxl/libxl.h
+++ b/tools/libxl/libxl.h
@@ -720,6 +720,12 @@ void libxl_mac_copy(libxl_ctx *ctx, libxl_mac *dst, 
libxl_mac *src);

 #define LIBXL_HAVE_PSR_MBM 1
 #endif

+/*
+ * libxl_domain_

Re: [Xen-devel] Xen-unstable-staging: Xen BUG at iommu_map.c:455

2015-03-31 Thread Sander Eikelenboom


Wednesday, April 1, 2015, 1:38:34 AM, you wrote:

> On 31/03/2015 22:11, Sander Eikelenboom wrote:
>> Hi all,
>>
>> I just tested xen-unstable staging (changeset: git:0522407-dirty) 
>>
>> with revert of commit 1aeb1156fa43fe2cd2b5003995b20466cd19a622
>> (due to an already reported but not yet resolved issue)
>>
>> and build with qemu xen from 
>> git://xenbits.xen.org/staging/qemu-upstream-unstable.git
>> (to include the pci command register patch from Jan)
>>
>>
>> and now came across this new splat when starting an HVM with PCI passthrough:

> Wow - you are getting all the fun bugs at the moment!

Hrmm i'm not so sure at the moment .. could also be a stale tree or is it just
that it's april 1st ..
*sigh* 
tried to git reset --hard to a known good changeset .. but it still seems
to fail, even with cold boot. 

So sorry for the noise and please ignore for the moment while i'm trying to
figure out what is fooling me :-)

--
sander



> Nothing has changed in the AMD IOMMU driver for a while, but the
> BUG_ON() is particularly unhelpful at identifying what went wrong.

> As a first pass triage, can you rerun with

> diff --git a/xen/drivers/passthrough/amd/iommu_map.c
> b/xen/drivers/passthrough/amd/iommu_map.c
> index 495ff5c..f15c324 100644
> --- a/xen/drivers/passthrough/amd/iommu_map.c
> +++ b/xen/drivers/passthrough/amd/iommu_map.c
> @@ -451,8 +451,9 @@ static int iommu_pde_from_gfn(struct domain *d,
> unsigned long pfn,
>  table = hd->arch.root_table;
>  level = hd->arch.paging_mode;

> -BUG_ON( table == NULL || level < IOMMU_PAGING_MODE_LEVEL_1 ||
> -level > IOMMU_PAGING_MODE_LEVEL_6 );
> +BUG_ON(table == NULL);
> +BUG_ON(level < IOMMU_PAGING_MODE_LEVEL_1);
> +BUG_ON(level > IOMMU_PAGING_MODE_LEVEL_6);

>  next_table_mfn = page_to_mfn(table);

> which will help identify which of the conditions is failing.

> Can you please also provide the full serial log, including iommu=debug?

> ~Andrew



___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] Xen-unstable-staging: Xen BUG at iommu_map.c:455

2015-03-31 Thread Andrew Cooper

On 31/03/2015 22:11, Sander Eikelenboom wrote:
> Hi all,
>
> I just tested xen-unstable staging (changeset: git:0522407-dirty) 
>
> with revert of commit 1aeb1156fa43fe2cd2b5003995b20466cd19a622
> (due to an already reported but not yet resolved issue)
>
> and build with qemu xen from 
> git://xenbits.xen.org/staging/qemu-upstream-unstable.git
> (to include the pci command register patch from Jan)
>
>
> and now came across this new splat when starting an HVM with PCI passthrough:

Wow - you are getting all the fun bugs at the moment!

Nothing has changed in the AMD IOMMU driver for a while, but the
BUG_ON() is particularly unhelpful at identifying what went wrong.

As a first pass triage, can you rerun with

diff --git a/xen/drivers/passthrough/amd/iommu_map.c
b/xen/drivers/passthrough/amd/iommu_map.c
index 495ff5c..f15c324 100644
--- a/xen/drivers/passthrough/amd/iommu_map.c
+++ b/xen/drivers/passthrough/amd/iommu_map.c
@@ -451,8 +451,9 @@ static int iommu_pde_from_gfn(struct domain *d,
unsigned long pfn,
 table = hd->arch.root_table;
 level = hd->arch.paging_mode;

-BUG_ON( table == NULL || level < IOMMU_PAGING_MODE_LEVEL_1 ||
-level > IOMMU_PAGING_MODE_LEVEL_6 );
+BUG_ON(table == NULL);
+BUG_ON(level < IOMMU_PAGING_MODE_LEVEL_1);
+BUG_ON(level > IOMMU_PAGING_MODE_LEVEL_6);

 next_table_mfn = page_to_mfn(table);

which will help identify which of the conditions is failing.

Can you please also provide the full serial log, including iommu=debug?

~Andrew

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [qemu-mainline test] 50272: regressions - FAIL

2015-03-31 Thread osstest service user

flight 50272 qemu-mainline real [real]
http://logs.test-lab.xenproject.org/osstest/logs/50272/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-amd64-i386-freebsd10-i386 14 guest-localmigrate/x10  fail REGR. vs. 36709

Regressions which are regarded as allowable (not blocking):
 test-amd64-i386-pair 17 guest-migrate/src_host/dst_host fail like 36709

Tests which did not succeed, but are not blocking:
 test-amd64-i386-xl-qemuu-debianhvm-amd64-xsm 7 debian-hvm-install fail never 
pass
 test-amd64-amd64-xl-qemut-debianhvm-amd64-xsm 7 debian-hvm-install fail never 
pass
 test-amd64-i386-libvirt-xsm   9 guest-start  fail   never pass
 test-amd64-amd64-xl-qemuu-debianhvm-amd64-xsm 7 debian-hvm-install fail never 
pass
 test-amd64-amd64-xl-pvh-intel  9 guest-start  fail  never pass
 test-amd64-i386-xl-qemut-debianhvm-amd64-xsm 7 debian-hvm-install fail never 
pass
 test-amd64-i386-xl-xsm 9 guest-start  fail   never pass
 test-amd64-i386-libvirt  10 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-arndale  10 migrate-support-checkfail   never pass
 test-armhf-armhf-xl  10 migrate-support-checkfail   never pass
 test-amd64-amd64-xl-xsm   9 guest-start  fail   never pass
 test-amd64-amd64-libvirt 10 migrate-support-checkfail   never pass
 test-amd64-amd64-xl-pvh-amd   9 guest-start  fail   never pass
 test-amd64-amd64-libvirt-xsm  9 guest-start  fail   never pass
 test-armhf-armhf-libvirt 10 migrate-support-checkfail   never pass
 test-armhf-armhf-libvirt-xsm  5 xen-boot fail   never pass
 test-armhf-armhf-xl-xsm   5 xen-boot fail   never pass
 test-armhf-armhf-xl-cubietruck 10 migrate-support-checkfail never pass
 test-armhf-armhf-xl-sedf 10 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-multivcpu 10 migrate-support-checkfail  never pass
 test-armhf-armhf-xl-sedf-pin 10 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-credit2  10 migrate-support-checkfail   never pass
 test-amd64-i386-xl-qemut-win7-amd64 14 guest-stop  fail never pass
 test-amd64-i386-xl-win7-amd64 14 guest-stop   fail  never pass
 test-amd64-amd64-xl-win7-amd64 14 guest-stop   fail never pass
 test-amd64-i386-xl-qemuu-winxpsp3-vcpus1 14 guest-stop fail never pass
 test-amd64-i386-xl-qemut-winxpsp3-vcpus1 14 guest-stop fail never pass
 test-amd64-amd64-xl-qemut-winxpsp3 14 guest-stop   fail never pass
 test-amd64-i386-xl-qemuu-win7-amd64 14 guest-stop  fail never pass
 test-amd64-amd64-xl-qemut-win7-amd64 14 guest-stop fail never pass
 test-amd64-amd64-xl-qemuu-win7-amd64 14 guest-stop fail never pass
 test-amd64-amd64-xl-winxpsp3 14 guest-stop   fail   never pass
 test-amd64-i386-xl-qemut-winxpsp3 14 guest-stopfail never pass
 test-amd64-amd64-xl-qemuu-winxpsp3 14 guest-stop   fail never pass
 test-amd64-i386-xl-winxpsp3-vcpus1 14 guest-stop   fail never pass
 test-amd64-i386-xl-qemuu-winxpsp3 14 guest-stopfail never pass
 test-amd64-i386-xl-winxpsp3  14 guest-stop   fail   never pass

version targeted for testing:
 qemuu 627f91b1f80fecc73d00727181a9ddb6162cc30e
baseline version:
 qemuu 362ca922eea03240916287a8a6267801ab095d12


People who touched revisions under test:
  Alexander Graf 
  Alexey Kardashevskiy 
  Bastian Koppelmann 
  Cole Robinson 
  David Gibson 
  Dirk Mueller 
  Dirk Müller 
  Dr. David Alan Gilbert 
  Eduardo Otubo 
  Fam Zheng 
  Gabriel L. Somlo 
  Gabriel Somlo 
  Gerd Hoffmann 
  Gonglei 
  Hervé Poussineau 
  Jason Wang 
  John Snow 
  Juan Quintela 
  Leon Alrae 
  Markus Armbruster 
  Meghana Cheripady 
  Michael S. Tsirkin 
  Padmanabh Ratnakar 
  Paolo Bonzini 
  Peter Crosthwaite 
  Peter Maydell 
  Shannon Zhao 
  Shannon Zhao 
  Stefan Hajnoczi 
  Stefan Weil 
  Ting Wang 


jobs:
 build-amd64-xsm  pass
 build-armhf-xsm  pass
 build-i386-xsm   pass
 build-amd64  pass
 build-armhf  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-armhf-libvirt  pass
 build-i386-libvirt   pass
 build-amd64-pvops

Re: [Xen-devel] [PATCH RESEND 1/2] xenbus_client: Extend interface to support multi-page ring

2015-03-31 Thread Bob Liu

Hi Juergen,

On 03/31/2015 08:36 PM, Juergen Gross wrote:
> On 03/31/2015 02:15 PM, Bob Liu wrote:
>> From: Wei Liu 
>>
>> Originally Xen PV drivers only use single-page ring to pass along
>> information. This might limit the throughput between frontend and
>> backend.
>>
>> The patch extends Xenbus driver to support multi-page ring, which in
>> general should improve throughput if ring is the bottleneck. Changes to
>> various frontend / backend to adapt to the new interface are also
>> included.
>>
>> Affected Xen drivers:
>> * blkfront/back
>> * netfront/back
>> * pcifront/back
> 
> What about pvscsi drivers?
> They are affected, too!
> 

Thanks for the reminder, I'll send a new version to fix it.

Regards,
-Bob

> 
> Juergen
> 
>>
>> The interface is documented, as before, in xenbus_client.c.
>>
>> Change in V2:
>> * allow ring has arbitrary number of pages <= XENBUS_MAX_RING_PAGES
>>
>> Change in V3:
>> * update function prototypes
>> * carefully deal with types of different sizes
>>
>> Change in V4:
>> * use PAGE_KERNEL instead of PAGE_KERNEL_IO to avoid breakage on Arm
>>
>> Change in V5:
>> * fix off-by-one error and other minor glitches spotted by Mathew Daley
>>
>> Signed-off-by: Wei Liu 
>> Signed-off-by: Paul Durrant 
>> Signed-off-by: Bob Liu 
>> Cc: Konrad Wilk 
>> Cc: David Vrabel 
>> Cc: Boris Ostrovsky 
>> ---
>>   drivers/block/xen-blkback/xenbus.c |   5 +-
>>   drivers/block/xen-blkfront.c   |   5 +-
>>   drivers/net/xen-netback/netback.c  |   4 +-
>>   drivers/net/xen-netfront.c |   9 +-
>>   drivers/pci/xen-pcifront.c |   5 +-
>>   drivers/xen/xen-pciback/xenbus.c   |   2 +-
>>   drivers/xen/xenbus/xenbus_client.c | 387
>> +++--
>>   include/xen/xenbus.h   |  20 +-
>>   8 files changed, 317 insertions(+), 120 deletions(-)
>>
>> diff --git a/drivers/block/xen-blkback/xenbus.c
>> b/drivers/block/xen-blkback/xenbus.c
>> index e3afe97..ff30259 100644
>> --- a/drivers/block/xen-blkback/xenbus.c
>> +++ b/drivers/block/xen-blkback/xenbus.c
>> @@ -193,7 +193,7 @@ fail:
>>   return ERR_PTR(-ENOMEM);
>>   }
>>
>> -static int xen_blkif_map(struct xen_blkif *blkif, unsigned long
>> shared_page,
>> +static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref,
>>unsigned int evtchn)
>>   {
>>   int err;
>> @@ -202,7 +202,8 @@ static int xen_blkif_map(struct xen_blkif *blkif,
>> unsigned long shared_page,
>>   if (blkif->irq)
>>   return 0;
>>
>> -err = xenbus_map_ring_valloc(blkif->be->dev, shared_page,
>> &blkif->blk_ring);
>> +err = xenbus_map_ring_valloc(blkif->be->dev, &gref, 1,
>> + &blkif->blk_ring);
>>   if (err < 0)
>>   return err;
>>
>> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
>> index 37779e4..2c61cf8 100644
>> --- a/drivers/block/xen-blkfront.c
>> +++ b/drivers/block/xen-blkfront.c
>> @@ -1245,6 +1245,7 @@ static int setup_blkring(struct xenbus_device *dev,
>>struct blkfront_info *info)
>>   {
>>   struct blkif_sring *sring;
>> +grant_ref_t gref;
>>   int err;
>>
>>   info->ring_ref = GRANT_INVALID_REF;
>> @@ -1257,13 +1258,13 @@ static int setup_blkring(struct xenbus_device
>> *dev,
>>   SHARED_RING_INIT(sring);
>>   FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
>>
>> -err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
>> +err = xenbus_grant_ring(dev, info->ring.sring, 1, &gref);
>>   if (err < 0) {
>>   free_page((unsigned long)sring);
>>   info->ring.sring = NULL;
>>   goto fail;
>>   }
>> -info->ring_ref = err;
>> +info->ring_ref = gref;
>>
>>   err = xenbus_alloc_evtchn(dev, &info->evtchn);
>>   if (err)
>> diff --git a/drivers/net/xen-netback/netback.c
>> b/drivers/net/xen-netback/netback.c
>> index 997cf09..865203f 100644
>> --- a/drivers/net/xen-netback/netback.c
>> +++ b/drivers/net/xen-netback/netback.c
>> @@ -1782,7 +1782,7 @@ int xenvif_map_frontend_rings(struct
>> xenvif_queue *queue,
>>   int err = -ENOMEM;
>>
>>   err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
>> - tx_ring_ref, &addr);
>> + &tx_ring_ref, 1, &addr);
>>   if (err)
>>   goto err;
>>
>> @@ -1790,7 +1790,7 @@ int xenvif_map_frontend_rings(struct
>> xenvif_queue *queue,
>>   BACK_RING_INIT(&queue->tx, txs, PAGE_SIZE);
>>
>>   err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
>> - rx_ring_ref, &addr);
>> + &rx_ring_ref, 1, &addr);
>>   if (err)
>>   goto err;
>>
>> diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
>> index e9b960f..13f5e7f 100644
>> --- a/drivers/net/xen-netfront.c
>> +++ b/drivers/net/xen-netfront.c
>> @@ -1486,6 +1486,7 @@ static int setup_netfront(struct xenbus_device
>> *dev,
>>   {
>>   struct xen_netif_tx_sring *txs;
>>   struct xen

[Xen-devel] Xen-unstable-staging: Xen BUG at iommu_map.c:455

2015-03-31 Thread Sander Eikelenboom

Hi all,

I just tested xen-unstable staging (changeset: git:0522407-dirty) 

with revert of commit 1aeb1156fa43fe2cd2b5003995b20466cd19a622
(due to an already reported but not yet resolved issue)

and build with qemu xen from 
git://xenbits.xen.org/staging/qemu-upstream-unstable.git
(to include the pci command register patch from Jan)


and now came across this new splat when starting an HVM with PCI passthrough:

(XEN) [2015-03-31 20:58:20.710] io.c:429: d17: bind: m_gsi=37 g_gsi=36 
dev=00.00.5 intx=0
(XEN) [2015-03-31 20:58:21.100] Xen BUG at iommu_map.c:455
(XEN) [2015-03-31 20:58:21.100] [ Xen-4.6-unstable  x86_64  debug=y  Not 
tainted ]
(XEN) [2015-03-31 20:58:21.100] CPU:0
(XEN) [2015-03-31 20:58:21.100] RIP:e008:[] 
iommu_pde_from_gfn+0x38/0x430
(XEN) [2015-03-31 20:58:21.100] RFLAGS: 00010202   CONTEXT: hypervisor
(XEN) [2015-03-31 20:58:21.100] rax: 0008   rbx: 0003   
rcx: 82c000802000
(XEN) [2015-03-31 20:58:21.100] rdx: 82e007d56740   rsi:    
rdi: 8305167dd000
(XEN) [2015-03-31 20:58:21.100] rbp: 82d0802efad8   rsp: 82d0802efa78   
r8:  83054eb755b0
(XEN) [2015-03-31 20:58:21.100] r9:  0003   r10: 0200   
r11: 82d0802fc0d0
(XEN) [2015-03-31 20:58:21.100] r12: 82e0075527e0   r13: 05e9   
r14: 
(XEN) [2015-03-31 20:58:21.100] r15: 7d20   cr0: 80050033   
cr4: 06f0
(XEN) [2015-03-31 20:58:21.100] cr3: 00051a197000   cr2: 7efdd5ee1d48
(XEN) [2015-03-31 20:58:21.100] ds:    es:    fs:    gs:    ss: 
e010   cs: e008
(XEN) [2015-03-31 20:58:21.100] Xen stack trace from rsp=82d0802efa78:
(XEN) [2015-03-31 20:58:21.100]8305167dd000 82d0802efb30 
 8305167dd190
(XEN) [2015-03-31 20:58:21.100]0286 82e007d56740 
82e007552800 0003
(XEN) [2015-03-31 20:58:21.100]82e0075527e0 05e9 
 7d20
(XEN) [2015-03-31 20:58:21.100]82d0802efb98 82d0801560b6 
7d2f7fd104e7 0001802351d2
(XEN) [2015-03-31 20:58:21.100]003aa93f  
00020001 8305167dd938
(XEN) [2015-03-31 20:58:21.100]82004ff8 8305167dd000 
0020941c 
(XEN) [2015-03-31 20:58:21.100]  
 
(XEN) [2015-03-31 20:58:21.100]  
8305167dd938 8305167dd000
(XEN) [2015-03-31 20:58:21.100]82e0075527e0 05e9 
 7d20
(XEN) [2015-03-31 20:58:21.100]82d0802efbf8 82d08015a54d 
 8305167dd020
(XEN) [2015-03-31 20:58:21.100]82d0802e8000 003aa93f 
82d0802efbf8 
(XEN) [2015-03-31 20:58:21.100]8305167dd000 0800 
8305167dd000 
(XEN) [2015-03-31 20:58:21.100]82d0802efc98 82d08014c6c1 
82d0802efc78 82d08012c298
(XEN) [2015-03-31 20:58:21.100]0286 82d0802efc28 
0020 
(XEN) [2015-03-31 20:58:21.100]  
0008 7f6525ed2004
(XEN) [2015-03-31 20:58:21.100]83054eb1ab60 83055cc6c300 
0282 7f6525ed2004
(XEN) [2015-03-31 20:58:21.100]8305167dd000 7f6525ed2004 
8305167dd000 0005
(XEN) [2015-03-31 20:58:21.100]82d0802efca8 82d08014908b 
82d0802efd98 82d080161f2d
(XEN) [2015-03-31 20:58:21.100]0020  
0005 0001
(XEN) [2015-03-31 20:58:21.100]82d080331bb8 0001 
82d0802efde8 82d080120d00
(XEN) [2015-03-31 20:58:21.100] Xen call trace:
(XEN) [2015-03-31 20:58:21.100][] 
iommu_pde_from_gfn+0x38/0x430
(XEN) [2015-03-31 20:58:21.100][] 
amd_iommu_map_page+0x10d/0x4e6
(XEN) [2015-03-31 20:58:21.100][] 
arch_iommu_populate_page_table+0x179/0x4d8
(XEN) [2015-03-31 20:58:21.100][] 
iommu_do_pci_domctl+0x395/0x604
(XEN) [2015-03-31 20:58:21.100][] 
iommu_do_domctl+0x17/0x1a
(XEN) [2015-03-31 20:58:21.100][] 
arch_do_domctl+0x2469/0x26e1
(XEN) [2015-03-31 20:58:21.100][] do_domctl+0x1a1f/0x1d60
(XEN) [2015-03-31 20:58:21.100][] syscall_enter+0xeb/0x145
(XEN) [2015-03-31 20:58:21.100] 
(XEN) [2015-03-31 20:58:22.167] 
(XEN) [2015-03-31 20:58:22.176] 
(XEN) [2015-03-31 20:58:22.195] Panic on CPU 0:
(XEN) [2015-03-31 20:58:22.208] Xen BUG at iommu_map.c:455
(XEN) [2015-03-31 20:58:22.223] 
(XEN) [2015-03-31 20:58:22.243] 
(XEN) [2015-03-31 20:58:22.252] Manual reset required ('noreboot' specified)


Haven't tried without the revert yet.

--
Sander


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen

[Xen-devel] [linux-3.10 test] 50271: tolerable FAIL - PUSHED

2015-03-31 Thread osstest service user

flight 50271 linux-3.10 real [real]
http://logs.test-lab.xenproject.org/osstest/logs/50271/

Failures :-/ but no regressions.

Regressions which are regarded as allowable (not blocking):
 test-amd64-i386-pair 17 guest-migrate/src_host/dst_host fail like 26303

Tests which did not succeed, but are not blocking:
 test-amd64-i386-xl-qemut-debianhvm-amd64-xsm 7 debian-hvm-install fail never 
pass
 test-amd64-i386-xl-qemuu-debianhvm-amd64-xsm 7 debian-hvm-install fail never 
pass
 test-amd64-i386-xl-xsm 9 guest-start  fail   never pass
 test-amd64-amd64-xl-qemuu-debianhvm-amd64-xsm 7 debian-hvm-install fail never 
pass
 test-amd64-amd64-xl-pvh-intel  9 guest-start  fail  never pass
 test-amd64-amd64-xl-qemut-debianhvm-amd64-xsm 7 debian-hvm-install fail never 
pass
 test-amd64-amd64-xl-xsm   9 guest-start  fail   never pass
 test-armhf-armhf-xl-arndale   5 xen-boot fail   never pass
 test-amd64-i386-libvirt-xsm   9 guest-start  fail   never pass
 test-amd64-amd64-libvirt-xsm  9 guest-start  fail   never pass
 test-amd64-i386-libvirt  10 migrate-support-checkfail   never pass
 test-amd64-amd64-xl-pvh-amd   9 guest-start  fail   never pass
 test-amd64-amd64-libvirt 10 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-cubietruck  5 xen-boot fail never pass
 test-armhf-armhf-xl-multivcpu  5 xen-boot fail  never pass
 test-armhf-armhf-xl-sedf-pin  5 xen-boot fail   never pass
 test-armhf-armhf-xl-credit2   5 xen-boot fail   never pass
 test-armhf-armhf-libvirt  5 xen-boot fail   never pass
 test-armhf-armhf-xl-xsm   5 xen-boot fail   never pass
 test-armhf-armhf-libvirt-xsm  5 xen-boot fail   never pass
 test-armhf-armhf-xl   5 xen-boot fail   never pass
 test-amd64-i386-xl-qemut-win7-amd64 14 guest-stop  fail never pass
 test-armhf-armhf-xl-sedf  5 xen-boot fail   never pass
 test-amd64-i386-xl-qemut-winxpsp3 14 guest-stopfail never pass
 test-amd64-amd64-xl-qemuu-winxpsp3 14 guest-stop   fail never pass
 test-amd64-i386-xl-winxpsp3-vcpus1 14 guest-stop   fail never pass
 test-amd64-i386-xl-qemuu-winxpsp3-vcpus1 14 guest-stop fail never pass
 test-amd64-i386-xl-qemuu-win7-amd64 14 guest-stop  fail never pass
 test-amd64-i386-xl-win7-amd64 14 guest-stop   fail  never pass
 test-amd64-amd64-xl-win7-amd64 14 guest-stop   fail never pass
 test-amd64-amd64-xl-qemuu-win7-amd64 14 guest-stop fail never pass
 test-amd64-amd64-xl-qemut-win7-amd64 14 guest-stop fail never pass
 test-amd64-i386-xl-winxpsp3  14 guest-stop   fail   never pass
 test-amd64-i386-xl-qemut-winxpsp3-vcpus1 14 guest-stop fail never pass
 test-amd64-i386-xl-qemuu-winxpsp3 14 guest-stopfail never pass
 test-amd64-amd64-xl-winxpsp3 14 guest-stop   fail   never pass
 test-amd64-amd64-xl-qemut-winxpsp3 14 guest-stop   fail never pass

version targeted for testing:
 linux 73895725a9401bd3454757fcfa7d691270ac7498
baseline version:
 linux be67db109090b17b56eb8eb2190cd70700f107aa


1013 people touched revisions under test,
not listing them all


jobs:
 build-amd64-xsm  pass
 build-armhf-xsm  pass
 build-i386-xsm   pass
 build-amd64  pass
 build-armhf  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-armhf-libvirt  pass
 build-i386-libvirt   pass
 build-amd64-pvopspass
 build-armhf-pvopspass
 build-i386-pvops pass
 build-amd64-rumpuserxen  pass
 build-i386-rumpuserxen   pass
 test-amd64-amd64-xl  pass
 test-armhf-armhf-xl  fail
 test-amd64-i386-xl   pass
 test-amd64-amd64-xl-qemut-debianhvm-amd64-xsmfail
 test-amd64-i386-xl-qemut-debianhvm-amd64-xsm

1 2 3 >

1 - 100 of 253 matches

Mail list logo