[Xen-devel] [RFC PATCH COLO v5 29/29] cmdline switches and config vars to control colo-proxy

2015-03-31 Thread Yang Hongyang
Add cmdline switches to 'xl migrate-receive' command to specify
a domain-specific hotplug script to setup COLO proxy.

Add a new config var 'colo.default.agentscript' to xl.conf, that
allows the user to override the default global script used to
setup COLO proxy.

Signed-off-by: Yang Hongyang 
Signed-off-by: Wen Congyang 
---
 docs/man/xl.conf.pod.5  |  6 ++
 docs/man/xl.pod.1   |  1 -
 tools/libxl/libxl.c | 12 +++
 tools/libxl/libxl_create.c  | 14 +++--
 tools/libxl/libxl_types.idl |  1 +
 tools/libxl/xl.c|  3 +++
 tools/libxl/xl.h|  1 +
 tools/libxl/xl_cmdimpl.c| 51 ++---
 8 files changed, 74 insertions(+), 15 deletions(-)

diff --git a/docs/man/xl.conf.pod.5 b/docs/man/xl.conf.pod.5
index 8ae19bb..8f7fd28 100644
--- a/docs/man/xl.conf.pod.5
+++ b/docs/man/xl.conf.pod.5
@@ -111,6 +111,12 @@ Configures the default script used by Remus to setup 
network buffering.
 
 Default: C
 
+=item B<colo.default.agentscript="PATH">
+
+Configures the default script used by COLO to setup colo-proxy.
+
+Default: C</etc/xen/scripts/colo-proxy-setup>
+
 =item B
 
 Configures the default output format used by xl when printing "machine
diff --git a/docs/man/xl.pod.1 b/docs/man/xl.pod.1
index 431ef5e..47d58da 100644
--- a/docs/man/xl.pod.1
+++ b/docs/man/xl.pod.1
@@ -447,7 +447,6 @@ N.B: Remus support in xl is still in experimental 
(proof-of-concept) phase.
  Disk replication support is limited to DRBD disks.
 
  COLO support in xl is still in experimental (proof-of-concept) phase.
- There is no support for network at the moment.
 
 B
 
diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index 08d68df..f4079ee 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -3398,6 +3398,16 @@ void libxl__device_nic_add(libxl__egc *egc, uint32_t 
domid,
 flexarray_append(back, nic->ifname);
 }
 
+if (nic->forwarddev) {
+flexarray_append(back, "forwarddev");
+flexarray_append(back, nic->forwarddev);
+}
+
+if (nic->forwardbr) {
+flexarray_append(back, "forwardbr");
+flexarray_append(back, nic->forwardbr);
+}
+
 flexarray_append(back, "mac");
 flexarray_append(back,libxl__sprintf(gc,
 LIBXL_MAC_FMT, LIBXL_MAC_BYTES(nic->mac)));
@@ -3521,6 +3531,8 @@ static int libxl__device_nic_from_xs_be(libxl__gc *gc,
 nic->ip = READ_BACKEND(NOGC, "ip");
 nic->bridge = READ_BACKEND(NOGC, "bridge");
 nic->script = READ_BACKEND(NOGC, "script");
+nic->forwarddev = READ_BACKEND(NOGC, "forwarddev");
+nic->forwardbr = READ_BACKEND(NOGC, "forwardbr");
 
 /* vif_ioemu nics use the same xenstore entries as vif interfaces */
 tmp = READ_BACKEND(gc, "type");
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index 1fae0a4..b1e9372 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -1132,6 +1132,11 @@ static void domcreate_bootloader_done(libxl__egc *egc,
 crs->superpages = superpages;
 crs->pae = pae;
 crs->callback = libxl__colo_restore_setup_done;
+if (dcs->colo_proxy_script)
+crs->colo_proxy_script = libxl__strdup(gc, dcs->colo_proxy_script);
+else
+crs->colo_proxy_script = GCSPRINTF("%s/colo-proxy-setup",
+   libxl__xen_script_dir_path());
 libxl__colo_restore_setup(egc, crs);
 } else
 libxl__xc_domain_restore(egc, dcs,
@@ -1628,6 +1633,7 @@ static void domain_create_cb(libxl__egc *egc,
 static int do_domain_create(libxl_ctx *ctx, libxl_domain_config *d_config,
 uint32_t *domid, int restore_fd,
 int send_fd, int checkpointed_stream,
+const char *colo_proxy_script,
 const libxl_asyncop_how *ao_how,
 const libxl_asyncprogress_how *aop_console_how)
 {
@@ -1643,6 +1649,7 @@ static int do_domain_create(libxl_ctx *ctx, 
libxl_domain_config *d_config,
 cdcs->dcs.send_fd = send_fd;
 cdcs->dcs.callback = domain_create_cb;
 cdcs->dcs.checkpointed_stream = checkpointed_stream;
+cdcs->dcs.colo_proxy_script = colo_proxy_script;
 libxl__ao_progress_gethow(&cdcs->dcs.aop_console_how, aop_console_how);
 cdcs->domid_out = domid;
 
@@ -1686,7 +1693,7 @@ int libxl_domain_create_new(libxl_ctx *ctx, 
libxl_domain_config *d_config,
 const libxl_asyncprogress_how *aop_console_how)
 {
 unset_disk_colo_restore(d_config);
-return do_domain_create(ctx, d_config, domid, -1, -1, 0,
+return do_domain_create(ctx, d_config, domid, -1, -1, 0, NULL,
 ao_how, aop_console_how);
 }
 
@@ -1697,16 +1704,19 @@ int libxl_domain_create_restore(libxl_ctx *ctx, 
libxl_domain_config *d_config,
 const libxl_asyncprogress_how *aop_console_how)
 {
 int send_fd = -1;
+char *colo_pr

[Xen-devel] [RFC PATCH COLO v5 27/29] setup and control colo proxy on primary side

2015-03-31 Thread Yang Hongyang
setup and control colo proxy on primary side

Signed-off-by: Yang Hongyang 
---
 tools/libxl/libxl_colo_save.c | 124 +++---
 tools/libxl/libxl_internal.h  |   4 ++
 2 files changed, 120 insertions(+), 8 deletions(-)

diff --git a/tools/libxl/libxl_colo_save.c b/tools/libxl/libxl_colo_save.c
index de270e2..e325cff 100644
--- a/tools/libxl/libxl_colo_save.c
+++ b/tools/libxl/libxl_colo_save.c
@@ -18,9 +18,11 @@
 #include "libxl_internal.h"
 #include "libxl_colo.h"
 
+extern const libxl__checkpoint_device_instance_ops colo_save_device_nic;
 extern const libxl__checkpoint_device_instance_ops colo_save_device_qdisk;
 
 static const libxl__checkpoint_device_instance_ops *colo_ops[] = {
+&colo_save_device_nic,
 &colo_save_device_qdisk,
 NULL,
 };
@@ -32,9 +34,15 @@ static int 
init_device_subkind(libxl__checkpoint_devices_state *cds)
 int rc;
 STATE_AO_GC(cds->ao);
 
-rc = init_subkind_qdisk(cds);
+rc = init_subkind_colo_nic(cds);
 if (rc) goto out;
 
+rc = init_subkind_qdisk(cds);
+if (rc) {
+cleanup_subkind_colo_nic(cds);
+goto out;
+}
+
 rc = 0;
 out:
 return rc;
@@ -45,6 +53,7 @@ static void 
cleanup_device_subkind(libxl__checkpoint_devices_state *cds)
 /* cleanup device subkind-specific state in the libxl ctx */
 STATE_AO_GC(cds->ao);
 
+cleanup_subkind_colo_nic(cds);
 cleanup_subkind_qdisk(cds);
 }
 
@@ -75,9 +84,16 @@ void libxl__colo_save_setup(libxl__egc *egc, 
libxl__colo_save_state *css)
 css->svm_running = false;
 css->paused = true;
 css->qdisk_setuped = false;
+libxl__ev_child_init(&css->child);
 
-/* TODO: nic support */
-cds->device_kind_flags = (1 << LIBXL__DEVICE_KIND_VBD);
+if (dss->remus->netbufscript)
+css->colo_proxy_script = libxl__strdup(gc, dss->remus->netbufscript);
+else
+css->colo_proxy_script = GCSPRINTF("%s/colo-proxy-setup",
+   libxl__xen_script_dir_path());
+
+cds->device_kind_flags = (1 << LIBXL__DEVICE_KIND_VIF) |
+ (1 << LIBXL__DEVICE_KIND_VBD);
 cds->ops = colo_ops;
 cds->callback = colo_save_setup_done;
 cds->ao = ao;
@@ -103,12 +119,18 @@ static void colo_save_setup_done(libxl__egc *egc,
 STATE_AO_GC(cds->ao);
 
 if (!rc) {
+css->cps.ao = ao;
+rc = colo_proxy_setup(&css->cps);
+if (rc)
+goto failed;
 libxl__domain_suspend(egc, dss);
 return;
 }
 
 LOG(ERROR, "COLO: failed to setup device for guest with domid %u",
 dss->domid);
+
+failed:
 css->cds.callback = colo_save_setup_failed;
 libxl__checkpoint_devices_teardown(egc, &css->cds);
 }
@@ -156,6 +178,7 @@ static void colo_teardown_done(libxl__egc *egc,
 libxl__domain_suspend_state *dss = CONTAINER_OF(css, *dss, css);
 
 cleanup_device_subkind(cds);
+colo_proxy_teardown(&css->cps);
 dss->callback(egc, dss, rc);
 }
 
@@ -436,6 +459,8 @@ static void colo_read_svm_ready_done(libxl__egc *egc,
 goto out;
 }
 
+colo_proxy_preresume(&css->cps);
+
 css->svm_running = true;
 css->cds.callback = colo_preresume_cb;
 libxl__checkpoint_devices_preresume(egc, &css->cds);
@@ -529,6 +554,8 @@ static void colo_read_svm_resumed_done(libxl__egc *egc,
 goto out;
 }
 
+colo_proxy_postresume(&css->cps);
+
 ok = 1;
 
 out:
@@ -537,6 +564,91 @@ out:
 
 
 /* = colo: wait new checkpoint = */
+
+static void colo_start_new_checkpoint(libxl__egc *egc,
+  libxl__checkpoint_devices_state *cds,
+  int rc);
+static void colo_proxy_async_wait_for_checkpoint(libxl__colo_save_state *css);
+static void colo_proxy_async_call_done(libxl__egc *egc,
+   libxl__ev_child *child,
+   int pid,
+   int status);
+
+/*
+ * Fork a child process and run func(css) in it.
+ *
+ * `callback` is handed to libxl__ev_child_fork() together with css->child.
+ * If the fork itself fails, no child exists, so `callback` is invoked
+ * directly with pid -1 and a non-zero status to report the failure.
+ *
+ * func() is expected not to return; if it does, the child aborts.
+ */
+static void colo_proxy_async_call(libxl__egc *egc,
+                                  libxl__colo_save_state *css,
+                                  void func(libxl__colo_save_state *),
+                                  libxl__ev_child_callback callback)
+{
+    int pid;
+
+    STATE_AO_GC(css->cds.ao);
+
+    /* Fork and call */
+    pid = libxl__ev_child_fork(gc, &css->child, callback);
+    if (pid == -1) {
+        LOG(ERROR, "unable to fork");
+        callback(egc, &css->child, -1, 1);
+        return;
+    }
+
+    if (!pid) {
+        /* child */
+        func(css);
+        /* notreached */
+        abort();
+    }
+}
+
+static void colo_proxy_wait_for_checkpoint(libxl__egc *egc,
+   libxl__colo_save_state *css)
+{
+colo_proxy_async_call(egc, css,
+  colo_proxy_async_wait_for_checkpoint,
+  colo_

[Xen-devel] [RFC PATCH COLO v5 25/29] COLO proxy: preresume, postresume and checkpoint

2015-03-31 Thread Yang Hongyang
preresume, postresume and checkpoint

Signed-off-by: Yang Hongyang 
---
 tools/libxl/libxl_colo.h   |  3 +++
 tools/libxl/libxl_colo_proxy.c | 57 ++
 2 files changed, 60 insertions(+)

diff --git a/tools/libxl/libxl_colo.h b/tools/libxl/libxl_colo.h
index 5983aa0..872c652 100644
--- a/tools/libxl/libxl_colo.h
+++ b/tools/libxl/libxl_colo.h
@@ -47,4 +47,7 @@ extern void libxl__colo_save_teardown(libxl__egc *egc,
 
 extern int colo_proxy_setup(libxl__colo_proxy_state *cps);
 extern void colo_proxy_teardown(libxl__colo_proxy_state *cps);
+extern void colo_proxy_preresume(libxl__colo_proxy_state *cps);
+extern void colo_proxy_postresume(libxl__colo_proxy_state *cps);
+extern int colo_proxy_checkpoint(libxl__colo_proxy_state *cps);
 #endif
diff --git a/tools/libxl/libxl_colo_proxy.c b/tools/libxl/libxl_colo_proxy.c
index 486ed73..2483be3 100644
--- a/tools/libxl/libxl_colo_proxy.c
+++ b/tools/libxl/libxl_colo_proxy.c
@@ -208,3 +208,60 @@ void colo_proxy_teardown(libxl__colo_proxy_state *cps)
 cps->sock_fd = -1;
 }
 }
+
+/* = colo-proxy: preresume, postresume and checkpoint == */
+
+/*
+ * Pre-resume hook: send a COLO_CHECKPOINT message (no payload) to the
+ * proxy through colo_proxy_send().  The result of the send is currently
+ * discarded.
+ */
+void colo_proxy_preresume(libxl__colo_proxy_state *cps)
+{
+colo_proxy_send(cps, NULL, 0, COLO_CHECKPOINT);
+/* TODO: need to handle if the call fails... */
+}
+
+/* Post-resume hook: intentionally empty, the proxy needs no action here. */
+void colo_proxy_postresume(libxl__colo_proxy_state *cps)
+{
+/* nothing to do... */
+}
+
+
+/* Payload of the message the proxy sends to request (or not) a checkpoint. */
+typedef struct colo_msg {
+    bool is_checkpoint;
+} colo_msg;
+
+/*
+ * Poll the proxy, without blocking, for a checkpoint request.
+ *
+ * Returns:
+ *    1  the proxy asked for a checkpoint
+ *    0  the proxy did not ask for one, or nothing could be received
+ *       (colo_proxy_recv() returned <= 0)
+ *   -1  the message was an NLMSG_ERROR, or was truncated/malformed
+ */
+int colo_proxy_checkpoint(libxl__colo_proxy_state *cps)
+{
+    uint8_t *buff;
+    int64_t size;
+    struct nlmsghdr *h;
+    struct colo_msg *m;
+    int ret = -1;
+
+    size = colo_proxy_recv(cps, &buff, MSG_DONTWAIT);
+
+    /* timeout, return no checkpoint message. */
+    if (size <= 0) {
+        return 0;
+    }
+
+    h = (struct nlmsghdr *) buff;
+
+    /*
+     * Do not trust any header field until we know that a complete
+     * header, and the whole message it describes, was actually received.
+     */
+    if (!NLMSG_OK(h, size)) {
+        goto out;
+    }
+
+    if (h->nlmsg_type == NLMSG_ERROR) {
+        goto out;
+    }
+
+    /* The payload must be large enough to hold a struct colo_msg. */
+    if (h->nlmsg_len < NLMSG_LENGTH(sizeof(*m))) {
+        goto out;
+    }
+
+    m = NLMSG_DATA(h);
+
+    ret = m->is_checkpoint ? 1 : 0;
+
+out:
+    free(buff);
+    return ret;
+}
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC PATCH COLO v5 28/29] setup and control colo proxy on secondary side

2015-03-31 Thread Yang Hongyang
setup and control colo proxy on secondary side

Signed-off-by: Yang Hongyang 
---
 tools/libxl/libxl_colo_restore.c | 26 +++---
 tools/libxl/libxl_internal.h |  3 +++
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/tools/libxl/libxl_colo_restore.c b/tools/libxl/libxl_colo_restore.c
index 28eb8ab..151e7a5 100644
--- a/tools/libxl/libxl_colo_restore.c
+++ b/tools/libxl/libxl_colo_restore.c
@@ -64,9 +64,11 @@ static void libxl__colo_restore_domain_resume_callback(void 
*data);
 static void libxl__colo_restore_domain_checkpoint_callback(void *data);
 static void libxl__colo_restore_domain_suspend_callback(void *data);
 
+extern const libxl__checkpoint_device_instance_ops colo_restore_device_nic;
 extern const libxl__checkpoint_device_instance_ops colo_restore_device_qdisk;
 
 static const libxl__checkpoint_device_instance_ops *colo_restore_ops[] = {
+&colo_restore_device_nic,
 &colo_restore_device_qdisk,
 NULL,
 };
@@ -166,8 +168,14 @@ static int 
init_device_subkind(libxl__checkpoint_devices_state *cds)
 int rc;
 STATE_AO_GC(cds->ao);
 
+rc = init_subkind_colo_nic(cds);
+if (rc) goto out;
+
 rc = init_subkind_qdisk(cds);
-if (rc)  goto out;
+if (rc) {
+cleanup_subkind_colo_nic(cds);
+goto out;
+}
 
 rc = 0;
 out:
@@ -179,6 +187,7 @@ static void 
cleanup_device_subkind(libxl__checkpoint_devices_state *cds)
 /* cleanup device subkind-specific state in the libxl ctx */
 STATE_AO_GC(cds->ao);
 
+cleanup_subkind_colo_nic(cds);
 cleanup_subkind_qdisk(cds);
 }
 
@@ -293,6 +302,11 @@ void libxl__colo_restore_setup(libxl__egc *egc,
 
 crs->qdisk_setuped = false;
 
+crs->cps.ao = ao;
+rc = colo_proxy_setup(&crs->cps);
+if (rc)
+goto err_init_dss2;
+
 rc = 0;
 
 out:
@@ -398,6 +412,8 @@ static void colo_restore_teardown_done(libxl__egc *egc,
 if (crcs->teardown_devices)
 cleanup_device_subkind(cds);
 
+colo_proxy_teardown(&crs->cps);
+
 rc = crcs->saved_rc;
 if (!rc) {
 crcs->callback = do_failover_done;
@@ -607,6 +623,8 @@ static void colo_restore_preresume_cb(libxl__egc *egc,
 goto out;
 }
 
+colo_proxy_preresume(&crs->cps);
+
 colo_restore_resume_vm(egc, crcs);
 
 return;
@@ -643,6 +661,8 @@ static void colo_resume_vm_done(libxl__egc *egc,
 
 crcs->status = LIBXL_COLO_RESUMED;
 
+colo_proxy_postresume(&crs->cps);
+
 /* avoid calling libxl__xc_domain_restore_done() more than once */
 if (crs->saved_cb) {
 dcs->callback = crs->saved_cb;
@@ -792,8 +812,8 @@ static void colo_setup_checkpoint_devices(libxl__egc *egc,
 
 STATE_AO_GC(crs->ao);
 
-/* TODO: nic support */
-cds->device_kind_flags = (1 << LIBXL__DEVICE_KIND_VBD);
+cds->device_kind_flags = (1 << LIBXL__DEVICE_KIND_VIF) |
+ (1 << LIBXL__DEVICE_KIND_VBD);
 cds->callback = colo_restore_setup_cds_done;
 cds->ao = ao;
 cds->domid = crs->domid;
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index a64efdc..bd3c9e3 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -3213,6 +3213,9 @@ struct libxl__colo_restore_state {
 
 /* private, used by qdisk block replication */
 bool qdisk_setuped;
+
+/* private, used by colo proxy */
+libxl__colo_proxy_state cps;
 };
 
 struct libxl__domain_create_state {
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC PATCH COLO v5 26/29] COLO nic: implement COLO nic subkind

2015-03-31 Thread Yang Hongyang
implement COLO nic subkind.

Signed-off-by: Yang Hongyang 
Signed-off-by: Wen Congyang 
---
 tools/hotplug/Linux/Makefile |   1 +
 tools/hotplug/Linux/colo-proxy-setup | 128 ++
 tools/libxl/Makefile |   1 +
 tools/libxl/libxl_colo_nic.c | 313 +++
 tools/libxl/libxl_internal.h |   5 +
 tools/libxl/libxl_types.idl  |   2 +
 6 files changed, 450 insertions(+)
 create mode 100755 tools/hotplug/Linux/colo-proxy-setup
 create mode 100644 tools/libxl/libxl_colo_nic.c

diff --git a/tools/hotplug/Linux/Makefile b/tools/hotplug/Linux/Makefile
index d94a9cb..1c28bea 100644
--- a/tools/hotplug/Linux/Makefile
+++ b/tools/hotplug/Linux/Makefile
@@ -25,6 +25,7 @@ XEN_SCRIPTS += vscsi
 XEN_SCRIPTS += block-iscsi
 XEN_SCRIPTS += block-drbd-probe
 XEN_SCRIPTS += $(XEN_SCRIPTS-y)
+XEN_SCRIPTS += colo-proxy-setup
 
 SUBDIRS-$(CONFIG_SYSTEMD) += systemd
 
diff --git a/tools/hotplug/Linux/colo-proxy-setup 
b/tools/hotplug/Linux/colo-proxy-setup
new file mode 100755
index 000..850f672
--- /dev/null
+++ b/tools/hotplug/Linux/colo-proxy-setup
@@ -0,0 +1,128 @@
+#! /bin/bash
+
+dir=$(dirname "$0")
+. "$dir/xen-hotplug-common.sh"
+. "$dir/hotplugpath.sh"
+. "$dir/xen-network-ft.sh"
+
+findCommand "$@"
+
+if [ "$command" != "setup" -a  "$command" != "teardown" ]
+then
+echo "Invalid command: $command"
+log err "Invalid command: $command"
+exit 1
+fi
+
+evalVariables "$@"
+
+: ${vifname:?}
+: ${forwarddev:?}
+: ${mode:?}
+: ${forwardbr:?}
+: ${index:?}
+: ${bridge:?}
+
+if [ "$mode" != "primary" -a "$mode" != "secondary" ]
+then
+echo "Invalid mode: $mode"
+log err "Invalid mode: $mode"
+exit 1
+fi
+
+if [ $index -lt 0 ] || [ $index -gt 100 ]; then
+echo "index overflow"
+exit 1
+fi
+
+# Primary side setup: mirror every ip/arp/ipv6 packet leaving $vifname to
+# $forwarddev, load the PMYCOLO module and insert the rules that tag
+# traffic of this vif with --index $index.
+function setup_primary()
+{
+    do_without_error tc qdisc add dev $vifname root handle 1: prio
+    do_without_error tc filter add dev $vifname parent 1: protocol ip prio 10 \
+        u32 match u32 0 0 flowid 1:2 action mirred egress mirror \
+        dev $forwarddev
+    do_without_error tc filter add dev $vifname parent 1: protocol arp prio 11 \
+        u32 match u32 0 0 flowid 1:2 action mirred egress mirror \
+        dev $forwarddev
+    do_without_error tc filter add dev $vifname parent 1: protocol ipv6 prio 12 \
+        u32 match u32 0 0 flowid 1:2 action mirred egress mirror \
+        dev $forwarddev
+
+    do_without_error modprobe nf_conntrack_ipv4
+    do_without_error modprobe xt_PMYCOLO sec_dev=$forwarddev
+
+    do_without_error /usr/local/sbin/iptables -t mangle -I PREROUTING \
+        -m physdev --physdev-in $vifname -j PMYCOLO --index $index
+    do_without_error /usr/local/sbin/ip6tables -t mangle -I PREROUTING \
+        -m physdev --physdev-in $vifname -j PMYCOLO --index $index
+    do_without_error /usr/local/sbin/arptables -I INPUT -i $forwarddev \
+        -j MARK --set-mark $index
+}
+
+# Primary side teardown: remove the tc mirroring filters and the qdisc
+# from $vifname, then delete exactly the netfilter rules this script
+# inserted during setup.  The tables are not flushed, because that would
+# also drop rules belonging to other guests or to the administrator.
+function teardown_primary()
+{
+    do_without_error tc filter del dev $vifname parent 1: protocol ip prio 10 \
+        u32 match u32 0 0 flowid 1:2 action mirred egress mirror \
+        dev $forwarddev
+    do_without_error tc filter del dev $vifname parent 1: protocol arp prio 11 \
+        u32 match u32 0 0 flowid 1:2 action mirred egress mirror \
+        dev $forwarddev
+    do_without_error tc filter del dev $vifname parent 1: protocol ipv6 prio 12 \
+        u32 match u32 0 0 flowid 1:2 action mirred egress mirror \
+        dev $forwarddev
+    do_without_error tc qdisc del dev $vifname root handle 1: prio
+
+    do_without_error /usr/local/sbin/iptables -t mangle -D PREROUTING \
+        -m physdev --physdev-in $vifname -j PMYCOLO --index $index
+    do_without_error /usr/local/sbin/ip6tables -t mangle -D PREROUTING \
+        -m physdev --physdev-in $vifname -j PMYCOLO --index $index
+    do_without_error /usr/local/sbin/arptables -D INPUT -i $forwarddev \
+        -j MARK --set-mark $index
+    do_without_error rmmod xt_PMYCOLO
+}
+
+# Secondary side setup: move $vifname from $bridge to $forwardbr, attach
+# $forwarddev to the same bridge, load the SECCOLO module and insert the
+# rules that tag traffic of this vif with --index $index.
+function setup_secondary()
+{
+    do_without_error brctl delif $bridge $vifname
+    do_without_error brctl addif $forwardbr $vifname
+    do_without_error brctl addif $forwardbr $forwarddev
+    do_without_error modprobe xt_SECCOLO
+
+    do_without_error /usr/local/sbin/iptables -t mangle -I PREROUTING \
+        -m physdev --physdev-in $vifname -j SECCOLO --index $index
+    do_without_error /usr/local/sbin/ip6tables -t mangle -I PREROUTING \
+        -m physdev --physdev-in $vifname -j SECCOLO --index $index
+}
+
+# Secondary side teardown: put $vifname back on $bridge and delete exactly
+# the mangle rules inserted during setup.  The mangle table is not
+# flushed, because that would also drop unrelated rules.
+function teardown_secondary()
+{
+    do_without_error brctl delif $forwardbr $forwarddev
+    do_without_error brctl delif $forwardbr $vifname
+    do_without_error brctl addif $bridge $vifname
+
+    do_without_error /usr/local/sbin/iptables -t mangle -D PREROUTING \
+        -m physdev --physdev-in $vifname -j SECCOLO --index $index
+    do_without_error /usr/local/sbin/ip6tables -t mangle -D PREROUTING \
+        -m physdev --physdev-in $vifname -j SECCOLO --index $index
+    do_without_error rmmod xt_SECCOLO
+}
+
+case "$command" in
+setup)
+if [ "$mode" = "primary" ]
+then
+setup_primary
+else
+setup_secondary
+fi
+
+success
+;;
+teardown)
+if [ "$mode" = "primary" ]
+then
+teardown_primary
+else
+teardown_secondary
+fi
+;;
+esac
+
+if [ "$

[Xen-devel] [RFC PATCH COLO v5 23/29] COLO: use qemu block replication

2015-03-31 Thread Yang Hongyang
From: Wen Congyang 

The guest should be paused before doing COLO!!!

Signed-off-by: Wen Congyang 
---
 tools/libxl/Makefile |   1 +
 tools/libxl/libxl_colo_qdisk.c   | 209 +++
 tools/libxl/libxl_colo_restore.c |  21 +++-
 tools/libxl/libxl_colo_save.c|  36 ++-
 tools/libxl/libxl_internal.h |  18 
 tools/libxl/libxl_qmp.c  |  31 ++
 6 files changed, 312 insertions(+), 4 deletions(-)
 create mode 100644 tools/libxl/libxl_colo_qdisk.c

diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index b2eaf14..12caf4c 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -58,6 +58,7 @@ endif
 
 LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o
 LIBXL_OBJS-y += libxl_colo_restore.o libxl_colo_save.o
+LIBXL_OBJS-y += libxl_colo_qdisk.o
 
 LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o
 LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o
diff --git a/tools/libxl/libxl_colo_qdisk.c b/tools/libxl/libxl_colo_qdisk.c
new file mode 100644
index 000..d73572e
--- /dev/null
+++ b/tools/libxl/libxl_colo_qdisk.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright (C) 2015 FUJITSU LIMITED
+ * Author: Wen Congyang 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+
+typedef struct libxl__colo_qdisk {
+libxl__checkpoint_device *dev;
+} libxl__colo_qdisk;
+
+/* == init() and cleanup() == */
+/*
+ * Subkind-wide initialisation for qdisk checkpoint devices.
+ * Nothing is set up here; always returns 0.
+ */
+int init_subkind_qdisk(libxl__checkpoint_devices_state *cds)
+{
+/*
+ * We don't know if we use qemu block replication, so
+ * we cannot start block replication here.
+ */
+return 0;
+}
+
+/* Subkind-wide cleanup for qdisk checkpoint devices: currently a no-op. */
+void cleanup_subkind_qdisk(libxl__checkpoint_devices_state *cds)
+{
+}
+
+/* == setup() and teardown() == */
+/*
+ * Decide whether `dev` is a COLO-enabled qdisk and, on the secondary
+ * side, start qemu block replication for the domain.
+ *
+ * The device matches only if its backend is qdisk, colo is enabled and
+ * colo_params contains a non-empty ":exportname=" part.  Anything else is
+ * reported as ERROR_CHECKPOINT_DEVOPS_DOES_NOT_MATCH.
+ *
+ * The outcome is stored in dev->aodev.rc and delivered through
+ * dev->aodev.callback on every path.
+ */
+static void colo_qdisk_setup(libxl__egc *egc, libxl__checkpoint_device *dev,
+                             bool primary)
+{
+    static const char export_key[] = ":exportname=";
+    const libxl_device_disk *disk = dev->backend_dev;
+    libxl__checkpoint_devices_state *const cds = dev->cds;
+    const int domid = cds->domid;
+    const char *key_pos;
+    int rc = ERROR_CHECKPOINT_DEVOPS_DOES_NOT_MATCH;
+
+    EGC_GC;
+
+    if (disk->backend != LIBXL_DISK_BACKEND_QDISK)
+        goto done;
+    if (!libxl_defbool_val(disk->colo_enable))
+        goto done;
+
+    /* Need ":exportname=" followed by at least one character. */
+    key_pos = strstr(disk->colo_params, export_key);
+    if (!key_pos || key_pos[strlen(export_key)] == 0)
+        goto done;
+
+    dev->matched = 1;
+    rc = 0;
+
+    /*
+     * On the primary the NBD server is not ready yet, so block
+     * replication cannot be started now; only the secondary proceeds.
+     */
+    if (!primary) {
+        libxl__colo_restore_state *crs = CONTAINER_OF(cds, *crs, cds);
+
+        /* Replication is started once per domain, not once per disk. */
+        if (!crs->qdisk_setuped) {
+            const char *addr;
+
+            crs->qdisk_setuped = true;
+
+            /* addr is everything in colo_params before ":exportname=". */
+            addr = libxl__strndup(gc, disk->colo_params,
+                                  key_pos - disk->colo_params);
+
+            if (libxl__qmp_block_start_replication(gc, domid, primary, addr))
+                rc = ERROR_FAIL;
+        }
+    }
+
+done:
+    dev->aodev.rc = rc;
+    dev->aodev.callback(egc, &dev->aodev);
+}
+
+/*
+ * Stop qemu block replication for the domain if it was marked as set up,
+ * and clear that mark.  The mark lives in the save state on the primary
+ * and in the restore state on the secondary.
+ *
+ * The outcome is stored in dev->aodev.rc and delivered through
+ * dev->aodev.callback on every path.
+ */
+static void colo_qdisk_teardown(libxl__egc *egc, libxl__checkpoint_device *dev,
+                                bool primary)
+{
+    libxl__checkpoint_devices_state *const cds = dev->cds;
+    const int domid = cds->domid;
+    bool was_setup;
+    int rc = 0;
+
+    EGC_GC;
+
+    /* Fetch and clear the "replication running" mark for our side. */
+    if (primary) {
+        libxl__colo_save_state *css = CONTAINER_OF(cds, *css, cds);
+
+        was_setup = css->qdisk_setuped;
+        css->qdisk_setuped = false;
+    } else {
+        libxl__colo_restore_state *crs = CONTAINER_OF(cds, *crs, cds);
+
+        was_setup = crs->qdisk_setuped;
+        crs->qdisk_setuped = false;
+    }
+
+    if (was_setup && libxl__qmp_block_stop_replication(gc, domid, primary))
+        rc = ERROR_FAIL;
+
+    dev->aodev.rc = rc;
+    dev->aodev.callback(egc, &dev->aodev);
+}
+
+/* == checkpointing APIs == */
+/* should be called after libxl__checkpoint_device_instance_ops.preresume */
+int c

[Xen-devel] [RFC PATCH COLO v5 24/29] COLO proxy: implement setup/teardown of COLO proxy module

2015-03-31 Thread Yang Hongyang
Implement setup and teardown of the COLO proxy module.
We use netlink to communicate with the proxy module.

Signed-off-by: Yang Hongyang 
---
 tools/libxl/Makefile   |   1 +
 tools/libxl/libxl_colo.h   |   2 +
 tools/libxl/libxl_colo_proxy.c | 210 +
 tools/libxl/libxl_internal.h   |   9 ++
 4 files changed, 222 insertions(+)
 create mode 100644 tools/libxl/libxl_colo_proxy.c

diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 12caf4c..c74ba79 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -59,6 +59,7 @@ endif
 LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o
 LIBXL_OBJS-y += libxl_colo_restore.o libxl_colo_save.o
 LIBXL_OBJS-y += libxl_colo_qdisk.o
+LIBXL_OBJS-y += libxl_colo_proxy.o
 
 LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o
 LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o
diff --git a/tools/libxl/libxl_colo.h b/tools/libxl/libxl_colo.h
index 26a2563..5983aa0 100644
--- a/tools/libxl/libxl_colo.h
+++ b/tools/libxl/libxl_colo.h
@@ -45,4 +45,6 @@ extern void libxl__colo_save_teardown(libxl__egc *egc,
   libxl__colo_save_state *css,
   int rc);
 
+extern int colo_proxy_setup(libxl__colo_proxy_state *cps);
+extern void colo_proxy_teardown(libxl__colo_proxy_state *cps);
 #endif
diff --git a/tools/libxl/libxl_colo_proxy.c b/tools/libxl/libxl_colo_proxy.c
new file mode 100644
index 000..486ed73
--- /dev/null
+++ b/tools/libxl/libxl_colo_proxy.c
@@ -0,0 +1,210 @@
+/*
+ * Copyright (C) 2015 FUJITSU LIMITED
+ * Author: Yang Hongyang 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+#include "libxl_colo.h"
+#include <linux/netlink.h>
+
+#define NETLINK_COLO 28
+
+/* Values placed in nlmsg_type by colo_proxy_send(). */
+enum colo_netlink_op {
+COLO_QUERY_CHECKPOINT = (NLMSG_MIN_TYPE + 1),
+COLO_CHECKPOINT,
+COLO_FAILOVER,
+COLO_PROXY_INIT, /* colo_proxy_send() also sets NLM_F_ACK for this type */
+COLO_PROXY_RESET, /* UNUSED, will be used for continuous FT */
+};
+
+/* = colo-proxy: helper functions == */
+
+/*
+ * Send a header-only netlink message of the given type to the kernel
+ * over cps->sock_fd.
+ *
+ * `buff` and `size` are accepted but not used: no payload is attached.
+ * Returns whatever sendmsg() returned; a result <= 0 is logged as an
+ * error.
+ */
+static int colo_proxy_send(libxl__colo_proxy_state *cps, uint8_t *buff,
+                           uint64_t size, int type)
+{
+    /* Destination is the kernel: pid 0, no multicast groups. */
+    struct sockaddr_nl kernel_addr = {
+        .nl_family = AF_NETLINK,
+        .nl_pid = 0,
+        .nl_groups = 0,
+    };
+    struct nlmsghdr hdr = {
+        .nlmsg_len = NLMSG_SPACE(0),
+        .nlmsg_flags = NLM_F_REQUEST,
+        .nlmsg_seq = 0,
+        /* NOTE(review): original comment says this value is "untrusty" */
+        .nlmsg_pid = cps->index,
+        .nlmsg_type = type,
+    };
+    struct iovec vec = {
+        .iov_base = &hdr,
+        .iov_len = hdr.nlmsg_len,
+    };
+    struct msghdr request = {
+        .msg_name = &kernel_addr,
+        .msg_namelen = sizeof(kernel_addr),
+        .msg_iov = &vec,
+        .msg_iovlen = 1,
+        .msg_control = NULL,
+        .msg_controllen = 0,
+        .msg_flags = 0,
+    };
+    int sent;
+
+    STATE_AO_GC(cps->ao);
+
+    /* Only the init request asks the kernel for an acknowledgement. */
+    if (type == COLO_PROXY_INIT)
+        hdr.nlmsg_flags |= NLM_F_ACK;
+
+    sent = sendmsg(cps->sock_fd, &request, 0);
+    if (sent <= 0)
+        LOG(ERROR, "can't send msg to kernel by netlink: %s",
+            strerror(errno));
+
+    return sent;
+}
+
+/*
+ * Receive a netlink message from cps->sock_fd.
+ *
+ * On success returns the number of bytes received (> 0) and stores in
+ * *buff a buffer that the caller must release with free().
+ * Otherwise returns recvmsg()'s result (<= 0) and sets *buff to NULL.
+ *
+ * `flags` is passed straight to recvmsg().
+ */
+static int64_t colo_proxy_recv(libxl__colo_proxy_state *cps, uint8_t **buff,
+                               int flags)
+{
+    struct sockaddr_nl sa;
+    struct iovec iov;
+    struct msghdr mh = {
+        .msg_name = &sa,
+        .msg_namelen = sizeof(sa),
+        .msg_iov = &iov,
+        .msg_iovlen = 1,
+    };
+    uint32_t size = 16384;
+    int64_t len = 0;
+    int ret;
+
+    STATE_AO_GC(cps->ao);
+    /*
+     * Allocate outside the gc: both this function (on error) and its
+     * callers release the buffer with free(), which must never be done
+     * to gc-owned memory.
+     */
+    uint8_t *tmp = libxl__malloc(NOGC, size);
+
+    iov.iov_base = tmp;
+    iov.iov_len = size;
+next:
+    ret = recvmsg(cps->sock_fd, &mh, flags);
+    if (ret <= 0) {
+        goto out;
+    }
+
+    len += ret;
+    if (mh.msg_flags & MSG_TRUNC) {
+        /*
+         * NOTE(review): for datagram-style sockets the excess of a
+         * truncated message is normally discarded, so a second
+         * recvmsg() may not return the missing tail - confirm.
+         */
+        size += 16384;
+        tmp = libxl__realloc(NOGC, tmp, size);
+        iov.iov_base = tmp + len;
+        iov.iov_len = size - len;
+        goto next;
+    }
+
+    *buff = tmp;
+    return len;
+
+out:
+    free(tmp);
+    *buff = NULL;
+    return ret;
+}
+
+/* = colo-proxy: setup and teardown == */
+
+int colo_proxy_setup(libxl__colo_proxy_state *cps)
+{
+int skfd = 0;
+struct sockaddr_nl sa;
+struct nlmsghdr *h;
+struct timeval tv = {0, 50}; /* timeout for recvmsg from kernel */
+int i = 1;
+int ret = ERROR_FAIL;
+uint8_t *buff = NULL;
+int64_t size

[Xen-devel] [RFC PATCH COLO v5 22/29] Support colo mode for qemu disk

2015-03-31 Thread Yang Hongyang
From: Wen Congyang 

Usage: disk = ['...,colo,colo-params=xxx,active-disk=xxx,hidden-disk=xxx...']
The format of colo-params: host:port:exportname=xx

Signed-off-by: Wen Congyang 
Signed-off-by: Yang Hongyang 
---
 docs/man/xl.pod.1   |   2 +-
 tools/libxl/libxl.c |  42 ++-
 tools/libxl/libxl_create.c  |  25 -
 tools/libxl/libxl_device.c  |  38 +++
 tools/libxl/libxl_dm.c  | 262 ++--
 tools/libxl/libxl_types.idl |   5 +
 tools/libxl/libxlu_disk_l.l |   5 +
 7 files changed, 367 insertions(+), 12 deletions(-)

diff --git a/docs/man/xl.pod.1 b/docs/man/xl.pod.1
index adcbe37..431ef5e 100644
--- a/docs/man/xl.pod.1
+++ b/docs/man/xl.pod.1
@@ -447,7 +447,7 @@ N.B: Remus support in xl is still in experimental 
(proof-of-concept) phase.
  Disk replication support is limited to DRBD disks.
 
  COLO support in xl is still in experimental (proof-of-concept) phase.
- There is no support for network or disk at the moment.
+ There is no support for network at the moment.
 
 B
 
diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index afe0cc9..08d68df 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -2296,6 +2296,8 @@ int libxl__device_disk_setdefault(libxl__gc *gc, 
libxl_device_disk *disk)
 int rc;
 
 libxl_defbool_setdefault(&disk->discard_enable, !!disk->readwrite);
+libxl_defbool_setdefault(&disk->colo_enable, false);
+libxl_defbool_setdefault(&disk->colo_restore_enable, false);
 
 rc = libxl__resolve_domid(gc, disk->backend_domname, &disk->backend_domid);
 if (rc < 0) return rc;
@@ -2496,6 +2498,14 @@ static void device_disk_add(libxl__egc *egc, uint32_t 
domid,
 flexarray_append(back, "params");
 flexarray_append(back, libxl__sprintf(gc, "%s:%s",
   
libxl__device_disk_string_of_format(disk->format), disk->pdev_path));
+if (libxl_defbool_val(disk->colo_enable)) {
+flexarray_append(back, "colo-params");
+flexarray_append(back, libxl__sprintf(gc, "%s", 
disk->colo_params));
+flexarray_append(back, "active-disk");
+flexarray_append(back, libxl__sprintf(gc, "%s", 
disk->active_disk));
+flexarray_append(back, "hidden-disk");
+flexarray_append(back, libxl__sprintf(gc, "%s", 
disk->hidden_disk));
+}
 assert(device->backend_kind == LIBXL__DEVICE_KIND_QDISK);
 break;
 default:
@@ -2610,7 +2620,10 @@ static int libxl__device_disk_from_xs_be(libxl__gc *gc,
 goto cleanup;
 }
 
-/* "params" may not be present; but everything else must be. */
+/*
+ * "params" and "colo-params" may not be present; but everything
+ * else must be.
+ */
 tmp = xs_read(ctx->xsh, XBT_NULL,
   libxl__sprintf(gc, "%s/params", be_path), &len);
 if (tmp && strchr(tmp, ':')) {
@@ -2620,6 +2633,33 @@ static int libxl__device_disk_from_xs_be(libxl__gc *gc,
 disk->pdev_path = tmp;
 }
 
+tmp = xs_read(ctx->xsh, XBT_NULL,
+  libxl__sprintf(gc, "%s/colo-params", be_path), &len);
+if (tmp) {
+libxl_defbool_set(&disk->colo_enable, true);
+disk->colo_params = tmp;
+} else {
+libxl_defbool_set(&disk->colo_enable, false);
+}
+
+if (libxl_defbool_val(disk->colo_enable)) {
+tmp = xs_read(ctx->xsh, XBT_NULL,
+  libxl__sprintf(gc, "%s/active-disk", be_path), &len);
+if (!tmp) {
+LOG(ERROR, "Missing xenstore node %s/active-disk", be_path);
+goto cleanup;
+}
+disk->active_disk = tmp;
+
+tmp = xs_read(ctx->xsh, XBT_NULL,
+  libxl__sprintf(gc, "%s/hidden-disk", be_path), &len);
+if (!tmp) {
+LOG(ERROR, "Missing xenstore node %s/hidden-disk", be_path);
+goto cleanup;
+}
+disk->hidden_disk = tmp;
+}
+
 
 tmp = libxl__xs_read(gc, XBT_NULL,
  libxl__sprintf(gc, "%s/type", be_path));
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index 89c18dc..1fae0a4 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -1663,12 +1663,29 @@ static void domain_create_cb(libxl__egc *egc,
 
 libxl__ao_complete(egc, ao, rc);
 }
-
+
+static void set_disk_colo_restore(libxl_domain_config *d_config)
+{
+int i;
+
+for (i = 0; i < d_config->num_disks; i++)
+libxl_defbool_set(&d_config->disks[i].colo_restore_enable, true);
+}
+
+static void unset_disk_colo_restore(libxl_domain_config *d_config)
+{
+int i;
+
+for (i = 0; i < d_config->num_disks; i++)
+libxl_defbool_set(&d_config->disks[i].colo_restore_enable, false);
+}
+
 int libxl_domain_create_new(libxl_ctx *ctx, libxl_domain_config *d_config,
 uint32_t *

[Xen-devel] [RFC PATCH COLO v5 16/29] primary vm suspend/get_dirty_pfn/resume/checkpoint code

2015-03-31 Thread Yang Hongyang
From: Wen Congyang 

We will do the following things again and again:
1. Suspend primary vm
   a. Suspend primary vm
   b. Do postsuspend
   c. Read LIBXL_COLO_SVM_SUSPENDED from slave
   d. Read secondary vm's dirty page information from slave (count + pfn list)
2. Get dirty pfn list
   a. Return secondary vm's dirty pfn list
3. Resume primary vm
   a. Read LIBXL_COLO_SVM_READY from slave
   b. Do preresume
   c. Resume primary vm
   d. Read LIBXL_COLO_SVM_RESUMED from slave
4. Wait for a new checkpoint
   a. Wait for a new checkpoint (not implemented)
   b. Send LIBXL_COLO_NEW_CHECKPOINT to slave

Signed-off-by: Wen Congyang 
---
 tools/libxc/include/xenguest.h |  12 +
 tools/libxl/Makefile   |   2 +-
 tools/libxl/libxl.c|   6 +-
 tools/libxl/libxl_colo.h   |  10 +
 tools/libxl/libxl_colo_save.c  | 642 +
 tools/libxl/libxl_dom.c|  13 +-
 tools/libxl/libxl_internal.h   |  31 +-
 tools/libxl/libxl_save_msgs_gen.pl |   1 +
 tools/libxl/libxl_types.idl|   1 +
 9 files changed, 710 insertions(+), 8 deletions(-)
 create mode 100644 tools/libxl/libxl_colo_save.c

diff --git a/tools/libxc/include/xenguest.h b/tools/libxc/include/xenguest.h
index 6e621a6..266d96b 100644
--- a/tools/libxc/include/xenguest.h
+++ b/tools/libxc/include/xenguest.h
@@ -74,6 +74,18 @@ struct save_callbacks {
  */
 int (*toolstack_save)(uint32_t domid, uint8_t **buf, uint32_t *len, void 
*data);
 
+/* Called after the guest is suspended.
+ *
+ * returns the list of dirty pfn:
+ *  struct {
+ *  uint64_t count;
+ *  uint64_t pfn[];
+ *  };
+ *
+ *  Note: the caller must free the return value.
+ */
+uint8_t *(*get_dirty_pfn)(void *data);
+
 /* to be provided as the last argument to each callback function */
 void* data;
 };
diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 8acfd5d..b2eaf14 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -57,7 +57,7 @@ LIBXL_OBJS-y += libxl_nonetbuffer.o
 endif
 
 LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o
-LIBXL_OBJS-y += libxl_colo_restore.o
+LIBXL_OBJS-y += libxl_colo_restore.o libxl_colo_save.o
 
 LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o
 LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o
diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index 40a49c7..b6c5429 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -18,6 +18,7 @@
 
 #include "libxl_internal.h"
 #include "libxl_remus.h"
+#include "libxl_colo.h"
 
 #define PAGE_TO_MEMKB(pages) ((pages) * 4)
 #define BACKEND_STRING_SIZE 5
@@ -892,7 +893,10 @@ int libxl_domain_remus_start(libxl_ctx *ctx, 
libxl_domain_remus_info *info,
 assert(info);
 
 /* Point of no return */
-libxl__remus_setup(egc, &dss->rs);
+if (libxl_defbool_val(info->colo))
+libxl__colo_save_setup(egc, &dss->css);
+else
+libxl__remus_setup(egc, &dss->rs);
 return AO_INPROGRESS;
 
  out:
diff --git a/tools/libxl/libxl_colo.h b/tools/libxl/libxl_colo.h
index 91df275..26a2563 100644
--- a/tools/libxl/libxl_colo.h
+++ b/tools/libxl/libxl_colo.h
@@ -35,4 +35,14 @@ extern void libxl__colo_restore_teardown(libxl__egc *egc,
  libxl__colo_restore_state *crs,
  int rc);
 
+extern void libxl__colo_save_domain_suspend_callback(void *data);
+extern void libxl__colo_save_domain_resume_callback(void *data);
+extern void libxl__colo_save_domain_checkpoint_callback(void *data);
+extern void libxl__colo_save_get_dirty_pfn_callback(void *data);
+extern void libxl__colo_save_setup(libxl__egc *egc,
+   libxl__colo_save_state *css);
+extern void libxl__colo_save_teardown(libxl__egc *egc,
+  libxl__colo_save_state *css,
+  int rc);
+
 #endif
diff --git a/tools/libxl/libxl_colo_save.c b/tools/libxl/libxl_colo_save.c
new file mode 100644
index 000..bb5b434
--- /dev/null
+++ b/tools/libxl/libxl_colo_save.c
@@ -0,0 +1,642 @@
+/*
+ * Copyright (C) 2014 FUJITSU LIMITED
+ * Author: Wen Congyang 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+#include "libxl_colo.h"
+
+static const libxl__checkpoint_device_instance_ops *colo_ops[] = {
+

[Xen-devel] [RFC PATCH COLO v5 15/29] secondary vm suspend/resume/checkpoint code

2015-03-31 Thread Yang Hongyang
From: Wen Congyang 

Secondary vm is running in colo mode. So we will do
the following things again and again:
1. Resume secondary vm
   a. Send LIBXL_COLO_SVM_READY to master.
   b. If it is not resumed the first time, call
      libxl__checkpoint_devices_preresume().
   c. If it is resumed the first time, call libxl__xc_domain_restore_done()
  to build the secondary vm. We should also enable secondary vm's logdirty.
  Otherwise, call libxl__domain_resume() to resume secondary vm.
   d. If it is resumed the first time, call libxl__checkpoint_devices_setup()
  to setup checkpoint devices.
   e. Send LIBXL_COLO_SVM_RESUMED to master.
2. Wait for a new checkpoint
   a. Call libxl__checkpoint_devices_commit().
   b. Read LIBXL_COLO_NEW_CHECKPOINT from master.
3. Suspend secondary vm
   a. Suspend secondary vm.
   b. Call libxl__checkpoint_devices_postsuspend().
   c. Get secondary vm's dirty page information.
   d. Send LIBXL_COLO_SVM_SUSPENDED to master.
   e. Send secondary vm's dirty page information to master(count + pfn list).

Signed-off-by: Wen Congyang 
---
 tools/libxc/include/xenguest.h |   20 +
 tools/libxl/Makefile   |1 +
 tools/libxl/libxl_colo.h   |   38 ++
 tools/libxl/libxl_colo_restore.c   | 1158 
 tools/libxl/libxl_create.c |  116 +++-
 tools/libxl/libxl_dom.c|2 +-
 tools/libxl/libxl_internal.h   |   23 +
 tools/libxl/libxl_save_callout.c   |6 +-
 tools/libxl/libxl_save_msgs_gen.pl |6 +-
 9 files changed, 1363 insertions(+), 7 deletions(-)
 create mode 100644 tools/libxl/libxl_colo.h
 create mode 100644 tools/libxl/libxl_colo_restore.c

diff --git a/tools/libxc/include/xenguest.h b/tools/libxc/include/xenguest.h
index 601b108..6e621a6 100644
--- a/tools/libxc/include/xenguest.h
+++ b/tools/libxc/include/xenguest.h
@@ -93,6 +93,26 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t 
dom, uint32_t max_iter
 
 /* callbacks provided by xc_domain_restore */
 struct restore_callbacks {
+/* Called after a new checkpoint to suspend the guest.
+ */
+int (*suspend)(void* data);
+
+/* Called after the secondary vm is ready to resume.
+ * Callback function resumes the guest & the device model,
+ *  returns to xc_domain_restore.
+ */
+int (*postcopy)(void* data);
+
+/* callback to wait a new checkpoint
+ *
+ * returns:
+ * 0: terminate checkpointing gracefully
+ * 1: take another checkpoint */
+int (*checkpoint)(void* data);
+
+/* Enable qemu-dm logging dirty pages to xen */
+int (*switch_qemu_logdirty)(int domid, unsigned enable, void *data); /* 
HVM only */
+
 /* callback to restore toolstack specific data */
 int (*toolstack_restore)(uint32_t domid, const uint8_t *buf,
 uint32_t size, void* data);
diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 1e27754..8acfd5d 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -57,6 +57,7 @@ LIBXL_OBJS-y += libxl_nonetbuffer.o
 endif
 
 LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o
+LIBXL_OBJS-y += libxl_colo_restore.o
 
 LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o
 LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o
diff --git a/tools/libxl/libxl_colo.h b/tools/libxl/libxl_colo.h
new file mode 100644
index 000..91df275
--- /dev/null
+++ b/tools/libxl/libxl_colo.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2014 FUJITSU LIMITED
+ * Author: Wen Congyang 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#ifndef LIBXL_COLO_H
+#define LIBXL_COLO_H
+
+/*
+ * values to control suspend/resume primary vm and secondary vm
+ * at the same time
+ */
+enum {
+LIBXL_COLO_NEW_CHECKPOINT = 1,
+LIBXL_COLO_SVM_SUSPENDED,
+LIBXL_COLO_SVM_READY,
+LIBXL_COLO_SVM_RESUMED,
+};
+
+extern void libxl__colo_restore_done(libxl__egc *egc, void *dcs_void,
+ int ret, int retval, int errnoval);
+extern void libxl__colo_restore_setup(libxl__egc *egc,
+  libxl__colo_restore_state *crs);
+extern void libxl__colo_restore_teardown(libxl__egc *egc,
+ libxl__colo_restore_state *crs,
+ int rc);
+
+#endif
diff --git a/tools/libxl/libxl_colo_restore.c b/tools/libxl/libxl_colo_restore.c
new file mode 100644
index 000..7b825d4
--- /dev/null
+++ b/tools/libxl/libxl_colo_resto

[Xen-devel] [RFC PATCH COLO v5 19/29] send store mfn and console mfn to xl before resuming secondary vm

2015-03-31 Thread Yang Hongyang
From: Wen Congyang 

We will call libxl__xc_domain_restore_done() to rebuild the secondary vm, but
we need the store mfn and console mfn when rebuilding it. So make
restore_results a function pointer in the callbacks struct and in struct
{save,restore}_callbacks, and use this callback to send the store mfn and
console mfn to xl.

Signed-off-by: Wen Congyang 
---
 tools/libxc/include/xenguest.h | 8 
 tools/libxc/xc_domain_restore.c| 2 +-
 tools/libxl/libxl_colo_restore.c   | 5 -
 tools/libxl/libxl_create.c | 1 +
 tools/libxl/libxl_save_msgs_gen.pl | 2 +-
 5 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/tools/libxc/include/xenguest.h b/tools/libxc/include/xenguest.h
index 266d96b..597f515 100644
--- a/tools/libxc/include/xenguest.h
+++ b/tools/libxc/include/xenguest.h
@@ -125,6 +125,14 @@ struct restore_callbacks {
 /* Enable qemu-dm logging dirty pages to xen */
 int (*switch_qemu_logdirty)(int domid, unsigned enable, void *data); /* 
HVM only */
 
+/*
+ * callback to send store mfn and console mfn to xl
+ * if we want to resume vm before xc_domain_save()
+ * exits.
+ */
+void (*restore_results)(unsigned long store_mfn, unsigned long console_mfn,
+void *data);
+
 /* callback to restore toolstack specific data */
 int (*toolstack_restore)(uint32_t domid, const uint8_t *buf,
 uint32_t size, void* data);
diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c
index 5cad21c..cc5c1ad 100644
--- a/tools/libxc/xc_domain_restore.c
+++ b/tools/libxc/xc_domain_restore.c
@@ -2408,7 +2408,7 @@ new_checkpoint:
 } while (0)
 /* COLO */
 
-/* TODO: call restore_results */
+callbacks->restore_results(*store_mfn, *console_mfn, callbacks->data);
 
 /* Resume secondary vm */
 frc = callbacks->postcopy(callbacks->data);
diff --git a/tools/libxl/libxl_colo_restore.c b/tools/libxl/libxl_colo_restore.c
index 7b825d4..554474d 100644
--- a/tools/libxl/libxl_colo_restore.c
+++ b/tools/libxl/libxl_colo_restore.c
@@ -152,11 +152,6 @@ static void colo_resume_vm(libxl__egc *egc,
 return;
 }
 
-/*
- * TODO: get store mfn and console mfn
- *  We should call the callback restore_results in
- *  xc_domain_restore() before resuming the guest.
- */
 libxl__xc_domain_restore_done(egc, dcs, 0, 0, 0);
 
 return;
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index ba6e1fe..89c18dc 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -1121,6 +1121,7 @@ static void domcreate_bootloader_done(libxl__egc *egc,
 rc = ERROR_INVAL;
 goto out;
 }
+callbacks->restore_results = libxl__srm_callout_callback_restore_results;
 
 if (checkpointed_stream == LIBXL_CHECKPOINTED_STREAM_COLO) {
 crs->ao = ao;
diff --git a/tools/libxl/libxl_save_msgs_gen.pl 
b/tools/libxl/libxl_save_msgs_gen.pl
index fbb2d67..2ecd25d 100755
--- a/tools/libxl/libxl_save_msgs_gen.pl
+++ b/tools/libxl/libxl_save_msgs_gen.pl
@@ -32,7 +32,7 @@ our @msgs = (
 #toolstack_save  done entirely `by hand'
 [  7, 'rcxW',   "toolstack_restore", [qw(uint32_t domid
 BLOCK tsdata)] ],
-[  8, 'r',  "restore_results",   ['unsigned long', 'store_mfn',
+[  8, 'rcx',"restore_results",   ['unsigned long', 'store_mfn',
   'unsigned long', 'console_mfn'] 
],
 [  9, 'srW',"complete",  [qw(int retval
  int errnoval)] ],
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC PATCH COLO v5 21/29] tools: xc_domain_restore: zero ioreq page only one time

2015-03-31 Thread Yang Hongyang
From: Wen Congyang 

ioreq page contains evtchn which will be set when we resume the
secondary vm the first time. The hypervisor will check if the
evtchn is corrupted, so we cannot zero the ioreq page more
than one time.

The ioreq->state is always STATE_IOREQ_NONE after the vm is
suspended, so it is OK if we only zero it one time.
---
 tools/libxc/xc_domain_restore.c | 24 +---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c
index cc5c1ad..276db37 100644
--- a/tools/libxc/xc_domain_restore.c
+++ b/tools/libxc/xc_domain_restore.c
@@ -1501,6 +1501,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, 
uint32_t dom,
 struct restore_ctx _ctx;
 struct restore_ctx *ctx = &_ctx;
 struct domain_info_context *dinfo = &ctx->dinfo;
+int skip_clear_ioreq_page = 0;
 
 DPRINTF("%s: starting restore of new domid %u", __func__, dom);
 
@@ -2331,13 +2332,30 @@ new_checkpoint:
 }
 
 /* These comms pages need to be zeroed at the start of day */
-if ( xc_clear_domain_page(xch, dom, tailbuf.u.hvm.magicpfns[0]) ||
- xc_clear_domain_page(xch, dom, tailbuf.u.hvm.magicpfns[1]) ||
- xc_clear_domain_page(xch, dom, tailbuf.u.hvm.magicpfns[2]) )
+if ( xc_clear_domain_page(xch, dom, tailbuf.u.hvm.magicpfns[2]) )
 {
 PERROR("error zeroing magic pages");
 goto out;
 }
+if ( !skip_clear_ioreq_page )
+{
+if ( xc_clear_domain_page(xch, dom, tailbuf.u.hvm.magicpfns[0]) ||
+ xc_clear_domain_page(xch, dom, tailbuf.u.hvm.magicpfns[1]) )
+{
+PERROR("error zeroing magic pages");
+goto out;
+}
+/*
+ * ioreq page contains evtchn which will be set when we resume the
+ * secondary vm the first time. The hypervisor will check if the
+ * evtchn is corrupted, so we cannot clear the ioreq page more
+ * than one time.
+ *
+ * The ioreq->state is always STATE_IOREQ_NONE after the vm is
+ * suspended, so it is OK if we only clear it one time.
+ */
+skip_clear_ioreq_page = 1;
+}
 
 if ( (frc = xc_hvm_param_set(xch, dom,
  HVM_PARAM_IOREQ_PFN, 
tailbuf.u.hvm.magicpfns[0]))
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC PATCH COLO v5 18/29] COLO: xc related codes

2015-03-31 Thread Yang Hongyang
From: Wen Congyang 

Save:
1. send XC_SAVE_ID_LAST_CHECKPOINT, so the secondary vm can be resumed
2. call callbacks->get_dirty_pfn() after suspending the primary vm if we
   are doing a checkpoint.

Restore:
1. call the callbacks resume/checkpoint/suspend if the secondary vm's
   status is the same as the primary vm's status.
2. zero out tdata because we will use it to zero out pagebuf.tdata.
3. don't apply the secondary vm's state when we fail to get the new
   secondary vm's state, because we have applied it at every checkpoint.

Signed-off-by: Wen Congyang 
---
 tools/libxc/xc_domain_restore.c | 82 +++--
 tools/libxc/xc_domain_save.c| 57 +++-
 2 files changed, 125 insertions(+), 14 deletions(-)

diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c
index a382701..5cad21c 100644
--- a/tools/libxc/xc_domain_restore.c
+++ b/tools/libxc/xc_domain_restore.c
@@ -1454,7 +1454,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, 
uint32_t dom,
 int nraces = 0;
 
 /* The new domain's shared-info frame number. */
-unsigned long shared_info_frame;
+unsigned long shared_info_frame = 0;
 unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */
 shared_info_any_t *old_shared_info = 
 (shared_info_any_t *)shared_info_page;
@@ -1504,6 +1504,8 @@ int xc_domain_restore(xc_interface *xch, int io_fd, 
uint32_t dom,
 
 DPRINTF("%s: starting restore of new domid %u", __func__, dom);
 
+n = m = 0;
+
 pagebuf_init(&pagebuf);
 memset(&tailbuf, 0, sizeof(tailbuf));
 tailbuf.ishvm = hvm;
@@ -1629,7 +1631,6 @@ int xc_domain_restore(xc_interface *xch, int io_fd, 
uint32_t dom,
  * We uncanonicalise page tables as we go.
  */
 
-n = m = 0;
  loadpages:
 for ( ; ; )
 {
@@ -1793,26 +1794,45 @@ int xc_domain_restore(xc_interface *xch, int io_fd, 
uint32_t dom,
 goto finish;
 }
 
+new_checkpoint:
 // DPRINTF("Buffered checkpoint\n");
 
 if ( pagebuf_get(xch, ctx, &pagebuf, io_fd, dom) ) {
 PERROR("error when buffering batch, finishing");
-/*
- * Remus: discard the current incomplete checkpoint and restore
- * backup from the last complete checkpoint.
- */
-goto finish;
+if ( callbacks && callbacks->checkpoint )
+{
+/* COLO: discard the current incomplete checkpoint */
+rc = 0;
+goto failover;
+}
+else
+{
+/*
+ * Remus: discard the current incomplete checkpoint and restore
+ * backup from the last complete checkpoint.
+ */
+goto finish;
+}
 }
 memset(&tmptail, 0, sizeof(tmptail));
 tmptail.ishvm = hvm;
 if ( buffer_tail(xch, ctx, &tmptail, io_fd, max_vcpu_id, vcpumap,
  ext_vcpucontext, vcpuextstate_size) < 0 ) {
 ERROR ("error buffering image tail, finishing");
-/*
- * Remus: discard the current incomplete checkpoint and restore
- * backup from the last complete checkpoint.
- */
-goto finish;
+if ( callbacks && callbacks->checkpoint )
+{
+/* COLO: discard the current incomplete checkpoint */
+rc = 0;
+goto failover;
+}
+else
+{
+/*
+ * Remus: discard the current incomplete checkpoint and restore
+ * backup from the last complete checkpoint.
+ */
+goto finish;
+}
 }
 tailbuf_free(&tailbuf);
 memcpy(&tailbuf, &tmptail, sizeof(tailbuf));
@@ -2301,6 +2321,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, 
uint32_t dom,
 free(tdata.data);
 goto out;
 }
+memset(&tdata, 0, sizeof(tdata));
 }
 
 /* Dump the QEMU state to a state file for QEMU to load */
@@ -2368,6 +2389,43 @@ int xc_domain_restore(xc_interface *xch, int io_fd, 
uint32_t dom,
 rc = 0;
 
  out:
+if ( !rc && callbacks && callbacks->checkpoint )
+{
+#define HANDLE_CALLBACK_RETURN_VALUE(frc)   \
+do {\
+if ( frc == 0 ) \
+{   \
+/* Some internal error happens */   \
+rc = 1; \
+goto out;   \
+}   \
+else if ( frc == 2 )\
+{   \
+/* Reading/writing error, do failover */\
+rc = 0; \
+goto failover;  \
+}   \
+} wh

[Xen-devel] [RFC PATCH COLO v5 14/29] Allow slave sends data to master

2015-03-31 Thread Yang Hongyang
From: Wen Congyang 

In colo mode, the slave needs to send data to the master, but the io_fd
can only be written in the master and can only be read in the slave.
Save recv_fd in domain_suspend_state, and send_fd in
domain_create_state.

Signed-off-by: Wen Congyang 
---
 tools/libxl/libxl.c  |  2 +-
 tools/libxl/libxl_create.c   | 14 ++
 tools/libxl/libxl_internal.h |  2 ++
 tools/libxl/libxl_types.idl  |  7 +++
 tools/libxl/xl_cmdimpl.c |  7 +++
 5 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index 7bc4fc4..40a49c7 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -883,7 +883,7 @@ int libxl_domain_remus_start(libxl_ctx *ctx, 
libxl_domain_remus_info *info,
 dss->callback = remus_failover_cb;
 dss->domid = domid;
 dss->fd = send_fd;
-/* TODO do something with recv_fd */
+dss->recv_fd = recv_fd;
 dss->type = type;
 dss->live = 1;
 dss->debug = 0;
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index af04248..392420f 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -1513,8 +1513,8 @@ static void domain_create_cb(libxl__egc *egc,
  int rc, uint32_t domid);
 
 static int do_domain_create(libxl_ctx *ctx, libxl_domain_config *d_config,
-uint32_t *domid,
-int restore_fd, int checkpointed_stream,
+uint32_t *domid, int restore_fd,
+int send_fd, int checkpointed_stream,
 const libxl_asyncop_how *ao_how,
 const libxl_asyncprogress_how *aop_console_how)
 {
@@ -1527,6 +1527,7 @@ static int do_domain_create(libxl_ctx *ctx, 
libxl_domain_config *d_config,
 libxl_domain_config_init(&cdcs->dcs.guest_config_saved);
 libxl_domain_config_copy(ctx, &cdcs->dcs.guest_config_saved, d_config);
 cdcs->dcs.restore_fd = restore_fd;
+cdcs->dcs.send_fd = send_fd;
 cdcs->dcs.callback = domain_create_cb;
 cdcs->dcs.checkpointed_stream = checkpointed_stream;
 libxl__ao_progress_gethow(&cdcs->dcs.aop_console_how, aop_console_how);
@@ -1555,7 +1556,7 @@ int libxl_domain_create_new(libxl_ctx *ctx, 
libxl_domain_config *d_config,
 const libxl_asyncop_how *ao_how,
 const libxl_asyncprogress_how *aop_console_how)
 {
-return do_domain_create(ctx, d_config, domid, -1, 0,
+return do_domain_create(ctx, d_config, domid, -1, -1, 0,
 ao_how, aop_console_how);
 }
 
@@ -1565,7 +1566,12 @@ int libxl_domain_create_restore(libxl_ctx *ctx, 
libxl_domain_config *d_config,
 const libxl_asyncop_how *ao_how,
 const libxl_asyncprogress_how *aop_console_how)
 {
-return do_domain_create(ctx, d_config, domid, restore_fd,
+int send_fd = -1;
+
+if (params->checkpointed_stream == LIBXL_CHECKPOINTED_STREAM_COLO)
+send_fd = params->send_fd;
+
+return do_domain_create(ctx, d_config, domid, restore_fd, send_fd,
 params->checkpointed_stream, ao_how, 
aop_console_how);
 }
 
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 7bfabd8..971d975 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2874,6 +2874,7 @@ struct libxl__domain_suspend_state {
 
 uint32_t domid;
 int fd;
+int recv_fd;
 libxl_domain_type type;
 int live;
 int debug;
@@ -3140,6 +3141,7 @@ struct libxl__domain_create_state {
 libxl_domain_config *guest_config;
 libxl_domain_config guest_config_saved; /* vanilla config */
 int restore_fd;
+int send_fd;
 libxl__domain_create_cb *callback;
 libxl_asyncprogress_how aop_console_how;
 /* private to domain_create */
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index fa85e5b..292d754 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -196,6 +196,12 @@ libxl_viridian_enlightenment = 
Enumeration("viridian_enlightenment", [
 (3, "reference_tsc"),
 ])
 
+libxl_checkpointed_stream = Enumeration("checkpointed_stream", [
+(0, "NONE"),
+(1, "REMUS"),
+(2, "COLO"),
+], init_val = 0)
+
 #
 # Complex libxl types
 #
@@ -344,6 +350,7 @@ libxl_domain_create_info = Struct("domain_create_info",[
 
 libxl_domain_restore_params = Struct("domain_restore_params", [
 ("checkpointed_stream", integer),
+("send_fd", integer),
 ])
 
 libxl_domain_sched_params = Struct("domain_sched_params",[
diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
index 394b55d..4574d05 100644
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -154,6 +154,7 @@ struct domain_create {
 const char *extra_config; /* extra config string */
 const char *restore_file;
 int migrate_fd; /* -1 means n

[Xen-devel] [RFC PATCH COLO v5 20/29] implement the cmdline for COLO

2015-03-31 Thread Yang Hongyang
From: Wen Congyang 

Add a new option -c to the command 'xl remus'. If you want
to use COLO HA instead of Remus HA, please use -c option.

Update man pages to reflect the addition of a new option to
'xl remus' command.

Also add a new option -c to the internal command 'xl migrate-receive'.

Signed-off-by: Wen Congyang 
---
 docs/man/xl.pod.1 | 12 +--
 tools/libxl/libxl.c   | 16 ++
 tools/libxl/xl_cmdimpl.c  | 53 +++
 tools/libxl/xl_cmdtable.c |  4 +++-
 4 files changed, 73 insertions(+), 12 deletions(-)

diff --git a/docs/man/xl.pod.1 b/docs/man/xl.pod.1
index 16783c8..adcbe37 100644
--- a/docs/man/xl.pod.1
+++ b/docs/man/xl.pod.1
@@ -440,12 +440,15 @@ Print huge (!) amount of debug during the migration 
process.
 
 =item B [I] I I
 
-Enable Remus HA for domain. By default B relies on ssh as a transport
-mechanism between the two hosts.
+Enable Remus HA or COLO HA for domain. By default B relies on ssh as a
+transport mechanism between the two hosts.
 
 N.B: Remus support in xl is still in experimental (proof-of-concept) phase.
  Disk replication support is limited to DRBD disks.
 
+ COLO support in xl is still in experimental (proof-of-concept) phase.
+ There is no support for network or disk at the moment.
+
 B
 
 =over 4
@@ -491,6 +494,11 @@ Disable network output buffering. Requires enabling unsafe 
mode.
 
 Disable disk replication. Requires enabling unsafe mode.
 
+=item B<-c>
+
+Enable COLO HA. It conflicts with B<-i> and B<-b>, and memory
+checkpoint compression must be disabled.
+
 =back
 
 =item B I
diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index b6c5429..afe0cc9 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -862,6 +862,22 @@ int libxl_domain_remus_start(libxl_ctx *ctx, 
libxl_domain_remus_info *info,
 goto out;
 }
 
+/* The caller must set this defbool */
+if (libxl_defbool_is_default(info->colo)) {
+LOG(ERROR, "colo mode must be enabled/disabled");
+rc = ERROR_FAIL;
+goto out;
+}
+
+if (libxl_defbool_val(info->colo)) {
+libxl_defbool_setdefault(&info->compression, false);
+if (libxl_defbool_val(info->compression)) {
+LOG(ERROR, "cannot use memory checkpoint compression in COLO 
mode");
+rc = ERROR_FAIL;
+goto out;
+}
+}
+
 libxl_defbool_setdefault(&info->allow_unsafe, false);
 libxl_defbool_setdefault(&info->blackhole, false);
 libxl_defbool_setdefault(&info->compression, true);
diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
index 4574d05..6c5b792 100644
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -4250,6 +4250,9 @@ static void migrate_receive(int debug, int daemonize, int 
monitor,
 dom_info.send_fd = send_fd;
 dom_info.migration_domname_r = &migration_domname;
 dom_info.checkpointed_stream = remus;
+if (remus == LIBXL_CHECKPOINTED_STREAM_COLO)
+/* COLO uses stdout to send control message to master */
+dom_info.quiet = 1;
 
 rc = create_domain(&dom_info);
 if (rc < 0) {
@@ -4264,7 +4267,8 @@ static void migrate_receive(int debug, int daemonize, int 
monitor,
 /* If we are here, it means that the sender (primary) has crashed.
  * TODO: Split-Brain Check.
  */
-fprintf(stderr, "migration target: Remus Failover for domain %u\n",
+fprintf(stderr, "migration target: %s Failover for domain %u\n",
+remus == LIBXL_CHECKPOINTED_STREAM_COLO ? "COLO" : "Remus",
 domid);
 
 /*
@@ -4281,15 +4285,21 @@ static void migrate_receive(int debug, int daemonize, 
int monitor,
 rc = libxl_domain_rename(ctx, domid, migration_domname,
  common_domname);
 if (rc)
-fprintf(stderr, "migration target (Remus): "
+fprintf(stderr, "migration target (%s): "
 "Failed to rename domain from %s to %s:%d\n",
+remus == LIBXL_CHECKPOINTED_STREAM_COLO ? "COLO" : 
"Remus",
 migration_domname, common_domname, rc);
 }
 
+if (remus == LIBXL_CHECKPOINTED_STREAM_COLO)
+/* The guest is running after failover in COLO mode */
+exit(rc ? -ERROR_FAIL: 0);
+
 rc = libxl_domain_unpause(ctx, domid);
 if (rc)
-fprintf(stderr, "migration target (Remus): "
+fprintf(stderr, "migration target (%s): "
 "Failed to unpause domain %s (id: %u):%d\n",
+remus == LIBXL_CHECKPOINTED_STREAM_COLO ? "COLO" : "Remus",
 common_domname, domid, rc);
 
 exit(rc ? -ERROR_FAIL: 0);
@@ -4435,7 +4445,7 @@ int main_migrate_receive(int argc, char **argv)
 int debug = 0, daemonize = 1, monitor = 1, remus = 0;
 int opt;
 
-SWITCH_FOREACH_OPT(opt, "Fedr", NULL,

[Xen-devel] [RFC PATCH COLO v5 17/29] xc_domain_save: flush cache before calling callbacks->postcopy() in colo mode

2015-03-31 Thread Yang Hongyang
From: Wen Congyang 

In colo mode, secondary vm is running. We will use the io_fd to
ensure that both primary vm and secondary vm are resumed
at the same time. So we should call postcopy later.

Signed-off-by: Wen Congyang 
---
 tools/libxc/xc_domain_save.c | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/tools/libxc/xc_domain_save.c b/tools/libxc/xc_domain_save.c
index cef6995..045a050 100644
--- a/tools/libxc/xc_domain_save.c
+++ b/tools/libxc/xc_domain_save.c
@@ -2082,10 +2082,15 @@ int xc_domain_save(xc_interface *xch, int io_fd, 
uint32_t dom, uint32_t max_iter
  out_rc:
 completed = 1;
 
-if ( !rc && callbacks->postcopy )
+/*
+ * COLO: secondary vm is running. We will use the io_fd to
+ * ensure that both primary vm and secondary vm are resumed
+ * at the same time. So we should call postcopy later.
+ */
+if ( !rc && callbacks->postcopy && !callbacks->get_dirty_pfn )
 callbacks->postcopy(callbacks->data);
 
-/* guest has been resumed. Now we can compress data
+/* Remus: guest has been resumed. Now we can compress data
  * at our own pace.
  */
 if (!rc && compressing)
@@ -2113,6 +2118,13 @@ int xc_domain_save(xc_interface *xch, int io_fd, 
uint32_t dom, uint32_t max_iter
 
 discard_file_cache(xch, io_fd, 1 /* flush */);
 
+/*
+ * COLO: send qemu device state and resume both
+ * primary vm and secondary vm now.
+ */
+if ( !rc && callbacks->postcopy && callbacks->get_dirty_pfn )
+callbacks->postcopy(callbacks->data);
+
 /* Enable compression now, finally */
 compressing = (flags & XCFLAGS_CHECKPOINT_COMPRESS);
 
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC PATCH COLO v5 11/29] adjust the indentation

2015-03-31 Thread Yang Hongyang
From: Wen Congyang 

Signed-off-by: Wen Congyang 
---
 tools/libxl/libxl_checkpoint_device.c | 23 ---
 tools/libxl/libxl_internal.h  | 21 -
 tools/libxl/libxl_remus.c | 12 
 3 files changed, 32 insertions(+), 24 deletions(-)

diff --git a/tools/libxl/libxl_checkpoint_device.c 
b/tools/libxl/libxl_checkpoint_device.c
index 109cd23..0cfabc3 100644
--- a/tools/libxl/libxl_checkpoint_device.c
+++ b/tools/libxl/libxl_checkpoint_device.c
@@ -73,9 +73,9 @@ static void devices_teardown_cb(libxl__egc *egc,
 /* checkpoint device setup and teardown */
 
 static libxl__checkpoint_device* checkpoint_device_init(libxl__egc *egc,
-  libxl__checkpoint_devices_state 
*cds,
-  libxl__device_kind kind,
-  void *libxl_dev)
+libxl__checkpoint_devices_state *cds,
+libxl__device_kind kind,
+void *libxl_dev)
 {
 libxl__checkpoint_device *dev = NULL;
 
@@ -89,9 +89,10 @@ static libxl__checkpoint_device* 
checkpoint_device_init(libxl__egc *egc,
 }
 
 static void checkpoint_devices_setup(libxl__egc *egc,
-libxl__checkpoint_devices_state *cds);
+ libxl__checkpoint_devices_state *cds);
 
-void libxl__checkpoint_devices_setup(libxl__egc *egc, 
libxl__checkpoint_devices_state *cds)
+void libxl__checkpoint_devices_setup(libxl__egc *egc,
+ libxl__checkpoint_devices_state *cds)
 {
 int i, rc;
 
@@ -137,7 +138,7 @@ out:
 }
 
 static void checkpoint_devices_setup(libxl__egc *egc,
-libxl__checkpoint_devices_state *cds)
+ libxl__checkpoint_devices_state *cds)
 {
 int i, rc;
 
@@ -223,7 +224,7 @@ static void all_devices_setup_cb(libxl__egc *egc,
 }
 
 void libxl__checkpoint_devices_teardown(libxl__egc *egc,
-   libxl__checkpoint_devices_state *cds)
+libxl__checkpoint_devices_state *cds)
 {
 int i;
 libxl__checkpoint_device *dev;
@@ -285,12 +286,12 @@ static void devices_checkpoint_cb(libxl__egc *egc,
 
 /* API implementations */
 
-#define define_checkpoint_api(api)\
-void libxl__checkpoint_devices_##api(libxl__egc *egc,\
-libxl__checkpoint_devices_state *cds)\
+#define define_checkpoint_api(api)  \
+void libxl__checkpoint_devices_##api(libxl__egc *egc,   \
+libxl__checkpoint_devices_state *cds)   \
 {   \
 int i;  \
-libxl__checkpoint_device *dev;   \
+libxl__checkpoint_device *dev;  \
 \
 STATE_AO_GC(cds->ao);   \
 \
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 7e7c3b3..4b8590c 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2652,7 +2652,8 @@ typedef struct libxl__save_helper_state {
  * Each device type needs to implement the interfaces specified in
  * the libxl__checkpoint_device_instance_ops if it wishes to support Remus.
  *
- * The high-level control flow through the checkpoint device layer is shown 
below:
+ * The high-level control flow through the checkpoint device layer is shown
+ * below:
  *
  * xl remus
  *  |->  libxl_domain_remus_start
@@ -2713,7 +2714,8 @@ int 
init_subkind_drbd_disk(libxl__checkpoint_devices_state *cds);
 void cleanup_subkind_drbd_disk(libxl__checkpoint_devices_state *cds);
 
 typedef void libxl__checkpoint_callback(libxl__egc *,
-   libxl__checkpoint_devices_state *, int rc);
+libxl__checkpoint_devices_state *,
+int rc);
 
 /*
  * State associated with a checkpoint invocation, including parameters
@@ -2721,7 +2723,7 @@ typedef void libxl__checkpoint_callback(libxl__egc *,
  * save/restore machinery.
  */
 struct libxl__checkpoint_devices_state {
-/* must be set by caller of libxl__checkpoint_device_(setup|teardown) 
*/
+/*-- must be set by caller of libxl__checkpoint_device_(setup|teardown) 
--*/
 
 libxl__ao *ao;
 uint32_t domid;
@@ -2734,7 +2736,8 @@ struct libxl__checkpoint_devices_state {
 /*
  * this array is allocated before setup the checkpoint devices by the
  * checkpoint abstract layer.
- * devs may b

[Xen-devel] [RFC PATCH COLO v5 10/29] rename remus device to checkpoint device

2015-03-31 Thread Yang Hongyang
From: Wen Congyang 

This patch is auto-generated by the following commands:
 1. git mv tools/libxl/libxl_remus_device.c 
tools/libxl/libxl_checkpoint_device.c
 2. perl -pi -e 's/libxl_remus_device/libxl_checkpoint_device/g' 
tools/libxl/Makefile
 3. perl -pi -e 's/\blibxl__remus_devices/libxl__checkpoint_devices/g' 
tools/libxl/*.[ch]
 4. perl -pi -e 's/\blibxl__remus_device\b/libxl__checkpoint_device/g' 
tools/libxl/*.[ch]
 5. perl -pi -e 
's/\blibxl__remus_device_instance_ops\b/libxl__checkpoint_device_instance_ops/g'
 tools/libxl/*.[ch]
 6. perl -pi -e 's/\blibxl__remus_callback\b/libxl__checkpoint_callback/g' 
tools/libxl/*.[ch]
 7. perl -pi -e 's/\bremus_device_init\b/checkpoint_device_init/g' 
tools/libxl/*.[ch]
 8. perl -pi -e 's/\bremus_devices_setup\b/checkpoint_devices_setup/g' 
tools/libxl/*.[ch]
 9. perl -pi -e 's/\bdefine_remus_checkpoint_api\b/define_checkpoint_api/g' 
tools/libxl/*.[ch]
10. perl -pi -e 's/\brds\b/cds/g' tools/libxl/*.[ch]
11. perl -pi -e 's/REMUS_DEVICE/CHECKPOINT_DEVICE/g' tools/libxl/*.[ch] 
tools/libxl/*.idl
12. perl -pi -e 's/REMUS_DEVOPS/CHECKPOINT_DEVOPS/g' tools/libxl/*.[ch] 
tools/libxl/*.idl
13. perl -pi -e 's/\bremus\b/checkpoint/g' 
tools/libxl/libxl_checkpoint_device.[ch]
14. perl -pi -e 's/\bremus device/checkpoint device/g' 
tools/libxl/libxl_internal.h
15. perl -pi -e 's/\bRemus device/checkpoint device/g' 
tools/libxl/libxl_internal.h
16. perl -pi -e 's/\bremus abstract/checkpoint abstract/g' 
tools/libxl/libxl_internal.h
17. perl -pi -e 's/\bremus invocation/checkpoint invocation/g' 
tools/libxl/libxl_internal.h
18. perl -pi -e 's/\blibxl__remus_device_\(/libxl__checkpoint_device_(/g' 
tools/libxl/libxl_internal.h

Signed-off-by: Wen Congyang 
Cc: Shriram Rajagopalan 
---
 tools/libxl/Makefile  |   2 +-
 tools/libxl/libxl_checkpoint_device.c | 327 ++
 tools/libxl/libxl_internal.h  | 112 ++--
 tools/libxl/libxl_netbuffer.c | 108 +--
 tools/libxl/libxl_nonetbuffer.c   |  10 +-
 tools/libxl/libxl_remus.c |  78 
 tools/libxl/libxl_remus_device.c  | 327 --
 tools/libxl/libxl_remus_disk_drbd.c   |  52 +++---
 tools/libxl/libxl_types.idl   |   4 +-
 9 files changed, 510 insertions(+), 510 deletions(-)
 create mode 100644 tools/libxl/libxl_checkpoint_device.c
 delete mode 100644 tools/libxl/libxl_remus_device.c

diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 7eeda0e..1e27754 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -56,7 +56,7 @@ else
 LIBXL_OBJS-y += libxl_nonetbuffer.o
 endif
 
-LIBXL_OBJS-y += libxl_remus.o libxl_remus_device.o libxl_remus_disk_drbd.o
+LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o
 
 LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o
 LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o
diff --git a/tools/libxl/libxl_checkpoint_device.c 
b/tools/libxl/libxl_checkpoint_device.c
new file mode 100644
index 000..109cd23
--- /dev/null
+++ b/tools/libxl/libxl_checkpoint_device.c
@@ -0,0 +1,327 @@
+/*
+ * Copyright (C) 2014 FUJITSU LIMITED
+ * Author: Yang Hongyang 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+
+extern const libxl__checkpoint_device_instance_ops remus_device_nic;
+extern const libxl__checkpoint_device_instance_ops remus_device_drbd_disk;
+static const libxl__checkpoint_device_instance_ops *remus_ops[] = {
+&remus_device_nic,
+&remus_device_drbd_disk,
+NULL,
+};
+
+/*- helper functions -*/
+
+static int init_device_subkind(libxl__checkpoint_devices_state *cds)
+{
+/* init device subkind-specific state in the libxl ctx */
+int rc;
+STATE_AO_GC(cds->ao);
+
+if (libxl__netbuffer_enabled(gc)) {
+rc = init_subkind_nic(cds);
+if (rc) goto out;
+}
+
+rc = init_subkind_drbd_disk(cds);
+if (rc) goto out;
+
+rc = 0;
+out:
+return rc;
+}
+
+static void cleanup_device_subkind(libxl__checkpoint_devices_state *cds)
+{
+/* cleanup device subkind-specific state in the libxl ctx */
+STATE_AO_GC(cds->ao);
+
+if (libxl__netbuffer_enabled(gc))
+cleanup_subkind_nic(cds);
+
+cleanup_subkind_drbd_disk(cds);
+}
+
+/*- setup() and teardown() -*/
+
+/* callbacks */
+
+static void all_devices_setup_cb(libxl__egc *egc,
+  

[Xen-devel] [RFC PATCH COLO v5 12/29] don't touch remus in checkpoint_device

2015-03-31 Thread Yang Hongyang
From: Wen Congyang 

The checkpoint device is an abstract layer for performing checkpoints,
and COLO can use it for checkpointing as well. However, some code in
the checkpoint device layer still refers to Remus directly:
1. remus_ops: the Remus ops are used directly in the checkpoint
   device. Store them in the checkpoint device state instead.
2. The concrete layer's private members: add a new structure,
   remus state, and move those members into it.
3. init/cleanup device subkind: (init|cleanup)_subkind_nic and
   (init|cleanup)_subkind_drbd_disk are called directly in the
   checkpoint device. Call them before calling
   libxl__checkpoint_devices_setup() or after calling
   libxl__checkpoint_devices_teardown() instead.

Signed-off-by: Wen Congyang 
Cc: Shriram Rajagopalan 
---
 tools/libxl/libxl.c   |  2 +-
 tools/libxl/libxl_checkpoint_device.c | 52 ++--
 tools/libxl/libxl_dom.c   |  3 +-
 tools/libxl/libxl_internal.h  | 37 ++-
 tools/libxl/libxl_netbuffer.c | 51 +++-
 tools/libxl/libxl_remus.c | 89 +++
 tools/libxl/libxl_remus.h |  5 +-
 tools/libxl/libxl_remus_disk_drbd.c   |  9 ++--
 8 files changed, 136 insertions(+), 112 deletions(-)

diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index bcbd961..7bc4fc4 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -892,7 +892,7 @@ int libxl_domain_remus_start(libxl_ctx *ctx, 
libxl_domain_remus_info *info,
 assert(info);
 
 /* Point of no return */
-libxl__remus_setup(egc, dss);
+libxl__remus_setup(egc, &dss->rs);
 return AO_INPROGRESS;
 
  out:
diff --git a/tools/libxl/libxl_checkpoint_device.c 
b/tools/libxl/libxl_checkpoint_device.c
index 0cfabc3..2b7318f 100644
--- a/tools/libxl/libxl_checkpoint_device.c
+++ b/tools/libxl/libxl_checkpoint_device.c
@@ -17,46 +17,6 @@
 
 #include "libxl_internal.h"
 
-extern const libxl__checkpoint_device_instance_ops remus_device_nic;
-extern const libxl__checkpoint_device_instance_ops remus_device_drbd_disk;
-static const libxl__checkpoint_device_instance_ops *remus_ops[] = {
-&remus_device_nic,
-&remus_device_drbd_disk,
-NULL,
-};
-
-/*- helper functions -*/
-
-static int init_device_subkind(libxl__checkpoint_devices_state *cds)
-{
-/* init device subkind-specific state in the libxl ctx */
-int rc;
-STATE_AO_GC(cds->ao);
-
-if (libxl__netbuffer_enabled(gc)) {
-rc = init_subkind_nic(cds);
-if (rc) goto out;
-}
-
-rc = init_subkind_drbd_disk(cds);
-if (rc) goto out;
-
-rc = 0;
-out:
-return rc;
-}
-
-static void cleanup_device_subkind(libxl__checkpoint_devices_state *cds)
-{
-/* cleanup device subkind-specific state in the libxl ctx */
-STATE_AO_GC(cds->ao);
-
-if (libxl__netbuffer_enabled(gc))
-cleanup_subkind_nic(cds);
-
-cleanup_subkind_drbd_disk(cds);
-}
-
 /*- setup() and teardown() -*/
 
 /* callbacks */
@@ -94,14 +54,10 @@ static void checkpoint_devices_setup(libxl__egc *egc,
 void libxl__checkpoint_devices_setup(libxl__egc *egc,
  libxl__checkpoint_devices_state *cds)
 {
-int i, rc;
+int i;
 
 STATE_AO_GC(cds->ao);
 
-rc = init_device_subkind(cds);
-if (rc)
-goto out;
-
 cds->num_devices = 0;
 cds->num_nics = 0;
 cds->num_disks = 0;
@@ -134,7 +90,7 @@ void libxl__checkpoint_devices_setup(libxl__egc *egc,
 return;
 
 out:
-cds->callback(egc, cds, rc);
+cds->callback(egc, cds, 0);
 }
 
 static void checkpoint_devices_setup(libxl__egc *egc,
@@ -172,7 +128,7 @@ static void device_setup_iterate(libxl__egc *egc, 
libxl__ao_device *aodev)
 goto out;
 
 do {
-dev->ops = remus_ops[++dev->ops_index];
+dev->ops = dev->cds->ops[++dev->ops_index];
 if (!dev->ops) {
 libxl_device_nic * nic = NULL;
 libxl_device_disk * disk = NULL;
@@ -271,8 +227,6 @@ static void devices_teardown_cb(libxl__egc *egc,
 cds->disks = NULL;
 cds->num_disks = 0;
 
-cleanup_device_subkind(cds);
-
 cds->callback(egc, cds, rc);
 }
 
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 4693d32..e09a1eb 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -1865,7 +1865,6 @@ void libxl__domain_suspend(libxl__egc *egc, 
libxl__domain_suspend_state *dss)
 dss2->save_dm = 1;
 
 if (r_info != NULL) {
-dss->interval = r_info->interval;
 if (libxl_defbool_val(r_info->compression))
 dss->xcflags |= XCFLAGS_CHECKPOINT_COMPRESS;
 }
@@ -2051,7 +2050,7 @@ static void domain_suspend_done(libxl__egc *egc,
dss2->guest_evtchn.port, 
&dss2->guest_evtchn_lockfd);
 
 if (dss->remus) {
-libxl__remus_teardown(egc, dss, rc);
+libxl__remus_teardown(egc, &dss->rs, rc);
 return;
 }
 
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 4b8590c..e12c7b5 100644
---

[Xen-devel] [RFC PATCH COLO v5 13/29] Update libxl_save_msgs_gen.pl to support return data from xl to xc

2015-03-31 Thread Yang Hongyang
From: Wen Congyang 

Currently, all callbacks return either an integer value or void, so
there is no way to return data to xc via a callback. Update
libxl_save_msgs_gen.pl to support this case.

Signed-off-by: Wen Congyang 
---
 tools/libxl/libxl_internal.h   |  3 ++
 tools/libxl/libxl_save_callout.c   | 31 ++
 tools/libxl/libxl_save_helper.c| 17 ++
 tools/libxl/libxl_save_msgs_gen.pl | 65 ++
 4 files changed, 109 insertions(+), 7 deletions(-)

diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index e12c7b5..7bfabd8 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -3180,6 +3180,9 @@ _hidden void libxl__xc_domain_save_done(libxl__egc*, void 
*dss_void,
  * When they are ready to indicate completion, they call this. */
 void libxl__xc_domain_saverestore_async_callback_done(libxl__egc *egc,
libxl__save_helper_state *shs, int return_value);
+void libxl__xc_domain_saverestore_async_callback_done_with_data(libxl__egc 
*egc,
+   libxl__save_helper_state *shs,
+   const void *data, uint64_t size);
 
 
 _hidden void libxl__domain_suspend_common_switch_qemu_logdirty
diff --git a/tools/libxl/libxl_save_callout.c b/tools/libxl/libxl_save_callout.c
index 40b25e4..477e633 100644
--- a/tools/libxl/libxl_save_callout.c
+++ b/tools/libxl/libxl_save_callout.c
@@ -145,6 +145,15 @@ void 
libxl__xc_domain_saverestore_async_callback_done(libxl__egc *egc,
 shs->egc = 0;
 }
 
+void libxl__xc_domain_saverestore_async_callback_done_with_data(libxl__egc 
*egc,
+   libxl__save_helper_state *shs,
+   const void *data, uint64_t size)
+{
+shs->egc = egc;
+libxl__srm_callout_sendreply_data(data, size, shs);
+shs->egc = 0;
+}
+
 /*- helper execution -*/
 
 static void run_helper(libxl__egc *egc, libxl__save_helper_state *shs,
@@ -370,6 +379,28 @@ void libxl__srm_callout_sendreply(int r, void *user)
 helper_failed(egc, shs, ERROR_FAIL);
 }
 
+void libxl__srm_callout_sendreply_data(const void *data, uint64_t size, void 
*user)
+{
+libxl__save_helper_state *shs = user;
+libxl__egc *egc = shs->egc;
+STATE_AO_GC(shs->ao);
+int errnoval;
+
+errnoval = libxl_write_exactly(CTX, libxl__carefd_fd(shs->pipes[0]),
+   &size, sizeof(size), shs->stdin_what,
+   "callback return data length");
+if (errnoval)
+goto out;
+
+errnoval = libxl_write_exactly(CTX, libxl__carefd_fd(shs->pipes[0]),
+   data, size, shs->stdin_what,
+   "callback return data");
+
+out:
+if (errnoval)
+helper_failed(egc, shs, ERROR_FAIL);
+}
+
 void libxl__srm_callout_callback_log(uint32_t level, uint32_t errnoval,
   const char *context, const char *formatted, void *user)
 {
diff --git a/tools/libxl/libxl_save_helper.c b/tools/libxl/libxl_save_helper.c
index 74826a1..44c5807 100644
--- a/tools/libxl/libxl_save_helper.c
+++ b/tools/libxl/libxl_save_helper.c
@@ -155,6 +155,23 @@ int helper_getreply(void *user)
 return v;
 }
 
+uint8_t *helper_getreply_data(void *user)
+{
+uint64_t size;
+int r = read_exactly(0, &size, sizeof(size));
+uint8_t *data;
+
+if (r <= 0)
+exit(-2);
+
+data = helper_allocbuf(size, user);
+r = read_exactly(0, data, size);
+if (r <= 0)
+exit(-2);
+
+return data;
+}
+
 /*- other callbacks -*/
 
 static int toolstack_save_fd;
diff --git a/tools/libxl/libxl_save_msgs_gen.pl 
b/tools/libxl/libxl_save_msgs_gen.pl
index 6b4b65e..41ee000 100755
--- a/tools/libxl/libxl_save_msgs_gen.pl
+++ b/tools/libxl/libxl_save_msgs_gen.pl
@@ -15,6 +15,7 @@ our @msgs = (
 # and its null-ness needs to be passed through to the helper's xc
 #   W  - needs a return value; callback is synchronous
 #   A  - needs a return value; callback is asynchronous
+#   B  - return value is a pointer
 [  1, 'sr', "log",   [qw(uint32_t level
  uint32_t errnoval
  STRING context
@@ -99,23 +100,28 @@ our $libxl = "libxl__srm";
 our $callback = "${libxl}_callout_callback";
 our $receiveds = "${libxl}_callout_received";
 our $sendreply = "${libxl}_callout_sendreply";
+our $sendreply_data = "${libxl}_callout_sendreply_data";
 our $getcallbacks = "${libxl}_callout_get_callbacks";
 our $enumcallbacks = "${libxl}_callout_enumcallbacks";
 sub cbtype ($) { "${libxl}_".$_[0]."_autogen_callbacks"; };
 
 f_decl($sendreply, 'callout', 'void', "(int r, void *user)");
+f_decl($sendreply_data, 'callout', 'void',
+   "(const void *data, uint64_t size, void *user)");
 
 our $helper = "helper";
 our $encode = "${helper}_stub";
 our $allocbuf = "${helper}_allocbuf";
 

[Xen-devel] [RFC PATCH COLO v5 09/29] move remus related codes to libxl_remus.c

2015-03-31 Thread Yang Hongyang
From: Wen Congyang 

libxl_domain_remus_start() is an external API and is not moved.

Signed-off-by: Wen Congyang 
Cc: Shriram Rajagopalan 
---
 tools/libxl/Makefile  |   2 +-
 tools/libxl/libxl.c   |  57 +
 tools/libxl/libxl_dom.c   | 220 +---
 tools/libxl/libxl_remus.c | 318 ++
 tools/libxl/libxl_remus.h |  28 
 5 files changed, 352 insertions(+), 273 deletions(-)
 create mode 100644 tools/libxl/libxl_remus.c
 create mode 100644 tools/libxl/libxl_remus.h

diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 1b16598..7eeda0e 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -56,7 +56,7 @@ else
 LIBXL_OBJS-y += libxl_nonetbuffer.o
 endif
 
-LIBXL_OBJS-y += libxl_remus_device.o libxl_remus_disk_drbd.o
+LIBXL_OBJS-y += libxl_remus.o libxl_remus_device.o libxl_remus_disk_drbd.o
 
 LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o
 LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o
diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index 58629ed..bcbd961 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -17,6 +17,7 @@
 #include "libxl_osdeps.h"
 
 #include "libxl_internal.h"
+#include "libxl_remus.h"
 
 #define PAGE_TO_MEMKB(pages) ((pages) * 4)
 #define BACKEND_STRING_SIZE 5
@@ -842,11 +843,6 @@ out:
 GC_FREE;
 return ptr;
 }
-
-static void libxl__remus_setup_done(libxl__egc *egc,
-libxl__remus_devices_state *rds, int rc);
-static void libxl__remus_setup_failed(libxl__egc *egc,
-  libxl__remus_devices_state *rds, int rc);
 static void remus_failover_cb(libxl__egc *egc,
   libxl__domain_suspend_state *dss, int rc);
 
@@ -895,63 +891,14 @@ int libxl_domain_remus_start(libxl_ctx *ctx, 
libxl_domain_remus_info *info,
 
 assert(info);
 
-/* Convenience aliases */
-libxl__remus_devices_state *const rds = &dss->rds;
-
-if (libxl_defbool_val(info->netbuf)) {
-if (!libxl__netbuffer_enabled(gc)) {
-LOG(ERROR, "Remus: No support for network buffering");
-rc = ERROR_FAIL;
-goto out;
-}
-rds->device_kind_flags |= (1 << LIBXL__DEVICE_KIND_VIF);
-}
-
-if (libxl_defbool_val(info->diskbuf))
-rds->device_kind_flags |= (1 << LIBXL__DEVICE_KIND_VBD);
-
-rds->ao = ao;
-rds->domid = domid;
-rds->callback = libxl__remus_setup_done;
-
 /* Point of no return */
-libxl__remus_devices_setup(egc, rds);
+libxl__remus_setup(egc, dss);
 return AO_INPROGRESS;
 
  out:
 return AO_ABORT(rc);
 }
 
-static void libxl__remus_setup_done(libxl__egc *egc,
-libxl__remus_devices_state *rds, int rc)
-{
-libxl__domain_suspend_state *dss = CONTAINER_OF(rds, *dss, rds);
-STATE_AO_GC(dss->ao);
-
-if (!rc) {
-libxl__domain_suspend(egc, dss);
-return;
-}
-
-LOG(ERROR, "Remus: failed to setup device for guest with domid %u, rc %d",
-dss->domid, rc);
-rds->callback = libxl__remus_setup_failed;
-libxl__remus_devices_teardown(egc, rds);
-}
-
-static void libxl__remus_setup_failed(libxl__egc *egc,
-  libxl__remus_devices_state *rds, int rc)
-{
-libxl__domain_suspend_state *dss = CONTAINER_OF(rds, *dss, rds);
-STATE_AO_GC(dss->ao);
-
-if (rc)
-LOG(ERROR, "Remus: failed to teardown device after setup failed"
-" for guest with domid %u, rc %d", dss->domid, rc);
-
-dss->callback(egc, dss, rc);
-}
-
 static void remus_failover_cb(libxl__egc *egc,
   libxl__domain_suspend_state *dss, int rc)
 {
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index a3fce46..4693d32 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -19,6 +19,7 @@
 
 #include "libxl_internal.h"
 #include "libxl_arch.h"
+#include "libxl_remus.h"
 
 #include 
 #include 
@@ -1807,194 +1808,6 @@ static void 
domain_suspend_callback_common_done(libxl__egc *egc,
 libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, ok);
 }
 
-/*- remus callbacks -*/
-static void remus_domain_suspend_callback_common_done(libxl__egc *egc,
-libxl__domain_suspend_state2 *dss2, int ok);
-static void remus_devices_postsuspend_cb(libxl__egc *egc,
- libxl__remus_devices_state *rds,
- int rc);
-static void remus_devices_preresume_cb(libxl__egc *egc,
-   libxl__remus_devices_state *rds,
-   int rc);
-
-static void libxl__remus_domain_suspend_callback(void *data)
-{
-libxl__save_helper_state *shs = data;
-libxl__egc *egc = shs->egc;
-libxl__domain_suspend_state *dss = CONTAINER_OF(shs, *dss, shs);
-
-/* Convenience aliases 

[Xen-devel] [RFC PATCH COLO v5 03/29] tools: libxl: introduce a new API libxl__domain_restore() to read qemu state

2015-03-31 Thread Yang Hongyang
From: Wen Congyang 

The secondary VM runs in COLO mode, so the following steps are
repeated again and again:
1. suspend both the primary VM and the secondary VM
2. sync the state
3. resume both the primary VM and the secondary VM
QEMU's state is sent each time in step 2, and the secondary's QEMU
must read it each time before the secondary VM is resumed. Introduce
a new API, libxl__domain_restore(), to do this. It should be called
before resuming the secondary VM.

Note: we should update qemu to support it.
Signed-off-by: Wen Congyang 
---
 tools/libxl/libxl.c  | 18 ++
 tools/libxl/libxl_dom.c  | 26 ++
 tools/libxl/libxl_internal.h |  4 
 tools/libxl/libxl_qmp.c  | 10 ++
 4 files changed, 58 insertions(+)

diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index 2a735b3..6e55afc 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -510,6 +510,24 @@ int libxl_domain_rename(libxl_ctx *ctx, uint32_t domid,
 return rc;
 }
 
+int libxl__domain_restore(libxl__gc *gc, uint32_t domid)
+{
+int rc = 0;
+
+libxl_domain_type type = libxl__domain_type(gc, domid);
+if (type != LIBXL_DOMAIN_TYPE_HVM) {
+rc = ERROR_FAIL;
+goto out;
+}
+
+rc = libxl__domain_restore_device_model(gc, domid);
+if (rc)
+LOG(ERROR, "failed to restore device mode for domain %u:%d",
+domid, rc);
+out:
+return rc;
+}
+
 int libxl__domain_resume(libxl__gc *gc, uint32_t domid, int suspend_cancel)
 {
 int rc = 0;
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index d286851..fd0c5c2 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -1343,6 +1343,32 @@ int libxl__domain_suspend_device_model(libxl__gc *gc,
 return ret;
 }
 
+int libxl__domain_restore_device_model(libxl__gc *gc, uint32_t domid)
+{
+char *state_file;
+int rc;
+
+switch (libxl__device_model_version_running(gc, domid)) {
+case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL:
+/* not supported now */
+return ERROR_FAIL;
+case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
+/*
+ * This function may be called too many times for the same gc,
+ * so we use NOGC, and free the memory before return to avoid
+ * OOM.
+ */
+state_file = libxl__sprintf(NOGC,
+XC_DEVICE_MODEL_RESTORE_FILE".%d",
+domid);
+rc = libxl__qmp_restore(gc, domid, state_file);
+free(state_file);
+return rc;
+default:
+return ERROR_INVAL;
+}
+}
+
 int libxl__domain_resume_device_model(libxl__gc *gc, uint32_t domid)
 {
 
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index c1ea498..3b4e6c4 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -1016,6 +1016,7 @@ _hidden int libxl__domain_rename(libxl__gc *gc, uint32_t 
domid,
 
 _hidden int libxl__toolstack_restore(uint32_t domid, const uint8_t *buf,
  uint32_t size, void *data);
+_hidden int libxl__domain_restore_device_model(libxl__gc *gc, uint32_t domid);
 _hidden int libxl__domain_resume_device_model(libxl__gc *gc, uint32_t domid);
 
 _hidden const char *libxl__userdata_path(libxl__gc *gc, uint32_t domid,
@@ -1033,6 +1034,7 @@ _hidden int libxl__userdata_store(libxl__gc *gc, uint32_t 
domid,
   const char *userdata_userid,
   const uint8_t *data, int datalen);
 
+_hidden int libxl__domain_restore(libxl__gc *gc, uint32_t domid);
 _hidden int libxl__domain_resume(libxl__gc *gc, uint32_t domid,
  int suspend_cancel);
 
@@ -1629,6 +1631,8 @@ _hidden int libxl__qmp_stop(libxl__gc *gc, int domid);
 _hidden int libxl__qmp_resume(libxl__gc *gc, int domid);
 /* Save current QEMU state into fd. */
 _hidden int libxl__qmp_save(libxl__gc *gc, int domid, const char *filename);
+/* Load current QEMU state from fd. */
+_hidden int libxl__qmp_restore(libxl__gc *gc, int domid, const char *filename);
 /* Set dirty bitmap logging status */
 _hidden int libxl__qmp_set_global_dirty_log(libxl__gc *gc, int domid, bool 
enable);
 _hidden int libxl__qmp_insert_cdrom(libxl__gc *gc, int domid, const 
libxl_device_disk *disk);
diff --git a/tools/libxl/libxl_qmp.c b/tools/libxl/libxl_qmp.c
index 9aa7e2e..1b66d55 100644
--- a/tools/libxl/libxl_qmp.c
+++ b/tools/libxl/libxl_qmp.c
@@ -892,6 +892,16 @@ int libxl__qmp_save(libxl__gc *gc, int domid, const char 
*filename)
NULL, NULL);
 }
 
+int libxl__qmp_restore(libxl__gc *gc, int domid, const char *state_file)
+{
+libxl__json_object *args = NULL;
+
+qmp_parameters_add_string(gc, &args, "filename", (char *)state_file);
+
+return qmp_run_command(gc, domid, "xen-load-devices-state", args,
+   NULL, NULL);
+}
+
 static int qmp_change(libxl__gc *gc, libxl__qmp_handler *qmp,

[Xen-devel] [RFC PATCH COLO v5 08/29] tools/libxl: Introduce bitops macros

2015-03-31 Thread Yang Hongyang
From: Wen Congyang 

This is the same set used by libxc.

Signed-off-by: Wen Congyang 
---
 tools/libxl/libxl_bitops.h | 79 ++
 1 file changed, 79 insertions(+)
 create mode 100644 tools/libxl/libxl_bitops.h

diff --git a/tools/libxl/libxl_bitops.h b/tools/libxl/libxl_bitops.h
new file mode 100644
index 000..c6ef8df
--- /dev/null
+++ b/tools/libxl/libxl_bitops.h
@@ -0,0 +1,79 @@
+#ifndef LIBXL_BITOPS_H
+#define LIBXL_BITOPS_H 1
+
+/* bitmap operations for single threaded access */
+
+#include 
+#include 
+
+#define BITS_PER_LONG (sizeof(unsigned long) * 8)
+#define ORDER_LONG (sizeof(unsigned long) == 4 ? 5 : 6)
+
+#define BITMAP_ENTRY(_nr,_bmap) ((_bmap))[(_nr)/BITS_PER_LONG]
+#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG)
+
+/* calculate required space for number of longs needed to hold nr_bits */
+static inline int bitmap_size(int nr_bits)
+{
+int nr_long, nr_bytes;
+nr_long = (nr_bits + BITS_PER_LONG - 1) >> ORDER_LONG;
+nr_bytes = nr_long * sizeof(unsigned long);
+return nr_bytes;
+}
+
+static inline unsigned long *bitmap_alloc(int nr_bits)
+{
+return calloc(1, bitmap_size(nr_bits));
+}
+
+static inline void bitmap_clear(unsigned long *addr, int nr_bits)
+{
+memset(addr, 0, bitmap_size(nr_bits));
+}
+
+static inline int test_bit(int nr, unsigned long *addr)
+{
+return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1;
+}
+
+static inline void clear_bit(int nr, unsigned long *addr)
+{
+BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr));
+}
+
+static inline void set_bit(int nr, unsigned long *addr)
+{
+BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr));
+}
+
+static inline int test_and_clear_bit(int nr, unsigned long *addr)
+{
+int oldbit = test_bit(nr, addr);
+clear_bit(nr, addr);
+return oldbit;
+}
+
+static inline int test_and_set_bit(int nr, unsigned long *addr)
+{
+int oldbit = test_bit(nr, addr);
+set_bit(nr, addr);
+return oldbit;
+}
+
+static inline void bitmap_or(unsigned long *dst, const unsigned long *other,
+ int nr_bits)
+{
+int i, nr_longs = (bitmap_size(nr_bits) / sizeof(unsigned long));
+for ( i = 0; i < nr_longs; ++i )
+dst[i] |= other[i];
+}
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC PATCH COLO v5 06/29] Update libxl__domain_unpause() to support qemu-xen

2015-03-31 Thread Yang Hongyang
From: Wen Congyang 

Currently, libxl__domain_unpause() only supports
qemu-xen-traditional. Update it to support qemu-xen.

Signed-off-by: Wen Congyang 
---
 tools/libxl/libxl.c  | 13 +
 tools/libxl/libxl_dom.c  | 25 +
 tools/libxl/libxl_internal.h |  2 ++
 3 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index c3898ce..58629ed 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -1039,8 +1039,6 @@ out:
 
 int libxl__domain_unpause(libxl__gc *gc, uint32_t domid)
 {
-char *path;
-char *state;
 int ret, rc = 0;
 
 libxl_domain_type type = libxl__domain_type(gc, domid);
@@ -1050,12 +1048,11 @@ int libxl__domain_unpause(libxl__gc *gc, uint32_t domid)
 }
 
 if (type == LIBXL_DOMAIN_TYPE_HVM) {
-path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/state", 
domid);
-state = libxl__xs_read(gc, XBT_NULL, path);
-if (state != NULL && !strcmp(state, "paused")) {
-libxl__qemu_traditional_cmd(gc, domid, "continue");
-libxl__wait_for_device_model_deprecated(gc, domid, "running",
- NULL, NULL, NULL);
+rc = libxl__domain_unpause_device_model(gc, domid);
+if (rc < 0) {
+LOG(ERROR, "failed to unpause device model for domain %u:%d",
+domid, rc);
+goto out;
 }
 }
 
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index eb4ed94..a3fce46 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -2272,6 +2272,31 @@ static void remus_teardown_done(libxl__egc *egc,
 dss->callback(egc, dss, rc);
 }
 
+int libxl__domain_unpause_device_model(libxl__gc *gc, uint32_t domid)
+{
+char *path;
+char *state;
+
+switch (libxl__device_model_version_running(gc, domid)) {
+case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL:
+path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/state", 
domid);
+state = libxl__xs_read(gc, XBT_NULL, path);
+if (state != NULL && !strcmp(state, "paused")) {
+libxl__qemu_traditional_cmd(gc, domid, "continue");
+libxl__wait_for_device_model_deprecated(gc, domid, "running",
+ NULL, NULL, NULL);
+}
+case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
+if (libxl__qmp_resume(gc, domid))
+return ERROR_FAIL;
+break;
+default:
+return ERROR_FAIL;
+}
+
+return 0;
+}
+
 /* Miscellaneous */
 
 char *libxl__uuid2string(libxl__gc *gc, const libxl_uuid uuid)
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 538ac4b..8d229ac 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -1018,6 +1018,8 @@ _hidden int libxl__toolstack_restore(uint32_t domid, 
const uint8_t *buf,
  uint32_t size, void *data);
 _hidden int libxl__domain_restore_device_model(libxl__gc *gc, uint32_t domid);
 _hidden int libxl__domain_resume_device_model(libxl__gc *gc, uint32_t domid);
+_hidden int libxl__domain_unpause_device_model(libxl__gc *gc,
+   uint32_t domid);
 
 _hidden const char *libxl__userdata_path(libxl__gc *gc, uint32_t domid,
  const char *userdata_userid,
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC PATCH COLO v5 04/29] Update libxl__domain_suspend_common_switch_qemu_logdirty() for colo

2015-03-31 Thread Yang Hongyang
From: Wen Congyang 

The secondary VM runs in COLO mode, so we need to send the
secondary VM's dirty page information to the master.
libxl__domain_suspend_common_switch_qemu_logdirty() is used to enable
QEMU logdirty, but it uses domain_suspend_state and calls
libxl__xc_domain_saverestore_async_callback_done()
before it returns.

Introduce a new API, libxl__domain_common_switch_qemu_logdirty().
This API uses only libxl__logdirty_switch and calls
lds->callback before it returns.

Signed-off-by: Wen Congyang 
---
 tools/libxl/libxl_dom.c  | 79 +++-
 tools/libxl/libxl_internal.h | 12 +--
 2 files changed, 59 insertions(+), 32 deletions(-)

diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index fd0c5c2..eb4ed94 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -,7 +,7 @@ static void switch_logdirty_timeout(libxl__egc *egc, 
libxl__ev_time *ev,
 static void switch_logdirty_xswatch(libxl__egc *egc, libxl__ev_xswatch*,
 const char *watch_path, const char *event_path);
 static void switch_logdirty_done(libxl__egc *egc,
- libxl__domain_suspend_state *dss, int ok);
+ libxl__logdirty_switch *lds, int ok);
 
 static void logdirty_init(libxl__logdirty_switch *lds)
 {
@@ -1122,12 +1122,10 @@ static void logdirty_init(libxl__logdirty_switch *lds)
 
 static void domain_suspend_switch_qemu_xen_traditional_logdirty
(int domid, unsigned enable,
-libxl__save_helper_state *shs)
+libxl__logdirty_switch *lds,
+libxl__egc *egc)
 {
-libxl__egc *egc = shs->egc;
-libxl__domain_suspend_state *dss = CONTAINER_OF(shs, *dss, shs);
-libxl__logdirty_switch *lds = &dss->logdirty;
-STATE_AO_GC(dss->ao);
+STATE_AO_GC(lds->ao);
 int rc;
 xs_transaction_t t = 0;
 const char *got;
@@ -1188,64 +1186,85 @@ static void 
domain_suspend_switch_qemu_xen_traditional_logdirty
  out:
 LOG(ERROR,"logdirty switch failed (rc=%d), aborting suspend",rc);
 libxl__xs_transaction_abort(gc, &t);
-switch_logdirty_done(egc,dss,-1);
+switch_logdirty_done(egc,lds,-1);
 }
 
 static void domain_suspend_switch_qemu_xen_logdirty
(int domid, unsigned enable,
-libxl__save_helper_state *shs)
+libxl__logdirty_switch *lds,
+libxl__egc *egc)
 {
-libxl__egc *egc = shs->egc;
-libxl__domain_suspend_state *dss = CONTAINER_OF(shs, *dss, shs);
-STATE_AO_GC(dss->ao);
+STATE_AO_GC(lds->ao);
 int rc;
 
 rc = libxl__qmp_set_global_dirty_log(gc, domid, enable);
 if (!rc) {
-libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0);
+lds->callback(egc, lds, 0);
 } else {
 LOG(ERROR,"logdirty switch failed (rc=%d), aborting suspend",rc);
-libxl__xc_domain_saverestore_async_callback_done(egc, shs, -1);
+lds->callback(egc, lds, -1);
 }
 }
 
+static void libxl__domain_suspend_switch_qemu_logdirty_done
+(libxl__egc *egc,
+ libxl__logdirty_switch *lds,
+ int rc)
+{
+libxl__domain_suspend_state *dss = CONTAINER_OF(lds, *dss, logdirty);
+
+libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, rc);
+}
+
 void libxl__domain_suspend_common_switch_qemu_logdirty
(int domid, unsigned enable, void *user)
 {
 libxl__save_helper_state *shs = user;
 libxl__egc *egc = shs->egc;
 libxl__domain_suspend_state *dss = CONTAINER_OF(shs, *dss, shs);
-STATE_AO_GC(dss->ao);
+
+/* convenience aliases */
+libxl__logdirty_switch *const lds = &dss->logdirty;
+
+lds->callback = libxl__domain_suspend_switch_qemu_logdirty_done;
+
+libxl__domain_common_switch_qemu_logdirty(domid, enable, lds, egc);
+}
+
+void libxl__domain_common_switch_qemu_logdirty(int domid, unsigned enable,
+   libxl__logdirty_switch *lds,
+   libxl__egc *egc)
+{
+STATE_AO_GC(lds->ao);
 
 switch (libxl__device_model_version_running(gc, domid)) {
 case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL:
-domain_suspend_switch_qemu_xen_traditional_logdirty(domid, enable, 
shs);
+domain_suspend_switch_qemu_xen_traditional_logdirty(domid, enable,
+lds, egc);
 break;
 case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
-domain_suspend_switch_qemu_xen_logdirty(domid, enable, shs);
+domain_suspend_switch_qemu_xen_logdirty(domid, enable, lds, egc);
 break;
 default:
 LOG(ERROR,"logdirty switch failed"
 ", no valid device model ver

[Xen-devel] [RFC PATCH COLO v5 00/29] COarse-grain LOck-stepping Virtual Machines for Non-stop Service

2015-03-31 Thread Yang Hongyang
This patchset is for xen-4.6. The main differences from previous versions are:
1. Use qdisk block replication
   http://wiki.qemu.org/Features/BlockReplication
2. Nic replication based on colo-proxy
   http://wiki.qemu.org/Features/COLO#Components
Note that COLO feature is under active development, this version is not well
tested and has some known problems.
We post this early in order to give you a brief impression about how COLO
will be implemented and we request for your comments about the general idea
of COLO and of course the implementation, if you have any idea/suggestion
on COLO, please do not hesitate to give your comments, thanks in advance.

Virtual machine (VM) replication is a well known technique for providing
application-agnostic software-implemented hardware fault tolerance -
"non-stop service". Currently, remus provides this function, but it buffers
all output packets, and the latency is unacceptable.
At Xen Summit 2012, we introduced a new VM replication solution: COLO
(COarse-grain LOck-stepping virtual machine). The presentation is at
the following URL:
http://www.slideshare.net/xen_com_mgr/colo-coarsegrain-lockstepping-virtual-machines-for-nonstop-service

Here is the summary of the solution:
From the client's point of view, as long as the client observes identical
responses from the primary and secondary VMs, according to the service
semantics, then the secondary vm is a valid replica of the primary
vm, and can successfully take over when a hardware failure of the
primary vm is detected.

This patchset is based on migration v1.
Only supports hvm guest now. The codes are also hosted on github:
https://github.com/macrosheep/xen/tree/COLO_RFC_v5

TODO list:
1. Code reviews and Bug fixes
2. Switch to migration v2
3. Support pvm

Known bugs:
1. Secondary vm may crash due to triple fault.

Wiki pages:
http://wiki.xen.org/wiki/COLO_-_Coarse_Grain_Lock_Stepping
http://wiki.qemu.org/Features/COLO

Patch 1: Add readme
Patch 2-8  : Some refactor and prepare work
Patch 9-12 : Update remus to reuse remus device codes
Patch 13-21: COLO framework related codes
Patch 22-23: implement disk replication
Patch 24-29: implement nic replication

Changelog from v4 to v5:
1. rebase to the latest xen upstream
2. disk replication: blktap2->qdisk
3. nic replication: colo-agent->colo-proxy

Changelog from v3 to v4:
1. rebase to newest xen
2. bug fix

Changelog from v2 to v3:
1. rebase to newest remus
2. add nic replication support

Changelog from v1 to v2:
1. rebase to newest remus
2. add disk replication support

Wen Congyang (23):
  Add readme
  Refactor domain_suspend_callback_common()
  tools: libxl: introduce a new API libxl__domain_restore() to read qemu
state
  Update libxl__domain_suspend_common_switch_qemu_logdirty() for colo
  Introduce a new internal API libxl__domain_unpause()
  Update libxl__domain_unpause() to support qemu-xen
  support to resume uncooperative HVM guests
  tools/libxl: Introduce bitops macros
  move remus related codes to libxl_remus.c
  rename remus device to checkpoint device
  adjust the indentation
  don't touch remus in checkpoint_device
  Update libxl_save_msgs_gen.pl to support return data from xl to xc
  Allow slave sends data to master
  secondary vm suspend/resume/checkpoint code
  primary vm suspend/get_dirty_pfn/resume/checkpoint code
  xc_domain_save: flush cache before calling callbacks->postcopy() in
colo mode
  COLO: xc related codes
  send store mfn and console mfn to xl before resuming secondary vm
  implement the cmdline for COLO
  tools: xc_doamin_restore: zero ioreq page only one time
  Support colo mode for qemu disk
  COLO: use qemu block replication

Yang Hongyang (6):
  COLO proxy: implement setup/teardown of COLO proxy module
  COLO proxy: preresume, postresume and checkpoint
  COLO nic: implement COLO nic subkind
  setup and control colo proxy on primary side
  setup and control colo proxy on secondary side
  cmdline switches and config vars to control colo-proxy

 docs/README.colo  |   92 +++
 docs/man/xl.conf.pod.5|6 +
 docs/man/xl.pod.1 |   11 +-
 tools/hotplug/Linux/Makefile  |1 +
 tools/hotplug/Linux/colo-proxy-setup  |  128 
 tools/libxc/include/xenguest.h|   40 ++
 tools/libxc/xc_domain_restore.c   |  106 ++-
 tools/libxc/xc_domain_save.c  |   71 +-
 tools/libxc/xc_resume.c   |   20 +-
 tools/libxl/Makefile  |6 +-
 tools/libxl/libxl.c   |  185 +++--
 tools/libxl/libxl_bitops.h|   79 +++
 tools/libxl/libxl_checkpoint_device.c |  282 
 tools/libxl/libxl_colo.h  |   53 ++
 tools/libxl/libxl_colo_nic.c  |  313 +
 tools/libxl/libxl_colo_proxy.c|  267 
 tools/libxl/libxl_colo_qdisk.c|  209 ++
 tools/libxl/libxl_colo_restore.c  | 1190 +
 tools/libxl/libxl_colo_save.c |  782 +++

[Xen-devel] [RFC PATCH COLO v5 01/29] Add readme

2015-03-31 Thread Yang Hongyang
From: Wen Congyang 

Signed-off-by: Wen Congyang 
Signed-off-by: Yang Hongyang 
---
 docs/README.colo | 92 
 1 file changed, 92 insertions(+)
 create mode 100644 docs/README.colo

diff --git a/docs/README.colo b/docs/README.colo
new file mode 100644
index 000..60f487d
--- /dev/null
+++ b/docs/README.colo
@@ -0,0 +1,92 @@
+COLO provides fault tolerance for virtual machines by sending continuous
+checkpoints to a backup, which will activate if the target VM fails. It
+only supports HVM guest(without pv extensions).
+
+Requirements:
+1. Hardware requirements
+   There is at least one directly connected nic to forward the nic from client
+   to secondary vm. The directly connected nic must not be used by any other
+   purpose. If your guest has more than one nic, you should have directly
+   connected nic for each guest nic. If you don't have enough directly connected
+   nic, you can use vlan.
+2. Dom0 requirements
+   - Support dom0
+   - kernel module:
+sch_ingress
+cls_basic
+cls_tcindex
+cls_u32
+act_mirred
+   - libnl-tools >= 3.0. This package provides the command nl-qdisc-list, and
+ COLO needs this command.
+   - If your host os has OEM-released xen tools, please uninstall it first.
+   - You can load the module which is not provided by OEM.
+3. Guest requirements
+   Only HVM guest(without pv extensions) is supported now. If you want to
+   use OEM released guest os, please use SUSE. REDHAT and Ubuntu are not
+   supported now because I have not found any way to disable pv extensions.
+   If you want to use REDHAT or Ubuntu, you need to build the newest
+   kernel which has the parameter xen_nopv.
+
+Network link topology
+   Please refer to: http://wiki.qemu.org/Features/COLO#Network_link_topology
+
+The steps to setup COLO environment:
+You need to recompile your host kernel because the colo-proxy module needs to
+cooperate with the linux kernel.
+Please refer to: http://wiki.qemu.org/Features/COLO#Test_environment_prepare
+1. Build and install xen
+2. Apply the patch for qemu xen, and rebuild xen tools:
+- cd tools/qemu-xen-dir
+- use git am to apply the patch:
+  
https://raw.githubusercontent.com/wencongyang/colo-files/master/patch_for_qemu/*.patch
+- make tools && make install-tools
+Note: You must use qemu-xen. qemu-xen-traditional is not supported.
+3. Install COLO proxy module:
+3.1 Download COLO proxy, compile and install it:
+https://github.com/gao-feng/colo-proxy.git
+3.2 Download iptables patch, it is based on v1.4.21 compile and install it:
+
https://github.com/gao-feng/colo-proxy/blob/master/colo-patch-for-kernel.patch
+4. Install the guest
+4.1 Add "xen_platform_pci=0" into the guest configfile
+4.2 If you use suse, please select physical machine
+4.3 copy the disk image to the secondary host
+5. Update your guest config file for COLO:
+5.1 disk
+disk = [
+
'format=raw,devtype=disk,access=w,vdev=hda,backendtype=qdisk,colo,colo-params=192.168.3.1:9000:exportname=qdisk1,active-disk=/mnt/ramfs/active_disk.img,hidden-disk=/mnt/ramfs/hidden_disk.img,target=/root/images/colo-hvm.img'
 ]
+5.2 nic
+vif = [ 'mac=00:16:4f:00:00:11, bridge=br0, model=e1000, 
forwarddev=eth0, forwardbr=br1' ]
+Note:
+a. The ip/port in colo-params is the secondary host's IP. Don't use the
+   directly connected nic's IP.
+b. forwarddev is the directly connected nic.
+c. If you have more than one disk, colo-params's host/port must be the same
+   and colo-param's exportname must be different.
+6. Run COLO:
+xl remus -c -u  
+Note: The ip must not be the directly connected nic's IP.
+Note:
+Secondary host only need to do step 1-3.
+
+The known problem:
+1. Secondary vm may crash due to triple fault.
+2. The heartbeat is not reliable. If you want to test the performance,
+   please disable the heartbeat(modify the xen codes). You can use the
+   branch colo-v4-noheartbeat.
+3. Suspending the vm fails, and the error message is:
+libxl: error: libxl_qmp.c:429:qmp_next: timeout
+
+Problem 1 and 3 don't happen every time. So you can run colo again to
+avoid this problem.
+
+Virtio-Net:
+1. If you want to get better performance, you can use virtio-net.
+
+Trouble shooting:
+If an error happens when starting COLO, you can do the following:
+1. Make sure you have all necessary modules that DOM0 needs on both sides.
+2. Make sure you have followed all the instructions in this README.
+3. Try to reboot both primary and secondary host.
+4. If you still have problems, collect the error logs and contact
+   Wen Congyang(we...@cn.fujitsu.com)/Yang Hongyang(yan...@cn.fujitsu.com).
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC PATCH COLO v5 07/29] support to resume uncooperative HVM guests

2015-03-31 Thread Yang Hongyang
From: Wen Congyang 

For PVHVM, the hypercall return code is 0, and it can be resumed
in a new domain context.

For HVM, do nothing.

Signed-off-by: Wen Congyang 
---
 tools/libxc/xc_resume.c | 20 
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/tools/libxc/xc_resume.c b/tools/libxc/xc_resume.c
index e67bebd..b862ce3 100644
--- a/tools/libxc/xc_resume.c
+++ b/tools/libxc/xc_resume.c
@@ -109,6 +109,21 @@ static int xc_domain_resume_cooperative(xc_interface *xch, 
uint32_t domid)
 return do_domctl(xch, &domctl);
 }
 
+static int xc_domain_resume_hvm(xc_interface *xch, uint32_t domid)
+{
+DECLARE_DOMCTL;
+
+/*
+ * If it is PVHVM, the hypercall return code is 0, and resume
+ * it in a new domain context.
+ *
+ * If it is a HVM, do nothing.
+ */
+domctl.cmd = XEN_DOMCTL_resumedomain;
+domctl.domain = domid;
+return do_domctl(xch, &domctl);
+}
+
 static int xc_domain_resume_any(xc_interface *xch, uint32_t domid)
 {
 DECLARE_DOMCTL;
@@ -138,10 +153,7 @@ static int xc_domain_resume_any(xc_interface *xch, 
uint32_t domid)
  */
 #if defined(__i386__) || defined(__x86_64__)
 if ( info.hvm )
-{
-ERROR("Cannot resume uncooperative HVM guests");
-return rc;
-}
+return xc_domain_resume_hvm(xch, domid);
 
 if ( xc_domain_get_guest_width(xch, domid, &dinfo->guest_width) != 0 )
 {
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC PATCH COLO v5 05/29] Introduce a new internal API libxl__domain_unpause()

2015-03-31 Thread Yang Hongyang
From: Wen Congyang 

The guest is paused after libxl_domain_create_restore().
Secondary vm is running in colo mode. So we need to unpause
the guest. The current API libxl_domain_unpause() is
not an internal API. Introduce a new API to support it.

Signed-off-by: Wen Congyang 
---
 tools/libxl/libxl.c  | 21 +++--
 tools/libxl/libxl_internal.h |  1 +
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index 6e55afc..c3898ce 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -1037,9 +1037,8 @@ out:
 return AO_INPROGRESS;
 }
 
-int libxl_domain_unpause(libxl_ctx *ctx, uint32_t domid)
+int libxl__domain_unpause(libxl__gc *gc, uint32_t domid)
 {
-GC_INIT(ctx);
 char *path;
 char *state;
 int ret, rc = 0;
@@ -1059,12 +1058,22 @@ int libxl_domain_unpause(libxl_ctx *ctx, uint32_t domid)
  NULL, NULL, NULL);
 }
 }
-ret = xc_domain_unpause(ctx->xch, domid);
-if (ret<0) {
-LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "unpausing domain %d", domid);
+
+ret = xc_domain_unpause(CTX->xch, domid);
+if (ret < 0) {
+LOGE(ERROR, "unpausing domain %d", domid);
 rc = ERROR_FAIL;
 }
- out:
+
+out:
+return rc;
+}
+
+int libxl_domain_unpause(libxl_ctx *ctx, uint32_t domid)
+{
+GC_INIT(ctx);
+int rc = libxl__domain_unpause(gc, domid);
+
 GC_FREE;
 return rc;
 }
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 6470866..538ac4b 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -1037,6 +1037,7 @@ _hidden int libxl__userdata_store(libxl__gc *gc, uint32_t 
domid,
 _hidden int libxl__domain_restore(libxl__gc *gc, uint32_t domid);
 _hidden int libxl__domain_resume(libxl__gc *gc, uint32_t domid,
  int suspend_cancel);
+_hidden int libxl__domain_unpause(libxl__gc *gc, uint32_t domid);
 
 /* returns 0 or 1, or a libxl error code */
 _hidden int libxl__domain_pvcontrol_available(libxl__gc *gc, uint32_t domid);
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC PATCH COLO v5 02/29] Refactor domain_suspend_callback_common()

2015-03-31 Thread Yang Hongyang
From: Wen Congyang 

libxl__domain_suspend() is to save the guest. I think
we should call it libxl__domain_save(), but I don't
rename it.

Secondary vm is running in colo mode. So we will do
the following things again and again:
1. suspend both primary vm and secondary vm
2. sync the state
3. resume both primary vm and secondary vm
To suspend secondary vm, we need an independent API to
suspend vm.

The core function to suspend vm is domain_suspend_callback_common().
So use a new structure libxl__domain_suspend_state2
instead of libxl__domain_suspend_state. The dss's members that
will be used in domain_suspend_callback_common() are
moved to dss2.

We introduce a new API libxl__domain_suspend2() too.

Signed-off-by: Wen Congyang 
---
 tools/libxl/libxl_dom.c  | 235 ---
 tools/libxl/libxl_internal.h |  39 +--
 2 files changed, 159 insertions(+), 115 deletions(-)

diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 26a0382..d286851 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -1094,7 +1094,7 @@ int libxl__toolstack_restore(uint32_t domid, const 
uint8_t *buf,
 static void domain_suspend_done(libxl__egc *egc,
 libxl__domain_suspend_state *dss, int rc);
 static void domain_suspend_callback_common_done(libxl__egc *egc,
-libxl__domain_suspend_state *dss, int ok);
+libxl__domain_suspend_state2 *dss2, int ok);
 
 /*- complicated callback, called by xc_domain_save -*/
 
@@ -1312,16 +1312,17 @@ static void switch_logdirty_done(libxl__egc *egc,
 /*- callbacks, called by xc_domain_save -*/
 
 int libxl__domain_suspend_device_model(libxl__gc *gc,
-   libxl__domain_suspend_state *dss)
+   libxl__domain_suspend_state2 *dss2)
 {
 int ret = 0;
-uint32_t const domid = dss->domid;
-const char *const filename = dss->dm_savefile;
+uint32_t const domid = dss2->domid;
+const char *const filename = dss2->dm_savefile;
 
 switch (libxl__device_model_version_running(gc, domid)) {
 case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: {
 LOG(DEBUG, "Saving device model state to %s", filename);
-libxl__qemu_traditional_cmd(gc, domid, "save");
+if (dss2->save_dm)
+libxl__qemu_traditional_cmd(gc, domid, "save");
 libxl__wait_for_device_model_deprecated(gc, domid, "paused", NULL, 
NULL, NULL);
 break;
 }
@@ -1329,9 +1330,11 @@ int libxl__domain_suspend_device_model(libxl__gc *gc,
 if (libxl__qmp_stop(gc, domid))
 return ERROR_FAIL;
 /* Save DM state into filename */
-ret = libxl__qmp_save(gc, domid, filename);
-if (ret)
-unlink(filename);
+if (dss2->save_dm) {
+ret = libxl__qmp_save(gc, domid, filename);
+if (ret)
+unlink(filename);
+}
 break;
 default:
 return ERROR_INVAL;
@@ -1361,9 +1364,9 @@ int libxl__domain_resume_device_model(libxl__gc *gc, 
uint32_t domid)
 }
 
 static void domain_suspend_common_wait_guest(libxl__egc *egc,
- libxl__domain_suspend_state *dss);
+ libxl__domain_suspend_state2 
*dss2);
 static void domain_suspend_common_guest_suspended(libxl__egc *egc,
- libxl__domain_suspend_state *dss);
+ libxl__domain_suspend_state2 *dss2);
 
 static void domain_suspend_common_pvcontrol_suspending(libxl__egc *egc,
   libxl__xswait_state *xswa, int rc, const char *state);
@@ -1372,14 +1375,14 @@ static void 
domain_suspend_common_wait_guest_evtchn(libxl__egc *egc,
 static void suspend_common_wait_guest_watch(libxl__egc *egc,
   libxl__ev_xswatch *xsw, const char *watch_path, const char *event_path);
 static void suspend_common_wait_guest_check(libxl__egc *egc,
-libxl__domain_suspend_state *dss);
+libxl__domain_suspend_state2 
*dss2);
 static void suspend_common_wait_guest_timeout(libxl__egc *egc,
   libxl__ev_time *ev, const struct timeval *requested_abs);
 
 static void domain_suspend_common_failed(libxl__egc *egc,
- libxl__domain_suspend_state *dss);
+ libxl__domain_suspend_state2 *dss2);
 static void domain_suspend_common_done(libxl__egc *egc,
-   libxl__domain_suspend_state *dss,
+   libxl__domain_suspend_state2 *dss2,
bool ok);
 
 static bool domain_suspend_pvcontrol_acked(const char *state) {
@@ -1388,36 +1391,36 @@ static bool domain_suspend_pvcontrol_acked(const char 
*state) {
 return strcmp(state,"suspend");
 }
 
-/* calls dss->c

Re: [Xen-devel] [PATCH] xen-blkback: define pr_fmt macro to avoid the duplication of DRV_PFX

2015-03-31 Thread Chentao (Boby)

Thanks roger and joe.

I will adopt your suggestions in my v2 patch.

On 2015/3/31 22:57, Roger Pau Monné wrote:

El 31/03/15 a les 23.14, Tao Chen ha escrit:

Define pr_fmt macro with {xen-blkback: } prefix, then remove all use
of DRV_PFX in the pr and DPRINTK sentences. It will simplify the code.

And if the pr sentences miss a \n, add it in the end. If the DPRINTK
sentences have redundant \n, remove it. It will format the code.

These all make the readability of the code become better.


Thanks for the patch.


Signed-off-by: Tao Chen 
---
  drivers/block/xen-blkback/blkback.c | 62 ++---
  drivers/block/xen-blkback/common.h  |  6 
  drivers/block/xen-blkback/xenbus.c  | 18 ++-
  3 files changed, 42 insertions(+), 44 deletions(-)


[...]

diff --git a/drivers/block/xen-blkback/common.h 
b/drivers/block/xen-blkback/common.h
index 375d288..f620b5d 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -44,12 +44,6 @@
  #include 
  #include 

-#define DRV_PFX "xen-blkback:"
-#define DPRINTK(fmt, args...)  \
-   pr_debug(DRV_PFX "(%s:%d) " fmt ".\n",  \
-__func__, __LINE__, ##args)
-
-
  /*
   * This is the maximum number of segments that would be allowed in indirect
   * requests. This value will also be passed to the frontend.
diff --git a/drivers/block/xen-blkback/xenbus.c 
b/drivers/block/xen-blkback/xenbus.c
index b33083e..0dbbfeb 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -14,6 +14,11 @@

  */

+#define pr_fmt(fmt) "xen-blkback: " fmt
+#define DPRINTK(fmt, args...)  \
+   pr_debug("(%s:%d) " fmt ".\n",  \
+   __func__, __LINE__, ##args)
+
  #include 
  #include 
  #include 
@@ -426,14 +431,14 @@ static int xen_vbd_create(struct xen_blkif *blkif, 
blkif_vdev_t handle,
 FMODE_READ : FMODE_WRITE, NULL);

if (IS_ERR(bdev)) {
-   DPRINTK("xen_vbd_create: device %08x could not be opened.\n",
+   DPRINTK("xen_vbd_create: device %08x could not be opened",
vbd->pdevice);
return -ENOENT;
}

vbd->bdev = bdev;
if (vbd->bdev->bd_disk == NULL) {
-   DPRINTK("xen_vbd_create: device %08x doesn't exist.\n",
+   DPRINTK("xen_vbd_create: device %08x doesn't exist",
vbd->pdevice);


IMHO these two above should probably be made a pr_warn...


xen_vbd_free(vbd);
return -ENOENT;
@@ -452,7 +457,7 @@ static int xen_vbd_create(struct xen_blkif *blkif, 
blkif_vdev_t handle,
if (q && blk_queue_secdiscard(q))
vbd->discard_secure = true;

-   DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
+   DPRINTK("Successful creation of handle=%04x (dom=%u)",
handle, blkif->domid);


...and this should be turned into a plain pr_debug. And with that we can
get rid of DPRINTK.

Roger.


.




___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [qemu-upstream-4.4-testing test] 50274: tolerable FAIL - PUSHED

2015-03-31 Thread osstest service user
flight 50274 qemu-upstream-4.4-testing real [real]
http://logs.test-lab.xenproject.org/osstest/logs/50274/

Failures :-/ but no regressions.

Tests which are failing intermittently (not blocking):
 test-amd64-i386-pair 17 guest-migrate/src_host/dst_host fail pass in 36769
 test-amd64-i386-xl-win7-amd64 13 guest-localmigrate/x10 fail pass in 36769
 test-amd64-amd64-xl-qemuu-winxpsp3 7 windows-install fail in 36769 pass in 
50274

Tests which did not succeed, but are not blocking:
 test-amd64-i386-libvirt  10 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt 10 migrate-support-checkfail   never pass
 test-amd64-amd64-xl-qemut-winxpsp3 14 guest-stop   fail never pass
 test-amd64-i386-xl-qemuu-winxpsp3-vcpus1 14 guest-stop fail never pass
 test-amd64-amd64-xl-winxpsp3 14 guest-stop   fail   never pass
 test-amd64-i386-xl-qemuu-win7-amd64 14 guest-stop  fail never pass
 test-amd64-i386-xend-winxpsp3 17 leak-check/check fail  never pass
 test-amd64-i386-xl-qemut-win7-amd64 14 guest-stop  fail never pass
 test-amd64-amd64-xl-win7-amd64 14 guest-stop   fail never pass
 test-amd64-amd64-xl-qemuu-win7-amd64 14 guest-stop fail never pass
 test-amd64-amd64-xl-qemut-win7-amd64 14 guest-stop fail never pass
 test-amd64-i386-xl-winxpsp3-vcpus1 14 guest-stop   fail never pass
 test-amd64-i386-xl-qemut-winxpsp3-vcpus1 14 guest-stop fail never pass
 test-amd64-i386-xend-qemut-winxpsp3 17 leak-check/checkfail never pass
 test-amd64-amd64-xl-qemuu-winxpsp3 14 guest-stop   fail never pass
 test-amd64-amd64-xl-pcipt-intel  9 guest-startfail in 36769 never pass
 test-amd64-i386-xl-win7-amd64 14 guest-stop   fail in 36769 never pass

version targeted for testing:
 qemuu d173a0c20d7970c17fa593cf86abc1791a8a4a3a
baseline version:
 qemuu b04df88d41f64fc6b56d193b6e90fb840cedb1d3


People who touched revisions under test:
  Benoit Canet 
  Benoît Canet 
  Dmitry Fleytman 
  Gerd Hoffmann 
  Jason Wang 
  Jeff Cody 
  Juan Quintela 
  Kevin Wolf 
  Laszlo Ersek 
  Michael Roth 
  Michael S. Tsirkin 
  Peter Maydell 
  Petr Matousek 
  Stefan Hajnoczi 
  Stefano Stabellini 


jobs:
 build-amd64-xend pass
 build-i386-xend  pass
 build-amd64  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-i386-libvirt   pass
 build-amd64-pvopspass
 build-i386-pvops pass
 test-amd64-amd64-xl  pass
 test-amd64-i386-xl   pass
 test-amd64-i386-rhel6hvm-amd pass
 test-amd64-i386-qemut-rhel6hvm-amd   pass
 test-amd64-i386-qemuu-rhel6hvm-amd   pass
 test-amd64-amd64-xl-qemut-debianhvm-amd64pass
 test-amd64-i386-xl-qemut-debianhvm-amd64 pass
 test-amd64-amd64-xl-qemuu-debianhvm-amd64pass
 test-amd64-i386-xl-qemuu-debianhvm-amd64 pass
 test-amd64-i386-freebsd10-amd64  pass
 test-amd64-amd64-xl-qemuu-ovmf-amd64 pass
 test-amd64-i386-xl-qemuu-ovmf-amd64  pass
 test-amd64-amd64-xl-qemut-win7-amd64 fail
 test-amd64-i386-xl-qemut-win7-amd64  fail
 test-amd64-amd64-xl-qemuu-win7-amd64 fail
 test-amd64-i386-xl-qemuu-win7-amd64  fail
 test-amd64-amd64-xl-win7-amd64   fail
 test-amd64-i386-xl-win7-amd64fail
 test-amd64-amd64-xl-credit2  pass
 test-amd64-i386-freebsd10-i386   pass
 test-amd64-i386-rhel6hvm-intel   pass
 test-amd64-i386-qemut-rhel6hvm-intel pass
 test-amd64-i386-qemuu-rhel6hvm-intel pass
 test-amd64-amd64-libvirt pass
 test-amd64-i386-libvirt  pass
 test-amd64-amd64-xl-multivcpupass
 test-amd64-amd64-pairpass
 test-amd64-i386-pair fail
 test-amd64-amd64-xl-sedf-pin pass
 test-amd64-amd64-pv  pass

Re: [Xen-devel] [OSSTEST Nested PATCH v7 2/6] Edit some testsupport APIs for nested test

2015-03-31 Thread Pang, LongtaoX


> -Original Message-
> From: Ian Campbell [mailto:ian.campb...@citrix.com]
> Sent: Tuesday, March 31, 2015 9:50 PM
> To: Pang, LongtaoX
> Cc: xen-devel@lists.xen.org; ian.jack...@eu.citrix.com; wei.l...@citrix.com;
> Hu, Robert
> Subject: Re: [OSSTEST Nested PATCH v7 2/6] Edit some testsupport APIs for
> nested test
> 
> On Fri, 2015-03-27 at 19:06 -0400, longtao.pang wrote:
> > 1. Designate vif model to 'e1000' by make-flight.
> 
> Strictly you could s/to 'e1000'// here since the make-flight changes are
> elsewhere and that would better describe the generic change.
> 
Do you mean that I should change the description from "Designate vif model to 
'e1000' by make-flight" to "Designate vif model by make-flight"?
> > 2. In L2 installation context, its host (L1) IP address is not queried
> > from DNS, but after running "ts-nested-setup + host + nested", L1 IP
> > is stored in runvar.
> >
> > Signed-off-by: longtao.pang 
> 
> Acked-by: Ian Campbell 
> 

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [xen-unstable bisection] complete test-amd64-amd64-xl-multivcpu

2015-03-31 Thread xen . org
branch xen-unstable
xen branch xen-unstable
job test-amd64-amd64-xl-multivcpu
test guest-localmigrate

Tree: linux git://xenbits.xen.org/linux-pvops.git
Tree: linuxfirmware git://xenbits.xen.org/osstest/linux-firmware.git
Tree: qemu git://xenbits.xen.org/staging/qemu-xen-unstable.git
Tree: qemuu git://xenbits.xen.org/staging/qemu-upstream-unstable.git
Tree: xen git://xenbits.xen.org/xen.git

*** Found and reproduced problem changeset ***

  Bug is in tree:  xen git://xenbits.xen.org/xen.git
  Bug introduced:  d639e6a05a0f8ee0e61c6cc4eebba78934ef3648
  Bug not present: 88a2372c6ba44dd42b915a95a823cf9d4d260e25


  commit d639e6a05a0f8ee0e61c6cc4eebba78934ef3648
  Author: Jan Beulich 
  Date:   Mon Mar 23 16:51:14 2015 +0100
  
  x86: allow 64-bit PV guest kernels to suppress user mode exposure of M2P
  
  Xen L4 entries being uniformly installed into any L4 table and 64-bit
  PV kernels running in ring 3 means that user mode was able to see the
  read-only M2P presented by Xen to the guests. While apparently not
  really representing an exploitable information leak, this still very
  certainly was never meant to be that way.
  
  Building on the fact that these guests already have separate kernel and
  user mode page tables we can allow guest kernels to tell Xen that they
  don't want user mode to see this table. We can't, however, do this by
  default: There is no ABI requirement that kernel and user mode page
  tables be separate. Therefore introduce a new VM-assist flag allowing
  the guest to control respective hypervisor behavior:
  - when not set, L4 tables get created with the respective slot blank,
and whenever the L4 table gets used as a kernel one the missing
mapping gets inserted,
  - when set, L4 tables get created with the respective slot initialized
as before, and whenever the L4 table gets used as a user one the
mapping gets zapped.
  
  Signed-off-by: Jan Beulich 
  Reviewed-by: Tim Deegan 


For bisection revision-tuple graph see:
   
http://www.chiark.greenend.org.uk/~xensrcts/results/bisect.xen-unstable.test-amd64-amd64-xl-multivcpu.guest-localmigrate.html
Revision IDs in each graph node refer, respectively, to the Trees above.


Searching for failure / basis pass:
 36772 fail [host=scape-moth] / 36622 [host=potato-beetle] 36540 
[host=fire-frog] 36514 [host=bush-cricket] 35957 [host=field-cricket] 35887 
[host=grain-weevil] 35810 [host=bush-cricket] 35556 ok.
Failure / basis pass flights: 36772 / 35556
(tree with no url: ovmf)
(tree with no url: seabios)
Tree: linux git://xenbits.xen.org/linux-pvops.git
Tree: linuxfirmware git://xenbits.xen.org/osstest/linux-firmware.git
Tree: qemu git://xenbits.xen.org/staging/qemu-xen-unstable.git
Tree: qemuu git://xenbits.xen.org/staging/qemu-upstream-unstable.git
Tree: xen git://xenbits.xen.org/xen.git
Latest 8a5f782c33c04ea5c9b3ca6fb32d6039e2e5c0c9 
c530a75c1e6a472b0eb9558310b518f0dfcd8860 
a4b276b4ce49c8d70dd841ff885b900ec652b994 
42ffdf360dd9df66b0a4a7ada059c02a3cf3a8de 
84066dd4ef4bb5983e246c629a26ef4f3394e5d5
Basis pass a74f1d1204a5c892466b52ac68ee6443c1e459d7 
c530a75c1e6a472b0eb9558310b518f0dfcd8860 
a4b276b4ce49c8d70dd841ff885b900ec652b994 
0d37748342e29854db7c9f6c47d7f58c6cfba6b2 
befe0a0da90d7ac063fd8b5891c7d0cafa5f
Generating revisions with ./adhoc-revtuple-generator  
git://xenbits.xen.org/linux-pvops.git#a74f1d1204a5c892466b52ac68ee6443c1e459d7-8a5f782c33c04ea5c9b3ca6fb32d6039e2e5c0c9
 
git://xenbits.xen.org/osstest/linux-firmware.git#c530a75c1e6a472b0eb9558310b518f0dfcd8860-c530a75c1e6a472b0eb9558310b518f0dfcd8860
 
git://xenbits.xen.org/staging/qemu-xen-unstable.git#a4b276b4ce49c8d70dd841ff885b900ec652b994-a4b276b4ce49c8d70dd841ff885b900ec652b994
 
git://xenbits.xen.org/staging/qemu-upstream-unstable.git#0d37748342e29854db7c9f6c47d7f58c6cfba6b2-42ffdf360dd9df66b0a4a7ada059c02a3cf3a8de
 
git://xenbits.xen.org/xen.git#befe0a0da90d7ac063fd8b5891c7d0cafa5f-84066dd4ef4bb5983e246c629a26ef4f3394e5d5
+ exec
+ sh -xe
+ cd /export/home/osstest/repos/linux-pvops
+ git remote set-url origin 
git://drall.uk.xensource.com:9419/git://xenbits.xen.org/linux-pvops.git
+ git fetch -p origin +refs/heads/*:refs/remotes/origin/*
+ exec
+ sh -xe
+ cd /export/home/osstest/repos/qemu-upstream-unstable
+ git remote set-url origin 
git://drall.uk.xensource.com:9419/git://xenbits.xen.org/staging/qemu-upstream-unstable.git
+ git fetch -p origin +refs/heads/*:refs/remotes/origin/*
+ exec
+ sh -xe
+ cd /export/home/osstest/repos/xen
+ git remote set-url origin 
git://drall.uk.xensource.com:9419/git://xenbits.xen.org/xen.git
+ git fetch -p origin +refs/heads/*:refs/remotes/origin/*
+ exec
+ sh -xe
+ cd /export/home/osstest/repos/linux-pvops
+ git remote set-url origin 
git://drall.uk.xensource.com:9419/git://xenbits.xen.org/linux-pvops.git
+ git fetch -p origin +refs/heads/*:refs/remotes/origin/*
+ exec
+ sh -xe
+ 

[Xen-devel] [xen-unstable test] 50273: regressions - FAIL

2015-03-31 Thread osstest service user
flight 50273 xen-unstable real [real]
http://logs.test-lab.xenproject.org/osstest/logs/50273/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-amd64-i386-freebsd10-i386 11 guest-localmigrate  fail REGR. vs. 36514

Regressions which are regarded as allowable (not blocking):
 test-armhf-armhf-libvirt  9 guest-start   fail REGR. vs. 36514
 test-amd64-i386-pair17 guest-migrate/src_host/dst_host fail like 36514

Tests which did not succeed, but are not blocking:
 test-amd64-amd64-libvirt-xsm  1 build-check(1)   blocked  n/a
 test-amd64-i386-libvirt-xsm   1 build-check(1)   blocked  n/a
 test-amd64-amd64-xl-xsm   1 build-check(1)   blocked  n/a
 test-amd64-i386-xl-xsm1 build-check(1)   blocked  n/a
 test-amd64-amd64-xl-qemuu-debianhvm-amd64-xsm  1 build-check(1)blocked n/a
 test-amd64-i386-xl-qemuu-debianhvm-amd64-xsm  1 build-check(1) blocked n/a
 test-amd64-amd64-xl-qemut-debianhvm-amd64-xsm  1 build-check(1)blocked n/a
 test-amd64-i386-xl-qemut-debianhvm-amd64-xsm  1 build-check(1) blocked n/a
 test-armhf-armhf-libvirt-xsm  1 build-check(1)   blocked  n/a
 test-armhf-armhf-xl-xsm   1 build-check(1)   blocked  n/a
 test-amd64-amd64-xl-pvh-intel  9 guest-start  fail  never pass
 test-amd64-i386-libvirt  10 migrate-support-checkfail   never pass
 test-amd64-amd64-xl-pvh-amd   9 guest-start  fail   never pass
 test-armhf-armhf-xl-sedf-pin 10 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-arndale  10 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt 10 migrate-support-checkfail   never pass
 build-armhf-xsm   5 xen-buildfail   never pass
 test-armhf-armhf-xl-cubietruck 10 migrate-support-checkfail never pass
 test-armhf-armhf-xl  10 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-sedf 10 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-multivcpu 10 migrate-support-checkfail  never pass
 test-armhf-armhf-xl-credit2  10 migrate-support-checkfail   never pass
 test-amd64-i386-xl-qemuu-winxpsp3-vcpus1 14 guest-stop fail never pass
 test-amd64-i386-xl-qemuu-winxpsp3 14 guest-stopfail never pass
 build-amd64-xsm   5 xen-buildfail   never pass
 test-amd64-i386-xl-qemuu-win7-amd64 14 guest-stop  fail never pass
 test-amd64-i386-xl-win7-amd64 14 guest-stop   fail  never pass
 test-amd64-amd64-xl-qemuu-win7-amd64 14 guest-stop fail never pass
 test-amd64-amd64-xl-win7-amd64 14 guest-stop   fail never pass
 test-amd64-amd64-xl-qemut-win7-amd64 14 guest-stop fail never pass
 test-amd64-i386-xl-qemut-win7-amd64 14 guest-stop  fail never pass
 test-amd64-i386-xl-qemut-winxpsp3-vcpus1 14 guest-stop fail never pass
 test-amd64-i386-xl-qemut-winxpsp3 14 guest-stopfail never pass
 test-amd64-amd64-xl-winxpsp3 14 guest-stop   fail   never pass
 test-amd64-i386-xl-winxpsp3-vcpus1 14 guest-stop   fail never pass
 test-amd64-i386-xl-winxpsp3  14 guest-stop   fail   never pass
 test-amd64-amd64-xl-qemut-winxpsp3 14 guest-stop   fail never pass
 test-amd64-amd64-xl-qemuu-winxpsp3 14 guest-stop   fail never pass

version targeted for testing:
 xen  71cba2a07bb541f25390cdd3546c9ee296a7257b
baseline version:
 xen  3a28f760508fb35c430edac17a9efde5aff6d1d5


People who touched revisions under test:
  Andrew Cooper 
  Boris Ostrovsky 
  Daniel De Graaf 
  Dario Faggioli 
  Don Slutz 
  George Dunlap 
  Ian Campbell 
  Ian Jackson 
  Jan Beulich 
  JeHyeon Yeon 
  Jim Fehlig 
  Juergen Gross 
  Kevin Tian 
  Konrad Rzeszutek Wilk 
  Koushik Chakravarty 
  Olaf Hering 
  Pramod Devendra 
  Quan Xu 
  Riku Voipio 
  Roger Pau Monné 
  Ross Lagerwall 
  Tim Deegan 
  Vijaya Kumar K 
  Vijaya Kumar K
  Wei Liu 
  Wen Congyang 
  Yang Hongyang 
  Yang Zhang 


jobs:
 build-amd64-xsm  fail
 build-armhf-xsm  fail
 build-i386-xsm   pass
 build-amd64  pass
 build-armhf  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-armhf-libvirt  pass
 build-i386-libvirt   pass
 build-amd64-oldkern  

Re: [Xen-devel] [OSSTEST Nested PATCH v7 1/6] parsing grub which has 'submenu' primitive

2015-03-31 Thread Hu, Robert
> -Original Message-
> From: Ian Campbell [mailto:ian.campb...@citrix.com]
> Sent: Tuesday, March 31, 2015 9:44 PM
> To: Pang, LongtaoX
> Cc: xen-devel@lists.xen.org; ian.jack...@eu.citrix.com; wei.l...@citrix.com;
> Hu, Robert
> Subject: Re: [OSSTEST Nested PATCH v7 1/6] parsing grub which has 'submenu'
> primitive
> 
> On Fri, 2015-03-27 at 19:06 -0400, longtao.pang wrote:
> > From a hvm kernel build from Linux stable Kernel tree,
> > the auto generated grub2 menu will have 'submenu' primitive, upon the
> > 'menuentry' items. Xen boot entries will be grouped into a submenu. This
> > patch adds capability to support such grub formats.
> >
> > Signed-off-by: longtao.pang 
> > ---
> > Changes in v7:
> > Remove the reformatting change for Debian.pm and keep the original format.
> 
> Thank you.
> 
> > ---
> >  Osstest/Debian.pm |   21 -
> >  1 file changed, 16 insertions(+), 5 deletions(-)
> >
> > diff --git a/Osstest/Debian.pm b/Osstest/Debian.pm
> > index 6784024..35163a0 100644
> > --- a/Osstest/Debian.pm
> > +++ b/Osstest/Debian.pm
> > @@ -398,10 +398,18 @@ sub setupboot_grub2 () {
> >
> >  my $count= 0;
> >  my $entry;
> > +my $submenu;
> >  while (<$f>) {
> >  next if m/^\s*\#/ || !m/\S/;
> >  if (m/^\s*\}\s*$/) {
> > -die unless $entry;
> > +die unless $entry || $submenu;
> > +if(!defined $entry && defined $submenu){
> > +logm("Met end of a submenu starting from ".
> > +"$submenu->{StartLine}. ".
> > +"Our want kern is $want_kernver");
> > +$submenu=undef;
> > +next;
> > +}
> >  my (@missing) =
> >  grep { !defined $entry->{$_} }
> > (defined $xenhopt
> > @@ -432,21 +440,24 @@ sub setupboot_grub2 () {
> >  $entry= { Title => $1, StartLine => $., Number =>
> $count };
> >  $count++;
> >  }
> > -if (m/^\s*multiboot\s*\/(xen\-[0-9][-+.0-9a-z]*\S+)/) {
> > +if (m/^submenu\s+[\'\"](.*)[\'\"].*\{\s*$/) {
> > +$submenu={ StartLine =>$.};
> > +}
> 
> This looks reasonable enough to support a single nesting, I suppose we
> can leave more deeply nested submenus for another time.
> 
> So in that regard this patch looks ok to me.
> 
> > +if (m/^\s*multiboot\s*(?:\/boot)*\/(xen\S+)/) {
> >  die unless $entry;
> >  $entry->{Hv}= $1;
> >  }
> > -if (m/^\s*multiboot\s*\/(vmlinu[xz]-(\S+))/) {
> > +if (m/^\s*multiboot\s*(?:\/boot)*\/(vmlinu[xz]-(\S+))/) {
> 
> What are these changes all about? I think they must be unrelated to the
> use of submenu (perhaps relate to having a separate /boot or not?). If
> so then please do in a separate patch.
> 
You're right. This has nothing to do with submenu.
Going to separate it out in another patch.
> If this is somehow to do with submenu then please explain how/why in the
> commit log.
> 
> BTW, your regex as it stands will accept /boot/boot/boot/boot/vmlinuz. I
> think you maybe meant to add "(?:\/boot)?" to match zero or one
> occurrences?
Yes, this is a potential bug. Thanks for pointing it out!
> 
> Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [v3][PATCH 2/2] libxl: introduce gfx_passthru_kind

2015-03-31 Thread Chen, Tiejun

On 2015/3/30 17:19, Ian Campbell wrote:

On Mon, 2015-03-30 at 09:28 +0800, Chen, Tiejun wrote:

Sounds like it should be a legacy fix to qemu-xen-traditional :) So let's do
it now,

@@ -326,6 +326,10 @@ static char **
libxl__build_device_model_args_old(libxl__gc *gc,
   }
   if (libxl_defbool_val(b_info->u.hvm.gfx_passthru)) {
   flexarray_append(dm_args, "-gfx_passthru");
+if (b_info->u.hvm.gfx_passthru_kind >
+LIBXL_GFX_PASSTHRU_KIND_IGD)
+LOG(ERROR, "unsupported device type for
\"gfx_passthru\".\n");
+return NULL;


I'd rather not encode any ordering constraints if we don't have to. I
think this is preferable:

   if (libxl_defbool_val(b_info->u.hvm.gfx_passthru)) {
switch (b_info->u.hvm.gfx_passthru_kind) {
case LIBXL_GFX_PASSTHRU_KIND_DEFAULT:
case LIBXL_GFX_PASSTHRU_KIND_IGD:
flexarray_append(dm_args, "-gfx_passthru");
break;
default:
LOG(ERROR, "unsupported gfx_passthru_kind.\n");
return NULL;
}
  }

(notice that the error message above doesn't refer to the xl specific
option naming).



Sorry for the delayed response.

This looks reasonable and I regenerate this patch based on this comment:

 libxl: introduce gfx_passthru_kind

Although we already have 'gfx_passthru' in b_info, this doesn't suffice
after we want to handle IGD specifically. Now we define a new field of
type, gfx_passthru_kind, to indicate we're trying to pass IGD. Actually
this means we can benefit this to support other specific devices just
by extending gfx_passthru_kind. And then we can cooperate with
gfx_passthru to address IGD cases as follows:

gfx_passthru = 0=> sets build_info.u.gfx_passthru to false
gfx_passthru = 1=> sets build_info.u.gfx_passthru to true and
   build_info.u.gfx_passthru_kind to DEFAULT
gfx_passthru = "igd"=> sets build_info.u.gfx_passthru to true
   and build_info.u.gfx_passthru_kind to IGD

Here if gfx_passthru_kind = DEFAULT, we will call
libxl__is_igd_vga_passthru() to check if we're hitting that table to need
to pass that option to qemu. But if gfx_passthru_kind = "igd" we always
force to pass that.

And now "gfx_passthru" is supported both with the qemu-xen-traditional
device-model and upstream qemu-xen device-model. But when given as a
string this option describes the type of device to enable. Note this
behavior is only supported with the upstream qemu-xen device-model.

Signed-off-by: Tiejun Chen 
---
 docs/man/xl.cfg.pod.5   | 34 +
 tools/libxl/libxl.h |  6 ++
 tools/libxl/libxl_dm.c  | 46 
+

 tools/libxl/libxl_types.idl |  6 ++
 tools/libxl/xl_cmdimpl.c| 14 --
 5 files changed, 96 insertions(+), 10 deletions(-)

diff --git a/docs/man/xl.cfg.pod.5 b/docs/man/xl.cfg.pod.5
index 408653f..dfde92d 100644
--- a/docs/man/xl.cfg.pod.5
+++ b/docs/man/xl.cfg.pod.5
@@ -671,7 +671,7 @@ through to this VM. See L above.
 devices passed through to this VM. See L
 above.

-=item B
+=item B

 Enable graphics device PCI passthrough. This option makes an assigned
 PCI graphics card become primary graphics card in the VM. The QEMU
@@ -699,9 +699,35 @@ working graphics passthrough. See the 
XenVGAPassthroughTestedAdapters

 L wiki page
 for currently supported graphics cards for gfx_passthru.

-gfx_passthru is currently only supported with the qemu-xen-traditional
-device-model. Upstream qemu-xen device-model currently does not have
-support for gfx_passthru.
+gfx_passthru is currently supported both with the qemu-xen-traditional
+device-model and upstream qemu-xen device-model.
+
+When given as a boolean the B option either disables gfx
+passthru or enables autodetection.
+
+But when given as a string the B option describes the type
+of device to enable. Note this behavior is only supported with the upstream
+qemu-xen device-model.
+
+Currently, valid options are:
+
+=over 4
+
+=item B
+
+Disables graphics device PCI passthrough.
+
+=item B, B
+
+Enables graphics device PCI passthrough and autodetects the type of device
+which is being used.
+
+=item "igd"
+
+Enables graphics device PCI passthrough but forcing the type
+of device to Intel Graphics Device.
+
+=back

 Note that some graphics adapters (AMD/ATI cards, for example) do not
 necessarily require gfx_passthru option, so you can use the normal Xen
diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
index 5eec092..1144c5e 100644
--- a/tools/libxl/libxl.h
+++ b/tools/libxl/libxl.h
@@ -720,6 +720,12 @@ void libxl_mac_copy(libxl_ctx *ctx, libxl_mac *dst, 
libxl_mac *src);

 #define LIBXL_HAVE_PSR_MBM 1
 #endif

+/*
+ * libxl_domain_

Re: [Xen-devel] Xen-unstable-staging: Xen BUG at iommu_map.c:455

2015-03-31 Thread Sander Eikelenboom

Wednesday, April 1, 2015, 1:38:34 AM, you wrote:

> On 31/03/2015 22:11, Sander Eikelenboom wrote:
>> Hi all,
>>
>> I just tested xen-unstable staging (changeset: git:0522407-dirty) 
>>
>> with revert of commit 1aeb1156fa43fe2cd2b5003995b20466cd19a622
>> (due to an already reported but not yet resolved issue)
>>
>> and build with qemu xen from 
>> git://xenbits.xen.org/staging/qemu-upstream-unstable.git
>> (to include the pci command register patch from Jan)
>>
>>
>> and now came across this new splat when starting an HVM with PCI passthrough:

> Wow - you are getting all the fun bugs at the moment!

Hrmm i'm not so sure at the moment .. could also be a stale tree or is it just
that it's april 1st ..
*sigh* 
tried to git reset --hard to a known good changeset .. but it still seems
to fail, even with cold boot. 

So sorry for the noise and please ignore for the moment while i'm trying to
figure out what is fooling me :-)

--
sander



> Nothing has changed in the AMD IOMMU driver for a while, but the
> BUG_ON() is particularly unhelpful at identifying what went wrong.

> As a first pass triage, can you rerun with

> diff --git a/xen/drivers/passthrough/amd/iommu_map.c
> b/xen/drivers/passthrough/amd/iommu_map.c
> index 495ff5c..f15c324 100644
> --- a/xen/drivers/passthrough/amd/iommu_map.c
> +++ b/xen/drivers/passthrough/amd/iommu_map.c
> @@ -451,8 +451,9 @@ static int iommu_pde_from_gfn(struct domain *d,
> unsigned long pfn,
>  table = hd->arch.root_table;
>  level = hd->arch.paging_mode;

> -BUG_ON( table == NULL || level < IOMMU_PAGING_MODE_LEVEL_1 ||
> -level > IOMMU_PAGING_MODE_LEVEL_6 );
> +BUG_ON(table == NULL);
> +BUG_ON(level < IOMMU_PAGING_MODE_LEVEL_1);
> +BUG_ON(level > IOMMU_PAGING_MODE_LEVEL_6);

>  next_table_mfn = page_to_mfn(table);

> which will help identify which of the conditions is failing.

> Can you please also provide the full serial log, including iommu=debug?

> ~Andrew



___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] Xen-unstable-staging: Xen BUG at iommu_map.c:455

2015-03-31 Thread Andrew Cooper
On 31/03/2015 22:11, Sander Eikelenboom wrote:
> Hi all,
>
> I just tested xen-unstable staging (changeset: git:0522407-dirty) 
>
> with revert of commit 1aeb1156fa43fe2cd2b5003995b20466cd19a622
> (due to an already reported but not yet resolved issue)
>
> and build with qemu xen from 
> git://xenbits.xen.org/staging/qemu-upstream-unstable.git
> (to include the pci command register patch from Jan)
>
>
> and now came across this new splat when starting an HVM with PCI passthrough:

Wow - you are getting all the fun bugs at the moment!

Nothing has changed in the AMD IOMMU driver for a while, but the
BUG_ON() is particularly unhelpful at identifying what went wrong.

As a first pass triage, can you rerun with

diff --git a/xen/drivers/passthrough/amd/iommu_map.c
b/xen/drivers/passthrough/amd/iommu_map.c
index 495ff5c..f15c324 100644
--- a/xen/drivers/passthrough/amd/iommu_map.c
+++ b/xen/drivers/passthrough/amd/iommu_map.c
@@ -451,8 +451,9 @@ static int iommu_pde_from_gfn(struct domain *d,
unsigned long pfn,
 table = hd->arch.root_table;
 level = hd->arch.paging_mode;

-BUG_ON( table == NULL || level < IOMMU_PAGING_MODE_LEVEL_1 ||
-level > IOMMU_PAGING_MODE_LEVEL_6 );
+BUG_ON(table == NULL);
+BUG_ON(level < IOMMU_PAGING_MODE_LEVEL_1);
+BUG_ON(level > IOMMU_PAGING_MODE_LEVEL_6);

 next_table_mfn = page_to_mfn(table);

which will help identify which of the conditions is failing.

Can you please also provide the full serial log, including iommu=debug?

~Andrew

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [qemu-mainline test] 50272: regressions - FAIL

2015-03-31 Thread osstest service user
flight 50272 qemu-mainline real [real]
http://logs.test-lab.xenproject.org/osstest/logs/50272/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-amd64-i386-freebsd10-i386 14 guest-localmigrate/x10  fail REGR. vs. 36709

Regressions which are regarded as allowable (not blocking):
 test-amd64-i386-pair17 guest-migrate/src_host/dst_host fail like 36709

Tests which did not succeed, but are not blocking:
 test-amd64-i386-xl-qemuu-debianhvm-amd64-xsm 7 debian-hvm-install fail never 
pass
 test-amd64-amd64-xl-qemut-debianhvm-amd64-xsm 7 debian-hvm-install fail never 
pass
 test-amd64-i386-libvirt-xsm   9 guest-start  fail   never pass
 test-amd64-amd64-xl-qemuu-debianhvm-amd64-xsm 7 debian-hvm-install fail never 
pass
 test-amd64-amd64-xl-pvh-intel  9 guest-start  fail  never pass
 test-amd64-i386-xl-qemut-debianhvm-amd64-xsm 7 debian-hvm-install fail never 
pass
 test-amd64-i386-xl-xsm9 guest-start  fail   never pass
 test-amd64-i386-libvirt  10 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-arndale  10 migrate-support-checkfail   never pass
 test-armhf-armhf-xl  10 migrate-support-checkfail   never pass
 test-amd64-amd64-xl-xsm   9 guest-start  fail   never pass
 test-amd64-amd64-libvirt 10 migrate-support-checkfail   never pass
 test-amd64-amd64-xl-pvh-amd   9 guest-start  fail   never pass
 test-amd64-amd64-libvirt-xsm  9 guest-start  fail   never pass
 test-armhf-armhf-libvirt 10 migrate-support-checkfail   never pass
 test-armhf-armhf-libvirt-xsm  5 xen-boot fail   never pass
 test-armhf-armhf-xl-xsm   5 xen-boot fail   never pass
 test-armhf-armhf-xl-cubietruck 10 migrate-support-checkfail never pass
 test-armhf-armhf-xl-sedf 10 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-multivcpu 10 migrate-support-checkfail  never pass
 test-armhf-armhf-xl-sedf-pin 10 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-credit2  10 migrate-support-checkfail   never pass
 test-amd64-i386-xl-qemut-win7-amd64 14 guest-stop  fail never pass
 test-amd64-i386-xl-win7-amd64 14 guest-stop   fail  never pass
 test-amd64-amd64-xl-win7-amd64 14 guest-stop   fail never pass
 test-amd64-i386-xl-qemuu-winxpsp3-vcpus1 14 guest-stop fail never pass
 test-amd64-i386-xl-qemut-winxpsp3-vcpus1 14 guest-stop fail never pass
 test-amd64-amd64-xl-qemut-winxpsp3 14 guest-stop   fail never pass
 test-amd64-i386-xl-qemuu-win7-amd64 14 guest-stop  fail never pass
 test-amd64-amd64-xl-qemut-win7-amd64 14 guest-stop fail never pass
 test-amd64-amd64-xl-qemuu-win7-amd64 14 guest-stop fail never pass
 test-amd64-amd64-xl-winxpsp3 14 guest-stop   fail   never pass
 test-amd64-i386-xl-qemut-winxpsp3 14 guest-stopfail never pass
 test-amd64-amd64-xl-qemuu-winxpsp3 14 guest-stop   fail never pass
 test-amd64-i386-xl-winxpsp3-vcpus1 14 guest-stop   fail never pass
 test-amd64-i386-xl-qemuu-winxpsp3 14 guest-stopfail never pass
 test-amd64-i386-xl-winxpsp3  14 guest-stop   fail   never pass

version targeted for testing:
 qemuu627f91b1f80fecc73d00727181a9ddb6162cc30e
baseline version:
 qemuu362ca922eea03240916287a8a6267801ab095d12


People who touched revisions under test:
  Alexander Graf 
  Alexey Kardashevskiy 
  Bastian Koppelmann 
  Cole Robinson 
  David Gibson 
  Dirk Mueller 
  Dirk Müller 
  Dr. David Alan Gilbert 
  Eduardo Otubo 
  Fam Zheng 
  Gabriel L. Somlo 
  Gabriel Somlo 
  Gerd Hoffmann 
  Gonglei 
  Hervé Poussineau 
  Jason Wang 
  John Snow 
  Juan Quintela 
  Leon Alrae 
  Markus Armbruster 
  Meghana Cheripady 
  Michael S. Tsirkin 
  Padmanabh Ratnakar 
  Paolo Bonzini 
  Peter Crosthwaite 
  Peter Maydell 
  Shannon Zhao 
  Shannon Zhao 
  Stefan Hajnoczi 
  Stefan Weil 
  Ting Wang 


jobs:
 build-amd64-xsm  pass
 build-armhf-xsm  pass
 build-i386-xsm   pass
 build-amd64  pass
 build-armhf  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-armhf-libvirt  pass
 build-i386-libvirt   pass
 build-amd64-pvops 

Re: [Xen-devel] [PATCH RESEND 1/2] xenbus_client: Extend interface to support multi-page ring

2015-03-31 Thread Bob Liu
Hi Juergen,

On 03/31/2015 08:36 PM, Juergen Gross wrote:
> On 03/31/2015 02:15 PM, Bob Liu wrote:
>> From: Wei Liu 
>>
>> Originally Xen PV drivers only use single-page ring to pass along
>> information. This might limit the throughput between frontend and
>> backend.
>>
>> The patch extends Xenbus driver to support multi-page ring, which in
>> general should improve throughput if ring is the bottleneck. Changes to
>> various frontend / backend to adapt to the new interface are also
>> included.
>>
>> Affected Xen drivers:
>> * blkfront/back
>> * netfront/back
>> * pcifront/back
> 
> What about pvscsi drivers?
> They are affected, too!
> 

Thanks for the reminder, I'll send a new version to fix it.

Regards,
-Bob

> 
> Juergen
> 
>>
>> The interface is documented, as before, in xenbus_client.c.
>>
>> Change in V2:
>> * allow ring has arbitrary number of pages <= XENBUS_MAX_RING_PAGES
>>
>> Change in V3:
>> * update function prototypes
>> * carefully deal with types of different sizes
>>
>> Change in V4:
>> * use PAGE_KERNEL instead of PAGE_KERNEL_IO to avoid breakage on Arm
>>
>> Change in V5:
>> * fix off-by-one error and other minor glitches spotted by Mathew Daley
>>
>> Signed-off-by: Wei Liu 
>> Signed-off-by: Paul Durrant 
>> Signed-off-by: Bob Liu 
>> Cc: Konrad Wilk 
>> Cc: David Vrabel 
>> Cc: Boris Ostrovsky 
>> ---
>>   drivers/block/xen-blkback/xenbus.c |   5 +-
>>   drivers/block/xen-blkfront.c   |   5 +-
>>   drivers/net/xen-netback/netback.c  |   4 +-
>>   drivers/net/xen-netfront.c |   9 +-
>>   drivers/pci/xen-pcifront.c |   5 +-
>>   drivers/xen/xen-pciback/xenbus.c   |   2 +-
>>   drivers/xen/xenbus/xenbus_client.c | 387
>> +++--
>>   include/xen/xenbus.h   |  20 +-
>>   8 files changed, 317 insertions(+), 120 deletions(-)
>>
>> diff --git a/drivers/block/xen-blkback/xenbus.c
>> b/drivers/block/xen-blkback/xenbus.c
>> index e3afe97..ff30259 100644
>> --- a/drivers/block/xen-blkback/xenbus.c
>> +++ b/drivers/block/xen-blkback/xenbus.c
>> @@ -193,7 +193,7 @@ fail:
>>   return ERR_PTR(-ENOMEM);
>>   }
>>
>> -static int xen_blkif_map(struct xen_blkif *blkif, unsigned long
>> shared_page,
>> +static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref,
>>unsigned int evtchn)
>>   {
>>   int err;
>> @@ -202,7 +202,8 @@ static int xen_blkif_map(struct xen_blkif *blkif,
>> unsigned long shared_page,
>>   if (blkif->irq)
>>   return 0;
>>
>> -err = xenbus_map_ring_valloc(blkif->be->dev, shared_page,
>> &blkif->blk_ring);
>> +err = xenbus_map_ring_valloc(blkif->be->dev, &gref, 1,
>> + &blkif->blk_ring);
>>   if (err < 0)
>>   return err;
>>
>> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
>> index 37779e4..2c61cf8 100644
>> --- a/drivers/block/xen-blkfront.c
>> +++ b/drivers/block/xen-blkfront.c
>> @@ -1245,6 +1245,7 @@ static int setup_blkring(struct xenbus_device *dev,
>>struct blkfront_info *info)
>>   {
>>   struct blkif_sring *sring;
>> +grant_ref_t gref;
>>   int err;
>>
>>   info->ring_ref = GRANT_INVALID_REF;
>> @@ -1257,13 +1258,13 @@ static int setup_blkring(struct xenbus_device
>> *dev,
>>   SHARED_RING_INIT(sring);
>>   FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
>>
>> -err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
>> +err = xenbus_grant_ring(dev, info->ring.sring, 1, &gref);
>>   if (err < 0) {
>>   free_page((unsigned long)sring);
>>   info->ring.sring = NULL;
>>   goto fail;
>>   }
>> -info->ring_ref = err;
>> +info->ring_ref = gref;
>>
>>   err = xenbus_alloc_evtchn(dev, &info->evtchn);
>>   if (err)
>> diff --git a/drivers/net/xen-netback/netback.c
>> b/drivers/net/xen-netback/netback.c
>> index 997cf09..865203f 100644
>> --- a/drivers/net/xen-netback/netback.c
>> +++ b/drivers/net/xen-netback/netback.c
>> @@ -1782,7 +1782,7 @@ int xenvif_map_frontend_rings(struct
>> xenvif_queue *queue,
>>   int err = -ENOMEM;
>>
>>   err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
>> - tx_ring_ref, &addr);
>> + &tx_ring_ref, 1, &addr);
>>   if (err)
>>   goto err;
>>
>> @@ -1790,7 +1790,7 @@ int xenvif_map_frontend_rings(struct
>> xenvif_queue *queue,
>>   BACK_RING_INIT(&queue->tx, txs, PAGE_SIZE);
>>
>>   err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
>> - rx_ring_ref, &addr);
>> + &rx_ring_ref, 1, &addr);
>>   if (err)
>>   goto err;
>>
>> diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
>> index e9b960f..13f5e7f 100644
>> --- a/drivers/net/xen-netfront.c
>> +++ b/drivers/net/xen-netfront.c
>> @@ -1486,6 +1486,7 @@ static int setup_netfront(struct xenbus_device
>> *dev,
>>   {
>>   struct xen_netif_tx_sring *txs;
>>   struct xen

[Xen-devel] Xen-unstable-staging: Xen BUG at iommu_map.c:455

2015-03-31 Thread Sander Eikelenboom
Hi all,

I just tested xen-unstable staging (changeset: git:0522407-dirty) 

with revert of commit 1aeb1156fa43fe2cd2b5003995b20466cd19a622
(due to an already reported but not yet resolved issue)

and build with qemu xen from 
git://xenbits.xen.org/staging/qemu-upstream-unstable.git
(to include the pci command register patch from Jan)


and now came across this new splat when starting an HVM with PCI passthrough:

(XEN) [2015-03-31 20:58:20.710] io.c:429: d17: bind: m_gsi=37 g_gsi=36 
dev=00.00.5 intx=0
(XEN) [2015-03-31 20:58:21.100] Xen BUG at iommu_map.c:455
(XEN) [2015-03-31 20:58:21.100] [ Xen-4.6-unstable  x86_64  debug=y  Not 
tainted ]
(XEN) [2015-03-31 20:58:21.100] CPU:0
(XEN) [2015-03-31 20:58:21.100] RIP:e008:[] 
iommu_pde_from_gfn+0x38/0x430
(XEN) [2015-03-31 20:58:21.100] RFLAGS: 00010202   CONTEXT: hypervisor
(XEN) [2015-03-31 20:58:21.100] rax: 0008   rbx: 0003   
rcx: 82c000802000
(XEN) [2015-03-31 20:58:21.100] rdx: 82e007d56740   rsi:    
rdi: 8305167dd000
(XEN) [2015-03-31 20:58:21.100] rbp: 82d0802efad8   rsp: 82d0802efa78   
r8:  83054eb755b0
(XEN) [2015-03-31 20:58:21.100] r9:  0003   r10: 0200   
r11: 82d0802fc0d0
(XEN) [2015-03-31 20:58:21.100] r12: 82e0075527e0   r13: 05e9   
r14: 
(XEN) [2015-03-31 20:58:21.100] r15: 7d20   cr0: 80050033   
cr4: 06f0
(XEN) [2015-03-31 20:58:21.100] cr3: 00051a197000   cr2: 7efdd5ee1d48
(XEN) [2015-03-31 20:58:21.100] ds:    es:    fs:    gs:    ss: 
e010   cs: e008
(XEN) [2015-03-31 20:58:21.100] Xen stack trace from rsp=82d0802efa78:
(XEN) [2015-03-31 20:58:21.100]8305167dd000 82d0802efb30 
 8305167dd190
(XEN) [2015-03-31 20:58:21.100]0286 82e007d56740 
82e007552800 0003
(XEN) [2015-03-31 20:58:21.100]82e0075527e0 05e9 
 7d20
(XEN) [2015-03-31 20:58:21.100]82d0802efb98 82d0801560b6 
7d2f7fd104e7 0001802351d2
(XEN) [2015-03-31 20:58:21.100]003aa93f  
00020001 8305167dd938
(XEN) [2015-03-31 20:58:21.100]82004ff8 8305167dd000 
0020941c 
(XEN) [2015-03-31 20:58:21.100]  
 
(XEN) [2015-03-31 20:58:21.100]  
8305167dd938 8305167dd000
(XEN) [2015-03-31 20:58:21.100]82e0075527e0 05e9 
 7d20
(XEN) [2015-03-31 20:58:21.100]82d0802efbf8 82d08015a54d 
 8305167dd020
(XEN) [2015-03-31 20:58:21.100]82d0802e8000 003aa93f 
82d0802efbf8 
(XEN) [2015-03-31 20:58:21.100]8305167dd000 0800 
8305167dd000 
(XEN) [2015-03-31 20:58:21.100]82d0802efc98 82d08014c6c1 
82d0802efc78 82d08012c298
(XEN) [2015-03-31 20:58:21.100]0286 82d0802efc28 
0020 
(XEN) [2015-03-31 20:58:21.100]  
0008 7f6525ed2004
(XEN) [2015-03-31 20:58:21.100]83054eb1ab60 83055cc6c300 
0282 7f6525ed2004
(XEN) [2015-03-31 20:58:21.100]8305167dd000 7f6525ed2004 
8305167dd000 0005
(XEN) [2015-03-31 20:58:21.100]82d0802efca8 82d08014908b 
82d0802efd98 82d080161f2d
(XEN) [2015-03-31 20:58:21.100]0020  
0005 0001
(XEN) [2015-03-31 20:58:21.100]82d080331bb8 0001 
82d0802efde8 82d080120d00
(XEN) [2015-03-31 20:58:21.100] Xen call trace:
(XEN) [2015-03-31 20:58:21.100][] 
iommu_pde_from_gfn+0x38/0x430
(XEN) [2015-03-31 20:58:21.100][] 
amd_iommu_map_page+0x10d/0x4e6
(XEN) [2015-03-31 20:58:21.100][] 
arch_iommu_populate_page_table+0x179/0x4d8
(XEN) [2015-03-31 20:58:21.100][] 
iommu_do_pci_domctl+0x395/0x604
(XEN) [2015-03-31 20:58:21.100][] 
iommu_do_domctl+0x17/0x1a
(XEN) [2015-03-31 20:58:21.100][] 
arch_do_domctl+0x2469/0x26e1
(XEN) [2015-03-31 20:58:21.100][] do_domctl+0x1a1f/0x1d60
(XEN) [2015-03-31 20:58:21.100][] syscall_enter+0xeb/0x145
(XEN) [2015-03-31 20:58:21.100] 
(XEN) [2015-03-31 20:58:22.167] 
(XEN) [2015-03-31 20:58:22.176] 
(XEN) [2015-03-31 20:58:22.195] Panic on CPU 0:
(XEN) [2015-03-31 20:58:22.208] Xen BUG at iommu_map.c:455
(XEN) [2015-03-31 20:58:22.223] 
(XEN) [2015-03-31 20:58:22.243] 
(XEN) [2015-03-31 20:58:22.252] Manual reset required ('noreboot' specified)


Haven't tried without the revert yet.

--
Sander


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen

[Xen-devel] [linux-3.10 test] 50271: tolerable FAIL - PUSHED

2015-03-31 Thread osstest service user
flight 50271 linux-3.10 real [real]
http://logs.test-lab.xenproject.org/osstest/logs/50271/

Failures :-/ but no regressions.

Regressions which are regarded as allowable (not blocking):
 test-amd64-i386-pair17 guest-migrate/src_host/dst_host fail like 26303

Tests which did not succeed, but are not blocking:
 test-amd64-i386-xl-qemut-debianhvm-amd64-xsm 7 debian-hvm-install fail never 
pass
 test-amd64-i386-xl-qemuu-debianhvm-amd64-xsm 7 debian-hvm-install fail never 
pass
 test-amd64-i386-xl-xsm9 guest-start  fail   never pass
 test-amd64-amd64-xl-qemuu-debianhvm-amd64-xsm 7 debian-hvm-install fail never 
pass
 test-amd64-amd64-xl-pvh-intel  9 guest-start  fail  never pass
 test-amd64-amd64-xl-qemut-debianhvm-amd64-xsm 7 debian-hvm-install fail never 
pass
 test-amd64-amd64-xl-xsm   9 guest-start  fail   never pass
 test-armhf-armhf-xl-arndale   5 xen-boot fail   never pass
 test-amd64-i386-libvirt-xsm   9 guest-start  fail   never pass
 test-amd64-amd64-libvirt-xsm  9 guest-start  fail   never pass
 test-amd64-i386-libvirt  10 migrate-support-checkfail   never pass
 test-amd64-amd64-xl-pvh-amd   9 guest-start  fail   never pass
 test-amd64-amd64-libvirt 10 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-cubietruck  5 xen-boot fail never pass
 test-armhf-armhf-xl-multivcpu  5 xen-boot fail  never pass
 test-armhf-armhf-xl-sedf-pin  5 xen-boot fail   never pass
 test-armhf-armhf-xl-credit2   5 xen-boot fail   never pass
 test-armhf-armhf-libvirt  5 xen-boot fail   never pass
 test-armhf-armhf-xl-xsm   5 xen-boot fail   never pass
 test-armhf-armhf-libvirt-xsm  5 xen-boot fail   never pass
 test-armhf-armhf-xl   5 xen-boot fail   never pass
 test-amd64-i386-xl-qemut-win7-amd64 14 guest-stop  fail never pass
 test-armhf-armhf-xl-sedf  5 xen-boot fail   never pass
 test-amd64-i386-xl-qemut-winxpsp3 14 guest-stopfail never pass
 test-amd64-amd64-xl-qemuu-winxpsp3 14 guest-stop   fail never pass
 test-amd64-i386-xl-winxpsp3-vcpus1 14 guest-stop   fail never pass
 test-amd64-i386-xl-qemuu-winxpsp3-vcpus1 14 guest-stop fail never pass
 test-amd64-i386-xl-qemuu-win7-amd64 14 guest-stop  fail never pass
 test-amd64-i386-xl-win7-amd64 14 guest-stop   fail  never pass
 test-amd64-amd64-xl-win7-amd64 14 guest-stop   fail never pass
 test-amd64-amd64-xl-qemuu-win7-amd64 14 guest-stop fail never pass
 test-amd64-amd64-xl-qemut-win7-amd64 14 guest-stop fail never pass
 test-amd64-i386-xl-winxpsp3  14 guest-stop   fail   never pass
 test-amd64-i386-xl-qemut-winxpsp3-vcpus1 14 guest-stop fail never pass
 test-amd64-i386-xl-qemuu-winxpsp3 14 guest-stopfail never pass
 test-amd64-amd64-xl-winxpsp3 14 guest-stop   fail   never pass
 test-amd64-amd64-xl-qemut-winxpsp3 14 guest-stop   fail never pass

version targeted for testing:
 linux73895725a9401bd3454757fcfa7d691270ac7498
baseline version:
 linuxbe67db109090b17b56eb8eb2190cd70700f107aa


1013 people touched revisions under test,
not listing them all


jobs:
 build-amd64-xsm  pass
 build-armhf-xsm  pass
 build-i386-xsm   pass
 build-amd64  pass
 build-armhf  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-armhf-libvirt  pass
 build-i386-libvirt   pass
 build-amd64-pvopspass
 build-armhf-pvopspass
 build-i386-pvops pass
 build-amd64-rumpuserxen  pass
 build-i386-rumpuserxen   pass
 test-amd64-amd64-xl  pass
 test-armhf-armhf-xl  fail
 test-amd64-i386-xl   pass
 test-amd64-amd64-xl-qemut-debianhvm-amd64-xsmfail
 test-amd64-i386-xl-qemut-debianhvm-amd64-xsm   

[Xen-devel] [PATCH 13/28] libxl: domain create: Do not destroy on cancellation

2015-03-31 Thread Ian Jackson
If we cancelled the domain creation, do not try to tear it down again.
Document this.

This is a backwards-compatible API change since old libxl users will
never cancel any operations.

In the current code, there is no functional change, because
ERROR_CANCELLED is never generated anywhere yet.

Signed-off-by: Ian Jackson 
Acked-by: Ian Campbell 
---
 tools/libxl/libxl.h|4 
 tools/libxl/libxl_create.c |6 --
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
index 5eec092..dc05e02 100644
--- a/tools/libxl/libxl.h
+++ b/tools/libxl/libxl.h
@@ -920,6 +920,10 @@ int libxl_ctx_free(libxl_ctx *ctx /* 0 is OK */);
 
 /* domain related functions */
 
+/* If the result is ERROR_CANCELLED, the domain may or may not exist
+ * (in a half-created state).  *domid will be valid and will be the
+ * domain id, or -1, as appropriate */
+
 int libxl_domain_create_new(libxl_ctx *ctx, libxl_domain_config *d_config,
 uint32_t *domid,
 const libxl_asyncop_how *ao_how,
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index 98687bd..f12ed72 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -1446,7 +1446,9 @@ static void domcreate_complete(libxl__egc *egc,
 if (!rc && d_config->b_info.exec_ssidref)
 rc = xc_flask_relabel_domain(CTX->xch, dcs->guest_domid, 
d_config->b_info.exec_ssidref);
 
-if (!rc) {
+bool retain_domain = !rc || rc == ERROR_CANCELLED;
+
+if (retain_domain) {
 libxl__domain_userdata_lock *lock;
 
 /* Note that we hold CTX lock at this point so only need to
@@ -1465,7 +1467,7 @@ static void domcreate_complete(libxl__egc *egc,
 
 libxl_domain_config_dispose(d_config_saved);
 
-if (rc) {
+if (!retain_domain) {
 if (dcs->guest_domid) {
 dcs->dds.ao = ao;
 dcs->dds.domid = dcs->guest_domid;
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 22/28] libxl: cancellation: Support cancellation where we spot domain death

2015-03-31 Thread Ian Jackson
Make an active libxl__domaindeathcheck contain an active
libxl__ao_cancellable.

Consequential changes are:
 * domaindeath callbacks now take an rc value.
 * libxl__domaindeathcheck_start takes an ao, not a gc.
 * bootloader_domaindeath plumbs the rc through to its caller.
 * libxl__domaindeathcheck_init and _stop are no longer trivial, so
   they are moved from inline functions in libxl_internal.h to
   ordinary functions defined in libxl_event.c.
 * libxl__domaindeathcheck_start is not trivial any more, and now has
   the standard error-handling pattern.

The only current user of libxl__domaindeathcheck is the bootloader.
So the result is that now it is possible to effectively cancel domain
creation while the bootloader is running.

Signed-off-by: Ian Jackson 
Acked-by: Ian Campbell 
---
v2: New in this version of the series.
---
 tools/libxl/libxl_bootloader.c |   11 +
 tools/libxl/libxl_event.c  |   50 
 tools/libxl/libxl_internal.h   |   12 +-
 3 files changed, 59 insertions(+), 14 deletions(-)

diff --git a/tools/libxl/libxl_bootloader.c b/tools/libxl/libxl_bootloader.c
index c3f3a1f..21f92dc 100644
--- a/tools/libxl/libxl_bootloader.c
+++ b/tools/libxl/libxl_bootloader.c
@@ -33,7 +33,8 @@ static void bootloader_keystrokes_copyfail(libxl__egc *egc,
libxl__datacopier_state *dc, int onwrite, int errnoval);
 static void bootloader_display_copyfail(libxl__egc *egc,
libxl__datacopier_state *dc, int onwrite, int errnoval);
-static void bootloader_domaindeath(libxl__egc*, libxl__domaindeathcheck *dc);
+static void bootloader_domaindeath(libxl__egc*, libxl__domaindeathcheck *dc,
+   int rc);
 static void bootloader_finished(libxl__egc *egc, libxl__ev_child *child,
 pid_t pid, int status);
 
@@ -496,7 +497,7 @@ static void bootloader_gotptys(libxl__egc *egc, 
libxl__openpty_state *op)
 bl->deathcheck.what = "stopping bootloader";
 bl->deathcheck.domid = bl->domid;
 bl->deathcheck.callback = bootloader_domaindeath;
-rc = libxl__domaindeathcheck_start(gc, &bl->deathcheck);
+rc = libxl__domaindeathcheck_start(ao, &bl->deathcheck);
 if (rc) goto out;
 
 if (bl->console_available)
@@ -608,10 +609,12 @@ static void bootloader_display_copyfail(libxl__egc *egc,
 bootloader_copyfail(egc, "bootloader output", bl, 1, onwrite, errnoval);
 }
 
-static void bootloader_domaindeath(libxl__egc *egc, libxl__domaindeathcheck 
*dc)
+static void bootloader_domaindeath(libxl__egc *egc,
+   libxl__domaindeathcheck *dc,
+   int rc)
 {
 libxl__bootloader_state *bl = CONTAINER_OF(dc, *bl, deathcheck);
-bootloader_stop(egc, bl, ERROR_DOMAIN_DESTROYED);
+bootloader_stop(egc, bl, rc);
 }
 
 static void bootloader_finished(libxl__egc *egc, libxl__ev_child *child,
diff --git a/tools/libxl/libxl_event.c b/tools/libxl/libxl_event.c
index db3d419..e28c465 100644
--- a/tools/libxl/libxl_event.c
+++ b/tools/libxl/libxl_event.c
@@ -921,6 +921,18 @@ int libxl__ev_devstate_wait(libxl__ao *ao, 
libxl__ev_devstate *ds,
  * futile.
  */
 
+void libxl__domaindeathcheck_init(libxl__domaindeathcheck *dc)
+{
+libxl__ao_cancellable_init(&dc->cancel);
+libxl__ev_xswatch_init(&dc->watch);
+}
+
+void libxl__domaindeathcheck_stop(libxl__gc *gc, libxl__domaindeathcheck *dc)
+{
+libxl__ao_cancellable_deregister(&dc->cancel);
+libxl__ev_xswatch_deregister(gc,&dc->watch);
+}
+
 static void domaindeathcheck_callback(libxl__egc *egc, libxl__ev_xswatch *w,
 const char *watch_path, const char *event_path)
 {
@@ -929,6 +941,8 @@ static void domaindeathcheck_callback(libxl__egc *egc, 
libxl__ev_xswatch *w,
 const char *p = libxl__xs_read(gc, XBT_NULL, watch_path);
 if (p) return;
 
+libxl__domaindeathcheck_stop(gc,dc);
+
 if (errno!=ENOENT) {
 LIBXL__EVENT_DISASTER(egc,"failed to read xenstore"
   " for domain detach check", errno, 0);
@@ -937,15 +951,43 @@ static void domaindeathcheck_callback(libxl__egc *egc, 
libxl__ev_xswatch *w,
 
 LOG(ERROR,"%s: domain %"PRIu32" removed (%s no longer in xenstore)",
 dc->what, dc->domid, watch_path);
-dc->callback(egc, dc);
+dc->callback(egc, dc, ERROR_DOMAIN_DESTROYED);
+}
+
+static void domaindeathcheck_cancel(libxl__egc *egc,
+libxl__ao_cancellable *cancel,
+int rc)
+{
+libxl__domaindeathcheck *dc = CONTAINER_OF(cancel, *dc, cancel);
+EGC_GC;
+
+libxl__domaindeathcheck_stop(gc,dc);
+dc->callback(egc, dc, rc);
 }
 
-int libxl__domaindeathcheck_start(libxl__gc *gc,
+int libxl__domaindeathcheck_start(libxl__ao *ao,
   libxl__domaindeathcheck *dc)
 {
+AO_GC;
+int rc;
 const char *path = GCSPRINTF("/local/domain/%"PRIu32, dc->domid);
-r

[Xen-devel] [PATCH 25/28] libxl: cancellation: Handle SIGTERM in save/restore helper

2015-03-31 Thread Ian Jackson
During startup of the save/restore helper, set the disposition of
SIGTERM appropriately.

For restore, we can simply die immediately - there is no point trying
to do any kind of cleanup on what is now going to be a trashed domain.

For save, we want to arrange that libxc's cleanup code (eg turning off
logdirty) takes place.  So our signal handler replaces the fd with one
on which writes will fail, causing libxc's own loop to fail next time
it actually tries to do a write.

Currently this has only a minor beneficial effect: we don't send the
helper a SIGTERM ourselves, and if someone else contrives to send our
helper a SIGTERM they have probably sent one to libxl too in which
case things are going to be a bit messy anyway.

But in the next patch libxl is going to use SIGTERM itself on ao
cancellation.

Signed-off-by: Ian Jackson 
Acked-by: Ian Campbell 
---
v2: New in this version of the series.
---
 tools/libxl/libxl_save_helper.c |   58 +++
 1 file changed, 58 insertions(+)

diff --git a/tools/libxl/libxl_save_helper.c b/tools/libxl/libxl_save_helper.c
index 7514b2e..0be77c9 100644
--- a/tools/libxl/libxl_save_helper.c
+++ b/tools/libxl/libxl_save_helper.c
@@ -40,8 +40,10 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "libxl.h"
+#include "libxl_utils.h"
 
 #include "xenctrl.h"
 #include "xenguest.h"
@@ -120,6 +122,58 @@ static void *xmalloc(size_t sz)
 return r;
 }
 
+/*- signal handling -*/
+
+static int unwriteable_fd;
+
+static void save_signal_handler(int num)
+{
+/*
+ * We want to be able to interrupt save.  But the code in libxc
+ * which does the actual saving is straight-through, and we need
+ * to execute its error path to put the guest back to sanity.
+ *
+ * So what we do is this: when we get the signal, we dup2
+ * the result of open("/dev/null",O_RDONLY) onto the output fd.
+ *
+ * This is guaranteed to 1. interrupt libxc's write (causing it to
+ * return short, or maybe EINTR); 2. make the next write give
+ * EBADF, so that: 3. at latest, libxc will notice when it next
+ * tries to write data and will then go into its cleanup path.
+ *
+ * We make no effort here to sanitise the resulting errors.
+ * That's libxl's job.
+ */
+int esave = errno;
+
+int r = dup2(unwriteable_fd, io_fd);
+assert(r == io_fd); /* if not we can't write an xtl message because we
+ * might end up interleaving on our control stream */
+
+errno = esave;
+}
+
+static void setup_signals(void (*handler)(int))
+{
+struct sigaction sa;
+sigset_t spmask;
+int r;
+
+unwriteable_fd = open("/dev/null",O_RDONLY);
+if (unwriteable_fd < 0) fail(errno,"open /dev/null for reading");
+
+LIBXL_FILLZERO(sa);
+sa.sa_handler = handler;
+sigemptyset(&sa.sa_mask);
+r = sigaction(SIGTERM, &sa, 0);
+if (r) fail(errno,"sigaction SIGTERM failed");
+
+sigemptyset(&spmask);
+sigaddset(&spmask,SIGTERM);
+r = sigprocmask(SIG_UNBLOCK,&spmask,0);
+if (r) fail(errno,"sigprocmask unblock SIGTERM failed");
+}
+
 /*- helper functions called by autogenerated stubs -*/
 
 unsigned char * helper_allocbuf(int len, void *user)
@@ -229,6 +283,8 @@ int main(int argc, char **argv)
 helper_setcallbacks_save(&helper_save_callbacks, cbflags);
 
 startup("save");
+setup_signals(save_signal_handler);
+
 r = xc_domain_save(xch, io_fd, dom, max_iters, max_factor, flags,
&helper_save_callbacks, hvm);
 complete(r);
@@ -254,6 +310,8 @@ int main(int argc, char **argv)
 unsigned long console_mfn = 0;
 
 startup("restore");
+setup_signals(SIG_DFL);
+
 r = xc_domain_restore(xch, io_fd, dom, store_evtchn, &store_mfn,
   store_domid, console_evtchn, &console_mfn,
   console_domid, hvm, pae, superpages,
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 18/28] libxl: cancellation: Provide explicit internal cancel check API

2015-03-31 Thread Ian Jackson
Some places in libxl which can't handle cancellation via a
libxl__ao_cancellable callback might nevertheless benefit from being
able to explicitly check for cancellation.

Provide the (fairly trivial) internal API function to do this.

Signed-off-by: Ian Jackson 
Acked-by: Ian Campbell 

---
v2: New in this version of the series.
---
 tools/libxl/libxl_event.c|   11 +++
 tools/libxl/libxl_internal.h |2 ++
 2 files changed, 13 insertions(+)

diff --git a/tools/libxl/libxl_event.c b/tools/libxl/libxl_event.c
index c224715..d7c478c 100644
--- a/tools/libxl/libxl_event.c
+++ b/tools/libxl/libxl_event.c
@@ -2028,6 +2028,17 @@ _hidden int libxl_ao_cancel(libxl_ctx *ctx, const 
libxl_asyncop_how *how)
 return rc;
 }
 
+int libxl__ao_cancelling(libxl__ao *ao)
+{
+libxl__ao *root = ao_nested_root(ao);
+if (root->cancelling) {
+DBG("ao=%p: cancelling at explicit check (root=%p)", ao, root);
+return ERROR_CANCELLED;
+}
+
+return 0;
+}
+
 int libxl__ao_cancellable_register(libxl__ao_cancellable *canc)
 {
 libxl__ao *ao = canc->ao;
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 46383c4..6caf042 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -207,6 +207,8 @@ static inline void libxl__ao_cancellable_init
 static inline bool libxl__ao_cancellable_isregistered
   (const libxl__ao_cancellable *c) { return c->registered; }
 
+int libxl__ao_cancelling(libxl__ao *ao); /* -> 0 or ERROR_CANCELLED */
+
 
 typedef struct libxl__ev_time libxl__ev_time;
 typedef void libxl__ev_time_callback(libxl__egc *egc, libxl__ev_time *ev,
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 23/28] libxl: Introduce FILLZERO

2015-03-31 Thread Ian Jackson
FILLZERO is a macro for memset(&foo,0,sizeof(foo)).  It eliminates the
possibility of making the error memset(&foo,0,sizeof(&foo)).

No callers yet, but document it in CODING_STYLE.  (In accordance with
existing libxl policy, I haven't gone through all existing possible
call sites.)

Signed-off-by: Ian Jackson 
---
v2: New in this version of the series.
---
 tools/libxl/CODING_STYLE |1 +
 tools/libxl/libxl_internal.h |3 +++
 tools/libxl/libxl_utils.h|3 +++
 3 files changed, 7 insertions(+)

diff --git a/tools/libxl/CODING_STYLE b/tools/libxl/CODING_STYLE
index f5b5890..a65efb3 100644
--- a/tools/libxl/CODING_STYLE
+++ b/tools/libxl/CODING_STYLE
@@ -62,6 +62,7 @@ whenever they are applicable.  For example:
   libxl__ctx_[un]lock CTX_LOCK, CTX_UNLOCK
   gc=...; ao=...; EGC_GC, AO_GC, STATE_AO_GC
   explicit gc creationGC_INIT, GC_FREE
+  memset(..,0,sizeof..)   FILLZERO
 
 
 ERROR HANDLING
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 36a13ea..465cdda 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -3225,6 +3225,9 @@ _hidden const char 
*libxl__device_model_savefile(libxl__gc *gc, uint32_t domid);
 })
 
 
+#define FILLZERO LIBXL_FILLZERO
+
+
 /*
  * All of these assume (or define)
  *libxl__gc *gc;
diff --git a/tools/libxl/libxl_utils.h b/tools/libxl/libxl_utils.h
index acacdd9..51eac68 100644
--- a/tools/libxl/libxl_utils.h
+++ b/tools/libxl/libxl_utils.h
@@ -154,6 +154,9 @@ int libxl_cpumap_to_nodemap(libxl_ctx *ctx,
 
 void libxl_string_copy(libxl_ctx *ctx, char **dst, char **src);
 
+
+#define LIBXL_FILLZERO(object) (memset(&(object), 0, sizeof((object
+
 #endif
 
 /*
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 12/28] libxl: events: Permit timeouts to signal cancellation

2015-03-31 Thread Ian Jackson
The callback functions provided by users must take an rc value.  This
rc value can be ERROR_TIMEDOUT or ERROR_CANCELLED.

Users of xswait are now expected to deal correctly with
ERROR_CANCELLED.  If they experience this, it hasn't been logged.
And the caller won't log it either since it's not TIMEDOUT.
Luckily this is correct, so we can just change the doc comment.

Currently nothing generates ERROR_CANCELLED; in particular the
timeouts cannot in fact signal cancellation.

There should be no publicly visible change except that some error
returns from libxl will change from ERROR_FAIL to ERROR_TIMEDOUT, and
some changes to debugging messages.

Signed-off-by: Ian Jackson 
Acked-by: Ian Campbell 
---
 tools/libxl/libxl_aoutils.c|   11 ---
 tools/libxl/libxl_device.c |8 +---
 tools/libxl/libxl_dom.c|   29 -
 tools/libxl/libxl_event.c  |8 
 tools/libxl/libxl_internal.h   |   12 +++-
 tools/libxl/libxl_test_timedereg.c |8 +---
 6 files changed, 49 insertions(+), 27 deletions(-)

diff --git a/tools/libxl/libxl_aoutils.c b/tools/libxl/libxl_aoutils.c
index 891cdb8..0b6d750 100644
--- a/tools/libxl/libxl_aoutils.c
+++ b/tools/libxl/libxl_aoutils.c
@@ -80,12 +80,13 @@ void xswait_xswatch_callback(libxl__egc *egc, 
libxl__ev_xswatch *xsw,
 }
 
 void xswait_timeout_callback(libxl__egc *egc, libxl__ev_time *ev,
- const struct timeval *requested_abs)
+ const struct timeval *requested_abs,
+ int rc)
 {
 EGC_GC;
 libxl__xswait_state *xswa = CONTAINER_OF(ev, *xswa, time_ev);
 LOG(DEBUG, "%s: xswait timeout (path=%s)", xswa->what, xswa->path);
-xswait_report_error(egc, xswa, ERROR_TIMEDOUT);
+xswait_report_error(egc, xswa, rc);
 }
 
 static void xswait_report_error(libxl__egc *egc, libxl__xswait_state *xswa,
@@ -455,11 +456,15 @@ int libxl__openptys(libxl__openpty_state *op,
 
 static void async_exec_timeout(libxl__egc *egc,
libxl__ev_time *ev,
-   const struct timeval *requested_abs)
+   const struct timeval *requested_abs,
+   int rc)
 {
 libxl__async_exec_state *aes = CONTAINER_OF(ev, *aes, time);
 STATE_AO_GC(aes->ao);
 
+if (!aes->rc)
+aes->rc = rc;
+
 libxl__ev_time_deregister(gc, &aes->time);
 
 assert(libxl__ev_child_inuse(&aes->child));
diff --git a/tools/libxl/libxl_device.c b/tools/libxl/libxl_device.c
index 84114ff..3b1c3b2 100644
--- a/tools/libxl/libxl_device.c
+++ b/tools/libxl/libxl_device.c
@@ -717,7 +717,7 @@ out:
 
 /* This callback is part of the Qemu devices Badge */
 static void device_qemu_timeout(libxl__egc *egc, libxl__ev_time *ev,
-const struct timeval *requested_abs);
+const struct timeval *requested_abs, int rc);
 
 static void device_backend_callback(libxl__egc *egc, libxl__ev_devstate *ds,
int rc);
@@ -880,7 +880,7 @@ out:
 }
 
 static void device_qemu_timeout(libxl__egc *egc, libxl__ev_time *ev,
-const struct timeval *requested_abs)
+const struct timeval *requested_abs, int rc)
 {
 libxl__ao_device *aodev = CONTAINER_OF(ev, *aodev, timeout);
 STATE_AO_GC(aodev->ao);
@@ -888,7 +888,9 @@ static void device_qemu_timeout(libxl__egc *egc, 
libxl__ev_time *ev,
 char *state_path = GCSPRINTF("%s/state", be_path);
 const char *xs_state;
 xs_transaction_t t = 0;
-int rc = 0;
+
+if (rc != ERROR_TIMEDOUT)
+goto out;
 
 libxl__ev_time_deregister(gc, &aodev->timeout);
 
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index dcce394..379ac07 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -943,7 +943,8 @@ static void domain_suspend_callback_common_done(libxl__egc 
*egc,
  */
 
 static void switch_logdirty_timeout(libxl__egc *egc, libxl__ev_time *ev,
-const struct timeval *requested_abs);
+const struct timeval *requested_abs,
+int rc);
 static void switch_logdirty_xswatch(libxl__egc *egc, libxl__ev_xswatch*,
 const char *watch_path, const char *event_path);
 static void switch_logdirty_done(libxl__egc *egc,
@@ -1069,7 +1070,8 @@ void libxl__domain_suspend_common_switch_qemu_logdirty
 }
 }
 static void switch_logdirty_timeout(libxl__egc *egc, libxl__ev_time *ev,
-const struct timeval *requested_abs)
+const struct timeval *requested_abs,
+int rc)
 {
 libxl__domain_suspend_state *dss = CONTAINER_OF(ev, *dss, 
logdirty.timeout);
 STATE_AO_GC(dss->ao);
@@ -1218,7 +1220,7 @@ 

[Xen-devel] [PATCH 19/28] libxl: cancellation: Make timeouts cancellable

2015-03-31 Thread Ian Jackson
Make libxl__ev_time* register with the cancellation machinery, so that
libxl_ao_cancel can cancel any operation which has a timeout.

Signed-off-by: Ian Jackson 
Acked-by: Ian Campbell 
---
 tools/libxl/libxl_event.c|   27 +++
 tools/libxl/libxl_internal.h |3 ++-
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/tools/libxl/libxl_event.c b/tools/libxl/libxl_event.c
index d7c478c..db3d419 100644
--- a/tools/libxl/libxl_event.c
+++ b/tools/libxl/libxl_event.c
@@ -292,6 +292,8 @@ static int time_register_finite(libxl__gc *gc, 
libxl__ev_time *ev,
 
 static void time_deregister(libxl__gc *gc, libxl__ev_time *ev)
 {
+libxl__ao_cancellable_deregister(&ev->cancel);
+
 if (!ev->infinite) {
 struct timeval right_away = { 0, 0 };
 if (ev->nexus) /* only set if app provided hooks */
@@ -314,6 +316,23 @@ static void time_done_debug(libxl__gc *gc, const char 
*func,
 #endif
 }
 
+static void time_cancelled(libxl__egc *egc, libxl__ao_cancellable *canc, int 
rc)
+{
+libxl__ev_time *ev = CONTAINER_OF(canc, *ev, cancel);
+EGC_GC;
+
+time_deregister(gc, ev);
+DBG("ev_time=%p cancelled", ev);
+ev->func(egc, ev, &ev->abs, rc);
+}
+
+static int time_register_cancel(libxl__ao *ao, libxl__ev_time *ev)
+{
+ev->cancel.ao = ao;
+ev->cancel.callback = time_cancelled;
+return libxl__ao_cancellable_register(&ev->cancel);
+}
+
 int libxl__ev_time_register_abs(libxl__ao *ao, libxl__ev_time *ev,
 libxl__ev_time_callback *func,
 struct timeval absolute)
@@ -326,6 +345,9 @@ int libxl__ev_time_register_abs(libxl__ao *ao, 
libxl__ev_time *ev,
 DBG("ev_time=%p register abs=%lu.%06lu",
 ev, (unsigned long)absolute.tv_sec, (unsigned long)absolute.tv_usec);
 
+rc = time_register_cancel(ao, ev);
+if (rc) goto out;
+
 rc = time_register_finite(gc, ev, absolute);
 if (rc) goto out;
 
@@ -333,6 +355,7 @@ int libxl__ev_time_register_abs(libxl__ao *ao, 
libxl__ev_time *ev,
 
 rc = 0;
  out:
+libxl__ao_cancellable_deregister(&ev->cancel);
 time_done_debug(gc,__func__,ev,rc);
 CTX_UNLOCK;
 return rc;
@@ -351,6 +374,9 @@ int libxl__ev_time_register_rel(libxl__ao *ao, 
libxl__ev_time *ev,
 
 DBG("ev_time=%p register ms=%d", ev, milliseconds);
 
+rc = time_register_cancel(ao, ev);
+if (rc) goto out;
+
 if (milliseconds < 0) {
 ev->infinite = 1;
 } else {
@@ -365,6 +391,7 @@ int libxl__ev_time_register_rel(libxl__ao *ao, 
libxl__ev_time *ev,
 rc = 0;
 
  out:
+libxl__ao_cancellable_deregister(&ev->cancel);
 time_done_debug(gc,__func__,ev,rc);
 CTX_UNLOCK;
 return rc;
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 6caf042..790a489 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -223,6 +223,7 @@ struct libxl__ev_time {
 LIBXL_TAILQ_ENTRY(libxl__ev_time) entry;
 struct timeval abs;
 libxl__osevent_hook_nexus *nexus;
+libxl__ao_cancellable cancel;
 };
 
 typedef struct libxl__ev_xswatch libxl__ev_xswatch;
@@ -828,7 +829,7 @@ _hidden int libxl__ev_time_modify_abs(libxl__gc*, 
libxl__ev_time *ev,
   struct timeval);
 _hidden void libxl__ev_time_deregister(libxl__gc*, libxl__ev_time *ev);
 static inline void libxl__ev_time_init(libxl__ev_time *ev)
-{ ev->func = 0; }
+{ ev->func = 0; libxl__ao_cancellable_init(&ev->cancel); }
 static inline int libxl__ev_time_isregistered(const libxl__ev_time *ev)
 { return !!ev->func; }
 
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 04/28] libxl: suspend: common suspend callbacks take rc

2015-03-31 Thread Ian Jackson
Change the following functions to take a libxl error code rather than
a boolean "ok" value, and translate that value to the boolean expected
by libxc at the last moment:
  domain_suspend_callback_common_done        } dss->callback_common_done
  remus_domain_suspend_callback_common_done  }
  domain_suspend_common_done

Also, abolish domain_suspend_common_failed as
domain_suspend_common_done can easily do its job and the call sites
now have to supply the right rc value anyway.

In domain_suspend_common_guest_suspended, change "ret" to "rc"
as it contains a libxl error code.

There is no functional change in this patch: the proper rc value now
propagates further, but is still eventually smashed to a boolean.

Signed-off-by: Ian Jackson 
Acked-by: Ian Campbell 
---
v2: Fix a leftover comment referring to domain_suspend_common_failed
---
 tools/libxl/libxl_dom.c |   54 +--
 1 file changed, 24 insertions(+), 30 deletions(-)

diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 08a7600..5eef7e3 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -930,7 +930,7 @@ int libxl__toolstack_restore(uint32_t domid, const uint8_t 
*buf,
 static void domain_suspend_done(libxl__egc *egc,
 libxl__domain_suspend_state *dss, int rc);
 static void domain_suspend_callback_common_done(libxl__egc *egc,
-libxl__domain_suspend_state *dss, int ok);
+libxl__domain_suspend_state *dss, int rc);
 
 /*- complicated callback, called by xc_domain_save -*/
 
@@ -1217,11 +1217,9 @@ static void suspend_common_wait_guest_check(libxl__egc 
*egc,
 static void suspend_common_wait_guest_timeout(libxl__egc *egc,
   libxl__ev_time *ev, const struct timeval *requested_abs);
 
-static void domain_suspend_common_failed(libxl__egc *egc,
- libxl__domain_suspend_state *dss);
 static void domain_suspend_common_done(libxl__egc *egc,
libxl__domain_suspend_state *dss,
-   bool ok);
+   int rc);
 
 static bool domain_suspend_pvcontrol_acked(const char *state) {
 /* any value other than "suspend", including ENOENT (i.e. !state), is OK */
@@ -1251,6 +1249,7 @@ static void domain_suspend_callback_common(libxl__egc 
*egc,
 ret = xc_evtchn_notify(CTX->xce, dss->guest_evtchn.port);
 if (ret < 0) {
 LOG(ERROR, "xc_evtchn_notify failed ret=%d", ret);
+rc = ERROR_FAIL;
 goto err;
 }
 
@@ -1271,6 +1270,7 @@ static void domain_suspend_callback_common(libxl__egc 
*egc,
 ret = xc_domain_shutdown(CTX->xch, domid, SHUTDOWN_suspend);
 if (ret < 0) {
 LOGE(ERROR, "xc_domain_shutdown failed");
+rc = ERROR_FAIL;
 goto err;
 }
 /* The guest does not (need to) respond to this sort of request. */
@@ -1285,7 +1285,7 @@ static void domain_suspend_callback_common(libxl__egc 
*egc,
 libxl__domain_pvcontrol_write(gc, XBT_NULL, domid, "suspend");
 
 dss->pvcontrol.path = libxl__domain_pvcontrol_xspath(gc, domid);
-if (!dss->pvcontrol.path) goto err;
+if (!dss->pvcontrol.path) { rc = ERROR_FAIL; goto err; }
 
 dss->pvcontrol.ao = ao;
 dss->pvcontrol.what = "guest acknowledgement of suspend request";
@@ -1295,7 +1295,7 @@ static void domain_suspend_callback_common(libxl__egc 
*egc,
 return;
 
  err:
-domain_suspend_common_failed(egc, dss);
+domain_suspend_common_done(egc, dss, rc);
 }
 
 static void domain_suspend_common_wait_guest_evtchn(libxl__egc *egc,
@@ -1305,8 +1305,8 @@ static void 
domain_suspend_common_wait_guest_evtchn(libxl__egc *egc,
 STATE_AO_GC(dss->ao);
 /* If we should be done waiting, suspend_common_wait_guest_check
  * will end up calling domain_suspend_common_guest_suspended or
- * domain_suspend_common_failed, both of which cancel the evtchn
- * wait.  So re-enable it now. */
+ * domain_suspend_common_done, both of which cancel the evtchn
+ * wait as needed.  So re-enable it now. */
 libxl__ev_evtchn_wait(gc, &dss->guest_evtchn);
 suspend_common_wait_guest_check(egc, dss);
 }
@@ -1371,7 +1371,7 @@ static void 
domain_suspend_common_pvcontrol_suspending(libxl__egc *egc,
 
  err:
 libxl__xs_transaction_abort(gc, &t);
-domain_suspend_common_failed(egc, dss);
+domain_suspend_common_done(egc, dss, rc);
 return;
 }
 
@@ -1395,7 +1395,7 @@ static void domain_suspend_common_wait_guest(libxl__egc 
*egc,
 return;
 
  err:
-domain_suspend_common_failed(egc, dss);
+domain_suspend_common_done(egc, dss, rc);
 }
 
 static void suspend_common_wait_guest_watch(libxl__egc *egc,
@@ -1445,7 +1445,7 @@ static void suspend_common_wait_guest_check(libxl__egc 
*egc,
 return;
 
  err:
-domain_suspend_common_failed(egc, dss);
+domain_

[Xen-devel] [PATCH 28/28] libxl: cancellation: Make datacopiers cancellable

2015-03-31 Thread Ian Jackson
libxl__datacopier_* can now actually generate a callback with
rc==CANCELLED.

This provides cancellation during some corner cases, including (at
least) copying the device model data during the end of domain save.

Signed-off-by: Ian Jackson 
Acked-by: Ian Campbell 
---
v2: New in this version of the series.
---
 tools/libxl/libxl_aoutils.c  |   16 
 tools/libxl/libxl_internal.h |1 +
 2 files changed, 17 insertions(+)

diff --git a/tools/libxl/libxl_aoutils.c b/tools/libxl/libxl_aoutils.c
index ece7981..919bf12 100644
--- a/tools/libxl/libxl_aoutils.c
+++ b/tools/libxl/libxl_aoutils.c
@@ -103,6 +103,7 @@ static void xswait_report_error(libxl__egc *egc, 
libxl__xswait_state *xswa,
 void libxl__datacopier_init(libxl__datacopier_state *dc)
 {
 assert(dc->ao);
+libxl__ao_cancellable_init(&dc->cancel);
 libxl__ev_fd_init(&dc->toread);
 libxl__ev_fd_init(&dc->towrite);
 LIBXL_TAILQ_INIT(&dc->bufs);
@@ -113,6 +114,7 @@ void libxl__datacopier_kill(libxl__datacopier_state *dc)
 STATE_AO_GC(dc->ao);
 libxl__datacopier_buf *buf, *tbuf;
 
+libxl__ao_cancellable_deregister(&dc->cancel);
 libxl__ev_fd_deregister(gc, &dc->toread);
 libxl__ev_fd_deregister(gc, &dc->towrite);
 LIBXL_TAILQ_FOREACH_SAFE(buf, &dc->bufs, entry, tbuf)
@@ -196,6 +198,15 @@ static int datacopier_pollhup_handled(libxl__egc *egc,
 return 0;
 }
 
+static void datacopier_cancel(libxl__egc *egc, libxl__ao_cancellable *cancel,
+  int rc)
+{
+libxl__datacopier_state *dc = CONTAINER_OF(cancel, *dc, cancel);
+STATE_AO_GC(dc->ao);
+
+datacopier_callback(egc, dc, rc, -1, 0);
+}
+
 static void datacopier_readable(libxl__egc *egc, libxl__ev_fd *ev,
 int fd, short events, short revents) {
 libxl__datacopier_state *dc = CONTAINER_OF(ev, *dc, toread);
@@ -312,6 +323,11 @@ int libxl__datacopier_start(libxl__datacopier_state *dc)
 
 libxl__datacopier_init(dc);
 
+dc->cancel.ao = ao;
+dc->cancel.callback = datacopier_cancel;
+rc = libxl__ao_cancellable_register(&dc->cancel);
+if (rc) goto out;
+
 rc = libxl__ev_fd_register(gc, &dc->toread, datacopier_readable,
dc->readfd, POLLIN);
 if (rc) goto out;
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index be5bece..35e6643 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2593,6 +2593,7 @@ struct libxl__datacopier_state {
 libxl__datacopier_callback *callback;
 libxl__datacopier_callback *callback_pollhup;
 /* remaining fields are private to datacopier */
+libxl__ao_cancellable cancel;
 libxl__ev_fd toread, towrite;
 ssize_t used;
 LIBXL_TAILQ_HEAD(libxl__datacopier_bufs, libxl__datacopier_buf) bufs;
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 14/28] libxl: ao: Record ultimate parent of a nested ao

2015-03-31 Thread Ian Jackson
This will be used by the cancellation machinery.

Signed-off-by: Ian Jackson 
Acked-by: Ian Campbell 
---
 tools/libxl/libxl_event.c|   25 +++--
 tools/libxl/libxl_internal.h |3 ++-
 2 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/tools/libxl/libxl_event.c b/tools/libxl/libxl_event.c
index 8604610..c9ec3c4 100644
--- a/tools/libxl/libxl_event.c
+++ b/tools/libxl/libxl_event.c
@@ -31,6 +31,9 @@
 #define DBG(args, ...) LIBXL__DBG_LOG(CTX, args, __VA_ARGS__)
 
 
+static libxl__ao *ao_nested_root(libxl__ao *ao);
+
+
 /*
  * The counter osevent_in_hook is used to ensure that the application
  * honours the reentrancy restriction documented in libxl_event.h.
@@ -1759,7 +1762,7 @@ void libxl__ao_complete(libxl__egc *egc, libxl__ao *ao, 
int rc)
 LOG(DEBUG,"ao %p: complete, rc=%d",ao,rc);
 assert(ao->magic == LIBXL__AO_MAGIC);
 assert(!ao->complete);
-assert(!ao->nested);
+assert(!ao->nested_root);
 ao->complete = 1;
 ao->rc = rc;
 
@@ -1930,7 +1933,7 @@ void libxl__ao_progress_report(libxl__egc *egc, libxl__ao 
*ao,
 const libxl_asyncprogress_how *how, libxl_event *ev)
 {
 AO_GC;
-assert(!ao->nested);
+assert(!ao->nested_root);
 if (how->callback == dummy_asyncprogress_callback_ignore) {
 LOG(DEBUG,"ao %p: progress report: ignored",ao);
 libxl_event_free(CTX,ev);
@@ -1953,21 +1956,23 @@ void libxl__ao_progress_report(libxl__egc *egc, 
libxl__ao *ao,
 
 /* nested ao */
 
+static libxl__ao *ao_nested_root(libxl__ao *ao) {
+libxl__ao *root = ao->nested_root ? : ao;
+assert(!root->nested_root);
+return root;
+}
+
 _hidden libxl__ao *libxl__nested_ao_create(libxl__ao *parent)
 {
-/* We only use the parent to get the ctx.  However, we require the
- * caller to provide us with an ao, not just a ctx, to prove that
- * they are already in an asynchronous operation.  That will avoid
- * people using this to (for example) make an ao in a non-ao_how
- * function somewhere in the middle of libxl. */
-libxl__ao *child = NULL;
+libxl__ao *child = NULL, *root;
 libxl_ctx *ctx = libxl__gc_owner(&parent->gc);
 
 assert(parent->magic == LIBXL__AO_MAGIC);
+root = ao_nested_root(parent);
 
 child = libxl__zalloc(&ctx->nogc_gc, sizeof(*child));
 child->magic = LIBXL__AO_MAGIC;
-child->nested = 1;
+child->nested_root = root;
 LIBXL_INIT_GC(child->gc, ctx);
 libxl__gc *gc = &child->gc;
 
@@ -1978,7 +1983,7 @@ _hidden libxl__ao *libxl__nested_ao_create(libxl__ao 
*parent)
 _hidden void libxl__nested_ao_free(libxl__ao *child)
 {
 assert(child->magic == LIBXL__AO_MAGIC);
-assert(child->nested);
+assert(child->nested_root);
 libxl_ctx *ctx = libxl__gc_owner(&child->gc);
 libxl__ao__destroy(ctx, child);
 }
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index accbab8..fe5c94f 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -448,7 +448,8 @@ struct libxl__ao {
  * only in libxl__ao_complete.)
  */
 uint32_t magic;
-unsigned constructing:1, in_initiator:1, complete:1, notified:1, nested:1;
+unsigned constructing:1, in_initiator:1, complete:1, notified:1;
+libxl__ao *nested_root;
 int progress_reports_outstanding;
 int rc;
 libxl__gc gc;
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 26/28] libxl: cancellation: Cancel libxc save/restore

2015-03-31 Thread Ian Jackson
Register the save/restore helper interface with the cancellation
machinery.  When we are informed that save/restore should be
cancelled, we make a note of that in our rc variable, and send the
helper a SIGTERM.  It will die in due course.

Signed-off-by: Ian Jackson 
Acked-by: Ian Campbell 
---
v2: New in this version of the series.
---
 tools/libxl/libxl_internal.h |1 +
 tools/libxl/libxl_save_callout.c |   25 +
 2 files changed, 26 insertions(+)

diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 465cdda..883daae 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2638,6 +2638,7 @@ typedef struct libxl__save_helper_state {
 int rc;
 int completed; /* retval/errnoval valid iff completed */
 int retval, errnoval; /* from xc_domain_save / xc_domain_restore */
+libxl__ao_cancellable cancel;
 libxl__carefd *pipes[2]; /* 0 = helper's stdin, 1 = helper's stdout */
 libxl__ev_fd readable;
 libxl__ev_child child;
diff --git a/tools/libxl/libxl_save_callout.c b/tools/libxl/libxl_save_callout.c
index 1d584f1..d9fa0d2 100644
--- a/tools/libxl/libxl_save_callout.c
+++ b/tools/libxl/libxl_save_callout.c
@@ -32,6 +32,7 @@ static void run_helper(libxl__egc *egc, 
libxl__save_helper_state *shs,
const unsigned long *argnums, int num_argnums);
 
 static void helper_failed(libxl__egc*, libxl__save_helper_state *shs, int rc);
+static void helper_cancel(libxl__egc *egc, libxl__ao_cancellable*, int rc);
 static void helper_stdout_readable(libxl__egc *egc, libxl__ev_fd *ev,
int fd, short events, short revents);
 static void helper_exited(libxl__egc *egc, libxl__ev_child *ch,
@@ -166,9 +167,15 @@ static void run_helper(libxl__egc *egc, 
libxl__save_helper_state *shs,
 shs->rc = 0;
 shs->completed = 0;
 shs->pipes[0] = shs->pipes[1] = 0;
+libxl__ao_cancellable_init(&shs->cancel);
 libxl__ev_fd_init(&shs->readable);
 libxl__ev_child_init(&shs->child);
 
+shs->cancel.ao = shs->ao;
+shs->cancel.callback = helper_cancel;
+rc = libxl__ao_cancellable_register(&shs->cancel);
+if (rc) goto out;
+
 shs->stdin_what = GCSPRINTF("domain %"PRIu32" save/restore helper"
 " stdin pipe", domid);
 shs->stdout_what = GCSPRINTF("domain %"PRIu32" save/restore helper"
@@ -262,6 +269,23 @@ static void helper_failed(libxl__egc *egc, 
libxl__save_helper_state *shs,
 sendsig(gc, shs, SIGKILL);
 }
 
+static void helper_cancel(libxl__egc *egc, libxl__ao_cancellable *cancel,
+  int rc)
+{
+libxl__save_helper_state *shs = CONTAINER_OF(cancel, *shs, cancel);
+STATE_AO_GC(shs->ao);
+
+if (!libxl__ev_child_inuse(&shs->child)) {
+helper_failed(egc, shs, rc);
+return;
+}
+
+if (!shs->rc)
+shs->rc = rc;
+
+sendsig(gc, shs, SIGTERM);
+}
+
 static void helper_stdout_readable(libxl__egc *egc, libxl__ev_fd *ev,
int fd, short events, short revents)
 {
@@ -332,6 +356,7 @@ static void helper_done(libxl__egc *egc, 
libxl__save_helper_state *shs)
 {
 STATE_AO_GC(shs->ao);
 
+libxl__ao_cancellable_deregister(&shs->cancel);
 libxl__ev_fd_deregister(gc, &shs->readable);
 libxl__carefd_close(shs->pipes[0]);  shs->pipes[0] = 0;
 libxl__carefd_close(shs->pipes[1]);  shs->pipes[1] = 0;
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 21/28] libxl: Introduce DOMAIN_DESTROYED error code

2015-03-31 Thread Ian Jackson
This is currently reported only by the bootloader code, if the domain
is destroyed while the bootloader is running.

In the future it would be nice to return it for other circumstances
where the domain existed when the operation started but subsequently
vanished.

Signed-off-by: Ian Jackson 
Acked-by: Ian Campbell 
---
v2: New in this version of the series.
---
 tools/libxl/libxl_bootloader.c |2 +-
 tools/libxl/libxl_types.idl|1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/libxl/libxl_bootloader.c b/tools/libxl/libxl_bootloader.c
index 79947d4..c3f3a1f 100644
--- a/tools/libxl/libxl_bootloader.c
+++ b/tools/libxl/libxl_bootloader.c
@@ -611,7 +611,7 @@ static void bootloader_display_copyfail(libxl__egc *egc,
 static void bootloader_domaindeath(libxl__egc *egc, libxl__domaindeathcheck 
*dc)
 {
 libxl__bootloader_state *bl = CONTAINER_OF(dc, *bl, deathcheck);
-bootloader_stop(egc, bl, ERROR_FAIL);
+bootloader_stop(egc, bl, ERROR_DOMAIN_DESTROYED);
 }
 
 static void bootloader_finished(libxl__egc *egc, libxl__ev_child *child,
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index 478c561..2ddaef1 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -66,6 +66,7 @@ libxl_error = Enumeration("error", [
 (-20, "CANCELLED"),
 (-21, "NOTFOUND"),
 (-22, "NOTIMPLEMENTED"),
+(-23, "DOMAIN_DESTROYED"),
 ], value_namespace = "")
 
 libxl_domain_type = Enumeration("domain_type", [
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 03/28] libxl: suspend: switch_logdirty_done takes rc

2015-03-31 Thread Ian Jackson
switch_logdirty_done used to take the value to pass to
libxl__xc_domain_saverestore_async_callback_done (ie, the return value
from the callback).  (This was mistakenly described as "ok" in the
prototype, but in the definition it is "broke" and all the call sites
passed 0 for success or -1 for error.)

Instead, make it take a libxl error code (rc).  Convert this to the
suspend callback value at the end.

No functional change in this patch.

Signed-off-by: Ian Jackson 
---
 tools/libxl/libxl_dom.c |   23 ++-
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index a16d4a1..08a7600 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -947,7 +947,7 @@ static void switch_logdirty_timeout(libxl__egc *egc, 
libxl__ev_time *ev,
 static void switch_logdirty_xswatch(libxl__egc *egc, libxl__ev_xswatch*,
 const char *watch_path, const char *event_path);
 static void switch_logdirty_done(libxl__egc *egc,
- libxl__domain_suspend_state *dss, int ok);
+ libxl__domain_suspend_state *dss, int rc);
 
 static void logdirty_init(libxl__logdirty_switch *lds)
 {
@@ -1024,7 +1024,7 @@ static void 
domain_suspend_switch_qemu_xen_traditional_logdirty
  out:
 LOG(ERROR,"logdirty switch failed (rc=%d), aborting suspend",rc);
 libxl__xs_transaction_abort(gc, &t);
-switch_logdirty_done(egc,dss,-1);
+switch_logdirty_done(egc,dss,rc);
 }
 
 static void domain_suspend_switch_qemu_xen_logdirty
@@ -1072,7 +1072,7 @@ static void switch_logdirty_timeout(libxl__egc *egc, 
libxl__ev_time *ev,
 libxl__domain_suspend_state *dss = CONTAINER_OF(ev, *dss, 
logdirty.timeout);
 STATE_AO_GC(dss->ao);
 LOG(ERROR,"logdirty switch: wait for device model timed out");
-switch_logdirty_done(egc,dss,-1);
+switch_logdirty_done(egc,dss,ERROR_FAIL);
 }
 
 static void switch_logdirty_xswatch(libxl__egc *egc, libxl__ev_xswatch *watch,
@@ -1124,17 +1124,16 @@ static void switch_logdirty_xswatch(libxl__egc *egc, 
libxl__ev_xswatch *watch,
  */
 libxl__xs_transaction_abort(gc, &t);
 
-if (!rc) {
-switch_logdirty_done(egc,dss,0);
-} else if (rc < 0) {
-LOG(ERROR,"logdirty switch: failed (rc=%d)",rc);
-switch_logdirty_done(egc,dss,-1);
+if (rc <= 0) {
+if (rc < 0)
+LOG(ERROR,"logdirty switch: failed (rc=%d)",rc);
+switch_logdirty_done(egc,dss,rc);
 }
 }
 
 static void switch_logdirty_done(libxl__egc *egc,
  libxl__domain_suspend_state *dss,
- int broke)
+ int rc)
 {
 STATE_AO_GC(dss->ao);
 libxl__logdirty_switch *lds = &dss->logdirty;
@@ -1142,6 +1141,12 @@ static void switch_logdirty_done(libxl__egc *egc,
 libxl__ev_xswatch_deregister(gc, &lds->watch);
 libxl__ev_time_deregister(gc, &lds->timeout);
 
+int broke;
+if (rc) {
+broke = -1;
+} else {
+broke = 0;
+}
 libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, broke);
 }
 
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 24/28] libxl: cancellation: Preparations for save/restore cancellation

2015-03-31 Thread Ian Jackson
Two unrelated non-functional changes, broken out into a pre-patch for
easier review:

Break out a function sendsig() in libxl_save_callout.c.

Move io_fd to be a global variable in libxl_save_helper.c.

Signed-off-by: Ian Jackson 
Acked-by: Ian Campbell 
---
v2: New in this version of the series.
---
 tools/libxl/libxl_save_callout.c |   10 +++---
 tools/libxl/libxl_save_helper.c  |5 +++--
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/tools/libxl/libxl_save_callout.c b/tools/libxl/libxl_save_callout.c
index 40b25e4..1d584f1 100644
--- a/tools/libxl/libxl_save_callout.c
+++ b/tools/libxl/libxl_save_callout.c
@@ -237,6 +237,12 @@ static void run_helper(libxl__egc *egc, 
libxl__save_helper_state *shs,
 libxl__carefd_close(childs_pipes[1]);
 helper_failed(egc, shs, rc);;
 }
+static void sendsig(libxl__gc *gc, libxl__save_helper_state *shs, int sig)
+{
+int r = kill(shs->child.pid, sig);
+if (r) LOGE(WARN, "failed to kill save/restore helper [%lu] (signal %d)",
+(unsigned long)shs->child.pid, sig);
+}
 
 static void helper_failed(libxl__egc *egc, libxl__save_helper_state *shs,
   int rc)
@@ -253,9 +259,7 @@ static void helper_failed(libxl__egc *egc, 
libxl__save_helper_state *shs,
 return;
 }
 
-int r = kill(shs->child.pid, SIGKILL);
-if (r) LOGE(WARN, "failed to kill save/restore helper [%lu]",
-(unsigned long)shs->child.pid);
+sendsig(gc, shs, SIGKILL);
 }
 
 static void helper_stdout_readable(libxl__egc *egc, libxl__ev_fd *ev,
diff --git a/tools/libxl/libxl_save_helper.c b/tools/libxl/libxl_save_helper.c
index 74826a1..7514b2e 100644
--- a/tools/libxl/libxl_save_helper.c
+++ b/tools/libxl/libxl_save_helper.c
@@ -85,6 +85,7 @@ static xentoollog_logger logger = {
 tellparent_destroy,
 };
 static xc_interface *xch;
+static int io_fd;
 
 /*- error handling -*/
 
@@ -211,7 +212,7 @@ int main(int argc, char **argv)
 
 if (!strcmp(mode,"--save-domain")) {
 
-int io_fd =atoi(NEXTARG);
+io_fd =atoi(NEXTARG);
 uint32_t dom = strtoul(NEXTARG,0,10);
 uint32_t max_iters =   strtoul(NEXTARG,0,10);
 uint32_t max_factor =  strtoul(NEXTARG,0,10);
@@ -234,7 +235,7 @@ int main(int argc, char **argv)
 
 } else if (!strcmp(mode,"--restore-domain")) {
 
-int io_fd =atoi(NEXTARG);
+io_fd =atoi(NEXTARG);
 uint32_t dom = strtoul(NEXTARG,0,10);
 unsigned store_evtchn =strtoul(NEXTARG,0,10);
 domid_t store_domid =  strtoul(NEXTARG,0,10);
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 20/28] libxl: cancellation: Note that driver domain task cannot be usefully cancelled

2015-03-31 Thread Ian Jackson
In practice, cancelling this task will cause all subsequent actual
backend operations to fail, but will not actually cause the
libxl_device_events_handler operation to complete.

Signed-off-by: Ian Jackson 
CC: Roger Pau Monné 
Acked-by: Roger Pau Monné 
Acked-by: Ian Campbell 
---
v2: New in this version of the series.
---
 tools/libxl/libxl.h |3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
index a688070..04c399f 100644
--- a/tools/libxl/libxl.h
+++ b/tools/libxl/libxl.h
@@ -1371,6 +1371,9 @@ libxl_device_pci *libxl_device_pci_list(libxl_ctx *ctx, 
uint32_t domid,
  * From a libxl API point of view, this starts a long-running
  * operation.  That operation consists of "being a driver domain"
  * and never completes.
+ *
+ * Attempting to cancel this operation is not advisable; proper
+ * shutdown of the driver domain task is not supported.
  */
 int libxl_device_events_handler(libxl_ctx *ctx,
 const libxl_asyncop_how *ao_how)
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 15/28] libxl: ao: Count the nested progeny of an ao

2015-03-31 Thread Ian Jackson
This will detect any "escaped" nested aos.

Signed-off-by: Ian Jackson 
Acked-by: Ian Campbell 
---
 tools/libxl/libxl_event.c|8 +++-
 tools/libxl/libxl_internal.h |1 +
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/tools/libxl/libxl_event.c b/tools/libxl/libxl_event.c
index c9ec3c4..c95db5b 100644
--- a/tools/libxl/libxl_event.c
+++ b/tools/libxl/libxl_event.c
@@ -1763,6 +1763,7 @@ void libxl__ao_complete(libxl__egc *egc, libxl__ao *ao, 
int rc)
 assert(ao->magic == LIBXL__AO_MAGIC);
 assert(!ao->complete);
 assert(!ao->nested_root);
+assert(!ao->nested_progeny);
 ao->complete = 1;
 ao->rc = rc;
 
@@ -1973,6 +1974,8 @@ _hidden libxl__ao *libxl__nested_ao_create(libxl__ao 
*parent)
 child = libxl__zalloc(&ctx->nogc_gc, sizeof(*child));
 child->magic = LIBXL__AO_MAGIC;
 child->nested_root = root;
+assert(root->nested_progeny < INT_MAX);
+root->nested_progeny++;
 LIBXL_INIT_GC(child->gc, ctx);
 libxl__gc *gc = &child->gc;
 
@@ -1983,7 +1986,10 @@ _hidden libxl__ao *libxl__nested_ao_create(libxl__ao 
*parent)
 _hidden void libxl__nested_ao_free(libxl__ao *child)
 {
 assert(child->magic == LIBXL__AO_MAGIC);
-assert(child->nested_root);
+libxl__ao *root = child->nested_root;
+assert(root);
+assert(root->nested_progeny > 0);
+root->nested_progeny--;
 libxl_ctx *ctx = libxl__gc_owner(&child->gc);
 libxl__ao__destroy(ctx, child);
 }
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index fe5c94f..e29db43 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -450,6 +450,7 @@ struct libxl__ao {
 uint32_t magic;
 unsigned constructing:1, in_initiator:1, complete:1, notified:1;
 libxl__ao *nested_root;
+int nested_progeny;
 int progress_reports_outstanding;
 int rc;
 libxl__gc gc;
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 27/28] libxl: ao: datacopier callback gets an rc

2015-03-31 Thread Ian Jackson
libxl__datacopier_* now provides its caller's callback function with
an rc value.  This relieves the caller of the need to figure out an
appropriate rc value.

Arrange that the `other internal failure' cases now get a valid
positive errno value (EIO).

In a few places, assert that errno is nonzero before passing it to our
caller.

Extend the datacopier callback API to permit the dc to signal
CANCELLED.  (It doesn't actually do this yet, though.)

Signed-off-by: Ian Jackson 
Acked-by: Ian Campbell 
CC: Andrew Cooper 
CC: David Vrabel 
---
v2: New in this version of the series.
---
 tools/libxl/libxl_aoutils.c|   22 --
 tools/libxl/libxl_bootloader.c |   20 +++-
 tools/libxl/libxl_dom.c|   10 +++---
 tools/libxl/libxl_internal.h   |   18 +++---
 4 files changed, 37 insertions(+), 33 deletions(-)

diff --git a/tools/libxl/libxl_aoutils.c b/tools/libxl/libxl_aoutils.c
index 0b6d750..ece7981 100644
--- a/tools/libxl/libxl_aoutils.c
+++ b/tools/libxl/libxl_aoutils.c
@@ -121,10 +121,10 @@ void libxl__datacopier_kill(libxl__datacopier_state *dc)
 }
 
 static void datacopier_callback(libxl__egc *egc, libxl__datacopier_state *dc,
-int onwrite, int errnoval)
+int rc, int onwrite, int errnoval)
 {
 libxl__datacopier_kill(dc);
-dc->callback(egc, dc, onwrite, errnoval);
+dc->callback(egc, dc, rc, onwrite, errnoval);
 }
 
 static void datacopier_writable(libxl__egc *egc, libxl__ev_fd *ev,
@@ -142,13 +142,13 @@ static void datacopier_check_state(libxl__egc *egc, 
libxl__datacopier_state *dc)
 if (rc) {
 LOG(ERROR, "unable to establish write event on %s"
 " during copy of %s", dc->writewhat, dc->copywhat);
-datacopier_callback(egc, dc, -1, 0);
+datacopier_callback(egc, dc, ERROR_FAIL, -1, EIO);
 return;
 }
 }
 } else if (!libxl__ev_fd_isregistered(&dc->toread)) {
 /* we have had eof */
-datacopier_callback(egc, dc, 0, 0);
+datacopier_callback(egc, dc, 0, 0, 0);
 return;
 } else {
 /* nothing buffered, but still reading */
@@ -190,7 +190,7 @@ static int datacopier_pollhup_handled(libxl__egc *egc,
 onwrite ? dc->writewhat : dc->readwhat,
 dc->copywhat);
 libxl__datacopier_kill(dc);
-dc->callback_pollhup(egc, dc, onwrite, -1);
+dc->callback_pollhup(egc, dc, ERROR_FAIL, onwrite, -1);
 return 1;
 }
 return 0;
@@ -207,7 +207,7 @@ static void datacopier_readable(libxl__egc *egc, 
libxl__ev_fd *ev,
 if (revents & ~POLLIN) {
 LOG(ERROR, "unexpected poll event 0x%x (should be POLLIN)"
 " on %s during copy of %s", revents, dc->readwhat, dc->copywhat);
-datacopier_callback(egc, dc, -1, 0);
+datacopier_callback(egc, dc, ERROR_FAIL, -1, EIO);
 return;
 }
 assert(revents & POLLIN);
@@ -234,9 +234,10 @@ static void datacopier_readable(libxl__egc *egc, 
libxl__ev_fd *ev,
 if (r < 0) {
 if (errno == EINTR) continue;
 if (errno == EWOULDBLOCK) break;
+assert(errno);
 LOGE(ERROR, "error reading %s during copy of %s",
  dc->readwhat, dc->copywhat);
-datacopier_callback(egc, dc, 0, errno);
+datacopier_callback(egc, dc, ERROR_FAIL, 0, errno);
 return;
 }
 if (r == 0) {
@@ -249,7 +250,7 @@ static void datacopier_readable(libxl__egc *egc, 
libxl__ev_fd *ev,
 assert(ferror(dc->log));
 assert(errno);
 LOGE(ERROR, "error logging %s", dc->copywhat);
-datacopier_callback(egc, dc, 0, errno);
+datacopier_callback(egc, dc, ERROR_FAIL, 0, errno);
 return;
 }
 }
@@ -271,7 +272,7 @@ static void datacopier_writable(libxl__egc *egc, 
libxl__ev_fd *ev,
 if (revents & ~POLLOUT) {
 LOG(ERROR, "unexpected poll event 0x%x (should be POLLOUT)"
 " on %s during copy of %s", revents, dc->writewhat, dc->copywhat);
-datacopier_callback(egc, dc, -1, 0);
+datacopier_callback(egc, dc, ERROR_FAIL, -1, EIO);
 return;
 }
 assert(revents & POLLOUT);
@@ -288,9 +289,10 @@ static void datacopier_writable(libxl__egc *egc, 
libxl__ev_fd *ev,
 if (r < 0) {
 if (errno == EINTR) continue;
 if (errno == EWOULDBLOCK) break;
+assert(errno);
 LOGE(ERROR, "error writing to %s during copy of %s",
  dc->writewhat, dc->copywhat);
-datacopier_callback(egc, dc, 1, errno);
+datacopier_callback(egc, dc, ERROR_FAIL, 1, errno);
 return;
 }
 assert(r > 0);
diff --git a/tools/libxl/libxl_bootloader.c b/tools/libxl/libxl_bootloader.c
index 21f92dc..c26f1d6 100644
--- a/tools

[Xen-devel] [PATCH 11/28] libxl: events: Make libxl__async_exec_* pass caller an rc

2015-03-31 Thread Ian Jackson
The internal user of libxl__async_exec_start et al now gets an rc as
well as the process's exit status.

For now this is always either 0 or ERROR_FAIL, but with ao
cancellation this will possibly be CANCELLED or TIMEDOUT too.

Signed-off-by: Ian Jackson 
---
v2: New patch due to rebase; v1 had changes to device_hotplug_*
 scripts instead.
Callback now gets unambiguous information about error situation:
 previously, if the only thing that went wrong was that the child died
 badly, rc would be FAILED, which was ambiguous; now rc=0.
Add a comment documenting the meaning of the rc and status parameters
 to the callback.
---
 tools/libxl/libxl_aoutils.c |9 ++---
 tools/libxl/libxl_device.c  |   13 +
 tools/libxl/libxl_internal.h|   11 ++-
 tools/libxl/libxl_netbuffer.c   |   19 ++-
 tools/libxl/libxl_remus_disk_drbd.c |8 +---
 5 files changed, 40 insertions(+), 20 deletions(-)

diff --git a/tools/libxl/libxl_aoutils.c b/tools/libxl/libxl_aoutils.c
index 754e2d1..891cdb8 100644
--- a/tools/libxl/libxl_aoutils.c
+++ b/tools/libxl/libxl_aoutils.c
@@ -483,11 +483,12 @@ static void async_exec_done(libxl__egc *egc,
 libxl__ev_time_deregister(gc, &aes->time);
 
 if (status) {
-libxl_report_child_exitstatus(CTX, LIBXL__LOG_ERROR,
-  aes->what, pid, status);
+if (!aes->rc)
+libxl_report_child_exitstatus(CTX, LIBXL__LOG_ERROR,
+  aes->what, pid, status);
 }
 
-aes->callback(egc, aes, status);
+aes->callback(egc, aes, aes->rc, status);
 }
 
 void libxl__async_exec_init(libxl__async_exec_state *aes)
@@ -506,6 +507,8 @@ int libxl__async_exec_start(libxl__async_exec_state *aes)
 libxl__ev_child *const child = &aes->child;
 char ** const args = aes->args;
 
+aes->rc = 0;
+
 /* Set execution timeout */
 if (libxl__ev_time_register_rel(ao, &aes->time,
 async_exec_timeout,
diff --git a/tools/libxl/libxl_device.c b/tools/libxl/libxl_device.c
index c80749f..84114ff 100644
--- a/tools/libxl/libxl_device.c
+++ b/tools/libxl/libxl_device.c
@@ -729,7 +729,7 @@ static void device_hotplug(libxl__egc *egc, 
libxl__ao_device *aodev);
 
 static void device_hotplug_child_death_cb(libxl__egc *egc,
   libxl__async_exec_state *aes,
-  int status);
+  int rc, int status);
 
 static void device_destroy_be_watch_cb(libxl__egc *egc,
libxl__xswait_state *xswait,
@@ -1052,7 +1052,7 @@ out:
 
 static void device_hotplug_child_death_cb(libxl__egc *egc,
   libxl__async_exec_state *aes,
-  int status)
+  int rc, int status)
 {
 libxl__ao_device *aodev = CONTAINER_OF(aes, *aodev, aes);
 STATE_AO_GC(aodev->ao);
@@ -1061,12 +1061,17 @@ static void device_hotplug_child_death_cb(libxl__egc 
*egc,
 
 device_hotplug_clean(gc, aodev);
 
-if (status) {
+if (status && !rc) {
 hotplug_error = libxl__xs_read(gc, XBT_NULL,
GCSPRINTF("%s/hotplug-error", be_path));
 if (hotplug_error)
 LOG(ERROR, "script: %s", hotplug_error);
-aodev->rc = ERROR_FAIL;
+rc = ERROR_FAIL;
+}
+
+if (rc) {
+if (!aodev->rc)
+aodev->rc = rc;
 if (aodev->action == LIBXL__DEVICE_ACTION_ADD)
 /*
  * Only fail on device connection, on disconnection
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index b615fc5..02cac7b 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2089,7 +2089,15 @@ _hidden const char *libxl__run_dir_path(void);
 typedef struct libxl__async_exec_state libxl__async_exec_state;
 
 typedef void libxl__async_exec_callback(libxl__egc *egc,
-libxl__async_exec_state *aes, int status);
+libxl__async_exec_state *aes, int rc, int status);
+/*
+ * Meaning of status and rc:
+ *  rc==0, status==0all went well
+ *  rc==0, status!=0everything OK except child exited nonzero (logged)
+ *  rc!=0   something else went wrong (status is real
+ *   exit status, maybe reflecting SIGKILL if aes
+ *   code killed the child).  Logged unless CANCELLED.
+ */
 
 struct libxl__async_exec_state {
 /* caller must fill these in */
@@ -2105,6 +2113,7 @@ struct libxl__async_exec_state {
 /* private */
 libxl__ev_time time;
 libxl__ev_child child;
+int rc;
 };
 
 void libxl__async_exec_init(libxl__async_exec_state *aes);
diff --git a/tools/libxl/libxl_netbuffer.c b/tools/libxl/libxl_netbuffer.c
index edc6843..ff2d6c7 100644

[Xen-devel] [PATCH 17/28] libxl: cancellation: Provide public ao cancellation API

2015-03-31 Thread Ian Jackson
Provide libxl_ao_cancel.

There is machinery to allow an ao to register an interest in its
cancellation, using a libxl__ao_cancellable.

This API is not currently very functional: attempting cancellation
will always return NOTIMPLEMENTED and have no effect.

Signed-off-by: Ian Jackson 
Acked-by: Ian Campbell 
---
v2: Minor comment improvements
---
 tools/libxl/libxl.c  |3 ++
 tools/libxl/libxl.h  |   64 ++
 tools/libxl/libxl_event.c|  123 ++
 tools/libxl/libxl_internal.h |   42 ++-
 4 files changed, 231 insertions(+), 1 deletion(-)

diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index de0fc6b..19e36f1 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -73,6 +73,8 @@ int libxl_ctx_alloc(libxl_ctx **pctx, int version,
 LIBXL_LIST_INIT(&ctx->evtchns_waiting);
 libxl__ev_fd_init(&ctx->evtchn_efd);
 
+LIBXL_LIST_INIT(&ctx->aos_inprogress);
+
 LIBXL_TAILQ_INIT(&ctx->death_list);
 libxl__ev_xswatch_init(&ctx->death_watch);
 
@@ -174,6 +176,7 @@ int libxl_ctx_free(libxl_ctx *ctx)
 assert(LIBXL_LIST_EMPTY(&ctx->efds));
 assert(LIBXL_TAILQ_EMPTY(&ctx->etimes));
 assert(LIBXL_LIST_EMPTY(&ctx->evtchns_waiting));
+assert(LIBXL_LIST_EMPTY(&ctx->aos_inprogress));
 
 if (ctx->xch) xc_interface_close(ctx->xch);
 libxl_version_info_dispose(&ctx->version_info);
diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
index dc05e02..a688070 100644
--- a/tools/libxl/libxl.h
+++ b/tools/libxl/libxl.h
@@ -641,6 +641,11 @@ typedef struct libxl__ctx libxl_ctx;
  */
 #define LIBXL_HAVE_DEVICE_CHANNEL 1
 
+/*
+ * LIBXL_HAVE_AO_CANCEL indicates the availability of libxl_ao_cancel
+ */
+#define LIBXL_HAVE_AO_CANCEL 1
+
 /* Functions annotated with LIBXL_EXTERNAL_CALLERS_ONLY may not be
  * called from within libxl itself. Callers outside libxl, who
  * do not #include libxl_internal.h, are fine. */
@@ -910,6 +915,65 @@ typedef struct {
 void *for_callback; /* passed to callback */
 } libxl_asyncprogress_how;
 
+/*
+ * It is sometimes possible to cancel an asynchronous operation.
+ *
+ * libxl_ao_cancel searches for an ongoing asynchronous operation whose
+ * ao_how is identical to *how, and tries to cancel it.  The return
+ * values from libxl_ao_cancel are as follows:
+ *
+ *  0
+ *
+ * The operation in question has (at least some) support for
+ * cancellation.  It will be cut short.  However, it may still
+ * take some time to cancel.
+ *
+ *  ERROR_NOTFOUND
+ *
+ *  No matching ongoing operation was found.  This might happen
+ *  for an actual operation if the operation has already completed
+ *  (perhaps on another thread).  The call to libxl_ao_cancel has
+ *  had no effect.
+ *
+ *  ERROR_NOTIMPLEMENTED
+ *
+ * As far as could be determined, the operation in question does
+ * not support cancellation.  The operation may subsequently
+ * complete normally, as if it had never been cancelled; however,
+ * the cancellation attempt will still have been noted and it is
+ * possible that the operation will be successfully cancelled.
+ *
+ *  ERROR_CANCELLED
+ *
+ * The operation has already been the subject of at least one
+ * call to libxl_ao_cancel.
+ *
+ * If the operation was indeed cut short due to the cancellation, it
+ * will complete, at some point in the future, with ERROR_CANCELLED.
+ * In that case, depending on the operation it may have performed some of
+ * the work in question and left the operation half-done.  Consult the
+ * documentation for individual operations.
+ *
+ * Note that a cancelled operation might still fail for other reasons
+ * even after it has been cancelled.
+ *
+ * If your application is multithreaded you must not reuse an
+ * ao_how->for_event or ao_how->for_callback value (with a particular
+ * ao_how->callback) unless you are sure that none of your other
+ * threads are going to cancel the previous operation using that
+ * value; otherwise you risk cancelling the wrong operation if the
+ * intended target of the cancellation completes in the meantime.
+ *
+ * It is possible to cancel even an operation which is being performed
+ * synchronously, but since in that case how==NULL you had better only
+ * have one such operation, because it is not possible to tell them
+ * apart.  (And, if you want to do this, obviously the cancellation
+ * would have to be requested on a different thread.)
+ */
+int libxl_ao_cancel(libxl_ctx *ctx, const libxl_asyncop_how *how)
+LIBXL_EXTERNAL_CALLERS_ONLY;
+
+
 #define LIBXL_VERSION 0
 
 /* context functions */
diff --git a/tools/libxl/libxl_event.c b/tools/libxl/libxl_event.c
index fffadf3..c224715 100644
--- a/tools/libxl/libxl_event.c
+++ b/tools/libxl/libxl_event.c
@@ -1774,6 +1774,7 @@ void libxl__ao_abort(libxl__ao *ao)
 assert(ao->in_initiator);
 assert(!ao->complete);
 assert(!ao->progress_reports_outstanding);
+a

[Xen-devel] [PATCH 16/28] libxl: ao: Provide manip_refcnt

2015-03-31 Thread Ian Jackson
Previously we used in_initiator to stop the ao being freed while we
were still in the initiator function (which would result in the
initiator's call to libxl__ao_inprogress accessing the ao after it had
been freed).

We are going to introduce a new libxl entrypoint which finds, and
operates on, ongoing aos.  This function needs the same protection,
and might even end up running on the same ao multiple times
concurrently.

So do this with reference counting instead, with a new variable
ao->manip_refcnt.

We keep ao->in_initiator because that allows us to keep some useful
asserts about the sequencing of libxl__ao_inprogress, etc.

Signed-off-by: Ian Jackson 
---
v3: Add a missing space.
Mention locking in the comment.
---
 tools/libxl/libxl_event.c|   43 +-
 tools/libxl/libxl_internal.h |1 +
 2 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/tools/libxl/libxl_event.c b/tools/libxl/libxl_event.c
index c95db5b..fffadf3 100644
--- a/tools/libxl/libxl_event.c
+++ b/tools/libxl/libxl_event.c
@@ -33,6 +33,8 @@
 
 static libxl__ao *ao_nested_root(libxl__ao *ao);
 
+static void ao__check_destroy(libxl_ctx *ctx, libxl__ao *ao);
+
 
 /*
  * The counter osevent_in_hook is used to ensure that the application
@@ -1345,8 +1347,7 @@ static void egc_run_callbacks(libxl__egc *egc)
 ao->how.callback(CTX, ao->rc, ao->how.u.for_callback);
 CTX_LOCK;
 ao->notified = 1;
-if (!ao->in_initiator)
-libxl__ao__destroy(CTX, ao);
+ao__check_destroy(CTX, ao);
 CTX_UNLOCK;
 }
 }
@@ -1727,6 +1728,33 @@ int libxl_event_wait(libxl_ctx *ctx, libxl_event 
**event_r,
  *  - destroy the ao
  */
 
+
+/*
+ * A "manip" is a libxl public function manipulating this ao, which
+ * has a pointer to it.  We have to not destroy it while that's the
+ * case, obviously.  Callers must have the ctx locked, obviously.
+ */
+static void ao__manip_enter(libxl__ao *ao)
+{
+assert(ao->manip_refcnt < INT_MAX);
+ao->manip_refcnt++;
+}
+
+static void ao__manip_leave(libxl_ctx *ctx, libxl__ao *ao)
+{
+assert(ao->manip_refcnt > 0);
+ao->manip_refcnt--;
+ao__check_destroy(ctx, ao);
+}
+
+static void ao__check_destroy(libxl_ctx *ctx, libxl__ao *ao)
+{
+if (!ao->manip_refcnt && ao->notified) {
+assert(ao->complete);
+libxl__ao__destroy(ctx, ao);
+}
+}
+
 void libxl__ao__destroy(libxl_ctx *ctx, libxl__ao *ao)
 {
 AO_GC;
@@ -1808,8 +1836,8 @@ void libxl__ao_complete_check_progress_reports(libxl__egc 
*egc, libxl__ao *ao)
 }
 ao->notified = 1;
 }
-if (!ao->in_initiator && ao->notified)
-libxl__ao__destroy(ctx, ao);
+
+ao__check_destroy(ctx, ao);
 }
 
 libxl__ao *libxl__ao_create(libxl_ctx *ctx, uint32_t domid,
@@ -1824,6 +1852,7 @@ libxl__ao *libxl__ao_create(libxl_ctx *ctx, uint32_t 
domid,
 ao->magic = LIBXL__AO_MAGIC;
 ao->constructing = 1;
 ao->in_initiator = 1;
+ao__manip_enter(ao);
 ao->poller = 0;
 ao->domid = domid;
 LIBXL_INIT_GC(ao->gc, ctx);
@@ -1904,11 +1933,7 @@ int libxl__ao_inprogress(libxl__ao *ao,
 }
 
 ao->in_initiator = 0;
-
-if (ao->notified) {
-assert(ao->complete);
-libxl__ao__destroy(CTX,ao);
-}
+ao__manip_leave(CTX, ao);
 
 return rc;
 }
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index e29db43..d2c2637 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -449,6 +449,7 @@ struct libxl__ao {
  */
 uint32_t magic;
 unsigned constructing:1, in_initiator:1, complete:1, notified:1;
+int manip_refcnt;
 libxl__ao *nested_root;
 int nested_progeny;
 int progress_reports_outstanding;
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 09/28] libxl: New error codes CANCELLED etc.

2015-03-31 Thread Ian Jackson
We introduce ERROR_CANCELLED now, so that we can write code to handle
it, and decree that functions might return it, even though currently
there is nowhere where this error is generated.

While we're here, provide ERROR_NOTFOUND and ERROR_NOTIMPLEMENTED,
which will also be used later, but only as part of the public API.

Signed-off-by: Ian Jackson 
Acked-by: Ian Campbell 
---
v2: Rebase means new errors have bigger (more negative) numbers.
---
 tools/libxl/libxl_types.idl |3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index 47af340..478c561 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -63,6 +63,9 @@ libxl_error = Enumeration("error", [
 (-17, "DEVICE_EXISTS"),
 (-18, "REMUS_DEVOPS_DOES_NOT_MATCH"),
 (-19, "REMUS_DEVICE_NOT_SUPPORTED"),
+(-20, "CANCELLED"),
+(-21, "NOTFOUND"),
+(-22, "NOTIMPLEMENTED"),
 ], value_namespace = "")
 
 libxl_domain_type = Enumeration("domain_type", [
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 10/28] libxl: events: Make timeout and async exec setup take an ao, not a gc

2015-03-31 Thread Ian Jackson
Change the timeout setup functions to take a libxl__ao, not a
libxl__gc.  This is going to be needed for ao cancellation, because
timeouts are going to be a main hook for ao cancellation - so the
timeouts need to be associated with an ao.

This means that timeouts can only occur as part of a long-running
libxl function (but this is of course correct, as libxl shouldn't have
any global timeouts, and indeed all the call sites have an ao).

Also remove the gc parameter from libxl__async_exec_start.  It can
just use the gc from the ao supplied in the aes.

All the callers follow the obvious patterns and therefore supply the
ao's gc to libxl__async_exec_start and the timeout setup functions.
There is therefore no functional change in this patch.

Signed-off-by: Ian Jackson 
CC: Yang Hongyang 
CC: Wen Congyang 
CC: Lai Jiangshan 
Acked-by: Wen Congyang 
Acked-by: Ian Campbell 

---
v2: This patch split off from "Permit timeouts to signal cancellation".
Rebased; consequently, deal with libxl__async_exec_start.
CC'd authors of the libxl__async_exec_* functions.
---
 tools/libxl/libxl_aoutils.c |8 +---
 tools/libxl/libxl_device.c  |4 ++--
 tools/libxl/libxl_dom.c |8 
 tools/libxl/libxl_event.c   |6 --
 tools/libxl/libxl_internal.h|6 +++---
 tools/libxl/libxl_remus_disk_drbd.c |2 +-
 tools/libxl/libxl_test_timedereg.c  |9 +
 7 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/tools/libxl/libxl_aoutils.c b/tools/libxl/libxl_aoutils.c
index 44dc222..754e2d1 100644
--- a/tools/libxl/libxl_aoutils.c
+++ b/tools/libxl/libxl_aoutils.c
@@ -46,7 +46,7 @@ int libxl__xswait_start(libxl__gc *gc, libxl__xswait_state 
*xswa)
 {
 int rc;
 
-rc = libxl__ev_time_register_rel(gc, &xswa->time_ev,
+rc = libxl__ev_time_register_rel(xswa->ao, &xswa->time_ev,
  xswait_timeout_callback, 
xswa->timeout_ms);
 if (rc) goto err;
 
@@ -496,16 +496,18 @@ void libxl__async_exec_init(libxl__async_exec_state *aes)
 libxl__ev_child_init(&aes->child);
 }
 
-int libxl__async_exec_start(libxl__gc *gc, libxl__async_exec_state *aes)
+int libxl__async_exec_start(libxl__async_exec_state *aes)
 {
 pid_t pid;
 
 /* Convenience aliases */
+libxl__ao *ao = aes->ao;
+AO_GC;
 libxl__ev_child *const child = &aes->child;
 char ** const args = aes->args;
 
 /* Set execution timeout */
-if (libxl__ev_time_register_rel(gc, &aes->time,
+if (libxl__ev_time_register_rel(ao, &aes->time,
 async_exec_timeout,
 aes->timeout_ms)) {
 LOG(ERROR, "unable to register timeout for executing: %s", aes->what);
diff --git a/tools/libxl/libxl_device.c b/tools/libxl/libxl_device.c
index 0455134..c80749f 100644
--- a/tools/libxl/libxl_device.c
+++ b/tools/libxl/libxl_device.c
@@ -808,7 +808,7 @@ void libxl__initiate_device_remove(libxl__egc *egc,
  * TODO: 4.2 Bodge due to QEMU, see comment on top of
  * libxl__initiate_device_remove in libxl_internal.h
  */
-rc = libxl__ev_time_register_rel(gc, &aodev->timeout,
+rc = libxl__ev_time_register_rel(ao, &aodev->timeout,
  device_qemu_timeout,
  LIBXL_QEMU_BODGE_TIMEOUT * 1000);
 if (rc) {
@@ -1034,7 +1034,7 @@ static void device_hotplug(libxl__egc *egc, 
libxl__ao_device *aodev)
 aes->stdfds[1] = 2;
 aes->stdfds[2] = -1;
 
-rc = libxl__async_exec_start(gc, aes);
+rc = libxl__async_exec_start(aes);
 if (rc)
 goto out;
 
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 291b803..dcce394 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -980,7 +980,7 @@ static void 
domain_suspend_switch_qemu_xen_traditional_logdirty
 switch_logdirty_xswatch, lds->ret_path);
 if (rc) goto out;
 
-rc = libxl__ev_time_register_rel(gc, &lds->timeout,
+rc = libxl__ev_time_register_rel(ao, &lds->timeout,
 switch_logdirty_timeout, 10*1000);
 if (rc) goto out;
 
@@ -1260,7 +1260,7 @@ static void domain_suspend_callback_common(libxl__egc 
*egc,
 rc = libxl__ev_evtchn_wait(gc, &dss->guest_evtchn);
 if (rc) goto err;
 
-rc = libxl__ev_time_register_rel(gc, &dss->guest_timeout,
+rc = libxl__ev_time_register_rel(ao, &dss->guest_timeout,
  suspend_common_wait_guest_timeout,
  60*1000);
 if (rc) goto err;
@@ -1391,7 +1391,7 @@ static void domain_suspend_common_wait_guest(libxl__egc 
*egc,
 "@releaseDomain");
 if (rc) goto err;
 
-rc = libxl__ev_time_register_rel(gc, &dss->guest_timeout,
+rc = libxl__ev_time_register_rel(ao,

[Xen-devel] [PATCH 07/28] libxl: xswait/devstate: Move xswait to before devstate

2015-03-31 Thread Ian Jackson
Pure code motion.  We are going to make devstate use xswait.

Signed-off-by: Ian Jackson 
Acked-by: Ian Campbell 
---
 tools/libxl/libxl_internal.h |  109 +-
 1 file changed, 55 insertions(+), 54 deletions(-)

diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 5a76d51..edc33bb 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -1090,6 +1090,61 @@ _hidden const char *libxl__device_nic_devname(libxl__gc 
*gc,
 
 _hidden int libxl__get_domid(libxl__gc *gc, uint32_t *domid);
 
+/*- xswait: wait for a xenstore node to be suitable -*/
+
+typedef struct libxl__xswait_state libxl__xswait_state;
+
+/*
+ * rc describes the circumstances of this callback:
+ *
+ * rc==0
+ *
+ * The xenstore path (may have) changed.  It has been read for
+ * you.  The result is in data (allocated from the ao gc).
+ * data may be NULL, which means that the xenstore read gave
+ * ENOENT.
+ *
+ * If you are satisfied, you MUST call libxl__xswait_stop.
+ * Otherwise, xswait will continue waiting and watching and
+ * will call you back later.
+ *
+ * rc==ERROR_TIMEDOUT
+ *
+ * The specified timeout was reached.
+ * This has NOT been logged (except to the debug log).
+ * xswait will not continue (but calling libxl__xswait_stop is OK).
+ *
+ * rc!=0, !=ERROR_TIMEDOUT
+ *
+ * Some other error occurred.
+ * This HAS been logged.
+ * xswait will not continue (but calling libxl__xswait_stop is OK).
+ *
+ * xswait.path may start with with '@', in which case no read is done
+ * and the callback will always get data==0.
+ */
+typedef void libxl__xswait_callback(libxl__egc *egc,
+  libxl__xswait_state *xswa, int rc, const char *data);
+
+struct libxl__xswait_state {
+/* caller must fill these in, and they must all remain valid */
+libxl__ao *ao;
+const char *what; /* for error msgs: noun phrase, what we're waiting for */
+const char *path;
+int timeout_ms; /* as for poll(2) */
+libxl__xswait_callback *callback;
+/* remaining fields are private to xswait */
+libxl__ev_time time_ev;
+libxl__ev_xswatch watch_ev;
+};
+
+void libxl__xswait_init(libxl__xswait_state*);
+void libxl__xswait_stop(libxl__gc*, libxl__xswait_state*); /*idempotent*/
+bool libxl__xswait_inuse(const libxl__xswait_state *ss);
+
+int libxl__xswait_start(libxl__gc*, libxl__xswait_state*);
+
+
 /*
  * libxl__ev_devstate - waits a given time for a device to
  * reach a given state.  Follows the libxl_ev_* conventions.
@@ -1177,60 +1232,6 @@ _hidden int libxl__create_pci_backend(libxl__gc *gc, 
uint32_t domid,
   libxl_device_pci *pcidev, int num);
 _hidden int libxl__device_pci_destroy_all(libxl__gc *gc, uint32_t domid);
 
-/*- xswait: wait for a xenstore node to be suitable -*/
-
-typedef struct libxl__xswait_state libxl__xswait_state;
-
-/*
- * rc describes the circumstances of this callback:
- *
- * rc==0
- *
- * The xenstore path (may have) changed.  It has been read for
- * you.  The result is in data (allocated from the ao gc).
- * data may be NULL, which means that the xenstore read gave
- * ENOENT.
- *
- * If you are satisfied, you MUST call libxl__xswait_stop.
- * Otherwise, xswait will continue waiting and watching and
- * will call you back later.
- *
- * rc==ERROR_TIMEDOUT
- *
- * The specified timeout was reached.
- * This has NOT been logged (except to the debug log).
- * xswait will not continue (but calling libxl__xswait_stop is OK).
- *
- * rc!=0, !=ERROR_TIMEDOUT
- *
- * Some other error occurred.
- * This HAS been logged.
- * xswait will not continue (but calling libxl__xswait_stop is OK).
- *
- * xswait.path may start with with '@', in which case no read is done
- * and the callback will always get data==0.
- */
-typedef void libxl__xswait_callback(libxl__egc *egc,
-  libxl__xswait_state *xswa, int rc, const char *data);
-
-struct libxl__xswait_state {
-/* caller must fill these in, and they must all remain valid */
-libxl__ao *ao;
-const char *what; /* for error msgs: noun phrase, what we're waiting for */
-const char *path;
-int timeout_ms; /* as for poll(2) */
-libxl__xswait_callback *callback;
-/* remaining fields are private to xswait */
-libxl__ev_time time_ev;
-libxl__ev_xswatch watch_ev;
-};
-
-void libxl__xswait_init(libxl__xswait_state*);
-void libxl__xswait_stop(libxl__gc*, libxl__xswait_state*); /*idempotent*/
-bool libxl__xswait_inuse(const libxl__xswait_state *ss);
-
-int libxl__xswait_start(libxl__gc*, libxl__xswait_state*);
-
 /*
  *- spawn -
  *
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 02/28] libxl: Comment cleanups

2015-03-31 Thread Ian Jackson
* Add two comments in libxl_remus_disk_drbd documenting buggy handling
  of the hotplug script exit status.

* Add a section heading for async exec in libxl_aoutils.c

* Mention the right function name (libxl__ev_child_fork, not
  libxl__ev_fork) in libxl_internal.h

Signed-off-by: Ian Jackson 
CC: Yang Hongyang 
CC: Wen Congyang 
CC: Lai Jiangshan 
Acked-by: Yang Hongyang 
Acked-by: Ian Campbell 
---
v2: New patch in this version of the series.
---
 tools/libxl/libxl_aoutils.c |2 ++
 tools/libxl/libxl_internal.h|2 +-
 tools/libxl/libxl_remus_disk_drbd.c |2 ++
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/libxl/libxl_aoutils.c b/tools/libxl/libxl_aoutils.c
index b10d2e1..44dc222 100644
--- a/tools/libxl/libxl_aoutils.c
+++ b/tools/libxl/libxl_aoutils.c
@@ -451,6 +451,8 @@ int libxl__openptys(libxl__openpty_state *op,
 return rc;
 }
 
+/*- async exec -*/
+
 static void async_exec_timeout(libxl__egc *egc,
libxl__ev_time *ev,
const struct timeval *requested_abs)
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 934465a..99db92a 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -1393,7 +1393,7 @@ _hidden int libxl__spawn_record_pid(libxl__gc*, 
libxl__spawn_state*,
  * This is a NOT function for waiting for ordinary child processes.
  * If you want to run (fork/exec/wait) subprocesses from libxl:
  *  - Make your libxl entrypoint use the ao machinery
- *  - Use libxl__ev_fork, and use the callback programming style
+ *  - Use libxl__ev_child_fork, and use the callback programming style
  *
  * This function is intended for interprocess communication with a
  * service process.  If the service process does not respond quickly,
diff --git a/tools/libxl/libxl_remus_disk_drbd.c 
b/tools/libxl/libxl_remus_disk_drbd.c
index 3215f93..afe9b61 100644
--- a/tools/libxl/libxl_remus_disk_drbd.c
+++ b/tools/libxl/libxl_remus_disk_drbd.c
@@ -145,6 +145,8 @@ static void match_async_exec_cb(libxl__egc *egc,
 
 if (status) {
 rc = ERROR_REMUS_DEVOPS_DOES_NOT_MATCH;
+/* BUG: seems to assume that any exit status means `no match' */
+/* BUG: exit status will have been logged as an error */
 goto out;
 }
 
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 05/28] libxl: suspend: Return correct error from callbacks

2015-03-31 Thread Ian Jackson
If a suspend callback fails, it has a libxl error code in its hand.
However we must return to libxc the values that libxc expects.  So we
stash the libxl error code in dss->rc and fish it out again after
libxc returns from the suspend call.

While we're here, abolish the now-redundant `ok' variable in
remus_devices_postsuspend_cb.

The overall functional change is that libxl_domain_save now completes
with the correct error code as determined when the underlying failure
happened.  (Usually this is, still, ERROR_FAIL.)

Signed-off-by: Ian Jackson 
Acked-by: Ian Campbell 
---
v2: Add cleanup in remus_devices_postsuspend_cb.
---
 tools/libxl/libxl_dom.c  |   22 --
 tools/libxl/libxl_internal.h |1 +
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 5eef7e3..291b803 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -1041,6 +1041,7 @@ static void domain_suspend_switch_qemu_xen_logdirty
 libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0);
 } else {
 LOG(ERROR,"logdirty switch failed (rc=%d), aborting suspend",rc);
+dss->rc = rc;
 libxl__xc_domain_saverestore_async_callback_done(egc, shs, -1);
 }
 }
@@ -1063,6 +1064,7 @@ void libxl__domain_suspend_common_switch_qemu_logdirty
 default:
 LOG(ERROR,"logdirty switch failed"
 ", no valid device model version found, aborting suspend");
+dss->rc = ERROR_FAIL;
 libxl__xc_domain_saverestore_async_callback_done(egc, shs, -1);
 }
 }
@@ -1144,6 +1146,7 @@ static void switch_logdirty_done(libxl__egc *egc,
 int broke;
 if (rc) {
 broke = -1;
+dss->rc = rc;
 } else {
 broke = 0;
 }
@@ -1587,6 +1590,7 @@ static void libxl__domain_suspend_callback(void *data)
 static void domain_suspend_callback_common_done(libxl__egc *egc,
 libxl__domain_suspend_state *dss, int rc)
 {
+dss->rc = rc;
 libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, !rc);
 }
 
@@ -1622,6 +1626,7 @@ static void 
remus_domain_suspend_callback_common_done(libxl__egc *egc,
 return;
 
 out:
+dss->rc = rc;
 libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, !rc);
 }
 
@@ -1629,16 +1634,17 @@ static void remus_devices_postsuspend_cb(libxl__egc 
*egc,
  libxl__remus_devices_state *rds,
  int rc)
 {
-int ok = 0;
 libxl__domain_suspend_state *dss = CONTAINER_OF(rds, *dss, rds);
 
 if (rc)
 goto out;
 
-ok = 1;
+rc = 0;
 
 out:
-libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, ok);
+if (rc)
+dss->rc = rc;
+libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, !rc);
 }
 
 static void libxl__remus_domain_resume_callback(void *data)
@@ -1657,7 +1663,6 @@ static void remus_devices_preresume_cb(libxl__egc *egc,
libxl__remus_devices_state *rds,
int rc)
 {
-int ok = 0;
 libxl__domain_suspend_state *dss = CONTAINER_OF(rds, *dss, rds);
 STATE_AO_GC(dss->ao);
 
@@ -1669,10 +1674,12 @@ static void remus_devices_preresume_cb(libxl__egc *egc,
 if (rc)
 goto out;
 
-ok = 1;
+rc = 0;
 
 out:
-libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, ok);
+if (rc)
+dss->rc = rc;
+libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, !rc);
 }
 
 /*- remus asynchronous checkpoint callback -*/
@@ -1790,6 +1797,7 @@ void libxl__domain_suspend(libxl__egc *egc, 
libxl__domain_suspend_state *dss)
 libxl__srm_save_autogen_callbacks *const callbacks =
 &dss->shs.callbacks.save.a;
 
+dss->rc = 0;
 logdirty_init(&dss->logdirty);
 libxl__xswait_init(&dss->pvcontrol);
 libxl__ev_evtchn_init(&dss->guest_evtchn);
@@ -1877,6 +1885,8 @@ void libxl__xc_domain_save_done(libxl__egc *egc, void 
*dss_void,
  "domain did not respond to suspend request");
 if ( !dss->guest_responded )
 rc = ERROR_GUEST_TIMEDOUT;
+else if (dss->rc)
+rc = dss->rc;
 else
 rc = ERROR_FAIL;
 goto out;
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 99db92a..2862c69 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2800,6 +2800,7 @@ struct libxl__domain_suspend_state {
 int debug;
 const libxl_domain_remus_info *remus;
 /* private */
+int rc;
 libxl__ev_evtchn guest_evtchn;
 int guest_evtchn_lockfd;
 int hvm;
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 06/28] libxl: Use libxl__xswait* in libxl__ao_device

2015-03-31 Thread Ian Jackson
Replace the separate timeout and xenstore watch with use of
libxl__xswait*.

Different control flow, but no ultimate functional change apart from
slight changes to the text of error messages.

Signed-off-by: Ian Jackson 
---
 tools/libxl/libxl_device.c   |   64 --
 tools/libxl/libxl_internal.h |2 +-
 2 files changed, 19 insertions(+), 47 deletions(-)

diff --git a/tools/libxl/libxl_device.c b/tools/libxl/libxl_device.c
index 0f50d04..64ee541 100644
--- a/tools/libxl/libxl_device.c
+++ b/tools/libxl/libxl_device.c
@@ -450,7 +450,7 @@ void libxl__prepare_ao_device(libxl__ao *ao, 
libxl__ao_device *aodev)
  * Initialize xs_watch, because it's not used on all possible
  * execution paths, but it's unconditionally destroyed when finished.
  */
-libxl__ev_xswatch_init(&aodev->xs_watch);
+libxl__xswait_init(&aodev->xswait);
 aodev->active = 1;
 /* We init this here because we might call device_hotplug_done
  * without actually calling any hotplug script */
@@ -731,13 +731,9 @@ static void device_hotplug_child_death_cb(libxl__egc *egc,
   libxl__async_exec_state *aes,
   int status);
 
-static void device_destroy_be_timeout_cb(libxl__egc *egc, libxl__ev_time *ev,
- const struct timeval *requested_abs);
-
 static void device_destroy_be_watch_cb(libxl__egc *egc,
-   libxl__ev_xswatch *watch,
-   const char *watch_path,
-   const char *event_path);
+   libxl__xswait_state *xswait,
+   int rc, const char *data);
 
 static void device_hotplug_done(libxl__egc *egc, libxl__ao_device *aodev);
 
@@ -988,22 +984,14 @@ static void device_hotplug(libxl__egc *egc, 
libxl__ao_device *aodev)
 if (aodev->action != LIBXL__DEVICE_ACTION_REMOVE)
 goto out;
 
-rc = libxl__ev_time_register_rel(gc, &aodev->timeout,
- device_destroy_be_timeout_cb,
- LIBXL_DESTROY_TIMEOUT * 1000);
-if (rc) {
-LOG(ERROR, "setup of xs watch timeout failed");
-goto out;
-}
-
-rc = libxl__ev_xswatch_register(gc, &aodev->xs_watch,
-device_destroy_be_watch_cb,
-be_path);
-if (rc) {
-LOG(ERROR, "setup of xs watch for %s failed", be_path);
-libxl__ev_time_deregister(gc, &aodev->timeout);
+aodev->xswait.ao = ao;
+aodev->xswait.what = "removal of backend path";
+aodev->xswait.path = be_path;
+aodev->xswait.timeout_ms = LIBXL_DESTROY_TIMEOUT * 1000;
+aodev->xswait.callback = device_destroy_be_watch_cb;
+rc = libxl__xswait_start(gc, &aodev->xswait);
+if (rc)
 goto out;
-}
 return;
 }
 
@@ -1101,37 +1089,21 @@ error:
 device_hotplug_done(egc, aodev);
 }
 
-static void device_destroy_be_timeout_cb(libxl__egc *egc, libxl__ev_time *ev,
- const struct timeval *requested_abs)
-{
-libxl__ao_device *aodev = CONTAINER_OF(ev, *aodev, timeout);
-STATE_AO_GC(aodev->ao);
-
-LOG(ERROR, "timed out while waiting for %s to be removed",
-   libxl__device_backend_path(gc, aodev->dev));
-
-aodev->rc = ERROR_TIMEDOUT;
-
-device_hotplug_done(egc, aodev);
-return;
-}
-
 static void device_destroy_be_watch_cb(libxl__egc *egc,
-   libxl__ev_xswatch *watch,
-   const char *watch_path,
-   const char *event_path)
+   libxl__xswait_state *xswait,
+   int rc, const char *dir)
 {
-libxl__ao_device *aodev = CONTAINER_OF(watch, *aodev, xs_watch);
+libxl__ao_device *aodev = CONTAINER_OF(xswait, *aodev, xswait);
 STATE_AO_GC(aodev->ao);
-const char *dir;
-int rc;
 
-rc = libxl__xs_read_checked(gc, XBT_NULL, watch_path, &dir);
 if (rc) {
-LOG(ERROR, "unable to read backend path: %s", watch_path);
+if (rc == ERROR_TIMEDOUT)
+LOG(ERROR, "timed out while waiting for %s to be removed",
+xswait->path);
 aodev->rc = rc;
 goto out;
 }
+
 if (dir) {
 /* backend path still exists, wait a little longer... */
 return;
@@ -1164,7 +1136,7 @@ static void device_hotplug_clean(libxl__gc *gc, 
libxl__ao_device *aodev)
 {
 /* Clean events and check reentrancy */
 libxl__ev_time_deregister(gc, &aodev->timeout);
-libxl__ev_xswatch_deregister(gc, &aodev->xs_watch);
+libxl__xswait_stop(gc, &aodev->xswait);
 assert(!libx

[Xen-devel] [PATCH 01/28] libxl: Further fix exit paths from libxl_device_events_handler

2015-03-31 Thread Ian Jackson
On the success path, do not call GC_FREE explicitly.  Instead, call
AO_INPROGRESS.

GC_FREE will free the gc underlying the long-term ao, which is then
subsequently referenced in backend_watch_callback's call to
libxl__nested_ao_create.  It is a miracle that this ever works at all.

Also, add an `if (rc) goto out;' after the xswatch registration.

After this, libxl_device_events_handler has the conventional and
correct ao initiation pattern.

Signed-off-by: Ian Jackson 
Acked-by: Roger Pau Monné 
Acked-by: Ian Campbell 
---
v2: New in this version of the series.
---
 tools/libxl/libxl.c |7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index 94b4d59..de0fc6b 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -4547,11 +4547,12 @@ int libxl_device_events_handler(libxl_ctx *ctx,
 be_path = GCSPRINTF("/local/domain/%u/backend", domid);
 rc = libxl__ev_xswatch_register(gc, &ddomain.watch, backend_watch_callback,
 be_path);
+if (rc) goto out;
 
-out:
-GC_FREE;
-if (rc) return AO_ABORT(rc);
 return AO_INPROGRESS;
+
+out:
+return AO_ABORT(rc);
 }
 
 
/**/
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 08/28] libxl: devstate: Use libxl__xswait*

2015-03-31 Thread Ian Jackson
Signed-off-by: Ian Jackson 
Acked-by: Ian Campbell 
---
v3: Initialise ds->w.ao
---
 tools/libxl/libxl_device.c   |4 +--
 tools/libxl/libxl_event.c|   79 +++---
 tools/libxl/libxl_internal.h |   11 +++---
 3 files changed, 41 insertions(+), 53 deletions(-)

diff --git a/tools/libxl/libxl_device.c b/tools/libxl/libxl_device.c
index 64ee541..0455134 100644
--- a/tools/libxl/libxl_device.c
+++ b/tools/libxl/libxl_device.c
@@ -758,7 +758,7 @@ void libxl__wait_device_connection(libxl__egc *egc, 
libxl__ao_device *aodev)
 return;
 }
 
-rc = libxl__ev_devstate_wait(gc, &aodev->backend_ds,
+rc = libxl__ev_devstate_wait(ao, &aodev->backend_ds,
  device_backend_callback,
  state_path, XenbusStateInitWait,
  LIBXL_INIT_TIMEOUT * 1000);
@@ -859,7 +859,7 @@ void libxl__initiate_device_remove(libxl__egc *egc,
 if (rc < 0) goto out;
 }
 
-rc = libxl__ev_devstate_wait(gc, &aodev->backend_ds,
+rc = libxl__ev_devstate_wait(ao, &aodev->backend_ds,
  device_backend_callback,
  state_path, XenbusStateClosed,
  LIBXL_DESTROY_TIMEOUT * 1000);
diff --git a/tools/libxl/libxl_event.c b/tools/libxl/libxl_event.c
index 595da2b..d4d62a8 100644
--- a/tools/libxl/libxl_event.c
+++ b/tools/libxl/libxl_event.c
@@ -805,68 +805,59 @@ void libxl__ev_evtchn_cancel(libxl__gc *gc, 
libxl__ev_evtchn *evev)
  * waiting for device state
  */
 
-static void devstate_watch_callback(libxl__egc *egc, libxl__ev_xswatch *watch,
-const char *watch_path, const char *event_path)
+static void devstate_callback(libxl__egc *egc, libxl__xswait_state *xsw,
+  int rc, const char *sstate)
 {
 EGC_GC;
-libxl__ev_devstate *ds = CONTAINER_OF(watch, *ds, watch);
-int rc;
+libxl__ev_devstate *ds = CONTAINER_OF(xsw, *ds, w);
 
-char *sstate = libxl__xs_read(gc, XBT_NULL, watch_path);
+if (rc) {
+if (rc == ERROR_TIMEDOUT)
+LIBXL__LOG(CTX, LIBXL__LOG_DEBUG, "backend %s wanted state %d "
+   " timed out", ds->w.path, ds->wanted);
+goto out;
+}
 if (!sstate) {
-if (errno == ENOENT) {
-LIBXL__LOG(CTX, LIBXL__LOG_DEBUG, "backend %s wanted state %d"
-   " but it was removed", watch_path, ds->wanted);
-rc = ERROR_INVAL;
-} else {
-LIBXL__LOG_ERRNO(CTX, LIBXL__LOG_ERROR, "backend %s wanted state"
- " %d but read failed", watch_path, ds->wanted);
-rc = ERROR_FAIL;
-}
+LIBXL__LOG(CTX, LIBXL__LOG_DEBUG, "backend %s wanted state %d"
+   " but it was removed", ds->w.path, ds->wanted);
+rc = ERROR_INVAL;
+goto out;
+}
+
+int got = atoi(sstate);
+if (got == ds->wanted) {
+LIBXL__LOG(CTX, LIBXL__LOG_DEBUG, "backend %s wanted state %d ok",
+   ds->w.path, ds->wanted);
+rc = 0;
 } else {
-int got = atoi(sstate);
-if (got == ds->wanted) {
-LIBXL__LOG(CTX, LIBXL__LOG_DEBUG, "backend %s wanted state %d ok",
-   watch_path, ds->wanted);
-rc = 0;
-} else {
-LIBXL__LOG(CTX, LIBXL__LOG_DEBUG, "backend %s wanted state %d"
-   " still waiting state %d", watch_path, ds->wanted, got);
-return;
-}
+LIBXL__LOG(CTX, LIBXL__LOG_DEBUG, "backend %s wanted state %d"
+   " still waiting state %d", ds->w.path, ds->wanted, got);
+return;
 }
-libxl__ev_devstate_cancel(gc, ds);
-ds->callback(egc, ds, rc);
-}
 
-static void devstate_timeout(libxl__egc *egc, libxl__ev_time *ev,
- const struct timeval *requested_abs)
-{
-EGC_GC;
-libxl__ev_devstate *ds = CONTAINER_OF(ev, *ds, timeout);
-LIBXL__LOG(CTX, LIBXL__LOG_DEBUG, "backend %s wanted state %d "
-   " timed out", ds->watch.path, ds->wanted);
+ out:
 libxl__ev_devstate_cancel(gc, ds);
-ds->callback(egc, ds, ERROR_TIMEDOUT);
+ds->callback(egc, ds, rc);
 }
 
-int libxl__ev_devstate_wait(libxl__gc *gc, libxl__ev_devstate *ds,
+int libxl__ev_devstate_wait(libxl__ao *ao, libxl__ev_devstate *ds,
 libxl__ev_devstate_callback cb,
 const char *state_path, int state, int 
milliseconds)
 {
+AO_GC;
 int rc;
 
-libxl__ev_time_init(&ds->timeout);
-libxl__ev_xswatch_init(&ds->watch);
+libxl__xswait_init(&ds->w);
 ds->wanted = state;
 ds->callback = cb;
 
-rc = libxl__ev_time_register_rel(gc, &ds->timeout, devstate_timeout,
- milliseconds);
-if (rc) goto out;
-
-rc = libxl__ev_xswatch_register(gc, &ds->watch, d

[Xen-devel] [PATCH RFC v3 00/28] libxl: Cancelling asynchronous operations

2015-03-31 Thread Ian Jackson
This is v3 of my work-in-progress series to support cancellation of
long-running libxl operations.

Changes from v2 are very minor: one bugfix, and comment and style
changes.

I have rebased this onto current xen.git#master (not #staging).  I
have compiled it and smoke tested it: it can do xl create and xl
destroy (of a PV guest) and xl list.  I have not executed the
cancellation paths AT ALL.  I am hoping for testing support from the
consumers - primarily XenServer.

As a result this patch series SHOULD NOT BE APPLIED.

I have gone through the replies to v2 and in each case made all the
changes which were suggested, or made my own responses.  I'm afraid I
haven't necessarily waited for each little subthread to come to a
conclusion.  Changes from those ongoing threads will be incorporated
in future version(s).

The list of patches is very similar to last time.  Notably, "libxl:
cancellation: Make spawns cancellable" has been dropped (see my
email <21786.60220.577051.878...@mariner.uk.xensource.com>).

Thanks,
Ian.





___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 24/29] libxl: Introduce FILLZERO

2015-03-31 Thread Ian Jackson
Ian Campbell writes ("Re: [Xen-devel] [PATCH 24/29] libxl: Introduce FILLZERO"):
> On Tue, 2015-02-10 at 20:10 +, Ian Jackson wrote:
> > FILLZERO is a macro for memset(&foo,0,sizeof(foo)).  It eliminates the
> > possibility of making the error memset(&foo,0,sizeof(&foo)).
> 
> but not:
> foo *p = allocate_a_foo()
>  memset(p, 0, sizeof(p))
> although that's probably less likely to go wrong and I don't think it
> can be avoided by the sorts of tricks used here.

Well, we could make a macro that you _have_ to pass the pointer to.
The result is that passing a non-pointer object gets you an error.

> > No callers yet, but document it in CODING_STYLE.  (In accordance with
> > existing libxl policy, I haven't gone through all existing possible
> > call sites.)
> 
> We don't usually expose such helpers in the public API, but I suppose
> you have a good reason to do so here, could you mention it in the commit
> log please.

It should be in the public API because xl*.c is full of uses of memset
which ought to be replaced with FILLZERO.  Do you really think I need
to mention this in the commit message ?

> > +#define LIBXL_FILLZERO(object) (memset(&(object), 0, sizeof((object))))
> 
> Evaluates object twice, so LIBXL_FILLZERO(*(p++)), would behave
> surprisingly. I'm not sure if this can be resolved though, so this might
> be a Don't Do That Then situation.

sizeof() does not evaluate its argument.

Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 22/29] libxl: Introduce DOMAIN_DESTROYED error code

2015-03-31 Thread Ian Jackson
Ian Campbell writes ("Re: [Xen-devel] [PATCH 22/29] libxl: Introduce 
DOMAIN_DESTROYED error code"):
> Konrad has a semantically similar error code which he is adding, I think
> in his recent libxl series to do with vcpu-set.

That is valuable, I think.

> AIUI Konrad's semantics are simply "domain does not exist", which seems
> to be usefully distinct from your "did exist but doesn't any more".

Indeed, although it's not clear to me without peering at the code in
detail whether my code always checks that the domain does exist before
setting up the watch which might result in its death being reported.

> I just wanted to mention it in case I'd misunderstood one or both error
> codes. As it stands this patch seems fine to me:
...
> Acked-by: Ian Campbell 

Thanks.

> I do wonder though if we ought to be better about documenting in the
> code|headers|idl what error codes mean and where they should be used
> (some are global, others specific to a subset of calls etc).

That would definitely be nice.  Do you think it would be helpful to
respin this patch with that information ?

Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 21/29] libxl: cancellation: Make spawns cancellable

2015-03-31 Thread Ian Jackson
Ian Campbell writes ("Re: [Xen-devel] [PATCH 21/29] libxl: cancellation: Make 
spawns cancellable"):
> On Tue, 2015-02-10 at 20:10 +, Ian Jackson wrote:
> > The libxl__spawn_spawn internal API permits the caller to specify
> > .timeout_ms==-1, meaning to wait forever.  Provide an explicit
> > cancellable to allow spawns to be cancelled.
> 
> AIUI this also lets spawns with timeout_ms > -1 be cancelled, which I
> think is desirable.

Yes.

Spawns with finite timeouts will be cancelled either by the explicit
cancellable in the spawn, or the one implied by the finite timeout; it
doesn't matter which - the path goes from spawn_cancel or
spawn_watch_event (respectively) to spawn_fail.

I was going to say that this patch is needed because libxl__ev_time's
cancellable is not registered when the timeout is infinite.  But
actually I see from libxl_event.c that this isn't the case.  So I
think I should drop this patch.

Thanks,
Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 20/29] libxl: cancellation: Note that driver domain task cannot be usefully cancelled

2015-03-31 Thread Ian Jackson
Ian Campbell writes ("Re: [Xen-devel] [PATCH 20/29] libxl: cancellation: Note 
that driver domain task cannot be usefully cancelled"):
> On Tue, 2015-02-10 at 20:10 +, Ian Jackson wrote:
> > In practice, cancelling this task will cause all subsequent actual
> > backend operations to fail, but will not actually cause the
> > libxl_device_events_handler operation to complete.
...
> Acked-by: Ian Campbell 
> 
> This is an unfortunate short-coming though, and I presume one which
> could be fixed by updates to the toolstack<->driver domain protocol?

It could be fixed by updates to the code inside libxl for the driver
domain task.  But it is not clear that the benefit would be large, and
the memory management in the driver domain task means that it would be
tricky to implement - there would have to be more tracking of what
operations are ongoing.

The behaviour of a driver domain service task which is in the process
of being cancelled is not anything that you would want, so a
cancel-and-restart isn't a nice operation anyway.

Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 17/29] libxl: cancellation: Provide public ao cancellation API

2015-03-31 Thread Ian Jackson
Ian Campbell writes ("Re: [Xen-devel] [PATCH 17/29] libxl: cancellation: 
Provide public ao cancellation API"):
> On Tue, 2015-02-10 at 20:10 +, Ian Jackson wrote:
> > +/*
> > + * For nested aos:
> > + *  Semantically, cancellation affects the whole tree of aos,
> > + *not just the parent.
> > + *  libxl__ao_cancellable.ao refers to the child, so
> > + *that the child callback sees the right ao.  (After all,
> > + *it was code dealing with the child that set .ao.)
> > + *  But, the cancellable is recorded on the "cancellables" list
> > + *for the ultimate root ao, so that every possible child
> > + *cancellation occurs as a result of the cancellation of the
> > + *parent.
> > + *  We set ao->cancelling only in the root.
> > + */
> 
> WRT this, given a tree of ao's, which ones need to be cancellable for a
> cancellation to succeed? I would assume all of them do, or else the
> cancellation can only occur if/when the non-cancellable ones happen to
> end?

It depends what you mean by `succeed'.  libxl_ao_cancel reports
success if it did something.  That is, if anyone was listening.  For
this purpose, with a tree of ao's, it is sufficient for any one of the
children to have registered a cancellable.

> Do we(/are we going to) take steps to stop new non-cancellable ao's being
> added to the tree once the root is cancelled?

Yes.  Individual callback setups, and operations, can check by calling
libxl__ao_cancelling.  Normally this is done by
libxl__ao_cancellable_register: it is not possible to register a new
cancellable for an ao which we are supposed to be cancelling.

This affects, for example, libxl__ev_time_register_*.

Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 17/29] libxl: cancellation: Provide public ao cancellation API

2015-03-31 Thread Ian Jackson
Ian Campbell writes ("Re: [Xen-devel] [PATCH 17/29] libxl: cancellation: 
Provide public ao cancellation API"):
> On Tue, 2015-02-10 at 20:10 +0000, Ian Jackson wrote:
> > +/*
> > + * It is sometimes possible to cancel an asynchronous operation.
> > + *
> > + * libxl_ao_cancel searches for an ongoing asynchronous operation whose
> > + * ao_how is identical to *how, and tries to cancel it.
> 
> I can see that you have arranged for the pointer not to be required to
> match, just the contents of the struct, which may be convenient for some
> callers who haven't remembered the ao_how somewhere convenient, but is
> it permissible to use the same pointer if it is convenient?

The pointer is a const libxl_asyncop_how* (both at operation
initiation and for libxl_ao_cancel), so libxl promises not to fiddle
with the struct's contents.  I think that it should therefore be
obvious that you can use the same pointer, as well as another struct
with the same contents (the latter being defined by the doc comment
saying that libxl_ao_cancel looks for an identical *how).

> Other than wondering about that this patch looks good,
> Acked-by: Ian Campbell 

Thanks,
Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 13/29] libxl: domain create: Do not destroy on cancellation

2015-03-31 Thread Ian Jackson
Ian Campbell writes ("Re: [Xen-devel] [PATCH 13/29] libxl: domain create: Do 
not destroy on cancellation"):
> I presume at some later stage in the series a suitable
> LIBXL_HAVE_CANCELLATION will materialise? I mention it here because it
> is on my mind.

Yes, this is in
  libxl: cancellation: Provide public ao cancellation API
(17/29 in v2)

Thanks,
Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 11/29] libxl: events: Make libxl__async_exec_* pass caller an rc

2015-03-31 Thread Ian Jackson
Ian Campbell writes ("Re: [Xen-devel] [PATCH 11/29] libxl: events: Make 
libxl__async_exec_* pass caller an rc"):
> On Tue, 2015-02-10 at 20:09 +0000, Ian Jackson wrote:
> > diff --git a/tools/libxl/libxl_aoutils.c b/tools/libxl/libxl_aoutils.c
> > index 754e2d1..891cdb8 100644
> > --- a/tools/libxl/libxl_aoutils.c
> > +++ b/tools/libxl/libxl_aoutils.c
> > @@ -483,11 +483,12 @@ static void async_exec_done(libxl__egc *egc,
> >  libxl__ev_time_deregister(gc, &aes->time);
> >  
> >  if (status) {
> > -libxl_report_child_exitstatus(CTX, LIBXL__LOG_ERROR,
> > -  aes->what, pid, status);
> > +if (!aes->rc)
> 
> Could be one "if (status && !aes->rc)", unless perhaps there is more
> code to come in this block?

No, there is no more to come.  I find it clearer this way but I don't
mind changing it.

> > +libxl__async_exec_state *aes, int rc, int status);
> > +/*
> > + * Meaning of status and rc:
> > + *  rc==0, status==0all went well
> > + *  rc==0, status!=0everything OK except child exited nonzero (logged)
> > + *  rc!=0   something else went wrong (status is real
> > + *   exit status, maybe reflecting SIGKILL if aes
> > + *   code killed the child).  Logged unless CANCELLED.
> 
> I'm unclear on whether status is valid in this third case or not. I
> think you are saying that it is (probably?) valid but if rc!=0 the
> caller likely doesn't actually care what it is?

status is definitely valid but maybe uninteresting, as stated in the
comment.

Would it help to add something about status to the third row of the
little table bit at the left ?

Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 16/29] libxl: ao: Provide manip_refcnt

2015-03-31 Thread Ian Jackson
Ian Campbell writes ("Re: [Xen-devel] [PATCH 16/29] libxl: ao: Provide 
manip_refcnt"):
> On Tue, 2015-02-10 at 20:10 +0000, Ian Jackson wrote:
> > +/*
> > + * A "manip" is a libxl public function manipulating this ao, which
> > + * has a pointer to it.  We have to not destroy it while that's the
> > + * case, obviously.
> 
> It might be nice to the reader to make a reference to the "An ao and its
> gc may be accessed only with the ctx lock held." sentence in the overall
> ao docs?

This seems to have been documented already in May 2012 in exactly
those terms, at the top of struct libxl__ao, in 738b3d442522.  Am I
confused ?

I have added a comment about locking in the manip functions' doc
comment.

> [...]
> > +libxl__ao__destroy(ctx,ao);
> 
> Nit: missing space after ",".

Fixed.

> But other than those and including if you disagree about extending the
> comment, since it's not a big deal:
> Acked-by: Ian Campbell 

Thanks,
Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 10/29] libxl: events: Make timeout and async exec setup take an ao, not a gc

2015-03-31 Thread Ian Jackson
Wen Congyang writes ("Re: [PATCH 10/29] libxl: events: Make timeout and async 
exec setup take an ao, not a gc"):
> On 02/11/2015 04:09 AM, Ian Jackson wrote:
> > Change the timeout setup functions to take a libxl__ao, not a
> > libxl__gc.  This is going to be needed for ao cancellation, because
> > timeouts are going to be a main hook for ao cancellation - so the
> > timeouts need to be associated with an ao.
> > 
> > This means that timeouts can only occur as part of a long-running
> > libxl function (but this is of course correct, as libxl shouldn't have
> > any global timeouts, and indeed all the call sites have an ao).
> > 
> > Also remove the gc parameter from libxl__async_exec_start.  It can
> > just use the gc from the ao supplied in the aes.

> > Signed-off-by: Ian Jackson 
> > CC: Yang Hongyang 
> > CC: Wen Congyang 
> > CC: Lai Jiangshan 
> 
> libxl__async_exec_start() related modifications look fine to me.

Thanks for the review.  I'll take that as ack, if you don't object.

Regards,
Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 06/29] libxl: Use libxl__xswait* in libxl__ao_device

2015-03-31 Thread Ian Jackson
Ian Campbell writes ("Re: [Xen-devel] [PATCH 06/29] libxl: Use libxl__xswait* 
in libxl__ao_device"):
> On Tue, 2015-02-10 at 20:09 +0000, Ian Jackson wrote:
> > @@ -1164,7 +1136,7 @@ static void device_hotplug_clean(libxl__gc *gc, 
> > libxl__ao_device *aodev)
> >  {
> >  /* Clean events and check reentrancy */
> >  libxl__ev_time_deregister(gc, &aodev->timeout);
> 
> You seem to have removed the initialisation of this in a previous hunk
> but not this deregistration or the field itself.
> 
> Was that deliberate, perhaps it was serving dual purpose somewhere?

Yes.  See "TODO: 4.2 Bodge due to QEMU, see comment on top of"...
in libxl__initiate_device_remove in libxl_device.c.

Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 04/29] libxl: suspend: common suspend callbacks take rc

2015-03-31 Thread Ian Jackson
Ian Campbell writes ("Re: [Xen-devel] [PATCH 04/29] libxl: suspend: common 
suspend callbacks take rc"):
> On Tue, 2015-02-10 at 20:09 +0000, Ian Jackson wrote:
> > Change the following functions to take a libxl error code rather than
> > a boolean "ok" value, and translate that value to the boolean expected
> > by libxc at the last moment:
> >   domain_suspend_callback_common_done} dss->callback_common_done
> >   remus_domain_suspend_callback_common_done  }
> >   domain_suspend_common_done
...
> Acked-by: Ian Campbell 

Thanks.

> There are a few new ERROR_FAILs which we might like to consider making
> more specific either now or later.

Indeed.  I have chosen not to try to drain the error handling swamp
myself at this stage.

Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 03/29] libxl: suspend: switch_logdirty_done takes rc

2015-03-31 Thread Ian Jackson
Ian Campbell writes ("Re: [Xen-devel] [PATCH 03/29] libxl: suspend: 
switch_logdirty_done takes rc"):
> On Tue, 2015-02-10 at 20:09 +0000, Ian Jackson wrote:
> > +int broke;
> > +if (rc) {
> > +broke = -1;
> > +} else {
> > +broke = 0;
> > +}
> 
> int broke = rc ? -1 : 0;
> 
> ?
> 
> But it looks like perhaps you are preparing to add other code in one or
> the other case, or maybe you just prefer this for some reason. Either
> way:

Later we change one of the branches to set dss->rc too.

> Acked-by: Ian Campbell 

Thanks,
Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v3 4/4] sched: credit2: consider per-vcpu soft affinity

2015-03-31 Thread George Dunlap
On 03/26/2015 09:48 AM, Justin T. Weaver wrote:
> when making decisions for vcpus (run queue assignment, run queue migration,
> cpu assignment, and cpu tickling).
> 
> Added soft affinity balancing loops to...
>  * get_fallback_cpu
>  * runq_tickle (one for idle, but not tickled; one for non-idle, and not
>tickled)
>  * choose_cpu
> 
> choose_cpu now tries to find the run queue with the most cpus in the given
> vcpu's soft affinity. It uses minimum run queue load as a tie breaker.
> 
> Added a function to determine the number of soft cpus gained (or lost) by a
> given vcpu if it is migrated from a given source run queue to a given
> destination run queue.
> 
> Modified algorithm in balance_load and consider...
>  * if the load on lrqd and/or orqd is less than the number of their active
>cpus, balance_load will look for vcpus that would have their soft affinity
>improved by being pushed and/or pulled. Load does not need to be considered
>since a run queue receiving a pushed or pulled vcpu is not being fully
>utilized. This returns vcpus that may have been migrated away from their
>soft affinity due to load balancing back to their soft affinity.
>  * in consider, vcpus that might be picked for migration because pushing or
>pulling them decreases the load delta are not picked if their current run
>queue's load is less than its active cpu count and if that migration would
>harm their soft affinity. There's no need to push/pull if the load is under
>capacity, and the vcpu would lose access to some or all of its soft cpus.
>  * in consider, if a push/pull/swap migration decreases the load delta by a
>similar amount to another push/pull/swap migration, then use soft cpu gain
>as a tie breaker. This allows load to continue to balance across run 
> queues,
>but favors soft affinity gains if the load deltas are close.
> 
> Signed-off-by: Justin T. Weaver 

First of all, thank you for doing the careful work to try to make this
happen.  The logic here is *very* complicated!

One minor thing re the changelog: You should probably mention somewhere
that you're also removing on-stack cpumask_t to use the per-cpu scratch
variable instead.

> ---
> Changes in v3:
>  * get_fallback_cpu: added balance loop to try to find a soft affinity cpu 
>  * runq_tickle: replaced use of local var mask with csched2_cpumask
>  * runq_tickle: added two balance loops, one for finding idle, but not
>tickled, and other for finding non-idle with lowest credit
>  * choose_cpu: added balance loop to find cpu for given vcpu that has most
>soft cpus (with run queue load being a tie breaker), or if none were found,
>or not considering soft affinity, pick cpu from runq with least load
>  * balance_load / consider: removed code that ignored a migration if it meant
>moving a vcpu away from its soft affinity; added migration of vcpus to
>improve their soft affinity if the destination run queue was under load;
>added check in consider, if current run queue is under load and migration
>would hurt the vcpu's soft affinity, do not consider the migration; added
>soft affinity tie breaker in consider if current load delta and consider
>load delta are close
>  * added helper functions for soft affinity related changes to balance_load
> Changes in v2:
>  * Not submitted in version 2; focus was on the hard affinity patch
> ---
>  xen/common/sched_credit2.c |  344 
> 
>  1 file changed, 313 insertions(+), 31 deletions(-)
> 
> diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
> index bbcfbf2..47d0bad 100644
> --- a/xen/common/sched_credit2.c
> +++ b/xen/common/sched_credit2.c
> @@ -127,6 +127,14 @@
>  #define CSCHED2_CREDIT_RESET 0
>  /* Max timer: Maximum time a guest can be run for. */
>  #define CSCHED2_MAX_TIMERMILLISECS(2)
> +/* Used in balance_load to specify migration direction. */
> +#define CSCHED2_PULL 0
> +#define CSCHED2_PUSH 1
> +/*
> + * Used in balance_load to decide if deltas are close enough to use soft
> + * affinity as a tie breaker.
> + */
> +#define CSCHED2_DIVIDE_BY_16 4
>  
>  
>  #define CSCHED2_IDLE_CREDIT (-(1<<30))
> @@ -288,15 +296,33 @@ struct csched2_dom {
>   */
>  static int get_fallback_cpu(struct csched2_vcpu *svc)

Don't forget to update the comment. :-)

>  {
> +int balance_step;
> +
>  if ( likely(cpumask_test_cpu(svc->vcpu->processor,
>  svc->vcpu->cpu_hard_affinity)) )
>  return svc->vcpu->processor;
>  
> -cpumask_and(csched2_cpumask, svc->vcpu->cpu_hard_affinity,
> -&svc->rqd->active);
> -if ( cpumask_empty(csched2_cpumask) )
> -cpumask_and(csched2_cpumask, svc->vcpu->cpu_hard_affinity,
> -VCPU2ONLINE(svc->vcpu));
> +for_each_sched_balance_step( balance_step )
> +{
> +if ( balance_step == SCHED_BALANCE_SOFT_AFFINITY
> +  

Re: [Xen-devel] [PATCH v3 1/4] sched: credit2: respect per-vcpu hard affinity

2015-03-31 Thread George Dunlap
On 03/31/2015 06:14 PM, Dario Faggioli wrote:
>>> diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
>>> index 7581731..af716e4 100644
>>> --- a/xen/common/sched_credit2.c
>>> +++ b/xen/common/sched_credit2.c
> 
>>> @@ -2024,6 +2096,13 @@ csched2_alloc_pdata(const struct scheduler *ops, int 
>>> cpu)
>>>  printk("%s: cpu %d not online yet, deferring initializatgion\n",
>>> __func__, cpu);
>>>  
>>> +/*
>>> + * For each new pcpu, allocate a cpumask_t for use throughout the
>>> + * scheduler to avoid putting any cpumask_t structs on the stack.
>>> + */
>>> +if ( !zalloc_cpumask_var(&scratch_mask[cpu]) )
>>
>> Any reason not to use "scratch_mask + cpu" here rather than
>> "&scratch_mask[cpu]"?
>>
> With the renaming you suggested, that would be "_scratch_mask + cpu",
> wouldn't it? I mean, it has to be the actual variable, not the #define,
> since this is (IIRC) called from another CPU, and hence the macro, which
> does smp_processor_id(), would give us the wrong element of the per-cpu
> array.
> 
> That being said, I personally find the array syntax easier to read and
> more consistent, especially if we add this:

Yes, _scratch_mask.  I think I probably wrote this before suggesting the
rename. :-)

I actually find the other syntax easier to read in general; but it's not
too big a deal, and if we add the ASSERT, it certainly makes sense to
keep it an array for consistency.

> IOW, if we always free what we allocate, there is no need for the
> pointers to be NULL, and this is how I addressed the matter in the
> message. I agree it probably doesn't look super clear, this other email,
> describing a similar scenario, may contain a better explanation of my
> take on this:
> 
> <1426601529.32500.94.ca...@citrix.com>
> 
>> I think it's just dangerous to leave uninitialized pointers around.  The
>> invariant should be that if the array entry is invalid it's NULL, and if
>> it's non-null then it's valid.
>>
> I see. I guess this makes things more "defensive programming"-ish
> oriented, which is a good thing.
> 
> I don't know how well this is enforced around the scheduler code (or in
> general), but I'm certainly ok making a step in that direction. This is
> no hot-path, so no big deal zeroing the memory... Not zeroing forces one
> to think harder about what is being allocated and freed, which is why I
> like it, but I'm, of course more than ok with zalloc_*, so go for
> it. :-)

I'm not sure how it forces you to think harder.  Having a null pointer
dereference is bad enough that I already try hard to think carefully about
what's being allocated and freed.  Having a double free or
use-after-free bug is certainly worse than a null pointer dereference,
but at some point worse consequences don't actually lead to better behavior.

It's like those politicians who say they want to double the punishment
for some crime; say, assaulting hospital staff.  Well, assaulting
hospital staff is already a crime that can lead to jail time; if the
original punishment didn't deter the guy, doubling it isn't really going
to do much.  :-)

>> Also -- I think this allocation wants to happen in global_init(), right?
>>  Otherwise if you make a second cpupool with the credit2 scheduler this
>> will be clobbered.  (I think nr_cpu_ids should be defined at that point.)
>>
> Good point, actually. This just made me realize I've done the same
> mistake somewhere else... Thanks!! :-P

That one slipped under my radar too.

...and it's also a good example of why "just think harder about it"
doesn't really work.  I should have made you add ASSERTs when setting
that global variable, that it actually was NULL. :-D

 -George

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v4 25/33] xen/xsm: Add helpers to check permission for device tree passthrough

2015-03-31 Thread Julien Grall
Hi Daniel,

On 31/03/15 18:12, Daniel De Graaf wrote:
> On 03/19/2015 03:29 PM, Julien Grall wrote:
>> This is a follow-up of commit 525ee49 "xsm: add device tree labeling
>> support" which adds support for device tree labelling in flask.
>>
>> Those helpers will be used later when non-pci passthrough (i.e. device
>> tree) is added.
>>
>> Signed-off-by: Julien Grall 
> 
> Looks good to me with one assumption below.
> 
> Acked-by: Daniel De Graaf 
> 
> [...]
>> diff --git a/xen/xsm/flask/avc.c b/xen/xsm/flask/avc.c
>> index b1a4f8a..31bc702 100644
>> --- a/xen/xsm/flask/avc.c
>> +++ b/xen/xsm/flask/avc.c
>> @@ -600,6 +600,9 @@ void avc_audit(u32 ssid, u32 tsid, u16 tclass, u32
>> requested,
>>   case AVC_AUDIT_DATA_MEMORY:
>>   avc_printk(&buf, "pte=%#lx mfn=%#lx ", a->memory.pte,
>> a->memory.mfn);
>>   break;
>> +case AVC_AUDIT_DATA_DTDEV:
>> +avc_printk(&buf, "dtdevice=%s ", a->dtdev);
>> +break;
>>   }
>>
>>   avc_dump_query(&buf, ssid, tsid, tclass);
> 
> This output could end up being ambiguous if a device tree path can
> contain spaces.  Am I correct in assuming that they are invalid in
> device tree paths?

Correct.

Thanks,

-- 
Julien Grall

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v3 1/4] sched: credit2: respect per-vcpu hard affinity

2015-03-31 Thread Dario Faggioli
On Tue, 2015-03-31 at 15:37 +0100, George Dunlap wrote:
> On 03/26/2015 09:48 AM, Justin T. Weaver wrote:
> > by making sure that vcpus only run on the pcpu(s) they are allowed to
> > run on based on their hard affinity cpu masks.
> > 
> > Signed-off-by: Justin T. Weaver 
> 
> Hey Justin!  Getting close.  A couple of comments:
> 
Hi from here too...

I'll also provide my comments on this series shortly, just the time to
finish a thing I'm busy with. :-/

For now, just a few replies to direct questions...

> > diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
> > index 7581731..af716e4 100644
> > --- a/xen/common/sched_credit2.c
> > +++ b/xen/common/sched_credit2.c

> > @@ -2024,6 +2096,13 @@ csched2_alloc_pdata(const struct scheduler *ops, int 
> > cpu)
> >  printk("%s: cpu %d not online yet, deferring initializatgion\n",
> > __func__, cpu);
> >  
> > +/*
> > + * For each new pcpu, allocate a cpumask_t for use throughout the
> > + * scheduler to avoid putting any cpumask_t structs on the stack.
> > + */
> > +if ( !zalloc_cpumask_var(&scratch_mask[cpu]) )
> 
> Any reason not to use "scratch_mask + cpu" here rather than
> "&scratch_mask[cpu]"?
> 
With the renaming you suggested, that would be "_scratch_mask + cpu",
wouldn't it? I mean, it has to be the actual variable, not the #define,
since this is (IIRC) called from another CPU, and hence the macro, which
does smp_processor_id(), would give us the wrong element of the per-cpu
array.

That being said, I personally find the array syntax easier to read and
more consistent, especially if we add this:

> It might not be a bad idea to add an ASSERT(scratch_mask[cpu] == NULL)
> before this, just to be paranoid...
> 
But, no big deal, I'm fine with the '+'.

> > @@ -2159,6 +2240,10 @@ csched2_init(struct scheduler *ops)
> >  
> >  prv->load_window_shift = opt_load_window_shift;
> >  
> > +scratch_mask = xmalloc_array(cpumask_t *, nr_cpu_ids);
> 
> I realize Dario recommended using xmalloc_array() instead of
> xzalloc_array(), but I don't understand why he thinks that's OK.  
>
Well, I didn't go as far as recommending it, but yes, I'd do it that
way, and I think it is both safe and fine.

> His
> mail says "(see below about why I actually don't
> think we need)", but I don't actually see that addressed in that e-mail.
> 
Right. In that email (message-id:
 )

I was focusing on why the call to free() in a loop was not necessary,
and we should instead free what have been previously allocated, rather
than always freeing everything, relying on the fact that, at "worst", the
pointer will be NULL anyway.

IOW, if we always free what we allocate, there is no need for the
pointers to be NULL, and this is how I addressed the matter in the
message. I agree it probably doesn't look super clear, this other email,
describing a similar scenario, may contain a better explanation of my
take on this:

<1426601529.32500.94.ca...@citrix.com>

> I think it's just dangerous to leave uninitialized pointers around.  The
> invariant should be that if the array entry is invalid it's NULL, and if
> it's non-null then it's valid.
> 
I see. I guess this makes things more "defensive programming"-ish
oriented, which is a good thing.

I don't know how well this is enforced around the scheduler code (or in
general), but I'm certainly ok making a step in that direction. This is
no hot-path, so no big deal zeroing the memory... Not zeroing forces one
to think harder about what is being allocated and freed, which is why I
like it, but I'm, of course more than ok with zalloc_*, so go for
it. :-)

> Also -- I think this allocation wants to happen in global_init(), right?
>  Otherwise if you make a second cpupool with the credit2 scheduler this
> will be clobbered.  (I think nr_cpu_ids should be defined at that point.)
> 
Good point, actually. This just made me realize I've done the same
mistake somewhere else... Thanks!! :-P

Regards,
Dario


signature.asc
Description: This is a digitally signed message part
___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v4 25/33] xen/xsm: Add helpers to check permission for device tree passthrough

2015-03-31 Thread Daniel De Graaf

On 03/19/2015 03:29 PM, Julien Grall wrote:

This is a follow-up of commit 525ee49 "xsm: add device tree labeling
support" which adds support for device tree labelling in flask.

Those helpers will be used later when non-pci passthrough (i.e. device
tree) is added.

Signed-off-by: Julien Grall 


Looks good to me with one assumption below.

Acked-by: Daniel De Graaf 

[...]

diff --git a/xen/xsm/flask/avc.c b/xen/xsm/flask/avc.c
index b1a4f8a..31bc702 100644
--- a/xen/xsm/flask/avc.c
+++ b/xen/xsm/flask/avc.c
@@ -600,6 +600,9 @@ void avc_audit(u32 ssid, u32 tsid, u16 tclass, u32 
requested,
  case AVC_AUDIT_DATA_MEMORY:
  avc_printk(&buf, "pte=%#lx mfn=%#lx ", a->memory.pte, a->memory.mfn);
  break;
+case AVC_AUDIT_DATA_DTDEV:
+avc_printk(&buf, "dtdevice=%s ", a->dtdev);
+break;
  }

  avc_dump_query(&buf, ssid, tsid, tclass);


This output could end up being ambiguous if a device tree path can contain
spaces.  Am I correct in assuming that they are invalid in device tree paths?

--
Daniel De Graaf
National Security Agency

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v2 5/5] xen: sched_rt: print useful affinity info when dumping

2015-03-31 Thread Dario Faggioli
On Mon, 2015-03-30 at 14:47 +0100, George Dunlap wrote:
> On 03/17/2015 03:33 PM, Dario Faggioli wrote:

> > Such scratch area can be used to kill most of the
> > cpumasks{_var}_t local variables in other functions
> > in the file, but that is *NOT* done in this change.
> > 
> > Finally, convert the file to use keyhandler scratch,
> > instead of open coded string buffers.
> > 
> > Signed-off-by: Dario Faggioli 
> > Cc: George Dunlap 
> > Cc: Meng Xu 
> > Cc: Jan Beulich 
> > Cc: Keir Fraser 
> > ---
> > Changes from v1:
> >  * improved changelog;
> >  * made a local variable to point to the correct
> >scratch mask, as suggested during review.
> 
> Thanks,
> 
> Reviewed-by: George Dunlap 
> 
Thanks to you, but I think this has an issue.

The same one that you spotted in Justin's Credit2 affinity series: the
scratch mask array should not be allocated in rt_init(), or at least not
without checking that it's not there already, or the creation of a new
cpupool with the same scheduler will screw things up.

So, at least this very patch needs a v3. It's pretty independent from
the rest of the series, which should be all acked by now, but I can
resend it all if it's easier/better, just let me know. :-)

Thanks again and Regards,
Dario


signature.asc
Description: This is a digitally signed message part
___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] xen-blkback: define pr_fmt macro to avoid the duplication of DRV_PFX

2015-03-31 Thread Joe Perches
On Tue, 2015-03-31 at 16:57 +0200, Roger Pau Monné wrote:
> El 31/03/15 a les 23.14, Tao Chen ha escrit:
> > Define pr_fmt macro with {xen-blkback: } prefix, then remove all use
> > of DRV_PFX in the pr and DPRINTK sentences. It will simplify the code.
[]
> > diff --git a/drivers/block/xen-blkback/xenbus.c 
> > b/drivers/block/xen-blkback/xenbus.c
[]
> > @@ -14,6 +14,11 @@
> >  
> >  */
> >  
> > +#define pr_fmt(fmt) "xen-blkback: " fmt
> > +#define DPRINTK(fmt, args...)  \
> > +   pr_debug("(%s:%d) " fmt ".\n",  \
> > +   __func__, __LINE__, ##args)

As dynamic debug can emit __func__ and __LINE__
I suggest converting DPRINTK to pr_debug.



___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v1 3/3] xen/arm: smmu: Renaming struct iommu_domain *domain to, struct iommu_domain *iommu_domain

2015-03-31 Thread Stefano Stabellini
On Fri, 27 Mar 2015, Jaggi, Manish wrote:
> From: Julien Grall 
> Sent: Friday, March 27, 2015 6:34 PM
> To: Jaggi, Manish; Xen Devel; prasun.kap...@cavium.com; Kumar, Vijaya; Ian 
> Campbell; Stefano Stabellini
> Subject: Re: [PATCH v1 3/3] xen/arm: smmu: Renaming struct iommu_domain 
> *domain to, struct iommu_domain *iommu_domain
> 
> Hi manish,
> 
> On 27/03/15 07:24, Manish Jaggi wrote:
> > It is good for code readability as there are many structures ending with
> > the name domain.
> > Also a code like this one is now easy to understand with the rename
> > old: dev_iommu_domain(dev) = domain;
> > new: dev_iommu_domain(dev) = iommu_domain;
> [manish] Did u see this line
> >
> > Also in current code struct smmu_domain pointer variable name is always
> > smmu_domain.
> > The change is on the same lines
> 
> You are modifying the code from Linux just for your own comprehension.
> And we are trying to not diverge from a specific Linux commit in order
> to easily backport patch.
> 
> [manish] please rethink on nack. There are so many data structures ending in 
> _domain we need to provide proper naming.

We are trying to stay as close as possible to the Linux SMMU driver so
that we can easily import future updates of the driver into Xen. Although
it is true that the current naming is not great, changing the naming
scheme would make future driver updates much harder.

If it helps we could add a couple of comments on top of the structs in
smmu.c to explain the meaning of the fields, like:


/* iommu_domain, not to be confused with a Xen domain */

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH OSSTEST v2 1/2] tcl: Handle environment variables which are unset.

2015-03-31 Thread Ian Campbell
On Tue, 2015-03-31 at 17:42 +0100, Ian Jackson wrote:
> Ian Campbell writes ("[PATCH OSSTEST v2 1/2] tcl: Handle environment 
> variables which are unset."):
> > This allows wrappers such as the standalone wrapper to do
> >  OSSTEST_SIMULATE=$foo ./sg-run-job
> > and not worry if $foo is unset.
> > 
> > Do likewise for OSSTEST_TCL_JOBDB_DEBUG.
> > 
> > Signed-off-by: Ian Campbell 
> > Signed-off-by: Ian Jackson 
> 
> Acked-by: Ian Jackson 
> 
> FWIW

Well, it shows I didn't botch your instructions ;-)



___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] x86: Factor out common CPU initialization code

2015-03-31 Thread Boris Ostrovsky

On 03/31/2015 11:55 AM, Ingo Molnar wrote:


* Boris Ostrovsky  wrote:


Some of x86 bare-metal and Xen CPU initialization code is common between the two
and therefore can be factored out to avoid code duplication.

As a side effect, doing so will also extend the fix provided by commit
a7fcf28d431e ("x86/asm/entry: Replace this_cpu_sp0() with current_top_of_stack()
to x86_32") to 32-bit Xen PV guests.

Signed-off-by: Boris Ostrovsky 
---
  arch/x86/include/asm/smp.h |  1 +
  arch/x86/kernel/smpboot.c  | 39 +++
  arch/x86/xen/smp.c | 14 +-
  3 files changed, 25 insertions(+), 29 deletions(-)


This does not apply to the latest x86 tree (tip:master).

Thanks,

Ingo




Yes, I was somewhat out-of-date, sorry. Let me re-test this and submit v2.

-boris

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH OSSTEST v2 1/2] tcl: Handle environment variables which are unset.

2015-03-31 Thread Ian Jackson
Ian Campbell writes ("[PATCH OSSTEST v2 1/2] tcl: Handle environment variables 
which are unset."):
> This allows wrappers such as the standalone wrapper to do
>  OSSTEST_SIMULATE=$foo ./sg-run-job
> and not worry if $foo is unset.
> 
> Do likewise for OSSTEST_TCL_JOBDB_DEBUG.
> 
> Signed-off-by: Ian Campbell 
> Signed-off-by: Ian Jackson 

Acked-by: Ian Jackson 

FWIW

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v3 8/8] tools: add tools support for Intel CAT

2015-03-31 Thread Ian Campbell
On Thu, 2015-03-26 at 20:38 +0800, Chao Peng wrote:
> This is the xc/xl changes to support Intel Cache Allocation
> Technology(CAT). Two commands are introduced:
> - xl psr-cat-cbm-set [-s socket]  
>   Set cache capacity bitmasks(CBM) for a domain.
> - xl psr-cat-show 
>   Show Cache Allocation Technology information.

Please could you show an example of the output from this one.

> 
> Signed-off-by: Chao Peng 
> ---
> Changes in v3:
> * Add manpage.
> * libxl_psr_cat_set/get_domain_data => libxl_psr_cat_set/get_cbm.
> * Move libxl_count_physical_sockets into a separate patch.
> * Support LIBXL_PSR_TARGET_ALL for libxl_psr_cat_set_cbm.
> * Clean up the print codes.
> ---
>  docs/man/xl.pod.1 |  31 +
>  tools/libxc/include/xenctrl.h |  15 +
>  tools/libxc/xc_psr.c  |  74 +
>  tools/libxl/libxl.h   |  20 ++
>  tools/libxl/libxl_psr.c   | 126 ++--
>  tools/libxl/libxl_types.idl   |   5 ++
>  tools/libxl/xl.h  |   4 ++
>  tools/libxl/xl_cmdimpl.c  | 146 
> ++
>  tools/libxl/xl_cmdtable.c |  12 
>  9 files changed, 426 insertions(+), 7 deletions(-)
> 
> diff --git a/docs/man/xl.pod.1 b/docs/man/xl.pod.1
> index b016272..99979a5 100644
> --- a/docs/man/xl.pod.1
> +++ b/docs/man/xl.pod.1
> @@ -1492,6 +1492,37 @@ monitor types are:
>  
>  =back
>  
> +=head1 CACHE ALLOCATION TECHNOLOGY
> +
> +Intel Broadwell and later server platforms offer capabilities to configure and
> +make use of the Cache Allocation Technology (CAT) mechanisms, which enable more
> +cache resources (i.e. L3 cache) to be made available for high priority
> +applications. In the Xen implementation, CAT is used to control cache allocation
> +on a per-VM basis. To enforce cache allocation on a specific domain, just set
> +the capacity bitmasks (CBM) for the domain.
> +
> +=over 4
> +
> +=item B [I] [I]
> +
> +Set cache capacity bitmasks(CBM) for a domain.

What is the syntax of these bitmaps, and where do I pass them?

I think there is also a bunch of terminology (CBM, COS) which needs
explaining, otherwise no one will know how to use it. Perhaps that
belongs in a separate document or the wiki though?

> diff --git a/tools/libxc/xc_psr.c b/tools/libxc/xc_psr.c
> index e367a80..6c670d5 100644
> --- a/tools/libxc/xc_psr.c
> +++ b/tools/libxc/xc_psr.c
> @@ -248,6 +248,80 @@ int xc_psr_cmt_enabled(xc_interface *xch)
>  
>  return 0;
>  }
> +int xc_psr_cat_set_domain_data(xc_interface *xch, uint32_t domid,
> +   xc_psr_cat_type type, uint32_t target,
> +   uint64_t data)
> +{
> +DECLARE_DOMCTL;
> +uint32_t cmd;
> +
> +switch ( type )
> +{
> +case XC_PSR_CAT_L3_CBM:
> +cmd = XEN_DOMCTL_PSR_CAT_OP_SET_L3_CBM;
> +break;
> +default:
> +return -1;

You should also set errno to an appropriate value, since calling code
will try and use it to log with.

There were several instances of this I think.

> +}


> @@ -1513,6 +1520,19 @@ int libxl_psr_cmt_get_sample(libxl_ctx *ctx,
>   uint64_t *tsc_r);
>  #endif
>  
> +#ifdef LIBXL_HAVE_PSR_CAT
> +
> +#define LIBXL_PSR_TARGET_ALL (~0U)
> +int libxl_psr_cat_set_cbm(libxl_ctx *ctx, uint32_t domid,
> +  libxl_psr_cat_type type, uint32_t target,
> +  uint64_t cbm);
> +int libxl_psr_cat_get_cbm(libxl_ctx *ctx, uint32_t domid,
> +  libxl_psr_cat_type type, uint32_t target,
> +  uint64_t *cbm_r);

What are the units of the various cbm* values?

If they are now more precisely typed (i.e. not the opaque data from last
time) then is the type parameter still needed?

> +int libxl_psr_cat_get_l3_info(libxl_ctx *ctx, uint32_t socket,
> +  uint32_t *cos_max_r, uint32_t *cbm_len_r);

Is there going to be any user documentation regarding what cos and cbm
are and how to interpret them and set them?

> @@ -247,6 +290,75 @@ out:
>  return rc;
>  }
>  
> +int libxl_psr_cat_set_cbm(libxl_ctx *ctx, uint32_t domid,
> +  libxl_psr_cat_type type, uint32_t target,
> +  uint64_t cbm)
> +{
> +GC_INIT(ctx);
> +int rc;
> +
> +uint32_t i, nr_sockets;
> +
> +if (target != LIBXL_PSR_TARGET_ALL) {
> +rc = xc_psr_cat_set_domain_data(ctx->xch, domid, type, target, cbm);
> +if (rc < 0) {
> +libxl__psr_cat_log_err_msg(gc, errno);
> +rc = ERROR_FAIL;
> +}
> +} else {
> +nr_sockets = libxl_count_physical_sockets(ctx);
> +if (nr_sockets == 0) {
> +LOGE(ERROR, "failed to get system socket count");
> +rc = ERROR_FAIL;
> +goto out;
> +}
> +for (i = 0; i < nr_sockets; i++) {
> +rc = xc_psr_cat_set_domain_data(ctx->xch, domid, type, i, cbm);
> +if (rc < 0) {

Re: [Xen-devel] domU jiffies not incrementing - timer issue? - Kernel 3.18.10 on Xen 4.5.0

2015-03-31 Thread Mark Chambers
On 31 March 2015 at 11:56, Mark Chambers  wrote:
>
>
> It's nested under Hyper-V in the same manner as the problematic install. I
> was deliberately trying to replicate the issue, but the problem doesn't
> manifest.
>
> Mark
>
>>
>
Hi,

I've got it booting.

The machine without boot problems reports the use of emulated TSC:

(XEN) TSC not marked as either constant or reliable, warp=575 (count=2)
(XEN) dom109: mode=0,ofs=0x417376aa9c8c,khz=2633032,inc=1,vtsc count:
3576850 kernel, 9534 user

The machine with problems reports no domains having emulated TSC:

(XEN) TSC has constant rate, deep Cstates possible, so not reliable, warp=0
(count=3)
(XEN) dom23: mode=0,ofs=0x41dc316839ac,khz=2208968,inc=1
(XEN) No domains have emulated TSC

I have nothing specified in the xl config for tsc_mode. If I set
tsc_mode='native' and restart the DomU it boots without any problems.

If I explicitly specify any of the other tsc_mode values it gets stuck
with jiffies not incrementing as before.

Mark
___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v3] xentop: add support for qdisks

2015-03-31 Thread Ian Campbell
On Mon, 2015-03-30 at 16:20 +0100, Wei Liu wrote:
> On Tue, Mar 24, 2015 at 04:59:47PM +, Ian Campbell wrote:
> [...]
> > > for running xentop in batch mode where the output can be captured. For
> > > normal screen viewing, I doubt anyone has a screen with more than 1024
> > > lines on which to view the output :)
> > 
> > :-). I'm not sure if there is anyone out there who uses libxenstat
> > directly for other purposes, but I suppose it isn't impossible.
> > 
> > > I'll code up another version with your suggestion.
> > 
> > Thanks, given the libxl limit I'm wondering about just taking v3 of this
> > patch and taking what would otherwise have been v4 as an improvement.
> > 
> 
> Agreed.

OK, Acked + applied.

Ian.



___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v2 0/3] Automatically derive soft affinity from vnuma information

2015-03-31 Thread Ian Campbell
On Thu, 2015-03-26 at 09:54 +0100, Dario Faggioli wrote:
> Round 2. All patches have Wei's Ack.

Applied.

Ian.


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


  1   2   3   >