Detect and react on situations when libvirtd was restarted or killed
when a job was active.
---
src/qemu/qemu_domain.c | 14
src/qemu/qemu_domain.h |2 +
src/qemu/qemu_process.c | 80 +++
3 files changed, 96 insertions(+), 0 deletions(-)
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 062ecc7..b26308e 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -142,6 +142,20 @@ qemuDomainObjResetAsyncJob(qemuDomainObjPrivatePtr priv)
memset(&job->signalsData, 0, sizeof(job->signalsData));
}
+void
+qemuDomainObjRestoreJob(virDomainObjPtr obj,
+struct qemuDomainJobObj *job)
+{
+qemuDomainObjPrivatePtr priv = obj->privateData;
+
+memset(job, 0, sizeof(*job));
+job->active = priv->job.active;
+job->asyncJob = priv->job.asyncJob;
+
+qemuDomainObjResetJob(priv);
+qemuDomainObjResetAsyncJob(priv);
+}
+
static void
qemuDomainObjFreeJob(qemuDomainObjPrivatePtr priv)
{
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index 17d1356..49be3d2 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -177,6 +177,8 @@ void qemuDomainObjEndNestedJob(struct qemud_driver *driver,
void qemuDomainObjSaveJob(struct qemud_driver *driver, virDomainObjPtr obj);
void qemuDomainObjSetAsyncJobMask(virDomainObjPtr obj,
unsigned long long allowedJobs);
+void qemuDomainObjRestoreJob(virDomainObjPtr obj,
+ struct qemuDomainJobObj *job);
void qemuDomainObjDiscardAsyncJob(struct qemud_driver *driver,
virDomainObjPtr obj);
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 3ffde51..49625b5 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -2223,6 +2223,80 @@ qemuProcessUpdateState(struct qemud_driver *driver,
virDomainObjPtr vm)
return 0;
}
+static int
+qemuProcessRecoverJob(struct qemud_driver *driver,
+ virDomainObjPtr vm,
+ virConnectPtr conn,
+ const struct qemuDomainJobObj *job)
+{
+virDomainState state;
+int reason;
+
+state = virDomainObjGetState(vm, &reason);
+
+switch (job->asyncJob) {
+case QEMU_ASYNC_JOB_MIGRATION_OUT:
+case QEMU_ASYNC_JOB_MIGRATION_IN:
+/* we don't know what to do yet */
+break;
+
+case QEMU_ASYNC_JOB_SAVE:
+case QEMU_ASYNC_JOB_DUMP:
+/* TODO cancel possibly running migrate operation */
+/* resume the domain but only if it was paused as a result of
+ * running save/dump operation */
+if (state == VIR_DOMAIN_PAUSED &&
+((job->asyncJob == QEMU_ASYNC_JOB_DUMP &&
+ reason == VIR_DOMAIN_PAUSED_DUMP) ||
+ (job->asyncJob == QEMU_ASYNC_JOB_SAVE &&
+ reason == VIR_DOMAIN_PAUSED_SAVE) ||
+ reason == VIR_DOMAIN_PAUSED_UNKNOWN)) {
+if (qemuProcessStartCPUs(driver, vm, conn,
+ VIR_DOMAIN_RUNNING_UNPAUSED) < 0) {
+VIR_WARN("Could not resume domain %s after", vm->def->name);
+}
+}
+break;
+
+case QEMU_ASYNC_JOB_NONE:
+case QEMU_ASYNC_JOB_LAST:
+break;
+}
+
+if (!virDomainObjIsActive(vm))
+return -1;
+
+switch (job->active) {
+case QEMU_JOB_QUERY:
+/* harmless */
+break;
+
+case QEMU_JOB_DESTROY:
+VIR_DEBUG("Domain %s should have already been destroyed",
+ vm->def->name);
+return -1;
+
+case QEMU_JOB_SUSPEND:
+/* mostly harmless */
+break;
+
+case QEMU_JOB_MODIFY:
+/* XXX depending on the command we may be in an inconsistent state and
+ * we should probably fall back to "monitor error" state and refuse to
+ */
+break;
+
+case QEMU_JOB_ASYNC:
+case QEMU_JOB_ASYNC_NESTED:
+/* async job was already handled above */
+case QEMU_JOB_NONE:
+case QEMU_JOB_LAST:
+break;
+}
+
+return 0;
+}
+
struct qemuProcessReconnectData {
virConnectPtr conn;
struct qemud_driver *driver;
@@ -2239,9 +2313,12 @@ qemuProcessReconnect(void *payload, const void *name
ATTRIBUTE_UNUSED, void *opa
struct qemud_driver *driver = data->driver;
qemuDomainObjPrivatePtr priv;
virConnectPtr conn = data->conn;
+struct qemuDomainJobObj oldjob;
virDomainObjLock(obj);
+qemuDomainObjRestoreJob(obj, &oldjob);
+
VIR_DEBUG("Reconnect monitor to %p '%s'", obj, obj->def->name);
priv = obj->privateData;
@@ -2287,6 +2364,9 @@ qemuProcessReconnect(void *payload, const void *name
ATTRIBUTE_UNUSED, void *opa
if (qemuProcessFiltersInstantiate(conn, obj->def))
goto error;
+if (qemuProcessRecoverJob(driver, obj, conn, &oldjob) < 0)
+goto error;
+
priv->job.active = QEMU_JOB_NONE;
/* update domai