The QEMU integrates with the lock manager instructure in a number of key places
* During startup, a lock is acquired in between the fork & exec * During startup, the libvirtd process acquires a lock before setting file labelling * During shutdown, the libvirtd process acquires a lock before restoring file labelling * During hotplug, unplug & media change the libvirtd process holds a lock while setting/restoring labels The main content lock is only ever held by the QEMU child process, or libvirtd during VM shutdown. The rest of the operations only require libvirtd to hold the metadata locks, relying on the active QEMU still holding the content lock. * src/qemu/qemu_conf.c, src/qemu/qemu_conf.h, src/qemu/libvirtd_qemu.aug, src/qemu/test_libvirtd_qemu.aug: Add config parameter for configuring lock managers * src/qemu/qemu_driver.c: Add calls to the lock manager --- src/qemu/libvirtd_qemu.aug | 1 + src/qemu/qemu.conf | 7 ++ src/qemu/qemu_conf.c | 12 ++++ src/qemu/qemu_conf.h | 3 + src/qemu/qemu_domain.c | 5 ++ src/qemu/qemu_domain.h | 1 + src/qemu/qemu_driver.c | 13 ++++- src/qemu/qemu_hotplug.c | 56 ++++++++++++++++- src/qemu/qemu_process.c | 130 ++++++++++++++++++++++++++++++--------- src/qemu/test_libvirtd_qemu.aug | 4 + 10 files changed, 198 insertions(+), 34 deletions(-) diff --git a/src/qemu/libvirtd_qemu.aug b/src/qemu/libvirtd_qemu.aug index ac30b8e..66858ae 100644 --- a/src/qemu/libvirtd_qemu.aug +++ b/src/qemu/libvirtd_qemu.aug @@ -48,6 +48,7 @@ module Libvirtd_qemu = | bool_entry "allow_disk_format_probing" | bool_entry "set_process_name" | int_entry "max_processes" + | str_entry "lock_manager" (* Each enty in the config is one of the following three ... *) let entry = vnc_entry diff --git a/src/qemu/qemu.conf b/src/qemu/qemu.conf index c70050e..2c50d9d 100644 --- a/src/qemu/qemu.conf +++ b/src/qemu/qemu.conf @@ -280,3 +280,10 @@ # override default value set by host OS. # # max_processes = 0 + +# To enable strict 'fcntl' based locking of the file +# content (to prevent two VMs writing to the same +# disk), start the 'virtlockd' service, and uncomment +# this +# +# lock_manager = "fcntl" diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index f977673..ea4d7d0 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -115,6 +115,9 @@ int qemudLoadDriverConfig(struct qemud_driver *driver, } #endif + if (!(driver->lockManager = + virLockManagerPluginNew("nop", 0))) + return -1; /* Just check the file is readable before opening it, otherwise * libvirt emits an error. @@ -428,6 +431,15 @@ int qemudLoadDriverConfig(struct qemud_driver *driver, CHECK_TYPE("max_processes", VIR_CONF_LONG); if (p) driver->maxProcesses = p->l; + p = virConfGetValue (conf, "lock_manager"); + CHECK_TYPE ("lock_manager", VIR_CONF_STRING); + if (p && p->str) { + virLockManagerPluginUnref(driver->lockManager); + if (!(driver->lockManager = + virLockManagerPluginNew(p->str, 0))) + VIR_ERROR(_("Failed to load lock manager %s"), p->str); + } + virConfFree (conf); return 0; } diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h index ceec16d..bf6dcf4 100644 --- a/src/qemu/qemu_conf.h +++ b/src/qemu/qemu_conf.h @@ -43,6 +43,7 @@ # include "macvtap.h" # include "command.h" # include "threadpool.h" +# include "locking/lock_manager.h" # define QEMUD_CPUMASK_LEN CPU_SETSIZE @@ -124,6 +125,8 @@ struct qemud_driver { virBitmapPtr reservedVNCPorts; virSysinfoDefPtr hostsysinfo; + + virLockManagerPluginPtr lockManager; }; typedef struct _qemuDomainCmdlineDef qemuDomainCmdlineDef; diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index bcacb18..81ac2bc 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -101,6 +101,7 @@ static void qemuDomainObjPrivateFree(void *data) qemuDomainPCIAddressSetFree(priv->pciaddrs); virDomainChrSourceDefFree(priv->monConfig); VIR_FREE(priv->vcpupids); + VIR_FREE(priv->lockState); /* This should never be non-NULL if we get here, but just in case... */ if (priv->mon) { @@ -157,6 +158,9 @@ static int qemuDomainObjPrivateXMLFormat(virBufferPtr buf, void *data) virBufferAddLit(buf, " </qemuCaps>\n"); } + if (priv->lockState) + virBufferAsprintf(buf, " <lockstate>%s</lockstate>\n", priv->lockState); + return 0; } @@ -260,6 +264,7 @@ static int qemuDomainObjPrivateXMLParse(xmlXPathContextPtr ctxt, void *data) } VIR_FREE(nodes); + priv->lockState = virXPathString("string(./lockstate)", ctxt); return 0; diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 6d24f53..0fca974 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -79,6 +79,7 @@ struct _qemuDomainObjPrivate { int persistentAddrs; virBitmapPtr qemuCaps; + char *lockState; }; struct qemuDomainWatchdogEvent diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 4e09c61..112237a 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1,5 +1,5 @@ /* - * driver.c: core driver methods for managing qemu guests + * qemu_driver.c: core driver methods for managing qemu guests * * Copyright (C) 2006-2011 Red Hat, Inc. * Copyright (C) 2006 Daniel P. Berrange @@ -87,6 +87,7 @@ #include "fdstream.h" #include "configmake.h" #include "threadpool.h" +#include "locking/lock_manager.h" #define VIR_FROM_THIS VIR_FROM_QEMU @@ -529,6 +530,14 @@ qemudStartup(int privileged) { } VIR_FREE(driverConf); + /* We should always at least have the 'nop' manager, so + * NULLs here are a fatal error + */ + if (!qemu_driver->lockManager) { + VIR_ERROR(_("Missing lock manager implementation")); + goto error; + } + if (qemuSecurityInit(qemu_driver) < 0) goto error; @@ -769,6 +778,8 @@ qemudShutdown(void) { virCgroupFree(&qemu_driver->cgroup); + virLockManagerPluginUnref(qemu_driver->lockManager); + qemuDriverUnlock(qemu_driver); virMutexDestroy(&qemu_driver->lock); virThreadPoolFree(qemu_driver->workerPool); diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 3cf7d35..a8e73c4 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -38,6 +38,7 @@ #include "pci.h" #include "files.h" #include "qemu_cgroup.h" +#include "locking/domain_lock.h" #define VIR_FROM_THIS VIR_FROM_QEMU @@ -82,9 +83,15 @@ int qemuDomainChangeEjectableMedia(struct qemud_driver *driver, return -1; } + if (virDomainLockDiskAttach(driver->lockManager, vm, disk) < 0) + return -1; + if (virSecurityManagerSetImageLabel(driver->securityManager, - vm, disk) < 0) + vm, disk) < 0) { + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); return -1; + } if (!(driveAlias = qemuDeviceDriveHostAlias(origdisk, priv->qemuCaps))) goto error; @@ -115,6 +122,9 @@ int qemuDomainChangeEjectableMedia(struct qemud_driver *driver, vm, origdisk) < 0) VIR_WARN("Unable to restore security label on ejected image %s", origdisk->src); + if (virDomainLockDiskDetach(driver->lockManager, vm, origdisk) < 0) + VIR_WARN("Unable to release lock on disk %s", origdisk->src); + VIR_FREE(origdisk->src); origdisk->src = disk->src; disk->src = NULL; @@ -128,9 +138,14 @@ int qemuDomainChangeEjectableMedia(struct qemud_driver *driver, error: VIR_FREE(driveAlias); + if (virSecurityManagerRestoreImageLabel(driver->securityManager, vm, disk) < 0) VIR_WARN("Unable to restore security label on new media %s", disk->src); + + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); + return -1; } @@ -154,9 +169,15 @@ int qemuDomainAttachPciDiskDevice(struct qemud_driver *driver, } } + if (virDomainLockDiskAttach(driver->lockManager, vm, disk) < 0) + return -1; + if (virSecurityManagerSetImageLabel(driver->securityManager, - vm, disk) < 0) + vm, disk) < 0) { + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); return -1; + } if (qemuCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE)) { if (qemuDomainPCIAddressEnsureAddr(priv->pciaddrs, &disk->info) < 0) @@ -228,6 +249,9 @@ error: vm, disk) < 0) VIR_WARN("Unable to restore security label on %s", disk->src); + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); + return -1; } @@ -364,10 +388,15 @@ int qemuDomainAttachSCSIDisk(struct qemud_driver *driver, } } + if (virDomainLockDiskAttach(driver->lockManager, vm, disk) < 0) + return -1; if (virSecurityManagerSetImageLabel(driver->securityManager, - vm, disk) < 0) + vm, disk) < 0) { + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); return -1; + } /* We should have an address already, so make sure */ if (disk->info.type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_DRIVE) { @@ -456,6 +485,9 @@ error: vm, disk) < 0) VIR_WARN("Unable to restore security label on %s", disk->src); + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); + return -1; } @@ -477,10 +509,17 @@ int qemuDomainAttachUsbMassstorageDevice(struct qemud_driver *driver, } } + if (virDomainLockDiskAttach(driver->lockManager, vm, disk) < 0) + return -1; + if (virSecurityManagerSetImageLabel(driver->securityManager, - vm, disk) < 0) + vm, disk) < 0) { + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); return -1; + } + /* XXX not correct once we allow attaching a USB CDROM */ if (!disk->src) { qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("disk source path is missing")); @@ -538,6 +577,9 @@ error: vm, disk) < 0) VIR_WARN("Unable to restore security label on %s", disk->src); + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); + return -1; } @@ -1184,6 +1226,9 @@ int qemuDomainDetachPciDiskDevice(struct qemud_driver *driver, NULLSTR(dev->data.disk->src)); } + if (virDomainLockDiskDetach(driver->lockManager, vm, dev->data.disk) < 0) + VIR_WARN("Unable to release lock on %s", dev->data.disk->src); + ret = 0; cleanup: @@ -1262,6 +1307,9 @@ int qemuDomainDetachDiskDevice(struct qemud_driver *driver, NULLSTR(dev->data.disk->src)); } + if (virDomainLockDiskDetach(driver->lockManager, vm, dev->data.disk) < 0) + VIR_WARN("Unable to release lock on disk %s", dev->data.disk->src); + ret = 0; cleanup: diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 01b15e0..833e035 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -50,6 +50,7 @@ #include "nodeinfo.h" #include "processinfo.h" #include "domain_nwfilter.h" +#include "locking/domain_lock.h" #define VIR_FROM_THIS VIR_FROM_QEMU @@ -344,6 +345,7 @@ qemuProcessHandleStop(qemuMonitorPtr mon ATTRIBUTE_UNUSED, virDomainObjLock(vm); if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) { + qemuDomainObjPrivatePtr priv = vm->privateData; VIR_DEBUG("Transitioned guest %s to paused state due to unknown event", vm->def->name); @@ -352,6 +354,11 @@ qemuProcessHandleStop(qemuMonitorPtr mon ATTRIBUTE_UNUSED, VIR_DOMAIN_EVENT_SUSPENDED, VIR_DOMAIN_EVENT_SUSPENDED_PAUSED); + VIR_FREE(priv->lockState); + if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) + VIR_WARN("Unable to release lease on %s", vm->def->name); + VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); + if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) { VIR_WARN("Unable to save status on vm %s after state change", vm->def->name); @@ -413,6 +420,7 @@ qemuProcessHandleWatchdog(qemuMonitorPtr mon ATTRIBUTE_UNUSED, if (action == VIR_DOMAIN_EVENT_WATCHDOG_PAUSE && virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) { + qemuDomainObjPrivatePtr priv = vm->privateData; VIR_DEBUG("Transitioned guest %s to paused state due to watchdog", vm->def->name); virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_WATCHDOG); @@ -420,6 +428,11 @@ qemuProcessHandleWatchdog(qemuMonitorPtr mon ATTRIBUTE_UNUSED, VIR_DOMAIN_EVENT_SUSPENDED, VIR_DOMAIN_EVENT_SUSPENDED_WATCHDOG); + VIR_FREE(priv->lockState); + if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) + VIR_WARN("Unable to release lease on %s", vm->def->name); + VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); + if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) { VIR_WARN("Unable to save status on vm %s after watchdog event", vm->def->name); @@ -492,6 +505,7 @@ qemuProcessHandleIOError(qemuMonitorPtr mon ATTRIBUTE_UNUSED, if (action == VIR_DOMAIN_EVENT_IO_ERROR_PAUSE && virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) { + qemuDomainObjPrivatePtr priv = vm->privateData; VIR_DEBUG("Transitioned guest %s to paused state due to IO error", vm->def->name); virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_IOERROR); @@ -499,6 +513,11 @@ qemuProcessHandleIOError(qemuMonitorPtr mon ATTRIBUTE_UNUSED, VIR_DOMAIN_EVENT_SUSPENDED, VIR_DOMAIN_EVENT_SUSPENDED_IOERROR); + VIR_FREE(priv->lockState); + if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) + VIR_WARN("Unable to release lease on %s", vm->def->name); + VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); + if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) VIR_WARN("Unable to save status on vm %s after IO error", vm->def->name); } @@ -1777,6 +1796,17 @@ struct qemuProcessHookData { static int qemuProcessHook(void *data) { struct qemuProcessHookData *h = data; + int ret = -1; + + /* Some later calls want pid present */ + h->vm->pid = getpid(); + + VIR_DEBUG("Obtaining domain lock"); + if (virDomainLockProcessStart(h->driver->lockManager, + h->vm, + /* QEMU is always pased initially */ + true) < 0) + goto cleanup; if (qemuProcessLimits(h->driver) < 0) return -1; @@ -1784,18 +1814,25 @@ static int qemuProcessHook(void *data) /* This must take place before exec(), so that all QEMU * memory allocation is on the correct NUMA node */ + VIR_DEBUG("Moving procss to cgroup"); if (qemuAddToCgroup(h->driver, h->vm->def) < 0) - return -1; + goto cleanup; /* This must be done after cgroup placement to avoid resetting CPU * affinity */ + VIR_DEBUG("Setup CPU affinity"); if (qemuProcessInitCpuAffinity(h->vm) < 0) - return -1; + goto cleanup; + VIR_DEBUG("Setting up security labeling"); if (virSecurityManagerSetProcessLabel(h->driver->securityManager, h->vm) < 0) - return -1; + goto cleanup; - return 0; + ret = 0; + +cleanup: + VIR_DEBUG("Hook complete ret=%d", ret); + return ret; } @@ -1824,12 +1861,27 @@ qemuProcessStartCPUs(struct qemud_driver *driver, virDomainObjPtr vm, int ret; qemuDomainObjPrivatePtr priv = vm->privateData; + VIR_DEBUG("Using lock state '%s'", NULLSTR(priv->lockState)); + if (virDomainLockProcessResume(driver->lockManager, vm, priv->lockState) < 0) { + /* Don't free priv->lockState on error, because we need + * to make sure we have state still present if the user + * tries to resume again + */ + return -1; + } + VIR_FREE(priv->lockState); + qemuDomainObjEnterMonitorWithDriver(driver, vm); ret = qemuMonitorStartCPUs(priv->mon, conn); qemuDomainObjExitMonitorWithDriver(driver, vm); - if (ret == 0) + if (ret == 0) { virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, reason); + } else { + if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) + VIR_WARN("Unable to release lease on %s", vm->def->name); + VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); + } return ret; } @@ -1843,6 +1895,7 @@ int qemuProcessStopCPUs(struct qemud_driver *driver, virDomainObjPtr vm, int oldReason; qemuDomainObjPrivatePtr priv = vm->privateData; + VIR_FREE(priv->lockState); oldState = virDomainObjGetState(vm, &oldReason); virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, reason); @@ -1850,8 +1903,13 @@ int qemuProcessStopCPUs(struct qemud_driver *driver, virDomainObjPtr vm, ret = qemuMonitorStopCPUs(priv->mon); qemuDomainObjExitMonitorWithDriver(driver, vm); - if (ret < 0) + if (ret == 0) { + if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) + VIR_WARN("Unable to release lease on %s", vm->def->name); + VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); + } else { virDomainObjSetState(vm, oldState, oldReason); + } return ret; } @@ -2096,29 +2154,6 @@ int qemuProcessStart(virConnectPtr conn, } qemuAuditSecurityLabel(vm, true); - VIR_DEBUG("Generating setting domain security labels (if required)"); - if (virSecurityManagerSetAllLabel(driver->securityManager, - vm, stdin_path) < 0) - goto cleanup; - - if (stdin_fd != -1) { - /* if there's an fd to migrate from, and it's a pipe, put the - * proper security label on it - */ - struct stat stdin_sb; - - VIR_DEBUG("setting security label on pipe used for migration"); - - if (fstat(stdin_fd, &stdin_sb) < 0) { - virReportSystemError(errno, - _("cannot stat fd %d"), stdin_fd); - goto cleanup; - } - if (S_ISFIFO(stdin_sb.st_mode) && - virSecurityManagerSetFDLabel(driver->securityManager, vm, stdin_fd) < 0) - goto cleanup; - } - /* Ensure no historical cgroup for this VM is lying around bogus * settings */ VIR_DEBUG("Ensuring no historical cgroup is lying around"); @@ -2303,6 +2338,7 @@ int qemuProcessStart(virConnectPtr conn, virCommandNonblockingFDs(cmd); virCommandSetPidFile(cmd, pidfile); virCommandDaemonize(cmd); + virCommandRequireHandshake(cmd); ret = virCommandRun(cmd, NULL); VIR_FREE(pidfile); @@ -2333,6 +2369,42 @@ int qemuProcessStart(virConnectPtr conn, #endif } + VIR_DEBUG("Waiting for handshake from child"); + if (virCommandHandshakeWait(cmd) < 0) { + ret = -1; + goto cleanup; + } + + VIR_DEBUG("Setting domain security labels"); + if (virSecurityManagerSetAllLabel(driver->securityManager, + vm, stdin_path) < 0) + goto cleanup; + + if (stdin_fd != -1) { + /* if there's an fd to migrate from, and it's a pipe, put the + * proper security label on it + */ + struct stat stdin_sb; + + VIR_DEBUG("setting security label on pipe used for migration"); + + if (fstat(stdin_fd, &stdin_sb) < 0) { + virReportSystemError(errno, + _("cannot stat fd %d"), stdin_fd); + goto cleanup; + } + if (S_ISFIFO(stdin_sb.st_mode) && + virSecurityManagerSetFDLabel(driver->securityManager, vm, stdin_fd) < 0) + goto cleanup; + } + + VIR_DEBUG("Labelling done, completing handshake to child"); + if (virCommandHandshakeNotify(cmd) < 0) { + ret = -1; + goto cleanup; + } + VIR_DEBUG("Handshake complete, child running"); + if (migrateFrom) start_paused = true; diff --git a/src/qemu/test_libvirtd_qemu.aug b/src/qemu/test_libvirtd_qemu.aug index 917bd4f..b1f9114 100644 --- a/src/qemu/test_libvirtd_qemu.aug +++ b/src/qemu/test_libvirtd_qemu.aug @@ -113,6 +113,8 @@ allow_disk_format_probing = 1 vnc_auto_unix_socket = 1 max_processes = 12345 + +lock_manager = \"fcntl\" " test Libvirtd_qemu.lns get conf = @@ -236,3 +238,5 @@ max_processes = 12345 { "vnc_auto_unix_socket" = "1" } { "#empty" } { "max_processes" = "12345" } +{ "#empty" } +{ "lock_manager" = "fcntl" } -- 1.7.4.4 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list