If the timing is "just right", there is a possibility that the
udev nodeStateInitialize conflicts with another systemd thread
running an lspci command leaving both waiting for "something",
but resulting in a hung libvirtd (and hung lspci thread) from
which the only recovery is a reboot because killing either thread
is impossible and results in a defunct libvirtd process if a
SIGKILL is performed.

In order to avoid this let's move where the PCI initialization
is done to be where it's actually needed. Ensure we only perform
the initialization once via a driver bool.  Likewise, during
cleanup ensure we only call udevPCITranslateDeinit once the
initialization is successful.

At least a failure for this driver won't hang out the rest of the
the libvirt event loop. May not make certain things usable though.
Still a libvirtd restart is far easier than a host reboot.

Signed-off-by: John Ferlan <jfer...@redhat.com>
---
 src/conf/virnodedeviceobj.h        |  1 +
 src/node_device/node_device_udev.c | 16 ++++++++++++----
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/src/conf/virnodedeviceobj.h b/src/conf/virnodedeviceobj.h
index 87f908369..b5f96f206 100644
--- a/src/conf/virnodedeviceobj.h
+++ b/src/conf/virnodedeviceobj.h
@@ -41,6 +41,7 @@ struct _virNodeDeviceDriverState {
     virNodeDeviceObjListPtr devs;       /* currently-known devices */
     void *privateData;                  /* driver-specific private data */
     bool privileged;                    /* whether we run in privileged mode */
+    bool initPCI;                       /* Set when PCI thread completed */
 
     /* Immutable pointer, self-locking APIs */
     virObjectEventStatePtr nodeDeviceEventState;
diff --git a/src/node_device/node_device_udev.c 
b/src/node_device/node_device_udev.c
index ca5b47767..9cbba8562 100644
--- a/src/node_device/node_device_udev.c
+++ b/src/node_device/node_device_udev.c
@@ -551,12 +551,21 @@ udevProcessPCI(struct udev_device *device,
     virPCIEDeviceInfoPtr pci_express = NULL;
     virPCIDevicePtr pciDev = NULL;
     int ret = -1;
+    int rc = 0;
     char *p;
     bool privileged;
 
     nodeDeviceLock();
     privileged = driver->privileged;
+
+    if (!driver->initPCI) {
+        rc = udevPCITranslateInit(driver->privileged);
+        driver->initPCI = true;
+    }
+
     nodeDeviceUnlock();
+    if (rc < 0)
+        goto cleanup;
 
     if (udevGetUintProperty(device, "PCI_CLASS", &pci_dev->class, 16) < 0)
         goto cleanup;
@@ -1681,6 +1690,9 @@ nodeStateCleanup(void)
         virThreadJoin(&priv->th);
     }
 
+    if (driver->initPCI)
+        udevPCITranslateDeinit();
+
     virObjectUnref(priv);
     virObjectUnref(driver->nodeDeviceEventState);
 
@@ -1688,7 +1700,6 @@ nodeStateCleanup(void)
     virMutexDestroy(&driver->lock);
     VIR_FREE(driver);
 
-    udevPCITranslateDeinit();
     return 0;
 }
 
@@ -1962,9 +1973,6 @@ nodeStateInitialize(bool privileged,
     driver->privateData = priv;
     driver->nodeDeviceEventState = virObjectEventStateNew();
 
-    if (udevPCITranslateInit(privileged) < 0)
-        goto cleanup;
-
     udev = udev_new();
     if (!udev) {
         virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
-- 
2.13.6

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

Reply via email to