When creating multiple VMs in parallel, concurrent volume creation
holds an async job on the pool. A pool refresh during this window
fails immediately with "pool has asynchronous jobs running", which
causes cascading failures in parallel provisioning workflows.

The refresh operation genuinely cannot run while a volume build is
in progress (it clears all volume metadata via
virStoragePoolObjClearVols), but the failure is premature since
the async job will finish shortly.

Add a condition variable to virStoragePoolObj that allows
storagePoolRefresh() to wait up to 30 seconds for async jobs to
drain. The volume build thread broadcasts the condition when it
decrements asyncjobs to zero. After waking, the refresh function
re-validates preconditions (pool still active, not starting) since
the pool lock was released during the wait.

Only storagePoolRefresh() gets the wait mechanism. The other three
operations (destroy, undefine, delete) keep the immediate error
because waiting to destroy or delete a pool during volume creation
is not a sensible user workflow.

Resolves: https://issues.redhat.com/browse/RHEL-150758
Signed-off-by: Lucas Amaral <[email protected]>
---
Build-tested on CentOS Stream 9 (297 OK, 0 failures).

Runtime-validated using LD_PRELOAD to inject a 15-second delay into
fallocate64() for pool directory files (the storage backend calls
fallocate() directly in storage_util.c:331, which with LFS resolves
to fallocate64; on local filesystems it completes in microseconds,
making the race window too narrow to hit naturally).

Reproduction steps (privileged CentOS Stream 9 container, virtstoraged):

  1. Compile LD_PRELOAD shim that sleeps 15s in fallocate64() for
     files under /var/lib/libvirt/images/ larger than 1MB
  2. Start virtstoraged with LD_PRELOAD
  3. Create and start a dir-type pool
  4. Start vol-create-as in background (triggers ~15s async job)
  5. After 3s, call pool-refresh

  Before (unpatched):
    $ virsh vol-create-as default testvol 100M &
    $ sleep 3 && virsh pool-refresh default
    error: Failed to refresh pool default
    error: internal error: pool 'default' has asynchronous jobs running.

  After (both patches applied):
    $ virsh vol-create-as default testvol 100M &
    $ sleep 3 && virsh pool-refresh default
    Vol testvol created
    Pool default refreshed              (waited ~12s for async job)

  pool-destroy correctly rejects immediately (no wait):
    $ virsh vol-create-as default testvol 100M &
    $ sleep 3 && virsh pool-destroy default
    error: Failed to destroy pool default
    error: Requested operation is not valid: pool 'default' has asynchronous 
jobs running.
    Vol testvol created

 src/conf/virstorageobj.c     | 39 ++++++++++++++++++++++++++++++++++++
 src/conf/virstorageobj.h     |  3 +++
 src/libvirt_private.syms     |  1 +
 src/storage/storage_driver.c | 19 ++++++++++++++----
 4 files changed, 58 insertions(+), 4 deletions(-)

diff --git a/src/conf/virstorageobj.c b/src/conf/virstorageobj.c
index ac79ff4c26..2f8f1339ad 100644
--- a/src/conf/virstorageobj.c
+++ b/src/conf/virstorageobj.c
@@ -31,6 +31,7 @@
 #include "virlog.h"
 #include "virscsihost.h"
 #include "virstring.h"
+#include "virtime.h"
 #include "virvhba.h"
 
 #define VIR_FROM_THIS VIR_FROM_STORAGE
@@ -86,6 +87,7 @@ struct _virStoragePoolObj {
     bool starting;
     bool autostart;
     unsigned int asyncjobs;
+    virCond asyncjobsCond;
 
     virStoragePoolDef *def;
     virStoragePoolDef *newDef;
@@ -196,6 +198,11 @@ virStoragePoolObjNew(void)
     if (!(obj = virObjectLockableNew(virStoragePoolObjClass)))
         return NULL;
 
+    if (virCondInit(&obj->asyncjobsCond) < 0) {
+        virObjectUnref(obj);
+        return NULL;
+    }
+
     if (!(obj->volumes = virStorageVolObjListNew())) {
         virObjectUnref(obj);
         return NULL;
@@ -338,6 +345,36 @@ void
 virStoragePoolObjDecrAsyncjobs(virStoragePoolObj *obj)
 {
     obj->asyncjobs--;
+    if (obj->asyncjobs == 0)
+        virCondBroadcast(&obj->asyncjobsCond);
+}
+
+
+int
+virStoragePoolObjWaitAsyncjobs(virStoragePoolObj *obj)
+{
+    unsigned long long deadline;
+
+    if (virTimeMillisNow(&deadline) < 0)
+        return -1;
+    deadline += 30 * 1000;
+
+    while (obj->asyncjobs > 0) {
+        if (virCondWaitUntil(&obj->asyncjobsCond,
+                             &obj->parent.lock, deadline) < 0) {
+            if (errno == ETIMEDOUT) {
+                virReportError(VIR_ERR_OPERATION_TIMEOUT,
+                               _("timed out waiting for async jobs on pool 
'%1$s'"),
+                               obj->def->name);
+            } else {
+                virReportSystemError(errno, "%s",
+                                     _("failed to wait for pool async jobs"));
+            }
+            return -1;
+        }
+    }
+
+    return 0;
 }
 
 
@@ -354,6 +391,8 @@ virStoragePoolObjDispose(void *opaque)
 
     g_free(obj->configFile);
     g_free(obj->autostartLink);
+
+    virCondDestroy(&obj->asyncjobsCond);
 }
 
 
diff --git a/src/conf/virstorageobj.h b/src/conf/virstorageobj.h
index e1eabfdb3a..a478d2f33e 100644
--- a/src/conf/virstorageobj.h
+++ b/src/conf/virstorageobj.h
@@ -110,6 +110,9 @@ virStoragePoolObjIncrAsyncjobs(virStoragePoolObj *obj);
 void
 virStoragePoolObjDecrAsyncjobs(virStoragePoolObj *obj);
 
+int
+virStoragePoolObjWaitAsyncjobs(virStoragePoolObj *obj);
+
 int
 virStoragePoolObjLoadAllConfigs(virStoragePoolObjList *pools,
                                 const char *configDir,
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index d8ae4f46cd..950ba51384 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -1547,6 +1547,7 @@ virStoragePoolObjSetDef;
 virStoragePoolObjSetStarting;
 virStoragePoolObjVolumeGetNames;
 virStoragePoolObjVolumeListExport;
+virStoragePoolObjWaitAsyncjobs;
 
 
 # cpu/cpu.h
diff --git a/src/storage/storage_driver.c b/src/storage/storage_driver.c
index 8f5c921157..f7b5c8bfbd 100644
--- a/src/storage/storage_driver.c
+++ b/src/storage/storage_driver.c
@@ -1231,10 +1231,21 @@ storagePoolRefresh(virStoragePoolPtr pool,
     }
 
     if (virStoragePoolObjGetAsyncjobs(obj) > 0) {
-        virReportError(VIR_ERR_INTERNAL_ERROR,
-                       _("pool '%1$s' has asynchronous jobs running."),
-                       def->name);
-        goto cleanup;
+        if (virStoragePoolObjWaitAsyncjobs(obj) < 0)
+            goto cleanup;
+
+        if (!virStoragePoolObjIsActive(obj)) {
+            virReportError(VIR_ERR_OPERATION_INVALID,
+                           _("storage pool '%1$s' is not active"), def->name);
+            goto cleanup;
+        }
+
+        if (virStoragePoolObjIsStarting(obj)) {
+            virReportError(VIR_ERR_OPERATION_INVALID,
+                           _("storage pool '%1$s' is starting up"),
+                           def->name);
+            goto cleanup;
+        }
     }
 
     stateFile = virFileBuildPath(driver->stateDir, def->name, ".xml");
-- 
2.52.0

Reply via email to