On 07.02.2014 04:53, Jim Fehlig wrote:
Large balloon operation can be time consuming.  Use the recently
added job functions and unlock the virDomainObj while ballooning.

Signed-off-by: Jim Fehlig <jfeh...@suse.com>
---
  src/libxl/libxl_driver.c | 32 ++++++++++++++++++++++----------
  1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c
index 7c964c5..4f333bd 100644
--- a/src/libxl/libxl_driver.c
+++ b/src/libxl/libxl_driver.c
@@ -1646,6 +1646,9 @@ libxlDomainSetMemoryFlags(virDomainPtr dom, unsigned long 
newmem,
      if (virDomainSetMemoryFlagsEnsureACL(dom->conn, vm->def, flags) < 0)
          goto cleanup;

+    if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0)
+        goto cleanup;
+
      isActive = virDomainObjIsActive(vm);

Correct, domain needs to be checked if still alive after job was successfully started.


      if (flags == VIR_DOMAIN_MEM_CURRENT) {
@@ -1664,19 +1667,19 @@ libxlDomainSetMemoryFlags(virDomainPtr dom, unsigned 
long newmem,
      if (!isActive && (flags & VIR_DOMAIN_MEM_LIVE)) {
          virReportError(VIR_ERR_OPERATION_INVALID, "%s",
                         _("cannot set memory on an inactive domain"));
-        goto cleanup;
+        goto endjob;
      }

      if (flags & VIR_DOMAIN_MEM_CONFIG) {
          if (!vm->persistent) {
              virReportError(VIR_ERR_OPERATION_INVALID, "%s",
                             _("cannot change persistent config of a transient 
domain"));
-            goto cleanup;
+            goto endjob;
          }
          if (!(persistentDef = virDomainObjGetPersistentDef(cfg->caps,
                                                             driver->xmlopt,
                                                             vm)))
-            goto cleanup;
+            goto endjob;
      }

      if (flags & VIR_DOMAIN_MEM_MAXIMUM) {
@@ -1688,7 +1691,7 @@ libxlDomainSetMemoryFlags(virDomainPtr dom, unsigned long 
newmem,
                  virReportError(VIR_ERR_INTERNAL_ERROR,
                                 _("Failed to set maximum memory for domain 
'%d'"
                                   " with libxenlight"), dom->id);
-                goto cleanup;
+                goto endjob;
              }
          }

@@ -1699,7 +1702,7 @@ libxlDomainSetMemoryFlags(virDomainPtr dom, unsigned long 
newmem,
              if (persistentDef->mem.cur_balloon > newmem)
                  persistentDef->mem.cur_balloon = newmem;
              ret = virDomainSaveConfig(cfg->configDir, persistentDef);
-            goto cleanup;
+            goto endjob;
          }

      } else {
@@ -1708,17 +1711,23 @@ libxlDomainSetMemoryFlags(virDomainPtr dom, unsigned 
long newmem,
          if (newmem > vm->def->mem.max_balloon) {
              virReportError(VIR_ERR_INVALID_ARG, "%s",
                             _("cannot set memory higher than max memory"));
-            goto cleanup;
+            goto endjob;
          }

          if (flags & VIR_DOMAIN_MEM_LIVE) {
+            int res;
+
              priv = vm->privateData;
-            if (libxl_set_memory_target(priv->ctx, dom->id, newmem, 0,
-                                        /* force */ 1) < 0) {
+            /* Unlock virDomainObj while ballooning memory */
+            virObjectUnlock(vm);

1: ^^

+            res = libxl_set_memory_target(priv->ctx, dom->id, newmem, 0,
+                                          /* force */ 1);
+            virObjectLock(vm);

2: ^^

+            if (res < 0) {
                  virReportError(VIR_ERR_INTERNAL_ERROR,
                                 _("Failed to set memory for domain '%d'"
                                   " with libxenlight"), dom->id);
-                goto cleanup;
+                goto endjob;
              }
          }

@@ -1726,12 +1735,15 @@ libxlDomainSetMemoryFlags(virDomainPtr dom, unsigned 
long newmem,
              sa_assert(persistentDef);
              persistentDef->mem.cur_balloon = newmem;
              ret = virDomainSaveConfig(cfg->configDir, persistentDef);
-            goto cleanup;
+            goto endjob;
          }
      }

      ret = 0;

+endjob:
+    libxlDomainObjEndJob(driver, vm);
+

Hm. I wonder if we should rather check for libxlDomainObjEndJob return value. Currently, as you unlock the domain at [1] another thread waiting on the domain lock may come and lock it for itself. Then we will wait at [2] until the domain is unlocked again. This is possibly dangerous if the other thread was executing libxlDomainDestroyFlags(). If that's the case libxlDomainObjEndJob shall return false, as we held the last reference to @vm. But the memory @vm points to has been already freed (in the EndJob()) ...

  cleanup:
      if (vm)
          virObjectUnlock(vm);


... what means we are playing with fire here (SIGSEGV).

Therefore I think we should check for libxlDomainObjEndJob() return value:

endjob:
    if (!libxlDomainObjEndJob(driver, vm))
        vm = NULL;

BTW: the domain is unlocked and locked in libxlDomainObjBeginJob() too. Yes, it's for a very short time, but it may be sufficient to another thread hop in and do something bad. This reveals even more interesting bug:

libxlVmCleanup() sets vm->dom->id = -1 only for persistent domains. So if the SetMemoryFlags() is running over a transient domain, after job was successfully acquired, virDomainObjIsActive() will return true. And since libxlVmCleanup() replaces vm->def with vm->newDef, the SetMemoryFlags() will continue working on (in general) completely different domain definition than the one when the API started. Sigh.

(quick look over the rest of the patches reveals the same problem, but I won't repeat myself in each of them)

Otherwise the code looks okay.

Michal

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

Reply via email to