Cluster verification previously performed only one memory check: the check for N+1 consistency. This patch modifies it to account for memory over-commitment. In addition, the hv_state parameters are used instead of mem_dom0 in cluster verify.
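
To illustrate the new threshold (the numbers below are hypothetical): on a node with memory_total = 8192 MiB, a reserved node memory (mdom0, or hv_state mem_node) of 1024 MiB and a group memory ratio of 2.0, the minimum allowed value of mfree - needed_mem becomes

  (8192 - 1024) * (1 - 2.0) = -7168 MiB

so an N+1 error is only reported once the projected free memory after failovers drops below -7168 MiB. With a memory ratio of 1.0 (no over-commitment) the threshold is 0 and the check reduces to the previous "mfree < needed_mem" test.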
Signed-off-by: Oleg Ponomarev <[email protected]>
---
 lib/cmdlib/cluster/verify.py | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/lib/cmdlib/cluster/verify.py b/lib/cmdlib/cluster/verify.py
index d2c8d85..574d9a3 100644
--- a/lib/cmdlib/cluster/verify.py
+++ b/lib/cmdlib/cluster/verify.py
@@ -335,6 +335,8 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
     @ivar sbp: dictionary of {primary-node: list of instances} for all
         instances for which this node is secondary (config)
     @ivar mfree: free memory, as reported by hypervisor (runtime)
+    @ivar mtotal: total memory, as reported by hypervisor (runtime)
+    @ivar mdom0: domain0 memory, as reported by hypervisor (runtime)
     @ivar dfree: free disk, as reported by the node (runtime)
     @ivar offline: the offline status (config)
     @type rpc_fail: boolean
@@ -366,6 +368,8 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
       self.sinst = []
       self.sbp = {}
       self.mfree = 0
+      self.mtotal = 0
+      self.mdom0 = 0
       self.dfree = 0
       self.offline = offline
       self.vm_capable = vm_capable
@@ -929,6 +933,10 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):

     """
     cluster_info = self.cfg.GetClusterInfo()
+    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster_info,
+                                                            self.group_info)
+    memory_ratio = ipolicy[constants.IPOLICY_MEMORY_RATIO]
+
     for node_uuid, n_img in node_image.items():
       # This code checks that every node which is now listed as
       # secondary has enough memory to host all instances it is
@@ -952,7 +960,13 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
           bep = cluster_info.FillBE(all_insts[inst_uuid])
           if bep[constants.BE_AUTO_BALANCE]:
             needed_mem += bep[constants.BE_MINMEM]
-        test = n_img.mfree < needed_mem
+        mnode = n_img.mdom0
+        (hv, hv_state) = self.cfg.GetFilledHvStateParams(prinode).items()[0]
+        if hv != constants.HT_XEN_PVM and hv != constants.HT_XEN_HVM:
+          mnode = hv_state["mem_node"]
+        # minimum allowed free memory (it's negative due to over-commitment)
+        mem_threshold = (n_img.mtotal - mnode) * (1 - memory_ratio)
+        test = n_img.mfree - needed_mem < mem_threshold
         self._ErrorIf(test, constants.CV_ENODEN1,
                       self.cfg.GetNodeName(node_uuid),
                       "not enough memory to accomodate instance failovers"
@@ -1540,12 +1554,16 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
     """
     # try to read free memory (from the hypervisor)
     hv_info = nresult.get(constants.NV_HVINFO, None)
-    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
+    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info \
+           or "memory_total" not in hv_info \
+           or "memory_dom0" not in hv_info
     self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
                   "rpc call to node failed (hvinfo)")
     if not test:
      try:
-        nimg.mfree = int(hv_info["memory_free"])
+        nimg.mfree = int(hv_info["memory_free"])
+        nimg.mtotal = int(hv_info["memory_total"])
+        nimg.mdom0 = int(hv_info["memory_dom0"])
       except (ValueError, TypeError):
         self._ErrorIf(True, constants.CV_ENODERPC, ninfo.name,
                       "node returned invalid nodeinfo, check hypervisor")
--
2.6.0.rc2.230.g3dd15c0
