Cluster verification previously performed only one memory check: the check for N+1 consistency. This patch modifies it to account for memory over-commitment. In addition, the hv_state parameters are used instead of mem_dom0 in cluster verify.
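
To illustrate the new threshold (the numbers below are hypothetical): on a node with memory_total = 8192 MiB, a reserved node memory (mdom0, or hv_state mem_node) of 1024 MiB and a group memory ratio of 2.0, the minimum allowed value of mfree - needed_mem becomes

  (8192 - 1024) * (1 - 2.0) = -7168 MiB

so an N+1 error is only reported once the projected free memory after failovers drops below -7168 MiB. With a memory ratio of 1.0 (no over-commitment) the threshold is 0 and the check reduces to the previous "mfree < needed_mem" test.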
Signed-off-by: Oleg Ponomarev <[email protected]>
---
 lib/cmdlib/cluster/verify.py | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/lib/cmdlib/cluster/verify.py b/lib/cmdlib/cluster/verify.py
index d2c8d85..574d9a3 100644
--- a/lib/cmdlib/cluster/verify.py
+++ b/lib/cmdlib/cluster/verify.py
@@ -335,6 +335,8 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
     @ivar sbp: dictionary of {primary-node: list of instances} for all
         instances for which this node is secondary (config)
     @ivar mfree: free memory, as reported by hypervisor (runtime)
+    @ivar mtotal: total memory, as reported by hypervisor (runtime)
+    @ivar mdom0: domain0 memory, as reported by hypervisor (runtime)
     @ivar dfree: free disk, as reported by the node (runtime)
     @ivar offline: the offline status (config)
     @type rpc_fail: boolean
@@ -366,6 +368,8 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
       self.sinst = []
       self.sbp = {}
       self.mfree = 0
+      self.mtotal = 0
+      self.mdom0 = 0
       self.dfree = 0
       self.offline = offline
       self.vm_capable = vm_capable
@@ -929,6 +933,10 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):

     """
     cluster_info = self.cfg.GetClusterInfo()
+    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster_info,
+                                                            self.group_info)
+    memory_ratio = ipolicy[constants.IPOLICY_MEMORY_RATIO]
+
     for node_uuid, n_img in node_image.items():
       # This code checks that every node which is now listed as
       # secondary has enough memory to host all instances it is
@@ -952,7 +960,13 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
           bep = cluster_info.FillBE(all_insts[inst_uuid])
           if bep[constants.BE_AUTO_BALANCE]:
             needed_mem += bep[constants.BE_MINMEM]
-        test = n_img.mfree < needed_mem
+        mnode = n_img.mdom0
+        (hv, hv_state) = self.cfg.GetFilledHvStateParams(prinode).items()[0]
+        if hv != constants.HT_XEN_PVM and hv != constants.HT_XEN_HVM:
+          mnode = hv_state["mem_node"]
+        # minimum allowed free memory (it's negative due to over-commitment)
+        mem_threshold = (n_img.mtotal - mnode) * (1 - memory_ratio)
+        test = n_img.mfree - needed_mem < mem_threshold
         self._ErrorIf(test, constants.CV_ENODEN1,
                       self.cfg.GetNodeName(node_uuid),
                       "not enough memory to accomodate instance failovers"
@@ -1540,12 +1554,16 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
     """
     # try to read free memory (from the hypervisor)
     hv_info = nresult.get(constants.NV_HVINFO, None)
-    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
+    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info \
+           or "memory_total" not in hv_info \
+           or "memory_dom0" not in hv_info
     self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
                   "rpc call to node failed (hvinfo)")
     if not test:
      try:
-        nimg.mfree = int(hv_info["memory_free"])
+        nimg.mfree = int(hv_info["memory_free"])
+        nimg.mtotal = int(hv_info["memory_total"])
+        nimg.mdom0 = int(hv_info["memory_dom0"])
       except (ValueError, TypeError):
         self._ErrorIf(True, constants.CV_ENODERPC, ninfo.name,
                       "node returned invalid nodeinfo, check hypervisor")
--
2.6.0.rc2.230.g3dd15c0
