On Wed, Apr 15, 2015 at 08:55:07AM +0200, 'Klaus Aehlig' via ganeti-devel wrote:
In the presence of shared storage, global N+1 redundancy has
to be checked. Therefore, make hcheck report the number of nodes
with instances that cannot immediately be restarted in case
of node failure.

Signed-off-by: Klaus Aehlig <[email protected]>
---
src/Ganeti/HTools/Program/Hcheck.hs | 6 ++++++
1 file changed, 6 insertions(+)

diff --git a/src/Ganeti/HTools/Program/Hcheck.hs b/src/Ganeti/HTools/Program/Hcheck.hs
index c6ce982..9c93bd4 100644
--- a/src/Ganeti/HTools/Program/Hcheck.hs
+++ b/src/Ganeti/HTools/Program/Hcheck.hs
@@ -39,6 +39,7 @@ module Ganeti.HTools.Program.Hcheck
  ) where

import Control.Monad
+import qualified Data.IntMap as IntMap
import Data.List (transpose)
import System.Exit
import Text.Printf (printf)
@@ -48,6 +49,7 @@ import qualified Ganeti.HTools.Container as Container
import qualified Ganeti.HTools.Cluster as Cluster
import qualified Ganeti.HTools.Cluster.Metrics as Metrics
import qualified Ganeti.HTools.Cluster.Utils as ClusterUtils
+import qualified Ganeti.HTools.GlobalN1 as GlobalN1
import qualified Ganeti.HTools.Group as Group
import qualified Ganeti.HTools.Node as Node
import qualified Ganeti.HTools.Instance as Instance
@@ -120,6 +122,7 @@ commonData =[ ("N1_FAIL", "Nodes not N+1 happy")
            , ("CONFLICT_TAGS", "Nodes with conflicting instances")
            , ("OFFLINE_PRI", "Instances having the primary node offline")
            , ("OFFLINE_SEC", "Instances having a secondary node offline")
+            , ("GN1_FAIL", "Nodes not directly evacuateable")
            ]

-- | Data showed per group.
@@ -251,6 +254,8 @@ perGroupChecks gl (gidx, (nl, il)) =
  let grp = Container.find gidx gl
      offnl = filter Node.offline (Container.elems nl)
      n1violated = length . fst $ Cluster.computeBadItems nl il
+      gn1fail = length . filter (not . GlobalN1.canEvacuateNode (nl, il))
+                  $ IntMap.elems nl
      conflicttags = length $ filter (>0)
                     (map Node.conflictingPrimaries (Container.elems nl))
      offline_pri = sum . map length $ map Node.pList offnl
@@ -260,6 +265,7 @@ perGroupChecks gl (gidx, (nl, il)) =
                   , conflicttags
                   , offline_pri
                   , offline_sec
+                   , gn1fail
                   ]
  in ((grp, score), groupstats)

--
2.2.0.rc0.207.ga3a616c


LGTM

Reply via email to