On Wed, Jun 24, 2015 at 11:28:59AM +0200, 'Klaus Aehlig' via ganeti-devel wrote:
When determining whether the cluster has enough capacity to keep all the
instances should a node die, also consider the placement of the recreated
plain instances, as per our design.

Signed-off-by: Klaus Aehlig <[email protected]>
---
src/Ganeti/HTools/GlobalN1.hs | 36 ++++++++++++++++++++++++++++++------
1 file changed, 30 insertions(+), 6 deletions(-)

diff --git a/src/Ganeti/HTools/GlobalN1.hs b/src/Ganeti/HTools/GlobalN1.hs
index d90c07e..a4e6028 100644
--- a/src/Ganeti/HTools/GlobalN1.hs
+++ b/src/Ganeti/HTools/GlobalN1.hs
@@ -40,17 +40,20 @@ module Ganeti.HTools.GlobalN1

import Control.Monad (foldM, foldM_)
import qualified Data.Foldable as Foldable
-import Data.List (partition)
+import Data.Function (on)
+import Data.List (partition, sortBy)

import Ganeti.BasicTypes (isOk, Result)
import Ganeti.HTools.AlgorithmParams (AlgorithmOptions(..), defaultOptions)
+import Ganeti.HTools.Cluster.AllocatePrimitives (allocateOnSingle)
import qualified Ganeti.HTools.Cluster.AllocationSolution as AllocSol
import qualified Ganeti.HTools.Cluster.Evacuate as Evacuate
import Ganeti.HTools.Cluster.Moves (move)
import qualified Ganeti.HTools.Container as Container
import qualified Ganeti.HTools.Instance as Instance
import qualified Ganeti.HTools.Node as Node
-import Ganeti.HTools.Types ( IMove(Failover), Ndx, Gdx, Idx, opToResult)
+import Ganeti.HTools.Types ( IMove(Failover), Ndx, Gdx, Idx, opToResult,
+                             FailMode(FailN1) )
import Ganeti.Types ( DiskTemplate(DTDrbd8), diskTemplateMovable
                    , EvacMode(ChangePrimary))

@@ -65,6 +68,24 @@ evac gdx ndxs (nl, il) idx = do
                     gdx ndxs
  return (nl', il')

+-- | Foldable function describing how a non-movable instance is to
+-- be recreated on one of the given nodes.
+recreate :: [Ndx]
+         -> (Node.List, Instance.List)
+         -> Instance.Instance
+         -> Result (Node.List, Instance.List)
+recreate targetnodes (nl, il) inst = do
+  let opts = defaultOptions { algIgnoreSoftErrors = True, algEvacMode = True }
+      sols = foldl (\cstate ->
+                       AllocSol.concatAllocCollections cstate
+                       . allocateOnSingle opts nl inst
+                   ) AllocSol.emptyAllocCollection targetnodes
+      sol = AllocSol.collectionToSolution FailN1 (const True) sols
+  alloc <- maybe (fail "No solution found") return $ AllocSol.asSolution sol
+  let il' = AllocSol.updateIl il $ Just alloc
+      nl' = AllocSol.extractNl nl il $ Just alloc
+  return (nl', il')
+
-- | Decide if a node can be evacuated, i.e., all DRBD instances
-- failed over and all shared/external storage instances moved off
-- to other nodes.
@@ -74,9 +95,9 @@ canEvacuateNode (nl, il) n = isOk $ do
                                         . Instance.diskTemplate
                                         . flip Container.find il)
                              $ Node.pList n
-      sharedIdxs = filter (diskTemplateMovable
-                           . Instance.diskTemplate
-                           . flip Container.find il) otherIdxs
+      (sharedIdxs, nonMoveIdxs) = partition (diskTemplateMovable
+                                  . Instance.diskTemplate
+                                  . flip Container.find il) otherIdxs
  -- failover all DRBD instances with primaries on n
  (nl', il') <- opToResult
                . foldM move (nl, il) $ map (flip (,) Failover) drbdIdxs
@@ -86,7 +107,10 @@ canEvacuateNode (nl, il) n = isOk $ do
                    . map Node.idx
                    . filter ((== grp) . Node.group)
                    $ Container.elems nl'
-  foldM_ (evac grp escapenodes) (nl',il') sharedIdxs
+  (nl'', il'') <- foldM (evac grp escapenodes) (nl',il') sharedIdxs
+  let recreateInstances = sortBy (flip compare `on` Instance.mem)
+                          $ map (`Container.find` il'') nonMoveIdxs
+  foldM_ (recreate escapenodes) (nl'', il'') recreateInstances

-- | Predicate on wheter a given situation is globally N+1 redundant.
redundant :: Node.List -> Instance.List -> Bool
--
2.4.3.573.g4eafbef


LGTM

Reply via email to