In this way, make it available for other high-level functions
like global N+1 redundancy. For this purpose, also export
nodeEvacInstance.

As part of this splitting, move some elementary functions to
a Ganeti.HTools.Cluster.Utils module.

Signed-off-by: Klaus Aehlig <[email protected]>
---
 Makefile.am                           |   2 +
 src/Ganeti/HTools/Backend/IAlloc.hs   |  17 +-
 src/Ganeti/HTools/Cluster.hs          | 460 +---------------------------------
 src/Ganeti/HTools/Cluster/Evacuate.hs | 411 ++++++++++++++++++++++++++++++
 src/Ganeti/HTools/Cluster/Utils.hs    | 150 +++++++++++
 src/Ganeti/HTools/Dedicated.hs        |   3 +-
 src/Ganeti/HTools/Program/Hbal.hs     |   3 +-
 src/Ganeti/HTools/Program/Hcheck.hs   |   3 +-
 src/Ganeti/HTools/Program/Hinfo.hs    |   3 +-
 test/hs/Test/Ganeti/HTools/Cluster.hs |  14 +-
 10 files changed, 596 insertions(+), 470 deletions(-)
 create mode 100644 src/Ganeti/HTools/Cluster/Evacuate.hs
 create mode 100644 src/Ganeti/HTools/Cluster/Utils.hs

diff --git a/Makefile.am b/Makefile.am
index 2e119b2..f293f40 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -905,8 +905,10 @@ HS_LIB_SRCS = \
        src/Ganeti/HTools/Backend/Text.hs \
        src/Ganeti/HTools/CLI.hs \
        src/Ganeti/HTools/Cluster.hs \
+       src/Ganeti/HTools/Cluster/Evacuate.hs \
        src/Ganeti/HTools/Cluster/Metrics.hs \
        src/Ganeti/HTools/Cluster/Moves.hs \
+       src/Ganeti/HTools/Cluster/Utils.hs \
        src/Ganeti/HTools/Container.hs \
        src/Ganeti/HTools/Dedicated.hs \
        src/Ganeti/HTools/ExtLoader.hs \
diff --git a/src/Ganeti/HTools/Backend/IAlloc.hs 
b/src/Ganeti/HTools/Backend/IAlloc.hs
index 8c45603..3451282 100644
--- a/src/Ganeti/HTools/Backend/IAlloc.hs
+++ b/src/Ganeti/HTools/Backend/IAlloc.hs
@@ -52,6 +52,7 @@ import Text.JSON (JSObject, JSValue(JSArray),
 
 import Ganeti.BasicTypes
 import qualified Ganeti.HTools.Cluster as Cluster
+import qualified Ganeti.HTools.Cluster.Evacuate as Evacuate
 import qualified Ganeti.HTools.Container as Container
 import qualified Ganeti.HTools.Group as Group
 import qualified Ganeti.HTools.Node as Node
@@ -325,20 +326,20 @@ formatMultiAlloc (fin_nl, fin_il, ars) =
 formatNodeEvac :: Group.List
                -> Node.List
                -> Instance.List
-               -> (Node.List, Instance.List, Cluster.EvacSolution)
+               -> (Node.List, Instance.List, Evacuate.EvacSolution)
                -> Result IAllocResult
 formatNodeEvac gl nl il (fin_nl, fin_il, es) =
   let iname = Instance.name . flip Container.find il
       nname = Node.name . flip Container.find nl
       gname = Group.name . flip Container.find gl
-      fes = map (\(idx, msg) -> (iname idx, msg)) $ Cluster.esFailed es
+      fes = map (\(idx, msg) -> (iname idx, msg)) $ Evacuate.esFailed es
       mes = map (\(idx, gdx, ndxs) -> (iname idx, gname gdx, map nname ndxs))
-            $ Cluster.esMoved es
+            $ Evacuate.esMoved es
       failed = length fes
       moved  = length mes
       info = show failed ++ " instances failed to move and " ++ show moved ++
              " were moved successfully"
-  in Ok (info, showJSON (mes, fes, Cluster.esOpCodes es), fin_nl, fin_il)
+  in Ok (info, showJSON (mes, fes, Evacuate.esOpCodes es), fin_nl, fin_il)
 
 -- | Runs relocate for a single instance.
 --
@@ -371,12 +372,12 @@ processRelocate opts gl nl il idx 1 exndx = do
        fail $ "Unsupported request: excluded nodes not equal to\
               \ instance's " ++  node_type ++ "(" ++ show exp_node
               ++ " versus " ++ show exndx ++ ")"
-  (nl', il', esol) <- Cluster.tryNodeEvac opts gl nl il reloc_type [idx]
-  nodes <- case lookup idx (Cluster.esFailed esol) of
+  (nl', il', esol) <- Evacuate.tryNodeEvac opts gl nl il reloc_type [idx]
+  nodes <- case lookup idx (Evacuate.esFailed esol) of
              Just msg -> fail msg
              Nothing ->
                  case lookup idx (map (\(a, _, b) -> (a, b))
-                                  (Cluster.esMoved esol)) of
+                                  (Evacuate.esMoved esol)) of
                    Nothing ->
                        fail "Internal error: lost instance idx during move"
                    Just n -> return n
@@ -432,7 +433,7 @@ processRequest opts request =
          Cluster.tryChangeGroup opts gl nl il idxs gdxs >>=
                 formatNodeEvac gl nl il
        NodeEvacuate xi mode ->
-         Cluster.tryNodeEvac opts gl nl il mode xi >>=
+         Evacuate.tryNodeEvac opts gl nl il mode xi >>=
                 formatNodeEvac gl nl il
        MultiAllocate xies ->
          Cluster.allocList opts gl nl il xies [] >>= formatMultiAlloc
diff --git a/src/Ganeti/HTools/Cluster.hs b/src/Ganeti/HTools/Cluster.hs
index 63e39de..8913c37 100644
--- a/src/Ganeti/HTools/Cluster.hs
+++ b/src/Ganeti/HTools/Cluster.hs
@@ -46,7 +46,6 @@ module Ganeti.HTools.Cluster
   , sumAllocs
   , updateIl
   , extractNl
-  , EvacSolution(..)
   , Table(..)
   , CStats(..)
   , AllocNodes
@@ -82,7 +81,6 @@ module Ganeti.HTools.Cluster
   , tryMGAlloc
   , filterMGResults
   , sortMGResults
-  , tryNodeEvac
   , tryChangeGroup
   , collapseFailures
   , allocList
@@ -92,7 +90,6 @@ module Ganeti.HTools.Cluster
   -- * Node group functions
   , instanceGroup
   , findSplitInstances
-  , splitCluster
   ) where
 
 import Control.Applicative ((<$>), liftA2)
@@ -105,23 +102,26 @@ import Data.Ord (comparing)
 import Text.Printf (printf)
 
 import Ganeti.BasicTypes
-import qualified Ganeti.Constants as C
 import Ganeti.HTools.AlgorithmParams (AlgorithmOptions(..), defaultOptions)
 import qualified Ganeti.HTools.Container as Container
-import Ganeti.HTools.Cluster.Metrics ( compCV, compCVfromStats, compCVNodes
+import Ganeti.HTools.Cluster.Evacuate ( EvacSolution(..), emptyEvacSolution
+                                      , updateEvacSolution, reverseEvacSolution
+                                      , nodeEvacInstance)
+import Ganeti.HTools.Cluster.Metrics ( compCV, compCVfromStats
                                      , compClusterStatistics
                                      , updateClusterStatisticsTwice)
 import Ganeti.HTools.Cluster.Moves (setInstanceLocationScore, applyMoveEx)
+import Ganeti.HTools.Cluster.Utils (splitCluster, instancePriGroup
+                                   , availableGroupNodes, iMoveToJob)
 import qualified Ganeti.HTools.Instance as Instance
 import qualified Ganeti.HTools.Nic as Nic
 import qualified Ganeti.HTools.Node as Node
 import qualified Ganeti.HTools.Group as Group
 import Ganeti.HTools.Types
 import Ganeti.Compat
-import qualified Ganeti.OpCodes as OpCodes
 import Ganeti.Utils
 import Ganeti.Utils.Statistics
-import Ganeti.Types (EvacMode(..), mkNonEmpty, mkNonNegative)
+import Ganeti.Types (EvacMode(..))
 
 -- * Types
 
@@ -142,16 +142,6 @@ data GenericAllocSolution a = AllocSolution
 
 type AllocSolution = GenericAllocSolution Score
 
--- | Node evacuation/group change iallocator result type. This result
--- type consists of actual opcodes (a restricted subset) that are
--- transmitted back to Ganeti.
-data EvacSolution = EvacSolution
-  { esMoved   :: [(Idx, Gdx, [Ndx])]  -- ^ Instances moved successfully
-  , esFailed  :: [(Idx, String)]      -- ^ Instances which were not
-                                      -- relocated
-  , esOpCodes :: [[OpCodes.OpCode]]   -- ^ List of jobs
-  } deriving (Show)
-
 -- | Allocation results, as used in 'iterateAlloc' and 'tieredAlloc'.
 type AllocResult = (FailStats, Node.List, Instance.List,
                     [Instance.Instance], [CStats])
@@ -174,13 +164,6 @@ emptyAllocSolution :: GenericAllocSolution a
 emptyAllocSolution = AllocSolution { asFailures = [], asAllocs = 0
                                    , asSolution = Nothing, asLog = [] }
 
--- | The empty evac solution.
-emptyEvacSolution :: EvacSolution
-emptyEvacSolution = EvacSolution { esMoved = []
-                                 , esFailed = []
-                                 , esOpCodes = []
-                                 }
-
 -- | The complete state for the balancing solution.
 data Table = Table Node.List Instance.List Score [Placement]
              deriving (Show)
@@ -223,10 +206,6 @@ type AllocMethod =  Node.List           -- ^ Node list
                  -> [CStats]            -- ^ Running cluster stats
                  -> Result AllocResult  -- ^ Allocation result
 
--- | A simple type for the running solution of evacuations.
-type EvacInnerState =
-  Either String (Node.List, Instance.Instance, Score, Ndx)
-
 -- * Utility functions
 
 -- | Verifies the N+1 status and return the affected nodes.
@@ -630,14 +609,6 @@ genericAnnotateSolution formatMetrics as =
 annotateSolution :: AllocSolution -> AllocSolution
 annotateSolution = genericAnnotateSolution (printf "%.8f")
 
--- | Reverses an evacuation solution.
---
--- Rationale: we always concat the results to the top of the lists, so
--- for proper jobset execution, we should reverse all lists.
-reverseEvacSolution :: EvacSolution -> EvacSolution
-reverseEvacSolution (EvacSolution f m o) =
-  EvacSolution (reverse f) (reverse m) (reverse o)
-
 -- | Generate the valid node allocation singles or pairs for a new instance.
 genAllocNodes :: Group.List        -- ^ Group list
               -> Node.List         -- ^ The node map
@@ -864,317 +835,7 @@ allocList opts gl nl il ((xi, AllocDetails xicnt 
mgn):xies) result = do
       il' = updateIl il sol
   allocList opts gl nl' il' xies ((xi, ares):result)
 
--- | Function which fails if the requested mode is change secondary.
---
--- This is useful since except DRBD, no other disk template can
--- execute change secondary; thus, we can just call this function
--- instead of always checking for secondary mode. After the call to
--- this function, whatever mode we have is just a primary change.
-failOnSecondaryChange :: (Monad m) => EvacMode -> DiskTemplate -> m ()
-failOnSecondaryChange ChangeSecondary dt =
-  fail $ "Instances with disk template '" ++ diskTemplateToRaw dt ++
-         "' can't execute change secondary"
-failOnSecondaryChange _ _ = return ()
-
--- | Run evacuation for a single instance.
---
--- /Note:/ this function should correctly execute both intra-group
--- evacuations (in all modes) and inter-group evacuations (in the
--- 'ChangeAll' mode). Of course, this requires that the correct list
--- of target nodes is passed.
-nodeEvacInstance :: AlgorithmOptions
-                 -> Node.List         -- ^ The node list (cluster-wide)
-                 -> Instance.List     -- ^ Instance list (cluster-wide)
-                 -> EvacMode          -- ^ The evacuation mode
-                 -> Instance.Instance -- ^ The instance to be evacuated
-                 -> Gdx               -- ^ The group we're targetting
-                 -> [Ndx]             -- ^ The list of available nodes
-                                      -- for allocation
-                 -> Result (Node.List, Instance.List, [OpCodes.OpCode])
-nodeEvacInstance opts nl il mode inst@(Instance.Instance
-                                    {Instance.diskTemplate = dt@DTDiskless})
-                 gdx avail_nodes =
-                   failOnSecondaryChange mode dt >>
-                   evacOneNodeOnly opts nl il inst gdx avail_nodes
-
-nodeEvacInstance _ _ _ _ (Instance.Instance
-                          {Instance.diskTemplate = DTPlain}) _ _ =
-                  fail "Instances of type plain cannot be relocated"
-
-nodeEvacInstance _ _ _ _ (Instance.Instance
-                          {Instance.diskTemplate = DTFile}) _ _ =
-                  fail "Instances of type file cannot be relocated"
-
-nodeEvacInstance opts nl il mode inst@(Instance.Instance
-                                    {Instance.diskTemplate = dt@DTSharedFile})
-                 gdx avail_nodes =
-                   failOnSecondaryChange mode dt >>
-                   evacOneNodeOnly opts nl il inst gdx avail_nodes
-
-nodeEvacInstance opts nl il mode inst@(Instance.Instance
-                                    {Instance.diskTemplate = dt@DTBlock})
-                 gdx avail_nodes =
-                   failOnSecondaryChange mode dt >>
-                   evacOneNodeOnly opts nl il inst gdx avail_nodes
-
-nodeEvacInstance opts nl il mode inst@(Instance.Instance
-                                    {Instance.diskTemplate = dt@DTRbd})
-                 gdx avail_nodes =
-                   failOnSecondaryChange mode dt >>
-                   evacOneNodeOnly opts nl il inst gdx avail_nodes
-
-nodeEvacInstance opts nl il mode inst@(Instance.Instance
-                                    {Instance.diskTemplate = dt@DTExt})
-                 gdx avail_nodes =
-                   failOnSecondaryChange mode dt >>
-                   evacOneNodeOnly opts nl il inst gdx avail_nodes
-
-nodeEvacInstance opts nl il mode inst@(Instance.Instance
-                                    {Instance.diskTemplate = dt@DTGluster})
-                 gdx avail_nodes =
-                   failOnSecondaryChange mode dt >>
-                   evacOneNodeOnly opts nl il inst gdx avail_nodes
-
-nodeEvacInstance opts nl il ChangePrimary
-                 inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
-                 _ _ =
-  do
-    (nl', inst', _, _) <- opToResult
-                          $ applyMoveEx (algIgnoreSoftErrors opts) nl inst
-                            Failover
-    let idx = Instance.idx inst
-        il' = Container.add idx inst' il
-        ops = iMoveToJob nl' il' idx Failover
-    return (nl', il', ops)
-
-nodeEvacInstance opts nl il ChangeSecondary
-                 inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
-                 gdx avail_nodes =
-  evacOneNodeOnly opts nl il inst gdx avail_nodes
-
--- The algorithm for ChangeAll is as follows:
---
--- * generate all (primary, secondary) node pairs for the target groups
--- * for each pair, execute the needed moves (r:s, f, r:s) and compute
---   the final node list state and group score
--- * select the best choice via a foldl that uses the same Either
---   String solution as the ChangeSecondary mode
-nodeEvacInstance opts nl il ChangeAll
-                 inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
-                 gdx avail_nodes =
-  do
-    let no_nodes = Left "no nodes available"
-        node_pairs = [(p,s) | p <- avail_nodes, s <- avail_nodes, p /= s]
-    (nl', il', ops, _) <-
-        annotateResult "Can't find any good nodes for relocation" .
-        eitherToResult $
-        foldl'
-        (\accu nodes -> case evacDrbdAllInner opts nl il inst gdx nodes of
-                          Bad msg ->
-                              case accu of
-                                Right _ -> accu
-                                -- we don't need more details (which
-                                -- nodes, etc.) as we only selected
-                                -- this group if we can allocate on
-                                -- it, hence failures will not
-                                -- propagate out of this fold loop
-                                Left _ -> Left $ "Allocation failed: " ++ msg
-                          Ok result@(_, _, _, new_cv) ->
-                              let new_accu = Right result in
-                              case accu of
-                                Left _ -> new_accu
-                                Right (_, _, _, old_cv) ->
-                                    if old_cv < new_cv
-                                    then accu
-                                    else new_accu
-        ) no_nodes node_pairs
-
-    return (nl', il', ops)
-
--- | Generic function for changing one node of an instance.
---
--- This is similar to 'nodeEvacInstance' but will be used in a few of
--- its sub-patterns. It folds the inner function 'evacOneNodeInner'
--- over the list of available nodes, which results in the best choice
--- for relocation.
-evacOneNodeOnly :: AlgorithmOptions
-                -> Node.List         -- ^ The node list (cluster-wide)
-                -> Instance.List     -- ^ Instance list (cluster-wide)
-                -> Instance.Instance -- ^ The instance to be evacuated
-                -> Gdx               -- ^ The group we're targetting
-                -> [Ndx]             -- ^ The list of available nodes
-                                      -- for allocation
-                -> Result (Node.List, Instance.List, [OpCodes.OpCode])
-evacOneNodeOnly opts nl il inst gdx avail_nodes = do
-  op_fn <- case Instance.mirrorType inst of
-             MirrorNone -> Bad "Can't relocate/evacuate non-mirrored instances"
-             MirrorInternal -> Ok ReplaceSecondary
-             MirrorExternal -> Ok FailoverToAny
-  (nl', inst', _, ndx) <- annotateResult "Can't find any good node" .
-                          eitherToResult $
-                          foldl' (evacOneNodeInner opts nl inst gdx op_fn)
-                          (Left "") avail_nodes
-  let idx = Instance.idx inst
-      il' = Container.add idx inst' il
-      ops = iMoveToJob nl' il' idx (op_fn ndx)
-  return (nl', il', ops)
-
--- | Inner fold function for changing one node of an instance.
---
--- Depending on the instance disk template, this will either change
--- the secondary (for DRBD) or the primary node (for shared
--- storage). However, the operation is generic otherwise.
---
--- The running solution is either a @Left String@, which means we
--- don't have yet a working solution, or a @Right (...)@, which
--- represents a valid solution; it holds the modified node list, the
--- modified instance (after evacuation), the score of that solution,
--- and the new secondary node index.
-evacOneNodeInner :: AlgorithmOptions
-                 -> Node.List         -- ^ Cluster node list
-                 -> Instance.Instance -- ^ Instance being evacuated
-                 -> Gdx               -- ^ The group index of the instance
-                 -> (Ndx -> IMove)    -- ^ Operation constructor
-                 -> EvacInnerState    -- ^ Current best solution
-                 -> Ndx               -- ^ Node we're evaluating as target
-                 -> EvacInnerState    -- ^ New best solution
-evacOneNodeInner opts nl inst gdx op_fn accu ndx =
-  case applyMoveEx (algIgnoreSoftErrors opts) nl inst (op_fn ndx) of
-    Bad fm -> let fail_msg = " Node " ++ Container.nameOf nl ndx ++
-                             " failed: " ++ show fm ++ ";"
-              in either (Left . (++ fail_msg)) Right accu
-    Ok (nl', inst', _, _) ->
-      let nodes = Container.elems nl'
-          -- The fromJust below is ugly (it can fail nastily), but
-          -- at this point we should have any internal mismatches,
-          -- and adding a monad here would be quite involved
-          grpnodes = fromJust (gdx `lookup` Node.computeGroups nodes)
-          new_cv = compCVNodes grpnodes
-          new_accu = Right (nl', inst', new_cv, ndx)
-      in case accu of
-           Left _ -> new_accu
-           Right (_, _, old_cv, _) ->
-             if old_cv < new_cv
-               then accu
-               else new_accu
-
--- | Compute result of changing all nodes of a DRBD instance.
---
--- Given the target primary and secondary node (which might be in a
--- different group or not), this function will 'execute' all the
--- required steps and assuming all operations succceed, will return
--- the modified node and instance lists, the opcodes needed for this
--- and the new group score.
-evacDrbdAllInner :: AlgorithmOptions
-                 -> Node.List         -- ^ Cluster node list
-                 -> Instance.List     -- ^ Cluster instance list
-                 -> Instance.Instance -- ^ The instance to be moved
-                 -> Gdx               -- ^ The target group index
-                                      -- (which can differ from the
-                                      -- current group of the
-                                      -- instance)
-                 -> (Ndx, Ndx)        -- ^ Tuple of new
-                                      -- primary\/secondary nodes
-                 -> Result (Node.List, Instance.List, [OpCodes.OpCode], Score)
-evacDrbdAllInner opts nl il inst gdx (t_pdx, t_sdx) = do
-  let primary = Container.find (Instance.pNode inst) nl
-      idx = Instance.idx inst
-      apMove = applyMoveEx $ algIgnoreSoftErrors opts
-  -- if the primary is offline, then we first failover
-  (nl1, inst1, ops1) <-
-    if Node.offline primary
-      then do
-        (nl', inst', _, _) <-
-          annotateResult "Failing over to the secondary" .
-          opToResult $ apMove nl inst Failover
-        return (nl', inst', [Failover])
-      else return (nl, inst, [])
-  let (o1, o2, o3) = (ReplaceSecondary t_pdx,
-                      Failover,
-                      ReplaceSecondary t_sdx)
-  -- we now need to execute a replace secondary to the future
-  -- primary node
-  (nl2, inst2, _, _) <-
-    annotateResult "Changing secondary to new primary" .
-    opToResult $
-    apMove nl1 inst1 o1
-  let ops2 = o1:ops1
-  -- we now execute another failover, the primary stays fixed now
-  (nl3, inst3, _, _) <- annotateResult "Failing over to new primary" .
-                        opToResult $ apMove nl2 inst2 o2
-  let ops3 = o2:ops2
-  -- and finally another replace secondary, to the final secondary
-  (nl4, inst4, _, _) <-
-    annotateResult "Changing secondary to final secondary" .
-    opToResult $
-    apMove nl3 inst3 o3
-  let ops4 = o3:ops3
-      il' = Container.add idx inst4 il
-      ops = concatMap (iMoveToJob nl4 il' idx) $ reverse ops4
-  let nodes = Container.elems nl4
-      -- The fromJust below is ugly (it can fail nastily), but
-      -- at this point we should have any internal mismatches,
-      -- and adding a monad here would be quite involved
-      grpnodes = fromJust (gdx `lookup` Node.computeGroups nodes)
-      new_cv = compCVNodes grpnodes
-  return (nl4, il', ops, new_cv)
-
--- | Computes the nodes in a given group which are available for
--- allocation.
-availableGroupNodes :: [(Gdx, [Ndx])] -- ^ Group index/node index assoc list
-                    -> IntSet.IntSet  -- ^ Nodes that are excluded
-                    -> Gdx            -- ^ The group for which we
-                                      -- query the nodes
-                    -> Result [Ndx]   -- ^ List of available node indices
-availableGroupNodes group_nodes excl_ndx gdx = do
-  local_nodes <- maybe (Bad $ "Can't find group with index " ++ show gdx)
-                 Ok (lookup gdx group_nodes)
-  let avail_nodes = filter (not . flip IntSet.member excl_ndx) local_nodes
-  return avail_nodes
-
--- | Updates the evac solution with the results of an instance
--- evacuation.
-updateEvacSolution :: (Node.List, Instance.List, EvacSolution)
-                   -> Idx
-                   -> Result (Node.List, Instance.List, [OpCodes.OpCode])
-                   -> (Node.List, Instance.List, EvacSolution)
-updateEvacSolution (nl, il, es) idx (Bad msg) =
-  (nl, il, es { esFailed = (idx, msg):esFailed es})
-updateEvacSolution (_, _, es) idx (Ok (nl, il, opcodes)) =
-  (nl, il, es { esMoved = new_elem:esMoved es
-              , esOpCodes = opcodes:esOpCodes es })
-    where inst = Container.find idx il
-          new_elem = (idx,
-                      instancePriGroup nl inst,
-                      Instance.allNodes inst)
-
--- | Node-evacuation IAllocator mode main function.
-tryNodeEvac :: AlgorithmOptions
-            -> Group.List    -- ^ The cluster groups
-            -> Node.List     -- ^ The node list (cluster-wide, not per group)
-            -> Instance.List -- ^ Instance list (cluster-wide)
-            -> EvacMode      -- ^ The evacuation mode
-            -> [Idx]         -- ^ List of instance (indices) to be evacuated
-            -> Result (Node.List, Instance.List, EvacSolution)
-tryNodeEvac opts _ ini_nl ini_il mode idxs =
-  let evac_ndx = nodesToEvacuate ini_il mode idxs
-      offline = map Node.idx . filter Node.offline $ Container.elems ini_nl
-      excl_ndx = foldl' (flip IntSet.insert) evac_ndx offline
-      group_ndx = map (\(gdx, (nl, _)) -> (gdx, map Node.idx
-                                           (Container.elems nl))) $
-                  splitCluster ini_nl ini_il
-      (fin_nl, fin_il, esol) =
-        foldl' (\state@(nl, il, _) inst ->
-                  let gdx = instancePriGroup nl inst
-                      pdx = Instance.pNode inst in
-                  updateEvacSolution state (Instance.idx inst) $
-                  availableGroupNodes group_ndx
-                    (IntSet.insert pdx excl_ndx) gdx >>=
-                      nodeEvacInstance opts nl il mode inst gdx
-               )
-        (ini_nl, ini_il, emptyEvacSolution)
-        (map (`Container.find` ini_il) idxs)
-  in return (fin_nl, fin_il, reverseEvacSolution esol)
+
 
 -- | Change-group IAllocator mode main function.
 --
@@ -1468,72 +1129,6 @@ printInsts nl il =
       isnum = False:False:False:False:False:repeat True
   in printTable "" header (map helper sil) isnum
 
--- | Convert a placement into a list of OpCodes (basically a job).
-iMoveToJob :: Node.List        -- ^ The node list; only used for node
-                               -- names, so any version is good
-                               -- (before or after the operation)
-           -> Instance.List    -- ^ The instance list; also used for
-                               -- names only
-           -> Idx              -- ^ The index of the instance being
-                               -- moved
-           -> IMove            -- ^ The actual move to be described
-           -> [OpCodes.OpCode] -- ^ The list of opcodes equivalent to
-                               -- the given move
-iMoveToJob nl il idx move =
-  let inst = Container.find idx il
-      iname = Instance.name inst
-      lookNode  n = case mkNonEmpty (Container.nameOf nl n) of
-                      -- FIXME: convert htools codebase to non-empty strings
-                      Bad msg -> error $ "Empty node name for idx " ++
-                                 show n ++ ": " ++ msg ++ "??"
-                      Ok ne -> Just ne
-      opF' = OpCodes.OpInstanceMigrate
-              { OpCodes.opInstanceName        = iname
-              , OpCodes.opInstanceUuid        = Nothing
-              , OpCodes.opMigrationMode       = Nothing -- default
-              , OpCodes.opOldLiveMode         = Nothing -- default as well
-              , OpCodes.opTargetNode          = Nothing -- this is drbd
-              , OpCodes.opTargetNodeUuid      = Nothing
-              , OpCodes.opAllowRuntimeChanges = False
-              , OpCodes.opIgnoreIpolicy       = False
-              , OpCodes.opMigrationCleanup    = False
-              , OpCodes.opIallocator          = Nothing
-              , OpCodes.opAllowFailover       = True
-              , OpCodes.opIgnoreHvversions    = True
-              }
-      opFA n = opF { OpCodes.opTargetNode = lookNode n } -- not drbd
-      opFforced =
-        OpCodes.OpInstanceFailover
-          { OpCodes.opInstanceName        = iname
-          , OpCodes.opInstanceUuid        = Nothing
-          , OpCodes.opShutdownTimeout     =
-              fromJust $ mkNonNegative C.defaultShutdownTimeout
-          , OpCodes.opIgnoreConsistency = False
-          , OpCodes.opTargetNode = Nothing
-          , OpCodes.opTargetNodeUuid = Nothing
-          , OpCodes.opIgnoreIpolicy = False
-          , OpCodes.opIallocator = Nothing
-          , OpCodes.opMigrationCleanup = False
-          }
-      opF = if Instance.forthcoming inst then opFforced else opF'
-      opR n = OpCodes.OpInstanceReplaceDisks
-                { OpCodes.opInstanceName     = iname
-                , OpCodes.opInstanceUuid     = Nothing
-                , OpCodes.opEarlyRelease     = False
-                , OpCodes.opIgnoreIpolicy    = False
-                , OpCodes.opReplaceDisksMode = OpCodes.ReplaceNewSecondary
-                , OpCodes.opReplaceDisksList = []
-                , OpCodes.opRemoteNode       = lookNode n
-                , OpCodes.opRemoteNodeUuid   = Nothing
-                , OpCodes.opIallocator       = Nothing
-                }
-  in case move of
-       Failover -> [ opF ]
-       FailoverToAny np -> [ opFA np ]
-       ReplacePrimary np -> [ opF, opR np, opF ]
-       ReplaceSecondary ns -> [ opR ns ]
-       ReplaceAndFailover np -> [ opR np, opF ]
-       FailoverAndReplace ns -> [ opF, opR ns ]
 
 -- * Node group functions
 
@@ -1552,47 +1147,8 @@ instanceGroup nl i =
                   show pgroup ++ ", secondary " ++ show sgroup)
        else return pgroup
 
--- | Computes the group of an instance per the primary node.
-instancePriGroup :: Node.List -> Instance.Instance -> Gdx
-instancePriGroup nl i =
-  let pnode = Container.find (Instance.pNode i) nl
-  in  Node.group pnode
-
 -- | Compute the list of badly allocated instances (split across node
 -- groups).
 findSplitInstances :: Node.List -> Instance.List -> [Instance.Instance]
 findSplitInstances nl =
   filter (not . isOk . instanceGroup nl) . Container.elems
-
--- | Splits a cluster into the component node groups.
-splitCluster :: Node.List -> Instance.List ->
-                [(Gdx, (Node.List, Instance.List))]
-splitCluster nl il =
-  let ngroups = Node.computeGroups (Container.elems nl)
-  in map (\(gdx, nodes) ->
-           let nidxs = map Node.idx nodes
-               nodes' = zip nidxs nodes
-               instances = Container.filter ((`elem` nidxs) . Instance.pNode) 
il
-           in (gdx, (Container.fromList nodes', instances))) ngroups
-
--- | Compute the list of nodes that are to be evacuated, given a list
--- of instances and an evacuation mode.
-nodesToEvacuate :: Instance.List -- ^ The cluster-wide instance list
-                -> EvacMode      -- ^ The evacuation mode we're using
-                -> [Idx]         -- ^ List of instance indices being evacuated
-                -> IntSet.IntSet -- ^ Set of node indices
-nodesToEvacuate il mode =
-  IntSet.delete Node.noSecondary .
-  foldl' (\ns idx ->
-            let i = Container.find idx il
-                pdx = Instance.pNode i
-                sdx = Instance.sNode i
-                dt = Instance.diskTemplate i
-                withSecondary = case dt of
-                                  DTDrbd8 -> IntSet.insert sdx ns
-                                  _ -> ns
-            in case mode of
-                 ChangePrimary   -> IntSet.insert pdx ns
-                 ChangeSecondary -> withSecondary
-                 ChangeAll       -> IntSet.insert pdx withSecondary
-         ) IntSet.empty
diff --git a/src/Ganeti/HTools/Cluster/Evacuate.hs 
b/src/Ganeti/HTools/Cluster/Evacuate.hs
new file mode 100644
index 0000000..74e1a84
--- /dev/null
+++ b/src/Ganeti/HTools/Cluster/Evacuate.hs
@@ -0,0 +1,411 @@
+{-| Implementation of node evacuation
+
+-}
+
+{-
+
+Copyright (C) 2009, 2010, 2011, 2012, 2013 Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+-}
+
+module Ganeti.HTools.Cluster.Evacuate
+  ( EvacSolution(..)
+  , nodeEvacInstance
+  , tryNodeEvac
+  , emptyEvacSolution
+  , updateEvacSolution
+  , reverseEvacSolution
+  ) where
+
+import qualified Data.IntSet as IntSet
+import Data.List (foldl')
+import Data.Maybe (fromJust)
+
+import Ganeti.BasicTypes
+import Ganeti.HTools.AlgorithmParams (AlgorithmOptions(..))
+import Ganeti.HTools.Cluster.Metrics (compCVNodes)
+import Ganeti.HTools.Cluster.Moves (applyMoveEx)
+import Ganeti.HTools.Cluster.Utils ( splitCluster, iMoveToJob
+                                   , instancePriGroup, availableGroupNodes)
+import qualified Ganeti.HTools.Container as Container
+import qualified Ganeti.HTools.Group as Group
+import qualified Ganeti.HTools.Instance as Instance
+import qualified Ganeti.HTools.Node as Node
+import Ganeti.HTools.Types
+import qualified Ganeti.OpCodes as OpCodes
+import Ganeti.Types
+
+-- | Node evacuation/group change iallocator result type. This result
+-- type consists of actual opcodes (a restricted subset) that are
+-- transmitted back to Ganeti.
+data EvacSolution = EvacSolution
+  { esMoved   :: [(Idx, Gdx, [Ndx])]  -- ^ Instances moved successfully
+  , esFailed  :: [(Idx, String)]      -- ^ Instances which were not
+                                      -- relocated
+  , esOpCodes :: [[OpCodes.OpCode]]   -- ^ List of jobs
+  } deriving (Show)
+
+-- | The empty evac solution.
+emptyEvacSolution :: EvacSolution
+emptyEvacSolution = EvacSolution { esMoved = []
+                                 , esFailed = []
+                                 , esOpCodes = []
+                                 }
+
+-- | Reverses an evacuation solution.
+--
+-- Rationale: we always concat the results to the top of the lists, so
+-- for proper jobset execution, we should reverse all lists.
+reverseEvacSolution :: EvacSolution -> EvacSolution
+reverseEvacSolution (EvacSolution f m o) =
+  EvacSolution (reverse f) (reverse m) (reverse o)
+
+-- | A simple type for the running solution of evacuations.
+type EvacInnerState =
+  Either String (Node.List, Instance.Instance, Score, Ndx)
+
+-- | Function which fails if the requested mode is change secondary.
+--
+-- This is useful since except DRBD, no other disk template can
+-- execute change secondary; thus, we can just call this function
+-- instead of always checking for secondary mode. After the call to
+-- this function, whatever mode we have is just a primary change.
+failOnSecondaryChange :: (Monad m) => EvacMode -> DiskTemplate -> m ()
+failOnSecondaryChange ChangeSecondary dt =
+  fail $ "Instances with disk template '" ++ diskTemplateToRaw dt ++
+         "' can't execute change secondary"
+failOnSecondaryChange _ _ = return ()
+
+
+-- | Inner fold function for changing one node of an instance.
+--
+-- Depending on the instance disk template, this will either change
+-- the secondary (for DRBD) or the primary node (for shared
+-- storage). However, the operation is generic otherwise.
+--
+-- The running solution is either a @Left String@, which means we
+-- don't have yet a working solution, or a @Right (...)@, which
+-- represents a valid solution; it holds the modified node list, the
+-- modified instance (after evacuation), the score of that solution,
+-- and the new secondary node index.
+evacOneNodeInner :: AlgorithmOptions
+                 -> Node.List         -- ^ Cluster node list
+                 -> Instance.Instance -- ^ Instance being evacuated
+                 -> Gdx               -- ^ The group index of the instance
+                 -> (Ndx -> IMove)    -- ^ Operation constructor
+                 -> EvacInnerState    -- ^ Current best solution
+                 -> Ndx               -- ^ Node we're evaluating as target
+                 -> EvacInnerState    -- ^ New best solution
+evacOneNodeInner opts nl inst gdx op_fn accu ndx =
+  case applyMoveEx (algIgnoreSoftErrors opts) nl inst (op_fn ndx) of
+    Bad fm -> let fail_msg = " Node " ++ Container.nameOf nl ndx ++
+                             " failed: " ++ show fm ++ ";"
+              in either (Left . (++ fail_msg)) Right accu
+    Ok (nl', inst', _, _) ->
+      let nodes = Container.elems nl'
+          -- The fromJust below is ugly (it can fail nastily), but
+          -- at this point we should have any internal mismatches,
+          -- and adding a monad here would be quite involved
+          grpnodes = fromJust (gdx `lookup` Node.computeGroups nodes)
+          new_cv = compCVNodes grpnodes
+          new_accu = Right (nl', inst', new_cv, ndx)
+      in case accu of
+           Left _ -> new_accu
+           Right (_, _, old_cv, _) ->
+             if old_cv < new_cv
+               then accu
+               else new_accu
+
+-- | Generic function for changing one node of an instance.
+--
+-- This is similar to 'nodeEvacInstance' but will be used in a few of
+-- its sub-patterns. It folds the inner function 'evacOneNodeInner'
+-- over the list of available nodes, which results in the best choice
+-- for relocation.
+evacOneNodeOnly :: AlgorithmOptions
+                -> Node.List         -- ^ The node list (cluster-wide)
+                -> Instance.List     -- ^ Instance list (cluster-wide)
+                -> Instance.Instance -- ^ The instance to be evacuated
+                -> Gdx               -- ^ The group we're targetting
+                -> [Ndx]             -- ^ The list of available nodes
+                                      -- for allocation
+                -> Result (Node.List, Instance.List, [OpCodes.OpCode])
+evacOneNodeOnly opts nl il inst gdx avail_nodes = do
+  op_fn <- case Instance.mirrorType inst of
+             MirrorNone -> Bad "Can't relocate/evacuate non-mirrored instances"
+             MirrorInternal -> Ok ReplaceSecondary
+             MirrorExternal -> Ok FailoverToAny
+  (nl', inst', _, ndx) <- annotateResult "Can't find any good node" .
+                          eitherToResult $
+                          foldl' (evacOneNodeInner opts nl inst gdx op_fn)
+                          (Left "") avail_nodes
+  let idx = Instance.idx inst
+      il' = Container.add idx inst' il
+      ops = iMoveToJob nl' il' idx (op_fn ndx)
+  return (nl', il', ops)
+
+-- | Compute result of changing all nodes of a DRBD instance.
+--
+-- Given the target primary and secondary node (which might be in a
+-- different group or not), this function will 'execute' all the
+-- required steps and assuming all operations succceed, will return
+-- the modified node and instance lists, the opcodes needed for this
+-- and the new group score.
+evacDrbdAllInner :: AlgorithmOptions
+                 -> Node.List         -- ^ Cluster node list
+                 -> Instance.List     -- ^ Cluster instance list
+                 -> Instance.Instance -- ^ The instance to be moved
+                 -> Gdx               -- ^ The target group index
+                                      -- (which can differ from the
+                                      -- current group of the
+                                      -- instance)
+                 -> (Ndx, Ndx)        -- ^ Tuple of new
+                                      -- primary\/secondary nodes
+                 -> Result (Node.List, Instance.List, [OpCodes.OpCode], Score)
+evacDrbdAllInner opts nl il inst gdx (t_pdx, t_sdx) = do
+  let primary = Container.find (Instance.pNode inst) nl
+      idx = Instance.idx inst
+      apMove = applyMoveEx $ algIgnoreSoftErrors opts
+  -- if the primary is offline, then we first failover
+  (nl1, inst1, ops1) <-
+    if Node.offline primary
+      then do
+        (nl', inst', _, _) <-
+          annotateResult "Failing over to the secondary" .
+          opToResult $ apMove nl inst Failover
+        return (nl', inst', [Failover])
+      else return (nl, inst, [])
+  let (o1, o2, o3) = (ReplaceSecondary t_pdx,
+                      Failover,
+                      ReplaceSecondary t_sdx)
+  -- we now need to execute a replace secondary to the future
+  -- primary node
+  (nl2, inst2, _, _) <-
+    annotateResult "Changing secondary to new primary" .
+    opToResult $
+    apMove nl1 inst1 o1
+  let ops2 = o1:ops1
+  -- we now execute another failover, the primary stays fixed now
+  (nl3, inst3, _, _) <- annotateResult "Failing over to new primary" .
+                        opToResult $ apMove nl2 inst2 o2
+  let ops3 = o2:ops2
+  -- and finally another replace secondary, to the final secondary
+  (nl4, inst4, _, _) <-
+    annotateResult "Changing secondary to final secondary" .
+    opToResult $
+    apMove nl3 inst3 o3
+  let ops4 = o3:ops3
+      il' = Container.add idx inst4 il
+      ops = concatMap (iMoveToJob nl4 il' idx) $ reverse ops4
+  let nodes = Container.elems nl4
+      -- The fromJust below is ugly (it can fail nastily), but
+      -- at this point we should have any internal mismatches,
+      -- and adding a monad here would be quite involved
+      grpnodes = fromJust (gdx `lookup` Node.computeGroups nodes)
+      new_cv = compCVNodes grpnodes
+  return (nl4, il', ops, new_cv)
+
+-- | Run evacuation for a single instance.
+--
+-- /Note:/ this function should correctly execute both intra-group
+-- evacuations (in all modes) and inter-group evacuations (in the
+-- 'ChangeAll' mode). Of course, this requires that the correct list
+-- of target nodes is passed.
+nodeEvacInstance :: AlgorithmOptions
+                 -> Node.List         -- ^ The node list (cluster-wide)
+                 -> Instance.List     -- ^ Instance list (cluster-wide)
+                 -> EvacMode          -- ^ The evacuation mode
+                 -> Instance.Instance -- ^ The instance to be evacuated
+                 -> Gdx               -- ^ The group we're targetting
+                 -> [Ndx]             -- ^ The list of available nodes
+                                      -- for allocation
+                 -> Result (Node.List, Instance.List, [OpCodes.OpCode])
+nodeEvacInstance opts nl il mode inst@(Instance.Instance
+                                    {Instance.diskTemplate = dt@DTDiskless})
+                 gdx avail_nodes =
+                   failOnSecondaryChange mode dt >>
+                   evacOneNodeOnly opts nl il inst gdx avail_nodes
+
+nodeEvacInstance _ _ _ _ (Instance.Instance
+                          {Instance.diskTemplate = DTPlain}) _ _ =
+                  fail "Instances of type plain cannot be relocated"
+
+nodeEvacInstance _ _ _ _ (Instance.Instance
+                          {Instance.diskTemplate = DTFile}) _ _ =
+                  fail "Instances of type file cannot be relocated"
+
+nodeEvacInstance opts nl il mode inst@(Instance.Instance
+                                    {Instance.diskTemplate = dt@DTSharedFile})
+                 gdx avail_nodes =
+                   failOnSecondaryChange mode dt >>
+                   evacOneNodeOnly opts nl il inst gdx avail_nodes
+
+nodeEvacInstance opts nl il mode inst@(Instance.Instance
+                                    {Instance.diskTemplate = dt@DTBlock})
+                 gdx avail_nodes =
+                   failOnSecondaryChange mode dt >>
+                   evacOneNodeOnly opts nl il inst gdx avail_nodes
+
+nodeEvacInstance opts nl il mode inst@(Instance.Instance
+                                    {Instance.diskTemplate = dt@DTRbd})
+                 gdx avail_nodes =
+                   failOnSecondaryChange mode dt >>
+                   evacOneNodeOnly opts nl il inst gdx avail_nodes
+
+nodeEvacInstance opts nl il mode inst@(Instance.Instance
+                                    {Instance.diskTemplate = dt@DTExt})
+                 gdx avail_nodes =
+                   failOnSecondaryChange mode dt >>
+                   evacOneNodeOnly opts nl il inst gdx avail_nodes
+
+nodeEvacInstance opts nl il mode inst@(Instance.Instance
+                                    {Instance.diskTemplate = dt@DTGluster})
+                 gdx avail_nodes =
+                   failOnSecondaryChange mode dt >>
+                   evacOneNodeOnly opts nl il inst gdx avail_nodes
+
+nodeEvacInstance opts nl il ChangePrimary
+                 inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
+                 _ _ =
+  do
+    (nl', inst', _, _) <- opToResult
+                          $ applyMoveEx (algIgnoreSoftErrors opts) nl inst
+                            Failover
+    let idx = Instance.idx inst
+        il' = Container.add idx inst' il
+        ops = iMoveToJob nl' il' idx Failover
+    return (nl', il', ops)
+
+nodeEvacInstance opts nl il ChangeSecondary
+                 inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
+                 gdx avail_nodes =
+  evacOneNodeOnly opts nl il inst gdx avail_nodes
+
+-- The algorithm for ChangeAll is as follows:
+--
+-- * generate all (primary, secondary) node pairs for the target groups
+-- * for each pair, execute the needed moves (r:s, f, r:s) and compute
+--   the final node list state and group score
+-- * select the best choice via a foldl that uses the same Either
+--   String solution as the ChangeSecondary mode
+nodeEvacInstance opts nl il ChangeAll
+                 inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
+                 gdx avail_nodes =
+  do
+    let no_nodes = Left "no nodes available"
+        node_pairs = [(p,s) | p <- avail_nodes, s <- avail_nodes, p /= s]
+    (nl', il', ops, _) <-
+        annotateResult "Can't find any good nodes for relocation" .
+        eitherToResult $
+        foldl'
+        (\accu nodes -> case evacDrbdAllInner opts nl il inst gdx nodes of
+                          Bad msg ->
+                              case accu of
+                                Right _ -> accu
+                                -- we don't need more details (which
+                                -- nodes, etc.) as we only selected
+                                -- this group if we can allocate on
+                                -- it, hence failures will not
+                                -- propagate out of this fold loop
+                                Left _ -> Left $ "Allocation failed: " ++ msg
+                          Ok result@(_, _, _, new_cv) ->
+                              let new_accu = Right result in
+                              case accu of
+                                Left _ -> new_accu
+                                Right (_, _, _, old_cv) ->
+                                    if old_cv < new_cv
+                                    then accu
+                                    else new_accu
+        ) no_nodes node_pairs
+
+    return (nl', il', ops)
+
+-- | Updates the evac solution with the results of an instance
+-- evacuation.
+updateEvacSolution :: (Node.List, Instance.List, EvacSolution)
+                   -> Idx
+                   -> Result (Node.List, Instance.List, [OpCodes.OpCode])
+                   -> (Node.List, Instance.List, EvacSolution)
+updateEvacSolution (nl, il, es) idx (Bad msg) =
+  (nl, il, es { esFailed = (idx, msg):esFailed es})
+updateEvacSolution (_, _, es) idx (Ok (nl, il, opcodes)) =
+  (nl, il, es { esMoved = new_elem:esMoved es
+              , esOpCodes = opcodes:esOpCodes es })
+    where inst = Container.find idx il
+          new_elem = (idx,
+                      instancePriGroup nl inst,
+                      Instance.allNodes inst)
+
+-- | Compute the list of nodes that are to be evacuated, given a list
+-- of instances and an evacuation mode.
+nodesToEvacuate :: Instance.List -- ^ The cluster-wide instance list
+                -> EvacMode      -- ^ The evacuation mode we're using
+                -> [Idx]         -- ^ List of instance indices being evacuated
+                -> IntSet.IntSet -- ^ Set of node indices
+nodesToEvacuate il mode =
+  IntSet.delete Node.noSecondary .
+  foldl' (\ns idx ->
+            let i = Container.find idx il
+                pdx = Instance.pNode i
+                sdx = Instance.sNode i
+                dt = Instance.diskTemplate i
+                withSecondary = case dt of
+                                  DTDrbd8 -> IntSet.insert sdx ns
+                                  _ -> ns
+            in case mode of
+                 ChangePrimary   -> IntSet.insert pdx ns
+                 ChangeSecondary -> withSecondary
+                 ChangeAll       -> IntSet.insert pdx withSecondary
+         ) IntSet.empty
+
+-- | Node-evacuation IAllocator mode main function.
+tryNodeEvac :: AlgorithmOptions
+            -> Group.List    -- ^ The cluster groups
+            -> Node.List     -- ^ The node list (cluster-wide, not per group)
+            -> Instance.List -- ^ Instance list (cluster-wide)
+            -> EvacMode      -- ^ The evacuation mode
+            -> [Idx]         -- ^ List of instance (indices) to be evacuated
+            -> Result (Node.List, Instance.List, EvacSolution)
+tryNodeEvac opts _ ini_nl ini_il mode idxs =
+  let evac_ndx = nodesToEvacuate ini_il mode idxs
+      offline = map Node.idx . filter Node.offline $ Container.elems ini_nl
+      excl_ndx = foldl' (flip IntSet.insert) evac_ndx offline
+      group_ndx = map (\(gdx, (nl, _)) -> (gdx, map Node.idx
+                                           (Container.elems nl))) $
+                  splitCluster ini_nl ini_il
+      (fin_nl, fin_il, esol) =
+        foldl' (\state@(nl, il, _) inst ->
+                  let gdx = instancePriGroup nl inst
+                      pdx = Instance.pNode inst in
+                  updateEvacSolution state (Instance.idx inst) $
+                  availableGroupNodes group_ndx
+                    (IntSet.insert pdx excl_ndx) gdx >>=
+                      nodeEvacInstance opts nl il mode inst gdx
+               )
+        (ini_nl, ini_il, emptyEvacSolution)
+        (map (`Container.find` ini_il) idxs)
+  in return (fin_nl, fin_il, reverseEvacSolution esol)
diff --git a/src/Ganeti/HTools/Cluster/Utils.hs 
b/src/Ganeti/HTools/Cluster/Utils.hs
new file mode 100644
index 0000000..11e5038
--- /dev/null
+++ b/src/Ganeti/HTools/Cluster/Utils.hs
@@ -0,0 +1,150 @@
+{-| Utility functions for cluster operations
+
+-}
+
+{-
+
+Copyright (C) 2009, 2010, 2011, 2012, 2013 Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+-}
+
+module Ganeti.HTools.Cluster.Utils
+  ( splitCluster
+  , iMoveToJob
+  , instancePriGroup
+  , availableGroupNodes
+  ) where
+
+import Data.Maybe (fromJust)
+import qualified Data.IntSet as IntSet
+
+import Ganeti.BasicTypes
+import qualified Ganeti.Constants as C
+import qualified Ganeti.HTools.Container as Container
+import qualified Ganeti.HTools.Instance as Instance
+import qualified Ganeti.HTools.Node as Node
+import Ganeti.HTools.Types
+import qualified Ganeti.OpCodes as OpCodes
+import Ganeti.Types (mkNonEmpty, mkNonNegative)
+
+-- | Splits a cluster into the component node groups.
+splitCluster :: Node.List -> Instance.List ->
+                [(Gdx, (Node.List, Instance.List))]
+splitCluster nl il =
+  let ngroups = Node.computeGroups (Container.elems nl)
+  in map (\(gdx, nodes) ->
+           let nidxs = map Node.idx nodes
+               nodes' = zip nidxs nodes
+               instances = Container.filter ((`elem` nidxs) . Instance.pNode) 
il
+           in (gdx, (Container.fromList nodes', instances))) ngroups
+
+-- | Convert a placement into a list of OpCodes (basically a job).
+iMoveToJob :: Node.List        -- ^ The node list; only used for node
+                               -- names, so any version is good
+                               -- (before or after the operation)
+           -> Instance.List    -- ^ The instance list; also used for
+                               -- names only
+           -> Idx              -- ^ The index of the instance being
+                               -- moved
+           -> IMove            -- ^ The actual move to be described
+           -> [OpCodes.OpCode] -- ^ The list of opcodes equivalent to
+                               -- the given move
+iMoveToJob nl il idx move =
+  let inst = Container.find idx il
+      iname = Instance.name inst
+      lookNode  n = case mkNonEmpty (Container.nameOf nl n) of
+                      -- FIXME: convert htools codebase to non-empty strings
+                      Bad msg -> error $ "Empty node name for idx " ++
+                                 show n ++ ": " ++ msg ++ "??"
+                      Ok ne -> Just ne
+      opF' = OpCodes.OpInstanceMigrate
+              { OpCodes.opInstanceName        = iname
+              , OpCodes.opInstanceUuid        = Nothing
+              , OpCodes.opMigrationMode       = Nothing -- default
+              , OpCodes.opOldLiveMode         = Nothing -- default as well
+              , OpCodes.opTargetNode          = Nothing -- this is drbd
+              , OpCodes.opTargetNodeUuid      = Nothing
+              , OpCodes.opAllowRuntimeChanges = False
+              , OpCodes.opIgnoreIpolicy       = False
+              , OpCodes.opMigrationCleanup    = False
+              , OpCodes.opIallocator          = Nothing
+              , OpCodes.opAllowFailover       = True
+              , OpCodes.opIgnoreHvversions    = True
+              }
+      opFA n = opF { OpCodes.opTargetNode = lookNode n } -- not drbd
+      opFforced =
+        OpCodes.OpInstanceFailover
+          { OpCodes.opInstanceName        = iname
+          , OpCodes.opInstanceUuid        = Nothing
+          , OpCodes.opShutdownTimeout     =
+              fromJust $ mkNonNegative C.defaultShutdownTimeout
+          , OpCodes.opIgnoreConsistency = False
+          , OpCodes.opTargetNode = Nothing
+          , OpCodes.opTargetNodeUuid = Nothing
+          , OpCodes.opIgnoreIpolicy = False
+          , OpCodes.opIallocator = Nothing
+          , OpCodes.opMigrationCleanup = False
+          }
+      opF = if Instance.forthcoming inst then opFforced else opF'
+      opR n = OpCodes.OpInstanceReplaceDisks
+                { OpCodes.opInstanceName     = iname
+                , OpCodes.opInstanceUuid     = Nothing
+                , OpCodes.opEarlyRelease     = False
+                , OpCodes.opIgnoreIpolicy    = False
+                , OpCodes.opReplaceDisksMode = OpCodes.ReplaceNewSecondary
+                , OpCodes.opReplaceDisksList = []
+                , OpCodes.opRemoteNode       = lookNode n
+                , OpCodes.opRemoteNodeUuid   = Nothing
+                , OpCodes.opIallocator       = Nothing
+                }
+  in case move of
+       Failover -> [ opF ]
+       FailoverToAny np -> [ opFA np ]
+       ReplacePrimary np -> [ opF, opR np, opF ]
+       ReplaceSecondary ns -> [ opR ns ]
+       ReplaceAndFailover np -> [ opR np, opF ]
+       FailoverAndReplace ns -> [ opF, opR ns ]
+
+-- | Computes the group of an instance per the primary node.
+instancePriGroup :: Node.List -> Instance.Instance -> Gdx
+instancePriGroup nl i =
+  let pnode = Container.find (Instance.pNode i) nl
+  in  Node.group pnode
+
+-- | Computes the nodes in a given group which are available for
+-- allocation.
+availableGroupNodes :: [(Gdx, [Ndx])] -- ^ Group index/node index assoc list
+                    -> IntSet.IntSet  -- ^ Nodes that are excluded
+                    -> Gdx            -- ^ The group for which we
+                                      -- query the nodes
+                    -> Result [Ndx]   -- ^ List of available node indices
+availableGroupNodes group_nodes excl_ndx gdx = do
+  local_nodes <- maybe (Bad $ "Can't find group with index " ++ show gdx)
+                 Ok (lookup gdx group_nodes)
+  let avail_nodes = filter (not . flip IntSet.member excl_ndx) local_nodes
+  return avail_nodes
+
diff --git a/src/Ganeti/HTools/Dedicated.hs b/src/Ganeti/HTools/Dedicated.hs
index 0ce23e4..8059766 100644
--- a/src/Ganeti/HTools/Dedicated.hs
+++ b/src/Ganeti/HTools/Dedicated.hs
@@ -57,6 +57,7 @@ import Ganeti.BasicTypes (iterateOk, Result, failError)
 import qualified Ganeti.HTools.AlgorithmParams as Alg
 import qualified Ganeti.HTools.Backend.IAlloc as IAlloc
 import qualified Ganeti.HTools.Cluster as Cluster
+import qualified Ganeti.HTools.Cluster.Utils as ClusterUtils
 import qualified Ganeti.HTools.Container as Container
 import qualified Ganeti.HTools.Group as Group
 import qualified Ganeti.HTools.Instance as Instance
@@ -210,7 +211,7 @@ findMGAllocation :: Alg.AlgorithmOptions
                  -> Int
                  -> Result (Cluster.GenericAllocSolution Metric)
 findMGAllocation opts gl nl il inst count = do
-  let groups_by_idx = Cluster.splitCluster nl il
+  let groups_by_idx = ClusterUtils.splitCluster nl il
       genSol (gdx, (nl', _)) =
         liftM fst $ findAllocation opts gl nl' gdx inst count
       sols = map (flip Container.find gl . fst &&& genSol) groups_by_idx
diff --git a/src/Ganeti/HTools/Program/Hbal.hs 
b/src/Ganeti/HTools/Program/Hbal.hs
index b875aaf..084433a 100644
--- a/src/Ganeti/HTools/Program/Hbal.hs
+++ b/src/Ganeti/HTools/Program/Hbal.hs
@@ -53,6 +53,7 @@ import Ganeti.HTools.AlgorithmParams (AlgorithmOptions(..), 
fromCLIOptions)
 import qualified Ganeti.HTools.Container as Container
 import qualified Ganeti.HTools.Cluster as Cluster
 import qualified Ganeti.HTools.Cluster.Metrics as Metrics
+import qualified Ganeti.HTools.Cluster.Utils as ClusterUtils
 import qualified Ganeti.HTools.Group as Group
 import qualified Ganeti.HTools.Node as Node
 import qualified Ganeti.HTools.Instance as Instance
@@ -206,7 +207,7 @@ maybeExecJobs opts ord_plc fin_nl il cmd_jobs =
 selectGroup :: Options -> Group.List -> Node.List -> Instance.List
             -> IO (String, (Node.List, Instance.List))
 selectGroup opts gl nlf ilf = do
-  let ngroups = Cluster.splitCluster nlf ilf
+  let ngroups = ClusterUtils.splitCluster nlf ilf
   when (length ngroups > 1 && isNothing (optGroup opts)) $ do
     hPutStrLn stderr "Found multiple node groups:"
     mapM_ (hPutStrLn stderr . ("  " ++) . Group.name .
diff --git a/src/Ganeti/HTools/Program/Hcheck.hs 
b/src/Ganeti/HTools/Program/Hcheck.hs
index d0d1e1e..c6ce982 100644
--- a/src/Ganeti/HTools/Program/Hcheck.hs
+++ b/src/Ganeti/HTools/Program/Hcheck.hs
@@ -47,6 +47,7 @@ import Ganeti.HTools.AlgorithmParams (fromCLIOptions)
 import qualified Ganeti.HTools.Container as Container
 import qualified Ganeti.HTools.Cluster as Cluster
 import qualified Ganeti.HTools.Cluster.Metrics as Metrics
+import qualified Ganeti.HTools.Cluster.Utils as ClusterUtils
 import qualified Ganeti.HTools.Group as Group
 import qualified Ganeti.HTools.Node as Node
 import qualified Ganeti.HTools.Instance as Instance
@@ -312,7 +313,7 @@ main opts args = do
   (ClusterData gl fixed_nl ilf _ _) <- loadExternalData opts
   nlf <- setNodeStatus opts fixed_nl
 
-  let splitcluster = Cluster.splitCluster nlf ilf
+  let splitcluster = ClusterUtils.splitCluster nlf ilf
 
   when machineread $ printGroupsMappings gl
 
diff --git a/src/Ganeti/HTools/Program/Hinfo.hs 
b/src/Ganeti/HTools/Program/Hinfo.hs
index 1098687..0c49faa 100644
--- a/src/Ganeti/HTools/Program/Hinfo.hs
+++ b/src/Ganeti/HTools/Program/Hinfo.hs
@@ -46,6 +46,7 @@ import Text.Printf (printf)
 
 import qualified Ganeti.HTools.Container as Container
 import qualified Ganeti.HTools.Cluster as Cluster
+import qualified Ganeti.HTools.Cluster.Utils as ClusterUtils
 import qualified Ganeti.HTools.Cluster.Metrics as Metrics
 import qualified Ganeti.HTools.Node as Node
 import qualified Ganeti.HTools.Group as Group
@@ -122,7 +123,7 @@ showGroupInfo :: Int -> Group.List -> Node.List -> 
Instance.List -> IO ()
 showGroupInfo verbose gl nl il = do
   let cgrs   = map (\(gdx, (gnl, gil)) ->
                  calcGroupInfo (Container.find gdx gl) gnl gil) $
-                 Cluster.splitCluster nl il
+                 ClusterUtils.splitCluster nl il
       cn1h   = all giN1Status cgrs
       grs    = map groupRowFormatHelper cgrs
       header = ["Group", "Nodes", "Instances", "Bad_Nodes", "Bad_Instances",
diff --git a/test/hs/Test/Ganeti/HTools/Cluster.hs 
b/test/hs/Test/Ganeti/HTools/Cluster.hs
index 2829dea..2d54c27 100644
--- a/test/hs/Test/Ganeti/HTools/Cluster.hs
+++ b/test/hs/Test/Ganeti/HTools/Cluster.hs
@@ -54,7 +54,9 @@ import Ganeti.BasicTypes
 import qualified Ganeti.HTools.AlgorithmParams as Alg
 import qualified Ganeti.HTools.Backend.IAlloc as IAlloc
 import qualified Ganeti.HTools.Cluster as Cluster
+import qualified Ganeti.HTools.Cluster.Evacuate as Evacuate
 import qualified Ganeti.HTools.Cluster.Metrics as Metrics
+import qualified Ganeti.HTools.Cluster.Utils as ClusterUtils
 import qualified Ganeti.HTools.Container as Container
 import qualified Ganeti.HTools.Group as Group
 import qualified Ganeti.HTools.Instance as Instance
@@ -243,15 +245,15 @@ prop_AllocRelocate =
 -- | Helper property checker for the result of a nodeEvac or
 -- changeGroup operation.
 check_EvacMode :: Group.Group -> Instance.Instance
-               -> Result (Node.List, Instance.List, Cluster.EvacSolution)
+               -> Result (Node.List, Instance.List, Evacuate.EvacSolution)
                -> Property
 check_EvacMode grp inst result =
   case result of
     Bad msg -> failTest $ "Couldn't evacuate/change group:" ++ msg
     Ok (_, _, es) ->
-      let moved = Cluster.esMoved es
-          failed = Cluster.esFailed es
-          opcodes = not . null $ Cluster.esOpCodes es
+      let moved = Evacuate.esMoved es
+          failed = Evacuate.esFailed es
+          opcodes = not . null $ Evacuate.esOpCodes es
       in conjoin
            [ failmsg ("'failed' not empty: " ++ show failed) (null failed)
            , failmsg "'opcodes' is null" opcodes
@@ -276,7 +278,7 @@ prop_AllocEvacuate =
     Bad msg -> failTest msg
     Ok (nl, il, inst') ->
       conjoin . map (\mode -> check_EvacMode defGroup inst' $
-                              Cluster.tryNodeEvac Alg.defaultOptions
+                              Evacuate.tryNodeEvac Alg.defaultOptions
                                 defGroupList nl il mode
                                 [Instance.idx inst']) .
                               evacModeOptions .
@@ -357,7 +359,7 @@ prop_SplitCluster node inst =
   let nl = makeSmallCluster node 2
       (nl', il') = foldl (\(ns, is) _ -> assignInstance ns is inst 0 1)
                    (nl, Container.empty) [1..icnt]
-      gni = Cluster.splitCluster nl' il'
+      gni = ClusterUtils.splitCluster nl' il'
   in sum (map (Container.size . snd . snd) gni) == icnt &&
      all (\(guuid, (nl'', _)) -> all ((== guuid) . Node.group)
                                  (Container.elems nl'')) gni
-- 
2.2.0.rc0.207.ga3a616c

Reply via email to