Implement the handling of pending maintenance tasks in
the maintenance daemon. In each round, we submit the
next task for the most radical action for each node.

Signed-off-by: Klaus Aehlig <[email protected]>
---
 Makefile.am                          |   1 +
 src/Ganeti/MaintD/HandleIncidents.hs | 236 +++++++++++++++++++++++++++++++++++
 src/Ganeti/MaintD/Server.hs          |   5 +-
 3 files changed, 240 insertions(+), 2 deletions(-)
 create mode 100644 src/Ganeti/MaintD/HandleIncidents.hs

diff --git a/Makefile.am b/Makefile.am
index c7b7f15..eb25a82 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -981,6 +981,7 @@ HS_LIB_SRCS = \
        src/Ganeti/MaintD/Autorepairs.hs \
        src/Ganeti/MaintD/Balance.hs \
        src/Ganeti/MaintD/CollectIncidents.hs \
+       src/Ganeti/MaintD/HandleIncidents.hs \
         src/Ganeti/MaintD/MemoryState.hs \
        src/Ganeti/MaintD/Server.hs \
        src/Ganeti/MaintD/Utils.hs \
diff --git a/src/Ganeti/MaintD/HandleIncidents.hs b/src/Ganeti/MaintD/HandleIncidents.hs
new file mode 100644
index 0000000..8801336
--- /dev/null
+++ b/src/Ganeti/MaintD/HandleIncidents.hs
@@ -0,0 +1,236 @@
+{-| Incident handling in the maintenance daemon.
+
+This module implements the submission of actions for ongoing
+repair events reported by the node-status data collector.
+
+-}
+
+{-
+
+Copyright (C) 2015 Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+-}
+
+module Ganeti.MaintD.HandleIncidents
+  ( handleIncidents
+  ) where
+
+import Control.Arrow ((&&&))
+import Control.Exception.Lifted (bracket)
+import Control.Lens.Setter (over)
+import Control.Monad (foldM, liftM)
+import Control.Monad.IO.Class (liftIO)
+import Data.Function (on)
+import Data.IORef (IORef)
+import qualified Data.Map as Map
+import qualified Data.Set as Set
+import System.IO.Error (tryIOError)
+
+import Ganeti.BasicTypes ( GenericResult(..), ResultT, mkResultT, mkResultT'
+                         , eitherToResult, Down(..))
+import Ganeti.HTools.AlgorithmParams (AlgorithmOptions(..), defaultOptions)
+import Ganeti.HTools.Cluster.Evacuate (tryNodeEvac, EvacSolution(..))
+import qualified Ganeti.HTools.Container as Container
+import qualified Ganeti.HTools.Group as Group
+import qualified Ganeti.HTools.Instance as Instance
+import qualified Ganeti.HTools.Node as Node
+import Ganeti.HTools.Types (Idx)
+import Ganeti.JQueue (currentTimestamp)
+import Ganeti.Jobs (execJobsWaitOkJid, submitJobs, forceFailover)
+import Ganeti.Logging.Lifted
+import qualified Ganeti.Luxi as L
+import Ganeti.MaintD.MemoryState ( MemoryState, getIncidents, rmIncident
+                                 , updateIncident, appendJobs)
+import Ganeti.MaintD.Utils (annotateOpCode)
+import Ganeti.Objects.Lens (incidentJobsL)
+import Ganeti.Objects.Maintenance ( RepairStatus(..), RepairAction(..)
+                                  , Incident(..))
+import Ganeti.OpCodes (OpCode(..), MetaOpCode)
+import qualified Ganeti.Path as Path
+import Ganeti.Types ( cTimeOf, uuidOf, mkNonEmpty, fromJobId
+                    , EvacMode(..), TagKind(..))
+import Ganeti.Utils (maxBy, logAndBad)
+
+-- | Given two incidents, choose the more severe one; for equally severe
+-- ones the older (by creation timestamp).
+moreSevereIncident :: Incident -> Incident -> Incident
+moreSevereIncident = maxBy (compare `on` incidentAction &&& (Down . cTimeOf))
+
+-- | From a given list of incidents, return, for each node,
+-- the one with the most severe action.
+rankIncidents :: [Incident] -> Map.Map String Incident
+rankIncidents = foldl (\m i -> Map.insertWith moreSevereIncident
+                                 (incidentNode i) i m) Map.empty
+
+-- | Generate a job to drain a given node.
+drainJob :: String -> ResultT String IO [ MetaOpCode ]
+drainJob name = do
+  name' <- mkNonEmpty name
+  now <- liftIO currentTimestamp
+  return $ map (annotateOpCode ("Draining " ++ name) now)
+    [ OpNodeSetParams { opNodeName = name'
+                      , opNodeUuid = Nothing
+                      , opForce = True
+                      , opHvState = Nothing
+                      , opDiskState = Nothing
+                      , opMasterCandidate = Nothing
+                      , opOffline = Nothing
+                      , opDrained = Just True
+                      , opAutoPromote = False
+                      , opMasterCapable = Nothing
+                      , opVmCapable = Nothing
+                      , opSecondaryIp = Nothing
+                      , opgenericNdParams = Nothing
+                      , opPowered = Nothing
+                      } ]
+
+-- | Submit and register the next job for a node evacuation.
+handleEvacuation :: L.Client -- ^ Luxi client to use
+                 -> IORef MemoryState -- ^ memory state of the daemon
+                 -> (Group.List, Node.List, Instance.List) -- ^ cluster
+                 -> Idx -- ^ index of the node to evacuate
+                 -> Bool -- ^ whether to try migrations
+                 -> Set.Set Int -- ^ allowed nodes for evacuation
+                 -> Incident -- ^ the incident
+                 -> ResultT String IO (Set.Set Int) -- ^ nodes still available
+handleEvacuation client memst (gl, nl, il) ndx migrate freenodes incident = do
+  let node = Container.find ndx nl
+      name = Node.name node
+      fNdNames = map (Node.name . flip Container.find nl) $ Set.elems freenodes
+      evacOpts = defaultOptions { algEvacMode = True
+                                , algIgnoreSoftErrors = True
+                                , algRestrictToNodes = Just fNdNames
+                                }
+      evacFun = tryNodeEvac evacOpts gl nl il
+      migrateFun = if migrate then id else forceFailover
+      annotateFun = annotateOpCode $ "Evacuating " ++ name
+      pendingIncident = incident { incidentRepairStatus = RSPending }
+      updateJobs jids_r = case jids_r of
+        Ok jids -> do
+          let incident' = over incidentJobsL (++ jids) pendingIncident
+          liftIO $ updateIncident memst incident'
+          liftIO $ appendJobs memst jids
+          logDebug $ "Jobs submitted: " ++ show (map fromJobId jids)
+        Bad e -> mkResultT . logAndBad
+                   $ "Failure evacuating " ++ name ++ ": " ++ e
+      logInstName i = logInfo $ "Evacuating instance "
+                                  ++ Instance.name (Container.find i il)
+                                  ++ " from " ++ name
+      execSol sol = do
+        now <- liftIO currentTimestamp
+        let jobs = map (map (annotateFun now . migrateFun)) $ esOpCodes sol
+        jids <- liftIO $ submitJobs jobs client
+        updateJobs jids
+        let touched = esMoved sol >>= \(_, _, nidxs) -> nidxs
+        return $ freenodes Set.\\ Set.fromList touched
+  logDebug $ "Handling evacuation of " ++ name
+  case () of _ | not $ Node.offline node -> do
+                   logDebug $ "Draining node " ++ name
+                   job <- drainJob name
+                   jids <- liftIO $ submitJobs [job] client
+                   updateJobs jids
+                   return freenodes
+               | i:_ <- Node.pList node -> do
+                   logInstName i
+                   (_, _, sol) <- mkResultT . return $ evacFun ChangePrimary [i]
+                   execSol sol
+               | i:_ <- Node.sList node -> do
+                   logInstName i
+                   (_, _, sol) <- mkResultT . return
+                                    $ evacFun ChangeSecondary [i]
+                   execSol sol
+               | otherwise -> do
+                   logDebug $ "Finished evacuation of " ++ name
+                   now <- liftIO currentTimestamp
+                   jids <- mkResultT $ execJobsWaitOkJid
+                            [[ annotateFun now
+                               . OpTagsSet TagKindNode [ incidentTag incident ]
+                               $ Just name]] client
+                   let incident' = over incidentJobsL (++ jids)
+                                     $ incident { incidentRepairStatus =
+                                                    RSCompleted }
+                   liftIO $ updateIncident memst incident'
+                   liftIO $ appendJobs memst jids
+                   return freenodes
+
+-- | Submit the next actions for a single incident, given the unaffected nodes;
+-- register all submitted jobs and return the new set of unaffected nodes.
+handleIncident :: L.Client
+               -> IORef MemoryState
+               -> (Group.List, Node.List, Instance.List)
+               -> Set.Set Int
+               -> (String, Incident)
+               -> ResultT String IO (Set.Set Int)
+handleIncident client memstate (gl, nl, il) freeNodes (name, incident) = do
+  ndx <- case Container.keys $ Container.filter ((==) name . Node.name) nl of
+           [ndx] -> return ndx
+           [] -> do
+             logWarning $ "Node " ++ name ++ " no longer in the cluster;"
+                          ++ " clearing incident " ++ show incident
+             liftIO . rmIncident memstate $ uuidOf incident
+             fail $ "node " ++ name ++ " left the cluster"
+           ndxs -> do
+             logWarning $ "Ambiguous node name " ++ name
+                          ++ "; could refer to indices " ++ show ndxs
+             fail $ "ambiguous name " ++ name
+  case incidentAction incident of
+    RANoop -> do
+      logDebug $ "Nothing to do for " ++ show incident
+      liftIO . rmIncident memstate $ uuidOf incident
+      return freeNodes
+    RALiveRepair -> do
+      logInfo "Live repairs not yet implemented"
+      return freeNodes
+    RAEvacuate ->
+      handleEvacuation client memstate (gl, nl, il) ndx True freeNodes incident
+    RAEvacuateFailover ->
+      handleEvacuation client memstate (gl, nl, il) ndx False freeNodes incident
+
+-- | Submit the jobs necessary for the next maintenance step
+-- for each pending maintenance, i.e., the most radical maintenance
+-- for each node. Return the set of node indices unaffected by these
+-- operations. Also, for each job submitted, register it directly.
+handleIncidents :: IORef MemoryState
+                -> (Group.List, Node.List, Instance.List)
+                -> ResultT String IO (Set.Set Int)
+handleIncidents memstate (gl, nl, il) = do
+  incidents <- getIncidents memstate
+  let activeIncidents = filter ((<= RSPending) . incidentRepairStatus) incidents
+      incidentsToHandle = rankIncidents activeIncidents
+      incidentNodes = Set.fromList . Container.keys
+        $ Container.filter ((`elem` Map.keys incidentsToHandle) . Node.name) nl
+      freeNodes = Set.fromList (Container.keys nl) Set.\\ incidentNodes
+  if null activeIncidents
+    then return freeNodes
+    else do
+      luxiSocket <- liftIO Path.defaultQuerySocket
+      bracket (mkResultT' . liftM eitherToResult . tryIOError
+                 $ L.getLuxiClient luxiSocket)
+              (mkResultT' . liftM eitherToResult . tryIOError . L.closeClient)
+              $ \ client ->
+                foldM (handleIncident client memstate (gl, nl, il)) freeNodes
+                  $ Map.assocs incidentsToHandle
diff --git a/src/Ganeti/MaintD/Server.hs b/src/Ganeti/MaintD/Server.hs
index d3657cd..28d68f4 100644
--- a/src/Ganeti/MaintD/Server.hs
+++ b/src/Ganeti/MaintD/Server.hs
@@ -62,7 +62,6 @@ import Ganeti.Daemon ( OptType, CheckFn, PrepFn, MainFn, oDebug
                      , oNoVoting, oYesDoIt, oPort, oBindAddress, oNoDaemonize)
 import Ganeti.Daemon.Utils (handleMasterVerificationOptions)
 import qualified Ganeti.HTools.Backend.Luxi as Luxi
-import qualified Ganeti.HTools.Container as Container
 import Ganeti.HTools.Loader (ClusterData(..), mergeData, checkData)
 import Ganeti.Jobs (waitForJobs)
 import Ganeti.Logging.Lifted
@@ -70,6 +69,7 @@ import qualified Ganeti.Luxi as L
 import Ganeti.MaintD.Autorepairs (harepTasks)
 import Ganeti.MaintD.Balance (balanceTask)
 import Ganeti.MaintD.CollectIncidents (collectIncidents)
+import Ganeti.MaintD.HandleIncidents (handleIncidents)
 import Ganeti.MaintD.MemoryState
 import qualified Ganeti.Path as Path
 import Ganeti.Runtime (GanetiDaemon(GanetiMaintd))
@@ -136,8 +136,9 @@ maintenance memstate = do
   cData <- loadClusterData
   let il = cdInstances cData
       nl = cdNodes cData
-      nidxs = Set.fromList $ Container.keys nl
+      gl = cdGroups cData
   collectIncidents memstate nl
+  nidxs <- handleIncidents memstate (gl, nl, il)
   (nidxs', jobs) <- harepTasks (nl, il) nidxs
   unless (null jobs)
    . liftIO $ appendJobs memstate jobs
-- 
2.5.0.rc2.392.g76e840b

Reply via email to