Once the handling of an incident is finished (either successfully or
unsuccessfully), the corresponding node is tagged accordingly and the
state is set to success or failure. It is then the task of the user to
confirm that all needed non-Ganeti actions have been taken by removing
that tag. Once the tag is removed, the repair daemon will forget the event.

Signed-off-by: Klaus Aehlig <[email protected]>
---
 Makefile.am                           |  1 +
 src/Ganeti/MaintD/CleanupIncidents.hs | 86 +++++++++++++++++++++++++++++++++++
 src/Ganeti/MaintD/Server.hs           |  2 +
 3 files changed, 89 insertions(+)
 create mode 100644 src/Ganeti/MaintD/CleanupIncidents.hs

diff --git a/Makefile.am b/Makefile.am
index 0258c0d..06361b5 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -980,6 +980,7 @@ HS_LIB_SRCS = \
        src/Ganeti/Luxi.hs \
        src/Ganeti/MaintD/Autorepairs.hs \
        src/Ganeti/MaintD/Balance.hs \
+       src/Ganeti/MaintD/CleanupIncidents.hs \
        src/Ganeti/MaintD/CollectIncidents.hs \
        src/Ganeti/MaintD/HandleIncidents.hs \
         src/Ganeti/MaintD/MemoryState.hs \
diff --git a/src/Ganeti/MaintD/CleanupIncidents.hs b/src/Ganeti/MaintD/CleanupIncidents.hs
new file mode 100644
index 0000000..1347f04
--- /dev/null
+++ b/src/Ganeti/MaintD/CleanupIncidents.hs
@@ -0,0 +1,86 @@
+{-| Incident clean up in the maintenance daemon.
+
+This module implements the clean up of events that are finished,
+and acknowledged as such by the user.
+
+-}
+
+{-
+
+Copyright (C) 2015 Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+-}
+
+module Ganeti.MaintD.CleanupIncidents
+  ( cleanupIncidents
+  ) where
+
+import Control.Arrow ((&&&))
+import Control.Monad (unless)
+import Control.Monad.IO.Class (liftIO)
+import Data.IORef (IORef)
+
+import Ganeti.BasicTypes (ResultT, mkResultT)
+import qualified Ganeti.HTools.Container as Container
+import qualified Ganeti.HTools.Node as Node
+import Ganeti.Logging.Lifted
+import Ganeti.MaintD.MemoryState (MemoryState, getIncidents, rmIncident)
+import Ganeti.Objects.Maintenance (Incident(..), RepairStatus(..))
+import Ganeti.Utils (logAndBad)
+
+-- | Forget one incident if its tag is no longer set on the node it
+-- refers to, or if no node with the recorded name exists any more.
+cleanupIncident :: IORef MemoryState
+                -> Node.List
+                -> Incident
+                -> ResultT String IO ()
+cleanupIncident memstate nl incident =
+  case filter ((== location) . Node.name) $ Container.elems nl of
+    [] -> forget $ "No node any more with name " ++ location
+    [nd] -> unless (tag `elem` Node.nTags nd)
+              . forget $ "Tag " ++ tag ++ " removed on " ++ location
+    -- Several nodes share the name: report an error rather than guess.
+    _ -> mkResultT . logAndBad
+           $ "Found More than one node with name " ++ location
+  where
+    location = incidentNode incident
+    uuid = incidentUuid incident
+    tag = incidentTag incident
+    -- Log the reason, then drop the incident from the in-memory state.
+    forget reason = do
+      logInfo $ reason ++ "; will forget event " ++ uuid
+      liftIO $ rmIncident memstate uuid
+
+-- | Go over all recorded incidents whose repair status is beyond
+-- 'RSPending' and forget those whose node tag has been removed.
+cleanupIncidents :: IORef MemoryState -> Node.List -> ResultT String IO ()
+cleanupIncidents memstate nl = do
+  recorded <- getIncidents memstate
+  let finalized = [ i | i <- recorded, incidentRepairStatus i > RSPending ]
+      summary = map (incidentNode &&& incidentUuid) finalized
+  logDebug $ "Finalized incidents " ++ show summary
+  mapM_ (cleanupIncident memstate nl) finalized
diff --git a/src/Ganeti/MaintD/Server.hs b/src/Ganeti/MaintD/Server.hs
index 28d68f4..b22a06e 100644
--- a/src/Ganeti/MaintD/Server.hs
+++ b/src/Ganeti/MaintD/Server.hs
@@ -68,6 +68,7 @@ import Ganeti.Logging.Lifted
 import qualified Ganeti.Luxi as L
 import Ganeti.MaintD.Autorepairs (harepTasks)
 import Ganeti.MaintD.Balance (balanceTask)
+import Ganeti.MaintD.CleanupIncidents (cleanupIncidents)
 import Ganeti.MaintD.CollectIncidents (collectIncidents)
 import Ganeti.MaintD.HandleIncidents (handleIncidents)
 import Ganeti.MaintD.MemoryState
@@ -137,6 +138,7 @@ maintenance memstate = do
   let il = cdInstances cData
       nl = cdNodes cData
       gl = cdGroups cData
+  cleanupIncidents memstate nl
   collectIncidents memstate nl
   nidxs <- handleIncidents memstate (gl, nl, il)
   (nidxs', jobs) <- harepTasks (nl, il) nidxs
-- 
2.5.0.457.gab17608

Reply via email to