On Tue, Jul 14, 2015 at 05:59:36PM +0200, 'Klaus Aehlig' via ganeti-devel wrote:
As described in the design, the maintenance daemon will only start
a new round once all jobs from the old round are finished. Add this
functionality now. As the list of jobs of the current round is also
relevant information, we expose it over HTTP. In order to do so, we
keep an in-memory copy of the job list (while the authoritative copy
is still kept in the configuration, as per our design).
Signed-off-by: Klaus Aehlig <aeh...@google.com>
---
Makefile.am | 1 +
src/Ganeti/MaintD/MemoryState.hs | 95 ++++++++++++++++++++++++++++++++++++++++
src/Ganeti/MaintD/Server.hs | 43 ++++++++++++++----
3 files changed, 130 insertions(+), 9 deletions(-)
create mode 100644 src/Ganeti/MaintD/MemoryState.hs
diff --git a/Makefile.am b/Makefile.am
index bd5b64b..e82057c 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -975,6 +975,7 @@ HS_LIB_SRCS = \
src/Ganeti/Logging/WriterLog.hs \
src/Ganeti/Luxi.hs \
src/Ganeti/MaintD/Autorepairs.hs \
+ src/Ganeti/MaintD/MemoryState.hs \
src/Ganeti/MaintD/Server.hs \
src/Ganeti/MaintD/Utils.hs \
src/Ganeti/Network.hs \
diff --git a/src/Ganeti/MaintD/MemoryState.hs b/src/Ganeti/MaintD/MemoryState.hs
new file mode 100644
index 0000000..592d0b5
--- /dev/null
+++ b/src/Ganeti/MaintD/MemoryState.hs
@@ -0,0 +1,95 @@
+{-| Memory copy of the state of the maintenance daemon.
+
+While the autoritative state of the maintenance daemon is
+stored in the configuration, the daemon keeps a copy of some
+values at run time, so that they can easily be exposed over
+HTTP.
+
+This module also provides functions for the mirrored information
+to update both, the authoritative state and the in-memory copy.
+
+-}
+
+{-
+
+Copyright (C) 2015 Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+-}
+
+module Ganeti.MaintD.MemoryState
+ ( MemoryState(..)
+ , emptyMemoryState
+ , getJobs
+ , clearJobs
+ , appendJobs
+ ) where
+
+import Control.Monad.IO.Class (liftIO)
+import Data.IORef (IORef, atomicModifyIORef)
+
+import Ganeti.BasicTypes (ResultT, withErrorT)
+import qualified Ganeti.Path as Path
+import Ganeti.THH.HsRPC (runRpcClient)
+import Ganeti.Types (JobId)
+import Ganeti.Utils (ordNub)
+import Ganeti.WConfd.Client ( getWConfdClient, maintenanceJobs, runModifyRpc
+ , clearMaintdJobs, appendMaintdJobs )
+
+-- | In-memory copy of parts of the state of the maintenance
+-- daemon.
+data MemoryState = MemoryState
+ { msJobs :: [ JobId ]
+ }
+
+-- | Inital state of the in-memory copy. All parts will be updated
+-- before use, after one round at the latest this copy is up to date.
+emptyMemoryState :: MemoryState
+emptyMemoryState = MemoryState {
+ msJobs = []
+ }
+
+-- | Get the list of jobs from the authoritative copy, and update the
+-- in-memory copy as well.
+getJobs :: IORef MemoryState -> ResultT String IO [JobId]
+getJobs memstate = do
+ wconfdClient <- liftIO $ getWConfdClient =<< Path.defaultWConfdSocket
+ jobs <- withErrorT show $ runRpcClient maintenanceJobs wconfdClient
.. also I'd suggest to close the client here
+ liftIO . atomicModifyIORef memstate $ \ s -> (s { msJobs = jobs }, ())
+ return jobs
+
+-- | Reset the list of active jobs.
+clearJobs :: IORef MemoryState -> IO ()
+clearJobs memstate = do
+ runModifyRpc clearMaintdJobs
+ atomicModifyIORef memstate $ \ s -> ( s { msJobs = [] }, ())
+
+-- | Append jobs to the list of active jobs, if not present already
+appendJobs :: IORef MemoryState -> [JobId] -> IO ()
+appendJobs memstate jobs = do
+ runModifyRpc $ appendMaintdJobs jobs
+ atomicModifyIORef memstate
+ $ \ s -> ( s { msJobs = ordNub $ msJobs s ++ jobs }, ())
diff --git a/src/Ganeti/MaintD/Server.hs b/src/Ganeti/MaintD/Server.hs
index ef0134c..45a650c 100644
--- a/src/Ganeti/MaintD/Server.hs
+++ b/src/Ganeti/MaintD/Server.hs
@@ -43,14 +43,16 @@ module Ganeti.MaintD.Server
import Control.Applicative ((<|>))
import Control.Concurrent (forkIO)
-import Control.Monad (forever, void, unless, when)
+import Control.Monad (forever, void, unless, when, liftM)
import Control.Monad.IO.Class (liftIO)
+import Data.IORef (IORef, newIORef, readIORef)
import qualified Data.Set as Set
-import Snap.Core (Snap, method, Method(GET), ifTop)
+import Snap.Core (Snap, method, Method(GET), ifTop, dir, route)
import Snap.Http.Server (httpServe)
import Snap.Http.Server.Config (Config)
import System.IO.Error (tryIOError)
import System.Time (getClockTime)
+import qualified Text.JSON as J
import Ganeti.BasicTypes ( GenericResult(..), ResultT, runResultT, mkResultT
, withErrorT, isBad)
@@ -61,9 +63,12 @@ import Ganeti.Daemon.Utils (handleMasterVerificationOptions)
import qualified Ganeti.HTools.Backend.Luxi as Luxi
import qualified Ganeti.HTools.Container as Container
import Ganeti.HTools.Loader (ClusterData(..), mergeData, checkData)
+import Ganeti.Jobs (waitForJobs)
import Ganeti.THH.HsRPC (runRpcClient)
import Ganeti.Logging.Lifted
+import qualified Ganeti.Luxi as L
import Ganeti.MaintD.Autorepairs (harepTasks)
+import Ganeti.MaintD.MemoryState
import qualified Ganeti.Path as Path
import Ganeti.Runtime (GanetiDaemon(GanetiMaintd))
import Ganeti.Types (JobId(..))
@@ -110,24 +115,43 @@ loadClusterData = do
return $ cdata { cdNodes = nl }
-- | Perform one round of maintenance
-maintenance :: ResultT String IO ()
-maintenance = do
+maintenance :: IORef MemoryState -> ResultT String IO ()
+maintenance memstate = do
wconfdClient <- liftIO $ getWConfdClient =<< Path.defaultWConfdSocket
delay <- withErrorT show $ runRpcClient maintenanceRoundDelay wconfdClient
liftIO $ threadDelaySeconds delay
+ oldjobs <- getJobs memstate
+ logDebug $ "Jobs submitted in the last round: "
+ ++ show (map fromJobId oldjobs)
+ luxiSocket <- liftIO Path.defaultQuerySocket
+ lclient <- mkResultT . liftM (either (Bad . show) Ok)
+ . tryIOError $ L.getLuxiClient luxiSocket
+ void . mkResultT $ waitForJobs oldjobs lclient
.. and here
+ liftIO $ clearJobs memstate
logDebug "New round of maintenance started"
cData <- loadClusterData
let il = cdInstances cData
nl = cdNodes cData
nidxs = Set.fromList $ Container.keys nl
(nidxs', jobs) <- harepTasks (nl, il) nidxs
+ liftIO $ appendJobs memstate jobs
logDebug $ "Unaffected nodes " ++ show (Set.toList nidxs')
++ ", jobs submitted " ++ show (map fromJobId jobs)
+-- | Expose a part of the memory state
+exposeState :: J.JSON a => (MemoryState -> a) -> IORef MemoryState -> Snap ()
+exposeState selector ref = do
+ state <- liftIO $ readIORef ref
+ plainJSON $ selector state
+
-- | The information to serve via HTTP
-httpInterface :: Snap ()
-httpInterface = ifTop (method GET $ plainJSON [1 :: Int])
- <|> error404
+httpInterface :: IORef MemoryState -> Snap ()
+httpInterface memstate =
+ ifTop (method GET $ plainJSON [1 :: Int])
+ <|> dir "1" (ifTop (plainJSON J.JSNull)
+ <|> route [ ("jobs", exposeState msJobs memstate)
+ ])
+ <|> error404
-- | Check function for luxid.
checkMain :: CheckFn CheckResult
@@ -140,10 +164,11 @@ prepMain opts _ = httpConfFromOpts GanetiMaintd opts
-- | Main function.
main :: MainFn CheckResult PrepResult
main _ _ httpConf = do
+ memstate <- newIORef emptyMemoryState
void . forkIO . forever $ do
- res <- runResultT maintenance
+ res <- runResultT $ maintenance memstate
logDebug $ "Maintenance round done, result is " ++ show res
when (isBad res) $ do
logInfo "Backing off after a round with internal errors"
threadDelaySeconds C.maintdDefaultRoundDelay
- httpServe httpConf httpInterface
+ httpServe httpConf $ httpInterface memstate
--
2.4.3.573.g4eafbef
Rest LGTM, thanks