This data collector is for the maintenance daemon. It runs predefined scripts that evaluate node health.
Signed-off-by: BSRK Aditya <[email protected]> --- src/Ganeti/Constants.hs | 7 ++ src/Ganeti/DataCollectors.hs | 5 ++ src/Ganeti/DataCollectors/Diagnose.hs | 145 +++++++++++++++++++++++++++++++++ src/Ganeti/DataCollectors/Types.hs | 2 +- test/data/cluster_config_2.16.json | 4 + test/py/cfgupgrade_unittest.py | 1 + 6 files changed, 163 insertions(+), 1 deletion(-) create mode 100644 src/Ganeti/DataCollectors/Diagnose.hs diff --git a/src/Ganeti/Constants.hs b/src/Ganeti/Constants.hs index 4cba0d5..872eb68 100644 --- a/src/Ganeti/Constants.hs +++ b/src/Ganeti/Constants.hs @@ -5412,6 +5412,12 @@ dataCollectorInstStatus = "inst-status-xen" dataCollectorParameterInterval :: String dataCollectorParameterInterval = "interval" +dataCollectorDiagnose :: String +dataCollectorDiagnose = "diagnose" + +dataCollectorDiagnoseDirectory :: String +dataCollectorDiagnoseDirectory = "/etc/ganeti/node-diagnose-commands" + dataCollectorNames :: FrozenSet String dataCollectorNames = ConstantUtils.mkSet [ dataCollectorCPULoad @@ -5420,6 +5426,7 @@ dataCollectorNames = , dataCollectorLv , dataCollectorInstStatus , dataCollectorXenCpuLoad + , dataCollectorDiagnose ] dataCollectorStateActive :: String diff --git a/src/Ganeti/DataCollectors.hs b/src/Ganeti/DataCollectors.hs index bca6848..bcc2f41 100644 --- a/src/Ganeti/DataCollectors.hs +++ b/src/Ganeti/DataCollectors.hs @@ -42,6 +42,7 @@ import qualified Ganeti.DataCollectors.Diskstats as Diskstats import qualified Ganeti.DataCollectors.Drbd as Drbd import qualified Ganeti.DataCollectors.InstStatus as InstStatus import qualified Ganeti.DataCollectors.Lv as Lv +import qualified Ganeti.DataCollectors.Diagnose as D import qualified Ganeti.DataCollectors.XenCpuLoad as XenCpuLoad import Ganeti.DataCollectors.Types (DataCollector(..),ReportBuilder(..)) import Ganeti.JSON (GenericContainer(..)) @@ -57,6 +58,7 @@ collectors = , drdbCollector , instStatusCollector , lvCollector + , diagnoseCollector ] where f .&&. 
g = \x y -> f x y && g x y @@ -82,6 +84,9 @@ collectors = lvCollector = DataCollector Lv.dcName Lv.dcCategory Lv.dcKind (StatelessR Lv.dcReport) Nothing activeConfig updateInterval + diagnoseCollector = + DataCollector D.dcName D.dcCategory D.dcKind + (StatelessR D.dcReport) Nothing activeConfig updateInterval cpuLoadCollector = DataCollector CPUload.dcName CPUload.dcCategory CPUload.dcKind (StatefulR CPUload.dcReport) (Just CPUload.dcUpdate) activeConfig diff --git a/src/Ganeti/DataCollectors/Diagnose.hs b/src/Ganeti/DataCollectors/Diagnose.hs new file mode 100644 index 0000000..f1bfa84 --- /dev/null +++ b/src/Ganeti/DataCollectors/Diagnose.hs @@ -0,0 +1,145 @@ +{-| Self diagnose data collector + +-} + +{- + +Copyright (C) 2013 Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-} + +module Ganeti.DataCollectors.Diagnose + ( dcName + , dcCategory + , dcKind + , dcReport + ) where + +import Control.Monad +import System.Directory +import System.Posix.Files +import System.Process +import qualified Text.JSON as J + +import qualified Ganeti.Constants as C +import Ganeti.DataCollectors.Types + +-- | The name of this data collector. +dcName :: String +dcName = C.dataCollectorDiagnose + +-- | The category of this data collector. +dcCategory :: Maybe DCCategory +dcCategory = Just DCNode + +-- | The kind of this data collector. +dcKind :: DCKind +dcKind = DCKStatus + +-- | The version of this data collector. +dcVersion :: DCVersion +dcVersion = DCVerBuiltin + +-- | The version number for the data format of this data collector. +dcFormatVersion :: Int +dcFormatVersion = 1 + +lookupObj :: String -> J.JSObject e -> J.Result e +lookupObj key obj = case lookup key (J.fromJSObject obj) of + Just val -> J.Ok val + Nothing -> J.Error $ "Could not find " ++ show key ++ "." 
+
+-- | A command string carried in the @"command"@ field of a report.
+type Command = String
+
+-- | The status value of a diagnose report. In JSON the constructors are
+-- written as @"ok"@, @"live-repair"@ (with an optional @"command"@ field),
+-- @"evacuate"@ and @"evacuate-failover"@ respectively.
+data Status = Ok | LiveRepair (Maybe Command) | Evacuate | EvacuateFailover
+
+-- | A single diagnose report: a mandatory status plus an optional,
+-- uninterpreted @"details"@ value.
+data Report = Report
+  { reportStatus :: Status
+  , reportDetails :: Maybe J.JSValue
+  }
+
+instance J.JSON Report where
+  -- Parse a JSON object with a mandatory "status" string, an optional
+  -- "details" value and, for "live-repair" only, an optional "command".
+  readJSON jv = do
+    obj <- J.readJSON jv :: J.Result (J.JSObject J.JSValue)
+    let fields = J.fromJSObject obj
+        details = lookup "details" fields
+        withStatus st = Report st details
+    strStatus <- lookupObj "status" obj >>= J.readJSON :: J.Result String
+    case strStatus of
+      "ok" -> return $ withStatus Ok
+      "live-repair" ->
+        case lookup "command" fields of
+          Nothing -> return . withStatus $ LiveRepair Nothing
+          -- The command is decoded as a plain string, mirroring what
+          -- 'showJSON' emits. Decoding it as @Maybe Command@ would require
+          -- Text.JSON's @{"Just": ...}@ object encoding and reject strings.
+          Just jCmd -> fmap (withStatus . LiveRepair . Just) (J.readJSON jCmd)
+      "evacuate" -> return $ withStatus Evacuate
+      "evacuate-failover" -> return $ withStatus EvacuateFailover
+      _ -> J.Error "status does not match any of the predefined values"
+  -- Serialise to a JSON object; "command" and "details" are only written
+  -- when present.
+  showJSON r = J.JSObject . J.toJSObject $ statusFields ++ detailFields
+    where
+      statusFields = case reportStatus r of
+        Ok -> [("status", J.showJSON "ok")]
+        LiveRepair Nothing -> [("status", J.showJSON "live-repair")]
+        LiveRepair (Just cmd) -> [ ("status", J.showJSON "live-repair")
+                                 , ("command", J.showJSON cmd)
+                                 ]
+        Evacuate -> [("status", J.showJSON "evacuate")]
+        EvacuateFailover -> [("status", J.showJSON "evacuate-failover")]
+      detailFields = maybe [] (\val -> [("details", val)]) (reportDetails r)
+
+-- | Decide whether a file may be run as a diagnose script.
+--
+-- The path must refer to an existing regular file owned by uid 0 and gid 0,
+-- executable by owner and group, and carrying no mode bits beyond
+-- @rwxr-xr-x@. Paths that do not resolve to a file (missing entries,
+-- directories, dangling symlinks) yield 'False' instead of an exception.
+isValid :: FilePath -> IO Bool
+isValid fp = do
+  exists <- doesFileExist fp
+  if not exists
+    then return False
+    else do
+      fs <- getFileStatus fp
+      let mode = fileMode fs
+          isSet flag = flag `intersectFileModes` mode == flag
+      return $ and
+        [ fileOwner fs == 0
+        , fileGroup fs == 0
+        , mode `intersectFileModes` maxFileMode == mode
+        , isSet ownerExecuteMode
+        , isSet groupExecuteMode
+        , isRegularFile fs
+        ]
+  where
+    -- 'fileMode' also carries the file-type bits, so the regular-file type
+    -- has to be part of the allowed mask; otherwise no file could pass.
+    maxFileMode = foldr unionFileModes nullFileMode
+                    [ regularFileMode
+                    , ownerModes
+                    , groupReadMode
+                    , groupExecuteMode
+                    , otherReadMode
+                    , otherExecuteMode
+                    ]
+
+-- | Run the diagnose scripts and collect their JSON output in a JSON array.
+buildJsonReport :: IO J.JSValue
+buildJsonReport = do
+  let dir = C.dataCollectorDiagnoseDirectory
+  -- A node without any diagnose scripts is not an error: report an empty
+  -- array instead of failing when the directory is absent.
+  dirExists <- doesDirectoryExist dir
+  entries <- if dirExists then getDirectoryContents dir else return []
+  -- Scripts are addressed by their full path. A bare file name would be
+  -- looked up in $PATH by 'readProcess', and changing the working directory
+  -- instead would alter process-global state that is left modified if any
+  -- step throws.
+  let candidates = map (\entry -> dir ++ "/" ++ entry) entries
+  validCmds <- filterM isValid candidates
+  outs <- forM validCmds (\cmd -> readProcess cmd [] "")
+  -- Every script has to print a JSON document; the first one that does not
+  -- parse fails the whole report.
+  case mapM J.decode outs of
+    J.Ok vals -> return $ J.JSArray vals
+    J.Error str -> fail str
+
+-- | The data exported by the data collector, taken from the default location.
+dcReport :: IO DCReport
+dcReport = buildJsonReport >>=
+  buildReport dcName dcVersion dcFormatVersion dcCategory dcKind
diff --git a/src/Ganeti/DataCollectors/Types.hs b/src/Ganeti/DataCollectors/Types.hs
index 8b60be1..3bd31b3 100644
--- a/src/Ganeti/DataCollectors/Types.hs
+++ b/src/Ganeti/DataCollectors/Types.hs
@@ -68,7 +68,7 @@ import Ganeti.THH
 import Ganeti.Utils (getCurrentTimeUSec)
 
 -- | The possible classes a data collector can belong to.
-data DCCategory = DCInstance | DCStorage | DCDaemon | DCHypervisor
+data DCCategory = DCInstance | DCStorage | DCDaemon | DCHypervisor | DCNode
   deriving (Show, Eq, Read, Enum, Bounded)
 
 -- | Get the category name and return it as a string.
diff --git a/test/data/cluster_config_2.16.json b/test/data/cluster_config_2.16.json index 97511cf..82c5d05 100644 --- a/test/data/cluster_config_2.16.json +++ b/test/data/cluster_config_2.16.json @@ -41,6 +41,10 @@ "active": true, "interval": 5000000.0 }, + "diagnose": { + "active": true, + "interval": 5000000.0 + }, "xen-cpu-avg-load": { "active": true, "interval": 5000000.0 diff --git a/test/py/cfgupgrade_unittest.py b/test/py/cfgupgrade_unittest.py index a436351..22e2339 100755 --- a/test/py/cfgupgrade_unittest.py +++ b/test/py/cfgupgrade_unittest.py @@ -73,6 +73,7 @@ def GetMinimalConfig(): "inst-status-xen": { "active": True, "interval": 5000000 }, "cpu-avg-load": { "active": True, "interval": 5000000 }, "xen-cpu-avg-load": { "active": True, "interval": 5000000 }, + "diagnose": { "active": True, "interval": 5000000 }, }, }, "instances": {}, -- 1.7.10.4
