The data collectors should be able to provide as much information as possible
even when the system is badly degraded. This patch modifies the instance status
collector for Xen so that it keeps providing as much data as possible, even
when some of the queries it performs fail: the exitIfBad calls are removed and
replaced with logging and with returning empty fields instead.

Signed-off-by: Michele Tartara <[email protected]>
---
 src/Ganeti/DataCollectors/InstStatus.hs |   20 ++++++++++++++------
 src/Ganeti/Hypervisor/Xen.hs            |   30 +++++++++++++++++++++---------
 2 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/src/Ganeti/DataCollectors/InstStatus.hs b/src/Ganeti/DataCollectors/InstStatus.hs
index 65fc20c..0430274 100644
--- a/src/Ganeti/DataCollectors/InstStatus.hs
+++ b/src/Ganeti/DataCollectors/InstStatus.hs
@@ -43,6 +43,7 @@ import qualified Data.Map as Map
 import Network.BSD (getHostName)
 import qualified Text.JSON as J
 
+import Ganeti.BasicTypes as BT
 import Ganeti.Confd.ClientFunctions
 import Ganeti.Common
 import Ganeti.DataCollectors.CLI
@@ -181,12 +182,19 @@ buildInstStatusReport srvAddr srvPort = do
   node <- getHostName
   answer <- getInstances node srvAddr srvPort
   inst <- exitIfBad "Can't get instance info from ConfD" answer
-  domains <- getInferredDomInfo
-  uptimes <- getUptimeInfo
-  let primaryInst =  fst inst
-  iStatus <- mapM (buildStatus domains uptimes) primaryInst
-  let globalStatus = computeGlobalStatus iStatus
-      jsonReport = J.showJSON $ ReportData iStatus globalStatus
+  d <- getInferredDomInfo
+  reportData <-
+    case d of
+      BT.Ok domains -> do
+        uptimes <- getUptimeInfo
+        let primaryInst =  fst inst
+        iStatus <- mapM (buildStatus domains uptimes) primaryInst
+        let globalStatus = computeGlobalStatus iStatus
+        return $ ReportData iStatus globalStatus
+      BT.Bad m ->
+        (return . ReportData []) . DCStatus DCSCBad $
+          "Unable to receive the list of instances: " ++ m
+  let jsonReport = J.showJSON reportData
   buildReport dcName dcVersion dcFormatVersion dcCategory dcKind jsonReport
 
 -- | Main function.
diff --git a/src/Ganeti/Hypervisor/Xen.hs b/src/Ganeti/Hypervisor/Xen.hs
index 06c928e..3fbffa7 100644
--- a/src/Ganeti/Hypervisor/Xen.hs
+++ b/src/Ganeti/Hypervisor/Xen.hs
@@ -40,21 +40,25 @@ import qualified Ganeti.BasicTypes as BT
 import qualified Ganeti.Constants as C
 import Ganeti.Hypervisor.Xen.Types
 import Ganeti.Hypervisor.Xen.XmParser
+import Ganeti.Logging
 import Ganeti.Utils
 
 
 -- | Get information about the current Xen domains as a map where the domain
 -- name is the key. This only includes the information made available by Xen
 -- itself.
-getDomainsInfo :: IO (Map.Map String Domain)
+getDomainsInfo :: IO (BT.Result (Map.Map String Domain))
 getDomainsInfo = do
   contents <-
-    ((E.try $ readProcess C.xenCmdXm ["list", "--long"] "")
-      :: IO (Either IOError String)) >>=
-      exitIfBad "running command" . either (BT.Bad . show) BT.Ok
-  case A.parseOnly xmListParser $ pack contents of
-    Left msg -> exitErr msg
-    Right dom -> return dom
+        (E.try $ readProcess C.xenCmdXm ["list", "--long"] "")
+          :: IO (Either IOError String)  -- IOError is captured as Left
+  return $
+    either (BT.Bad . show) (  -- a failed command run becomes Bad
+      \c ->
+        case A.parseOnly xmListParser $ pack c of
+          Left msg -> BT.Bad msg  -- parser failure becomes Bad
+          Right dom -> BT.Ok dom
+      ) contents
 
 -- | Given a domain and a map containing information about multiple domains,
 -- infer additional information about that domain (specifically, whether it is
@@ -70,11 +74,19 @@ inferDomInfos domMap dom1 =
 -- name is the key. This includes information made available by Xen itself as
 -- well as further information that can be inferred by querying Xen multiple
 -- times and comparing the results.
-getInferredDomInfo :: IO (Map.Map String Domain)
+getInferredDomInfo :: IO (BT.Result (Map.Map String Domain))
 getInferredDomInfo = do
   domMap1 <- getDomainsInfo
   domMap2 <- getDomainsInfo
-  return $ fmap (inferDomInfos domMap2) domMap1
+  case (domMap1, domMap2) of
+    (BT.Bad m1, BT.Bad m2) -> return . BT.Bad $ m1 ++ "\n" ++ m2
+    (BT.Bad m, BT.Ok d) -> do
+      logWarning $ "Unable to retrieve domains info the first time: " ++ m
+      return $ BT.Ok d
+    (BT.Ok d, BT.Bad m) -> do
+      logWarning $ "Unable to retrieve domains info the second time: " ++ m
+      return $ BT.Ok d
+    (BT.Ok d1, BT.Ok d2) -> return . BT.Ok $ fmap (inferDomInfos d2) d1
 
 -- | Get information about the uptime of domains, as a map where the domain ID
 -- is the key.
-- 
1.7.10.4

Reply via email to