commit bc47bd760bd237dfa57f62be192243e5cbb036ab
Merge: c0bbbb0 5ebbd75
Author: Petr Pudlak <[email protected]>
Date: Tue Mar 17 12:54:27 2015 +0100
Merge branch 'stable-2.12' into stable-2.13
Conflicts:
src/Ganeti/Query/Exec.hs
Resolution: merge changes from both branches
Signed-off-by: Petr Pudlak <[email protected]>
diff --cc src/Ganeti/Query/Exec.hs
index 14faee0,73ae68c..53a0fa5
--- a/src/Ganeti/Query/Exec.hs
+++ b/src/Ganeti/Query/Exec.hs
@@@ -60,9 -60,8 +60,10 @@@ module Ganeti.Query.Exe
, forkJobProcess
) where
- import Control.Concurrent
- import Control.Exception.Lifted (onException, throwIO)
+ import Control.Concurrent (rtsSupportsBoundThreads)
+ import Control.Concurrent.Lifted (threadDelay)
++import Control.Exception (onException, throwIO)
+import qualified Control.Exception.Lifted as E
import Control.Monad
import Control.Monad.Error
import Control.Monad.Trans.Maybe
@@@ -71,10 -70,10 +72,10 @@@ import qualified Data.Map as
import Data.Maybe (listToMaybe, mapMaybe)
import System.Directory (getDirectoryContents)
import System.Environment
-import System.IO.Error (tryIOError)
+import System.IO.Error (tryIOError, annotateIOError)
import System.Posix.Process
import System.Posix.IO
- import System.Posix.Signals (sigTERM, signalProcess)
+ import System.Posix.Signals (sigABRT, sigKILL, sigTERM, signalProcess)
import System.Posix.Types (Fd, ProcessID)
import System.Time
import Text.Printf
@@@ -226,38 -220,42 +230,50 @@@ forkJobProcess jid luxiLivelock update
(pid, master) <- liftIO $ forkWithPipe connectConfig (runJobProcess jid)
+ let logDebugJob = logDebug
+ . (("[job-" ++ jidStr ++ ",pid=" ++ show pid ++ "] ")
++)
+
+ logDebugJob "Forked a new process"
+
+ let killIfAlive [] = return ()
+ killIfAlive (sig : sigs) = do
+ logDebugJob "Getting the status of the process"
+ status <- tryError . liftIO $ getProcessStatus False True pid
+ case status of
+ Left e -> logDebugJob $ "Job process already gone: " ++ show e
+ Right (Just s) -> logDebugJob $ "Child process status: " ++ show s
+ Right Nothing -> do
+ logDebugJob $ "Child process running, killing by " ++ show sig
+ liftIO $ signalProcess sig pid
+ unless (null sigs) $ do
+ threadDelay 100000 -- wait for 0.1s and check again
+ killIfAlive sigs
+
let onError = do
- logDebug "Closing the pipe to the client"
+ logDebugJob "Closing the pipe to the client"
withErrorLogAt WARNING "Closing the communication pipe failed"
(liftIO (closeClient master)) `orElse` return ()
- logDebug $ "Getting the status of job process "
- ++ show (fromJobId jid)
- status <- liftIO $ getProcessStatus False True pid
- case status of
- Just s -> logDebug $ "Child process (job " ++ show (fromJobId jid)
- ++ ") status: " ++ show s
- Nothing -> do
- logDebug $ "Child process (job " ++ show (fromJobId jid)
- ++ ") running, killing by SIGTERM"
- liftIO $ signalProcess sigTERM pid
+ killIfAlive [sigTERM, sigABRT, sigKILL]
- flip onException onError $ do
+ flip catchError (\e -> onError >> throwError e)
+ . (`mplus` (onError >> mzero))
+ $ do
let recv = liftIO $ recvMsg master
- send = liftIO . sendMsg master
+ `rethrowAnnotateIOError` "ganeti job process input pipe"
+ `onException`
+ logError "recv from ganeti job process pipe failed"
+
+ send x = liftIO $ sendMsg master x
+ `rethrowAnnotateIOError` "ganeti job process output pipe"
+ `onException`
+ logError "send to ganeti job process pipe failed"
+
- logDebug "Getting the lockfile of the client"
+ logDebugJob "Getting the lockfile of the client"
lockfile <- recv `orElse` mzero
- logDebug $ "Setting the lockfile to the final " ++ lockfile
+ logDebugJob $ "Setting the lockfile to the final " ++ lockfile
toErrorBase $ update lockfile
- logDebug "Confirming the client it can start"
+ logDebugJob "Confirming the client it can start"
send ""
-- from now on, we communicate with the job's Python process