commit bc47bd760bd237dfa57f62be192243e5cbb036ab
Merge: c0bbbb0 5ebbd75
Author: Petr Pudlak <[email protected]>
Date:   Tue Mar 17 12:54:27 2015 +0100

    Merge branch 'stable-2.12' into stable-2.13
    
    Conflicts:
        src/Ganeti/Query/Exec.hs
    Resolution: merge changes from both branches
    
    Signed-off-by: Petr Pudlak <[email protected]>

diff --cc src/Ganeti/Query/Exec.hs
index 14faee0,73ae68c..53a0fa5
--- a/src/Ganeti/Query/Exec.hs
+++ b/src/Ganeti/Query/Exec.hs
@@@ -60,9 -60,8 +60,10 @@@ module Ganeti.Query.Exe
    , forkJobProcess
    ) where
  
- import Control.Concurrent
- import Control.Exception.Lifted (onException, throwIO)
+ import Control.Concurrent (rtsSupportsBoundThreads)
+ import Control.Concurrent.Lifted (threadDelay)
++import Control.Exception (onException, throwIO)
 +import qualified Control.Exception.Lifted as E
  import Control.Monad
  import Control.Monad.Error
  import Control.Monad.Trans.Maybe
@@@ -71,10 -70,10 +72,10 @@@ import qualified Data.Map as 
  import Data.Maybe (listToMaybe, mapMaybe)
  import System.Directory (getDirectoryContents)
  import System.Environment
 -import System.IO.Error (tryIOError)
 +import System.IO.Error (tryIOError, annotateIOError)
  import System.Posix.Process
  import System.Posix.IO
- import System.Posix.Signals (sigTERM, signalProcess)
+ import System.Posix.Signals (sigABRT, sigKILL, sigTERM, signalProcess)
  import System.Posix.Types (Fd, ProcessID)
  import System.Time
  import Text.Printf
@@@ -226,38 -220,42 +230,50 @@@ forkJobProcess jid luxiLivelock update 
  
      (pid, master) <- liftIO $ forkWithPipe connectConfig (runJobProcess jid)
  
+     let logDebugJob = logDebug
+                       . (("[job-" ++ jidStr ++ ",pid=" ++ show pid ++ "] ") 
++)
+ 
+     logDebugJob "Forked a new process"
+ 
+     let killIfAlive [] = return ()
+         killIfAlive (sig : sigs) = do
+           logDebugJob "Getting the status of the process"
+           status <- tryError . liftIO $ getProcessStatus False True pid
+           case status of
+             Left e -> logDebugJob $ "Job process already gone: " ++ show e
+             Right (Just s) -> logDebugJob $ "Child process status: " ++ show s
+             Right Nothing -> do
+                 logDebugJob $ "Child process running, killing by " ++ show sig
+                 liftIO $ signalProcess sig pid
+                 unless (null sigs) $ do
+                   threadDelay 100000 -- wait for 0.1s and check again
+                   killIfAlive sigs
+ 
      let onError = do
-           logDebug "Closing the pipe to the client"
+           logDebugJob "Closing the pipe to the client"
            withErrorLogAt WARNING "Closing the communication pipe failed"
                (liftIO (closeClient master)) `orElse` return ()
-           logDebug $ "Getting the status of job process "
-                      ++ show (fromJobId jid)
-           status <- liftIO $ getProcessStatus False True pid
-           case status of
-             Just s -> logDebug $ "Child process (job " ++ show (fromJobId jid)
-                                   ++ ") status: " ++ show s
-             Nothing -> do
-                       logDebug $ "Child process (job " ++ show (fromJobId jid)
-                                   ++ ") running, killing by SIGTERM"
-                       liftIO $ signalProcess sigTERM pid
+           killIfAlive [sigTERM, sigABRT, sigKILL]
  
-     flip onException onError $ do
+     flip catchError (\e -> onError >> throwError e)
+       . (`mplus` (onError >> mzero))
+       $ do
        let recv = liftIO $ recvMsg master
 -          send = liftIO . sendMsg master
 +                   `rethrowAnnotateIOError` "ganeti job process input pipe"
 +                   `onException`
 +                   logError "recv from ganeti job process pipe failed"
 +
 +          send x = liftIO $ sendMsg master x
 +                     `rethrowAnnotateIOError` "ganeti job process output pipe"
 +                     `onException`
 +                     logError "send to ganeti job process pipe failed"
 +
-       logDebug "Getting the lockfile of the client"
+       logDebugJob "Getting the lockfile of the client"
        lockfile <- recv `orElse` mzero
  
-       logDebug $ "Setting the lockfile to the final " ++ lockfile
+       logDebugJob $ "Setting the lockfile to the final " ++ lockfile
        toErrorBase $ update lockfile
-       logDebug "Confirming the client it can start"
+       logDebugJob "Confirming the client it can start"
        send ""
  
        -- from now on, we communicate with the job's Python process

Reply via email to