Fixed by the interdiff:

--- a/src/Ganeti/Query/Exec.hs
+++ b/src/Ganeti/Query/Exec.hs
@@ -346,7 +346,7 @@ forkJobProcess job luxiLivelock update = do
           return fd

-- | Forks the process and starts the processing of post hooks for the opcode
--- whose execution was unfinished due to job process disappear.
+-- whose execution was unfinished due to job process disappearing.
 forkPostHooksProcess :: (FromString e, Show e)
                      => JobId
                      -> ResultT e IO ProcessID


On 11/24/2015 08:27 PM, Hrvoje Ribicic wrote:
LGTM with nit.

On Fri, Nov 20, 2015 at 5:11 PM, 'Oleg Ponomarev' via ganeti-devel <[email protected] <mailto:[email protected]>> wrote:

    The forkPostHooksProcess function will be used to create a process running
    global POST hooks for opcodes whose job processes have disappeared.

    Signed-off-by: Oleg Ponomarev <[email protected]>
    ---
     src/Ganeti/Path.hs       |  7 +++++++
     src/Ganeti/Query/Exec.hs | 51
    +++++++++++++++++++++++++++++++++++++++++++-----
     2 files changed, 53 insertions(+), 5 deletions(-)

    diff --git a/src/Ganeti/Path.hs b/src/Ganeti/Path.hs
    index 8c02dea..e96bbb5 100644
    --- a/src/Ganeti/Path.hs
    +++ b/src/Ganeti/Path.hs
    @@ -58,6 +58,7 @@ module Ganeti.Path
       , instanceReasonDir
       , getInstReasonFilename
       , jqueueExecutorPy
    +  , postHooksExecutorPy
       , kvmPidDir
       ) where

    @@ -192,6 +193,12 @@ jqueueExecutorPy :: IO FilePath
     jqueueExecutorPy = return $ versionedsharedir
                                 </> "ganeti" </> "jqueue" </> "exec.py"

    +-- | The path to the Python executable for global post hooks of
    job which
    +-- process has disappeared.
    +postHooksExecutorPy :: IO FilePath
    +postHooksExecutorPy =
    +  return $ versionedsharedir </> "ganeti" </> "jqueue" </>
    "post_hooks_exec.py"
    +
     -- | The path to the directory where kvm stores the pid files.
     kvmPidDir :: IO FilePath
     kvmPidDir = runDir `pjoin` "kvm-hypervisor" `pjoin` "pid"
    diff --git a/src/Ganeti/Query/Exec.hs b/src/Ganeti/Query/Exec.hs
    index 8a4b13f..d7c9cbc 100644
    --- a/src/Ganeti/Query/Exec.hs
    +++ b/src/Ganeti/Query/Exec.hs
    @@ -58,6 +58,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    SUCH DAMAGE.
     module Ganeti.Query.Exec
       ( isForkSupported
       , forkJobProcess
    +  , forkPostHooksProcess
       ) where

     import Prelude ()
    @@ -244,6 +245,10 @@ killProcessOnError pid master logFn = do
                     threadDelay 100000 -- wait for 0.1s and check again
                     killIfAlive sigs

    +-- | Data type used only to define the return type of
    forkProcessCatchErrors.
    +data ForkProcessRet = ForkJob (FilePath, ProcessID) |
    +                      ForkPostHooks ProcessID
    +
     -- | Forks current process and running runFn in the child and
    commFn in the
     -- parent. Due to a bug in GHC forking process, we want to retry
    if the forked
     -- process fails to start. If it fails later on, the failure is
    handled by
    @@ -252,8 +257,8 @@ forkProcessCatchErrors :: (Show e, FromString e)
                            => (Client -> IO ())
                            -> (ProcessID -> String -> ResultT e
    (WriterLogT IO) ())
                            -> (ProcessID -> Client
    -                           -> ResultT e (WriterLogT IO)
    (FilePath, ProcessID))
    -                       -> ResultT e IO (FilePath, ProcessID)
    +                           -> ResultT e (WriterLogT IO)
    ForkProcessRet)
    +                       -> ResultT e IO ForkProcessRet
     forkProcessCatchErrors runFn logFn commFn = do
       -- Due to a bug in GHC forking process, we want to retry
       -- if the forked process fails to start.
    @@ -286,8 +291,9 @@ forkJobProcess job luxiLivelock update = do
                  ++ " for job " ++ jidStr
       update luxiLivelock

    -  forkProcessCatchErrors (childMain . qjId $ job) logDebugJob
    -                         parentMain
    +  ForkJob ret <- forkProcessCatchErrors (childMain . qjId $ job)
    logDebugJob
    +                                        parentMain
    +  return ret
       where
         -- Retrieve secret parameters if present
         secretParams = encodeStrict . filterSecretParameters . qjOps
    $ job
    @@ -318,7 +324,7 @@ forkJobProcess job luxiLivelock update = do
           _ <- recv "Waiting for the job to ask for secret parameters"
           send "Writing secret parameters to the client" secretParams
           liftIO $ closeClient master
    -      return (lockfile, pid)
    +      return $ ForkJob (lockfile, pid)

         -- | Code performing communication with the parent process.
    During
         -- communication the livelock is created, locked and sent back
    @@ -337,3 +343,38 @@ forkJobProcess job luxiLivelock update = do
               _ <- logFn "Waiting for the master process to confirm
    the lock"
               _ <- recvMsg s'
               return fd
    +
    +-- | Forks the process and starts the processing of post hooks
    for the opcode
    +-- whose execution was unfinished due to job process disappear.


Nit: the job process disappearing.

    +forkPostHooksProcess :: (FromString e, Show e)
    +                     => JobId
    +                     -> ResultT e IO ProcessID
    +forkPostHooksProcess jid = do
    +  ForkPostHooks ret <- forkProcessCatchErrors (childMain jid)
    logDebugJob
    +                                              parentMain
    +  return ret
    +  where
    +    jidStr = show $ fromJobId jid
    +    jobLogPrefix pid = "[start:post_hooks:job-" ++ jidStr ++ ",pid="
    +                       ++ show pid ++ "] "
    +    logDebugJob pid = logDebug . (jobLogPrefix pid ++)
    +
    +    -- | Code performing communication with the child process.
    First, receive
    +    -- livelock, then send necessary parameters to the python child.
    +    parentMain pid master = do
    +      let annotatedIO msg k = do
    +            logDebugJob pid msg
    +            liftIO $ rethrowAnnotateIOError (jobLogPrefix pid ++
    msg) k
    +      let recv msg = annotatedIO msg (recvMsg master)
    +          send msg x = annotatedIO msg (sendMsg master x)
    +      -- We communicate with the Python process
    +      _ <- recv "Waiting for the post hooks executor to ask for
    the job id"
    +      send "Writing job id to the client" jidStr
    +
    +      liftIO $ closeClient master
    +      return $ ForkPostHooks pid
    +
    +    -- | Code performing communication with the parent process.
    Python part
    +    -- will only read job file so, we don't need livelock here.
    +    childMain jid' s = runProcess jid' s P.postHooksExecutorPy commFn
    +      where commFn _ _ _ = return (0 :: Fd)
    --
    2.6.0.rc2.230.g3dd15c0


Hrvoje Ribicic
Ganeti Engineering
Google Germany GmbH
Dienerstr. 12, 80331, München

Geschäftsführer: Matthew Scott Sucherman, Paul Terence Manicle
Registergericht und -nummer: Hamburg, HRB 86891
Sitz der Gesellschaft: Hamburg

Diese E-Mail ist vertraulich. Wenn Sie nicht der richtige Adressat sind, leiten Sie diese bitte nicht weiter, informieren Sie den Absender und löschen Sie die E-Mail und alle Anhänge. Vielen Dank.

This e-mail is confidential. If you are not the right addressee please do not forward it, please inform the sender, and please erase this e-mail including any attachments. Thanks.


Reply via email to