LGTM with nit. On Fri, Nov 20, 2015 at 5:11 PM, 'Oleg Ponomarev' via ganeti-devel < [email protected]> wrote:
> forkPostHooksProcess function will be used to create process running > global POST hooks for opcodes which job processes have disappeared. > > Signed-off-by: Oleg Ponomarev <[email protected]> > --- > src/Ganeti/Path.hs | 7 +++++++ > src/Ganeti/Query/Exec.hs | 51 > +++++++++++++++++++++++++++++++++++++++++++----- > 2 files changed, 53 insertions(+), 5 deletions(-) > > diff --git a/src/Ganeti/Path.hs b/src/Ganeti/Path.hs > index 8c02dea..e96bbb5 100644 > --- a/src/Ganeti/Path.hs > +++ b/src/Ganeti/Path.hs > @@ -58,6 +58,7 @@ module Ganeti.Path > , instanceReasonDir > , getInstReasonFilename > , jqueueExecutorPy > + , postHooksExecutorPy > , kvmPidDir > ) where > > @@ -192,6 +193,12 @@ jqueueExecutorPy :: IO FilePath > jqueueExecutorPy = return $ versionedsharedir > </> "ganeti" </> "jqueue" </> "exec.py" > > +-- | The path to the Python executable for global post hooks of job which > +-- process has disappeared. > +postHooksExecutorPy :: IO FilePath > +postHooksExecutorPy = > + return $ versionedsharedir </> "ganeti" </> "jqueue" </> > "post_hooks_exec.py" > + > -- | The path to the directory where kvm stores the pid files. > kvmPidDir :: IO FilePath > kvmPidDir = runDir `pjoin` "kvm-hypervisor" `pjoin` "pid" > diff --git a/src/Ganeti/Query/Exec.hs b/src/Ganeti/Query/Exec.hs > index 8a4b13f..d7c9cbc 100644 > --- a/src/Ganeti/Query/Exec.hs > +++ b/src/Ganeti/Query/Exec.hs > @@ -58,6 +58,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH > DAMAGE. > module Ganeti.Query.Exec > ( isForkSupported > , forkJobProcess > + , forkPostHooksProcess > ) where > > import Prelude () > @@ -244,6 +245,10 @@ killProcessOnError pid master logFn = do > threadDelay 100000 -- wait for 0.1s and check again > killIfAlive sigs > > +-- | Data type used only to define the return type of > forkProcessCatchErrors. > +data ForkProcessRet = ForkJob (FilePath, ProcessID) | > + ForkPostHooks ProcessID > + > -- | Forks current process and running runFn in the child and commFn in > the > -- parent. Due to a bug in GHC forking process, we want to retry if the > forked > -- process fails to start. If it fails later on, the failure is handled by > @@ -252,8 +257,8 @@ forkProcessCatchErrors :: (Show e, FromString e) > => (Client -> IO ()) > -> (ProcessID -> String -> ResultT e (WriterLogT > IO) ()) > -> (ProcessID -> Client > - -> ResultT e (WriterLogT IO) (FilePath, > ProcessID)) > - -> ResultT e IO (FilePath, ProcessID) > + -> ResultT e (WriterLogT IO) ForkProcessRet) > + -> ResultT e IO ForkProcessRet > forkProcessCatchErrors runFn logFn commFn = do > -- Due to a bug in GHC forking process, we want to retry > -- if the forked process fails to start. > @@ -286,8 +291,9 @@ forkJobProcess job luxiLivelock update = do > ++ " for job " ++ jidStr > update luxiLivelock > > - forkProcessCatchErrors (childMain . qjId $ job) logDebugJob > - parentMain > + ForkJob ret <- forkProcessCatchErrors (childMain . qjId $ job) > logDebugJob > + parentMain > + return ret > where > -- Retrieve secret parameters if present > secretParams = encodeStrict . filterSecretParameters . qjOps $ job > @@ -318,7 +324,7 @@ forkJobProcess job luxiLivelock update = do > _ <- recv "Waiting for the job to ask for secret parameters" > send "Writing secret parameters to the client" secretParams > liftIO $ closeClient master > - return (lockfile, pid) > + return $ ForkJob (lockfile, pid) > > -- | Code performing communication with the parent process. During > -- communication the livelock is created, locked and sent back > @@ -337,3 +343,38 @@ forkJobProcess job luxiLivelock update = do > _ <- logFn "Waiting for the master process to confirm the lock" > _ <- recvMsg s' > return fd > + > +-- | Forks the process and starts the processing of post hooks for the > opcode > +-- whose execution was unfinished due to job process disappear. > Nit: the job process disappearing. > +forkPostHooksProcess :: (FromString e, Show e) > + => JobId > + -> ResultT e IO ProcessID > +forkPostHooksProcess jid = do > + ForkPostHooks ret <- forkProcessCatchErrors (childMain jid) logDebugJob > + parentMain > + return ret > + where > + jidStr = show $ fromJobId jid > + jobLogPrefix pid = "[start:post_hooks:job-" ++ jidStr ++ ",pid=" > + ++ show pid ++ "] " > + logDebugJob pid = logDebug . (jobLogPrefix pid ++) > + > + -- | Code performing communication with the child process. First, > receive > + -- livelock, then send necessary parameters to the python child. > + parentMain pid master = do > + let annotatedIO msg k = do > + logDebugJob pid msg > + liftIO $ rethrowAnnotateIOError (jobLogPrefix pid ++ msg) k > + let recv msg = annotatedIO msg (recvMsg master) > + send msg x = annotatedIO msg (sendMsg master x) > + -- We communicate with the Python process > + _ <- recv "Waiting for the post hooks executor to ask for the job > id" > + send "Writing job id to the client" jidStr > + > + liftIO $ closeClient master > + return $ ForkPostHooks pid > + > + -- | Code performing communication with the parent process. Python > part > + -- will only read job file so, we don't need livelock here. > + childMain jid' s = runProcess jid' s P.postHooksExecutorPy commFn > + where commFn _ _ _ = return (0 :: Fd) > -- > 2.6.0.rc2.230.g3dd15c0 > > Hrvoje Ribicic Ganeti Engineering Google Germany GmbH Dienerstr. 12, 80331, München Geschäftsführer: Matthew Scott Sucherman, Paul Terence Manicle Registergericht und -nummer: Hamburg, HRB 86891 Sitz der Gesellschaft: Hamburg Diese E-Mail ist vertraulich. Wenn Sie nicht der richtige Adressat sind, leiten Sie diese bitte nicht weiter, informieren Sie den Absender und löschen Sie die E-Mail und alle Anhänge. Vielen Dank. This e-mail is confidential. If you are not the right addressee please do not forward it, please inform the sender, and please erase this e-mail including any attachments. Thanks.
