I've implemented some lightweight signal handling facilities. See the attached header and implementation files for a taste of the current API, and the pseudo-example code showing how to use it, briefly described below:
Right now I'm using it to interact with the job scheduler during (explicit) timestepping. I have being/end signal handling calls around TSSolve(). A PostStep() routine catches signals and handles them this way: * If SIGINT or SIGTERM, I dump a restart file and set converged reason to USER to stop. * If SIGUSR1, I dump a restart file and continue timestepping. * if SIGUSR2, I dump a VTK file and continue timestepping. I can send signals to the job with `scancel -s SIG<NAME>`. When the job time allocation is about to expire, SLURM fist sends SIGTERM and waits some time before SIGKILL. That time is enough to get a restart file from the last step, stop timestepping and finalize gracefully. I'm not 100% happy with the API, maybe I should make it easier to use. For example, I could define each PETSC_SIGXXX so that I do need the macro PetscSigMask(). That would complicate a bit the mapping signal enum -> name string, though. I could also implement PetscSignalRaise(), it may be useful, but I'm not sure. Do you think this may be of some value for core PETSc? I'm asking before submitting a MR because that would require writing some docs, I don't want to do the doc work before knowing your opinion first :-). Regards, -- Lisandro Dalcin ============ Research Scientist Extreme Computing Research Center (ECRC) King Abdullah University of Science and Technology (KAUST) http://ecrc.kaust.edu.sa/
#include <petscsignal.h> int main(...) { ... int sigset = PETSC_SIGSET_EMPTY; PetscSigSetAdd(sigset,PETSC_SIGINT); PetscSigSetAdd(sigset,PETSC_SIGTERM); PetscSigSetAdd(sigset,PETSC_SIGUSR1); PetscSigSetAdd(sigset,PETSC_SIGUSR2); TSSetPostStep(ts,PostStep); PetscSignalBegin(sigset); TSSolve(ts,NULL); PetscSignalEnd(&sigset); ... } PetscErorCode PostStep(TS ts) { int sigset; PetscSignalQuery(&sigset); MPI_Bcast(&sigset,1,MPI_INT,0,comm); if (!sigset) return 0; PetscBool SigINT = PetscSigSetHas(sigset,PETSC_SIGINT); PetscBool SigTERM = PetscSigSetHas(sigset,PETSC_SIGTERM); PetscBool SigUSR1 = PetscSigSetHas(sigset,PETSC_SIGUSR1); PetscBool SigUSR2 = PetscSigSetHas(sigset,PETSC_SIGUSR2); if (SigINT || SigTERM) { DumpRestartFile(); TSSetConvergedReason(ts,TS_CONVERGED_USER); } if (SigUSR1) { DumpRestartFile(); PetscSignalClear(PetscSigMask(PETSC_SIGUSR1)); } if (SigUSR2) { DumpVTKFile(); PetscSignalClear(PetscSigMask(PETSC_SIGUSR1)); } return 0; }
#include <ssdc/petscsignal.h> #include <petsc/private/petscimpl.h> #include <signal.h> const char *const PetscSigNums[] = { NULL, "SIGHUP", "SIGINT", "SIGTERM", "SIGUSR1", "SIGUSR2", "PetscSigNum", "PETSC_", NULL }; #define MAXLEVELS 16 #define MKLIST(ITEM,SEP) \ ITEM(0) SEP \ ITEM(1) SEP \ ITEM(2) SEP \ ITEM(3) SEP \ ITEM(4) SEP \ ITEM(5) SEP \ ITEM(6) SEP \ ITEM(7) SEP \ ITEM(8) SEP \ ITEM(9) SEP \ ITEM(10) SEP \ ITEM(11) SEP \ ITEM(12) SEP \ ITEM(13) SEP \ ITEM(14) SEP \ ITEM(15) SEP \ /**/ #define SigEnum(sig) PETSC_SIG##sig #define SigMask(sig) PetscSigMask(SigEnum(sig)) static volatile int level = -1; static volatile int _tab_sigset[MAXLEVELS]; #define sigset_value() (_tab_sigset[level]) #define sigset_clear() (sigset_value() = 0) #define sigset_add(sig) (sigset_value() |= SigMask(sig)) PETSC_STATIC_INLINE void _handler(int _level,int signum) { #define level _level switch (signum) { #ifndef PETSC_MISSING_SIGHUP case SIGHUP: sigset_add(HUP); break; #endif #ifndef PETSC_MISSING_SIGINT case SIGINT: sigset_add(INT); break; #endif #ifndef PETSC_MISSING_SIGTERM case SIGTERM: sigset_add(TERM); break; #endif #ifndef PETSC_MISSING_SIGUSR1 case SIGUSR1: sigset_add(USR1); break; #endif #ifndef PETSC_MISSING_SIGUSR2 case SIGUSR2: sigset_add(USR2); break; #endif } #undef level } #define COMMA , #define HDLNAME(i) _sighandler##i #define HDLFUNC(i) static void HDLNAME(i)(int s) {_handler(i,s);} MKLIST(HDLFUNC,/**/) static void (*_tab_sighandler[MAXLEVELS])(int) = {MKLIST(HDLNAME,COMMA)}; #define sighandler() _tab_sighandler[level] static int _tab_sigmask[MAXLEVELS]; static void (*_tab_handler[MAXLEVELS][32])(int); #define setmask(set) (_tab_sigmask[level] = (set)) #define enabled(sig) (_tab_sigmask[level]&SigMask(sig)) #define handler(sig) (_tab_handler[level][SigEnum(sig)]) #define levelpush(set) \ do { level++; setmask(set); sigset_clear(); } while(0) #define install(sig) \ do { if (enabled(sig)) {handler(sig) = signal(SIG##sig, sighandler());} } while(0) #define uninstall(sig) \ do { if (enabled(sig)) {signal(SIG##sig, handler(sig));} } while(0) #define levelpop() \ do { sigset_clear(); setmask(0); level--; } while(0) PetscErrorCode PetscSignalBegin(int sigset) { PetscFunctionBegin; if (level >= MAXLEVELS-1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Too many nested calls"); levelpush(sigset); #ifndef PETSC_MISSING_SIGHUP install(HUP); #endif #ifndef PETSC_MISSING_SIGINT install(INT); #endif #ifndef PETSC_MISSING_SIGTERM install(TERM); #endif #ifndef PETSC_MISSING_SIGUSR1 install(USR1); #endif #ifndef PETSC_MISSING_SIGUSR2 install(USR2); #endif PetscFunctionReturn(0); } PetscErrorCode PetscSignalEnd(int *sigset) { PetscFunctionBegin; if (level < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unbalanced Begin/End calls"); if (sigset) PetscValidPointer(sigset,1); #ifndef PETSC_MISSING_SIGHUP uninstall(HUP); #endif #ifndef PETSC_MISSING_SIGINT uninstall(INT); #endif #ifndef PETSC_MISSING_SIGTERM uninstall(TERM); #endif #ifndef PETSC_MISSING_SIGUSR1 uninstall(USR1); #endif #ifndef PETSC_MISSING_SIGUSR2 uninstall(USR2); #endif if (sigset) *sigset = sigset_value(); levelpop(); PetscFunctionReturn(0); } PetscErrorCode PetscSignalQuery(int *sigset) { PetscFunctionBegin; PetscValidPointer(sigset,1); *sigset = (level >= 0) ? sigset_value() : 0; PetscFunctionReturn(0); } PetscErrorCode PetscSignalClear(int sigset) { PetscFunctionBegin; if (level >= 0) sigset_value() &= ~sigset; PetscFunctionReturn(0); }
#ifndef PETSCSIGNAL_H #define PETSCSIGNAL_H #include <petscsys.h> typedef enum { PETSC_SIGHUP = 1, PETSC_SIGINT = 2, PETSC_SIGTERM = 3, PETSC_SIGUSR1 = 4, PETSC_SIGUSR2 = 5 } PetscSigNum; PETSC_EXTERN const char *const PetscSigNums[]; #define PetscSigMask(sig) ((int)(1u<<((sig)-1))) #define PETSC_SIGSET_EMPTY ((int)( 0u)) #define PETSC_SIGSET_FULL ((int)(~0u)) #define PetscSigSetAdd(sigset,sig) (((sigset) |= PetscSigMask(sig)),0) #define PetscSigSetDel(sigset,sig) (((sigset) &= ~PetscSigMask(sig)),0) #define PetscSigSetHas(sigset,sig) (((sigset) & PetscSigMask(sig)) ? PETSC_TRUE : PETSC_FALSE) PETSC_EXTERN PetscErrorCode PetscSignalBegin(int); PETSC_EXTERN PetscErrorCode PetscSignalEnd(int*); PETSC_EXTERN PetscErrorCode PetscSignalQuery(int*); PETSC_EXTERN PetscErrorCode PetscSignalClear(int); #endif