I've implemented some lightweight signal handling facilities. See the
attached header and implementation files for a taste of the current API,
and the pseudo-example code showing how to use it, briefly described below:

Right now I'm using it to interact with the job scheduler during (explicit)
timestepping. I have being/end signal handling calls around TSSolve(). A
PostStep() routine catches signals and handles them this way:

* If SIGINT or SIGTERM, I dump a restart file and set converged reason to
USER to stop.
* If SIGUSR1, I dump a restart file and continue timestepping.
* if SIGUSR2, I dump a VTK file and continue timestepping.

I can send signals to the job with `scancel -s SIG<NAME>`. When the job
time allocation is about to expire, SLURM fist sends SIGTERM and waits some
time before SIGKILL. That time is enough to get a restart file from the
last step, stop timestepping and finalize gracefully.

I'm not 100% happy with the API, maybe I should make it easier to use. For
example, I could define each PETSC_SIGXXX so that I do need the
macro PetscSigMask(). That would complicate a bit the mapping signal enum
-> name string, though. I could also implement PetscSignalRaise(), it may
be useful, but I'm not sure.

Do you think this may be of some value for core PETSc? I'm asking before
submitting a MR because that would require writing some docs, I don't want
to do the doc work before knowing your opinion first :-).

Regards,

-- 
Lisandro Dalcin
============
Research Scientist
Extreme Computing Research Center (ECRC)
King Abdullah University of Science and Technology (KAUST)
http://ecrc.kaust.edu.sa/
#include <petscsignal.h>

int main(...)
{
  ...

  int sigset = PETSC_SIGSET_EMPTY;
  PetscSigSetAdd(sigset,PETSC_SIGINT);
  PetscSigSetAdd(sigset,PETSC_SIGTERM);
  PetscSigSetAdd(sigset,PETSC_SIGUSR1);
  PetscSigSetAdd(sigset,PETSC_SIGUSR2);

  TSSetPostStep(ts,PostStep);

  PetscSignalBegin(sigset);
  TSSolve(ts,NULL);
  PetscSignalEnd(&sigset);

  ...
}

PetscErorCode PostStep(TS ts)
{
  int sigset;
  PetscSignalQuery(&sigset);
  MPI_Bcast(&sigset,1,MPI_INT,0,comm);
  if (!sigset) return 0;

  PetscBool SigINT  = PetscSigSetHas(sigset,PETSC_SIGINT);
  PetscBool SigTERM = PetscSigSetHas(sigset,PETSC_SIGTERM);
  PetscBool SigUSR1 = PetscSigSetHas(sigset,PETSC_SIGUSR1);
  PetscBool SigUSR2 = PetscSigSetHas(sigset,PETSC_SIGUSR2);

  if (SigINT || SigTERM) {
    DumpRestartFile();
    TSSetConvergedReason(ts,TS_CONVERGED_USER);
  }
  if (SigUSR1) {
    DumpRestartFile();
    PetscSignalClear(PetscSigMask(PETSC_SIGUSR1));
  }
  if (SigUSR2) {
    DumpVTKFile();
    PetscSignalClear(PetscSigMask(PETSC_SIGUSR1));
  }
  return 0;
}
#include <ssdc/petscsignal.h>
#include <petsc/private/petscimpl.h>
#include <signal.h>

const char *const PetscSigNums[] =
{
  NULL,
  "SIGHUP",
  "SIGINT",
  "SIGTERM",
  "SIGUSR1",
  "SIGUSR2",
  "PetscSigNum",
  "PETSC_",
  NULL
};

#define MAXLEVELS 16

#define MKLIST(ITEM,SEP) \
  ITEM(0)  SEP           \
  ITEM(1)  SEP           \
  ITEM(2)  SEP           \
  ITEM(3)  SEP           \
  ITEM(4)  SEP           \
  ITEM(5)  SEP           \
  ITEM(6)  SEP           \
  ITEM(7)  SEP           \
  ITEM(8)  SEP           \
  ITEM(9)  SEP           \
  ITEM(10) SEP           \
  ITEM(11) SEP           \
  ITEM(12) SEP           \
  ITEM(13) SEP           \
  ITEM(14) SEP           \
  ITEM(15) SEP           \
/**/

#define SigEnum(sig) PETSC_SIG##sig
#define SigMask(sig) PetscSigMask(SigEnum(sig))

static volatile int level = -1;
static volatile int _tab_sigset[MAXLEVELS];
#define sigset_value()  (_tab_sigset[level])
#define sigset_clear()  (sigset_value() = 0)
#define sigset_add(sig) (sigset_value() |= SigMask(sig))

PETSC_STATIC_INLINE void _handler(int _level,int signum)
{
#define level _level
  switch (signum) {
#ifndef PETSC_MISSING_SIGHUP
  case SIGHUP:  sigset_add(HUP);  break;
#endif
#ifndef PETSC_MISSING_SIGINT
  case SIGINT:  sigset_add(INT);  break;
#endif
#ifndef PETSC_MISSING_SIGTERM
  case SIGTERM: sigset_add(TERM); break;
#endif
#ifndef PETSC_MISSING_SIGUSR1
  case SIGUSR1: sigset_add(USR1); break;
#endif
#ifndef PETSC_MISSING_SIGUSR2
  case SIGUSR2: sigset_add(USR2); break;
#endif
  }
#undef level
}

#define COMMA ,
#define HDLNAME(i) _sighandler##i
#define HDLFUNC(i) static void HDLNAME(i)(int s) {_handler(i,s);}
MKLIST(HDLFUNC,/**/)
static void (*_tab_sighandler[MAXLEVELS])(int) = {MKLIST(HDLNAME,COMMA)};
#define sighandler() _tab_sighandler[level]

static int    _tab_sigmask[MAXLEVELS];
static void (*_tab_handler[MAXLEVELS][32])(int);
#define setmask(set) (_tab_sigmask[level] = (set))
#define enabled(sig) (_tab_sigmask[level]&SigMask(sig))
#define handler(sig) (_tab_handler[level][SigEnum(sig)])

#define levelpush(set) \
  do { level++; setmask(set); sigset_clear(); } while(0)

#define install(sig) \
  do { if (enabled(sig)) {handler(sig) = signal(SIG##sig, sighandler());} } while(0)

#define uninstall(sig) \
  do { if (enabled(sig)) {signal(SIG##sig, handler(sig));} } while(0)

#define levelpop() \
  do { sigset_clear(); setmask(0); level--; } while(0)

PetscErrorCode PetscSignalBegin(int sigset)
{
  PetscFunctionBegin;
  if (level >= MAXLEVELS-1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Too many nested calls");
  levelpush(sigset);
#ifndef PETSC_MISSING_SIGHUP
  install(HUP);
#endif
#ifndef PETSC_MISSING_SIGINT
  install(INT);
#endif
#ifndef PETSC_MISSING_SIGTERM
  install(TERM);
#endif
#ifndef PETSC_MISSING_SIGUSR1
  install(USR1);
#endif
#ifndef PETSC_MISSING_SIGUSR2
  install(USR2);
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode PetscSignalEnd(int *sigset)
{
  PetscFunctionBegin;
  if (level < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unbalanced Begin/End calls");
  if (sigset) PetscValidPointer(sigset,1);
#ifndef PETSC_MISSING_SIGHUP
  uninstall(HUP);
#endif
#ifndef PETSC_MISSING_SIGINT
  uninstall(INT);
#endif
#ifndef PETSC_MISSING_SIGTERM
  uninstall(TERM);
#endif
#ifndef PETSC_MISSING_SIGUSR1
  uninstall(USR1);
#endif
#ifndef PETSC_MISSING_SIGUSR2
  uninstall(USR2);
#endif
  if (sigset) *sigset = sigset_value();
  levelpop();
  PetscFunctionReturn(0);
}

PetscErrorCode PetscSignalQuery(int *sigset)
{
  PetscFunctionBegin;
  PetscValidPointer(sigset,1);
  *sigset = (level >= 0) ? sigset_value() : 0;
  PetscFunctionReturn(0);
}

PetscErrorCode PetscSignalClear(int sigset)
{
  PetscFunctionBegin;
  if (level >= 0) sigset_value() &= ~sigset;
  PetscFunctionReturn(0);
}
#ifndef PETSCSIGNAL_H
#define PETSCSIGNAL_H

#include <petscsys.h>

typedef enum {
  PETSC_SIGHUP  = 1,
  PETSC_SIGINT  = 2,
  PETSC_SIGTERM = 3,
  PETSC_SIGUSR1 = 4,
  PETSC_SIGUSR2 = 5
} PetscSigNum;

PETSC_EXTERN const char *const PetscSigNums[];

#define PetscSigMask(sig)  ((int)(1u<<((sig)-1)))

#define PETSC_SIGSET_EMPTY ((int)( 0u))
#define PETSC_SIGSET_FULL  ((int)(~0u))
#define PetscSigSetAdd(sigset,sig) (((sigset) |= PetscSigMask(sig)),0)
#define PetscSigSetDel(sigset,sig) (((sigset) &= ~PetscSigMask(sig)),0)
#define PetscSigSetHas(sigset,sig) (((sigset) & PetscSigMask(sig)) ? PETSC_TRUE : PETSC_FALSE)

PETSC_EXTERN PetscErrorCode PetscSignalBegin(int);
PETSC_EXTERN PetscErrorCode PetscSignalEnd(int*);
PETSC_EXTERN PetscErrorCode PetscSignalQuery(int*);
PETSC_EXTERN PetscErrorCode PetscSignalClear(int);

#endif

Reply via email to