Hi Hans,

I have responded to your comment below. It's an ACK anyway.

> Nodeinit sends SIGKILL to parent pid returned from fork. Sending
> SIGABRT
> the child pid should be used instead.

Yes, that's true, and a good catch. It should have been caught in the first version!

You have probably already guessed the reason. In case you have not: this
behaviour is seen because, in nodeinit.conf, we told NID to spawn the services
as scripts by specifying "S". For example:
/usr/local/lib/opensaf/clc-cli/osaf-rded:RDE:S:/usr/local/lib/opensaf/clc-cli/osaf-rded:12000:-6:2:1:start:stop

Therefore the pid is that of the script and not of our executable. Also, when
spawning scripts, NID cancels all the signals for that process.

If we had specified it as "D" (daemon) or "E" (regular process), then
service->pid would have held the pid of that process itself.

Thanks,
Mathi.

----- [email protected] wrote:

> osaf/services/infrastructure/nid/nodeinit.c |  88
> +++++++++++++++++++++++++++++
>  1 files changed, 88 insertions(+), 0 deletions(-)
> 
> 
> Nodeinit sends SIGKILL to parent pid returned from fork. Sending
> SIGABRT
> the child pid should be used instead.
> 
> diff --git a/osaf/services/infrastructure/nid/nodeinit.c
> b/osaf/services/infrastructure/nid/nodeinit.c
> --- a/osaf/services/infrastructure/nid/nodeinit.c
> +++ b/osaf/services/infrastructure/nid/nodeinit.c
> @@ -56,6 +56,10 @@
>  #include <sys/time.h>
>  #include <sys/resource.h>
>  
> +#include <signal.h>
> +#include <sys/wait.h>
> +#include <stdint.h>
> +
>  #include <configmake.h>
>  #include <rda_papi.h>
>  #include <logtrace.h>
> @@ -1084,6 +1088,58 @@ uint32_t check_process(NID_SPAWN_INFO *s
>       TRACE_LEAVE();
>  }
>  
> +
> +/****************************************************************************
> + * Name          : get_pid_from_file                                 
>       *
> + *                                                                   
>       *
> + * Description   : Retrieves the given service name pid.             
>       *
> + *                                                                   
>       *
> + * Arguments     : service name.                                     
>       *
> + *                                                                   
>       *
> + * Return Values : > 0 - process id of given service                 
>       *
> + *                 -1 - error, see syslog                            
>       *
> + *                                                                   
>       *
> +
> ***************************************************************************/
> +static pid_t get_pid_from_file(const char* service_name)
> +{
> +     char pid_file[NAME_MAX];
> +
> +     char prog_name[40];
> +     char *service, *tmp;
> +     FILE *f;
> +     pid_t pid;
> +
> +     service = (char*) malloc(strlen(service_name) +1);
> +     strcpy(service, service_name);
> +     tmp = service;
> +     for ( ; *tmp; ++tmp) *tmp = tolower(*tmp);
> +
> +     strcpy(prog_name, "osaf");
> +     strcat(prog_name, service);
> +     free(service);
> +
> +     LOG_IN("XXXX %s", prog_name);
> +
> +     snprintf(pid_file, sizeof(pid_file), PKGPIDDIR "/%s.pid",
> prog_name);
> +
> +     if ((f = fopen(pid_file, "r")) == 0) {
> +             LOG_WA("Failed to open %s", pid_file);
> +             return -1;
> +     }
> +
> +     if (fscanf(f, "%d", &pid) == 0) {
> +             LOG_WA("Could not read PID from file %s", pid_file);
> +             return -1;
> +     }
> +
> +     if (fclose(f) != 0) {
> +             LOG_WA("Could not close file");
> +             return -1;
> +     }
> +
> +     return pid;
> +}
> +
> 
> /****************************************************************************
>   * Name          : cleanup                                           
>       *
>   *                                                                   
>       *
> @@ -1108,6 +1164,38 @@ void cleanup(NID_SPAWN_INFO *service)
>       nid_close_ipc();
>       select_fd = -1;
>  
> +     pid_t w_pid;
> +     pid_t pid;
> +     int status;
> +     uint32_t no_of_retries = 0;
> +     const uint32_t MAX_NO_RETRIES = 5;
> +
> +     // get pid of current service_name instead of the parent pid
> +     pid = get_pid_from_file(service->serv_name);
> +     if (pid > 0) {
> +             if (check_process(service)) {
> +                     // send abort signal to process to generate a core dump
> +                     LOG_ER("Sending SIGABRT to %s, pid=%d, (parent pid=%d)",
> service->serv_name, pid, service->pid);
> +                     if (kill(pid, SIGABRT) >= 0) {
> +                             // wait a short period for process to exit
> +                             do {
> +                                     w_pid = waitpid(service->pid, &status, 
> WNOHANG);
> +                                     if (w_pid < 0) {
> +                                             if (errno == EINTR)
> +                                                     continue;
> +                                             else
> +                                                     break;
> +                                     } else if (w_pid > 0) {
> +                                             if (WIFEXITED(status) || 
> WIFSIGNALED(status)) {
> +                                                     break;
> +                                             }
> +                                     }
> +                                     sleep(1);
> +                             } while (++no_of_retries < MAX_NO_RETRIES);
> +                     }
> +             }
> +     }
> +     // if sending abort signal did not succeed, fallback to sigkill
>       if (check_process(service)) {
>               LOG_ER("Sending SIGKILL to %s, pid=%d", service->serv_name,
> service->pid);
>               kill(service->pid, SIGKILL);

------------------------------------------------------------------------------
BPM Camp - Free Virtual Workshop May 6th at 10am PDT/1PM EDT
Develop your own process in accordance with the BPMN 2 standard
Learn Process modeling best practices with Bonita BPM through live exercises
http://www.bonitasoft.com/be-part-of-it/events/bpm-camp-virtual-event?utm_source=Sourceforge_BPM_Camp_5_6_15&utm_medium=email&utm_campaign=VA_SF
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to