 osaf/services/infrastructure/nid/nodeinit.c | 56 +++++++++++++++-------------
 1 files changed, 29 insertions(+), 27 deletions(-)
Generate core dump only at timeout diff --git a/osaf/services/infrastructure/nid/nodeinit.c b/osaf/services/infrastructure/nid/nodeinit.c --- a/osaf/services/infrastructure/nid/nodeinit.c +++ b/osaf/services/infrastructure/nid/nodeinit.c @@ -106,8 +106,8 @@ static NID_APP_TYPE get_apptype(char *); static uint32_t get_spawn_info(char *, NID_SPAWN_INFO *, char *); static uint32_t parse_nodeinit_conf(char *strbuf); static uint32_t check_process(NID_SPAWN_INFO *service); -static void cleanup(NID_SPAWN_INFO *service); -static uint32_t recovery_action(NID_SPAWN_INFO *, char *); +static void cleanup(NID_SPAWN_INFO *service, int reason); +static uint32_t recovery_action(NID_SPAWN_INFO *, char *, int); static uint32_t spawn_services(char *); static void nid_sleep(uint32_t); @@ -989,7 +989,7 @@ uint32_t spawn_wait(NID_SPAWN_INFO *serv while ((n = osaf_poll_one_fd(select_fd, service->time_out * 10)) <= 0) { if (n == 0) { LOG_ER("Timed-out for response from %s", service->serv_name); - return NCSCC_RC_FAILURE; + return NCSCC_RC_REQ_TIMOUT; } break; } @@ -1151,7 +1151,7 @@ static pid_t get_pid_from_file(const cha * Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE. 
* * * ***************************************************************************/ -void cleanup(NID_SPAWN_INFO *service) +void cleanup(NID_SPAWN_INFO *service, int reason) { char strbuff[256]; @@ -1168,27 +1168,29 @@ void cleanup(NID_SPAWN_INFO *service) const uint32_t MAX_NO_RETRIES = 5; // get pid of current service_name instead of the parent pid - pid = get_pid_from_file(service->serv_name); - if (pid > 0) { - if (check_process(service)) { - // send abort signal to process to generate a core dump - LOG_ER("Sending SIGABRT to %s, pid=%d, (origin parent pid=%d)", service->serv_name, pid, service->pid); - if (kill(pid, SIGABRT) >= 0) { - // wait a short period for process to exit - do { - w_pid = waitpid(service->pid, &status, WNOHANG); - if (w_pid < 0) { - if (errno == EINTR) - continue; - else - break; - } else if (w_pid > 0) { - if (WIFEXITED(status) || WIFSIGNALED(status)) { - break; + if (reason == NCSCC_RC_REQ_TIMOUT) { + pid = get_pid_from_file(service->serv_name); + if (pid > 0) { + if (check_process(service)) { + // send abort signal to process to generate a core dump + LOG_ER("Sending SIGABRT to %s, pid=%d, (origin parent pid=%d)", service->serv_name, pid, service->pid); + if (kill(pid, SIGABRT) >= 0) { + // wait a short period for process to exit + do { + w_pid = waitpid(service->pid, &status, WNOHANG); + if (w_pid < 0) { + if (errno == EINTR) + continue; + else + break; + } else if (w_pid > 0) { + if (WIFEXITED(status) || WIFSIGNALED(status)) { + break; + } } - } - sleep(1); - } while (++no_of_retries < MAX_NO_RETRIES); + sleep(1); + } while (++no_of_retries < MAX_NO_RETRIES); + } } } } @@ -1229,7 +1231,7 @@ void cleanup(NID_SPAWN_INFO *service) * Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE. 
* * * ***************************************************************************/ -uint32_t recovery_action(NID_SPAWN_INFO *service, char *strbuff) +uint32_t recovery_action(NID_SPAWN_INFO *service, char *strbuff, int reason) { uint32_t count = 0; NID_RECOVERY_OPT opt = NID_RESPAWN; @@ -1244,7 +1246,7 @@ uint32_t recovery_action(NID_SPAWN_INFO /* Just clean the stuff we created during prev retry */ if (service->pid != 0) - cleanup(service); + cleanup(service, reason); /* Done with cleanup so goahead with recovery */ if ((service->recovery_matrix[opt].action) (service, strbuff) != NCSCC_RC_SUCCESS) { @@ -1312,7 +1314,7 @@ uint32_t spawn_services(char *strbuf) if (rc != NCSCC_RC_SUCCESS) { LOG_ER("%s", sbuff); LOG_ER("Going for recovery"); - if (recovery_action(service, sbuff) != NCSCC_RC_SUCCESS) { + if (recovery_action(service, sbuff, rc) != NCSCC_RC_SUCCESS) { exit(EXIT_FAILURE); } } ------------------------------------------------------------------------------ One dashboard for servers and applications across Physical-Virtual-Cloud Widest out-of-the-box monitoring support with 50+ applications Performance metrics, stats and reports that give you Actionable Insights Deep dive visibility with transaction tracing using APM Insight. http://ad.doubleclick.net/ddm/clk/290420510;117567292;y _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel