osaf/services/infrastructure/nid/nodeinit.c |  56 +++++++++++++++-------------
 1 files changed, 29 insertions(+), 27 deletions(-)


Generate core dump only at timeout

diff --git a/osaf/services/infrastructure/nid/nodeinit.c b/osaf/services/infrastructure/nid/nodeinit.c
--- a/osaf/services/infrastructure/nid/nodeinit.c
+++ b/osaf/services/infrastructure/nid/nodeinit.c
@@ -106,8 +106,8 @@ static NID_APP_TYPE get_apptype(char *);
 static uint32_t get_spawn_info(char *, NID_SPAWN_INFO *, char *);
 static uint32_t parse_nodeinit_conf(char *strbuf);
 static uint32_t check_process(NID_SPAWN_INFO *service);
-static void cleanup(NID_SPAWN_INFO *service);
-static uint32_t recovery_action(NID_SPAWN_INFO *, char *);
+static void cleanup(NID_SPAWN_INFO *service, int reason);
+static uint32_t recovery_action(NID_SPAWN_INFO *, char *, int);
 static uint32_t spawn_services(char *);
 static void nid_sleep(uint32_t);
 
@@ -989,7 +989,7 @@ uint32_t spawn_wait(NID_SPAWN_INFO *serv
        while ((n = osaf_poll_one_fd(select_fd, service->time_out * 10)) <= 0) {
                if (n == 0) {
                        LOG_ER("Timed-out for response from %s", service->serv_name);
-                       return NCSCC_RC_FAILURE;
+                       return NCSCC_RC_REQ_TIMOUT;
                }
                break;
        }
@@ -1151,7 +1151,7 @@ static pid_t get_pid_from_file(const cha
  * Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE.                       *
  *                                                                          *
  ***************************************************************************/
-void cleanup(NID_SPAWN_INFO *service)
+void cleanup(NID_SPAWN_INFO *service, int reason)
 {
        char strbuff[256];
 
@@ -1168,27 +1168,29 @@ void cleanup(NID_SPAWN_INFO *service)
        const uint32_t MAX_NO_RETRIES = 5;
 
        // get pid of current service_name instead of the parent pid
-       pid = get_pid_from_file(service->serv_name);
-       if (pid > 0) {
-               if (check_process(service)) {
-                       // send abort signal to process to generate a core dump
-                       LOG_ER("Sending SIGABRT to %s, pid=%d, (origin parent pid=%d)", service->serv_name, pid, service->pid);
-                       if (kill(pid, SIGABRT) >= 0) {
-                               // wait a short period for process to exit
-                               do {
-                                       w_pid = waitpid(service->pid, &status, WNOHANG);
-                                       if (w_pid < 0) {
-                                               if (errno == EINTR)
-                                                       continue;
-                                               else
-                                                       break;
-                                       } else if (w_pid > 0) {
-                                               if (WIFEXITED(status) || WIFSIGNALED(status)) {
-                                                       break;
+       if (reason == NCSCC_RC_REQ_TIMOUT) {
+               pid = get_pid_from_file(service->serv_name);
+               if (pid > 0) {
+                       if (check_process(service)) {
+                               // send abort signal to process to generate a core dump
+                               LOG_ER("Sending SIGABRT to %s, pid=%d, (origin parent pid=%d)", service->serv_name, pid, service->pid);
+                               if (kill(pid, SIGABRT) >= 0) {
+                                       // wait a short period for process to exit
+                                       do {
+                                               w_pid = waitpid(service->pid, &status, WNOHANG);
+                                               if (w_pid < 0) {
+                                                       if (errno == EINTR)
+                                                               continue;
+                                                       else
+                                                               break;
+                                               } else if (w_pid > 0) {
+                                                       if (WIFEXITED(status) || WIFSIGNALED(status)) {
+                                                               break;
+                                                       }
                                                }
-                                       }
-                                       sleep(1);
-                               } while (++no_of_retries < MAX_NO_RETRIES);
+                                               sleep(1);
+                                       } while (++no_of_retries < MAX_NO_RETRIES);
+                               }
                        }
                }
        }
@@ -1229,7 +1231,7 @@ void cleanup(NID_SPAWN_INFO *service)
  * Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE.                       *
  *                                                                          *
  ***************************************************************************/
-uint32_t recovery_action(NID_SPAWN_INFO *service, char *strbuff)
+uint32_t recovery_action(NID_SPAWN_INFO *service, char *strbuff, int reason)
 {
        uint32_t count = 0;
        NID_RECOVERY_OPT opt = NID_RESPAWN;
@@ -1244,7 +1246,7 @@ uint32_t recovery_action(NID_SPAWN_INFO 
 
                        /* Just clean the stuff we created during prev retry */
                        if (service->pid != 0)
-                               cleanup(service);
+                               cleanup(service, reason);
 
                        /* Done with cleanup so goahead with recovery */
                        if ((service->recovery_matrix[opt].action) (service, strbuff) != NCSCC_RC_SUCCESS) {
@@ -1312,7 +1314,7 @@ uint32_t spawn_services(char *strbuf)
                if (rc != NCSCC_RC_SUCCESS) {
                        LOG_ER("%s", sbuff);
                        LOG_ER("Going for recovery");
-                       if (recovery_action(service, sbuff) != NCSCC_RC_SUCCESS) {
+                       if (recovery_action(service, sbuff, rc) != NCSCC_RC_SUCCESS) {
                                exit(EXIT_FAILURE);
                        }
                }

------------------------------------------------------------------------------
One dashboard for servers and applications across Physical-Virtual-Cloud 
Widest out-of-the-box monitoring support with 50+ applications
Performance metrics, stats and reports that give you Actionable Insights
Deep dive visibility with transaction tracing using APM Insight.
http://ad.doubleclick.net/ddm/clk/290420510;117567292;y
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to