Re: [devel] [PATCH 1 of 1] nid: cleanup should generate a core dump V3 [#1300]

Hans Nordebäck Wed, 06 May 2015 03:22:39 -0700

Hi Mathi,

Is it OK to push this patch? I'll add the documentation AndersW 
mentioned. /Thanks HansN


On 05/06/2015 12:17 PM, Anders Widell wrote:
> Ack with minor comment: The documentation for the function 
> spawn_wait() should be updated to say that NCSCC_RC_REQ_TIMOUT is a 
> possible return code.
>
> / Anders Widell
>
> On 04/30/2015 03:13 PM, Hans Nordeback wrote:
>> osaf/services/infrastructure/nid/nodeinit.c |  56 
>> +++++++++++++++-------------
>>   1 files changed, 29 insertions(+), 27 deletions(-)
>>
>>
>> Generate core dump only at timeout
>>
>> diff --git a/osaf/services/infrastructure/nid/nodeinit.c 
>> b/osaf/services/infrastructure/nid/nodeinit.c
>> --- a/osaf/services/infrastructure/nid/nodeinit.c
>> +++ b/osaf/services/infrastructure/nid/nodeinit.c
>> @@ -106,8 +106,8 @@ static NID_APP_TYPE get_apptype(char *);
>>   static uint32_t get_spawn_info(char *, NID_SPAWN_INFO *, char *);
>>   static uint32_t parse_nodeinit_conf(char *strbuf);
>>   static uint32_t check_process(NID_SPAWN_INFO *service);
>> -static void cleanup(NID_SPAWN_INFO *service);
>> -static uint32_t recovery_action(NID_SPAWN_INFO *, char *);
>> +static void cleanup(NID_SPAWN_INFO *service, int reason);
>> +static uint32_t recovery_action(NID_SPAWN_INFO *, char *, int);
>>   static uint32_t spawn_services(char *);
>>   static void nid_sleep(uint32_t);
>>   @@ -989,7 +989,7 @@ uint32_t spawn_wait(NID_SPAWN_INFO *serv
>>       while ((n = osaf_poll_one_fd(select_fd, service->time_out * 
>> 10)) <= 0) {
>>           if (n == 0) {
>>               LOG_ER("Timed-out for response from %s", 
>> service->serv_name);
>> -            return NCSCC_RC_FAILURE;
>> +            return NCSCC_RC_REQ_TIMOUT;
>>           }
>>           break;
>>       }
>> @@ -1151,7 +1151,7 @@ static pid_t get_pid_from_file(const cha
>>    * Return Values : 
>> NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE.                       *
>> * *
>> ***************************************************************************/
>> -void cleanup(NID_SPAWN_INFO *service)
>> +void cleanup(NID_SPAWN_INFO *service, int reason)
>>   {
>>       char strbuff[256];
>>   @@ -1168,27 +1168,29 @@ void cleanup(NID_SPAWN_INFO *service)
>>       const uint32_t MAX_NO_RETRIES = 5;
>>         // get pid of current service_name instead of the parent pid
>> -    pid = get_pid_from_file(service->serv_name);
>> -    if (pid > 0) {
>> -        if (check_process(service)) {
>> -            // send abort signal to process to generate a core dump
>> -            LOG_ER("Sending SIGABRT to %s, pid=%d, (origin parent 
>> pid=%d)", service->serv_name, pid, service->pid);
>> -            if (kill(pid, SIGABRT) >= 0) {
>> -                // wait a short period for process to exit
>> -                do {
>> -                    w_pid = waitpid(service->pid, &status, WNOHANG);
>> -                    if (w_pid < 0) {
>> -                        if (errno == EINTR)
>> -                            continue;
>> -                        else
>> -                            break;
>> -                    } else if (w_pid > 0) {
>> -                        if (WIFEXITED(status) || WIFSIGNALED(status)) {
>> -                            break;
>> +    if (reason == NCSCC_RC_REQ_TIMOUT) {
>> +        pid = get_pid_from_file(service->serv_name);
>> +        if (pid > 0) {
>> +            if (check_process(service)) {
>> +                // send abort signal to process to generate a core dump
>> +                LOG_ER("Sending SIGABRT to %s, pid=%d, (origin 
>> parent pid=%d)", service->serv_name, pid, service->pid);
>> +                if (kill(pid, SIGABRT) >= 0) {
>> +                    // wait a short period for process to exit
>> +                    do {
>> +                        w_pid = waitpid(service->pid, &status, 
>> WNOHANG);
>> +                        if (w_pid < 0) {
>> +                            if (errno == EINTR)
>> +                                continue;
>> +                            else
>> +                                break;
>> +                        } else if (w_pid > 0) {
>> +                            if (WIFEXITED(status) || 
>> WIFSIGNALED(status)) {
>> +                                break;
>> +                            }
>>                           }
>> -                    }
>> -                    sleep(1);
>> -                } while (++no_of_retries < MAX_NO_RETRIES);
>> +                        sleep(1);
>> +                    } while (++no_of_retries < MAX_NO_RETRIES);
>> +                }
>>               }
>>           }
>>       }
>> @@ -1229,7 +1231,7 @@ void cleanup(NID_SPAWN_INFO *service)
>>    * Return Values : 
>> NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE.                       *
>> * *
>> ***************************************************************************/
>> -uint32_t recovery_action(NID_SPAWN_INFO *service, char *strbuff)
>> +uint32_t recovery_action(NID_SPAWN_INFO *service, char *strbuff, int 
>> reason)
>>   {
>>       uint32_t count = 0;
>>       NID_RECOVERY_OPT opt = NID_RESPAWN;
>> @@ -1244,7 +1246,7 @@ uint32_t recovery_action(NID_SPAWN_INFO
>>                        /* Just clean the stuff we created during prev 
>> retry */
>>               if (service->pid != 0)
>> -                cleanup(service);
>> +                cleanup(service, reason);
>>                        /* Done with cleanup so goahead with recovery */
>>               if ((service->recovery_matrix[opt].action) (service, 
>> strbuff) != NCSCC_RC_SUCCESS) {
>> @@ -1312,7 +1314,7 @@ uint32_t spawn_services(char *strbuf)
>>           if (rc != NCSCC_RC_SUCCESS) {
>>               LOG_ER("%s", sbuff);
>>               LOG_ER("Going for recovery");
>> -            if (recovery_action(service, sbuff) != NCSCC_RC_SUCCESS) {
>> +            if (recovery_action(service, sbuff, rc) != 
>> NCSCC_RC_SUCCESS) {
>>                   exit(EXIT_FAILURE);
>>               }
>>           }
>


------------------------------------------------------------------------------
One dashboard for servers and applications across Physical-Virtual-Cloud 
Widest out-of-the-box monitoring support with 50+ applications
Performance metrics, stats and reports that give you Actionable Insights
Deep dive visibility with transaction tracing using APM Insight.
http://ad.doubleclick.net/ddm/clk/290420510;117567292;y
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1 of 1] nid: cleanup should generate a core dump V3 [#1300]

Reply via email to