Hi Anders,

thanks, I'll update/Regards HansN


On 12/16/2016 04:06 PM, Anders Widell wrote:
> Ack with comments, marked AndersW>.
>
> regards,
>
> Anders Widell
>
>
> On 12/13/2016 02:56 PM, Hans Nordeback wrote:
>> osaf/services/infrastructure/nid/Makefile.am |    2 +-
>>   osaf/services/infrastructure/nid/nodeinit.c  |  285 
>> ++++++++++++++++++++++++++-
>>   2 files changed, 278 insertions(+), 9 deletions(-)
>>
>>
>> diff --git a/osaf/services/infrastructure/nid/Makefile.am 
>> b/osaf/services/infrastructure/nid/Makefile.am
>> --- a/osaf/services/infrastructure/nid/Makefile.am
>> +++ b/osaf/services/infrastructure/nid/Makefile.am
>> @@ -31,7 +31,7 @@ opensafd_CPPFLAGS = \
>>       $(AM_CPPFLAGS)
>>     opensafd_SOURCES = \
>> -    nodeinit.c
>> +    nodeinit.cc
>>     opensafd_LDADD = \
>>       $(top_builddir)/osaf/libs/core/libopensaf_core.la
>> diff --git a/osaf/services/infrastructure/nid/nodeinit.c 
>> b/osaf/services/infrastructure/nid/nodeinit.cc
>> rename from osaf/services/infrastructure/nid/nodeinit.c
>> rename to osaf/services/infrastructure/nid/nodeinit.cc
>> --- a/osaf/services/infrastructure/nid/nodeinit.c
>> +++ b/osaf/services/infrastructure/nid/nodeinit.cc
>> @@ -63,10 +63,15 @@
>>   #include <configmake.h>
>>   #include <rda_papi.h>
>>   #include <logtrace.h>
>> +
>> +#include <string>
>> +#include <vector>
> [AndersW] Need to include <cerrno> due to the use of the errno variable.
>> +
>>   #include "osaf_poll.h"
>>   #include "osaf_time.h"
>>     #include "nodeinit.h"
>> +#include "osaf/libs/core/cplusplus/base/file_notify.h"
>>     #define SETSIG(sa, sig, fun, flags) \
>>       do { \
>> @@ -111,11 +116,46 @@ static uint32_t recovery_action(NID_SPAW
>>   static uint32_t spawn_services(char *);
>>   static void nid_sleep(uint32_t);
>>   +/* Functions used for service monitoring */
>> +static uint32_t create_svc_monitor_thread(void);
>> +static void* svc_monitor_thread(void *fd);
>> +static int handle_data_request(struct pollfd *fds, const std::string 
>> &nid_name);
>> +static void handle_svc_exit(int fd);
>> +static std::string get_svc_name(int fd);
>> +static int start_monitor_svc(const char *svc);
>> +
>> +/* Data declarations for service monitoring */
>> +static int svc_mon_fd = -1;
>> +static int next_svc_fds_slot = 0;
>> +
>> +struct SvcMap {
>> +  std::string nid_name;
>> +  std::string fifo_file;
>> +  int fifo_fd;
>> +};
>> +
>> +static std::vector<SvcMap> svc_map = {
>> +  {"AMFD", "osafamfd.fifo", -1},
>> +  {"TRANSPORT", "osaftransportd.fifo", -1},
>> +  {"CLMNA", "osafclmna.fifo", -1},
>> +  {"RDED", "osafrded.fifo", -1},
>> +  {"HLFM", "osaffmd.fifo", -1},
>> +  {"IMMD", "osafimmd.fifo", -1},
>> +  {"IMMND", "osafimmnd.fifo", -1},
>> +  {"LOGD", "osaflogd.fifo", -1},
>> +  {"NTFD", "osafntfd.fifo", -1},
>> +  {"PLMD", "osafplmd.fifo", -1},
>> +  {"CLMD", "osafclmd.fifo", -1},
> [AndersW] Remove the last comma character from line above.
>> +};
>> +static const std::string fifo_dir = PKGLOCALSTATEDIR;
>> +const int kMaxNumOfFds = 40;
>> +const int kTenSecondsInMilliseconds = 10000;
>> +
>>   /* List of recovery strategies */
>>   NID_FUNC recovery_funcs[] = { spawn_wait  };
>>   NID_FORK_FUNC fork_funcs[] = { fork_process, fork_script, 
>> fork_daemon };
>>   -char *nid_recerr[NID_MAXREC][4] = {
>> +const char *nid_recerr[NID_MAXREC][4] = {
>>       {"Trying To RESPAWN", "Could Not RESPAWN", "Succeeded To 
>> RESPAWN", "FAILED TO RESPAWN"},
>>       {"Trying To RESET", "Faild to RESET", "suceeded To RESET", 
>> "FAILED AFTER RESTART"}
>>   };
>> @@ -167,10 +207,10 @@ char *gettoken(char **str, uint32_t tok)
>>           return (NULL);
>>       }
>>   -    while ((*p != tok) && (*p != '\n') && *p)
>> +    while ((*p != static_cast<int>(tok)) && (*p != '\n') && *p)
>>           p++;
>>   -    if ((*p == tok) || (*p == '\n')) {
>> +    if ((*p == static_cast<int>(tok)) || (*p == '\n')) {
>>           *p++ = 0;
>>           *str = p;
>>       }
>> @@ -522,7 +562,7 @@ uint32_t parse_nodeinit_conf(char *strbu
>>       NID_SPAWN_INFO *childinfo;
>>       char buff[256], sbuf[200], *ch, *ch1, tmp[30], nidconf[256];
>>       uint32_t lineno = 0, retry = 0;
>> -    struct nid_resetinfo info = { {""}, -1 };
>> +    struct nid_resetinfo info = { {""}, static_cast<uint32_t>(-1) };
>>       FILE *file, *ntfile;
>>         TRACE_ENTER();
>> @@ -565,7 +605,7 @@ uint32_t parse_nodeinit_conf(char *strbu
>>           }
>>             /* Allocate mem for new child info */
>> -        while ((childinfo = malloc(sizeof(NID_SPAWN_INFO))) == NULL) {
>> +        while ((childinfo = 
>> reinterpret_cast<NID_SPAWN_INFO*>(malloc(sizeof(NID_SPAWN_INFO)))) == 
>> NULL) {
> [AndersW] Use nullptr instead of NULL.
>>               if (retry++ == 5) {
>>                   sprintf(strbuf, "FAILURE: Out of memory\n");
>>                   return NCSCC_RC_FAILURE;
>> @@ -994,6 +1034,8 @@ uint32_t spawn_wait(NID_SPAWN_INFO *serv
>>           break;
>>       }
>>   +    waitpid(pid, NULL, WNOHANG);
> [AndersW] Use nullptr instead of NULL.
>> +
>>       /* Read the message from FIFO and fill in structure. */
>>       while ((n = read(select_fd, buff1, sizeof(buff1))) <= 0) {
>>           if (errno == EINTR) {
>> @@ -1263,7 +1305,7 @@ uint32_t recovery_action(NID_SPAWN_INFO
>>           if (service->recovery_matrix[opt].retry_count == 0) {
>>               if (count != 0)
>>                   LOG_ER("%s", nid_recerr[opt][3]);
>> -            opt++;
>> +            opt = 
>> static_cast<NID_RECOVERY_OPT>(static_cast<int>(opt) +1);
>  [AndersW] Add space after plus sign on the line above.
>>               continue;
>>           }
>>       }
>> @@ -1285,8 +1327,7 @@ uint32_t recovery_action(NID_SPAWN_INFO
>>    * Return Values : 
>> NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE.                       *
>> * *
>> ***************************************************************************/
>> -uint32_t spawn_services(char *strbuf)
>> -{
>> +uint32_t spawn_services(char *strbuf) {
>>       NID_SPAWN_INFO *service;
>>       NID_CHILD_LIST sp_list = spawn_list;
>>       char sbuff[100];
>> @@ -1322,6 +1363,10 @@ uint32_t spawn_services(char *strbuf)
>>           if (strlen(sbuff) > 0)
>>               LOG_NO("%s", sbuff);
>>   +        if (start_monitor_svc(service->serv_name) != 
>> NCSCC_RC_SUCCESS) {
>> +            exit(EXIT_FAILURE);
>> +        }
>> +
>>           sp_list.head = sp_list.head->next;
>>       }
>>   @@ -1330,6 +1375,225 @@ uint32_t spawn_services(char *strbuf)
>>       return NCSCC_RC_SUCCESS;
>>   }
>>   +int start_monitor_svc(const char *svc) {
>> +  int rc = NCSCC_RC_SUCCESS;
>> +  char svc_name[NID_MAXSNAME];
>> +
>> +  TRACE_ENTER2("service: %s", svc);
>> +
>> +  strncpy(svc_name, svc, sizeof(svc_name));
> [AndersW] Use snprintf() instead of strncpy(), since strncpy() does 
> not guarantee that svc_name is NUL-terminated when the string doesn't 
> fit.
>> +
>> +  while (true) {
>> +    ssize_t write_rc = write(svc_mon_fd, svc_name, strlen(svc_name));
>> +    if (write_rc == -1) {
>> +      if (errno == EINTR) {
>> +        continue;
>> +      } else {
>> +        LOG_ER("Failed to start sevice %s, error: %s",
>> +               svc_name, strerror(errno));
>> +        rc = NCSCC_RC_FAILURE;
>> +        break;
>> +      }
>> +    }
>> +    break;
>> +  }
>> +  TRACE_LEAVE();
>> +  return rc;
>> +}
>> +
>> +int handle_data_request(struct pollfd *fds, const std::string 
>> &nid_name) {
>> +  base::FileNotify file_notify;
>> +  base::FileNotify::FileNotifyErrors notify_rc;
>> +  int rc = NCSCC_RC_SUCCESS;
>> +  int fifo_fd = -1;
>> +
>> +  TRACE_ENTER2("service: %s", nid_name.c_str());
>> +
>> +  for (auto &svc : svc_map) {
>> +    if (nid_name == svc.nid_name) {
>> +      std::string fifo_file = fifo_dir + "/" + svc.fifo_file;
>> +      notify_rc = file_notify.WaitForFileCreation(fifo_file,
>> + kTenSecondsInMilliseconds);
>> +      if (notify_rc != base::FileNotify::FileNotifyErrors::kOK) {
>> +        LOG_ER("fifo file %s does not exist, notify rc: %d",
>> +               fifo_file.c_str(), notify_rc);
>> +        rc = NCSCC_RC_FAILURE;
>> +        break;
>> +      }
>> +      int retry_cnt = 0;
>> +      do {
>> +        if (retry_cnt > 0) {
>> +          osaf_nanosleep(&kHundredMilliseconds);
>> +        }
>> +        fifo_fd = open(fifo_file.c_str(), O_WRONLY|O_NONBLOCK);
>> +      } while ((fifo_fd == -1) &&
>> +               (retry_cnt++ < 5 && (errno == EINTR || errno == 
>> ENXIO)));
>> +
>> +      if (fifo_fd == -1) {
>> +        LOG_ER("Failed to open %s, error: %s", fifo_file.c_str(),
>> +               strerror(errno));
>> +        rc = NCSCC_RC_FAILURE;
>> +        break;
>> +      } else {
> [AndersW] Should check if next_svc_fds_slot >= kMaxNumOfFds
>> +        svc.fifo_fd = fifo_fd;
>> +        fds[next_svc_fds_slot].fd = fifo_fd;
>> +        fds[next_svc_fds_slot].events = POLLIN;
>> +        next_svc_fds_slot++;
>> +        LOG_NO("Monitoring of %s started", nid_name.c_str());
>> +        break;
>> +      }
>> +    }
>> +  }
>> +  TRACE_LEAVE();
>> +  return rc;
>> +}
>> +
>> +std::string get_svc_name(int fd) {
>> +  std::string svc_name;
>> +
>> +  for (auto const& svc : svc_map) {
>> +    if (fd == svc.fifo_fd) {
>> +      svc_name = svc.nid_name;
>> +      break;
>> +    }
>> +  }
>> +  return svc_name;
>> +}
>> +
>> +void handle_svc_exit(int fd) {
>> +  const std::string &svc_name = get_svc_name(fd);
>> +
>> +  if (svc_name.size() != 0) {
>> +    LOG_ER("Service %s has unexpectedly crashed. Unable to continue, 
>> exiting",
>> +           svc_name.c_str());
>> +    exit(EXIT_FAILURE);
>> +  } else {
>> +    LOG_NO("fd %d was not found in service map", fd);
>> +  }
>> +}
>> +
>> +/****************************************************************************
>>  
>>
>> + * Name          : 
>> svc_monitor_thread                                       *
>> + * *
>> + * Description   : creates the service monitor 
>> thread                       *
>> + * *
>> + * Arguments     : 
>> -                                                        *
>> + * *
>> + * Return Values : 
>> NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE.                       *
>> + * *
>> + 
>> ***************************************************************************/
>> +void* svc_monitor_thread(void *fd) {
>> +  char nid_name[NID_MAXSNAME];
>> +  int svc_mon_thr_fd = *(reinterpret_cast<int*>(fd));
>> +  enum {
>> +    FD_SVC_MON_THR = 0,
> [AndersW] Remove the comma from the line above.
>> +  };
>> +
>> +  struct pollfd *fds;
>> +
>> +  fds = new pollfd[sizeof(pollfd) * kMaxNumOfFds];
> [AndersW] Remove sizeof(pollfd) * from the line above.
>> +  osafassert(fds != NULL);
> [AndersW] Use nullptr instead of NULL.
>> +  ssize_t read_rc = -1;
>> +
>> +  fds[FD_SVC_MON_THR].fd = svc_mon_thr_fd;
>> +  fds[FD_SVC_MON_THR].events = POLLIN;
>> +  next_svc_fds_slot++;
>> +
>> +  while (true) {
>> +    unsigned rc = osaf_poll(fds, next_svc_fds_slot, -1);
>> +    if (rc > 0) {
>> +      // check if any monitored service has exit
>> +      for (int i = next_svc_fds_slot-1; i > 0; --i) {
> [AndersW] Add spaces around binary operator "-" on the line above.
>> +        if ((fds[i].revents & POLLIN) ||
>> +            (fds[i].revents & POLLHUP) ||
>> +            (fds[i].revents & POLLERR)) {
>> +          handle_svc_exit(fds[i].fd);
>> +        }
>> +      }
>> +
>> +      if (fds[FD_SVC_MON_THR].revents & POLLIN) {
>> +        while (true) {
>> +          read_rc = read(svc_mon_thr_fd, nid_name, NID_MAXSNAME);
>> +          if (read_rc == -1) {
>> +            if (errno == EINTR) {
>> +              continue;
>> +            } else {
>> +              LOG_ER("Failed to read on socketpair descriptor: %s",
>> +                     strerror(errno));
>> +              exit(EXIT_FAILURE);
>> +            }
>> +          }
>> +          osafassert(read_rc < NID_MAXSNAME);
>> +          nid_name[read_rc] = '\0';
>> +          break;
>> +        }
>> +        if (handle_data_request(fds, nid_name) != NCSCC_RC_SUCCESS) {
>> +          LOG_ER("Failed to start monitoring for service %s, exiting",
>> +                 nid_name);
>> +          exit(EXIT_FAILURE);
>> +        }
>> +      }
>> +    } else {
>> +      LOG_ER("osaf_poll timed out and no descriptors are ready, 
>> exiting");
>> +      exit(EXIT_FAILURE);
>> +    }
>> +  }
>> +  delete [] fds;
> [AndersW] The line above is unreachable - can be deleted.
>> +}
>> +
>> +/****************************************************************************
>>  
>>
>> + * Name          : 
>> create_svc_monitor_thread                                *
>> + * *
>> + * Description   : creates the service monitor 
>> thread                       *
>> + * *
>> + * Arguments     : 
>> -                                                        *
>> + * *
>> + * Return Values : 
>> NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE.                       *
>> + * *
>> + 
>> ***************************************************************************/
>> +uint32_t create_svc_monitor_thread(void) {
>> +  int s_pair[2];
>> +  int svc_mon_thr_fd = -1;
>> +  pthread_t thread;
>> +  pthread_attr_t attr;
>> +
>> +  TRACE_ENTER();
>> +
>> +  if (socketpair(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0, s_pair) == -1) {
> [AndersW] Add spaces around binary operator "|" on the line above.
>> +    LOG_ER("socketpair FAILED: %s", strerror(errno));
>> +    return NCSCC_RC_FAILURE;
>> +  }
>> +
>> +  svc_mon_fd = s_pair[0];
>> +  svc_mon_thr_fd = s_pair[1];
>> +
>> +  TRACE("sd1: %d sd2: %d", svc_mon_fd, svc_mon_thr_fd);
>> +
>> +  if (pthread_attr_init(&attr) != 0) {
>> +    LOG_ER("pthread_attr_init FAILED: %s", strerror(errno));
>> +    return NCSCC_RC_FAILURE;
>> +  }
>> +
>> +  if (pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 
>> 0) {
>> +    LOG_ER("pthread_setdetachstate FAILED: %s", strerror(errno));
>> +    return NCSCC_RC_FAILURE;
>> +  }
>> +
>> +  if (pthread_create(&thread, &attr, svc_monitor_thread,
>> +  reinterpret_cast<void*>(&svc_mon_thr_fd)) != 0) {
>> +    LOG_ER("pthread_create FAILED: %s", strerror(errno));
>> +    return NCSCC_RC_FAILURE;
>> +  }
>> +
>> +  if (pthread_attr_destroy(&attr) != 0) {
>> +    LOG_ER("pthread_attr_destroy FAILED: %s", strerror(errno));
>> +    return NCSCC_RC_FAILURE;
>> +  }
>> +
>> +  TRACE_LEAVE();
>> +  return NCSCC_RC_SUCCESS;
>> +}
>> +
>> /****************************************************************************
>>    * Name          : 
>> main                                                     *
>> * *
>> @@ -1365,6 +1629,11 @@ int main(int argc, char *argv[])
>>           exit(EXIT_FAILURE);
>>       }
>>   +    if (create_svc_monitor_thread() != NCSCC_RC_SUCCESS) {
>> +        LOG_ER("Failed to create service monitor thread, exiting");
>> +        exit(EXIT_FAILURE);
>> +    }
>> +
>>       if (parse_nodeinit_conf(sbuf) != NCSCC_RC_SUCCESS) {
>>           LOG_ER("Failed to parse file %s. Exiting", sbuf);
>>           exit(EXIT_FAILURE);
>


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most 
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to