Hi Mathi, please see my comments inline, marked with [HansN].
/Thanks HansN On 12/19/2016 01:05 PM, Mathivanan Naickan Palanivelu wrote: > Hi Hans, > > Quick comments > > (a) Remove the comma below: > + {"CLMD", "osafclmd.fifo", -1}, [HansN] I have removed the comma. > > (b) NULL is not a part of c++11 right. [HansN] yes, I changed NULL to nullptr. > > (c) I'm not sure there is an obvious reason for converting this file to c++ [HansN] one reason to change to C++ was easier use of e.g. FileNotify and use STL. > > Thanks, > Mathi. > >> -----Original Message----- >> From: Hans Nordeback [mailto:hans.nordeb...@ericsson.com] >> Sent: Tuesday, December 13, 2016 7:27 PM >> To: Mathivanan Naickan Palanivelu; Praveen Malviya; Ramesh Babu Betham; >> anders.wid...@ericsson.com >> Cc: opensaf-devel@lists.sourceforge.net >> Subject: [PATCH 1 of 1] nid: Use the FIFO monitoring for started services V2 >> [#2204] >> >> osaf/services/infrastructure/nid/Makefile.am | 2 +- >> osaf/services/infrastructure/nid/nodeinit.c | 285 >> ++++++++++++++++++++++++++- >> 2 files changed, 278 insertions(+), 9 deletions(-) >> >> >> diff --git a/osaf/services/infrastructure/nid/Makefile.am >> b/osaf/services/infrastructure/nid/Makefile.am >> --- a/osaf/services/infrastructure/nid/Makefile.am >> +++ b/osaf/services/infrastructure/nid/Makefile.am >> @@ -31,7 +31,7 @@ opensafd_CPPFLAGS = \ >> $(AM_CPPFLAGS) >> >> opensafd_SOURCES = \ >> - nodeinit.c >> + nodeinit.cc >> >> opensafd_LDADD = \ >> $(top_builddir)/osaf/libs/core/libopensaf_core.la >> diff --git a/osaf/services/infrastructure/nid/nodeinit.c >> b/osaf/services/infrastructure/nid/nodeinit.cc >> rename from osaf/services/infrastructure/nid/nodeinit.c >> rename to osaf/services/infrastructure/nid/nodeinit.cc >> --- a/osaf/services/infrastructure/nid/nodeinit.c >> +++ b/osaf/services/infrastructure/nid/nodeinit.cc >> @@ -63,10 +63,15 @@ >> #include <configmake.h> >> #include <rda_papi.h> >> #include <logtrace.h> >> + >> +#include <string> >> +#include <vector> >> + >> #include "osaf_poll.h" >> #include 
"osaf_time.h" >> >> #include "nodeinit.h" >> +#include "osaf/libs/core/cplusplus/base/file_notify.h" >> >> #define SETSIG(sa, sig, fun, flags) \ >> do { \ >> @@ -111,11 +116,46 @@ static uint32_t recovery_action(NID_SPAW static >> uint32_t spawn_services(char *); static void nid_sleep(uint32_t); >> >> +/* Functions used for service monitoring */ static uint32_t >> +create_svc_monitor_thread(void); static void* svc_monitor_thread(void >> +*fd); static int handle_data_request(struct pollfd *fds, const >> +std::string &nid_name); static void handle_svc_exit(int fd); static >> +std::string get_svc_name(int fd); static int start_monitor_svc(const >> +char *svc); >> + >> +/* Data declarations for service monitoring */ static int svc_mon_fd = >> +-1; static int next_svc_fds_slot = 0; >> + >> +struct SvcMap { >> + std::string nid_name; >> + std::string fifo_file; >> + int fifo_fd; >> +}; >> + >> +static std::vector<SvcMap> svc_map = { >> + {"AMFD", "osafamfd.fifo", -1}, >> + {"TRANSPORT", "osaftransportd.fifo", -1}, >> + {"CLMNA", "osafclmna.fifo", -1}, >> + {"RDED", "osafrded.fifo", -1}, >> + {"HLFM", "osaffmd.fifo", -1}, >> + {"IMMD", "osafimmd.fifo", -1}, >> + {"IMMND", "osafimmnd.fifo", -1}, >> + {"LOGD", "osaflogd.fifo", -1}, >> + {"NTFD", "osafntfd.fifo", -1}, >> + {"PLMD", "osafplmd.fifo", -1}, >> + {"CLMD", "osafclmd.fifo", -1}, >> +}; >> +static const std::string fifo_dir = PKGLOCALSTATEDIR; const int >> +kMaxNumOfFds = 40; const int kTenSecondsInMilliseconds = 10000; >> + >> /* List of recovery strategies */ >> NID_FUNC recovery_funcs[] = { spawn_wait }; NID_FORK_FUNC >> fork_funcs[] = { fork_process, fork_script, fork_daemon }; >> >> -char *nid_recerr[NID_MAXREC][4] = { >> +const char *nid_recerr[NID_MAXREC][4] = { >> {"Trying To RESPAWN", "Could Not RESPAWN", "Succeeded To >> RESPAWN", "FAILED TO RESPAWN"}, >> {"Trying To RESET", "Faild to RESET", "suceeded To RESET", "FAILED >> AFTER RESTART"} }; @@ -167,10 +207,10 @@ char *gettoken(char **str, >> uint32_t 
tok) >> return (NULL); >> } >> >> - while ((*p != tok) && (*p != '\n') && *p) >> + while ((*p != static_cast<int>(tok)) && (*p != '\n') && *p) >> p++; >> >> - if ((*p == tok) || (*p == '\n')) { >> + if ((*p == static_cast<int>(tok)) || (*p == '\n')) { >> *p++ = 0; >> *str = p; >> } >> @@ -522,7 +562,7 @@ uint32_t parse_nodeinit_conf(char *strbu >> NID_SPAWN_INFO *childinfo; >> char buff[256], sbuf[200], *ch, *ch1, tmp[30], nidconf[256]; >> uint32_t lineno = 0, retry = 0; >> - struct nid_resetinfo info = { {""}, -1 }; >> + struct nid_resetinfo info = { {""}, static_cast<uint32_t>(-1) }; >> FILE *file, *ntfile; >> >> TRACE_ENTER(); >> @@ -565,7 +605,7 @@ uint32_t parse_nodeinit_conf(char *strbu >> } >> >> /* Allocate mem for new child info */ >> - while ((childinfo = malloc(sizeof(NID_SPAWN_INFO))) == >> NULL) { >> + while ((childinfo = >> +reinterpret_cast<NID_SPAWN_INFO*>(malloc(sizeof(NID_SPAWN_INFO)) >> )) == >> +NULL) { >> if (retry++ == 5) { >> sprintf(strbuf, "FAILURE: Out of memory\n"); >> return NCSCC_RC_FAILURE; >> @@ -994,6 +1034,8 @@ uint32_t spawn_wait(NID_SPAWN_INFO *serv >> break; >> } >> >> + waitpid(pid, NULL, WNOHANG); >> + >> /* Read the message from FIFO and fill in structure. */ >> while ((n = read(select_fd, buff1, sizeof(buff1))) <= 0) { >> if (errno == EINTR) { >> @@ -1263,7 +1305,7 @@ uint32_t recovery_action(NID_SPAWN_INFO >> if (service->recovery_matrix[opt].retry_count == 0) { >> if (count != 0) >> LOG_ER("%s", nid_recerr[opt][3]); >> - opt++; >> + opt = >> static_cast<NID_RECOVERY_OPT>(static_cast<int>(opt) +1); >> continue; >> } >> } >> @@ -1285,8 +1327,7 @@ uint32_t recovery_action(NID_SPAWN_INFO >> * Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE. 
>> * >> * >> * >> >> ********************************************************** >> *****************/ >> -uint32_t spawn_services(char *strbuf) >> -{ >> +uint32_t spawn_services(char *strbuf) { >> NID_SPAWN_INFO *service; >> NID_CHILD_LIST sp_list = spawn_list; >> char sbuff[100]; >> @@ -1322,6 +1363,10 @@ uint32_t spawn_services(char *strbuf) >> if (strlen(sbuff) > 0) >> LOG_NO("%s", sbuff); >> >> + if (start_monitor_svc(service->serv_name) != >> NCSCC_RC_SUCCESS) { >> + exit(EXIT_FAILURE); >> + } >> + >> sp_list.head = sp_list.head->next; >> } >> >> @@ -1330,6 +1375,225 @@ uint32_t spawn_services(char *strbuf) >> return NCSCC_RC_SUCCESS; >> } >> >> +int start_monitor_svc(const char *svc) { >> + int rc = NCSCC_RC_SUCCESS; >> + char svc_name[NID_MAXSNAME]; >> + >> + TRACE_ENTER2("service: %s", svc); >> + >> + strncpy(svc_name, svc, sizeof(svc_name)); >> + >> + while (true) { >> + ssize_t write_rc = write(svc_mon_fd, svc_name, strlen(svc_name)); >> + if (write_rc == -1) { >> + if (errno == EINTR) { >> + continue; >> + } else { >> + LOG_ER("Failed to start sevice %s, error: %s", >> + svc_name, strerror(errno)); >> + rc = NCSCC_RC_FAILURE; >> + break; >> + } >> + } >> + break; >> + } >> + TRACE_LEAVE(); >> + return rc; >> +} >> + >> +int handle_data_request(struct pollfd *fds, const std::string >> +&nid_name) { >> + base::FileNotify file_notify; >> + base::FileNotify::FileNotifyErrors notify_rc; >> + int rc = NCSCC_RC_SUCCESS; >> + int fifo_fd = -1; >> + >> + TRACE_ENTER2("service: %s", nid_name.c_str()); >> + >> + for (auto &svc : svc_map) { >> + if (nid_name == svc.nid_name) { >> + std::string fifo_file = fifo_dir + "/" + svc.fifo_file; >> + notify_rc = file_notify.WaitForFileCreation(fifo_file, >> + >> kTenSecondsInMilliseconds); >> + if (notify_rc != base::FileNotify::FileNotifyErrors::kOK) { >> + LOG_ER("fifo file %s does not exist, notify rc: %d", >> + fifo_file.c_str(), notify_rc); >> + rc = NCSCC_RC_FAILURE; >> + break; >> + } >> + int retry_cnt = 0; >> + do { 
>> + if (retry_cnt > 0) { >> + osaf_nanosleep(&kHundredMilliseconds); >> + } >> + fifo_fd = open(fifo_file.c_str(), O_WRONLY|O_NONBLOCK); >> + } while ((fifo_fd == -1) && >> + (retry_cnt++ < 5 && (errno == EINTR || errno == >> + ENXIO))); >> + >> + if (fifo_fd == -1) { >> + LOG_ER("Failed to open %s, error: %s", fifo_file.c_str(), >> + strerror(errno)); >> + rc = NCSCC_RC_FAILURE; >> + break; >> + } else { >> + svc.fifo_fd = fifo_fd; >> + fds[next_svc_fds_slot].fd = fifo_fd; >> + fds[next_svc_fds_slot].events = POLLIN; >> + next_svc_fds_slot++; >> + LOG_NO("Monitoring of %s started", nid_name.c_str()); >> + break; >> + } >> + } >> + } >> + TRACE_LEAVE(); >> + return rc; >> +} >> + >> +std::string get_svc_name(int fd) { >> + std::string svc_name; >> + >> + for (auto const& svc : svc_map) { >> + if (fd == svc.fifo_fd) { >> + svc_name = svc.nid_name; >> + break; >> + } >> + } >> + return svc_name; >> +} >> + >> +void handle_svc_exit(int fd) { >> + const std::string &svc_name = get_svc_name(fd); >> + >> + if (svc_name.size() != 0) { >> + LOG_ER("Service %s has unexpectedly crashed. Unable to continue, >> exiting", >> + svc_name.c_str()); >> + exit(EXIT_FAILURE); >> + } else { >> + LOG_NO("fd %d was not found in service map", fd); >> + } >> +} >> + >> +/********************************************************* >> ******************* >> + * Name : svc_monitor_thread >> * >> + * >> * >> + * Description : creates the service monitor thread >> * >> + * >> * >> + * Arguments : - >> * >> + * >> * >> + * Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE. 
>> * >> + * >> * >> + >> +********************************************************* >> ************** >> +****/ >> +void* svc_monitor_thread(void *fd) { >> + char nid_name[NID_MAXSNAME]; >> + int svc_mon_thr_fd = *(reinterpret_cast<int*>(fd)); >> + enum { >> + FD_SVC_MON_THR = 0, >> + }; >> + >> + struct pollfd *fds; >> + >> + fds = new pollfd[sizeof(pollfd) * kMaxNumOfFds]; osafassert(fds != >> + NULL); ssize_t read_rc = -1; >> + >> + fds[FD_SVC_MON_THR].fd = svc_mon_thr_fd; >> fds[FD_SVC_MON_THR].events >> + = POLLIN; next_svc_fds_slot++; >> + >> + while (true) { >> + unsigned rc = osaf_poll(fds, next_svc_fds_slot, -1); >> + if (rc > 0) { >> + // check if any monitored service has exit >> + for (int i = next_svc_fds_slot-1; i > 0; --i) { >> + if ((fds[i].revents & POLLIN) || >> + (fds[i].revents & POLLHUP) || >> + (fds[i].revents & POLLERR)) { >> + handle_svc_exit(fds[i].fd); >> + } >> + } >> + >> + if (fds[FD_SVC_MON_THR].revents & POLLIN) { >> + while (true) { >> + read_rc = read(svc_mon_thr_fd, nid_name, NID_MAXSNAME); >> + if (read_rc == -1) { >> + if (errno == EINTR) { >> + continue; >> + } else { >> + LOG_ER("Failed to read on socketpair descriptor: %s", >> + strerror(errno)); >> + exit(EXIT_FAILURE); >> + } >> + } >> + osafassert(read_rc < NID_MAXSNAME); >> + nid_name[read_rc] = '\0'; >> + break; >> + } >> + if (handle_data_request(fds, nid_name) != NCSCC_RC_SUCCESS) { >> + LOG_ER("Failed to start monitoring for service %s, exiting", >> + nid_name); >> + exit(EXIT_FAILURE); >> + } >> + } >> + } else { >> + LOG_ER("osaf_poll timed out and no descriptors are ready, exiting"); >> + exit(EXIT_FAILURE); >> + } >> + } >> + delete [] fds; >> +} >> + >> +/********************************************************* >> ******************* >> + * Name : create_svc_monitor_thread >> * >> + * >> * >> + * Description : creates the service monitor thread >> * >> + * >> * >> + * Arguments : - >> * >> + * >> * >> + * Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE. 
>> * >> + * >> * >> + >> +********************************************************* >> ************** >> +****/ uint32_t create_svc_monitor_thread(void) { >> + int s_pair[2]; >> + int svc_mon_thr_fd = -1; >> + pthread_t thread; >> + pthread_attr_t attr; >> + >> + TRACE_ENTER(); >> + >> + if (socketpair(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0, s_pair) == -1) { >> + LOG_ER("socketpair FAILED: %s", strerror(errno)); >> + return NCSCC_RC_FAILURE; >> + } >> + >> + svc_mon_fd = s_pair[0]; >> + svc_mon_thr_fd = s_pair[1]; >> + >> + TRACE("sd1: %d sd2: %d", svc_mon_fd, svc_mon_thr_fd); >> + >> + if (pthread_attr_init(&attr) != 0) { >> + LOG_ER("pthread_attr_init FAILED: %s", strerror(errno)); >> + return NCSCC_RC_FAILURE; >> + } >> + >> + if (pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != >> 0) { >> + LOG_ER("pthread_setdetachstate FAILED: %s", strerror(errno)); >> + return NCSCC_RC_FAILURE; >> + } >> + >> + if (pthread_create(&thread, &attr, svc_monitor_thread, >> + reinterpret_cast<void*>(&svc_mon_thr_fd)) != 0) { >> + LOG_ER("pthread_create FAILED: %s", strerror(errno)); >> + return NCSCC_RC_FAILURE; >> + } >> + >> + if (pthread_attr_destroy(&attr) != 0) { >> + LOG_ER("pthread_attr_destroy FAILED: %s", strerror(errno)); >> + return NCSCC_RC_FAILURE; >> + } >> + >> + TRACE_LEAVE(); >> + return NCSCC_RC_SUCCESS; >> +} >> + >> >> /********************************************************** >> ****************** >> * Name : main >> * >> * >> * >> @@ -1365,6 +1629,11 @@ int main(int argc, char *argv[]) >> exit(EXIT_FAILURE); >> } >> >> + if (create_svc_monitor_thread() != NCSCC_RC_SUCCESS) { >> + LOG_ER("Failed to create service monitor thread, exiting"); >> + exit(EXIT_FAILURE); >> + } >> + >> if (parse_nodeinit_conf(sbuf) != NCSCC_RC_SUCCESS) { >> LOG_ER("Failed to parse file %s. 
Exiting", sbuf); >> exit(EXIT_FAILURE); ------------------------------------------------------------------------------ Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel