Hi Nagu,

I did a quick test with a patch (attached in this mail) that provokes the fault reported in #517. I did a quick test with the patch for review and the "provoking" patch,
the fault seems to remain.

I tested according to below:
- Apply both patches.
- Start the system in UML.
- Check traces on SC-2, tail -f root/var/SC-2/log/messages
- When the system is up load the AmfDemo configuration, immcfg -f immcfg -f /hostfs/AppConfig-2N.xml


Mar 17 16:55:12 SC-2 local0.err osafamfnd[467]: ER XXXX: Provoke ticket 517 fault, safSu=SU2,safSg=AmfDemo,safApp=AmfDemo1 Mar 17 16:55:33 SC-2 user.notice kernel: random: nonblocking pool is initialized Mar 17 16:56:09 SC-2 local0.err osafamfwd[631]: TIMEOUT receiving AMF health check request, generating core for amfnd Mar 17 16:56:09 SC-2 local0.err osafamfwd[631]: Last received healthcheck cnt=2

/Thanks HansN


On 03/15/2016 09:26 AM, [email protected] wrote:
  osaf/services/saf/amf/amfnd/Makefile.am    |    3 +-
  osaf/services/saf/amf/amfnd/evt.cc         |   14 ++-
  osaf/services/saf/amf/amfnd/imm.cc         |  137 
+++++++++++++++++++++++++++++
  osaf/services/saf/amf/amfnd/include/avnd.h |    6 +
  osaf/services/saf/amf/amfnd/main.cc        |    3 +-
  5 files changed, 158 insertions(+), 5 deletions(-)


If immnd is down and Amf decides to instantiate some SU, then there
is a deadlock between Amfnd and Imm. Amf calls Imm api to read
imm attributes during SU instantiation and stuck because Imm
is down. Imm waits for Amfnd to instantiate/restart it.
So, both waits for each other. This is a deadlock.
So, the fix bypasses main thread for SU instantiation and does
process of SU instantiation in a separate thread.
So, when Imm is down, its instantiation/restart happens in the
main thread. If any SU instantiation request comes, then Amfnd
routes that request in a separate thread, that threads stuck in Imm
api call untill Imm is not restarted and responds to Imm api call.
This obviates the deadlock between Amfnd and Imm.
So, this fix has advantage for not having any mutex between threads.
The reason is that SU is yet to instantiate, so it is still
immune to faults of any components and hence no need of having any
mutex among the threads.

diff --git a/osaf/services/saf/amf/amfnd/Makefile.am 
b/osaf/services/saf/amf/amfnd/Makefile.am
--- a/osaf/services/saf/amf/amfnd/Makefile.am
+++ b/osaf/services/saf/amf/amfnd/Makefile.am
@@ -60,7 +60,8 @@ osafamfnd_SOURCES = \
        term.cc \
        tmr.cc \
        util.cc \
-       verify.cc
+       verify.cc \
+       imm.cc
osafamfnd_LDADD = \
        $(top_builddir)/osaf/tools/safimm/src/libimmutil.la \
diff --git a/osaf/services/saf/amf/amfnd/evt.cc 
b/osaf/services/saf/amf/amfnd/evt.cc
--- a/osaf/services/saf/amf/amfnd/evt.cc
+++ b/osaf/services/saf/amf/amfnd/evt.cc
@@ -319,9 +319,17 @@ void avnd_evt_destroy(AVND_EVT *evt)
  
******************************************************************************/
  uint32_t avnd_evt_send(AVND_CB *cb, AVND_EVT *evt)
  {
-       uint32_t rc = m_NCS_IPC_SEND(&cb->mbx, evt, evt->priority);
-       if (NCSCC_RC_SUCCESS != rc)
-               LOG_CR("AvND send event to mailbox failed, type = %u", 
evt->type);
+       uint32_t rc;
+       if (evt->type == AVND_EVT_AVD_SU_PRES_MSG) {
+               TRACE("AVND_EVT_AVD_REG_SU_MSG");
+               rc = m_NCS_IPC_SEND(&ir_cb.mbx, evt, evt->priority);
+               if (NCSCC_RC_SUCCESS != rc)
+                       LOG_CR("AvND send event to mailbox failed, type = %u", 
evt->type);
+       } else {
+               rc = m_NCS_IPC_SEND(&cb->mbx, evt, evt->priority);
+               if (NCSCC_RC_SUCCESS != rc)
+                       LOG_CR("AvND send event to mailbox failed, type = %u", 
evt->type);
+       }
return rc;
  }
diff --git a/osaf/services/saf/amf/amfnd/imm.cc 
b/osaf/services/saf/amf/amfnd/imm.cc
new file mode 100644
--- /dev/null
+++ b/osaf/services/saf/amf/amfnd/imm.cc
@@ -0,0 +1,137 @@
+/*      -*- OpenSAF  -*-
+ *
+ * (C) Copyright 2016 The OpenSAF Foundation
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed
+ * under the GNU Lesser General Public License Version 2.1, February 1999.
+ * The complete license can be accessed from the following location:
+ * http://opensource.org/licenses/lgpl-license.php
+ * See the Copying file included with the OpenSAF distribution for full
+ * licensing terms.
+ *
+ * Author(s): Oracle
+ *
+ */
+
+#include "avnd.h"
+#include <poll.h>
+
+enum {
+       FD_MBX = 0
+};
+
+ir_cb_t ir_cb;
+
+extern const AVND_EVT_HDLR g_avnd_func_list[AVND_EVT_MAX];
+
+static struct pollfd fds[FD_MBX + 1];
+static nfds_t irfds = FD_MBX + 1;
+static void ir_process_event(AVND_EVT *evt);
+
+/**
+ * This thread will read classes and object information thereby allowing main
+ * thread to proceed with other important work.
+ * As of now, SU instantiation needs imm data to be read, so
+ * only this event is being handled here.
+ * Going forward, we need to use this function for similar purposes.
+ * @param _cb
+ *
+ * @return void*
+ */
+static void *imm_reader_thread(void *_cb)
+{
+       uint32_t rc = SA_AIS_OK;
+       AVND_EVT *evt;
+       TRACE_ENTER();
+       NCS_SEL_OBJ mbx_fd;
+
+       /* create the mail box */
+       rc = m_NCS_IPC_CREATE(&ir_cb.mbx);
+       if (NCSCC_RC_SUCCESS != rc) {
+               LOG_CR("AvND Mailbox creation failed");
+               exit(EXIT_FAILURE);
+       }
+       TRACE("Ir mailbox creation success");
+
+       /* attach the mail box */
+       rc = m_NCS_IPC_ATTACH(&ir_cb.mbx);
+       if (NCSCC_RC_SUCCESS != rc) {
+               LOG_CR("Ir mailbox attach failed");
+               exit(EXIT_FAILURE);
+       }
+
+       mbx_fd = ncs_ipc_get_sel_obj(&ir_cb.mbx);
+       fds[FD_MBX].fd = mbx_fd.rmv_obj;
+       fds[FD_MBX].events = POLLIN;
+
+       while (1) {
+               if (poll(fds, irfds, -1) == (-1)) {
+                       if (errno == EINTR) {
+                               continue;
+                       }
+
+                       LOG_ER("poll Fail - %s", strerror(errno));
+                       exit(EXIT_FAILURE);
+               }
+
+               if (fds[FD_MBX].revents & POLLIN) {
+                       while (nullptr != (evt = (AVND_EVT 
*)ncs_ipc_non_blk_recv(&ir_cb.mbx)))
+                               ir_process_event(evt);
+               }
+       }
+       TRACE_LEAVE();
+}
+
+/**
+ * Start a imm reader thread.
+ * @param Nothing.
+ * @return Nothing.
+ */
+void imm_reader_thread_create(void)
+{
+       pthread_t thread;
+       pthread_attr_t attr;
+
+       TRACE_ENTER();
+
+       pthread_attr_init(&attr);
+       pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+
+       if (pthread_create(&thread, &attr, imm_reader_thread, &ir_cb) != 0) {
+               LOG_ER("pthread_create FAILED: %s", strerror(errno));
+               exit(EXIT_FAILURE);
+       }
+
+       pthread_attr_destroy(&attr);
+
+       TRACE_LEAVE();
+}
+
+/**
+ * This function process events, which requires reading data from imm.
+ * As of now, SU instantiation needs to read data, so diverting SU
+ * instantiation in this thread. Going forward, we need to use this
+ * function for similar purposes.
+ * @param Event pointer.
+ * @return Nothing.
+ */
+void ir_process_event(AVND_EVT *evt)
+{
+       uint32_t res = NCSCC_RC_SUCCESS;
+       AVND_EVT_TYPE evt_type =  evt->type;
+       TRACE_ENTER2("Evt type:%u", evt_type);
+
+       osafassert(AVND_EVT_AVD_SU_PRES_MSG == evt_type);
+
+       res = g_avnd_func_list[evt_type] (avnd_cb, evt);
+
+       /* log the result of event processing */
+       TRACE("Evt Type: '%u', '%s'", evt_type, ((res == NCSCC_RC_SUCCESS) ? "success" : 
"failure"));
+
+       if (evt)
+               avnd_evt_destroy(evt);
+
+       TRACE_LEAVE2("res '%u'", res);
+}
diff --git a/osaf/services/saf/amf/amfnd/include/avnd.h 
b/osaf/services/saf/amf/amfnd/include/avnd.h
--- a/osaf/services/saf/amf/amfnd/include/avnd.h
+++ b/osaf/services/saf/amf/amfnd/include/avnd.h
@@ -69,5 +69,11 @@
extern uint32_t avnd_create(void);
  extern void avnd_sigterm_handler(void);
+typedef struct {
+        SYSF_MBX mbx;
+}ir_cb_t;
+
+extern ir_cb_t ir_cb;
+extern void imm_reader_thread_create(void);
#endif
diff --git a/osaf/services/saf/amf/amfnd/main.cc 
b/osaf/services/saf/amf/amfnd/main.cc
--- a/osaf/services/saf/amf/amfnd/main.cc
+++ b/osaf/services/saf/amf/amfnd/main.cc
@@ -552,7 +552,7 @@ void avnd_main_process(void)
                LOG_ER("signal TERM failed: %s", strerror(errno));
                goto done;
        }
-
+       imm_reader_thread_create();
        mbx_fd = ncs_ipc_get_sel_obj(&avnd_cb->mbx);
        fds[FD_MBX].fd = mbx_fd.rmv_obj;
        fds[FD_MBX].events = POLLIN;
@@ -638,6 +638,7 @@ void avnd_evt_process(AVND_EVT *evt)
  done:
        if (evt)
                avnd_evt_destroy(evt);
+       TRACE_LEAVE();
  }
/*****************************************************************************

# HG changeset patch
# Parent 560e640d694b99e76413488f6ba17c6965fffce0

diff --git a/osaf/libs/agents/saf/imma/imma_om_api.c 
b/osaf/libs/agents/saf/imma/imma_om_api.c
--- a/osaf/libs/agents/saf/imma/imma_om_api.c
+++ b/osaf/libs/agents/saf/imma/imma_om_api.c
@@ -6583,6 +6583,14 @@ SaAisErrorT saImmOmSearchInitialize_2(Sa
                } else {
                        rootNameStr = 
(SaStringT)osaf_extended_name_borrow(rootName);
                }
+
+               if (len > 0) {
+                       if (strncmp(rootNameStr, 
"safSu=SU2,safSg=AmfDemo,safApp=AmfDemo1", len) == 0) {
+                               LOG_ER("XXXX: Provoke ticket 517 fault, %s", 
rootNameStr);
+                               sleep(180);
+                       }
+               }
+
        }
 
        rc = search_init_common(immHandle, rootNameStr, scope,
------------------------------------------------------------------------------
Transform Data into Opportunity.
Accelerate data analysis in your applications with
Intel Data Analytics Acceleration Library.
Click to learn more.
http://pubads.g.doubleclick.net/gampad/clk?id=278785231&iu=/4140
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to