osaf/services/saf/amf/amfd/clm.cc        |  103 ++++++++++++++++++++++++++----
 osaf/services/saf/amf/amfd/include/cb.h  |    7 +-
 osaf/services/saf/amf/amfd/include/clm.h |    6 +-
 osaf/services/saf/amf/amfd/include/ntf.h |    3 +
 osaf/services/saf/amf/amfd/main.cc       |   17 +---
 osaf/services/saf/amf/amfd/ntf.cc        |   85 +++++++++++++++++++++++++
 osaf/services/saf/amf/amfd/role.cc       |   24 +++---
 7 files changed, 205 insertions(+), 40 deletions(-)


In the new controller reallocation scenario with the roaming SC feature, if
immnd dies on the node that is becoming active, circular dependencies among
OpenSAF services appear, which eventually leads to a reboot.

The dependencies are:
. clmd cannot use IMM services since immnd has died
. immnd needs to be restarted by amfnd
. amfnd is hanging since amfnd is calling CLM services
. amfd is also hanging since amfd is calling CLM and NTF services
. ntfd is hanging due to logd's dependencies on IMM

The problem could be solved if:
. amfd initializes the NTF and CLM handles in a thread during its
  initialization phase
. amfnd initializes CLM in a thread if amfnd receives a CLM bad handle

Since amfnd already initializes CLM in a thread upon receiving a CLM bad
handle, this patch initializes CLM and NTF in a thread on the amfd side. Also,
the threaded initialization in this patch can be refactored later by utilizing
the support of #1609.

diff --git a/osaf/services/saf/amf/amfd/clm.cc 
b/osaf/services/saf/amf/amfd/clm.cc
--- a/osaf/services/saf/amf/amfd/clm.cc
+++ b/osaf/services/saf/amf/amfd/clm.cc
@@ -386,14 +386,26 @@ static const SaClmCallbacksT_4 clm_callb
        /*.saClmClusterTrackCallback =*/ clm_track_cb
 };
 
-SaAisErrorT avd_clm_init(void)
+SaAisErrorT avd_clm_init(AVD_CL_CB* cb)
 {
-        SaAisErrorT error = SA_AIS_OK;
+       SaAisErrorT error = SA_AIS_OK;
+       SaClmHandleT clm_handle = 0;
+       SaSelectionObjectT sel_obj = 0;
 
+       cb->clmHandle = 0;
+       cb->clm_sel_obj = 0;
        TRACE_ENTER();
+       /*
+        * TODO: This CLM initialization thread can be re-factored
+        * after having osaf dedicated thread, so that all APIs calls
+        * to external service can be automatically retried with result
+        * code (TRY_AGAIN, TIMEOUT, UNAVAILABLE), or reinitialized within
+        * BAD_HANDLE. Also, duplicated codes in initialization thread
+        * will be moved to osaf dedicated thread
+        */
        for (;;) {
                SaVersionT Version = { 'B', 4, 1 };
-               error = saClmInitialize_4(&avd_cb->clmHandle, &clm_callbacks, 
&Version);
+               error = saClmInitialize_4(&clm_handle, &clm_callbacks, 
&Version);
                if (error == SA_AIS_ERR_TRY_AGAIN ||
                    error == SA_AIS_ERR_TIMEOUT ||
                     error == SA_AIS_ERR_UNAVAILABLE) {
@@ -404,15 +416,21 @@ SaAisErrorT avd_clm_init(void)
                        osaf_nanosleep(&kHundredMilliseconds);
                        continue;
                }
-               if (error == SA_AIS_OK) break;
-               LOG_ER("Failed to Initialize with CLM: %u", error);
+               if (error == SA_AIS_OK) {
+                       break;
+               }else {
+                       LOG_ER("Failed to Initialize with CLM: %u", error);
+                       goto done;
+               }
+       }
+       cb->clmHandle = clm_handle;
+       error = saClmSelectionObjectGet(cb->clmHandle, &sel_obj);
+       if (error != SA_AIS_OK) {
+               LOG_ER("Failed to get selection object from CLM %u", error);
+               cb->clmHandle = 0;
                goto done;
        }
-       error = saClmSelectionObjectGet(avd_cb->clmHandle, 
&avd_cb->clm_sel_obj);
-       if (SA_AIS_OK != error) { 
-               LOG_ER("Failed to get selection object from CLM %u", error);
-               goto done;
-       }
+       cb->clm_sel_obj = sel_obj;
 
        TRACE("Successfully initialized CLM");
 
@@ -428,10 +446,15 @@ SaAisErrorT avd_clm_track_start(void)
         
        TRACE_ENTER();
        error = saClmClusterTrack_4(avd_cb->clmHandle, trackFlags, nullptr);
-        if (SA_AIS_OK != error)
-                LOG_ER("Failed to start cluster tracking %u", error);
-
-        TRACE_LEAVE();
+       if (error != SA_AIS_OK) {
+               if (error == SA_AIS_ERR_TRY_AGAIN || error == 
SA_AIS_ERR_TIMEOUT ||
+                       error == SA_AIS_ERR_UNAVAILABLE) {
+                       LOG_WA("Failed to start cluster tracking %u", error);
+               } else {
+                       LOG_ER("Failed to start cluster tracking %u", error);
+               }
+       }
+       TRACE_LEAVE();
        return error;
 }
 
@@ -468,3 +491,55 @@ void clm_node_terminate(AVD_AVND *node)
        else
                TRACE("Waiting for the pending SU presence state updates");
 }
+
+static void* avd_clm_init_thread(void* arg)  // thread entry point: runs avd_clm_init() and, if ACTIVE, starts CLM tracking
+{
+       TRACE_ENTER();
+       AVD_CL_CB* cb = static_cast<AVD_CL_CB*>(arg);  // arg is the control block handed to pthread_create()
+       SaAisErrorT error = SA_AIS_OK;
+
+       if (avd_clm_init(cb) != SA_AIS_OK) {
+               LOG_ER("avd_clm_init FAILED");
+               goto done;
+       }
+
+       if (cb->avail_state_avd == SA_AMF_HA_ACTIVE) {  // tracking is started here only when the HA state is ACTIVE
+               for (;;) {
+                       error = avd_clm_track_start();
+                       if (error == SA_AIS_ERR_TRY_AGAIN ||  // these three codes are retried after a sleep
+                               error == SA_AIS_ERR_TIMEOUT ||
+                                       error == SA_AIS_ERR_UNAVAILABLE) {
+                               osaf_nanosleep(&kHundredMilliseconds);
+                               continue;
+                       }
+                       if (error == SA_AIS_OK) {
+                               break;
+                       } else {  // any other error ends the thread without further retries
+                               LOG_ER("avd_clm_track_start FAILED, error: %u", 
error);
+                               goto done;
+                       }
+               }
+       }
+
+done:
+       TRACE_LEAVE();
+       return nullptr;  // always nullptr; failures are only logged
+}
+
+SaAisErrorT avd_start_clm_init_bg(void)
+{
+       pthread_t thread;
+       pthread_attr_t attr;
+       pthread_attr_init(&attr);
+       pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+       // Start detached CLM init thread; pthread_create() returns the error number, errno is not set
+       int rc = pthread_create(&thread, &attr, avd_clm_init_thread, avd_cb);
+       if (rc != 0) {
+               LOG_ER("pthread_create FAILED: %s", strerror(rc));
+               exit(EXIT_FAILURE);
+       }
+       pthread_attr_destroy(&attr);
+       return SA_AIS_OK;
+}
+
+
diff --git a/osaf/services/saf/amf/amfd/include/cb.h 
b/osaf/services/saf/amf/amfd/include/cb.h
--- a/osaf/services/saf/amf/amfd/include/cb.h
+++ b/osaf/services/saf/amf/amfd/include/cb.h
@@ -47,6 +47,7 @@
 
 #include <list>
 #include <queue>
+#include <atomic>
 
 class AVD_SI;
 class AVD_AVND;
@@ -193,7 +194,7 @@ typedef struct cl_cb_tag {
                                   since the cluster boot time */
 
        /********** NTF related params      ***********/
-       SaNtfHandleT ntfHandle;
+       std::atomic<SaNtfHandleT> ntfHandle;
 
        /********Peer AvD related*********************/
        AVD_EXT_COMP_INFO ext_comp_info;
@@ -207,8 +208,8 @@ typedef struct cl_cb_tag {
        bool is_implementer;
 
        /* Clm stuff */
-       SaClmHandleT clmHandle;
-       SaSelectionObjectT clm_sel_obj;
+       std::atomic<SaClmHandleT> clmHandle;
+       std::atomic<SaSelectionObjectT> clm_sel_obj;
 
        bool fully_initialized;
        bool swap_switch; /* true - In middle of role switch. */
diff --git a/osaf/services/saf/amf/amfd/include/clm.h 
b/osaf/services/saf/amf/amfd/include/clm.h
--- a/osaf/services/saf/amf/amfd/include/clm.h
+++ b/osaf/services/saf/amf/amfd/include/clm.h
@@ -21,10 +21,14 @@
 #ifndef _AVD_CLM_H
 #define _AVD_CLM_H
 
-extern SaAisErrorT avd_clm_init(void);
+struct cl_cb_tag;
+
+
+extern SaAisErrorT avd_clm_init(struct cl_cb_tag*);
 extern SaAisErrorT avd_clm_track_start(void);
 extern SaAisErrorT avd_clm_track_stop(void);
 extern void clm_node_terminate(AVD_AVND *node);
+extern SaAisErrorT avd_start_clm_init_bg(void);
 
 #endif
 
diff --git a/osaf/services/saf/amf/amfd/include/ntf.h 
b/osaf/services/saf/amf/amfd/include/ntf.h
--- a/osaf/services/saf/amf/amfd/include/ntf.h
+++ b/osaf/services/saf/amf/amfd/include/ntf.h
@@ -105,4 +105,7 @@ void avd_alarm_clear(const SaNameT *name
 
 void avd_send_error_report_ntf(const SaNameT *name, SaAmfRecommendedRecoveryT 
recovery);
 
+extern SaAisErrorT avd_ntf_init(struct cl_cb_tag*);
+extern SaAisErrorT avd_start_ntf_init_bg(void);
+
 #endif
diff --git a/osaf/services/saf/amf/amfd/main.cc 
b/osaf/services/saf/amf/amfd/main.cc
--- a/osaf/services/saf/amf/amfd/main.cc
+++ b/osaf/services/saf/amf/amfd/main.cc
@@ -576,14 +576,6 @@ static uint32_t initialize(void)
                goto done;
        }
 
-       // CLM init is independent of this SC's role. Init with CLM early.
-
-       if (avd_clm_init() != SA_AIS_OK) {
-               LOG_EM("avd_clm_init FAILED");
-               rc = NCSCC_RC_FAILURE;
-               goto done;
-       }
-
        if ((rc = initialize_for_assignment(cb, role))
                != NCSCC_RC_SUCCESS) {
                LOG_ER("initialize_for_assignment FAILED %u", (unsigned) rc);
@@ -633,11 +625,14 @@ static void main_loop(void)
        while (1) {
                fds[FD_MBCSV].fd = cb->mbcsv_sel_obj;
                fds[FD_MBCSV].events = POLLIN;
-               fds[FD_CLM].fd = cb->clm_sel_obj;
-               fds[FD_CLM].events = POLLIN;
                fds[FD_IMM].fd = cb->imm_sel_obj; // IMM fd must be last in 
array
                fds[FD_IMM].events = POLLIN;
-               
+       
+               if (cb->clmHandle != 0) {
+                       fds[FD_CLM].fd = cb->clm_sel_obj;
+                       fds[FD_CLM].events = POLLIN;
+               }
+       
                if (cb->immOiHandle != 0) {
                        fds[FD_IMM].fd = cb->imm_sel_obj;
                        fds[FD_IMM].events = POLLIN;
diff --git a/osaf/services/saf/amf/amfd/ntf.cc 
b/osaf/services/saf/amf/amfd/ntf.cc
--- a/osaf/services/saf/amf/amfd/ntf.cc
+++ b/osaf/services/saf/amf/amfd/ntf.cc
@@ -25,6 +25,7 @@
 #include <logtrace.h>
 #include <util.h>
 #include <ntf.h>
+#include "osaf_time.h"
 
 /*****************************************************************************
   Name          :  avd_send_comp_inst_failed_alarm
@@ -572,6 +573,12 @@ uint32_t sendAlarmNotificationAvd(AVD_CL
                return status;
        }
 
+       if (avd_cb->ntfHandle == 0) {
+               LOG_ER("NTF handle has not been initialized, alarm notification 
"
+                               "for (%s) will be lost", ntf_object.value);
+               return status;
+       }
+
        if (type != 0) {
                add_info_items = 1;
                allocation_size = SA_NTF_ALLOC_SYSTEM_LIMIT;
@@ -660,6 +667,13 @@ uint32_t sendStateChangeNotificationAvd(
                LOG_WA("State change notification lost for '%s'", 
ntf_object.value);
                return status;
        }
+
+       if (avd_cb->ntfHandle == 0) {
+               LOG_WA("NTF handle has not been initialized, state change 
notification "
+                               "for (%s) will be lost", ntf_object.value);
+               return status;
+       }
+
        if (additional_info_is_present == true) {
                add_info_items = 1;
                allocation_size = SA_NTF_ALLOC_SYSTEM_LIMIT;
@@ -770,4 +784,75 @@ void avd_send_error_report_ntf(const SaN
        TRACE_LEAVE();
 }
 
+SaAisErrorT avd_ntf_init(AVD_CL_CB* cb)  // initializes NTF and stores the handle in cb->ntfHandle; returns the last saNtfInitialize() result
+{
+       SaAisErrorT error = SA_AIS_OK;
+       SaNtfHandleT ntf_handle;
+       TRACE_ENTER();
 
+       // reset handle: the notification senders treat 0 as "NTF not initialized"
+       cb->ntfHandle = 0;
+
+       /*
+        * TODO: to be re-factored in the same way as the CLM initialization thread
+        */
+       for (;;) {
+               SaVersionT ntfVersion = { 'A', 0x01, 0x01 };
+
+               error = saNtfInitialize(&ntf_handle, NULL, &ntfVersion);
+               if (error == SA_AIS_ERR_TRY_AGAIN ||  // these three codes are retried after a sleep
+                   error == SA_AIS_ERR_TIMEOUT ||
+                    error == SA_AIS_ERR_UNAVAILABLE) {
+                       if (error != SA_AIS_ERR_TRY_AGAIN) {  // TRY_AGAIN is retried silently
+                               LOG_WA("saNtfInitialize returned %u",
+                                      (unsigned) error);
+                       }
+                       osaf_nanosleep(&kHundredMilliseconds);
+                       continue;
+               }
+               if (error == SA_AIS_OK) {
+                       break;
+               } else {  // any other error is returned to the caller without retrying
+                       LOG_ER("Failed to Initialize with NTF: %u", error);
+                       goto done;
+               }
+       }
+       cb->ntfHandle = ntf_handle;  // stored only after a successful initialization
+       TRACE("Successfully initialized NTF");
+
+done:
+       TRACE_LEAVE();
+       return error;
+}
+
+static void* avd_ntf_init_thread(void* arg)
+{
+       TRACE_ENTER();
+       AVD_CL_CB* cb = static_cast<AVD_CL_CB*>(arg);
+
+       // Thread entry point: initialize the NTF handle in the background.
+       // Nothing is reported back to a caller, so a failure is only logged.
+       if (avd_ntf_init(cb) != SA_AIS_OK) {
+               LOG_ER("avd_ntf_init FAILED");
+       }
+
+       TRACE_LEAVE();
+       return nullptr;
+}
+
+SaAisErrorT avd_start_ntf_init_bg(void)
+{
+       pthread_t thread;
+       pthread_attr_t attr;
+       pthread_attr_init(&attr);
+       pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+       // Start detached NTF init thread; pthread_create() returns the error number, errno is not set
+       int rc = pthread_create(&thread, &attr, avd_ntf_init_thread, avd_cb);
+       if (rc != 0) {
+               LOG_ER("pthread_create FAILED: %s", strerror(rc));
+               exit(EXIT_FAILURE);
+       }
+       pthread_attr_destroy(&attr);
+
+       return SA_AIS_OK;
+}
diff --git a/osaf/services/saf/amf/amfd/role.cc 
b/osaf/services/saf/amf/amfd/role.cc
--- a/osaf/services/saf/amf/amfd/role.cc
+++ b/osaf/services/saf/amf/amfd/role.cc
@@ -174,9 +174,8 @@ void avd_role_change_evh(AVD_CL_CB *cb, 
 uint32_t initialize_for_assignment(cl_cb_tag* cb, SaAmfHAStateT ha_state)
 {
        TRACE_ENTER2("ha_state = %d", static_cast<int>(ha_state));
-       SaVersionT ntfVersion = {'A', 0x01, 0x01};
        uint32_t rc = NCSCC_RC_SUCCESS;
-       SaAisErrorT error;
+
        if (cb->fully_initialized) goto done;
        cb->avail_state_avd = ha_state;
        if (ha_state == SA_AMF_HA_QUIESCED) {
@@ -194,17 +193,25 @@ uint32_t initialize_for_assignment(cl_cb
                LOG_ER("avsv_mbcsv_register FAILED");
                goto done;
        }
+       // Initialize CLM handle in thread
+       if (avd_start_clm_init_bg() != SA_AIS_OK) {
+               LOG_EM("avd_clm_init FAILED");
+               rc = NCSCC_RC_FAILURE;
+               goto done;
+       }
+
        if (avd_imm_init(cb) != SA_AIS_OK) {
                LOG_ER("avd_imm_init FAILED");
                rc = NCSCC_RC_FAILURE;
                goto done;
        }
-       if ((error = saNtfInitialize(&cb->ntfHandle, nullptr, &ntfVersion)) !=
-           SA_AIS_OK) {
-               LOG_ER("saNtfInitialize Failed (%u)", error);
-               rc = NCSCC_RC_FAILURE;
+
+       // Initialize NTF handle in thread
+       if (avd_start_ntf_init_bg() != SA_AIS_OK) {
+               LOG_EM("avd_start_ntf_init_bg FAILED");
                goto done;
        }
+
        if ((rc = avd_mds_set_vdest_role(cb, ha_state)) != NCSCC_RC_SUCCESS) {
                LOG_ER("avd_mds_set_vdest_role FAILED");
                goto done;
@@ -273,11 +280,6 @@ uint32_t avd_active_role_initialization(
 
        avd_imm_update_runtime_attrs();
 
-       if (avd_clm_track_start() != SA_AIS_OK) {
-               LOG_ER("avd_clm_track_start FAILED");
-               goto done;
-       }
-
        status = NCSCC_RC_SUCCESS;
 done:
        TRACE_LEAVE();

------------------------------------------------------------------------------
What NetFlow Analyzer can do for you? Monitors network bandwidth and traffic
patterns at an interface-level. Reveals which users, apps, and protocols are 
consuming the most bandwidth. Provides multi-vendor support for NetFlow, 
J-Flow, sFlow and other flows. Make informed decisions using capacity planning
reports.http://sdm.link/zohodev2dev
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to