osaf/services/saf/amf/amfd/clm.cc | 103 ++++++++++++++++++++++++++---- osaf/services/saf/amf/amfd/include/cb.h | 7 +- osaf/services/saf/amf/amfd/include/clm.h | 6 +- osaf/services/saf/amf/amfd/include/ntf.h | 3 + osaf/services/saf/amf/amfd/main.cc | 17 +--- osaf/services/saf/amf/amfd/ntf.cc | 85 +++++++++++++++++++++++++ osaf/services/saf/amf/amfd/role.cc | 24 +++--- 7 files changed, 205 insertions(+), 40 deletions(-)
In the new controller reallocation scenario with the roaming SC feature, if immnd dies on the node that is becoming active, circular dependencies among OpenSAF services appear, which eventually leads to a reboot. The dependencies are: . clmd cannot use IMM services since immnd is dead . immnd needs to be restarted by amfnd . amfnd is hanging since amfnd is calling CLM services . amfd is also hanging since amfd is calling CLM and NTF services . ntfd is hanging due to logd's dependency on IMM The problem could be solved if: . amfd initializes the NTF and CLM handles in a thread during the initialization phase . amfnd initializes CLM in a thread if amfnd receives a CLM bad handle Since amfnd already initializes CLM in a thread upon receiving a CLM bad handle, this patch initializes CLM and NTF in threads on the amfd side. Also, the threaded initialization in this patch can be refactored later by utilizing the support of #1609 diff --git a/osaf/services/saf/amf/amfd/clm.cc b/osaf/services/saf/amf/amfd/clm.cc --- a/osaf/services/saf/amf/amfd/clm.cc +++ b/osaf/services/saf/amf/amfd/clm.cc @@ -386,14 +386,26 @@ static const SaClmCallbacksT_4 clm_callb /*.saClmClusterTrackCallback =*/ clm_track_cb }; -SaAisErrorT avd_clm_init(void) +SaAisErrorT avd_clm_init(AVD_CL_CB* cb) { - SaAisErrorT error = SA_AIS_OK; + SaAisErrorT error = SA_AIS_OK; + SaClmHandleT clm_handle = 0; + SaSelectionObjectT sel_obj = 0; + cb->clmHandle = 0; + cb->clm_sel_obj = 0; TRACE_ENTER(); + /* + * TODO: This CLM initialization thread can be re-factored + * after having osaf dedicated thread, so that all APIs calls + * to external service can be automatically retried with result + * code (TRY_AGAIN, TIMEOUT, UNAVAILABLE), or reinitialized within + * BAD_HANDLE.
Also, duplicated codes in initialization thread + * will be moved to osaf dedicated thread + */ for (;;) { SaVersionT Version = { 'B', 4, 1 }; - error = saClmInitialize_4(&avd_cb->clmHandle, &clm_callbacks, &Version); + error = saClmInitialize_4(&clm_handle, &clm_callbacks, &Version); if (error == SA_AIS_ERR_TRY_AGAIN || error == SA_AIS_ERR_TIMEOUT || error == SA_AIS_ERR_UNAVAILABLE) { @@ -404,15 +416,21 @@ SaAisErrorT avd_clm_init(void) osaf_nanosleep(&kHundredMilliseconds); continue; } - if (error == SA_AIS_OK) break; - LOG_ER("Failed to Initialize with CLM: %u", error); + if (error == SA_AIS_OK) { + break; + }else { + LOG_ER("Failed to Initialize with CLM: %u", error); + goto done; + } + } + cb->clmHandle = clm_handle; + error = saClmSelectionObjectGet(cb->clmHandle, &sel_obj); + if (error != SA_AIS_OK) { + LOG_ER("Failed to get selection object from CLM %u", error); + cb->clmHandle = 0; goto done; } - error = saClmSelectionObjectGet(avd_cb->clmHandle, &avd_cb->clm_sel_obj); - if (SA_AIS_OK != error) { - LOG_ER("Failed to get selection object from CLM %u", error); - goto done; - } + cb->clm_sel_obj = sel_obj; TRACE("Successfully initialized CLM"); @@ -428,10 +446,15 @@ SaAisErrorT avd_clm_track_start(void) TRACE_ENTER(); error = saClmClusterTrack_4(avd_cb->clmHandle, trackFlags, nullptr); - if (SA_AIS_OK != error) - LOG_ER("Failed to start cluster tracking %u", error); - - TRACE_LEAVE(); + if (error != SA_AIS_OK) { + if (error == SA_AIS_ERR_TRY_AGAIN || error == SA_AIS_ERR_TIMEOUT || + error == SA_AIS_ERR_UNAVAILABLE) { + LOG_WA("Failed to start cluster tracking %u", error); + } else { + LOG_ER("Failed to start cluster tracking %u", error); + } + } + TRACE_LEAVE(); return error; } @@ -468,3 +491,55 @@ void clm_node_terminate(AVD_AVND *node) else TRACE("Waiting for the pending SU presence state updates"); } + +static void* avd_clm_init_thread(void* arg) +{ + TRACE_ENTER(); + AVD_CL_CB* cb = static_cast<AVD_CL_CB*>(arg); + SaAisErrorT error = SA_AIS_OK; + + if 
(avd_clm_init(cb) != SA_AIS_OK) { + LOG_ER("avd_clm_init FAILED"); + goto done; + } + + if (cb->avail_state_avd == SA_AMF_HA_ACTIVE) { + for (;;) { + error = avd_clm_track_start(); + if (error == SA_AIS_ERR_TRY_AGAIN || + error == SA_AIS_ERR_TIMEOUT || + error == SA_AIS_ERR_UNAVAILABLE) { + osaf_nanosleep(&kHundredMilliseconds); + continue; + } + if (error == SA_AIS_OK) { + break; + } else { + LOG_ER("avd_clm_track_start FAILED, error: %u", error); + goto done; + } + } + } + +done: + TRACE_LEAVE(); + return nullptr; +} + +SaAisErrorT avd_start_clm_init_bg(void) +{ + pthread_t thread; + pthread_attr_t attr; + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + + if (pthread_create(&thread, &attr, avd_clm_init_thread, avd_cb) != 0) { + LOG_ER("pthread_create FAILED: %s", strerror(errno)); + exit(EXIT_FAILURE); + } + + pthread_attr_destroy(&attr); + return SA_AIS_OK; +} + + diff --git a/osaf/services/saf/amf/amfd/include/cb.h b/osaf/services/saf/amf/amfd/include/cb.h --- a/osaf/services/saf/amf/amfd/include/cb.h +++ b/osaf/services/saf/amf/amfd/include/cb.h @@ -47,6 +47,7 @@ #include <list> #include <queue> +#include <atomic> class AVD_SI; class AVD_AVND; @@ -193,7 +194,7 @@ typedef struct cl_cb_tag { since the cluster boot time */ /********** NTF related params ***********/ - SaNtfHandleT ntfHandle; + std::atomic<SaNtfHandleT> ntfHandle; /********Peer AvD related*********************/ AVD_EXT_COMP_INFO ext_comp_info; @@ -207,8 +208,8 @@ typedef struct cl_cb_tag { bool is_implementer; /* Clm stuff */ - SaClmHandleT clmHandle; - SaSelectionObjectT clm_sel_obj; + std::atomic<SaClmHandleT> clmHandle; + std::atomic<SaSelectionObjectT> clm_sel_obj; bool fully_initialized; bool swap_switch; /* true - In middle of role switch. 
*/ diff --git a/osaf/services/saf/amf/amfd/include/clm.h b/osaf/services/saf/amf/amfd/include/clm.h --- a/osaf/services/saf/amf/amfd/include/clm.h +++ b/osaf/services/saf/amf/amfd/include/clm.h @@ -21,10 +21,14 @@ #ifndef _AVD_CLM_H #define _AVD_CLM_H -extern SaAisErrorT avd_clm_init(void); +struct cl_cb_tag; + + +extern SaAisErrorT avd_clm_init(struct cl_cb_tag*); extern SaAisErrorT avd_clm_track_start(void); extern SaAisErrorT avd_clm_track_stop(void); extern void clm_node_terminate(AVD_AVND *node); +extern SaAisErrorT avd_start_clm_init_bg(void); #endif diff --git a/osaf/services/saf/amf/amfd/include/ntf.h b/osaf/services/saf/amf/amfd/include/ntf.h --- a/osaf/services/saf/amf/amfd/include/ntf.h +++ b/osaf/services/saf/amf/amfd/include/ntf.h @@ -105,4 +105,7 @@ void avd_alarm_clear(const SaNameT *name void avd_send_error_report_ntf(const SaNameT *name, SaAmfRecommendedRecoveryT recovery); +extern SaAisErrorT avd_ntf_init(struct cl_cb_tag*); +extern SaAisErrorT avd_start_ntf_init_bg(void); + #endif diff --git a/osaf/services/saf/amf/amfd/main.cc b/osaf/services/saf/amf/amfd/main.cc --- a/osaf/services/saf/amf/amfd/main.cc +++ b/osaf/services/saf/amf/amfd/main.cc @@ -576,14 +576,6 @@ static uint32_t initialize(void) goto done; } - // CLM init is independent of this SC's role. Init with CLM early. 
- - if (avd_clm_init() != SA_AIS_OK) { - LOG_EM("avd_clm_init FAILED"); - rc = NCSCC_RC_FAILURE; - goto done; - } - if ((rc = initialize_for_assignment(cb, role)) != NCSCC_RC_SUCCESS) { LOG_ER("initialize_for_assignment FAILED %u", (unsigned) rc); @@ -633,11 +625,14 @@ static void main_loop(void) while (1) { fds[FD_MBCSV].fd = cb->mbcsv_sel_obj; fds[FD_MBCSV].events = POLLIN; - fds[FD_CLM].fd = cb->clm_sel_obj; - fds[FD_CLM].events = POLLIN; fds[FD_IMM].fd = cb->imm_sel_obj; // IMM fd must be last in array fds[FD_IMM].events = POLLIN; - + + if (cb->clmHandle != 0) { + fds[FD_CLM].fd = cb->clm_sel_obj; + fds[FD_CLM].events = POLLIN; + } + if (cb->immOiHandle != 0) { fds[FD_IMM].fd = cb->imm_sel_obj; fds[FD_IMM].events = POLLIN; diff --git a/osaf/services/saf/amf/amfd/ntf.cc b/osaf/services/saf/amf/amfd/ntf.cc --- a/osaf/services/saf/amf/amfd/ntf.cc +++ b/osaf/services/saf/amf/amfd/ntf.cc @@ -25,6 +25,7 @@ #include <logtrace.h> #include <util.h> #include <ntf.h> +#include "osaf_time.h" /***************************************************************************** Name : avd_send_comp_inst_failed_alarm @@ -572,6 +573,12 @@ uint32_t sendAlarmNotificationAvd(AVD_CL return status; } + if (avd_cb->ntfHandle == 0) { + LOG_ER("NTF handle has not been initialized, alarm notification " + "for (%s) will be lost", ntf_object.value); + return status; + } + if (type != 0) { add_info_items = 1; allocation_size = SA_NTF_ALLOC_SYSTEM_LIMIT; @@ -660,6 +667,13 @@ uint32_t sendStateChangeNotificationAvd( LOG_WA("State change notification lost for '%s'", ntf_object.value); return status; } + + if (avd_cb->ntfHandle == 0) { + LOG_WA("NTF handle has not been initialized, state change notification " + "for (%s) will be lost", ntf_object.value); + return status; + } + if (additional_info_is_present == true) { add_info_items = 1; allocation_size = SA_NTF_ALLOC_SYSTEM_LIMIT; @@ -770,4 +784,75 @@ void avd_send_error_report_ntf(const SaN TRACE_LEAVE(); } +SaAisErrorT avd_ntf_init(AVD_CL_CB* cb) 
+{ + SaAisErrorT error = SA_AIS_OK; + SaNtfHandleT ntf_handle; + TRACE_ENTER(); + // reset handle + cb->ntfHandle = 0; + + /* + * TODO: to be re-factored as CLM initialization thread + */ + for (;;) { + SaVersionT ntfVersion = { 'A', 0x01, 0x01 }; + + error = saNtfInitialize(&ntf_handle, NULL, &ntfVersion); + if (error == SA_AIS_ERR_TRY_AGAIN || + error == SA_AIS_ERR_TIMEOUT || + error == SA_AIS_ERR_UNAVAILABLE) { + if (error != SA_AIS_ERR_TRY_AGAIN) { + LOG_WA("saNtfInitialize returned %u", + (unsigned) error); + } + osaf_nanosleep(&kHundredMilliseconds); + continue; + } + if (error == SA_AIS_OK) { + break; + } else { + LOG_ER("Failed to Initialize with NTF: %u", error); + goto done; + } + } + cb->ntfHandle = ntf_handle; + TRACE("Successfully initialized NTF"); + +done: + TRACE_LEAVE(); + return error; +} + +static void* avd_ntf_init_thread(void* arg) +{ + TRACE_ENTER(); + AVD_CL_CB* cb = static_cast<AVD_CL_CB*>(arg); + + if (avd_ntf_init(cb) != SA_AIS_OK) { + LOG_ER("avd_clm_init FAILED"); + goto done; + } + +done: + TRACE_LEAVE(); + return nullptr; +} + +SaAisErrorT avd_start_ntf_init_bg(void) +{ + pthread_t thread; + pthread_attr_t attr; + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + + if (pthread_create(&thread, &attr, avd_ntf_init_thread, avd_cb) != 0) { + LOG_ER("pthread_create FAILED: %s", strerror(errno)); + exit(EXIT_FAILURE); + } + + pthread_attr_destroy(&attr); + + return SA_AIS_OK; +} diff --git a/osaf/services/saf/amf/amfd/role.cc b/osaf/services/saf/amf/amfd/role.cc --- a/osaf/services/saf/amf/amfd/role.cc +++ b/osaf/services/saf/amf/amfd/role.cc @@ -174,9 +174,8 @@ void avd_role_change_evh(AVD_CL_CB *cb, uint32_t initialize_for_assignment(cl_cb_tag* cb, SaAmfHAStateT ha_state) { TRACE_ENTER2("ha_state = %d", static_cast<int>(ha_state)); - SaVersionT ntfVersion = {'A', 0x01, 0x01}; uint32_t rc = NCSCC_RC_SUCCESS; - SaAisErrorT error; + if (cb->fully_initialized) goto done; cb->avail_state_avd = ha_state; 
if (ha_state == SA_AMF_HA_QUIESCED) { @@ -194,17 +193,25 @@ uint32_t initialize_for_assignment(cl_cb LOG_ER("avsv_mbcsv_register FAILED"); goto done; } + // Initialize CLM handle in thread + if (avd_start_clm_init_bg() != SA_AIS_OK) { + LOG_EM("avd_clm_init FAILED"); + rc = NCSCC_RC_FAILURE; + goto done; + } + if (avd_imm_init(cb) != SA_AIS_OK) { LOG_ER("avd_imm_init FAILED"); rc = NCSCC_RC_FAILURE; goto done; } - if ((error = saNtfInitialize(&cb->ntfHandle, nullptr, &ntfVersion)) != - SA_AIS_OK) { - LOG_ER("saNtfInitialize Failed (%u)", error); - rc = NCSCC_RC_FAILURE; + + // Initialize NTF handle in thread + if (avd_start_ntf_init_bg() != SA_AIS_OK) { + LOG_EM("avd_start_ntf_init_bg FAILED"); goto done; } + if ((rc = avd_mds_set_vdest_role(cb, ha_state)) != NCSCC_RC_SUCCESS) { LOG_ER("avd_mds_set_vdest_role FAILED"); goto done; @@ -273,11 +280,6 @@ uint32_t avd_active_role_initialization( avd_imm_update_runtime_attrs(); - if (avd_clm_track_start() != SA_AIS_OK) { - LOG_ER("avd_clm_track_start FAILED"); - goto done; - } - status = NCSCC_RC_SUCCESS; done: TRACE_LEAVE(); ------------------------------------------------------------------------------ What NetFlow Analyzer can do for you? Monitors network bandwidth and traffic patterns at an interface-level. Reveals which users, apps, and protocols are consuming the most bandwidth. Provides multi-vendor support for NetFlow, J-Flow, sFlow and other flows. Make informed decisions using capacity planning reports.http://sdm.link/zohodev2dev _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel