On 2015-03-04 16:59, Anders Widell wrote: > Ack from me also. I haven't done any thorough analysis of this code. A > question: In the case of a role change event, why does the main thread > have to wait on the mutex for the re-initialization to finish? Isn't it > sufficient that you after re-initialization has completed check the > current role and perform necessary actions (implementer set, etc.)? > > Also, wouldn't it be better if this code was re-written so that all IMM > handling code is put in a separate thread, and then you use a mailbox to > communicate between the main thread and the IMM handling thread? This > code in its current form is very difficult to analyze and it is far from > obvious to me that it is correct from an threading perspective.
Agree, definitively! This piece of code is not maintainable /Hans > > / Anders Widell > > On 03/04/2015 03:20 PM, Hans Nordebäck wrote: >> Ack, code reviewed and valgrind tool helgrind run. >> A question, if avd_imm_reinit_bg is called with imm_reinit_mutex taken, >> there will be a deadlock due to the conditional wait on >> imm_reinit_thread_startup_mutex , >> so what is the purpose of the imm_reinit_thread_startup_mutex? /Thanks HansN >> >> -----Original Message----- >> From: [email protected] [mailto:[email protected]] >> Sent: den 26 februari 2015 07:23 >> To: Hans Nordebäck; [email protected]; [email protected]; >> Anders Björnerstedt; Anders Widell >> Cc: [email protected] >> Subject: [PATCH 1 of 1] amfd: add mutex to sync up among amfd threads [#405, >> #707] >> >> osaf/services/saf/amf/amfd/imm.cc | 30 +++++++++- >> osaf/services/saf/amf/amfd/role.cc | 117 >> ++++++++++++++++++++++++++++++------ >> 2 files changed, 125 insertions(+), 22 deletions(-) >> >> >> If imm returns BAD HANDLE to Amf, Amf reinitializes with imm intf in a >> separate thread. >> If role change event comes during imm intf initialization time, ImplSet, >> ImplClear, etc fails with BAD HANDLE. >> >> So, Amf need to take mutex and wait for imm intf initialization. >> >> diff --git a/osaf/services/saf/amf/amfd/imm.cc >> b/osaf/services/saf/amf/amfd/imm.cc >> --- a/osaf/services/saf/amf/amfd/imm.cc >> +++ b/osaf/services/saf/amf/amfd/imm.cc >> @@ -47,6 +47,7 @@ >> #include <si.h> >> #include <csi.h> >> #include <si_dep.h> >> +#include "osaf_utility.h" >> >> >> >> @@ -54,7 +55,13 @@ >> * DEFINITIONS >> * >> ======================================================================== >> */ >> - >> +/* mutex for synchronising imm initialization and role change events. >> +*/ pthread_mutex_t imm_reinit_mutex = PTHREAD_MUTEX_INITIALIZER; >> +/* mutex for synchronising amfd thread and imm initialization thread >> startup >> + to make sure that imm initialization thread executes first and take >> + imm_reinit_mutex and then proceeds for initialization. */ static >> +pthread_mutex_t imm_reinit_thread_startup_mutex = >> +PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t >> +imm_reinit_thread_startup_cond = PTHREAD_COND_INITIALIZER; >> // >> // TODO(HANO) Temporary use this function instead of strdup which uses >> malloc. >> // Later on remove this function and use std::string instead @@ -1637,16 >> +1644,23 @@ static void *avd_imm_reinit_bg_thread(vo >> uint32_t status; >> >> TRACE_ENTER(); >> + osaf_mutex_lock_ordie(&imm_reinit_mutex); >> + /* Send signal that imm_reinit_mutex has been taken. */ >> + osaf_mutex_lock_ordie(&imm_reinit_thread_startup_mutex); >> + pthread_cond_signal(&imm_reinit_thread_startup_cond); >> + osaf_mutex_unlock_ordie(&imm_reinit_thread_startup_mutex); >> >> immutilWrapperProfile.errorsAreFatal = 0; >> >> if ((rc = immutil_saImmOiInitialize_2(&cb->immOiHandle, &avd_callbacks, >> &immVersion)) != SA_AIS_OK) { >> LOG_ER("saImmOiInitialize failed %u", rc); >> + osaf_mutex_unlock_ordie(&imm_reinit_mutex); >> exit(EXIT_FAILURE); >> } >> >> if ((rc = immutil_saImmOiSelectionObjectGet(cb->immOiHandle, >> &cb->imm_sel_obj)) != SA_AIS_OK) { >> LOG_ER("saImmOiSelectionObjectGet failed %u", rc); >> + osaf_mutex_unlock_ordie(&imm_reinit_mutex); >> exit(EXIT_FAILURE); >> } >> >> @@ -1654,17 +1668,20 @@ static void *avd_imm_reinit_bg_thread(vo >> if (cb->avail_state_avd == SA_AMF_HA_ACTIVE) { >> if (avd_imm_impl_set() != SA_AIS_OK) { >> LOG_ER("exiting since avd_imm_impl_set failed"); >> + osaf_mutex_unlock_ordie(&imm_reinit_mutex); >> exit(EXIT_FAILURE); >> } >> } else { >> /* become applier and re-read the config */ >> if (avd_imm_applier_set() != SA_AIS_OK) { >> LOG_ER("exiting since avd_imm_applier_set failed"); >> + osaf_mutex_unlock_ordie(&imm_reinit_mutex); >> exit(EXIT_FAILURE); >> } >> >> if (avd_imm_config_get() != NCSCC_RC_SUCCESS) { >> LOG_ER("avd_imm_config_get FAILED"); >> + osaf_mutex_unlock_ordie(&imm_reinit_mutex); >> exit(EXIT_FAILURE); >> } >> } >> @@ -1677,6 +1694,8 @@ static void *avd_imm_reinit_bg_thread(vo >> osafassert(status == NCSCC_RC_SUCCESS); >> >> LOG_NO("Finished re-initializing with IMM"); >> + /* Release mutex taken.*/ >> + osaf_mutex_unlock_ordie(&imm_reinit_mutex); >> TRACE_LEAVE(); >> return NULL; >> } >> @@ -1688,11 +1707,14 @@ void avd_imm_reinit_bg(void) { >> pthread_t thread; >> pthread_attr_t attr; >> + int rc = 0; >> >> TRACE_ENTER(); >> >> LOG_NO("Re-initializing with IMM"); >> >> + osaf_mutex_lock_ordie(&imm_reinit_thread_startup_mutex); >> + >> (void) saImmOiFinalize(avd_cb->immOiHandle); >> >> avd_cb->immOiHandle = 0; >> @@ -1703,8 +1725,14 @@ void avd_imm_reinit_bg(void) >> >> if (pthread_create(&thread, &attr, avd_imm_reinit_bg_thread, avd_cb) != >> 0) { >> LOG_ER("pthread_create FAILED: %s", strerror(errno)); >> + osaf_mutex_unlock_ordie(&imm_reinit_thread_startup_mutex); >> exit(EXIT_FAILURE); >> } >> + >> + rc = pthread_cond_wait(&imm_reinit_thread_startup_cond, >> &imm_reinit_thread_startup_mutex); >> + if (rc != 0) osaf_abort(rc); >> + osaf_mutex_unlock_ordie(&imm_reinit_thread_startup_mutex); >> + >> pthread_attr_destroy(&attr); >> >> TRACE_LEAVE(); >> diff --git a/osaf/services/saf/amf/amfd/role.cc >> b/osaf/services/saf/amf/amfd/role.cc >> --- a/osaf/services/saf/amf/amfd/role.cc >> +++ b/osaf/services/saf/amf/amfd/role.cc >> @@ -44,6 +44,9 @@ >> #include <cluster.h> >> #include <clm.h> >> #include <si_dep.h> >> +#include "osaf_utility.h" >> + >> +extern pthread_mutex_t imm_reinit_mutex; >> >> static uint32_t avd_role_failover(AVD_CL_CB *cb, SaAmfHAStateT role); >> static uint32_t avd_role_failover_qsd_actv(AVD_CL_CB *cb, SaAmfHAStateT >> role); @@ -298,18 +301,30 @@ static uint32_t avd_role_failover(AVD_CL >> */ >> avsv_dequeue_async_update_msgs(cb, false); >> >> + /* Take mutex before changing role as it may impact logic >> + in avd_imm_reinit_bg_thread. If mutex is taken for imm >> + initialization, then wait for its completion before changing role.*/ >> + osaf_mutex_lock_ordie(&imm_reinit_mutex); >> cb->avail_state_avd = role; >> + osaf_mutex_unlock_ordie(&imm_reinit_mutex); >> >> /* Declare this standby as Active. Set Vdest role and MBCSv role */ >> if (NCSCC_RC_SUCCESS != (status = avd_mds_set_vdest_role(cb, role))) { >> LOG_ER("FAILOVER StandBy --> Active, VDEST Change role failed >> "); >> goto done; >> } >> - >> + /* There is no need to take mutex in ImplClear because if imm >> + initialization were undergoing, above mutex took care. */ >> /* Give up our IMM OI Applier role */ >> if ((rc = immutil_saImmOiImplementerClear(cb->immOiHandle)) != >> SA_AIS_OK) { >> LOG_ER("FAILOVER StandBy --> Active FAILED, ImplementerClear >> failed %u", rc); >> - goto done; >> + /* If it fails with BAD HANDLE, reinit imm intf and continue. >> + Let imm intf reinit take care of setting Impl/Applr based >> + on Avd role. */ >> + if (rc == SA_AIS_ERR_BAD_HANDLE) { >> + avd_imm_reinit_bg(); >> + } else >> + goto done; >> } >> >> /* Time to send fail-over messages to all the AVND's */ @@ -325,11 >> +340,22 @@ static uint32_t avd_role_failover(AVD_CL >> >> /* We have successfully changed role to Active. */ >> cb->node_id_avd_other = 0; >> + /* Take mutex here, to sync with above avd_imm_reinit_bg call. */ >> + osaf_mutex_lock_ordie(&imm_reinit_mutex); >> + if ((rc = immutil_saImmOiImplementerSet(avd_cb->immOiHandle, >> const_cast<SaImmOiImplementerNameT>("safAmfService"))) != SA_AIS_OK) { >> + osaf_mutex_unlock_ordie(&imm_reinit_mutex); >> + LOG_ER("FAILOVER StandBy --> Active FAILED, ImplementerSet >> failed %u", rc); >> + if (rc == SA_AIS_ERR_BAD_HANDLE) { >> + avd_imm_reinit_bg(); >> + } else if (rc == SA_AIS_ERR_EXIST) { >> + /* This may arise if immutil_saImmOiImplementerClear >> + failed and amf reinitializes imm interface and >> + set impl in avd_imm_reinit_bg_thread.*/ >> + } else >> + goto done; >> + } else >> + osaf_mutex_unlock_ordie(&imm_reinit_mutex); >> >> - if ((rc = immutil_saImmOiImplementerSet(avd_cb->immOiHandle, >> const_cast<SaImmOiImplementerNameT>("safAmfService"))) != SA_AIS_OK) { >> - LOG_ER("FAILOVER StandBy --> Active FAILED, ImplementerSet >> failed %u", rc); >> - goto done; >> - } >> avd_cb->is_implementer = true; >> >> >> @@ -486,12 +512,18 @@ static uint32_t avd_role_failover_qsd_ac >> >> cb->node_id_avd_other = 0; >> >> + /* Take mutex to be in sync with imm intf initialization.*/ >> + osaf_mutex_lock_ordie(&imm_reinit_mutex); >> /* Give up our IMM OI applier role */ >> if ((rc = immutil_saImmOiImplementerClear(cb->immOiHandle)) != >> SA_AIS_OK) { >> + osaf_mutex_unlock_ordie(&imm_reinit_mutex); >> LOG_ER("FAILOVER Quiesced --> Active FAILED, ImplementerClear >> failed %u", rc); >> - return NCSCC_RC_FAILURE; >> - } >> - >> + if (rc == SA_AIS_ERR_BAD_HANDLE) { >> + avd_imm_reinit_bg(); >> + } else >> + return NCSCC_RC_FAILURE; >> + } else >> + osaf_mutex_unlock_ordie(&imm_reinit_mutex); >> >> avd_imm_impl_set_task_create(); >> >> @@ -577,20 +609,40 @@ void avd_mds_qsd_role_evh(AVD_CL_CB *cb, >> _exit(EXIT_FAILURE); // should never get here... >> } >> >> + /* Take mutex here to sync with imm reinit thread.*/ >> + osaf_mutex_lock_ordie(&imm_reinit_mutex); >> + >> /* Give up IMM OI implementer role */ >> if ((rc = immutil_saImmOiImplementerClear(cb->immOiHandle)) != >> SA_AIS_OK) { >> + osaf_mutex_unlock_ordie(&imm_reinit_mutex); >> LOG_ER("FAILOVER Active --> Quiesced FAILED, ImplementerClear >> failed %u", rc); >> - osafassert(0); >> - } >> + if (rc == SA_AIS_ERR_BAD_HANDLE) { >> + avd_imm_reinit_bg(); >> + } else >> + osafassert(0); >> + } else >> + osaf_mutex_unlock_ordie(&imm_reinit_mutex); >> + >> cb->is_implementer = false; >> >> /* Throw away all pending IMM updates, no longer implementer */ >> Fifo::empty(); >> >> - if (avd_imm_applier_set() != SA_AIS_OK) { >> - LOG_ER("avd_imm_applier_set FAILED"); >> - osafassert(0); >> - } >> + /* Take mutex here, to sync with above avd_imm_reinit_bg call. */ >> + osaf_mutex_lock_ordie(&imm_reinit_mutex); >> + if ((rc = avd_imm_applier_set()) != SA_AIS_OK) { >> + osaf_mutex_unlock_ordie(&imm_reinit_mutex); >> + LOG_ER("avd_imm_applier_set FAILED, %u", rc); >> + if (rc == SA_AIS_ERR_BAD_HANDLE) { >> + avd_imm_reinit_bg(); >> + } else if (rc == SA_AIS_ERR_EXIST) { >> + /* This may arise if immutil_saImmOiImplementerClear >> + failed and amf reinitializes imm interface and >> + set applier in avd_imm_reinit_bg_thread.*/ >> + } else >> + osafassert(0); >> + } else >> + osaf_mutex_unlock_ordie(&imm_reinit_mutex); >> >> /* Now set the MBCSv role to quiesced, */ >> if (NCSCC_RC_SUCCESS != (status = avsv_set_ckpt_role(cb, >> SA_AMF_HA_QUIESCED))) { @@ -981,7 +1033,12 @@ uint32_t >> amfd_switch_stdby_actv(AVD_CL_C >> */ >> avsv_dequeue_async_update_msgs(cb, false); >> >> + /* Take mutex before changing role as it may impact logic >> + in avd_imm_reinit_bg_thread. If mutex is taken for imm >> + initialization, then wait for its completion before changing role.*/ >> + osaf_mutex_lock_ordie(&imm_reinit_mutex); >> cb->avail_state_avd = SA_AMF_HA_ACTIVE; >> + osaf_mutex_unlock_ordie(&imm_reinit_mutex); >> >> /* Declare this standby as Active. Set Vdest role role */ >> if (NCSCC_RC_SUCCESS != (status = avd_mds_set_vdest_role(cb, >> SA_AMF_HA_ACTIVE))) { @@ -1004,18 +1061,36 @@ uint32_t >> amfd_switch_stdby_actv(AVD_CL_C >> cb->swap_switch = false; >> >> /* Give up our IMM OI Applier role */ >> + /* There is no need to take mutex in ImplClear because if imm >> + initialization were undergoing, above mutex took care. */ >> if ((rc = immutil_saImmOiImplementerClear(cb->immOiHandle)) != >> SA_AIS_OK) { >> LOG_ER("Switch Standby --> Active FAILED, ImplementerClear >> failed %u", rc); >> - cb->swap_switch = false; >> - avd_d2d_chg_role_rsp(cb, NCSCC_RC_FAILURE, SA_AMF_HA_ACTIVE); >> - return NCSCC_RC_FAILURE; >> + if (rc == SA_AIS_ERR_BAD_HANDLE) { >> + avd_imm_reinit_bg(); >> + } else { >> + cb->swap_switch = false; >> + avd_d2d_chg_role_rsp(cb, NCSCC_RC_FAILURE, >> SA_AMF_HA_ACTIVE); >> + return NCSCC_RC_FAILURE; >> + } >> } >> >> + /* Take mutex here, to sync with above avd_imm_reinit_bg call. */ >> + osaf_mutex_lock_ordie(&imm_reinit_mutex); >> if ((rc = immutil_saImmOiImplementerSet(avd_cb->immOiHandle, >> const_cast<SaImmOiImplementerNameT>("safAmfService"))) != SA_AIS_OK) { >> LOG_ER("Switch Standby --> Active, ImplementerSet failed %u", >> rc); >> - avd_d2d_chg_role_rsp(cb, NCSCC_RC_FAILURE, SA_AMF_HA_ACTIVE); >> - return NCSCC_RC_FAILURE; >> - } >> + osaf_mutex_unlock_ordie(&imm_reinit_mutex); >> + if (rc == SA_AIS_ERR_BAD_HANDLE) { >> + avd_imm_reinit_bg(); >> + } else if (rc == SA_AIS_ERR_EXIST) { >> + /* This may arise if immutil_saImmOiImplementerClear >> + failed and amf reinit imm interface and set impl >> + in avd_imm_reinit_bg_thread.*/ >> + } else { >> + avd_d2d_chg_role_rsp(cb, NCSCC_RC_FAILURE, >> SA_AMF_HA_ACTIVE); >> + return NCSCC_RC_FAILURE; >> + } >> + } else >> + osaf_mutex_unlock_ordie(&imm_reinit_mutex); >> >> avd_cb->is_implementer = true; >> avd_cb->active_services_exist = true; > > > > ------------------------------------------------------------------------------ > Dive into the World of Parallel Programming The Go Parallel Website, sponsored > by Intel and developed in partnership with Slashdot Media, is your hub for all > things parallel software development, from weekly thought leadership blogs to > news, videos, case studies, tutorials and more. Take a look and join the > conversation now. http://goparallel.sourceforge.net/ > _______________________________________________ > Opensaf-devel mailing list > [email protected] > https://lists.sourceforge.net/lists/listinfo/opensaf-devel > ------------------------------------------------------------------------------ Dive into the World of Parallel Programming The Go Parallel Website, sponsored by Intel and developed in partnership with Slashdot Media, is your hub for all things parallel software development, from weekly thought leadership blogs to news, videos, case studies, tutorials and more. Take a look and join the conversation now. http://goparallel.sourceforge.net/ _______________________________________________ Opensaf-devel mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/opensaf-devel
