Ack, code reviewed and valgrind tool helgrind run.
A question, if avd_imm_reinit_bg is called with  imm_reinit_mutex taken, there 
will be a deadlock due to the conditional wait on 
imm_reinit_thread_startup_mutex , 
so what is the purpose of the imm_reinit_thread_startup_mutex? /Thanks HansN

-----Original Message-----
From: nagendr...@oracle.com [mailto:nagendr...@oracle.com] 
Sent: den 26 februari 2015 07:23
To: Hans Nordebäck; praveen.malv...@oracle.com; mathi.naic...@oracle.com; 
Anders Björnerstedt; Anders Widell
Cc: opensaf-devel@lists.sourceforge.net
Subject: [PATCH 1 of 1] amfd: add mutex to sync up among amfd threads [#405, 
#707]

 osaf/services/saf/amf/amfd/imm.cc  |   30 +++++++++-
 osaf/services/saf/amf/amfd/role.cc |  117 ++++++++++++++++++++++++++++++------
 2 files changed, 125 insertions(+), 22 deletions(-)


If imm returns BAD HANDLE to Amf, Amf reinitializes with imm intf in a separate 
thread.
If role change event comes during imm intf initialization time, ImplSet, 
ImplClear, etc fails with BAD HANDLE.

So, Amf need to take mutex and wait for imm intf initialization.

diff --git a/osaf/services/saf/amf/amfd/imm.cc 
b/osaf/services/saf/amf/amfd/imm.cc
--- a/osaf/services/saf/amf/amfd/imm.cc
+++ b/osaf/services/saf/amf/amfd/imm.cc
@@ -47,6 +47,7 @@
 #include <si.h>
 #include <csi.h>
 #include <si_dep.h>
+#include "osaf_utility.h"
 
 
 
@@ -54,7 +55,13 @@
  *   DEFINITIONS
  * ========================================================================
  */
-
+/* mutex for synchronising  imm initialization and role change events. 
+*/ pthread_mutex_t imm_reinit_mutex = PTHREAD_MUTEX_INITIALIZER;
+/* mutex for synchronising  amfd thread and imm initialization thread startup 
+   to make sure that imm initialization thread executes first and take
+   imm_reinit_mutex and then proceeds for initialization. */ static 
+pthread_mutex_t imm_reinit_thread_startup_mutex = 
+PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t 
+imm_reinit_thread_startup_cond = PTHREAD_COND_INITIALIZER;
 //
 // TODO(HANO) Temporary use this function instead of strdup which uses malloc.
 // Later on remove this function and use std::string instead @@ -1637,16 
+1644,23 @@ static void *avd_imm_reinit_bg_thread(vo
        uint32_t status;
 
        TRACE_ENTER();
+       osaf_mutex_lock_ordie(&imm_reinit_mutex);
+       /* Send signal that imm_reinit_mutex has been taken. */
+       osaf_mutex_lock_ordie(&imm_reinit_thread_startup_mutex);
+       pthread_cond_signal(&imm_reinit_thread_startup_cond);
+       osaf_mutex_unlock_ordie(&imm_reinit_thread_startup_mutex);
 
        immutilWrapperProfile.errorsAreFatal = 0;
 
        if ((rc = immutil_saImmOiInitialize_2(&cb->immOiHandle, &avd_callbacks, 
&immVersion)) != SA_AIS_OK) {
                LOG_ER("saImmOiInitialize failed %u", rc);
+               osaf_mutex_unlock_ordie(&imm_reinit_mutex);
                exit(EXIT_FAILURE);
        }
 
        if ((rc = immutil_saImmOiSelectionObjectGet(cb->immOiHandle, 
&cb->imm_sel_obj)) != SA_AIS_OK) {
                LOG_ER("saImmOiSelectionObjectGet failed %u", rc);
+               osaf_mutex_unlock_ordie(&imm_reinit_mutex);
                exit(EXIT_FAILURE);
        }
 
@@ -1654,17 +1668,20 @@ static void *avd_imm_reinit_bg_thread(vo
        if (cb->avail_state_avd == SA_AMF_HA_ACTIVE) {
                if (avd_imm_impl_set() != SA_AIS_OK) {
                        LOG_ER("exiting since avd_imm_impl_set failed");
+                       osaf_mutex_unlock_ordie(&imm_reinit_mutex);
                        exit(EXIT_FAILURE);
                }
        } else {
                /* become applier and re-read the config */
                if (avd_imm_applier_set() != SA_AIS_OK) {
                        LOG_ER("exiting since avd_imm_applier_set failed");
+                       osaf_mutex_unlock_ordie(&imm_reinit_mutex);
                        exit(EXIT_FAILURE);
                }
 
                if (avd_imm_config_get() != NCSCC_RC_SUCCESS) {
                        LOG_ER("avd_imm_config_get FAILED");
+                       osaf_mutex_unlock_ordie(&imm_reinit_mutex);
                        exit(EXIT_FAILURE);
                }
        }
@@ -1677,6 +1694,8 @@ static void *avd_imm_reinit_bg_thread(vo
        osafassert(status == NCSCC_RC_SUCCESS);
 
        LOG_NO("Finished re-initializing with IMM");
+       /* Release mutex taken.*/
+       osaf_mutex_unlock_ordie(&imm_reinit_mutex);
        TRACE_LEAVE();
        return NULL;
 }
@@ -1688,11 +1707,14 @@ void avd_imm_reinit_bg(void)  {
        pthread_t thread;
        pthread_attr_t attr;
+       int rc = 0;
 
        TRACE_ENTER();
 
        LOG_NO("Re-initializing with IMM");
 
+       osaf_mutex_lock_ordie(&imm_reinit_thread_startup_mutex);
+
        (void) saImmOiFinalize(avd_cb->immOiHandle);
 
        avd_cb->immOiHandle = 0;
@@ -1703,8 +1725,14 @@ void avd_imm_reinit_bg(void)
 
        if (pthread_create(&thread, &attr, avd_imm_reinit_bg_thread, avd_cb) != 
0) {
                LOG_ER("pthread_create FAILED: %s", strerror(errno));
+               osaf_mutex_unlock_ordie(&imm_reinit_thread_startup_mutex);
                exit(EXIT_FAILURE);
        }
+
+       rc = pthread_cond_wait(&imm_reinit_thread_startup_cond, 
&imm_reinit_thread_startup_mutex);
+       if (rc != 0) osaf_abort(rc);
+       osaf_mutex_unlock_ordie(&imm_reinit_thread_startup_mutex);
+
        pthread_attr_destroy(&attr);
 
        TRACE_LEAVE();
diff --git a/osaf/services/saf/amf/amfd/role.cc 
b/osaf/services/saf/amf/amfd/role.cc
--- a/osaf/services/saf/amf/amfd/role.cc
+++ b/osaf/services/saf/amf/amfd/role.cc
@@ -44,6 +44,9 @@
 #include <cluster.h>
 #include <clm.h>
 #include <si_dep.h>
+#include "osaf_utility.h"
+
+extern pthread_mutex_t imm_reinit_mutex;
 
 static uint32_t avd_role_failover(AVD_CL_CB *cb, SaAmfHAStateT role);  static 
uint32_t avd_role_failover_qsd_actv(AVD_CL_CB *cb, SaAmfHAStateT role); @@ 
-298,18 +301,30 @@ static uint32_t avd_role_failover(AVD_CL
         */
        avsv_dequeue_async_update_msgs(cb, false);
 
+       /* Take mutex before changing role as it may impact logic 
+          in avd_imm_reinit_bg_thread. If mutex is taken for imm 
+          initialization, then wait for its completion before changing role.*/
+       osaf_mutex_lock_ordie(&imm_reinit_mutex);
        cb->avail_state_avd = role;
+       osaf_mutex_unlock_ordie(&imm_reinit_mutex);
 
        /* Declare this standby as Active. Set Vdest role and MBCSv role */
        if (NCSCC_RC_SUCCESS != (status = avd_mds_set_vdest_role(cb, role))) {
                LOG_ER("FAILOVER StandBy --> Active, VDEST Change role failed 
");
                goto done;
        }
-
+       /* There is no need to take mutex in ImplClear because if imm
+          initialization were undergoing, above mutex took care. */
        /* Give up our IMM OI Applier role */
        if ((rc = immutil_saImmOiImplementerClear(cb->immOiHandle)) != 
SA_AIS_OK) {
                LOG_ER("FAILOVER StandBy --> Active FAILED, ImplementerClear 
failed %u", rc);
-               goto done;
+               /* If it fails with BAD HANDLE, reinit imm intf and continue.
+                  Let imm intf reinit take care of setting Impl/Applr based
+                  on Avd role. */
+               if (rc == SA_AIS_ERR_BAD_HANDLE) {
+                       avd_imm_reinit_bg();
+               } else
+                       goto done;
        }
 
        /* Time to send fail-over messages to all the AVND's */ @@ -325,11 
+340,22 @@ static uint32_t avd_role_failover(AVD_CL
 
        /* We have successfully changed role to Active. */
        cb->node_id_avd_other = 0;
+       /* Take mutex here, to sync with above avd_imm_reinit_bg call. */
+       osaf_mutex_lock_ordie(&imm_reinit_mutex);
+       if ((rc = immutil_saImmOiImplementerSet(avd_cb->immOiHandle, 
const_cast<SaImmOiImplementerNameT>("safAmfService"))) != SA_AIS_OK) {
+               osaf_mutex_unlock_ordie(&imm_reinit_mutex);
+               LOG_ER("FAILOVER StandBy --> Active FAILED, ImplementerSet 
failed %u", rc);
+               if (rc == SA_AIS_ERR_BAD_HANDLE) {
+                       avd_imm_reinit_bg();
+               } else if (rc == SA_AIS_ERR_EXIST) {
+                       /* This may arise if immutil_saImmOiImplementerClear
+                          failed and amf reinitializes imm interface and
+                          set impl in avd_imm_reinit_bg_thread.*/
+               } else
+                       goto done;
+       } else
+               osaf_mutex_unlock_ordie(&imm_reinit_mutex);
 
-       if ((rc = immutil_saImmOiImplementerSet(avd_cb->immOiHandle, 
const_cast<SaImmOiImplementerNameT>("safAmfService"))) != SA_AIS_OK) {
-               LOG_ER("FAILOVER StandBy --> Active FAILED, ImplementerSet 
failed %u", rc);
-               goto done;
-       }
        avd_cb->is_implementer = true;
 
 
@@ -486,12 +512,18 @@ static uint32_t avd_role_failover_qsd_ac
 
        cb->node_id_avd_other = 0;
 
+       /* Take mutex to be in sync with imm intf initialization.*/
+       osaf_mutex_lock_ordie(&imm_reinit_mutex);
        /* Give up our IMM OI applier role */
        if ((rc = immutil_saImmOiImplementerClear(cb->immOiHandle)) != 
SA_AIS_OK) {
+               osaf_mutex_unlock_ordie(&imm_reinit_mutex);
                LOG_ER("FAILOVER Quiesced --> Active FAILED, ImplementerClear 
failed %u", rc);
-               return NCSCC_RC_FAILURE;
-       }
-
+               if (rc == SA_AIS_ERR_BAD_HANDLE) {
+                       avd_imm_reinit_bg();
+               } else
+                       return NCSCC_RC_FAILURE;
+       } else
+               osaf_mutex_unlock_ordie(&imm_reinit_mutex);
 
        avd_imm_impl_set_task_create();
 
@@ -577,20 +609,40 @@ void avd_mds_qsd_role_evh(AVD_CL_CB *cb,
                _exit(EXIT_FAILURE); // should never get here...
        }
 
+       /* Take mutex here to sync with imm reinit thread.*/
+       osaf_mutex_lock_ordie(&imm_reinit_mutex);
+
        /* Give up IMM OI implementer role */
        if ((rc = immutil_saImmOiImplementerClear(cb->immOiHandle)) != 
SA_AIS_OK) {
+               osaf_mutex_unlock_ordie(&imm_reinit_mutex);
                LOG_ER("FAILOVER Active --> Quiesced FAILED, ImplementerClear 
failed %u", rc);
-               osafassert(0);
-       }
+               if (rc == SA_AIS_ERR_BAD_HANDLE) {
+                       avd_imm_reinit_bg();
+               } else
+                       osafassert(0);
+       } else
+               osaf_mutex_unlock_ordie(&imm_reinit_mutex);
+
        cb->is_implementer = false;
 
        /* Throw away all pending IMM updates, no longer implementer */
        Fifo::empty();
 
-       if (avd_imm_applier_set() != SA_AIS_OK) {
-               LOG_ER("avd_imm_applier_set FAILED");
-               osafassert(0);
-       }
+       /* Take mutex here, to sync with above avd_imm_reinit_bg call. */
+       osaf_mutex_lock_ordie(&imm_reinit_mutex);
+       if ((rc = avd_imm_applier_set()) != SA_AIS_OK) {
+               osaf_mutex_unlock_ordie(&imm_reinit_mutex);
+               LOG_ER("avd_imm_applier_set FAILED, %u", rc);
+               if (rc == SA_AIS_ERR_BAD_HANDLE) {
+                       avd_imm_reinit_bg();
+               } else if (rc == SA_AIS_ERR_EXIST) {
+                       /* This may arise if immutil_saImmOiImplementerClear
+                          failed and amf reinitializes imm interface and
+                          set applier in avd_imm_reinit_bg_thread.*/
+               } else
+                       osafassert(0);
+       } else
+               osaf_mutex_unlock_ordie(&imm_reinit_mutex);
 
        /* Now set the MBCSv role to quiesced, */
        if (NCSCC_RC_SUCCESS != (status = avsv_set_ckpt_role(cb, 
SA_AMF_HA_QUIESCED))) { @@ -981,7 +1033,12 @@ uint32_t 
amfd_switch_stdby_actv(AVD_CL_C
         */
        avsv_dequeue_async_update_msgs(cb, false);
 
+       /* Take mutex before changing role as it may impact logic 
+          in avd_imm_reinit_bg_thread. If mutex is taken for imm 
+          initialization, then wait for its completion before changing role.*/
+       osaf_mutex_lock_ordie(&imm_reinit_mutex);
        cb->avail_state_avd = SA_AMF_HA_ACTIVE;
+       osaf_mutex_unlock_ordie(&imm_reinit_mutex);
 
        /* Declare this standby as Active. Set Vdest role role */
        if (NCSCC_RC_SUCCESS != (status = avd_mds_set_vdest_role(cb, 
SA_AMF_HA_ACTIVE))) { @@ -1004,18 +1061,36 @@ uint32_t 
amfd_switch_stdby_actv(AVD_CL_C
        cb->swap_switch = false;
 
        /* Give up our IMM OI Applier role */
+       /* There is no need to take mutex in ImplClear because if imm
+          initialization were undergoing, above mutex took care. */
        if ((rc = immutil_saImmOiImplementerClear(cb->immOiHandle)) != 
SA_AIS_OK) {
                LOG_ER("Switch Standby --> Active FAILED, ImplementerClear 
failed %u", rc);
-               cb->swap_switch = false;
-               avd_d2d_chg_role_rsp(cb, NCSCC_RC_FAILURE, SA_AMF_HA_ACTIVE);
-               return NCSCC_RC_FAILURE;
+               if (rc == SA_AIS_ERR_BAD_HANDLE) {
+                       avd_imm_reinit_bg();
+               } else {
+                       cb->swap_switch = false;
+                       avd_d2d_chg_role_rsp(cb, NCSCC_RC_FAILURE, 
SA_AMF_HA_ACTIVE);
+                       return NCSCC_RC_FAILURE;
+               }
        }
 
+       /* Take mutex here, to sync with above avd_imm_reinit_bg call. */
+       osaf_mutex_lock_ordie(&imm_reinit_mutex);
        if ((rc = immutil_saImmOiImplementerSet(avd_cb->immOiHandle, 
const_cast<SaImmOiImplementerNameT>("safAmfService"))) != SA_AIS_OK) {
                LOG_ER("Switch Standby --> Active, ImplementerSet failed %u", 
rc);
-               avd_d2d_chg_role_rsp(cb, NCSCC_RC_FAILURE, SA_AMF_HA_ACTIVE);
-               return NCSCC_RC_FAILURE;
-       }
+               osaf_mutex_unlock_ordie(&imm_reinit_mutex);
+               if (rc == SA_AIS_ERR_BAD_HANDLE) {
+                       avd_imm_reinit_bg();
+               } else if (rc == SA_AIS_ERR_EXIST) {
+                       /* This may arise if immutil_saImmOiImplementerClear
+                          failed and amf reinit imm interface and set impl
+                          in avd_imm_reinit_bg_thread.*/
+               } else {
+                       avd_d2d_chg_role_rsp(cb, NCSCC_RC_FAILURE, 
SA_AMF_HA_ACTIVE);
+                       return NCSCC_RC_FAILURE;
+               }
+       } else
+               osaf_mutex_unlock_ordie(&imm_reinit_mutex);
 
        avd_cb->is_implementer = true;
        avd_cb->active_services_exist = true;

------------------------------------------------------------------------------
Dive into the World of Parallel Programming The Go Parallel Website, sponsored
by Intel and developed in partnership with Slashdot Media, is your hub for all
things parallel software development, from weekly thought leadership blogs to
news, videos, case studies, tutorials and more. Take a look and join the 
conversation now. http://goparallel.sourceforge.net/
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to