osaf/services/saf/clmsv/clms/clms.h     |   1 +
 osaf/services/saf/clmsv/clms/clms_amf.c |   2 ++
 osaf/services/saf/clmsv/clms/clms_evt.c |  26 +++++++++++++++++++++++++-
 osaf/services/saf/clmsv/clms/clms_imm.c |  18 ------------------
 4 files changed, 28 insertions(+), 19 deletions(-)


The standby CLM queues up node_downs and clears the queue for
unprocessed entries after becoming active. During role change (controller
failover), the standby processes these entries but unintentionally from a
separate thread, i.e. from within the implementer set thread.
This results in a scenario where two threads can try to update the same node 
entry.
The patch serialises the processing of node_down events during controller role 
change,
by moving the node_down processing out of the implementer_set thread.
In a certain user's setup (tony hart), this issue was reproducible wherein,
during a failover, while the new ACTIVE was processing the NODE_DOWN of the
previous active,
the main thread was processing the NODE_DOWN of a payload node.
One of the threads deleted a node that was being accessed by the other thread.
The problem here was that the node_down processing during failover should not
have been
done from within the implementer set thread. This was a mistake.
This patch removes any extra processing from the implementer_set thread.
The patch is tested by tony hart @btisystems and works fine.

diff --git a/osaf/services/saf/clmsv/clms/clms.h 
b/osaf/services/saf/clmsv/clms/clms.h
--- a/osaf/services/saf/clmsv/clms/clms.h
+++ b/osaf/services/saf/clmsv/clms/clms.h
@@ -119,5 +119,6 @@ extern void ckpt_cluster_rec(void);
 extern void clms_cb_dump(void);
 extern uint32_t clms_send_is_member_info(CLMS_CB * cb, SaClmNodeIdT node_id,  
SaBoolT member, SaBoolT is_configured);
 extern void clm_imm_reinit_bg(CLMS_CB * cb);
+extern void proc_downs_during_rolechange (void);
 
 #endif   /* ifndef CLMS_H */
diff --git a/osaf/services/saf/clmsv/clms/clms_amf.c 
b/osaf/services/saf/clmsv/clms/clms_amf.c
--- a/osaf/services/saf/clmsv/clms/clms_amf.c
+++ b/osaf/services/saf/clmsv/clms/clms_amf.c
@@ -257,6 +257,8 @@ static void clms_amf_csi_set_callback(Sa
        if (role_change == true) {
                if(clms_cb->ha_state == SA_AMF_HA_ACTIVE) {
                        clms_imm_impl_set(clms_cb);
+                       proc_downs_during_rolechange();
+
                        /* Unconditionally refresh IMM for runtime attributes */
                        clms_switchon_all_pending_rtupdates();
                }
diff --git a/osaf/services/saf/clmsv/clms/clms_evt.c 
b/osaf/services/saf/clmsv/clms/clms_evt.c
--- a/osaf/services/saf/clmsv/clms/clms_evt.c
+++ b/osaf/services/saf/clmsv/clms/clms_evt.c
@@ -529,7 +529,7 @@ static uint32_t proc_rda_evt(CLMSV_CLMS_
                        /* fail over, become implementer */
                        clms_imm_impl_set(clms_cb);
 
-
+                       proc_downs_during_rolechange();
 
                        if ((rc = clms_mds_change_role(clms_cb)) != 
NCSCC_RC_SUCCESS) {
                                LOG_ER("clms_mds_change_role FAILED %u", rc);
@@ -1679,3 +1679,27 @@ static uint32_t clms_ack_to_response_msg
        }
        return mds_rc;
 }
+
+void proc_downs_during_rolechange (void)
+{
+       NODE_DOWN_LIST *node_down_rec = NULL;
+       NODE_DOWN_LIST *temp_node_down_rec = NULL;
+       CLMS_CLUSTER_NODE *node = NULL;
+
+       /* Process The NodeDowns that occurred during the role change */
+       node_down_rec = clms_cb->node_down_list_head;
+       while (node_down_rec) {
+               /*Remove NODE_DOWN_REC from the NODE_DOWN_LIST */
+               node = clms_node_get_by_id(node_down_rec->node_id);
+               temp_node_down_rec = node_down_rec;
+               if (node != NULL)
+                       clms_track_send_node_down(node);
+               node_down_rec = node_down_rec->next;
+               /*Free the NODE_DOWN_REC */
+               free(temp_node_down_rec);
+       }
+       clms_cb->node_down_list_head = NULL;
+       clms_cb->node_down_list_tail = NULL;
+
+}
+
diff --git a/osaf/services/saf/clmsv/clms/clms_imm.c 
b/osaf/services/saf/clmsv/clms/clms_imm.c
--- a/osaf/services/saf/clmsv/clms/clms_imm.c
+++ b/osaf/services/saf/clmsv/clms/clms_imm.c
@@ -66,9 +66,6 @@ static void *imm_impl_set_node_down_proc
 {
        SaAisErrorT rc;
        CLMS_CB *cb = (CLMS_CB *) _cb;
-       NODE_DOWN_LIST *node_down_rec = NULL;
-       NODE_DOWN_LIST *temp_node_down_rec = NULL;
-       CLMS_CLUSTER_NODE *node = NULL;
        int msecs_waited;
 
        TRACE_ENTER();
@@ -115,21 +112,6 @@ static void *imm_impl_set_node_down_proc
                exit(EXIT_FAILURE);
        }
 
-       /* Process The NodeDowns that occurred during the role change */
-       node_down_rec = clms_cb->node_down_list_head;
-       while (node_down_rec) {
-               /*Remove NODE_DOWN_REC from the NODE_DOWN_LIST */
-               node = clms_node_get_by_id(node_down_rec->node_id);
-               temp_node_down_rec = node_down_rec;
-               if (node != NULL)
-                       clms_track_send_node_down(node);
-               node_down_rec = node_down_rec->next;
-               /*Free the NODE_DOWN_REC */
-               free(temp_node_down_rec);
-       }
-       clms_cb->node_down_list_head = NULL;
-       clms_cb->node_down_list_tail = NULL;
-
        cb->is_impl_set = true;
 
        TRACE_LEAVE();

------------------------------------------------------------------------------
"Accelerate Dev Cycles with Automated Cross-Browser Testing - For FREE
Instantly run your Selenium tests across 300+ browser/OS combos.  Get 
unparalleled scalability from the best Selenium testing platform available.
Simple to use. Nothing to install. Get started now for free."
http://p.sf.net/sfu/SauceLabs
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to