osaf/services/saf/clmsv/clms/clms.h | 1 + osaf/services/saf/clmsv/clms/clms_amf.c | 2 ++ osaf/services/saf/clmsv/clms/clms_evt.c | 26 +++++++++++++++++++++++++- osaf/services/saf/clmsv/clms/clms_imm.c | 18 ------------------ 4 files changed, 28 insertions(+), 19 deletions(-)
The standby CLM queues up node_downs and, after becoming active, clears the queue of unprocessed entries. During a role change (controller failover), the standby processes these entries, but unintentionally does so from a separate thread, i.e. from within the implementer-set thread. This results in a scenario where two threads can try to update the same node entry. The patch serialises the processing of node_down events during a controller role change by moving the node_down processing out of the implementer-set thread. In a certain user's setup (Tony Hart), this issue was reproducible: during a failover, while the new ACTIVE was processing the NODE_DOWN of the previous active in the implementer-set thread, the main thread was processing the NODE_DOWN of a payload node. One of the threads deleted a node that was being accessed by the other thread. The problem here was that the node_down processing during failover should not have been done from within the implementer-set thread. This was a mistake. This patch removes any extra processing from the implementer-set thread. The patch has been tested by Tony Hart @btisystems and works fine. 
diff --git a/osaf/services/saf/clmsv/clms/clms.h b/osaf/services/saf/clmsv/clms/clms.h --- a/osaf/services/saf/clmsv/clms/clms.h +++ b/osaf/services/saf/clmsv/clms/clms.h @@ -119,5 +119,6 @@ extern void ckpt_cluster_rec(void); extern void clms_cb_dump(void); extern uint32_t clms_send_is_member_info(CLMS_CB * cb, SaClmNodeIdT node_id, SaBoolT member, SaBoolT is_configured); extern void clm_imm_reinit_bg(CLMS_CB * cb); +extern void proc_downs_during_rolechange (void); #endif /* ifndef CLMS_H */ diff --git a/osaf/services/saf/clmsv/clms/clms_amf.c b/osaf/services/saf/clmsv/clms/clms_amf.c --- a/osaf/services/saf/clmsv/clms/clms_amf.c +++ b/osaf/services/saf/clmsv/clms/clms_amf.c @@ -257,6 +257,8 @@ static void clms_amf_csi_set_callback(Sa if (role_change == true) { if(clms_cb->ha_state == SA_AMF_HA_ACTIVE) { clms_imm_impl_set(clms_cb); + proc_downs_during_rolechange(); + /* Unconditionally refresh IMM for runtime attributes */ clms_switchon_all_pending_rtupdates(); } diff --git a/osaf/services/saf/clmsv/clms/clms_evt.c b/osaf/services/saf/clmsv/clms/clms_evt.c --- a/osaf/services/saf/clmsv/clms/clms_evt.c +++ b/osaf/services/saf/clmsv/clms/clms_evt.c @@ -529,7 +529,7 @@ static uint32_t proc_rda_evt(CLMSV_CLMS_ /* fail over, become implementer */ clms_imm_impl_set(clms_cb); - + proc_downs_during_rolechange(); if ((rc = clms_mds_change_role(clms_cb)) != NCSCC_RC_SUCCESS) { LOG_ER("clms_mds_change_role FAILED %u", rc); @@ -1679,3 +1679,27 @@ static uint32_t clms_ack_to_response_msg } return mds_rc; } + +void proc_downs_during_rolechange (void) +{ + NODE_DOWN_LIST *node_down_rec = NULL; + NODE_DOWN_LIST *temp_node_down_rec = NULL; + CLMS_CLUSTER_NODE *node = NULL; + + /* Process The NodeDowns that occurred during the role change */ + node_down_rec = clms_cb->node_down_list_head; + while (node_down_rec) { + /*Remove NODE_DOWN_REC from the NODE_DOWN_LIST */ + node = clms_node_get_by_id(node_down_rec->node_id); + temp_node_down_rec = node_down_rec; + if (node != NULL) + 
clms_track_send_node_down(node); + node_down_rec = node_down_rec->next; + /*Free the NODE_DOWN_REC */ + free(temp_node_down_rec); + } + clms_cb->node_down_list_head = NULL; + clms_cb->node_down_list_tail = NULL; + +} + diff --git a/osaf/services/saf/clmsv/clms/clms_imm.c b/osaf/services/saf/clmsv/clms/clms_imm.c --- a/osaf/services/saf/clmsv/clms/clms_imm.c +++ b/osaf/services/saf/clmsv/clms/clms_imm.c @@ -66,9 +66,6 @@ static void *imm_impl_set_node_down_proc { SaAisErrorT rc; CLMS_CB *cb = (CLMS_CB *) _cb; - NODE_DOWN_LIST *node_down_rec = NULL; - NODE_DOWN_LIST *temp_node_down_rec = NULL; - CLMS_CLUSTER_NODE *node = NULL; int msecs_waited; TRACE_ENTER(); @@ -115,21 +112,6 @@ static void *imm_impl_set_node_down_proc exit(EXIT_FAILURE); } - /* Process The NodeDowns that occurred during the role change */ - node_down_rec = clms_cb->node_down_list_head; - while (node_down_rec) { - /*Remove NODE_DOWN_REC from the NODE_DOWN_LIST */ - node = clms_node_get_by_id(node_down_rec->node_id); - temp_node_down_rec = node_down_rec; - if (node != NULL) - clms_track_send_node_down(node); - node_down_rec = node_down_rec->next; - /*Free the NODE_DOWN_REC */ - free(temp_node_down_rec); - } - clms_cb->node_down_list_head = NULL; - clms_cb->node_down_list_tail = NULL; - cb->is_impl_set = true; TRACE_LEAVE(); ------------------------------------------------------------------------------ "Accelerate Dev Cycles with Automated Cross-Browser Testing - For FREE Instantly run your Selenium tests across 300+ browser/OS combos. Get unparalleled scalability from the best Selenium testing platform available. Simple to use. Nothing to install. Get started now for free." http://p.sf.net/sfu/SauceLabs _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel