osaf/services/saf/clmsv/clms/clms_evt.c |  60 +++++++++++++++++++++++++++++++++
 1 files changed, 60 insertions(+), 0 deletions(-)


On top of the fix for #1120, protection is needed while processing node_down:
add a check for whether a checkpointed entry already exists (instead of
handling it during failover processing).
Without this check, stale node_leave messages can still be generated.
The general idea for processing both MDS_NODE_DOWN and CHECKPOINTED_NODE_DOWN
should be as follows:
just delete the entry if it is already present; otherwise queue the entry.
It is cleaner that way.

diff --git a/osaf/services/saf/clmsv/clms/clms_evt.c 
b/osaf/services/saf/clmsv/clms/clms_evt.c
--- a/osaf/services/saf/clmsv/clms/clms_evt.c
+++ b/osaf/services/saf/clmsv/clms/clms_evt.c
@@ -556,6 +556,56 @@ static uint32_t proc_rda_evt(CLMSV_CLMS_
        return rc;
 }
 
+/**
+ * Delete every record matching node_id from clms_cb's node_down list.
+ * @return true if at least one record was deleted, false otherwise.
+ */
+static bool delete_existing_nodedown_records(SaClmNodeIdT node_id)
+{
+       NODE_DOWN_LIST *node_down_rec = clms_cb->node_down_list_head;
+       NODE_DOWN_LIST *prev_rec = NULL;
+       NODE_DOWN_LIST *next_rec = NULL;
+       bool found = false;
+       TRACE_ENTER();
+
+       /**
+        * Walk through the list to find any(all) matching records
+        * and delete them. If a record already exists, it just means
+        * it is either duplicate node_down or node_down added
+        * by checkpoint_down processing.
+        */
+       while (node_down_rec) {
+               /* Save the successor: node_down_rec may be freed below */
+               next_rec = node_down_rec->next;
+               if (node_down_rec->node_id != node_id) {
+                       /* Record is kept; it becomes the predecessor */
+                       prev_rec = node_down_rec;
+                       node_down_rec = next_rec;
+                       continue;
+               }
+               TRACE("Record found");
+               if ((node_down_rec->ndown_status != CHECKPOINT_PROCESSED) && found)
+                       LOG_ER("Duplicate node_downs received");
+
+               /* Unlink: prev_rec == NULL means the record is the head */
+               if (prev_rec == NULL)
+                       clms_cb->node_down_list_head = next_rec;
+               else
+                       prev_rec->next = next_rec;
+               /* Removing the last record moves the tail back (NULL if empty) */
+               if (next_rec == NULL)
+                       clms_cb->node_down_list_tail = prev_rec;
+
+               /* Free the NODE_DOWN_REC; prev_rec must not advance onto it */
+               free(node_down_rec);
+               node_down_rec = next_rec;
+               found = true;
+       }
+
+       TRACE_LEAVE();
+       return found;
+}
+
 /**
  * This is the function which is called when clms receives any
  * a Cluster Node UP/DN message via MDS subscription.
@@ -583,6 +633,15 @@ static uint32_t proc_mds_node_evt(CLMSV_
                clms_track_send_node_down(node);
 
        } else if (clms_cb->ha_state == SA_AMF_HA_STANDBY) {
+               /**
+                * Check if already a matching entry exists, if so delete that 
entry
+                * and do nothing. It means that there is already an entry added
+                * by checkpoint processing of node_down.
+                */
+               if (delete_existing_nodedown_records(node_id) == true) {
+                       TRACE_LEAVE();
+                       return rc;
+               } else {
                TRACE("Adding the node_down record for node: %u to the list", 
node_id);
                NODE_DOWN_LIST *node_down_rec = NULL;
                if (NULL == (node_down_rec = (NODE_DOWN_LIST *) 
malloc(sizeof(NODE_DOWN_LIST)))) {
@@ -600,6 +659,7 @@ static uint32_t proc_mds_node_evt(CLMSV_
                }
                clms_cb->node_down_list_tail = node_down_rec;
                node_down_rec->ndown_status = MDS_DOWN_PROCESSED;
+               }
        }
 
  done:

------------------------------------------------------------------------------
Download BIRT iHub F-Type - The Free Enterprise-Grade BIRT Server
from Actuate! Instantly Supercharge Your Business Reports and Dashboards
with Interactivity, Sharing, Native Excel Exports, App Integration & more
Get technology previously reserved for billion-dollar corporations, FREE
http://pubads.g.doubleclick.net/gampad/clk?id=157005751&iu=/4140/ostg.clktrk
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to