During cluster start, one node (node 1) broadcast up msg to other node. The
remote node (node 2) get this msg and send the connection to node 1 (connect()).
Similarly node 1 send the connection to  node 2 after node 2 broadcast up msg 
to.
Beside of node 2 connect() to node 1, node 2 also add the IP and ID info of 
node 1 to database.
But before of that, node 2 may also accept the connection that come from node 
1. The
acception is also add node ID of node 1. So there is 2 times adding the node ID
info of node 1 to database in node 2. This causes the socket connection is 
closed
and node is  restart again.

The patch change to retrieve node from database by node IP instead node ID in
processing connection. This will reject the double of establishing connection
between 2 nodes and also double of adding node IP to database.
---
 src/dtm/dtmnd/dtm.h               | 11 ++++--
 src/dtm/dtmnd/dtm_inter_trans.cc  |  3 +-
 src/dtm/dtmnd/dtm_node.cc         |  2 +-
 src/dtm/dtmnd/dtm_node_db.cc      | 80 ++++++++++++++++++++++++---------------
 src/dtm/dtmnd/dtm_node_sockets.cc | 27 +++++++++----
 5 files changed, 79 insertions(+), 44 deletions(-)

diff --git a/src/dtm/dtmnd/dtm.h b/src/dtm/dtmnd/dtm.h
index 28c811e65..6d69921c9 100644
--- a/src/dtm/dtmnd/dtm.h
+++ b/src/dtm/dtmnd/dtm.h
@@ -97,12 +97,17 @@ typedef struct dtm_snd_msg_elem {
   } info;
 } DTM_SND_MSG_ELEM;
 
+enum class KeyTypes{
+  kDtmNodeIdKeyType = 0,
+  kDtmNodeIpKeyType = 2,
+};
+
 extern void node_discovery_process(void *arg);
 extern uint32_t dtm_cb_init(DTM_INTERNODE_CB *dtms_cb);
-extern DTM_NODE_DB *dtm_node_get_by_id(uint32_t nodeid);
+extern DTM_NODE_DB *dtm_node_get(uint8_t *key, KeyTypes type);
 extern DTM_NODE_DB *dtm_node_getnext_by_id(uint32_t node_id);
-extern uint32_t dtm_node_add(DTM_NODE_DB *node, int i);
-extern uint32_t dtm_node_delete(DTM_NODE_DB *nnode, int i);
+extern uint32_t dtm_node_add(DTM_NODE_DB *node, KeyTypes type);
+extern uint32_t dtm_node_delete(DTM_NODE_DB *nnode, KeyTypes type);
 extern DTM_NODE_DB *dtm_node_new(const DTM_NODE_DB *new_node);
 extern void dtm_print_config(DTM_INTERNODE_CB *config);
 extern int dtm_read_config(DTM_INTERNODE_CB *config,
diff --git a/src/dtm/dtmnd/dtm_inter_trans.cc b/src/dtm/dtmnd/dtm_inter_trans.cc
index 9d8335466..96d25208f 100644
--- a/src/dtm/dtmnd/dtm_inter_trans.cc
+++ b/src/dtm/dtmnd/dtm_inter_trans.cc
@@ -235,9 +235,10 @@ static uint32_t dtm_internode_snd_msg_common(DTM_NODE_DB 
*node, uint8_t *buffer,
 uint32_t dtm_internode_snd_msg_to_node(uint8_t *buffer, uint16_t len,
                                        NODE_ID node_id) {
   DTM_NODE_DB *node = nullptr;
+  uint8_t *key = reinterpret_cast<uint8_t *>(&node_id);
 
   TRACE_ENTER();
-  node = dtm_node_get_by_id(node_id);
+  node = dtm_node_get(key, KeyTypes::kDtmNodeIdKeyType);
 
   if (nullptr != node) {
     if (NCSCC_RC_SUCCESS != dtm_internode_snd_msg_common(node, buffer, len)) {
diff --git a/src/dtm/dtmnd/dtm_node.cc b/src/dtm/dtmnd/dtm_node.cc
index de2f94738..5e51f7a17 100644
--- a/src/dtm/dtmnd/dtm_node.cc
+++ b/src/dtm/dtmnd/dtm_node.cc
@@ -125,7 +125,7 @@ uint32_t dtm_process_node_info(DTM_INTERNODE_CB *dtms_cb, 
DTM_NODE_DB *node,
       memcpy(node->node_name, data, nodename_len);
       node->node_name[nodename_len] = '\0';
       node->comm_status = true;
-      if (dtm_node_add(node, 0) != NCSCC_RC_SUCCESS) {
+      if (dtm_node_add(node, KeyTypes::kDtmNodeIdKeyType) != NCSCC_RC_SUCCESS) 
{
         LOG_ER(
             "DTM:  A node already exists in the cluster with similar "
             "configuration (possible duplicate IP address and/or node id), 
please "
diff --git a/src/dtm/dtmnd/dtm_node_db.cc b/src/dtm/dtmnd/dtm_node_db.cc
index 1c9da4dac..ff1f44b6f 100644
--- a/src/dtm/dtmnd/dtm_node_db.cc
+++ b/src/dtm/dtmnd/dtm_node_db.cc
@@ -123,24 +123,49 @@ uint32_t dtm_cb_init(DTM_INTERNODE_CB *dtms_cb) {
 }
 
 /**
- * Retrieve node from node db by nodeid
+ * Retrieve node from node db
  *
- * @param nodeid
+ * @param key
+ * @param i
  *
- * @return NCSCC_RC_SUCCESS
- * @return NCSCC_RC_FAILURE
+ * @return node
  *
  */
-DTM_NODE_DB *dtm_node_get_by_id(uint32_t nodeid) {
+DTM_NODE_DB *dtm_node_get(uint8_t *key, KeyTypes type) {
   TRACE_ENTER();
   DTM_INTERNODE_CB *dtms_cb = dtms_gl_cb;
+  DTM_NODE_DB *node = nullptr;
 
-  DTM_NODE_DB *node = reinterpret_cast<DTM_NODE_DB *>(ncs_patricia_tree_get(
-      &dtms_cb->nodeid_tree, reinterpret_cast<uint8_t *>(&nodeid)));
-  if (node != nullptr) {
-    /* Adjust the pointer */
-    node = reinterpret_cast<DTM_NODE_DB *>(reinterpret_cast<char *>(node) -
-                                           offsetof(DTM_NODE_DB, pat_nodeid));
+  osafassert(key != nullptr);
+
+  switch (type) {
+    case KeyTypes::kDtmNodeIdKeyType:
+      TRACE("DTM: Getting node from the database by node_id : %u as key",
+            *reinterpret_cast<NODE_ID *>(key));
+      node = reinterpret_cast<DTM_NODE_DB *>(ncs_patricia_tree_get(
+          &dtms_cb->nodeid_tree, key));
+      if (node != nullptr) {
+        // Adjust the pointer
+        node = reinterpret_cast<DTM_NODE_DB *>(reinterpret_cast<char *>(node) -
+            offsetof(DTM_NODE_DB, pat_nodeid));
+      }
+      break;
+
+    case KeyTypes::kDtmNodeIpKeyType:
+      TRACE("DTM: Getting node from the database by node_ip : %s as key",
+            reinterpret_cast<char *>(key));
+      node = reinterpret_cast<DTM_NODE_DB *>(ncs_patricia_tree_get(
+          &dtms_cb->ip_addr_tree, key));
+      if (node != nullptr) {
+        // Adjust the pointer
+        node = reinterpret_cast<DTM_NODE_DB *>(reinterpret_cast<char *>(node) -
+            offsetof(DTM_NODE_DB, pat_ip_address));
+      }
+      break;
+
+    default:
+      osafassert(false);
+      break;
   }
 
   TRACE_LEAVE();
@@ -189,16 +214,15 @@ DTM_NODE_DB *dtm_node_getnext_by_id(uint32_t node_id) {
  * @return NCSCC_RC_FAILURE
  *
  */
-uint32_t dtm_node_add(DTM_NODE_DB *node, int i) {
+uint32_t dtm_node_add(DTM_NODE_DB *node, KeyTypes type) {
   uint32_t rc = NCSCC_RC_SUCCESS;
   DTM_INTERNODE_CB *dtms_cb = dtms_gl_cb;
-  TRACE_ENTER();
-  TRACE("DTM:value of i %d", i);
+  TRACE_ENTER2("DTM:value of type %d", static_cast<int>(type));
 
   osafassert(node != nullptr);
 
-  switch (i) {
-    case 0:
+  switch (type) {
+    case KeyTypes::kDtmNodeIdKeyType:
       TRACE("DTM:Adding node_id to the database with node_id :%u as key",
             node->node_id);
       node->pat_nodeid.key_info = reinterpret_cast<uint8_t 
*>(&(node->node_id));
@@ -210,11 +234,8 @@ uint32_t dtm_node_add(DTM_NODE_DB *node, int i) {
         goto done;
       }
       break;
-    case 1:
-      osafassert(false);
-      break;
 
-    case 2:
+    case KeyTypes::kDtmNodeIpKeyType:
       TRACE("DTM:Adding node_ip to the database with node_ip :%s as key",
             node->node_ip);
       node->pat_ip_address.key_info =
@@ -230,8 +251,8 @@ uint32_t dtm_node_add(DTM_NODE_DB *node, int i) {
 
     default:
       TRACE("DTM:Invalid Patricia add");
-      rc = NCSCC_RC_FAILURE;
-      goto done;
+      osafassert(false);
+      break;
   }
 
 done:
@@ -248,15 +269,15 @@ done:
  * @return NCSCC_RC_FAILURE
  *
  */
-uint32_t dtm_node_delete(DTM_NODE_DB *node, int i) {
+uint32_t dtm_node_delete(DTM_NODE_DB *node, KeyTypes type) {
   uint32_t rc = NCSCC_RC_SUCCESS;
   DTM_INTERNODE_CB *dtms_cb = dtms_gl_cb;
-  TRACE_ENTER2("DTM:value of i %d", i);
+  TRACE_ENTER2("DTM:value of type %d", static_cast<int>(type));
 
   osafassert(node != nullptr);
 
-  switch (i) {
-    case 0:
+  switch (type) {
+    case KeyTypes::kDtmNodeIdKeyType:
       if (node->node_id != 0 && node->pat_nodeid.key_info) {
         TRACE("DTM:Deleting node_id from the database with node_id :%u as key",
               node->node_id);
@@ -269,11 +290,8 @@ uint32_t dtm_node_delete(DTM_NODE_DB *node, int i) {
         }
       }
       break;
-    case 1:
-      osafassert(false);
-      break;
 
-    case 2:
+    case KeyTypes::kDtmNodeIpKeyType:
       if (node->node_ip != nullptr && node->pat_ip_address.key_info) {
         TRACE("DTM:Deleting node_ip from the  database with node_ip :%s as 
key",
               node->node_ip);
@@ -290,7 +308,7 @@ uint32_t dtm_node_delete(DTM_NODE_DB *node, int i) {
     default:
       TRACE(
           "DTM:Deleting node from the database  with unknown option :%d as 
key",
-          i);
+          static_cast<int>(type));
       osafassert(0);
   }
 
diff --git a/src/dtm/dtmnd/dtm_node_sockets.cc 
b/src/dtm/dtmnd/dtm_node_sockets.cc
index 0ddfc6f58..41e16206b 100644
--- a/src/dtm/dtmnd/dtm_node_sockets.cc
+++ b/src/dtm/dtmnd/dtm_node_sockets.cc
@@ -171,11 +171,13 @@ void dtm_comm_socket_close(DTM_NODE_DB *node) {
       }
     }
 
-    if (dtm_node_delete(node, 0) != NCSCC_RC_SUCCESS) {
+    if (dtm_node_delete(node, KeyTypes::kDtmNodeIdKeyType) !=
+        NCSCC_RC_SUCCESS) {
       LOG_ER("DTM :dtm_node_delete failed ");
     }
 
-    if (dtm_node_delete(node, 2) != NCSCC_RC_SUCCESS) {
+    if (dtm_node_delete(node, KeyTypes::kDtmNodeIpKeyType) !=
+        NCSCC_RC_SUCCESS) {
       LOG_ER("DTM :dtm_node_delete failed ");
     }
 
@@ -566,7 +568,11 @@ DTM_NODE_DB *dtm_process_connect(DTM_INTERNODE_CB 
*dtms_cb, uint8_t *data,
     }
   }
 
-  new_node = dtm_node_get_by_id(node.node_id);
+  // Retrieve node from nodeip instead of nodeid to prevent the case dtm 
already
+  // accepted remote node connection and dtm already add the nodeip to 
database.
+  // If so, dtm should drop this message as discovery in progress.
+  uint8_t *nodeip = reinterpret_cast<uint8_t *>(node.node_ip);
+  new_node = dtm_node_get(nodeip, KeyTypes::kDtmNodeIpKeyType);
   if (new_node != nullptr) {
     if ((new_node->node_id == 0) || (new_node->node_id == node.node_id) ||
         (strncmp(node.node_ip, new_node->node_ip, INET6_ADDRSTRLEN) == 0)) {
@@ -593,10 +599,12 @@ DTM_NODE_DB *dtm_process_connect(DTM_INTERNODE_CB 
*dtms_cb, uint8_t *data,
                  0))) {
       TRACE("DTM: deleting stale enty cluster_id: %d, node_id :%u, node_ip:%s",
             node.cluster_id, node.node_id, node.node_ip);
-      if (dtm_node_delete(new_node, 0) != NCSCC_RC_SUCCESS) {
+      if (dtm_node_delete(new_node, KeyTypes::kDtmNodeIdKeyType) !=
+          NCSCC_RC_SUCCESS) {
         LOG_ER("DTM :dtm_node_delete failed (recv())");
       }
-      if (dtm_node_delete(new_node, 2) != NCSCC_RC_SUCCESS) {
+      if (dtm_node_delete(new_node, KeyTypes::kDtmNodeIpKeyType) !=
+          NCSCC_RC_SUCCESS) {
         LOG_ER("DTM :dtm_node_delete failed (recv())");
       }
       free(new_node);
@@ -621,7 +629,8 @@ DTM_NODE_DB *dtm_process_connect(DTM_INTERNODE_CB *dtms_cb, 
uint8_t *data,
   if (sock_desc != -1) {
     TRACE("DTM: dtm_node_add .node_ip: %s node_id: %u, comm_socket %d",
           new_node->node_ip, new_node->node_id, new_node->comm_socket);
-    if (dtm_node_add(new_node, 0) != NCSCC_RC_SUCCESS) {
+    if (dtm_node_add(new_node, KeyTypes::kDtmNodeIdKeyType) !=
+        NCSCC_RC_SUCCESS) {
       LOG_ER("DTM: dtm_node_add failed .node_ip: %s, node_id: %u",
              new_node->node_ip, new_node->node_id);
       dtm_comm_socket_close(new_node);
@@ -629,7 +638,8 @@ DTM_NODE_DB *dtm_process_connect(DTM_INTERNODE_CB *dtms_cb, 
uint8_t *data,
       goto node_fail;
     }
 
-    if (dtm_node_add(new_node, 2) != NCSCC_RC_SUCCESS) {
+    if (dtm_node_add(new_node, KeyTypes::kDtmNodeIpKeyType) !=
+        NCSCC_RC_SUCCESS) {
       LOG_ER("DTM: dtm_node_add failed .node_ip: %s, node_id: %u",
              new_node->node_ip, new_node->node_id);
       dtm_comm_socket_close(new_node);
@@ -748,7 +758,8 @@ DTM_NODE_DB *dtm_process_accept(DTM_INTERNODE_CB *dtms_cb, 
int stream_sock) {
       continue;
     }
 
-    if (dtm_node_add(new_node, 2) != NCSCC_RC_SUCCESS) {
+    if (dtm_node_add(new_node, KeyTypes::kDtmNodeIpKeyType) !=
+        NCSCC_RC_SUCCESS) {
       LOG_ER("DTM: dtm_node_add failed .node_ip: %s, node_id: %u",
              new_node->node_ip, new_node->node_id);
       dtm_comm_socket_close(new_node);
-- 
2.15.1



_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to