During cluster start, one node (node 1) broadcasts an up msg to the other
nodes. A remote node (node 2) gets this msg and initiates a connection to
node 1 (connect()). Similarly, node 1 initiates a connection to node 2 after
node 2 broadcasts its up msg.
Besides calling connect() to node 1, node 2 also adds the IP and ID info of
node 1 to the database.
But before that, node 2 may also have accepted the connection coming from
node 1. The accept handling also adds the node ID of node 1. So the node ID
info of node 1 is added to the database in node 2 twice. This causes the
socket connection to be closed and the node to be restarted.
This patch changes connection processing to retrieve the node from the
database by node IP instead of node ID. This rejects the duplicate connection
establishment between the 2 nodes and also the duplicate addition of the node
IP to the database.
---
src/dtm/dtmnd/dtm.h | 2 +-
src/dtm/dtmnd/dtm_inter_trans.cc | 3 ++-
src/dtm/dtmnd/dtm_node_db.cc | 47 ++++++++++++++++++++++++++++++---------
src/dtm/dtmnd/dtm_node_sockets.cc | 6 ++++-
4 files changed, 44 insertions(+), 14 deletions(-)
diff --git a/src/dtm/dtmnd/dtm.h b/src/dtm/dtmnd/dtm.h
index 28c811e65..73ed1e001 100644
--- a/src/dtm/dtmnd/dtm.h
+++ b/src/dtm/dtmnd/dtm.h
@@ -99,7 +99,7 @@ typedef struct dtm_snd_msg_elem {
extern void node_discovery_process(void *arg);
extern uint32_t dtm_cb_init(DTM_INTERNODE_CB *dtms_cb);
-extern DTM_NODE_DB *dtm_node_get_by_id(uint32_t nodeid);
+extern DTM_NODE_DB *dtm_node_get(uint8_t *key, int i);
extern DTM_NODE_DB *dtm_node_getnext_by_id(uint32_t node_id);
extern uint32_t dtm_node_add(DTM_NODE_DB *node, int i);
extern uint32_t dtm_node_delete(DTM_NODE_DB *nnode, int i);
diff --git a/src/dtm/dtmnd/dtm_inter_trans.cc b/src/dtm/dtmnd/dtm_inter_trans.cc
index 9d8335466..d367c4ea8 100644
--- a/src/dtm/dtmnd/dtm_inter_trans.cc
+++ b/src/dtm/dtmnd/dtm_inter_trans.cc
@@ -235,9 +235,10 @@ static uint32_t dtm_internode_snd_msg_common(DTM_NODE_DB
*node, uint8_t *buffer,
uint32_t dtm_internode_snd_msg_to_node(uint8_t *buffer, uint16_t len,
NODE_ID node_id) {
DTM_NODE_DB *node = nullptr;
+ uint8_t *key = reinterpret_cast<uint8_t *>(&node_id);
TRACE_ENTER();
- node = dtm_node_get_by_id(node_id);
+ node = dtm_node_get(key, 0);
if (nullptr != node) {
if (NCSCC_RC_SUCCESS != dtm_internode_snd_msg_common(node, buffer, len)) {
diff --git a/src/dtm/dtmnd/dtm_node_db.cc b/src/dtm/dtmnd/dtm_node_db.cc
index 1c9da4dac..179829241 100644
--- a/src/dtm/dtmnd/dtm_node_db.cc
+++ b/src/dtm/dtmnd/dtm_node_db.cc
@@ -123,24 +123,49 @@ uint32_t dtm_cb_init(DTM_INTERNODE_CB *dtms_cb) {
}
/**
- * Retrieve node from node db by nodeid
+ * Retrieve node from node db
*
- * @param nodeid
+ * @param key
+ * @param i
*
- * @return NCSCC_RC_SUCCESS
- * @return NCSCC_RC_FAILURE
+ * @return node
*
*/
-DTM_NODE_DB *dtm_node_get_by_id(uint32_t nodeid) {
+DTM_NODE_DB *dtm_node_get(uint8_t *key, int i) {
TRACE_ENTER();
DTM_INTERNODE_CB *dtms_cb = dtms_gl_cb;
+ DTM_NODE_DB *node = nullptr;
- DTM_NODE_DB *node = reinterpret_cast<DTM_NODE_DB *>(ncs_patricia_tree_get(
- &dtms_cb->nodeid_tree, reinterpret_cast<uint8_t *>(&nodeid)));
- if (node != nullptr) {
- /* Adjust the pointer */
- node = reinterpret_cast<DTM_NODE_DB *>(reinterpret_cast<char *>(node) -
- offsetof(DTM_NODE_DB, pat_nodeid));
+ osafassert(key != nullptr);
+
+ switch (i) {
+ case 0:
+ TRACE("DTM: Getting node from the database by node_id : %u as key",
+ *reinterpret_cast<NODE_ID *>(key));
+ node = reinterpret_cast<DTM_NODE_DB *>(ncs_patricia_tree_get(
+ &dtms_cb->nodeid_tree, key));
+ if (node != nullptr) {
+ // Adjust the pointer
+ node = reinterpret_cast<DTM_NODE_DB *>(reinterpret_cast<char *>(node) -
+ offsetof(DTM_NODE_DB, pat_nodeid));
+ }
+ break;
+
+ case 1:
+ osafassert(false);
+ break;
+
+ case 2:
+ TRACE("DTM: Getting node from the database by node_ip : %s as key",
+ reinterpret_cast<char *>(key));
+ node = reinterpret_cast<DTM_NODE_DB *>(ncs_patricia_tree_get(
+ &dtms_cb->ip_addr_tree, key));
+ if (node != nullptr) {
+ // Adjust the pointer
+ node = reinterpret_cast<DTM_NODE_DB *>(reinterpret_cast<char *>(node) -
+ offsetof(DTM_NODE_DB, pat_ip_address));
+ }
+ break;
}
TRACE_LEAVE();
diff --git a/src/dtm/dtmnd/dtm_node_sockets.cc
b/src/dtm/dtmnd/dtm_node_sockets.cc
index 0ddfc6f58..a6a5be4da 100644
--- a/src/dtm/dtmnd/dtm_node_sockets.cc
+++ b/src/dtm/dtmnd/dtm_node_sockets.cc
@@ -566,7 +566,11 @@ DTM_NODE_DB *dtm_process_connect(DTM_INTERNODE_CB
*dtms_cb, uint8_t *data,
}
}
- new_node = dtm_node_get_by_id(node.node_id);
+ // Retrieve node from nodeip instead of nodeid to prevent the case dtm
already
+ // accepted remote node connection and dtm already add the nodeip to
database.
+ // If so, dtm should drop this message as discovery in progress.
+ uint8_t *nodeip = reinterpret_cast<uint8_t *>(node.node_ip);
+ new_node = dtm_node_get(nodeip, 2);
if (new_node != nullptr) {
if ((new_node->node_id == 0) || (new_node->node_id == node.node_id) ||
(strncmp(node.node_ip, new_node->node_ip, INET6_ADDRSTRLEN) == 0)) {
--
2.15.1
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel