Hi Gary and Hans, Thanks for your review. I updated Gary comment and running patch test again.
@Hans. Because there is just small changes that does not related to the ticket. (the changes in dtm_node_delete() and dtm_node_add() with replace number by enum) so I don’t separate the patch. I will send the new after patch test ran. Regards Canh -----Original Message----- From: Gary Lee <[email protected]> Sent: Thursday, March 7, 2019 6:58 AM To: Hans Nordebäck <[email protected]>; Canh Van Truong <[email protected]>; Anders Widell <[email protected]>; Minh Hon Chau <[email protected]> Cc: [email protected] Subject: Re: [PATCH 1/1] dtm: Fix dtm close socket due to duplication of adding node IP info [#2984] Hi Canh One minor comment, KEY_TYPES should probably be called KeyTypes. Also, can you make it an enum class, rather than plain enum? Thanks Gary On 7/3/19 12:53 am, Hans Nordebäck wrote: > Hi Canh, > > ack, review only. I think it would be good to separate the re-factoring > part in a separate ticket though. > > /BR Hans > > On 12/18/18 08:25, Canh Van Truong wrote: >> During cluster start, one node (node 1) broadcast up msg to other node. The >> remote node (node 2) get this msg and send the connection to node 1 >> (connect()). >> Similarly node 1 send the connection to node 2 after node 2 broadcast up >> msg to. >> Beside of node 2 connect() to node 1, node 2 also add the IP and ID info of >> node 1 to database. >> But before of that, node 2 may also accept the connection that come from >> node 1. The >> acception is also add node ID of node 1. So there is 2 times adding the node >> ID >> info of node 1 to database in node 2. This causes the socket connection is >> closed >> and node is restart again. >> >> The patch change to retrieve node from database by node IP instead node ID in >> processing connection. This will reject the double of establishing connection >> between 2 nodes and also double of adding node IP to database. >> --- >> src/dtm/dtmnd/dtm.h | 11 ++++-- >> src/dtm/dtmnd/dtm_inter_trans.cc | 3 +- >> src/dtm/dtmnd/dtm_node.cc | 2 +- >> src/dtm/dtmnd/dtm_node_db.cc | 79 >> ++++++++++++++++++++++++--------------- >> src/dtm/dtmnd/dtm_node_sockets.cc | 20 ++++++---- >> 5 files changed, 72 insertions(+), 43 deletions(-) >> >> diff --git a/src/dtm/dtmnd/dtm.h b/src/dtm/dtmnd/dtm.h >> index 28c811e65..a06b8f503 100644 >> --- a/src/dtm/dtmnd/dtm.h >> +++ b/src/dtm/dtmnd/dtm.h >> @@ -45,6 +45,11 @@ typedef enum { >> DTM_MBX_MSG_TYPE = 5, >> } MBX_POST_TYPES; >> >> +typedef enum { >> + DTM_NODE_ID_KEY_TYPE = 0, >> + DTM_NODE_IP_KEY_TYPE = 2, >> +} KEY_TYPES; >> + >> typedef struct dtm_rcv_msg_elem { >> void *next; >> MBX_POST_TYPES type; >> @@ -99,10 +104,10 @@ typedef struct dtm_snd_msg_elem { >> >> extern void node_discovery_process(void *arg); >> extern uint32_t dtm_cb_init(DTM_INTERNODE_CB *dtms_cb); >> -extern DTM_NODE_DB *dtm_node_get_by_id(uint32_t nodeid); >> +extern DTM_NODE_DB *dtm_node_get(uint8_t *key, KEY_TYPES type); >> extern DTM_NODE_DB *dtm_node_getnext_by_id(uint32_t node_id); >> -extern uint32_t dtm_node_add(DTM_NODE_DB *node, int i); >> -extern uint32_t dtm_node_delete(DTM_NODE_DB *nnode, int i); >> +extern uint32_t dtm_node_add(DTM_NODE_DB *node, KEY_TYPES type); >> +extern uint32_t dtm_node_delete(DTM_NODE_DB *nnode, KEY_TYPES type); >> extern DTM_NODE_DB *dtm_node_new(const DTM_NODE_DB *new_node); >> extern void dtm_print_config(DTM_INTERNODE_CB *config); >> extern int dtm_read_config(DTM_INTERNODE_CB *config, >> diff --git a/src/dtm/dtmnd/dtm_inter_trans.cc >> b/src/dtm/dtmnd/dtm_inter_trans.cc >> index 9d8335466..9b4194614 100644 >> --- a/src/dtm/dtmnd/dtm_inter_trans.cc >> +++ b/src/dtm/dtmnd/dtm_inter_trans.cc >> @@ -235,9 +235,10 @@ static uint32_t >> dtm_internode_snd_msg_common(DTM_NODE_DB *node, uint8_t *buffer, >> uint32_t dtm_internode_snd_msg_to_node(uint8_t *buffer, uint16_t len, >> NODE_ID node_id) { >> DTM_NODE_DB *node = nullptr; >> + uint8_t *key = reinterpret_cast<uint8_t *>(&node_id); >> >> TRACE_ENTER(); >> - node = dtm_node_get_by_id(node_id); >> + node = dtm_node_get(key, DTM_NODE_ID_KEY_TYPE); >> >> if (nullptr != node) { >> if (NCSCC_RC_SUCCESS != dtm_internode_snd_msg_common(node, buffer, >> len)) { >> diff --git a/src/dtm/dtmnd/dtm_node.cc b/src/dtm/dtmnd/dtm_node.cc >> index de2f94738..72506f262 100644 >> --- a/src/dtm/dtmnd/dtm_node.cc >> +++ b/src/dtm/dtmnd/dtm_node.cc >> @@ -125,7 +125,7 @@ uint32_t dtm_process_node_info(DTM_INTERNODE_CB >> *dtms_cb, DTM_NODE_DB *node, >> memcpy(node->node_name, data, nodename_len); >> node->node_name[nodename_len] = '\0'; >> node->comm_status = true; >> - if (dtm_node_add(node, 0) != NCSCC_RC_SUCCESS) { >> + if (dtm_node_add(node, DTM_NODE_ID_KEY_TYPE) != NCSCC_RC_SUCCESS) { >> LOG_ER( >> "DTM: A node already exists in the cluster with similar " >> "configuration (possible duplicate IP address and/or node >> id), please " >> diff --git a/src/dtm/dtmnd/dtm_node_db.cc b/src/dtm/dtmnd/dtm_node_db.cc >> index 1c9da4dac..1038f0918 100644 >> --- a/src/dtm/dtmnd/dtm_node_db.cc >> +++ b/src/dtm/dtmnd/dtm_node_db.cc >> @@ -123,24 +123,49 @@ uint32_t dtm_cb_init(DTM_INTERNODE_CB *dtms_cb) { >> } >> >> /** >> - * Retrieve node from node db by nodeid >> + * Retrieve node from node db >> * >> - * @param nodeid >> + * @param key >> + * @param i >> * >> - * @return NCSCC_RC_SUCCESS >> - * @return NCSCC_RC_FAILURE >> + * @return node >> * >> */ >> -DTM_NODE_DB *dtm_node_get_by_id(uint32_t nodeid) { >> +DTM_NODE_DB *dtm_node_get(uint8_t *key, KEY_TYPES type) { >> TRACE_ENTER(); >> DTM_INTERNODE_CB *dtms_cb = dtms_gl_cb; >> + DTM_NODE_DB *node = nullptr; >> >> - DTM_NODE_DB *node = reinterpret_cast<DTM_NODE_DB *>(ncs_patricia_tree_get( >> - &dtms_cb->nodeid_tree, reinterpret_cast<uint8_t *>(&nodeid))); >> - if (node != nullptr) { >> - /* Adjust the pointer */ >> - node = reinterpret_cast<DTM_NODE_DB *>(reinterpret_cast<char *>(node) - >> - offsetof(DTM_NODE_DB, >> pat_nodeid)); >> + osafassert(key != nullptr); >> + >> + switch (type) { >> + case DTM_NODE_ID_KEY_TYPE: >> + TRACE("DTM: Getting node from the database by node_id : %u as key", >> + *reinterpret_cast<NODE_ID *>(key)); >> + node = reinterpret_cast<DTM_NODE_DB *>(ncs_patricia_tree_get( >> + &dtms_cb->nodeid_tree, key)); >> + if (node != nullptr) { >> + // Adjust the pointer >> + node = reinterpret_cast<DTM_NODE_DB *>(reinterpret_cast<char >> *>(node) - >> + offsetof(DTM_NODE_DB, pat_nodeid)); >> + } >> + break; >> + >> + case DTM_NODE_IP_KEY_TYPE: >> + TRACE("DTM: Getting node from the database by node_ip : %s as key", >> + reinterpret_cast<char *>(key)); >> + node = reinterpret_cast<DTM_NODE_DB *>(ncs_patricia_tree_get( >> + &dtms_cb->ip_addr_tree, key)); >> + if (node != nullptr) { >> + // Adjust the pointer >> + node = reinterpret_cast<DTM_NODE_DB *>(reinterpret_cast<char >> *>(node) - >> + offsetof(DTM_NODE_DB, pat_ip_address)); >> + } >> + break; >> + >> + default: >> + osafassert(false); >> + break; >> } >> >> TRACE_LEAVE(); >> @@ -189,16 +214,16 @@ DTM_NODE_DB *dtm_node_getnext_by_id(uint32_t node_id) { >> * @return NCSCC_RC_FAILURE >> * >> */ >> -uint32_t dtm_node_add(DTM_NODE_DB *node, int i) { >> +uint32_t dtm_node_add(DTM_NODE_DB *node, KEY_TYPES type) { >> uint32_t rc = NCSCC_RC_SUCCESS; >> DTM_INTERNODE_CB *dtms_cb = dtms_gl_cb; >> TRACE_ENTER(); >> - TRACE("DTM:value of i %d", i); >> + TRACE("DTM:value of i %d", type); >> >> osafassert(node != nullptr); >> >> - switch (i) { >> - case 0: >> + switch (type) { >> + case DTM_NODE_ID_KEY_TYPE: >> TRACE("DTM:Adding node_id to the database with node_id :%u as key", >> node->node_id); >> node->pat_nodeid.key_info = reinterpret_cast<uint8_t >> *>(&(node->node_id)); >> @@ -210,11 +235,8 @@ uint32_t dtm_node_add(DTM_NODE_DB *node, int i) { >> goto done; >> } >> break; >> - case 1: >> - osafassert(false); >> - break; >> >> - case 2: >> + case DTM_NODE_IP_KEY_TYPE: >> TRACE("DTM:Adding node_ip to the database with node_ip :%s as key", >> node->node_ip); >> node->pat_ip_address.key_info = >> @@ -230,8 +252,8 @@ uint32_t dtm_node_add(DTM_NODE_DB *node, int i) { >> >> default: >> TRACE("DTM:Invalid Patricia add"); >> - rc = NCSCC_RC_FAILURE; >> - goto done; >> + osafassert(false); >> + break; >> } >> >> done: >> @@ -248,15 +270,15 @@ done: >> * @return NCSCC_RC_FAILURE >> * >> */ >> -uint32_t dtm_node_delete(DTM_NODE_DB *node, int i) { >> +uint32_t dtm_node_delete(DTM_NODE_DB *node, KEY_TYPES type) { >> uint32_t rc = NCSCC_RC_SUCCESS; >> DTM_INTERNODE_CB *dtms_cb = dtms_gl_cb; >> - TRACE_ENTER2("DTM:value of i %d", i); >> + TRACE_ENTER2("DTM:value of i %d", type); >> >> osafassert(node != nullptr); >> >> - switch (i) { >> - case 0: >> + switch (type) { >> + case DTM_NODE_ID_KEY_TYPE: >> if (node->node_id != 0 && node->pat_nodeid.key_info) { >> TRACE("DTM:Deleting node_id from the database with node_id :%u as >> key", >> node->node_id); >> @@ -269,11 +291,8 @@ uint32_t dtm_node_delete(DTM_NODE_DB *node, int i) { >> } >> } >> break; >> - case 1: >> - osafassert(false); >> - break; >> >> - case 2: >> + case DTM_NODE_IP_KEY_TYPE: >> if (node->node_ip != nullptr && node->pat_ip_address.key_info) { >> TRACE("DTM:Deleting node_ip from the database with node_ip :%s >> as key", >> node->node_ip); >> @@ -290,7 +309,7 @@ uint32_t dtm_node_delete(DTM_NODE_DB *node, int i) { >> default: >> TRACE( >> "DTM:Deleting node from the database with unknown option :%d >> as key", >> - i); >> + type); >> osafassert(0); >> } >> >> diff --git a/src/dtm/dtmnd/dtm_node_sockets.cc >> b/src/dtm/dtmnd/dtm_node_sockets.cc >> index 0ddfc6f58..a0b835c29 100644 >> --- a/src/dtm/dtmnd/dtm_node_sockets.cc >> +++ b/src/dtm/dtmnd/dtm_node_sockets.cc >> @@ -171,11 +171,11 @@ void dtm_comm_socket_close(DTM_NODE_DB *node) { >> } >> } >> >> - if (dtm_node_delete(node, 0) != NCSCC_RC_SUCCESS) { >> + if (dtm_node_delete(node, DTM_NODE_ID_KEY_TYPE) != NCSCC_RC_SUCCESS) { >> LOG_ER("DTM :dtm_node_delete failed "); >> } >> >> - if (dtm_node_delete(node, 2) != NCSCC_RC_SUCCESS) { >> + if (dtm_node_delete(node, DTM_NODE_IP_KEY_TYPE) != NCSCC_RC_SUCCESS) { >> LOG_ER("DTM :dtm_node_delete failed "); >> } >> >> @@ -566,7 +566,11 @@ DTM_NODE_DB *dtm_process_connect(DTM_INTERNODE_CB >> *dtms_cb, uint8_t *data, >> } >> } >> >> - new_node = dtm_node_get_by_id(node.node_id); >> + // Retrieve node from nodeip instead of nodeid to prevent the case dtm >> already >> + // accepted remote node connection and dtm already add the nodeip to >> database. >> + // If so, dtm should drop this message as discovery in progress. >> + uint8_t *nodeip = reinterpret_cast<uint8_t *>(node.node_ip); >> + new_node = dtm_node_get(nodeip, DTM_NODE_IP_KEY_TYPE); >> if (new_node != nullptr) { >> if ((new_node->node_id == 0) || (new_node->node_id == node.node_id) || >> (strncmp(node.node_ip, new_node->node_ip, INET6_ADDRSTRLEN) == >> 0)) { >> @@ -593,10 +597,10 @@ DTM_NODE_DB *dtm_process_connect(DTM_INTERNODE_CB >> *dtms_cb, uint8_t *data, >> 0))) { >> TRACE("DTM: deleting stale enty cluster_id: %d, node_id :%u, >> node_ip:%s", >> node.cluster_id, node.node_id, node.node_ip); >> - if (dtm_node_delete(new_node, 0) != NCSCC_RC_SUCCESS) { >> + if (dtm_node_delete(new_node, DTM_NODE_ID_KEY_TYPE) != >> NCSCC_RC_SUCCESS) { >> LOG_ER("DTM :dtm_node_delete failed (recv())"); >> } >> - if (dtm_node_delete(new_node, 2) != NCSCC_RC_SUCCESS) { >> + if (dtm_node_delete(new_node, DTM_NODE_IP_KEY_TYPE) != >> NCSCC_RC_SUCCESS) { >> LOG_ER("DTM :dtm_node_delete failed (recv())"); >> } >> free(new_node); >> @@ -621,7 +625,7 @@ DTM_NODE_DB *dtm_process_connect(DTM_INTERNODE_CB >> *dtms_cb, uint8_t *data, >> if (sock_desc != -1) { >> TRACE("DTM: dtm_node_add .node_ip: %s node_id: %u, comm_socket %d", >> new_node->node_ip, new_node->node_id, new_node->comm_socket); >> - if (dtm_node_add(new_node, 0) != NCSCC_RC_SUCCESS) { >> + if (dtm_node_add(new_node, DTM_NODE_ID_KEY_TYPE) != NCSCC_RC_SUCCESS) { >> LOG_ER("DTM: dtm_node_add failed .node_ip: %s, node_id: %u", >> new_node->node_ip, new_node->node_id); >> dtm_comm_socket_close(new_node); >> @@ -629,7 +633,7 @@ DTM_NODE_DB *dtm_process_connect(DTM_INTERNODE_CB >> *dtms_cb, uint8_t *data, >> goto node_fail; >> } >> >> - if (dtm_node_add(new_node, 2) != NCSCC_RC_SUCCESS) { >> + if (dtm_node_add(new_node, DTM_NODE_IP_KEY_TYPE) != NCSCC_RC_SUCCESS) { >> LOG_ER("DTM: dtm_node_add failed .node_ip: %s, node_id: %u", >> new_node->node_ip, new_node->node_id); >> dtm_comm_socket_close(new_node); >> @@ -748,7 +752,7 @@ DTM_NODE_DB *dtm_process_accept(DTM_INTERNODE_CB >> *dtms_cb, int stream_sock) { >> continue; >> } >> >> - if (dtm_node_add(new_node, 2) != NCSCC_RC_SUCCESS) { >> + if (dtm_node_add(new_node, DTM_NODE_IP_KEY_TYPE) != NCSCC_RC_SUCCESS) { >> LOG_ER("DTM: dtm_node_add failed .node_ip: %s, node_id: %u", >> new_node->node_ip, new_node->node_id); >> dtm_comm_socket_close(new_node); _______________________________________________ Opensaf-devel mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/opensaf-devel
