Hi Canh,
ack, review only. I think it would be good to separate the re-factoring
part in a separate ticket though.
/BR Hans
On 12/18/18 08:25, Canh Van Truong wrote:
> During cluster start, one node (node 1) broadcast up msg to other node. The
> remote node (node 2) get this msg and send the connection to node 1
> (connect()).
> Similarly node 1 send the connection to node 2 after node 2 broadcast up msg
> to.
> Beside of node 2 connect() to node 1, node 2 also add the IP and ID info of
> node 1 to database.
> But before of that, node 2 may also accept the connection that come from node
> 1. The
> acception is also add node ID of node 1. So there is 2 times adding the node
> ID
> info of node 1 to database in node 2. This causes the socket connection is
> closed
> and node is restart again.
>
> The patch change to retrieve node from database by node IP instead node ID in
> processing connection. This will reject the double of establishing connection
> between 2 nodes and also double of adding node IP to database.
> ---
> src/dtm/dtmnd/dtm.h | 11 ++++--
> src/dtm/dtmnd/dtm_inter_trans.cc | 3 +-
> src/dtm/dtmnd/dtm_node.cc | 2 +-
> src/dtm/dtmnd/dtm_node_db.cc | 79
> ++++++++++++++++++++++++---------------
> src/dtm/dtmnd/dtm_node_sockets.cc | 20 ++++++----
> 5 files changed, 72 insertions(+), 43 deletions(-)
>
> diff --git a/src/dtm/dtmnd/dtm.h b/src/dtm/dtmnd/dtm.h
> index 28c811e65..a06b8f503 100644
> --- a/src/dtm/dtmnd/dtm.h
> +++ b/src/dtm/dtmnd/dtm.h
> @@ -45,6 +45,11 @@ typedef enum {
> DTM_MBX_MSG_TYPE = 5,
> } MBX_POST_TYPES;
>
> +typedef enum {
> + DTM_NODE_ID_KEY_TYPE = 0,
> + DTM_NODE_IP_KEY_TYPE = 2,
> +} KEY_TYPES;
> +
> typedef struct dtm_rcv_msg_elem {
> void *next;
> MBX_POST_TYPES type;
> @@ -99,10 +104,10 @@ typedef struct dtm_snd_msg_elem {
>
> extern void node_discovery_process(void *arg);
> extern uint32_t dtm_cb_init(DTM_INTERNODE_CB *dtms_cb);
> -extern DTM_NODE_DB *dtm_node_get_by_id(uint32_t nodeid);
> +extern DTM_NODE_DB *dtm_node_get(uint8_t *key, KEY_TYPES type);
> extern DTM_NODE_DB *dtm_node_getnext_by_id(uint32_t node_id);
> -extern uint32_t dtm_node_add(DTM_NODE_DB *node, int i);
> -extern uint32_t dtm_node_delete(DTM_NODE_DB *nnode, int i);
> +extern uint32_t dtm_node_add(DTM_NODE_DB *node, KEY_TYPES type);
> +extern uint32_t dtm_node_delete(DTM_NODE_DB *nnode, KEY_TYPES type);
> extern DTM_NODE_DB *dtm_node_new(const DTM_NODE_DB *new_node);
> extern void dtm_print_config(DTM_INTERNODE_CB *config);
> extern int dtm_read_config(DTM_INTERNODE_CB *config,
> diff --git a/src/dtm/dtmnd/dtm_inter_trans.cc
> b/src/dtm/dtmnd/dtm_inter_trans.cc
> index 9d8335466..9b4194614 100644
> --- a/src/dtm/dtmnd/dtm_inter_trans.cc
> +++ b/src/dtm/dtmnd/dtm_inter_trans.cc
> @@ -235,9 +235,10 @@ static uint32_t dtm_internode_snd_msg_common(DTM_NODE_DB
> *node, uint8_t *buffer,
> uint32_t dtm_internode_snd_msg_to_node(uint8_t *buffer, uint16_t len,
> NODE_ID node_id) {
> DTM_NODE_DB *node = nullptr;
> + uint8_t *key = reinterpret_cast<uint8_t *>(&node_id);
>
> TRACE_ENTER();
> - node = dtm_node_get_by_id(node_id);
> + node = dtm_node_get(key, DTM_NODE_ID_KEY_TYPE);
>
> if (nullptr != node) {
> if (NCSCC_RC_SUCCESS != dtm_internode_snd_msg_common(node, buffer,
> len)) {
> diff --git a/src/dtm/dtmnd/dtm_node.cc b/src/dtm/dtmnd/dtm_node.cc
> index de2f94738..72506f262 100644
> --- a/src/dtm/dtmnd/dtm_node.cc
> +++ b/src/dtm/dtmnd/dtm_node.cc
> @@ -125,7 +125,7 @@ uint32_t dtm_process_node_info(DTM_INTERNODE_CB *dtms_cb,
> DTM_NODE_DB *node,
> memcpy(node->node_name, data, nodename_len);
> node->node_name[nodename_len] = '\0';
> node->comm_status = true;
> - if (dtm_node_add(node, 0) != NCSCC_RC_SUCCESS) {
> + if (dtm_node_add(node, DTM_NODE_ID_KEY_TYPE) != NCSCC_RC_SUCCESS) {
> LOG_ER(
> "DTM: A node already exists in the cluster with similar "
> "configuration (possible duplicate IP address and/or node id),
> please "
> diff --git a/src/dtm/dtmnd/dtm_node_db.cc b/src/dtm/dtmnd/dtm_node_db.cc
> index 1c9da4dac..1038f0918 100644
> --- a/src/dtm/dtmnd/dtm_node_db.cc
> +++ b/src/dtm/dtmnd/dtm_node_db.cc
> @@ -123,24 +123,49 @@ uint32_t dtm_cb_init(DTM_INTERNODE_CB *dtms_cb) {
> }
>
> /**
> - * Retrieve node from node db by nodeid
> + * Retrieve node from node db
> *
> - * @param nodeid
> + * @param key
> + * @param i
> *
> - * @return NCSCC_RC_SUCCESS
> - * @return NCSCC_RC_FAILURE
> + * @return node
> *
> */
> -DTM_NODE_DB *dtm_node_get_by_id(uint32_t nodeid) {
> +DTM_NODE_DB *dtm_node_get(uint8_t *key, KEY_TYPES type) {
> TRACE_ENTER();
> DTM_INTERNODE_CB *dtms_cb = dtms_gl_cb;
> + DTM_NODE_DB *node = nullptr;
>
> - DTM_NODE_DB *node = reinterpret_cast<DTM_NODE_DB *>(ncs_patricia_tree_get(
> - &dtms_cb->nodeid_tree, reinterpret_cast<uint8_t *>(&nodeid)));
> - if (node != nullptr) {
> - /* Adjust the pointer */
> - node = reinterpret_cast<DTM_NODE_DB *>(reinterpret_cast<char *>(node) -
> - offsetof(DTM_NODE_DB,
> pat_nodeid));
> + osafassert(key != nullptr);
> +
> + switch (type) {
> + case DTM_NODE_ID_KEY_TYPE:
> + TRACE("DTM: Getting node from the database by node_id : %u as key",
> + *reinterpret_cast<NODE_ID *>(key));
> + node = reinterpret_cast<DTM_NODE_DB *>(ncs_patricia_tree_get(
> + &dtms_cb->nodeid_tree, key));
> + if (node != nullptr) {
> + // Adjust the pointer
> + node = reinterpret_cast<DTM_NODE_DB *>(reinterpret_cast<char
> *>(node) -
> + offsetof(DTM_NODE_DB, pat_nodeid));
> + }
> + break;
> +
> + case DTM_NODE_IP_KEY_TYPE:
> + TRACE("DTM: Getting node from the database by node_ip : %s as key",
> + reinterpret_cast<char *>(key));
> + node = reinterpret_cast<DTM_NODE_DB *>(ncs_patricia_tree_get(
> + &dtms_cb->ip_addr_tree, key));
> + if (node != nullptr) {
> + // Adjust the pointer
> + node = reinterpret_cast<DTM_NODE_DB *>(reinterpret_cast<char
> *>(node) -
> + offsetof(DTM_NODE_DB, pat_ip_address));
> + }
> + break;
> +
> + default:
> + osafassert(false);
> + break;
> }
>
> TRACE_LEAVE();
> @@ -189,16 +214,16 @@ DTM_NODE_DB *dtm_node_getnext_by_id(uint32_t node_id) {
> * @return NCSCC_RC_FAILURE
> *
> */
> -uint32_t dtm_node_add(DTM_NODE_DB *node, int i) {
> +uint32_t dtm_node_add(DTM_NODE_DB *node, KEY_TYPES type) {
> uint32_t rc = NCSCC_RC_SUCCESS;
> DTM_INTERNODE_CB *dtms_cb = dtms_gl_cb;
> TRACE_ENTER();
> - TRACE("DTM:value of i %d", i);
> + TRACE("DTM:value of i %d", type);
>
> osafassert(node != nullptr);
>
> - switch (i) {
> - case 0:
> + switch (type) {
> + case DTM_NODE_ID_KEY_TYPE:
> TRACE("DTM:Adding node_id to the database with node_id :%u as key",
> node->node_id);
> node->pat_nodeid.key_info = reinterpret_cast<uint8_t
> *>(&(node->node_id));
> @@ -210,11 +235,8 @@ uint32_t dtm_node_add(DTM_NODE_DB *node, int i) {
> goto done;
> }
> break;
> - case 1:
> - osafassert(false);
> - break;
>
> - case 2:
> + case DTM_NODE_IP_KEY_TYPE:
> TRACE("DTM:Adding node_ip to the database with node_ip :%s as key",
> node->node_ip);
> node->pat_ip_address.key_info =
> @@ -230,8 +252,8 @@ uint32_t dtm_node_add(DTM_NODE_DB *node, int i) {
>
> default:
> TRACE("DTM:Invalid Patricia add");
> - rc = NCSCC_RC_FAILURE;
> - goto done;
> + osafassert(false);
> + break;
> }
>
> done:
> @@ -248,15 +270,15 @@ done:
> * @return NCSCC_RC_FAILURE
> *
> */
> -uint32_t dtm_node_delete(DTM_NODE_DB *node, int i) {
> +uint32_t dtm_node_delete(DTM_NODE_DB *node, KEY_TYPES type) {
> uint32_t rc = NCSCC_RC_SUCCESS;
> DTM_INTERNODE_CB *dtms_cb = dtms_gl_cb;
> - TRACE_ENTER2("DTM:value of i %d", i);
> + TRACE_ENTER2("DTM:value of i %d", type);
>
> osafassert(node != nullptr);
>
> - switch (i) {
> - case 0:
> + switch (type) {
> + case DTM_NODE_ID_KEY_TYPE:
> if (node->node_id != 0 && node->pat_nodeid.key_info) {
> TRACE("DTM:Deleting node_id from the database with node_id :%u as
> key",
> node->node_id);
> @@ -269,11 +291,8 @@ uint32_t dtm_node_delete(DTM_NODE_DB *node, int i) {
> }
> }
> break;
> - case 1:
> - osafassert(false);
> - break;
>
> - case 2:
> + case DTM_NODE_IP_KEY_TYPE:
> if (node->node_ip != nullptr && node->pat_ip_address.key_info) {
> TRACE("DTM:Deleting node_ip from the database with node_ip :%s as
> key",
> node->node_ip);
> @@ -290,7 +309,7 @@ uint32_t dtm_node_delete(DTM_NODE_DB *node, int i) {
> default:
> TRACE(
> "DTM:Deleting node from the database with unknown option :%d as
> key",
> - i);
> + type);
> osafassert(0);
> }
>
> diff --git a/src/dtm/dtmnd/dtm_node_sockets.cc
> b/src/dtm/dtmnd/dtm_node_sockets.cc
> index 0ddfc6f58..a0b835c29 100644
> --- a/src/dtm/dtmnd/dtm_node_sockets.cc
> +++ b/src/dtm/dtmnd/dtm_node_sockets.cc
> @@ -171,11 +171,11 @@ void dtm_comm_socket_close(DTM_NODE_DB *node) {
> }
> }
>
> - if (dtm_node_delete(node, 0) != NCSCC_RC_SUCCESS) {
> + if (dtm_node_delete(node, DTM_NODE_ID_KEY_TYPE) != NCSCC_RC_SUCCESS) {
> LOG_ER("DTM :dtm_node_delete failed ");
> }
>
> - if (dtm_node_delete(node, 2) != NCSCC_RC_SUCCESS) {
> + if (dtm_node_delete(node, DTM_NODE_IP_KEY_TYPE) != NCSCC_RC_SUCCESS) {
> LOG_ER("DTM :dtm_node_delete failed ");
> }
>
> @@ -566,7 +566,11 @@ DTM_NODE_DB *dtm_process_connect(DTM_INTERNODE_CB
> *dtms_cb, uint8_t *data,
> }
> }
>
> - new_node = dtm_node_get_by_id(node.node_id);
> + // Retrieve node from nodeip instead of nodeid to prevent the case dtm
> already
> + // accepted remote node connection and dtm already add the nodeip to
> database.
> + // If so, dtm should drop this message as discovery in progress.
> + uint8_t *nodeip = reinterpret_cast<uint8_t *>(node.node_ip);
> + new_node = dtm_node_get(nodeip, DTM_NODE_IP_KEY_TYPE);
> if (new_node != nullptr) {
> if ((new_node->node_id == 0) || (new_node->node_id == node.node_id) ||
> (strncmp(node.node_ip, new_node->node_ip, INET6_ADDRSTRLEN) == 0)) {
> @@ -593,10 +597,10 @@ DTM_NODE_DB *dtm_process_connect(DTM_INTERNODE_CB
> *dtms_cb, uint8_t *data,
> 0))) {
> TRACE("DTM: deleting stale enty cluster_id: %d, node_id :%u,
> node_ip:%s",
> node.cluster_id, node.node_id, node.node_ip);
> - if (dtm_node_delete(new_node, 0) != NCSCC_RC_SUCCESS) {
> + if (dtm_node_delete(new_node, DTM_NODE_ID_KEY_TYPE) !=
> NCSCC_RC_SUCCESS) {
> LOG_ER("DTM :dtm_node_delete failed (recv())");
> }
> - if (dtm_node_delete(new_node, 2) != NCSCC_RC_SUCCESS) {
> + if (dtm_node_delete(new_node, DTM_NODE_IP_KEY_TYPE) !=
> NCSCC_RC_SUCCESS) {
> LOG_ER("DTM :dtm_node_delete failed (recv())");
> }
> free(new_node);
> @@ -621,7 +625,7 @@ DTM_NODE_DB *dtm_process_connect(DTM_INTERNODE_CB
> *dtms_cb, uint8_t *data,
> if (sock_desc != -1) {
> TRACE("DTM: dtm_node_add .node_ip: %s node_id: %u, comm_socket %d",
> new_node->node_ip, new_node->node_id, new_node->comm_socket);
> - if (dtm_node_add(new_node, 0) != NCSCC_RC_SUCCESS) {
> + if (dtm_node_add(new_node, DTM_NODE_ID_KEY_TYPE) != NCSCC_RC_SUCCESS) {
> LOG_ER("DTM: dtm_node_add failed .node_ip: %s, node_id: %u",
> new_node->node_ip, new_node->node_id);
> dtm_comm_socket_close(new_node);
> @@ -629,7 +633,7 @@ DTM_NODE_DB *dtm_process_connect(DTM_INTERNODE_CB
> *dtms_cb, uint8_t *data,
> goto node_fail;
> }
>
> - if (dtm_node_add(new_node, 2) != NCSCC_RC_SUCCESS) {
> + if (dtm_node_add(new_node, DTM_NODE_IP_KEY_TYPE) != NCSCC_RC_SUCCESS) {
> LOG_ER("DTM: dtm_node_add failed .node_ip: %s, node_id: %u",
> new_node->node_ip, new_node->node_id);
> dtm_comm_socket_close(new_node);
> @@ -748,7 +752,7 @@ DTM_NODE_DB *dtm_process_accept(DTM_INTERNODE_CB
> *dtms_cb, int stream_sock) {
> continue;
> }
>
> - if (dtm_node_add(new_node, 2) != NCSCC_RC_SUCCESS) {
> + if (dtm_node_add(new_node, DTM_NODE_IP_KEY_TYPE) != NCSCC_RC_SUCCESS) {
> LOG_ER("DTM: dtm_node_add failed .node_ip: %s, node_id: %u",
> new_node->node_ip, new_node->node_id);
> dtm_comm_socket_close(new_node);
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel