- Use a non-blocking connect() with a timeout when connecting a new socket.
- DTM does not exit when the connect fails.
---
src/dtm/dtmnd/dtm_node.h | 1 +
src/dtm/dtmnd/dtm_node_sockets.cc | 105 +++++++++++++++++++++++++++---
src/dtm/dtmnd/dtmd.conf | 2 +-
3 files changed, 98 insertions(+), 10 deletions(-)
diff --git a/src/dtm/dtmnd/dtm_node.h b/src/dtm/dtmnd/dtm_node.h
index 82435cc11..aaeba69c7 100644
--- a/src/dtm/dtmnd/dtm_node.h
+++ b/src/dtm/dtmnd/dtm_node.h
@@ -18,6 +18,7 @@
#ifndef DTM_DTMND_DTM_NODE_H_
#define DTM_DTMND_DTM_NODE_H_
+#define DTM_TCP_TIMEOUT_SECS 10
#include <sys/socket.h>
#include <cstddef>
diff --git a/src/dtm/dtmnd/dtm_node_sockets.cc
b/src/dtm/dtmnd/dtm_node_sockets.cc
index 7cb461810..d638b885d 100644
--- a/src/dtm/dtmnd/dtm_node_sockets.cc
+++ b/src/dtm/dtmnd/dtm_node_sockets.cc
@@ -221,6 +221,88 @@ uint32_t dtm_comm_socket_send(int sock_desc, const void
*buffer,
return rc;
}
+/*
+ * By default TCP timeouts can be very long. This can lead to blocking for a
+ * very long time waiting on connect(). This function sets the socket to
+ * non-blocking mode for the connect and returns the socket to blocking mode
+ * once the connect has been established. On any failure return the socket is
+ * left in non-blocking mode.
+ *
+ * \param sockd socket file descriptor
+ * \param sin socket address structure
+ * \param length size of address structure
+ *
+ * \return 0 on success, -1 on error, -2 if the connect did not complete within
+ *         DTM_TCP_TIMEOUT_SECS seconds
+ */
+int non_blocking_connect(int sockd, struct sockaddr *sin, socklen_t length) {
+  struct pollfd wset;
+  socklen_t len;
+  int flags, ret, opt = 0;
+
+  /* Set the socket fd to non-blocking mode */
+  if ((flags = fcntl(sockd, F_GETFL, NULL)) < 0) {
+    LOG_ER("Error fcntl(..., F_GETFL)");
+    exit(EXIT_FAILURE);
+  }
+  flags |= O_NONBLOCK;
+  if (fcntl(sockd, F_SETFL, flags) < 0) {
+    LOG_ER("Error fcntl(..., F_SETFL)");
+    exit(EXIT_FAILURE);
+  }
+
+  /* connect with timeout */
+  ret = connect(sockd, sin, length);
+  if (ret < 0) {
+    if (errno == EINPROGRESS) {
+      /* poll the fd until we get a connection, timeout, or error */
+      while (1) {
+        wset.fd = sockd;
+        wset.events = POLLOUT;
+
+        ret = poll(&wset, 1, DTM_TCP_TIMEOUT_SECS * 1000);
+        if (ret < 0) {
+          /* Interrupted by a signal: this is not a timeout, so poll again.
+           * NOTE: the full timeout is restarted on each retry. */
+          if (errno == EINTR) continue;
+          LOG_ER("Error poll");
+          return -1;
+        } else if (ret > 0) {
+          /* Socket polled for write: fetch the outcome of the connect */
+          len = sizeof(opt);
+          if (getsockopt(sockd, SOL_SOCKET, SO_ERROR,
+                         reinterpret_cast<void *>(&opt), &len) < 0) {
+            LOG_ER("Error getsockopt(...,SOL_SOCKET,..)");
+            return -1;
+          }
+          /* A non-zero SO_ERROR means the connect attempt itself failed */
+          if (opt) {
+            LOG_ER("Connect failed (SO_ERROR) err :%s", strerror(opt));
+            return -1;
+          }
+          break;
+        } else { /* Timeout */
+          LOG_ER("Timeout in connect()");
+          return -2;
+        }
+      }
+    } else { /* Real error returned from connect */
+      int err = errno;
+      LOG_ER("Connect failed (connect()) err :%s", strerror(err));
+      return -1;
+    }
+  }
+
+  /* Connection has been established switch back to blocking mode */
+  flags &= (~O_NONBLOCK);
+  if (fcntl(sockd, F_SETFL, flags) < 0) {
+    LOG_ER("Error fcntl");
+    exit(EXIT_FAILURE);
+  }
+
+  return 0;
+}
+
/**
* Setup the new communication socket
*
@@ -235,7 +317,7 @@ int comm_socket_setup_new(DTM_INTERNODE_CB *dtms_cb,
sa_family_t ip_addr_type) {
int sock_desc = -1, sndbuf_size = dtms_cb->sock_sndbuf_size,
rcvbuf_size = dtms_cb->sock_rcvbuf_size;
- int err = 0, rv;
+ int rv;
char local_port_str[INET6_ADDRSTRLEN];
struct addrinfo *addr_list;
struct addrinfo addr_criteria, *p; /* Criteria for address match */
@@ -359,9 +441,9 @@ int comm_socket_setup_new(DTM_INTERNODE_CB *dtms_cb,
}
/* Try to connect to the given port */
- if (connect(sock_desc, addr_list->ai_addr, addr_list->ai_addrlen) < 0) {
- err = errno;
- LOG_ER("DTM :Connect failed (connect()) err :%s", strerror(err));
+ if (non_blocking_connect(sock_desc, addr_list->ai_addr,
+ addr_list->ai_addrlen) < 0) {
+ LOG_ER("DTM :non_blocking_connect() failed");
close(sock_desc);
sock_desc = -1;
goto done;
@@ -649,12 +731,12 @@ DTM_NODE_DB *dtm_process_connect(DTM_INTERNODE_CB
*dtms_cb, uint8_t *data,
int sock_desc = comm_socket_setup_new(dtms_cb, node.node_ip, foreign_port,
node.i_addr_family);
- new_node->comm_socket = sock_desc;
- new_node->node_id = node.node_id;
- memcpy(new_node->node_ip, node.node_ip, INET6_ADDRSTRLEN);
- new_node->i_addr_family = node.i_addr_family;
-
if (sock_desc != -1) {
+ new_node->comm_socket = sock_desc;
+ new_node->node_id = node.node_id;
+ memcpy(new_node->node_ip, node.node_ip, INET6_ADDRSTRLEN);
+ new_node->i_addr_family = node.i_addr_family;
+
TRACE("DTM: dtm_node_add .node_ip: %s node_id: %u, comm_socket %d",
new_node->node_ip, new_node->node_id, new_node->comm_socket);
if (dtm_node_add(new_node, KeyTypes::kDtmNodeIdKeyType) !=
@@ -663,6 +745,7 @@ DTM_NODE_DB *dtm_process_connect(DTM_INTERNODE_CB *dtms_cb,
uint8_t *data,
new_node->node_ip, new_node->node_id);
dtm_comm_socket_close(new_node);
sock_desc = -1;
+ new_node = nullptr;
goto node_fail;
}
@@ -672,10 +755,14 @@ DTM_NODE_DB *dtm_process_connect(DTM_INTERNODE_CB
*dtms_cb, uint8_t *data,
new_node->node_ip, new_node->node_id);
dtm_comm_socket_close(new_node);
sock_desc = -1;
+ new_node = nullptr;
goto node_fail;
} else
TRACE("DTM: dtm_node_add add .node_ip: %s, node_id: %u",
new_node->node_ip, new_node->node_id);
+ } else {
+ new_node = nullptr;
+ LOG_ER("comm_socket_setup_new failed for node.node_ip: %s", node.node_ip);
}
node_fail:
diff --git a/src/dtm/dtmnd/dtmd.conf b/src/dtm/dtmnd/dtmd.conf
index 4c1c58f8d..926437375 100644
--- a/src/dtm/dtmnd/dtmd.conf
+++ b/src/dtm/dtmnd/dtmd.conf
@@ -122,7 +122,7 @@ DTM_TCP_KEEPALIVE_PROBES=2
# This option, TCP_USER_TIMEOUT will override keepalive
# to determine when to close a connection due to keepalive failure.
# kept to 1.5 sec to match other transport protocols supported Opensaf
-DTM_TCP_USER_TIMEOUT=1500
+DTM_TCP_USER_TIMEOUT=4000
#
#Used to Set the dtm internode & intranode socket SO_SNDBUF/SO_RCVBUF buffer
in bytes.
--
2.25.1
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel