- Use non-blocking in connect new socket.
- Dtm does not exit when connect failure.
---
 src/dtm/dtmnd/dtm_node.h          |   1 +
 src/dtm/dtmnd/dtm_node_sockets.cc | 105 +++++++++++++++++++++++++++---
 2 files changed, 97 insertions(+), 9 deletions(-)

diff --git a/src/dtm/dtmnd/dtm_node.h b/src/dtm/dtmnd/dtm_node.h
index 82435cc11..aaeba69c7 100644
--- a/src/dtm/dtmnd/dtm_node.h
+++ b/src/dtm/dtmnd/dtm_node.h
@@ -18,6 +18,7 @@
 
 #ifndef DTM_DTMND_DTM_NODE_H_
 #define DTM_DTMND_DTM_NODE_H_
+#define DTM_TCP_TIMEOUT_SECS 10
 
 #include <sys/socket.h>
 #include <cstddef>
diff --git a/src/dtm/dtmnd/dtm_node_sockets.cc 
b/src/dtm/dtmnd/dtm_node_sockets.cc
index 8e1299368..f09b09157 100644
--- a/src/dtm/dtmnd/dtm_node_sockets.cc
+++ b/src/dtm/dtmnd/dtm_node_sockets.cc
@@ -221,6 +221,88 @@ uint32_t dtm_comm_socket_send(int sock_desc, const void 
*buffer,
   return rc;
 }
 
+/*
+ * By default TCP timeouts can be very long. This can lead to blocking for a
+ * very long time waiting on connect(). This function sets the socket to
+ * non-blocking mode for the connect and returns the socket to blocking mode
+ * once the connect has been established.
+ *
+ * \param socket file descriptor
+ * \param socket address structure
+ * \size of address structure
+ *
+ * \return < 0 on error, 0 on success
+ */
+int non_blocking_connect(int sockd, struct sockaddr *sin, socklen_t length) {
+  struct pollfd wset;
+  socklen_t len;
+  int flags, ret, opt;
+
+  /* Set the socket fd to non-blocking mode */
+  if ((flags = fcntl(sockd, F_GETFL, NULL)) < 0) {
+    LOG_ER("Error fcntl(..., F_GETFL)");
+    exit(EXIT_FAILURE);
+  }
+  flags |= O_NONBLOCK;
+  if (fcntl(sockd, F_SETFL, flags) < 0) {
+    LOG_ER("Error fcntl(..., F_SETFL)");
+    exit(EXIT_FAILURE);
+  }
+
+  /* connect with timeout */
+  ret =
+    connect(sockd, (struct sockaddr *)sin, length);
+  if (ret < 0) {
+    if (errno == EINPROGRESS) {
+      /* poll the fd until we get a connection, timeout, or
+       * error  */
+      while (1) {
+        wset.fd = sockd;
+        wset.events = POLLOUT;
+
+        ret = poll(&wset, 1,
+             DTM_TCP_TIMEOUT_SECS * 1000);
+        if (ret < 0 && errno != EINTR) {
+          LOG_ER("Error poll");
+          return -1;
+
+        } else if (ret > 0) {
+          // Socket polled for write
+          len = sizeof(int);
+          if (getsockopt(sockd, SOL_SOCKET, SO_ERROR,
+              reinterpret_cast<void *>(&opt),
+              &len) < 0) {
+            LOG_ER("Error getsockopt(...,SOL_SOCKET,..)");
+            return -1;
+          }
+          // Check the value returned...
+          if (opt) {
+            LOG_ER("Error getsockopt");
+            return -1;
+          }
+          break;
+        } else { /* Timeout */
+          LOG_ER("Timeout in connect()");
+          return -2;
+        }
+      }
+    } else { /* Real error returned from connect */
+      int err = errno;
+      LOG_ER("Connect failed (connect()) err :%s", strerror(err));
+      return -1;
+    }
+  }
+
+  /* Connection has been established switch back to blocking mode */
+  flags &= (~O_NONBLOCK);
+  if (fcntl(sockd, F_SETFL, flags) < 0) {
+    LOG_ER("Error fcntl");
+    exit(EXIT_FAILURE);
+  }
+
+  return 0;
+}
+
 /**
  * Setup the new communication socket
  *
@@ -235,7 +317,7 @@ int comm_socket_setup_new(DTM_INTERNODE_CB *dtms_cb,
                           sa_family_t ip_addr_type) {
   int sock_desc = -1, sndbuf_size = dtms_cb->sock_sndbuf_size,
       rcvbuf_size = dtms_cb->sock_rcvbuf_size;
-  int err = 0, rv;
+  int rv;
   char local_port_str[INET6_ADDRSTRLEN];
   struct addrinfo *addr_list;
   struct addrinfo addr_criteria, *p; /* Criteria for address match */
@@ -359,9 +441,9 @@ int comm_socket_setup_new(DTM_INTERNODE_CB *dtms_cb,
   }
 
   /* Try to connect to the given port */
-  if (connect(sock_desc, addr_list->ai_addr, addr_list->ai_addrlen) < 0) {
-    err = errno;
-    LOG_ER("DTM :Connect failed (connect()) err :%s", strerror(err));
+  if (non_blocking_connect(sock_desc, addr_list->ai_addr,
+      addr_list->ai_addrlen) < 0) {
+    LOG_ER("DTM :non_blocking_connect() failed");
     close(sock_desc);
     sock_desc = -1;
     goto done;
@@ -649,12 +731,12 @@ DTM_NODE_DB *dtm_process_connect(DTM_INTERNODE_CB 
*dtms_cb, uint8_t *data,
   int sock_desc = comm_socket_setup_new(dtms_cb, node.node_ip, foreign_port,
                                         node.i_addr_family);
 
-  new_node->comm_socket = sock_desc;
-  new_node->node_id = node.node_id;
-  memcpy(new_node->node_ip, node.node_ip, INET6_ADDRSTRLEN);
-  new_node->i_addr_family = node.i_addr_family;
-
   if (sock_desc != -1) {
+    new_node->comm_socket = sock_desc;
+    new_node->node_id = node.node_id;
+    memcpy(new_node->node_ip, node.node_ip, INET6_ADDRSTRLEN);
+    new_node->i_addr_family = node.i_addr_family;
+
     TRACE("DTM: dtm_node_add .node_ip: %s node_id: %x, comm_socket %d",
           new_node->node_ip, new_node->node_id, new_node->comm_socket);
     if (dtm_node_add(new_node, KeyTypes::kDtmNodeIdKeyType) !=
@@ -663,6 +745,7 @@ DTM_NODE_DB *dtm_process_connect(DTM_INTERNODE_CB *dtms_cb, 
uint8_t *data,
              new_node->node_ip, new_node->node_id);
       dtm_comm_socket_close(new_node);
       sock_desc = -1;
+      new_node = nullptr;
       goto node_fail;
     }
 
@@ -672,10 +755,14 @@ DTM_NODE_DB *dtm_process_connect(DTM_INTERNODE_CB 
*dtms_cb, uint8_t *data,
              new_node->node_ip, new_node->node_id);
       dtm_comm_socket_close(new_node);
       sock_desc = -1;
+      new_node = nullptr;
       goto node_fail;
     } else
       TRACE("DTM: dtm_node_add add .node_ip: %s, node_id: %x",
             new_node->node_ip, new_node->node_id);
+  } else {
+    new_node = nullptr;
+    LOG_ER("comm_socket_setup_new failed for node.node_ip: %s", node.node_ip);
   }
 
 node_fail:
-- 
2.25.1



_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to