Running large cluster setup, sometimes tests are hanging during long
testing cycle.

Fixing required following changes in nes_cm.[ch] code.

* Under heavy load, sometimes it takes longer to receive the response from
application to the MPA request. The rexmit timeout value is too low.

* in handle_fin_pkt(), we are calling cleanup_retrans_entry() for all
conditions, even if the packets needs to be dropped.

* check_seq(), does not check for condition if the seq# is wrapped.

* handle_ack_pkt() need to return error value, so in case of error,
handle_fin() is not called.

* handle_rst_pkt(), handling of cm_node's NES_CM_STATE_LAST_ACK is missing.

* process_packet(), in case of FIN only packet is received, call
check_seq() before processing.

* nes_connect() is not to set apbvt bit if it is a loopback connection.
apbvt bit is only set for non-loopback connections as for loopback,
all the connection setup is done from the driver.

Signed-off-by: Faisal Latif <[email protected]>
---
 drivers/infiniband/hw/nes/nes_cm.c |   74 ++++++++++++++++++------------------
 drivers/infiniband/hw/nes/nes_cm.h |    1 +
 2 files changed, 38 insertions(+), 37 deletions(-)

diff --git a/drivers/infiniband/hw/nes/nes_cm.c 
b/drivers/infiniband/hw/nes/nes_cm.c
index dbd9a75..61da9d3 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -56,6 +56,7 @@
 #include <net/neighbour.h>
 #include <net/route.h>
 #include <net/ip_fib.h>
+#include <net/tcp.h>
 
 #include "nes.h"
 
@@ -540,6 +541,7 @@ static void nes_cm_timer_tick(unsigned long pass)
        struct list_head *list_node;
        struct nes_cm_core *cm_core = g_cm_core;
        u32 settimer = 0;
+       unsigned long timetosend;
        int ret = NETDEV_TX_OK;
 
        struct list_head timer_list;
@@ -644,8 +646,10 @@ static void nes_cm_timer_tick(unsigned long pass)
                                send_entry->retrycount);
                        if (send_entry->send_retrans) {
                                send_entry->retranscount--;
+                               timetosend = (NES_RETRY_TIMEOUT <<
+                                       (NES_DEFAULT_RETRANS - 
send_entry->retranscount));
                                send_entry->timetosend = jiffies +
-                                       NES_RETRY_TIMEOUT;
+                                       min(timetosend, NES_MAX_TIMEOUT);
                                if (nexttimeout > send_entry->timetosend ||
                                        !settimer) {
                                        nexttimeout = send_entry->timetosend;
@@ -1325,18 +1329,20 @@ static void handle_fin_pkt(struct nes_cm_node *cm_node)
        nes_debug(NES_DBG_CM, "Received FIN, cm_node = %p, state = %u. "
                "refcnt=%d\n", cm_node, cm_node->state,
                atomic_read(&cm_node->ref_count));
-       cm_node->tcp_cntxt.rcv_nxt++;
-       cleanup_retrans_entry(cm_node);
        switch (cm_node->state) {
        case NES_CM_STATE_SYN_RCVD:
        case NES_CM_STATE_SYN_SENT:
        case NES_CM_STATE_ESTABLISHED:
        case NES_CM_STATE_MPAREQ_SENT:
        case NES_CM_STATE_MPAREJ_RCVD:
+               cm_node->tcp_cntxt.rcv_nxt++;
+               cleanup_retrans_entry(cm_node);
                cm_node->state = NES_CM_STATE_LAST_ACK;
                send_fin(cm_node, NULL);
                break;
        case NES_CM_STATE_FIN_WAIT1:
+               cm_node->tcp_cntxt.rcv_nxt++;
+               cleanup_retrans_entry(cm_node);
                cm_node->state = NES_CM_STATE_CLOSING;
                send_ack(cm_node, NULL);
                /* Wait for ACK as this is simultanous close..
@@ -1344,11 +1350,15 @@ static void handle_fin_pkt(struct nes_cm_node *cm_node)
                * Just rm the node.. Done.. */
                break;
        case NES_CM_STATE_FIN_WAIT2:
+               cm_node->tcp_cntxt.rcv_nxt++;
+               cleanup_retrans_entry(cm_node);
                cm_node->state = NES_CM_STATE_TIME_WAIT;
                send_ack(cm_node, NULL);
                schedule_nes_timer(cm_node, NULL,  NES_TIMER_TYPE_CLOSE, 1, 0);
                break;
        case NES_CM_STATE_TIME_WAIT:
+               cm_node->tcp_cntxt.rcv_nxt++;
+               cleanup_retrans_entry(cm_node);
                cm_node->state = NES_CM_STATE_CLOSED;
                rem_ref_cm_node(cm_node->cm_core, cm_node);
                break;
@@ -1384,7 +1394,6 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, 
struct sk_buff *skb,
                passive_state = atomic_add_return(1, &cm_node->passive_state);
                if (passive_state ==  NES_SEND_RESET_EVENT)
                        create_event(cm_node, NES_CM_EVENT_RESET);
-               cleanup_retrans_entry(cm_node);
                cm_node->state = NES_CM_STATE_CLOSED;
                dev_kfree_skb_any(skb);
                break;
@@ -1398,17 +1407,16 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, 
struct sk_buff *skb,
                active_open_err(cm_node, skb, reset);
                break;
        case NES_CM_STATE_CLOSED:
-               cleanup_retrans_entry(cm_node);
                drop_packet(skb);
                break;
+       case NES_CM_STATE_LAST_ACK:
+               cm_node->cm_id->rem_ref(cm_node->cm_id);
        case NES_CM_STATE_TIME_WAIT:
-               cleanup_retrans_entry(cm_node);
                cm_node->state = NES_CM_STATE_CLOSED;
                rem_ref_cm_node(cm_node->cm_core, cm_node);
                drop_packet(skb);
                break;
        case NES_CM_STATE_FIN_WAIT1:
-               cleanup_retrans_entry(cm_node);
                nes_debug(NES_DBG_CM, "Bad state %s[%u]\n", __func__, __LINE__);
        default:
                drop_packet(skb);
@@ -1455,6 +1463,7 @@ static void handle_rcv_mpa(struct nes_cm_node *cm_node, 
struct sk_buff *skb)
                                NES_PASSIVE_STATE_INDICATED);
                break;
        case NES_CM_STATE_MPAREQ_SENT:
+               cleanup_retrans_entry(cm_node);
                if (res_type == NES_MPA_REQUEST_REJECT) {
                        type = NES_CM_EVENT_MPA_REJECT;
                        cm_node->state = NES_CM_STATE_MPAREJ_RCVD;
@@ -1518,7 +1527,7 @@ static int check_seq(struct nes_cm_node *cm_node, struct 
tcphdr *tcph,
        rcv_wnd = cm_node->tcp_cntxt.rcv_wnd;
        if (ack_seq != loc_seq_num)
                err = 1;
-       else if ((seq + rcv_wnd) < rcv_nxt)
+       else if (!between(seq, rcv_nxt, (rcv_nxt+rcv_wnd)))
                err = 1;
        if (err) {
                nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p "
@@ -1652,49 +1661,39 @@ static void handle_synack_pkt(struct nes_cm_node 
*cm_node, struct sk_buff *skb,
        }
 }
 
-static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
+static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
        struct tcphdr *tcph)
 {
        int datasize = 0;
        u32 inc_sequence;
        u32 rem_seq_ack;
        u32 rem_seq;
-       int ret;
+       int ret = 0;
        int optionsize;
        optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
 
        if (check_seq(cm_node, tcph, skb))
-               return;
+               return -EINVAL;
 
        skb_pull(skb, tcph->doff << 2);
        inc_sequence = ntohl(tcph->seq);
        rem_seq = ntohl(tcph->seq);
        rem_seq_ack =  ntohl(tcph->ack_seq);
        datasize = skb->len;
-       cleanup_retrans_entry(cm_node);
        switch (cm_node->state) {
        case NES_CM_STATE_SYN_RCVD:
                /* Passive OPEN */
+               cleanup_retrans_entry(cm_node);
                ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 1);
                if (ret)
                        break;
                cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
-               if (cm_node->tcp_cntxt.rem_ack_num !=
-                   cm_node->tcp_cntxt.loc_seq_num) {
-                       nes_debug(NES_DBG_CM, "rem_ack_num != loc_seq_num\n");
-                       cleanup_retrans_entry(cm_node);
-                       send_reset(cm_node, skb);
-                       return;
-               }
                cm_node->state = NES_CM_STATE_ESTABLISHED;
-               cleanup_retrans_entry(cm_node);
                if (datasize) {
                        cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
                        handle_rcv_mpa(cm_node, skb);
-               } else { /* rcvd ACK only */
+               } else  /* rcvd ACK only */
                        dev_kfree_skb_any(skb);
-                       cleanup_retrans_entry(cm_node);
-                }
                break;
        case NES_CM_STATE_ESTABLISHED:
                /* Passive OPEN */
@@ -1706,15 +1705,12 @@ static void handle_ack_pkt(struct nes_cm_node *cm_node, 
struct sk_buff *skb,
                        drop_packet(skb);
                break;
        case NES_CM_STATE_MPAREQ_SENT:
-               cleanup_retrans_entry(cm_node);
                cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
                if (datasize) {
                        cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
                        handle_rcv_mpa(cm_node, skb);
-               } else { /* Could be just an ack pkt.. */
-                       cleanup_retrans_entry(cm_node);
+               } else  /* Could be just an ack pkt.. */
                        dev_kfree_skb_any(skb);
-               }
                break;
        case NES_CM_STATE_LISTENING:
        case NES_CM_STATE_CLOSED:
@@ -1722,11 +1718,10 @@ static void handle_ack_pkt(struct nes_cm_node *cm_node, 
struct sk_buff *skb,
                send_reset(cm_node, skb);
                break;
        case NES_CM_STATE_LAST_ACK:
+       case NES_CM_STATE_CLOSING:
                cleanup_retrans_entry(cm_node);
                cm_node->state = NES_CM_STATE_CLOSED;
                cm_node->cm_id->rem_ref(cm_node->cm_id);
-       case NES_CM_STATE_CLOSING:
-               cleanup_retrans_entry(cm_node);
                rem_ref_cm_node(cm_node->cm_core, cm_node);
                drop_packet(skb);
                break;
@@ -1741,9 +1736,11 @@ static void handle_ack_pkt(struct nes_cm_node *cm_node, 
struct sk_buff *skb,
        case NES_CM_STATE_MPAREQ_RCVD:
        case NES_CM_STATE_UNKNOWN:
        default:
+               cleanup_retrans_entry(cm_node);
                drop_packet(skb);
                break;
        }
+       return ret;
 }
 
 
@@ -1849,6 +1846,7 @@ static void process_packet(struct nes_cm_node *cm_node, 
struct sk_buff *skb,
        enum nes_tcpip_pkt_type pkt_type = NES_PKT_TYPE_UNKNOWN;
        struct tcphdr *tcph = tcp_hdr(skb);
        u32     fin_set = 0;
+       int ret = 0;
        skb_pull(skb, ip_hdr(skb)->ihl << 2);
 
        nes_debug(NES_DBG_CM, "process_packet: cm_node=%p state =%d syn=%d "
@@ -1874,17 +1872,17 @@ static void process_packet(struct nes_cm_node *cm_node, 
struct sk_buff *skb,
                handle_synack_pkt(cm_node, skb, tcph);
                break;
        case NES_PKT_TYPE_ACK:
-               handle_ack_pkt(cm_node, skb, tcph);
-               if (fin_set)
+               ret = handle_ack_pkt(cm_node, skb, tcph);
+               if (fin_set && !ret)
                        handle_fin_pkt(cm_node);
                break;
        case NES_PKT_TYPE_RST:
                handle_rst_pkt(cm_node, skb, tcph);
                break;
        default:
-               drop_packet(skb);
-               if (fin_set)
+               if ((fin_set) && (!check_seq(cm_node, tcph, skb)))
                        handle_fin_pkt(cm_node);
+               drop_packet(skb);
                break;
        }
 }
@@ -2959,6 +2957,7 @@ int nes_connect(struct iw_cm_id *cm_id, struct 
iw_cm_conn_param *conn_param)
        struct nes_device *nesdev;
        struct nes_cm_node *cm_node;
        struct nes_cm_info cm_info;
+       int apbvt_set = 0;
 
        ibqp = nes_get_qp(cm_id->device, conn_param->qpn);
        if (!ibqp)
@@ -2996,9 +2995,11 @@ int nes_connect(struct iw_cm_id *cm_id, struct 
iw_cm_conn_param *conn_param)
                conn_param->private_data_len);
 
        if (cm_id->local_addr.sin_addr.s_addr !=
-               cm_id->remote_addr.sin_addr.s_addr)
+               cm_id->remote_addr.sin_addr.s_addr) {
                nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port),
                        PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD);
+               apbvt_set = 1;
+       }
 
        /* set up the connection params for the node */
        cm_info.loc_addr = htonl(cm_id->local_addr.sin_addr.s_addr);
@@ -3015,8 +3016,7 @@ int nes_connect(struct iw_cm_id *cm_id, struct 
iw_cm_conn_param *conn_param)
                conn_param->private_data_len, (void *)conn_param->private_data,
                &cm_info);
        if (!cm_node) {
-               if (cm_id->local_addr.sin_addr.s_addr !=
-                               cm_id->remote_addr.sin_addr.s_addr)
+               if (apbvt_set)
                        nes_manage_apbvt(nesvnic, 
ntohs(cm_id->local_addr.sin_port),
                                PCI_FUNC(nesdev->pcidev->devfn),
                                NES_MANAGE_APBVT_DEL);
@@ -3025,7 +3025,7 @@ int nes_connect(struct iw_cm_id *cm_id, struct 
iw_cm_conn_param *conn_param)
                return -ENOMEM;
        }
 
-       cm_node->apbvt_set = 1;
+       cm_node->apbvt_set = apbvt_set;
        nesqp->cm_node = cm_node;
        cm_node->nesqp = nesqp;
        nes_add_ref(&nesqp->ibqp);
diff --git a/drivers/infiniband/hw/nes/nes_cm.h 
b/drivers/infiniband/hw/nes/nes_cm.h
index 80bba18..8b7e7c0 100644
--- a/drivers/infiniband/hw/nes/nes_cm.h
+++ b/drivers/infiniband/hw/nes/nes_cm.h
@@ -149,6 +149,7 @@ struct nes_timer_entry {
 #endif
 #define NES_SHORT_TIME      (10)
 #define NES_LONG_TIME       (2000*HZ/1000)
+#define NES_MAX_TIMEOUT     ((unsigned long) (12*HZ))
 
 #define NES_CM_HASHTABLE_SIZE         1024
 #define NES_CM_TCP_TIMER_INTERVAL     3000
-- 
1.5.3.3

_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to