Hello

I have made a patch for per-connection fine-tuning of keepalive messages
on Linux.
The primary motivation for it was that we use these features
very extensively on Digital and pSOS RTOS, and we miss them
in Linux. Additionally, both Digital and pSOS send keepalive messages
at precisely the desired time intervals, which is not true of Linux.

In particular:
Four new socket options are added for per-connection fine-tuning:
TCP_KEEPIDLE       idle time before keepalive takes place
TCP_KEEPINTVL    interval between keepalive retransmissions
TCP_KEEPCNT       how many keep alive messages to send before giving up
TCP_SYNCNT         how many SYN segments to send before giving up on
                                connection establishment

One new sysctl was added and one was removed:
sysctl_tcp_keepalive_intvl was added to control the system-wide TCP_KEEPINTVL default
sysctl_tcp_max_ka_probes was removed

After keepalive probing gives up, a final RST is sent.

A bug in tcp_write_wakeup was corrected so that a probe is now also sent
in the TCP_FIN_WAIT2 state.

Known problems:
I was not sure whether to introduce a new variable for counting keepalive
probes, so I used the probes_out variable, which is also used for zero-window
probes (as before).
It would be a simple change to introduce a new variable if necessary.

Let me know what you think.

best regards
Pavel Krauz <[EMAIL PROTECTED]>


Only in linux-2.3.14.orig/drivers/net: sk_mca.h.orig
diff -ru linux-2.3.14.orig/include/linux/socket.h linux-2.3.14/include/linux/socket.h
--- linux-2.3.14.orig/include/linux/socket.h    Thu Apr 15 14:42:43 1999
+++ linux-2.3.14/include/linux/socket.h Wed Aug 25 10:22:26 1999
@@ -255,6 +255,10 @@
 #define TCP_NODELAY    1
 #define TCP_MAXSEG     2
 #define TCP_CORK       3       /* Linux specific (for use with sendfile) */
+#define TCP_KEEPIDLE   4
+#define TCP_KEEPINTVL  5
+#define TCP_KEEPCNT    6
+#define TCP_SYNCNT     7
 
 #ifdef __KERNEL__
 extern int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len);
diff -ru linux-2.3.14.orig/include/linux/sysctl.h linux-2.3.14/include/linux/sysctl.h
--- linux-2.3.14.orig/include/linux/sysctl.h    Wed Aug 25 10:17:33 1999
+++ linux-2.3.14/include/linux/sysctl.h Wed Aug 25 10:22:26 1999
@@ -203,7 +203,7 @@
        NET_IPV4_IPFRAG_HIGH_THRESH=41,
        NET_IPV4_IPFRAG_LOW_THRESH=42,
        NET_IPV4_IPFRAG_TIME=43,
-       NET_IPV4_TCP_MAX_KA_PROBES=44,
+       NET_IPV4_TCP_KEEPALIVE_INTVL=44,
        NET_IPV4_TCP_KEEPALIVE_TIME=45,
        NET_IPV4_TCP_KEEPALIVE_PROBES=46,
        NET_IPV4_TCP_RETRIES1=47,
diff -ru linux-2.3.14.orig/include/net/sock.h linux-2.3.14/include/net/sock.h
--- linux-2.3.14.orig/include/net/sock.h        Wed Jul 28 22:57:37 1999
+++ linux-2.3.14/include/net/sock.h     Wed Aug 25 10:26:34 1999
@@ -259,7 +259,8 @@
        /* Two commonly used timers in both sender and receiver paths. */
        struct timer_list       retransmit_timer;       /* Resend (no ack)      */
        struct timer_list       delack_timer;           /* Ack delay            */
-
+       struct timer_list       keepalive_timer;        /* keep alive */
+       
        struct sk_buff_head     out_of_order_queue; /* Out of order segments go here */
 
        struct tcp_func         *af_specific;   /* Operations which are AF_INET{4,6} specific   */
@@ -306,6 +307,10 @@
        struct open_request     **syn_wait_last;
 
        int syn_backlog;        /* Backlog of received SYNs */
+       unsigned int            keepalive_time;   /* time before keep alive takes place */
+       unsigned int            keepalive_intvl;  /* time interval between keep alive probes */
+       unsigned char           keepalive_probes; /* num of allowed keep alive probes */
+       unsigned char           syn_retries;      /* num of allowed syn retries */
 };
 
        
diff -ru linux-2.3.14.orig/include/net/tcp.h linux-2.3.14/include/net/tcp.h
--- linux-2.3.14.orig/include/net/tcp.h Wed Aug 25 10:17:33 1999
+++ linux-2.3.14/include/net/tcp.h      Wed Aug 25 10:30:48 1999
@@ -258,9 +258,14 @@
 #define TCP_PROBEWAIT_LEN (1*HZ)/* time to wait between probes when
                                 * I've got something to write and
                                 * there is no window                   */
-#define TCP_KEEPALIVE_TIME (120*60*HZ)         /* two hours */
+#define TCP_KEEPALIVE_TIME     (120*60*HZ)     /* two hours */
 #define TCP_KEEPALIVE_PROBES   9               /* Max of 9 keepalive probes    */
-#define TCP_KEEPALIVE_PERIOD ((75*HZ)>>2)      /* period of keepalive check    */
+#define TCP_KEEPALIVE_INTVL    (75*HZ)
+
+#define MAX_TCP_KEEPIDLE       32767
+#define MAX_TCP_KEEPINTVL      32767
+#define MAX_TCP_KEEPCNT                127
+#define MAX_TCP_SYNCNT 127
 
 #define TCP_SYNACK_PERIOD      (HZ/2) /* How often to run the synack slow timer */
 #define TCP_QUICK_TRIES                8  /* How often we try to retransmit, until
@@ -302,6 +307,12 @@
 #define TCPOLEN_SACK_BASE_ALIGNED      4
 #define TCPOLEN_SACK_PERBLOCK          8
 
+/* sysctl variables for tcp */
+extern int sysctl_tcp_keepalive_time;
+extern int sysctl_tcp_keepalive_probes;
+extern int sysctl_tcp_keepalive_intvl;
+extern int sysctl_tcp_syn_retries;
+
 struct open_request;
 
 struct or_calltable {
@@ -563,7 +574,7 @@
 extern void tcp_send_partial(struct sock *);
 extern void tcp_write_wakeup(struct sock *);
 extern void tcp_send_fin(struct sock *sk);
-extern void tcp_send_active_reset(struct sock *sk);
+extern void tcp_send_active_reset(struct sock *sk, int priority);
 extern int  tcp_send_synack(struct sock *);
 extern void tcp_transmit_skb(struct sock *, struct sk_buff *);
 extern void tcp_send_skb(struct sock *, struct sk_buff *, int force_queue);
@@ -599,9 +610,8 @@
 };
 
 #define TCP_SLT_SYNACK         0
-#define TCP_SLT_KEEPALIVE      1
-#define TCP_SLT_TWKILL         2
-#define TCP_SLT_MAX            3
+#define TCP_SLT_TWKILL         1
+#define TCP_SLT_MAX            2
 
 extern struct tcp_sl_timer tcp_slt_array[TCP_SLT_MAX];
  
@@ -1055,7 +1065,10 @@
                break;
        case TIME_PROBE0:
                timer = &tp->probe_timer;
-               break;  
+               break;
+       case TIME_KEEPOPEN:
+               timer = &tp->keepalive_timer;
+               break;
        default:
                printk(timer_bug_msg);
                return;
@@ -1077,12 +1090,30 @@
                break;
        case TIME_PROBE0:
                return tp->probe_timer.prev != NULL;
-               break;  
+               break;
+       case TIME_KEEPOPEN:
+               return tp->keepalive_timer.prev != NULL;
+               break;
        default:
                printk(timer_bug_msg);
        };
        return 0;
 }
 
+static inline int keepalive_intvl_when(struct tcp_opt *tp)
+{
+       if (tp->keepalive_intvl)
+               return tp->keepalive_intvl;
+       else
+               return sysctl_tcp_keepalive_intvl;
+}
+
+static inline int keepalive_time_when(struct tcp_opt *tp)
+{
+       if (tp->keepalive_time)
+               return tp->keepalive_time;
+       else
+               return sysctl_tcp_keepalive_time;
+}
 
 #endif /* _TCP_H */
diff -ru linux-2.3.14.orig/net/ipv4/sysctl_net_ipv4.c linux-2.3.14/net/ipv4/sysctl_net_ipv4.c
--- linux-2.3.14.orig/net/ipv4/sysctl_net_ipv4.c        Tue Jan  5 00:31:35 1999
+++ linux-2.3.14/net/ipv4/sysctl_net_ipv4.c     Wed Aug 25 10:22:26 1999
@@ -48,14 +48,10 @@
 extern int sysctl_tcp_window_scaling;
 extern int sysctl_tcp_sack;
 extern int sysctl_tcp_retrans_collapse;
-extern int sysctl_tcp_keepalive_time;
-extern int sysctl_tcp_keepalive_probes;
-extern int sysctl_tcp_max_ka_probes;
 extern int sysctl_tcp_retries1;
 extern int sysctl_tcp_retries2;
 extern int sysctl_tcp_fin_timeout;
 extern int sysctl_tcp_syncookies;
-extern int sysctl_tcp_syn_retries;
 extern int sysctl_tcp_stdurg;
 extern int sysctl_tcp_rfc1337;
 extern int sysctl_tcp_syn_taildrop; 
@@ -133,8 +129,8 @@
 #endif
        {NET_IPV4_IPFRAG_TIME, "ipfrag_time",
         &sysctl_ipfrag_time, sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
-       {NET_IPV4_TCP_MAX_KA_PROBES, "tcp_max_ka_probes",
-        &sysctl_tcp_max_ka_probes, sizeof(int), 0644, NULL, &proc_dointvec},
+       {NET_IPV4_TCP_KEEPALIVE_INTVL, "tcp_keepalive_intvl",
+        &sysctl_tcp_keepalive_intvl, sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
        {NET_IPV4_TCP_KEEPALIVE_TIME, "tcp_keepalive_time",
         &sysctl_tcp_keepalive_time, sizeof(int), 0644, NULL, 
         &proc_dointvec_jiffies},
diff -ru linux-2.3.14.orig/net/ipv4/tcp.c linux-2.3.14/net/ipv4/tcp.c
--- linux-2.3.14.orig/net/ipv4/tcp.c    Tue Jul  6 05:22:09 1999
+++ linux-2.3.14/net/ipv4/tcp.c Wed Aug 25 10:22:26 1999
@@ -202,6 +202,7 @@
  *             Eric Schenk     :       Fix fast close down bug with
  *                                     shutdown() followed by close().
  *             Andi Kleen :    Make poll agree with SIGIO
+ *             Pavel Krauz     :       Keepalive fine tunning
  *                                     
  *             This program is free software; you can redistribute it and/or
  *             modify it under the terms of the GNU General Public License
@@ -1526,7 +1527,7 @@
        if(data_was_unread != 0) {
                /* Unread data was tossed, zap the connection. */
                tcp_set_state(sk, TCP_CLOSE);
-               tcp_send_active_reset(sk);
+               tcp_send_active_reset(sk, GFP_KERNEL);
        } else if (tcp_close_state(sk,1)) {
                /* We FIN if the application ate all the data before
                 * zapping the connection.
@@ -1650,8 +1651,6 @@
        req->class->destructor(req);
        tcp_openreq_free(req);
        sk->ack_backlog--; 
-       if(sk->keepopen)
-               tcp_inc_slow_timer(TCP_SLT_KEEPALIVE);
        release_sock(sk);
        lock_kernel();
        return newsk;
@@ -1730,7 +1729,27 @@
                        release_sock(sk);
                }
                return 0;
-
+               
+       case TCP_KEEPIDLE:
+               if (val < 1 || val > MAX_TCP_KEEPIDLE)
+                       return -EINVAL;
+               tp->keepalive_time = val * HZ;
+               return 0;
+       case TCP_KEEPINTVL:
+               if (val < 1 || val > MAX_TCP_KEEPINTVL)
+                       return -EINVAL;
+               tp->keepalive_intvl = val * HZ;
+               return 0;
+       case TCP_KEEPCNT:
+               if (val < 1 || val > MAX_TCP_KEEPCNT)
+                       return -EINVAL;
+               tp->keepalive_probes = val;
+               return 0;
+       case TCP_SYNCNT:
+               if (val < 1 || val > MAX_TCP_SYNCNT)
+                       return -EINVAL;
+               tp->syn_retries = val;
+               return 0;
        default:
                return -ENOPROTOOPT;
        };
@@ -1761,6 +1780,30 @@
        case TCP_CORK:
                val = (sk->nonagle == 2);
                break;
+       case TCP_KEEPIDLE:
+               if (tp->keepalive_time)
+                       val = tp->keepalive_time / HZ;
+               else
+                       val = sysctl_tcp_keepalive_time / HZ;
+               break;
+       case TCP_KEEPINTVL:
+               if (tp->keepalive_intvl)
+                       val = tp->keepalive_intvl / HZ;
+               else
+                       val = sysctl_tcp_keepalive_intvl / HZ;
+               break;
+       case TCP_KEEPCNT:
+               if (tp->keepalive_probes)
+                       val = tp->keepalive_probes;
+               else
+                       val = sysctl_tcp_keepalive_probes;
+               break;
+       case TCP_SYNCNT:
+               if (tp->syn_retries)
+                       val = tp->syn_retries;
+               else
+                       val = sysctl_tcp_syn_retries;
+               break;
        default:
                return -ENOPROTOOPT;
        };
@@ -1774,11 +1817,19 @@
 
 void tcp_set_keepalive(struct sock *sk, int val)
 {
-       if (!sk->keepopen && val)
-               tcp_inc_slow_timer(TCP_SLT_KEEPALIVE);
-       else if (sk->keepopen && !val)
-               tcp_dec_slow_timer(TCP_SLT_KEEPALIVE);
+       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+
+       if (!sk->keepopen && val) {
+               if ((1<<sk->state) & (TCPF_ESTABLISHED|TCPF_CLOSE_WAIT|TCPF_FIN_WAIT2) &&
+                   !tp->keepalive_timer.prev) {
+                       tp->keepalive_timer.expires = jiffies + keepalive_time_when(tp);
+                       add_timer(&tp->keepalive_timer);
+               }
+       } else if (sk->keepopen && !val) {
+               tcp_clear_xmit_timer(sk, TIME_KEEPOPEN);
+       }
 }
+
 
 extern void __skb_cb_too_small_for_tcp(int, int);
 
diff -ru linux-2.3.14.orig/net/ipv4/tcp_input.c linux-2.3.14/net/ipv4/tcp_input.c
--- linux-2.3.14.orig/net/ipv4/tcp_input.c      Sun Jul  4 02:57:23 1999
+++ linux-2.3.14/net/ipv4/tcp_input.c   Wed Aug 25 10:22:26 1999
@@ -768,6 +768,7 @@
 static int tcp_ack(struct sock *sk, struct tcphdr *th, 
                   u32 ack_seq, u32 ack, int len)
 {
+       unsigned long when;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        int flag = 0;
        u32 seq = 0;
@@ -775,10 +776,18 @@
 
        if(sk->zapped)
                return(1);      /* Dead, can't ack any more so why bother */
-
-       if (tp->pending == TIME_KEEPOPEN)
-               tp->probes_out = 0;
-
+       
+       if (sk->keepopen) {
+               tp->probes_out = 0;
+               if (tp->keepalive_timer.prev) {
+                       when = keepalive_time_when(tp);
+                       /* do not update keepalive timer when we are in burst */
+                       if (when + jiffies < tp->keepalive_timer.expires - (HZ >> 1) ||
+                           tp->keepalive_timer.expires + (HZ >> 1) < when + jiffies) {
+                               tcp_reset_xmit_timer(sk, TIME_KEEPOPEN, when);
+                       }
+               }
+       }
        tp->rcv_tstamp = tcp_time_stamp;
 
        /* If the ack is newer than sent or older than previous acks
@@ -2230,7 +2239,10 @@
 
                        /* Can't be earlier, doff would be wrong. */
                        tcp_send_ack(sk);
-
+                       if (sk->keepopen && !tp->keepalive_timer.prev) {
+                               tp->keepalive_timer.expires = jiffies + keepalive_time_when(tp);
+                               add_timer(&tp->keepalive_timer);
+                       }
                        sk->dport = th->source;
                        tp->copied_seq = tp->rcv_nxt;
 
diff -ru linux-2.3.14.orig/net/ipv4/tcp_ipv4.c linux-2.3.14/net/ipv4/tcp_ipv4.c
--- linux-2.3.14.orig/net/ipv4/tcp_ipv4.c       Wed Aug 25 10:17:35 1999
+++ linux-2.3.14/net/ipv4/tcp_ipv4.c    Wed Aug 25 10:22:26 1999
@@ -79,6 +79,7 @@
 struct socket *tcp_socket=&tcp_inode.u.socket_i;
 
 static void tcp_v4_send_reset(struct sk_buff *skb);
+void tcp_keepalive_timer(unsigned long data);
 
 void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, 
                       struct sk_buff *skb);
@@ -1405,6 +1406,9 @@
                init_timer(&newtp->delack_timer);
                newtp->delack_timer.function = &tcp_delack_timer;
                newtp->delack_timer.data = (unsigned long) newsk;
+               init_timer(&newtp->keepalive_timer);
+               newtp->keepalive_timer.function = &tcp_keepalive_timer;
+               newtp->keepalive_timer.data = (unsigned long) newsk;
                skb_queue_head_init(&newtp->out_of_order_queue);
                newtp->send_head = newtp->retrans_head = NULL;
                newtp->rcv_wup = req->rcv_isn + 1;
@@ -1496,7 +1500,6 @@
        if (tcp_bucket_check(ntohs(skb->h.th->dest)))
                goto exit;
 #endif
-
        newsk = tcp_create_openreq_child(sk, req, skb);
        if (!newsk) 
                goto exit;
@@ -1534,7 +1537,11 @@
        SOCKHASH_UNLOCK_WRITE();
 
        sk->data_ready(sk, 0); /* Deliver SIGIO */ 
-
+       
+       if (newsk->keepopen) {
+               newtp->keepalive_timer.expires = jiffies + keepalive_time_when(newtp);
+               add_timer(&newtp->keepalive_timer);
+       }
        return newsk;
 
 exit:
@@ -1630,7 +1637,7 @@
 
        if (sk->state == TCP_LISTEN) { 
                struct sock *nsk;
-               
+       
                nsk = tcp_v4_hnd_req(sk, skb);
                if (!nsk) 
                        goto discard;
@@ -1955,9 +1962,6 @@
        struct sk_buff *skb;
 
        tcp_clear_xmit_timers(sk);
-
-       if (sk->keepopen)
-               tcp_dec_slow_timer(TCP_SLT_KEEPALIVE);
 
        /* Cleanup up the write buffer. */
        while((skb = __skb_dequeue(&sk->write_queue)) != NULL)
diff -ru linux-2.3.14.orig/net/ipv4/tcp_output.c linux-2.3.14/net/ipv4/tcp_output.c
--- linux-2.3.14.orig/net/ipv4/tcp_output.c     Thu May 27 03:14:38 1999
+++ linux-2.3.14/net/ipv4/tcp_output.c  Wed Aug 25 10:22:26 1999
@@ -780,13 +780,13 @@
  * was unread data in the receive queue.  This behavior is recommended
  * by draft-ietf-tcpimpl-prob-03.txt section 3.10.  -DaveM
  */
-void tcp_send_active_reset(struct sock *sk)
+void tcp_send_active_reset(struct sock *sk, int priority)
 {
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct sk_buff *skb;
 
        /* NOTE: No TCP options attached and we never retransmit this. */
-       skb = alloc_skb(MAX_HEADER + sk->prot->max_header, GFP_KERNEL);
+       skb = alloc_skb(MAX_HEADER + sk->prot->max_header, priority);
        if (!skb)
                return;
 
@@ -1082,7 +1082,7 @@
                 */
                if ((1 << sk->state) &
                    ~(TCPF_ESTABLISHED|TCPF_CLOSE_WAIT|TCPF_FIN_WAIT1|
-                     TCPF_LAST_ACK|TCPF_CLOSING))
+                     TCPF_FIN_WAIT2|TCPF_LAST_ACK|TCPF_CLOSING))
                        return;
 
                if (before(tp->snd_nxt, tp->snd_una + tp->snd_wnd) &&
diff -ru linux-2.3.14.orig/net/ipv4/tcp_timer.c linux-2.3.14/net/ipv4/tcp_timer.c
--- linux-2.3.14.orig/net/ipv4/tcp_timer.c      Sun Jul  4 02:57:23 1999
+++ linux-2.3.14/net/ipv4/tcp_timer.c   Wed Aug 25 10:22:26 1999
@@ -25,12 +25,12 @@
 int sysctl_tcp_syn_retries = TCP_SYN_RETRIES; 
 int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
 int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
+int sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
 int sysctl_tcp_retries1 = TCP_RETR1;
 int sysctl_tcp_retries2 = TCP_RETR2;
 
 static void tcp_sltimer_handler(unsigned long);
 static void tcp_syn_recv_timer(unsigned long);
-static void tcp_keepalive(unsigned long data);
 static void tcp_twkill(unsigned long);
 
 struct timer_list      tcp_slow_timer = {
@@ -42,7 +42,6 @@
 
 struct tcp_sl_timer tcp_slt_array[TCP_SLT_MAX] = {
        {ATOMIC_INIT(0), TCP_SYNACK_PERIOD, 0, tcp_syn_recv_timer},/* SYNACK    */
-       {ATOMIC_INIT(0), TCP_KEEPALIVE_PERIOD, 0, tcp_keepalive},  /* KEEPALIVE */
        {ATOMIC_INIT(0), TCP_TWKILL_PERIOD, 0, tcp_twkill}         /* TWKILL    */
 };
 
@@ -54,6 +53,8 @@
  * to optimize.
  */
 
+void tcp_keepalive_timer(unsigned long data);
+
 void tcp_init_xmit_timers(struct sock *sk)
 {
        init_timer(&sk->tp_pinfo.af_tcp.retransmit_timer);
@@ -67,6 +68,10 @@
        init_timer(&sk->tp_pinfo.af_tcp.probe_timer);
        sk->tp_pinfo.af_tcp.probe_timer.function=&tcp_probe_timer;
        sk->tp_pinfo.af_tcp.probe_timer.data = (unsigned long) sk;
+       
+       init_timer(&sk->tp_pinfo.af_tcp.keepalive_timer);
+       sk->tp_pinfo.af_tcp.keepalive_timer.function=&tcp_keepalive_timer;
+       sk->tp_pinfo.af_tcp.keepalive_timer.data = (unsigned long) sk;
 }
 
 /*
@@ -100,7 +105,11 @@
        case TIME_WRITE:
                printk(KERN_DEBUG "bug: tcp_reset_xmit_timer TIME_WRITE\n");
                break;
-
+               
+       case TIME_KEEPOPEN:
+               mod_timer(&tp->keepalive_timer, jiffies + when);
+               break;
+               
        default:
                printk(KERN_DEBUG "bug: unknown timer value\n");
        };
@@ -116,6 +125,8 @@
                del_timer(&tp->delack_timer);
        if(tp->probe_timer.prev)
                del_timer(&tp->probe_timer);
+       if (tp->keepalive_timer.prev)
+               del_timer(&tp->keepalive_timer);
 }
 
 static int tcp_write_err(struct sock *sk, int force)
@@ -149,7 +160,9 @@
        }
        
        /* Have we tried to SYN too many times (repent repent 8)) */
-       if(tp->retransmits > sysctl_tcp_syn_retries && sk->state==TCP_SYN_SENT) {
+       if (sk->state == TCP_SYN_SENT && 
+           ((!tp->syn_retries && tp->retransmits > sysctl_tcp_syn_retries) ||
+             (tp->syn_retries && tp->retransmits > tp->syn_retries))) {
                tcp_write_err(sk, 1);
                /* Don't FIN, we got nothing back */
                return 0;
@@ -220,34 +233,38 @@
        bh_unlock_sock(sk);
 }
 
-static __inline__ int tcp_keepopen_proc(struct sock *sk)
+void tcp_keepalive_timer(unsigned long data)
 {
-       int res = 0;
+       struct sock *sk = (struct sock *) data;
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+       unsigned int when;
 
+       bh_lock_sock(sk);
        if ((1<<sk->state) & (TCPF_ESTABLISHED|TCPF_CLOSE_WAIT|TCPF_FIN_WAIT2)) {
-               struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
-               __u32 elapsed = tcp_time_stamp - tp->rcv_tstamp;
+/*             __u32 elapsed = tcp_time_stamp - tp->rcv_tstamp; */
 
-               if (elapsed >= sysctl_tcp_keepalive_time) {
-                       if (tp->probes_out > sysctl_tcp_keepalive_probes) {
-                               if(sk->err_soft)
-                                       sk->err = sk->err_soft;
-                               else
-                                       sk->err = ETIMEDOUT;
-
-                               tcp_set_state(sk, TCP_CLOSE);
-                               sk->shutdown = SHUTDOWN_MASK;
-                               if (!sk->dead)
-                                       sk->state_change(sk);
-                       } else {
-                               tp->probes_out++;
-                               tp->pending = TIME_KEEPOPEN;
-                               tcp_write_wakeup(sk);
-                               res = 1;
-                       }
+               if ((!tp->keepalive_probes && tp->probes_out >= sysctl_tcp_keepalive_probes) ||
+                    (tp->keepalive_probes && tp->probes_out >= tp->keepalive_probes)) {
+                       tcp_send_active_reset(sk, GFP_ATOMIC);
+                       if(sk->err_soft)
+                               sk->err = sk->err_soft;
+                       else
+                               sk->err = ETIMEDOUT;
+                       tcp_set_state(sk, TCP_CLOSE);
+                       sk->shutdown = SHUTDOWN_MASK;
+                       if (!sk->dead)
+                               sk->state_change(sk);
+               } else {
+                       tp->probes_out++;
+                       tp->pending = TIME_KEEPOPEN;
+                       tcp_write_wakeup(sk);
+               }
+               if (!tp->keepalive_timer.prev) {
+                       tp->keepalive_timer.expires = jiffies + keepalive_intvl_when(tp);
+                       add_timer(&tp->keepalive_timer);
                }
        }
-       return res;
+       bh_unlock_sock(sk);
 }
 
 /* Kill off TIME_WAIT sockets once their lifetime has expired. */
@@ -339,70 +356,6 @@
        SOCKHASH_UNLOCK_WRITE_BH();
 
        tcp_dec_slow_timer(TCP_SLT_TWKILL);
-}
-
-/*
- *     Check all sockets for keepalive timer
- *     Called every 75 seconds
- *     This timer is started by af_inet init routine and is constantly
- *     running.
- *
- *     It might be better to maintain a count of sockets that need it using
- *     setsockopt/tcp_destroy_sk and only set the timer when needed.
- */
-
-/*
- *     don't send over 5 keepopens at a time to avoid burstiness 
- *     on big servers [AC]
- */
-#define MAX_KA_PROBES  5
-
-int sysctl_tcp_max_ka_probes = MAX_KA_PROBES;
-
-/* Keepopen's are only valid for "established" TCP's, nicely our listener
- * hash gets rid of most of the useless testing, so we run through a couple
- * of the established hash chains each clock tick.  -DaveM
- *
- * And now, even more magic... TIME_WAIT TCP's cannot have keepalive probes
- * going off for them, so we only need check the first half of the established
- * hash table, even less testing under heavy load.
- *
- * I _really_ would rather do this by adding a new timer_struct to struct sock,
- * and this way only those who set the keepalive option will get the overhead.
- * The idea is you set it for 2 hours when the sock is first connected, when it
- * does fire off (if at all, most sockets die earlier) you check for the keepalive
- * option and also if the sock has been idle long enough to start probing.
- */
-static void tcp_keepalive(unsigned long data)
-{
-       static int chain_start = 0;
-       int count = 0;
-       int i;
-       
-       SOCKHASH_LOCK_READ_BH();
-       for(i = chain_start; i < (chain_start + ((tcp_ehash_size >> 1) >> 2)); i++) {
-               struct sock *sk;
-
-               sk = tcp_ehash[i];
-               while(sk) {
-                       struct sock *next = sk->next;
-
-                       bh_lock_sock(sk);
-                       if (sk->keepopen && !sk->lock.users) {
-                               SOCKHASH_UNLOCK_READ_BH();
-                               count += tcp_keepopen_proc(sk);
-                               SOCKHASH_LOCK_READ_BH();
-                       }
-                       bh_unlock_sock(sk);
-                       if(count == sysctl_tcp_max_ka_probes)
-                               goto out;
-                       sk = next;
-               }
-       }
-out:
-       SOCKHASH_UNLOCK_READ_BH();
-       chain_start = ((chain_start + ((tcp_ehash_size >> 1)>>2)) &
-                      ((tcp_ehash_size >> 1) - 1));
 }
 
 /*

Reply via email to