Hi Willy

Well, that took a lot longer than anticipated… my first cut only worked for
small requests. It does not seem possible to use TFO unless the whole
application-layer request fits within the SYN packet. A fragmented SYN is
likely to get dropped by the receiver, and if we hold some of the request data
back to avoid SYN fragmentation, then the remote application just gets pushed a
partial request, so it still breaks (at least on Linux).

I moved the TFO client code back into proto_tcp.c; this made it easier to
fall back to a regular TCP connection when it would be unwise to attempt TFO
for a given request. Trying to catch this in raw_sock was too tricky for me.

I’ve also added a tfo-max-rs parameter that limits the maximum request size
for which TFO is attempted. If it is not supplied, a default of 1420 bytes is
used to allow for headers and TCP options. It looks like it might be possible
to replace this with a lookup of the MSS cached alongside each TFO cookie, but
I have not attempted that yet.

I’ve tested this patch with debian wheezy using kernel 3.12 from 
wheezy-backports.  
 On the client side, set net.ipv4.tcp_fastopen = 1
 Server side, set net.ipv4.tcp_fastopen = 2

I’d say this is still Experimental, approach with caution…

Patch against 1.5-dev22 [RELEASE] follows

Best regards
David

diff --git a/include/common/compat.h b/include/common/compat.h
index 48ea1f7..3827556 100644
--- a/include/common/compat.h
+++ b/include/common/compat.h
@@ -127,7 +127,10 @@
 #ifndef TCP_FASTOPEN
 #define TCP_FASTOPEN 23
 #endif
+#ifndef MSG_FASTOPEN
+#define MSG_FASTOPEN 0x20000000
 #endif
+#endif /* USE_TFO */
 
 /* If IPv6 is supported, define IN6_IS_ADDR_V4MAPPED() if missing. */
 #if defined(IPV6_TCLASS) && !defined(IN6_IS_ADDR_V4MAPPED)
diff --git a/include/common/defaults.h b/include/common/defaults.h
index f765e90..dcbfdd7 100644
--- a/include/common/defaults.h
+++ b/include/common/defaults.h
@@ -214,4 +214,18 @@
 #define SSLCACHESIZE 20000
 #endif
 
+
+/* Default maximum size a request can be to attempt TCP FastOpen
+ * 1420 = 1500 - 20 (IP) - 20 (TCP) - 40 (max size of TCP Options)
+ * Must be small enough to make sure the complete application request will fit
+ * within the initial SYN without fragmentation.
+ * Can override on a per server basis using the tfo-max-rs keyword
+ */
+#ifndef DEF_FASTOPEN_MAX_REQ_SZ
+#define DEF_FASTOPEN_MAX_REQ_SZ 1420
+#endif
+
+
+
+
 #endif /* _COMMON_DEFAULTS_H */
diff --git a/include/types/server.h b/include/types/server.h
index 54ab813..26ced7d 100644
--- a/include/types/server.h
+++ b/include/types/server.h
@@ -47,7 +47,7 @@
 #define SRV_RUNNING    0x0001  /* the server is UP */
 #define SRV_BACKUP     0x0002  /* this server is a backup server */
 #define SRV_MAPPORTS   0x0004  /* this server uses mapped ports */
-/* unused: 0x0008 */
+#define SRV_FASTOPEN    0x0008  /* we can use TCP Fast Open to connect to this 
server */
 /* unused: 0x0010 */
 #define SRV_GOINGDOWN  0x0020  /* this server says that it's going down (404) 
*/
 #define SRV_WARMINGUP  0x0040  /* this server is warming up after a failure */
@@ -133,6 +133,10 @@
        short onmarkedup;                       /* what to do when marked up: 
one of HANA_ONMARKEDUP_* */
        int slowstart;                          /* slowstart time in seconds 
(ms in the conf) */
 
+#ifdef TCP_FASTOPEN
+       int tfo_max_rs;               /* max size of a request for which we 
will attempt TCP FastOpen */
+#endif
+
        char *id;                               /* just for identification */
        unsigned iweight,uweight, eweight;      /* initial weight, 
user-specified weight, and effective weight */
        unsigned wscore;                        /* weight score, used during 
srv map computation */
diff --git a/src/proto_tcp.c b/src/proto_tcp.c
index 11f6331..6a04ba5 100644
--- a/src/proto_tcp.c
+++ b/src/proto_tcp.c
@@ -58,6 +58,12 @@
 #include <import/ip_tproxy.h>
 #endif
 
+
+#ifdef TCP_FASTOPEN
+#include <common/buffer.h>
+#endif
+
+
 static int tcp_bind_listeners(struct protocol *proto, char *errmsg, int 
errlen);
 static int tcp_bind_listener(struct listener *listener, char *errmsg, int 
errlen);
 
@@ -295,6 +301,7 @@
                return SN_ERR_INTERNAL;
        }
 
+
        if ((fd = conn->t.sock.fd = socket(conn->addr.to.ss_family, 
SOCK_STREAM, IPPROTO_TCP)) == -1) {
                qfprintf(stderr, "Cannot get a server socket.\n");
 
@@ -462,67 +469,136 @@
        if (global.tune.server_rcvbuf)
                 setsockopt(fd, SOL_SOCKET, SO_RCVBUF, 
&global.tune.server_rcvbuf, sizeof(global.tune.server_rcvbuf));
 
+
+#ifdef TCP_FASTOPEN
+       /* Linux >= 3.6. Client side TCP Fast Open lets us send data with the 
initial SYN
+        *  - only try TFO if buffer has contiguous data waiting, and all the 
data will fit within the SYN
+        *  - usually some data will be accepted by the kernel, if so we update 
the buffer
+        *  - If no TFO cookie yet, returns EINPROGRESS and uses a normal tcp 
connect
+        *
+        *  If the TFO sendto() succeeds, drops through to conn_ctrl_init etc
+        *
+        *  If TFO is not enabled for this server, or no data to send, use a 
regular TCP connect()
+        *  Do not attempt TFO if request is larger than TFO Max Request Size 
(tfo-max-rs)
+        *    - defaults to 1420 octets, to allow for max TCP options size
+        *    - if the request will not fit in the SYN without fragmentation, 
then the
+        *      TFO server receives only a partial request; the TCP will not 
buffer the request
+        *      while the TFO handshake completes
+        *    - also some firewalls will drop a fragmented SYN anyway
+        *
+        *    TODO: auto-detect the Max Request Size based on the cached 
tcp_metrics.tcpm_fastopen.mss
+        *    for the server inet.addr; looks like mss is cached when the TFO 
cookie is exchanged
+        *
+        *
+        *  */
+       int use_fastopen = 0;
+       struct stream_interface *si = conn->owner;
+       struct channel *chn = si->ob;
+       struct buffer *buf = chn->buf;
+
+       use_fastopen =  ((srv->state & SRV_FASTOPEN)
+                                               && buf->o
+                                               && (buf->o <= srv->tfo_max_rs)
+                                               && (buf->o == 
bo_contig_data(buf))  /* is this check needed - could it ever fail? */
+                                       );
+
+       DPRINTF(stderr,"[%u] %s: fd=%d, srv_state=%08x, buf=%d, buf_contig=%d, 
tfo_max_rs=%d, use_fastopen=%d\n",
+                               now_ms, __FUNCTION__, fd, srv->state, buf->o, 
bo_contig_data(buf), srv->tfo_max_rs, use_fastopen);
+
+       if (use_fastopen) {
+
+               unsigned int send_flag = (MSG_FASTOPEN | MSG_DONTWAIT | 
MSG_NOSIGNAL);
+               int ret = 0;
+
+               ret = sendto(fd, bo_ptr(buf), bo_contig_data(buf), send_flag, 
(struct sockaddr *)&conn->addr.to, get_addr_len(&conn->addr.to));
+
+               if  (ret >= 0)  {
+                               DPRINTF(stderr,"[%u] %s: tfo: fd=%d, 
srv_state=%08x, bo_ptr=%p, ret=%d, errno=%d\n",
+                                                       now_ms, __FUNCTION__, 
fd, srv->state, bo_ptr(buf), ret, errno);
+                               buf->p += ret;
+                               buf->o -= ret;
+                               if (likely(buffer_empty(buf))) {
+                                         data = 0;
+                                         buffer_realign(buf);
+                               }
+               } else if ((errno != EINPROGRESS) && (errno != EALREADY) && 
(errno != EISCONN)){
+                               DPRINTF(stderr, "[%u] %s: TCP Fast Open failed 
for backend %s: errno=%d\n", now_ms, __FUNCTION__, be->id, errno );
+                               send_log(be, LOG_ERR, "TCP Fast Open failed for 
backend %s: errno=%d\n", be->id, errno);
+                               port_range_release_port(fdinfo[fd].port_range, 
fdinfo[fd].local_port);
+                               fdinfo[fd].port_range = NULL;
+                               close(fd);
+                               conn->err_code = CO_ER_SOCK_ERR;
+                               conn->flags |= CO_FL_ERROR;
+                               return SN_ERR_SRVCL;
+               }
+
+               /* now fall through to conn_ctrl_init etc below */
+
+
+       } else
+#endif /* TCP_FASTOPEN */
+
        if ((connect(fd, (struct sockaddr *)&conn->addr.to, 
get_addr_len(&conn->addr.to)) == -1) &&
            (errno != EINPROGRESS) && (errno != EALREADY) && (errno != 
EISCONN)) {
                    (errno != EINPROGRESS) && (errno != EALREADY) && (errno != 
EISCONN)) {
 

 
@@ -833,11 +909,15 @@
 #if defined(TCP_FASTOPEN)
        if (listener->options & LI_O_TCP_FO) {
                /* TFO needs a queue length, let's use the configured backlog */
-               int qlen = listener->backlog ? listener->backlog : 
listener->maxconn;
+               /* Safer to fallback to maxaccept, not maxconn? */
+               int qlen = listener->backlog ? listener->backlog : 
listener->maxaccept;
                if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, 
sizeof(qlen)) == -1) {
                        msg = "cannot enable TCP_FASTOPEN";
                        err |= ERR_WARN;
                }
+               DPRINTF(stderr,"%s: tfo_Listen: fd=%d, listener_options=%08x, 
qlen=%d, backlog=%d, maxconn=%d, maxaccept=%d\n",
+                                       __FUNCTION__, fd, listener->options, 
qlen, listener->backlog, listener->maxconn, listener->maxaccept);
+
        }
 #endif
 #if defined(IPV6_V6ONLY)
diff --git a/src/server.c b/src/server.c
index d91c726..e05e383 100644
--- a/src/server.c
+++ b/src/server.c
@@ -140,6 +140,32 @@
        return 0;
 }
 
+/* TCP Fast Open: parse the "tfo" server keyword */
+static int srv_parse_tfo(char **args, int *cur_arg, struct proxy *px, struct 
server *newsrv, char **err)
+{
+       newsrv->state |= SRV_FASTOPEN;
+       newsrv->tfo_max_rs = DEF_FASTOPEN_MAX_REQ_SZ;
+       return 0;
+}
+/* TCP Fast Open: maximum request size for which we will attempt TFO */
+static int srv_parse_tfo_max_rs(char **args, int *cur_arg, struct proxy *px, 
struct server *newsrv, char **err)
+{
+       newsrv->state |= SRV_FASTOPEN;   /*tfo_max_rs implies tfo */
+       newsrv->tfo_max_rs = atol(args[*cur_arg + 1]);
+
+       /* Tiny values may not make much sense but people can try if they want 
*/
+       if (newsrv->tfo_max_rs < 1 || newsrv->tfo_max_rs > 1420 ) {
+               memprintf(err, "'%s' : expects an integer argument, range 1 - 
1420", args[*cur_arg]);
+               return ERR_ALERT | ERR_FATAL;
+       }
+
+       return 0;
+}
+
+
+
+
+
 /* Note: must not be declared <const> as its list will be overwritten.
  * Please take care of keeping this list alphabetically sorted, doing so helps
  * all code contributors.
@@ -149,6 +175,8 @@
  */
 static struct srv_kw_list srv_kws = { "ALL", { }, {
        { "id",           srv_parse_id,           1,  0 }, /* set id# of server 
*/
+       { "tfo",          srv_parse_tfo,          0,  0 }, /* use TCP Fast Open 
to connect to this server */
+       { "tfo-max-rs",   srv_parse_tfo_max_rs,   1,  0 }, /* set the maximum 
request size for which we attempt TFO */
        { NULL, NULL, 0 },
 }};
 









On 25 Jan 2014, at 07:49, Willy Tarreau <w...@1wt.eu> wrote:

> Hi David,
> 
> On Fri, Jan 24, 2014 at 11:09:45PM +0000, David Harrold wrote:
>> Hi Willy
>> 
>> A very quick update - I've managed to get TFO working between two haproxy
>> instances running on Debian 7 with the 3.2.11 kernel from
>> http://www.multipath-tcp.org. 
>> 
>> Lots more testing and tidying up to do but I hope to be able to share a patch
>> for consideration in a few weeks.
> 
> Excellent, that's great news! Did you have to change lots of things
> in the connection establishment sequence or not ? If the changes are
> minimal and low-impact, we could even consider merging it for 1.5-final,
> as I expect that a number of users will start to use it in the upcoming
> months/years.
> 
> Best regards,
> Willy
> 

Reply via email to