Hi,
I have changed relayd so that it also uses socket splicing for
persistent HTTP connections. Previously, it spliced the incoming and
outgoing TCP streams only if the data was to pass unmodified through
the kernel until the end of the stream.
With this diff, relayd can give the kernel a maximum splice length,
so it can take back control on persistent HTTP sessions or with HTTP
chunking.
As the diff contains a bunch of independent fixes, I will break it
into small pieces for review and commit. But for those who want
to test the whole thing, here it is.
bluhm
Index: usr.sbin/relayd/relay.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/relayd/relay.c,v
retrieving revision 1.157
diff -u -p -r1.157 relay.c
--- usr.sbin/relayd/relay.c 19 Oct 2012 16:49:50 -0000 1.157
+++ usr.sbin/relayd/relay.c 2 Nov 2012 17:49:50 -0000
@@ -70,9 +70,6 @@ void relay_input(struct rsession *);
u_int32_t relay_hash_addr(struct sockaddr_storage *, u_int32_t);
-int relay_splice(struct ctl_relay_event *);
-int relay_splicelen(struct ctl_relay_event *);
-
SSL_CTX *relay_ssl_ctx_create(struct relay *);
void relay_ssl_transaction(struct rsession *,
struct ctl_relay_event *);
@@ -643,6 +640,7 @@ relay_connected(int fd, short sig, void
case RELAY_PROTO_HTTP:
/* Check the servers's HTTP response */
if (!RB_EMPTY(&rlay->rl_proto->response_tree)) {
+ con->se_out.toread = TOREAD_HTTP_HEADER;
outrd = relay_read_http;
if ((con->se_out.nodes = calloc(proto->response_nodes,
sizeof(u_int8_t))) == NULL) {
@@ -681,9 +679,6 @@ relay_connected(int fd, short sig, void
bufferevent_settimeout(bev,
rlay->rl_conf.timeout.tv_sec, rlay->rl_conf.timeout.tv_sec);
bufferevent_enable(bev, EV_READ|EV_WRITE);
-
- if (relay_splice(&con->se_out) == -1)
- relay_close(con, strerror(errno));
}
void
@@ -699,6 +694,7 @@ relay_input(struct rsession *con)
/* Check the client's HTTP request */
if (!RB_EMPTY(&rlay->rl_proto->request_tree) ||
proto->lateconnect) {
+ con->se_in.toread = TOREAD_HTTP_HEADER;
inrd = relay_read_http;
if ((con->se_in.nodes = calloc(proto->request_nodes,
sizeof(u_int8_t))) == NULL) {
@@ -731,9 +727,6 @@ relay_input(struct rsession *con)
bufferevent_settimeout(con->se_in.bev,
rlay->rl_conf.timeout.tv_sec, rlay->rl_conf.timeout.tv_sec);
bufferevent_enable(con->se_in.bev, EV_READ|EV_WRITE);
-
- if (relay_splice(&con->se_in) == -1)
- relay_close(con, strerror(errno));
}
void
@@ -741,10 +734,19 @@ relay_write(struct bufferevent *bev, voi
{
struct ctl_relay_event *cre = (struct ctl_relay_event *)arg;
struct rsession *con = cre->con;
+
if (gettimeofday(&con->se_tv_last, NULL) == -1)
- con->se_done = 1;
+ goto fail;
if (con->se_done)
- relay_close(con, "last write (done)");
+ goto done;
+ if (relay_splice(cre->dst) == -1)
+ goto fail;
+ return;
+ done:
+ relay_close(con, "last write (done)");
+ return;
+ fail:
+ relay_close(con, strerror(errno));
}
void
@@ -822,11 +824,27 @@ relay_splice(struct ctl_relay_event *cre
(proto->tcpflags & TCPFLAG_NSPLICE))
return (0);
- if (cre->bev->readcb != relay_read)
+ if (cre->splicelen >= 0)
return (0);
+ if (! (cre->toread == TOREAD_UNLIMITED || cre->toread > 0)) {
+ DPRINTF("%s: session %d: splice dir %d, nothing to read %lld",
+ __func__, con->se_id, cre->dir, cre->toread);
+ return (0);
+ }
+
+ /* do not splice before buffers have been completely flushed */
+ if (EVBUFFER_LENGTH(cre->bev->input) ||
+ EVBUFFER_LENGTH(cre->dst->bev->output)) {
+ DPRINTF("%s: session %d: splice dir %d, dirty buffer",
+ __func__, con->se_id, cre->dir);
+ bufferevent_disable(cre->bev, EV_READ);
+ return (2);
+ }
+
bzero(&sp, sizeof(sp));
sp.sp_fd = cre->dst->s;
+ sp.sp_max = cre->toread > 0 ? cre->toread : 0;
sp.sp_idle = rlay->rl_conf.timeout;
if (setsockopt(cre->s, SOL_SOCKET, SO_SPLICE, &sp, sizeof(sp)) == -1) {
log_debug("%s: session %d: splice dir %d failed: %s",
@@ -834,8 +852,11 @@ relay_splice(struct ctl_relay_event *cre
return (-1);
}
cre->splicelen = 0;
- DPRINTF("%s: session %d: splice dir %d successful",
- __func__, con->se_id, cre->dir);
+ bufferevent_enable(cre->bev, EV_READ);
+
+ DPRINTF("%s: session %d: splice dir %d, maximum %lld, successful",
+ __func__, con->se_id, cre->dir, cre->toread);
+
return (1);
}
@@ -846,12 +867,19 @@ relay_splicelen(struct ctl_relay_event *
off_t len;
socklen_t optlen;
+ if (cre->splicelen < 0)
+ return (0);
+
optlen = sizeof(len);
if (getsockopt(cre->s, SOL_SOCKET, SO_SPLICE, &len, &optlen) == -1) {
log_debug("%s: session %d: splice dir %d get length failed: %s",
__func__, con->se_id, cre->dir, strerror(errno));
return (-1);
}
+
+ DPRINTF("%s: session %d: splice dir %d, length %lld",
+ __func__, con->se_id, cre->dir, len);
+
if (len > cre->splicelen) {
cre->splicelen = len;
return (1);
@@ -859,6 +887,20 @@ relay_splicelen(struct ctl_relay_event *
return (0);
}
+int
+relay_spliceadjust(struct ctl_relay_event *cre)
+{
+ if (cre->splicelen < 0)
+ return (0);
+ if (relay_splicelen(cre) == -1)
+ return (-1);
+ if (cre->splicelen > 0 && cre->toread > 0)
+ cre->toread -= cre->splicelen;
+ cre->splicelen = -1;
+
+ return (1);
+}
+
void
relay_error(struct bufferevent *bev, short error, void *arg)
{
@@ -898,10 +940,18 @@ relay_error(struct bufferevent *bev, sho
break;
}
}
+ if (relay_spliceadjust(cre) == -1)
+ goto fail;
if (relay_splice(cre) == -1)
goto fail;
return;
}
+ if (error & EVBUFFER_ERROR && errno == EMSGSIZE) {
+ if (relay_spliceadjust(cre) == -1)
+ goto fail;
+ bufferevent_enable(cre->bev, EV_READ);
+ return;
+ }
if (error & (EVBUFFER_READ|EVBUFFER_WRITE|EVBUFFER_EOF)) {
bufferevent_disable(bev, EV_READ|EV_WRITE);
@@ -975,6 +1025,8 @@ relay_accept(int fd, short event, void *
con->se_out.con = con;
con->se_in.splicelen = -1;
con->se_out.splicelen = -1;
+ con->se_in.toread = TOREAD_UNLIMITED;
+ con->se_out.toread = TOREAD_UNLIMITED;
con->se_relay = rlay;
con->se_id = ++relay_conid;
con->se_relayid = rlay->rl_conf.id;
Index: usr.sbin/relayd/relay_http.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/relayd/relay_http.c,v
retrieving revision 1.3
diff -u -p -r1.3 relay_http.c
--- usr.sbin/relayd/relay_http.c 10 Oct 2012 14:27:46 -0000 1.3
+++ usr.sbin/relayd/relay_http.c 2 Nov 2012 17:49:50 -0000
@@ -81,11 +81,12 @@ relay_read_http(struct bufferevent *bev,
if (gettimeofday(&con->se_tv_last, NULL) == -1)
goto fail;
size = EVBUFFER_LENGTH(src);
- DPRINTF("%s: size %lu, to read %llu", __func__, size, cre->toread);
+ DPRINTF("%s: dir %d, size %lu, to read %lld",
+ __func__, cre->dir, size, cre->toread);
if (!size) {
if (cre->dir == RELAY_DIR_RESPONSE)
return;
- cre->toread = 0;
+ cre->toread = TOREAD_HTTP_HEADER;
goto done;
}
@@ -227,8 +228,7 @@ relay_read_http(struct bufferevent *bev,
* the carriage return? And some browsers seem to
* include the line length in the content-length.
*/
- cre->toread = strtonum(pk.value, 0, ULLONG_MAX,
- &errstr);
+ cre->toread = strtonum(pk.value, 0, LLONG_MAX, &errstr);
if (errstr) {
relay_abort_http(con, 500, errstr, 0);
goto abort;
@@ -317,6 +317,7 @@ relay_read_http(struct bufferevent *bev,
return;
case HTTP_METHOD_CONNECT:
/* Data stream */
+ cre->toread = TOREAD_UNLIMITED;
bev->readcb = relay_read;
break;
case HTTP_METHOD_DELETE:
@@ -327,22 +328,25 @@ relay_read_http(struct bufferevent *bev,
case HTTP_METHOD_PUT:
case HTTP_METHOD_RESPONSE:
/* HTTP request payload */
- if (cre->toread) {
+ if (cre->toread > 0)
bev->readcb = relay_read_httpcontent;
- break;
- }
/* Single-pass HTTP response */
- bev->readcb = relay_read;
+ if (cre->toread < 0) {
+ cre->toread = TOREAD_UNLIMITED;
+ bev->readcb = relay_read;
+ }
+
break;
default:
/* HTTP handler */
+ cre->toread = TOREAD_HTTP_HEADER;
bev->readcb = relay_read_http;
break;
}
if (cre->chunked) {
/* Chunked transfer encoding */
- cre->toread = 0;
+ cre->toread = TOREAD_HTTP_CHUNK_LENGHT;
bev->readcb = relay_read_httpchunks;
}
@@ -353,7 +357,7 @@ relay_read_http(struct bufferevent *bev,
relay_http_request_close(cre);
done:
- if (cre->dir == RELAY_DIR_REQUEST && !cre->toread &&
+ if (cre->dir == RELAY_DIR_REQUEST && cre->toread < 0 &&
proto->lateconnect && cre->dst->bev == NULL) {
if (rlay->rl_conf.fwdmode == FWD_TRANS) {
relay_bindanyreq(con, 0, IPPROTO_TCP);
@@ -371,6 +375,8 @@ relay_read_http(struct bufferevent *bev,
if (EVBUFFER_LENGTH(src) && bev->readcb != relay_read_http)
bev->readcb(bev, arg);
bufferevent_enable(bev, EV_READ);
+ if (relay_splice(cre) == -1)
+ relay_close(con, strerror(errno));
return;
fail:
relay_abort_http(con, 500, strerror(errno), 0);
@@ -390,17 +396,33 @@ relay_read_httpcontent(struct buffereven
if (gettimeofday(&con->se_tv_last, NULL) == -1)
goto fail;
size = EVBUFFER_LENGTH(src);
- DPRINTF("%s: size %lu, to read %llu", __func__,
- size, cre->toread);
+ DPRINTF("%s: dir %d, size %lu, to read %lld",
+ __func__, cre->dir, size, cre->toread);
if (!size)
return;
- if (relay_bufferevent_write_buffer(cre->dst, src) == -1)
+ if (relay_spliceadjust(cre) == -1)
goto fail;
- if ((off_t)size >= cre->toread)
+
+ if (cre->toread > 0) {
+ /* Read content data */
+ if ((off_t)size > cre->toread) {
+ size = cre->toread;
+ if (relay_bufferevent_write_chunk(cre->dst, src, size)
+ == -1)
+ goto fail;
+ cre->toread = 0;
+ } else {
+ if (relay_bufferevent_write_buffer(cre->dst, src) == -1)
+ goto fail;
+ cre->toread -= size;
+ }
+ DPRINTF("%s: done, size %lu, to read %lld", __func__,
+ size, cre->toread);
+ }
+ if (cre->toread == 0) {
+ cre->toread = TOREAD_HTTP_HEADER;
bev->readcb = relay_read_http;
- cre->toread -= size;
- DPRINTF("%s: done, size %lu, to read %llu", __func__,
- size, cre->toread);
+ }
if (con->se_done)
goto done;
if (bev->readcb != relay_read_httpcontent)
@@ -427,19 +449,37 @@ relay_read_httpchunks(struct bufferevent
if (gettimeofday(&con->se_tv_last, NULL) == -1)
goto fail;
size = EVBUFFER_LENGTH(src);
- DPRINTF("%s: size %lu, to read %llu", __func__,
- size, cre->toread);
+ DPRINTF("%s: dir %d, size %lu, to read %lld",
+ __func__, cre->dir, size, cre->toread);
if (!size)
return;
+ if (relay_spliceadjust(cre) == -1)
+ goto fail;
- if (!cre->toread) {
+ if (cre->toread > 0) {
+ /* Read chunk data */
+ if ((off_t)size > cre->toread) {
+ size = cre->toread;
+ if (relay_bufferevent_write_chunk(cre->dst, src, size)
+ == -1)
+ goto fail;
+ cre->toread = 0;
+ } else {
+ if (relay_bufferevent_write_buffer(cre->dst, src) == -1)
+ goto fail;
+ cre->toread -= size;
+ }
+ DPRINTF("%s: done, size %lu, to read %lld", __func__,
+ size, cre->toread);
+ }
+ if (cre->toread == TOREAD_HTTP_CHUNK_LENGHT) {
line = evbuffer_readline(src);
if (line == NULL) {
/* Ignore empty line, continue */
bufferevent_enable(bev, EV_READ);
return;
}
- if (!strlen(line)) {
+ if (strlen(line) == 0) {
free(line);
goto next;
}
@@ -458,40 +498,38 @@ relay_read_httpchunks(struct bufferevent
}
free(line);
- /* Last chunk is 0 bytes followed by an empty newline */
+ /* Last chunk is 0 bytes followed by optional trailer */
if ((cre->toread = lval) == 0) {
DPRINTF("%s: last chunk", __func__);
-
- line = evbuffer_readline(src);
- if (line == NULL) {
- relay_close(con, "invalid last chunk");
- return;
- }
+ cre->toread = TOREAD_HTTP_CHUNK_TRAILER;
+ }
+ } else if (cre->toread == TOREAD_HTTP_CHUNK_TRAILER) {
+ /* Last chunk is 0 bytes followed by trailer and empty line */
+ line = evbuffer_readline(src);
+ if (line == NULL) {
+ /* Ignore empty line, continue */
+ bufferevent_enable(bev, EV_READ);
+ return;
+ }
+ if (relay_bufferevent_print(cre->dst, line) == -1 ||
+ relay_bufferevent_print(cre->dst, "\r\n") == -1) {
free(line);
- if (relay_bufferevent_print(cre->dst, "\r\n") == -1)
- goto fail;
-
+ goto fail;
+ }
+ if (strlen(line) == 0) {
/* Switch to HTTP header mode */
+ cre->toread = TOREAD_HTTP_HEADER;
bev->readcb = relay_read_http;
}
- } else {
- /* Read chunk data */
- if ((off_t)size > cre->toread)
- size = cre->toread;
- if (relay_bufferevent_write_chunk(cre->dst, src, size) == -1)
+ free(line);
+ } else if (cre->toread == 0) {
+ /* Chunk is terminated by an empty newline */
+ line = evbuffer_readline(src);
+ if (line != NULL)
+ free(line);
+ if (relay_bufferevent_print(cre->dst, "\r\n") == -1)
goto fail;
- cre->toread -= size;
- DPRINTF("%s: done, size %lu, to read %llu", __func__,
- size, cre->toread);
-
- if (cre->toread == 0) {
- /* Chunk is terminated by an empty (empty) newline */
- line = evbuffer_readline(src);
- if (line != NULL)
- free(line);
- if (relay_bufferevent_print(cre->dst, "\r\n\r\n") == -1)
- goto fail;
- }
+ cre->toread = TOREAD_HTTP_CHUNK_LENGHT;
}
next:
Index: usr.sbin/relayd/relayd.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/relayd/relayd.h,v
retrieving revision 1.162
diff -u -p -r1.162 relayd.h
--- usr.sbin/relayd/relayd.h 19 Oct 2012 16:49:50 -0000 1.162
+++ usr.sbin/relayd/relayd.h 2 Nov 2012 17:49:50 -0000
@@ -196,6 +196,11 @@ struct ctl_relay_event {
int buflen;
};
+#define TOREAD_UNLIMITED -1
+#define TOREAD_HTTP_HEADER -2
+#define TOREAD_HTTP_CHUNK_LENGHT -3
+#define TOREAD_HTTP_CHUNK_TRAILER -4
+
struct ctl_natlook {
objid_t id;
int proc;
@@ -988,6 +993,9 @@ int relay_cmp_af(struct sockaddr_storag
struct sockaddr_storage *);
void relay_write(struct bufferevent *, void *);
void relay_read(struct bufferevent *, void *);
+int relay_splice(struct ctl_relay_event *);
+int relay_splicelen(struct ctl_relay_event *);
+int relay_spliceadjust(struct ctl_relay_event *);
void relay_error(struct bufferevent *, short, void *);
int relay_lognode(struct rsession *,
struct protonode *, struct protonode *, char *, size_t);