On Wed, Mar 30, 2011 at 09:33:11PM +0200, Claudio Jeker wrote:
> On Wed, Mar 30, 2011 at 08:34:24PM +0200, Mark Kettenis wrote:
> > > Date: Tue, 29 Mar 2011 22:42:47 +0200
> > > From: Claudio Jeker <[email protected]>
> > >
> > > Here is a possible fix. The problem was that because of the way NFS uses
> > > the socket API it did not turn of the sendbuffer scaling which reset the
> > > size of the socket back to 17376 bytes which is a no go when a buffer of
> > > more then 17k is generated by NFS. It is better to initialize the sb_wat
> > > in soreserve() which is called by NFS and all attach functions.
> >
> > This no longer does the sbcheckreserve() dance though. Is that alright?
> >
>
> The code that was there previously was a bit strange. Since when
> sb_hiwat == 0 is true then sb_wat is 0 as well. Additionally
> sbcheckreserve() would only cause the watermark to be set to the default
> which is tcp_sendspace/tcp_recvspace. So since sb_hiwat == 0 is never run.
>
While digging myself deeper into this code I figured out that we may want
something a bit more like the following diff. It adds back the
sbcheckreserve() checks in TCP and the socket buffers inherit more from
their listening socket when creating a new socket because of a SYN.
FreeBSD does something similar in their sonewconn() function. It also
seems to follow the accept(2) man page more closely:
The accept() call extracts the first connection request on
the queue of pending connections, creates a new socket with the same
properties of s, and allocates a new file descriptor for the socket.
Not sure if the SB_ASYNC sb_flags needs to be inherited as well.
--
:wq Claudio
Index: kern/uipc_socket2.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
retrieving revision 1.51
diff -u -p -r1.51 uipc_socket2.c
--- kern/uipc_socket2.c 24 Sep 2010 02:59:45 -0000 1.51
+++ kern/uipc_socket2.c 31 Mar 2011 14:27:31 -0000
@@ -176,8 +176,16 @@ sonewconn(struct socket *head, int conns
/*
* Inherit watermarks but those may get clamped in low mem situations.
*/
+ if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
+ pool_put(&socket_pool, so);
+ return ((struct socket *)0);
+ }
so->so_snd.sb_wat = head->so_snd.sb_wat;
+ so->so_snd.sb_lowat = head->so_snd.sb_lowat;
+ so->so_snd.sb_timeo = head->so_snd.sb_timeo;
so->so_rcv.sb_wat = head->so_rcv.sb_wat;
+ so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
+ so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
soqinsque(head, so, soqueue);
if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH, NULL, NULL, NULL,
@@ -353,6 +361,8 @@ soreserve(struct socket *so, u_long sndc
goto bad;
if (sbreserve(&so->so_rcv, rcvcc))
goto bad2;
+ so->so_snd.sb_wat = sndcc;
+ so->so_rcv.sb_wat = rcvcc;
if (so->so_rcv.sb_lowat == 0)
so->so_rcv.sb_lowat = 1;
if (so->so_snd.sb_lowat == 0)
Index: netinet/tcp_usrreq.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.105
diff -u -p -r1.105 tcp_usrreq.c
--- netinet/tcp_usrreq.c 10 Oct 2010 22:02:50 -0000 1.105
+++ netinet/tcp_usrreq.c 31 Mar 2011 13:42:56 -0000
@@ -652,16 +652,10 @@ tcp_attach(so)
struct inpcb *inp;
int error;
- if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
- /* if low on memory only allow smaller then default buffers */
- if (so->so_snd.sb_wat == 0 ||
- sbcheckreserve(so->so_snd.sb_wat, tcp_sendspace))
- so->so_snd.sb_wat = tcp_sendspace;
- if (so->so_rcv.sb_wat == 0 ||
- sbcheckreserve(so->so_rcv.sb_wat, tcp_recvspace))
- so->so_rcv.sb_wat = tcp_recvspace;
-
- error = soreserve(so, so->so_snd.sb_wat, so->so_rcv.sb_wat);
+ if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0 ||
+ sbcheckreserve(so->so_snd.sb_wat, tcp_sendspace) ||
+ sbcheckreserve(so->so_rcv.sb_wat, tcp_recvspace)) {
+ error = soreserve(so, tcp_sendspace, tcp_recvspace);
if (error)
return (error);
}