Hi,

Spliced TCP sockets become faster if we put the output part into
its own task thread.  This is inspired by userland copy where we
also have to go through the scheduler.  This gives the socket buffer
a chance to be filled up and tcp_output() is called less often and
with bigger chunks.

ok?

bluhm

Index: kern/uipc_socket.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.152
diff -u -p -r1.152 uipc_socket.c
--- kern/uipc_socket.c  13 Jun 2016 21:24:43 -0000      1.152
+++ kern/uipc_socket.c  29 Jul 2016 15:44:33 -0000
@@ -59,6 +59,7 @@ void  sbsync(struct sockbuf *, struct mbu
 int    sosplice(struct socket *, int, off_t, struct timeval *);
 void   sounsplice(struct socket *, struct socket *, int);
 void   soidle(void *);
+void   sotask(void *);
 int    somove(struct socket *, int);
 
 void   filt_sordetach(struct knote *kn);
@@ -85,6 +86,7 @@ int   sominconn = SOMINCONN;
 struct pool socket_pool;
 #ifdef SOCKET_SPLICE
 struct pool sosplice_pool;
+struct taskq *sosplice_taskq;
 #endif
 
 void
@@ -1041,6 +1043,7 @@ sorflush(struct socket *so)
 #define so_splicemax   so_sp->ssp_max
 #define so_idletv      so_sp->ssp_idletv
 #define so_idleto      so_sp->ssp_idleto
+#define so_splicetask  so_sp->ssp_task
 
 int
 sosplice(struct socket *so, int fd, off_t max, struct timeval *tv)
@@ -1049,6 +1052,10 @@ sosplice(struct socket *so, int fd, off_
        struct socket   *sosp;
        int              s, error = 0;
 
+       if (sosplice_taskq == NULL)
+               sosplice_taskq = taskq_create("sosplice", 1, IPL_SOFTNET,
+                   TASKQ_CANTSLEEP);
+
        if ((so->so_proto->pr_flags & PR_SPLICE) == 0)
                return (EPROTONOSUPPORT);
        if (so->so_options & SO_ACCEPTCONN)
@@ -1126,6 +1133,7 @@ sosplice(struct socket *so, int fd, off_
        else
                timerclear(&so->so_idletv);
        timeout_set(&so->so_idleto, soidle, so);
+       task_set(&so->so_splicetask, sotask, so);
 
        /*
         * To prevent softnet interrupt from calling somove() while
@@ -1149,6 +1157,7 @@ sounsplice(struct socket *so, struct soc
 {
        splsoftassert(IPL_SOFTNET);
 
+       task_del(sosplice_taskq, &so->so_splicetask);
        timeout_del(&so->so_idleto);
        sosp->so_snd.sb_flagsintr &= ~SB_SPLICE;
        so->so_rcv.sb_flagsintr &= ~SB_SPLICE;
@@ -1171,6 +1180,27 @@ soidle(void *arg)
        splx(s);
 }
 
+void
+sotask(void *arg)
+{
+       struct socket *so = arg;
+       int s;
+
+       s = splsoftnet();
+       if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
+               /*
+                * We may not sleep here as sofree() and unsplice() may be
+                * called from softnet interrupt context.  This would remove
+                * the socket during somove().
+                */
+               somove(so, M_DONTWAIT);
+       }
+       splx(s);
+
+       /* Avoid user land starvation. */
+       yield();
+}
+
 /*
  * Move data from receive buffer of spliced source socket to send
  * buffer of drain socket.  Try to move as much as possible in one
@@ -1444,19 +1474,26 @@ somove(struct socket *so, int wait)
        return (1);
 }
 
-#undef so_splicelen
-#undef so_splicemax
-#undef so_idletv
-#undef so_idleto
-
 #endif /* SOCKET_SPLICE */
 
 void
 sorwakeup(struct socket *so)
 {
 #ifdef SOCKET_SPLICE
-       if (so->so_rcv.sb_flagsintr & SB_SPLICE)
-               (void) somove(so, M_DONTWAIT);
+       if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
+               /*
+                * TCP has a sendbuffer that can handle multiple packets
+                * at once.  So queue the stream a bit to accumulate data.
+                * The sosplice thread will call somove() later and send
+                * the packets calling tcp_output() only once.
+                * In the UDP case, send out the packets immediately.
+                * Using a thread would make things slower.
+                */
+               if (so->so_proto->pr_flags & PR_WANTRCVD)
+                       task_add(sosplice_taskq, &so->so_splicetask);
+               else
+                       somove(so, M_DONTWAIT);
+       }
        if (isspliced(so))
                return;
 #endif
@@ -1470,7 +1507,7 @@ sowwakeup(struct socket *so)
 {
 #ifdef SOCKET_SPLICE
        if (so->so_snd.sb_flagsintr & SB_SPLICE)
-               (void) somove(so->so_sp->ssp_soback, M_DONTWAIT);
+               task_add(sosplice_taskq, &so->so_sp->ssp_soback->so_splicetask);
 #endif
        sowakeup(so, &so->so_snd);
 }
Index: sys/socketvar.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/socketvar.h,v
retrieving revision 1.61
diff -u -p -r1.61 socketvar.h
--- sys/socketvar.h     28 Jun 2016 14:47:00 -0000      1.61
+++ sys/socketvar.h     29 Jul 2016 15:44:33 -0000
@@ -34,6 +34,7 @@
 
 #include <sys/selinfo.h>                       /* for struct selinfo */
 #include <sys/queue.h>
+#include <sys/task.h>
 #include <sys/timeout.h>
 #include <sys/rwlock.h>
 
@@ -92,6 +93,7 @@ struct socket {
                off_t   ssp_max;                /* maximum number of bytes */
                struct  timeval ssp_idletv;     /* idle timeout */
                struct  timeout ssp_idleto;
+               struct  task ssp_task;          /* task for somove */
        } *so_sp;
 /*
  * Variables for socket buffering.

Reply via email to