On Fri, Jul 29, 2016 at 08:07:14PM -0400, Ted Unangst wrote:
> Alexander Bluhm wrote:
> > On Fri, Jul 29, 2016 at 06:46:52PM -0400, Ted Unangst wrote:
> > > Alexander Bluhm wrote:
> > > > +       /* Avoid user land starvation. */
> > > > +       yield();
> > > 
> > > you don't need to yield here, the task framework should do that for you.
> > 
> > Perhaps the framework should do that, but it does not.  When I run
> > my splicing test on a qemu virtual machine with vio interfaces, an
> > interactive shell hangs completely while data is getting spliced
> > with full load.  The yield() keeps it interactive.
> 
> Why do you always find the things that should work but don't? :)
> 
> There's a sched_yield() in taskq_thread(). Something's not quite right.

So there is the insight that our scheduler cannot handle two kernel
threads doing a lot of work alternately.  I this case userland will
no longer be scheduled on a single cpu system.

Several possible solutions like letting tasks sleep at a different
priority or changing kernel threads to user priority have been
discussed.  There was no consensus what to do.

To move forward and uncouple the splicing improvement from the
scheduler discussion, I would like to commit my original diff with
the yield() workaround.

ok?

bluhm

Index: kern/uipc_socket.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.153
diff -u -p -r1.153 uipc_socket.c
--- kern/uipc_socket.c  22 Aug 2016 10:23:42 -0000      1.153
+++ kern/uipc_socket.c  24 Aug 2016 21:38:12 -0000
@@ -59,6 +59,7 @@ void  sbsync(struct sockbuf *, struct mbu
 int    sosplice(struct socket *, int, off_t, struct timeval *);
 void   sounsplice(struct socket *, struct socket *, int);
 void   soidle(void *);
+void   sotask(void *);
 int    somove(struct socket *, int);
 
 void   filt_sordetach(struct knote *kn);
@@ -85,6 +86,7 @@ int   sominconn = SOMINCONN;
 struct pool socket_pool;
 #ifdef SOCKET_SPLICE
 struct pool sosplice_pool;
+struct taskq *sosplice_taskq;
 #endif
 
 void
@@ -1070,6 +1072,7 @@ sorflush(struct socket *so)
 #define so_splicemax   so_sp->ssp_max
 #define so_idletv      so_sp->ssp_idletv
 #define so_idleto      so_sp->ssp_idleto
+#define so_splicetask  so_sp->ssp_task
 
 int
 sosplice(struct socket *so, int fd, off_t max, struct timeval *tv)
@@ -1078,6 +1081,12 @@ sosplice(struct socket *so, int fd, off_
        struct socket   *sosp;
        int              s, error = 0;
 
+       if (sosplice_taskq == NULL)
+               sosplice_taskq = taskq_create("sosplice", 1, IPL_SOFTNET,
+                   TASKQ_CANTSLEEP);
+       if (sosplice_taskq == NULL)
+               return (ENOMEM);
+
        if ((so->so_proto->pr_flags & PR_SPLICE) == 0)
                return (EPROTONOSUPPORT);
        if (so->so_options & SO_ACCEPTCONN)
@@ -1155,6 +1164,7 @@ sosplice(struct socket *so, int fd, off_
        else
                timerclear(&so->so_idletv);
        timeout_set(&so->so_idleto, soidle, so);
+       task_set(&so->so_splicetask, sotask, so);
 
        /*
         * To prevent softnet interrupt from calling somove() while
@@ -1178,6 +1188,7 @@ sounsplice(struct socket *so, struct soc
 {
        splsoftassert(IPL_SOFTNET);
 
+       task_del(sosplice_taskq, &so->so_splicetask);
        timeout_del(&so->so_idleto);
        sosp->so_snd.sb_flagsintr &= ~SB_SPLICE;
        so->so_rcv.sb_flagsintr &= ~SB_SPLICE;
@@ -1200,6 +1211,27 @@ soidle(void *arg)
        splx(s);
 }
 
+void
+sotask(void *arg)
+{
+       struct socket *so = arg;
+       int s;
+
+       s = splsoftnet();
+       if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
+               /*
+                * We may not sleep here as sofree() and unsplice() may be
+                * called from softnet interrupt context.  This would remove
+                * the socket during somove().
+                */
+               somove(so, M_DONTWAIT);
+       }
+       splx(s);
+
+       /* Avoid user land starvation. */
+       yield();
+}
+
 /*
  * Move data from receive buffer of spliced source socket to send
  * buffer of drain socket.  Try to move as much as possible in one
@@ -1473,19 +1505,26 @@ somove(struct socket *so, int wait)
        return (1);
 }
 
-#undef so_splicelen
-#undef so_splicemax
-#undef so_idletv
-#undef so_idleto
-
 #endif /* SOCKET_SPLICE */
 
 void
 sorwakeup(struct socket *so)
 {
 #ifdef SOCKET_SPLICE
-       if (so->so_rcv.sb_flagsintr & SB_SPLICE)
-               (void) somove(so, M_DONTWAIT);
+       if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
+               /*
+                * TCP has a sendbuffer that can handle multiple packets
+                * at once.  So queue the stream a bit to accumulate data.
+                * The sosplice thread will call somove() later and send
+                * the packets calling tcp_output() only once.
+                * In the UDP case, send out the packets immediately.
+                * Using a thread would make things slower.
+                */
+               if (so->so_proto->pr_flags & PR_WANTRCVD)
+                       task_add(sosplice_taskq, &so->so_splicetask);
+               else
+                       somove(so, M_DONTWAIT);
+       }
        if (isspliced(so))
                return;
 #endif
@@ -1499,7 +1538,7 @@ sowwakeup(struct socket *so)
 {
 #ifdef SOCKET_SPLICE
        if (so->so_snd.sb_flagsintr & SB_SPLICE)
-               (void) somove(so->so_sp->ssp_soback, M_DONTWAIT);
+               task_add(sosplice_taskq, &so->so_sp->ssp_soback->so_splicetask);
 #endif
        sowakeup(so, &so->so_snd);
 }
Index: sys/socketvar.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/socketvar.h,v
retrieving revision 1.61
diff -u -p -r1.61 socketvar.h
--- sys/socketvar.h     28 Jun 2016 14:47:00 -0000      1.61
+++ sys/socketvar.h     24 Aug 2016 21:38:12 -0000
@@ -34,6 +34,7 @@
 
 #include <sys/selinfo.h>                       /* for struct selinfo */
 #include <sys/queue.h>
+#include <sys/task.h>
 #include <sys/timeout.h>
 #include <sys/rwlock.h>
 
@@ -92,6 +93,7 @@ struct socket {
                off_t   ssp_max;                /* maximum number of bytes */
                struct  timeval ssp_idletv;     /* idle timeout */
                struct  timeout ssp_idleto;
+               struct  task ssp_task;          /* task for somove */
        } *so_sp;
 /*
  * Variables for socket buffering.

Reply via email to