On Fri, Jul 29, 2016 at 08:07:14PM -0400, Ted Unangst wrote: > Alexander Bluhm wrote: > > On Fri, Jul 29, 2016 at 06:46:52PM -0400, Ted Unangst wrote: > > > Alexander Bluhm wrote: > > > > + /* Avoid user land starvation. */ > > > > + yield(); > > > > > > you don't need to yield here, the task framework should do that for you. > > > > Perhaps the framework should do that, but it does not. When I run > > my splicing test on a qemu virtual machine with vio interfaces, an > > interactive shell hangs completely while data is getting spliced > > with full load. The yield() keeps it interactive. > > Why do you always find the things that should work but don't? :) > > There's a sched_yield() in taskq_thread(). Something's not quite right.
So there is the insight that our scheduler cannot handle two kernel threads doing a lot of work alternately. In this case userland will no longer be scheduled on a single cpu system. Several possible solutions like letting tasks sleep at a different priority or changing kernel threads to user priority have been discussed. There was no consensus what to do. To move forward and uncouple the splicing improvement from the scheduler discussion, I would like to commit my original diff with the yield() workaround. ok? bluhm Index: kern/uipc_socket.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v retrieving revision 1.153 diff -u -p -r1.153 uipc_socket.c --- kern/uipc_socket.c 22 Aug 2016 10:23:42 -0000 1.153 +++ kern/uipc_socket.c 24 Aug 2016 21:38:12 -0000 @@ -59,6 +59,7 @@ void sbsync(struct sockbuf *, struct mbu int sosplice(struct socket *, int, off_t, struct timeval *); void sounsplice(struct socket *, struct socket *, int); void soidle(void *); +void sotask(void *); int somove(struct socket *, int); void filt_sordetach(struct knote *kn); @@ -85,6 +86,7 @@ int sominconn = SOMINCONN; struct pool socket_pool; #ifdef SOCKET_SPLICE struct pool sosplice_pool; +struct taskq *sosplice_taskq; #endif void @@ -1070,6 +1072,7 @@ sorflush(struct socket *so) #define so_splicemax so_sp->ssp_max #define so_idletv so_sp->ssp_idletv #define so_idleto so_sp->ssp_idleto +#define so_splicetask so_sp->ssp_task int sosplice(struct socket *so, int fd, off_t max, struct timeval *tv) @@ -1078,6 +1081,12 @@ sosplice(struct socket *so, int fd, off_ struct socket *sosp; int s, error = 0; + if (sosplice_taskq == NULL) + sosplice_taskq = taskq_create("sosplice", 1, IPL_SOFTNET, + TASKQ_CANTSLEEP); + if (sosplice_taskq == NULL) + return (ENOMEM); + if ((so->so_proto->pr_flags & PR_SPLICE) == 0) return (EPROTONOSUPPORT); if (so->so_options & SO_ACCEPTCONN) @@ -1155,6 +1164,7 @@ sosplice(struct socket *so, int fd, 
off_ else timerclear(&so->so_idletv); timeout_set(&so->so_idleto, soidle, so); + task_set(&so->so_splicetask, sotask, so); /* * To prevent softnet interrupt from calling somove() while @@ -1178,6 +1188,7 @@ sounsplice(struct socket *so, struct soc { splsoftassert(IPL_SOFTNET); + task_del(sosplice_taskq, &so->so_splicetask); timeout_del(&so->so_idleto); sosp->so_snd.sb_flagsintr &= ~SB_SPLICE; so->so_rcv.sb_flagsintr &= ~SB_SPLICE; @@ -1200,6 +1211,27 @@ soidle(void *arg) splx(s); } +void +sotask(void *arg) +{ + struct socket *so = arg; + int s; + + s = splsoftnet(); + if (so->so_rcv.sb_flagsintr & SB_SPLICE) { + /* + * We may not sleep here as sofree() and unsplice() may be + * called from softnet interrupt context. This would remove + * the socket during somove(). + */ + somove(so, M_DONTWAIT); + } + splx(s); + + /* Avoid user land starvation. */ + yield(); +} + /* * Move data from receive buffer of spliced source socket to send * buffer of drain socket. Try to move as much as possible in one @@ -1473,19 +1505,26 @@ somove(struct socket *so, int wait) return (1); } -#undef so_splicelen -#undef so_splicemax -#undef so_idletv -#undef so_idleto - #endif /* SOCKET_SPLICE */ void sorwakeup(struct socket *so) { #ifdef SOCKET_SPLICE - if (so->so_rcv.sb_flagsintr & SB_SPLICE) - (void) somove(so, M_DONTWAIT); + if (so->so_rcv.sb_flagsintr & SB_SPLICE) { + /* + * TCP has a sendbuffer that can handle multiple packets + * at once. So queue the stream a bit to accumulate data. + * The sosplice thread will call somove() later and send + * the packets calling tcp_output() only once. + * In the UDP case, send out the packets immediately. + * Using a thread would make things slower. 
+ */ + if (so->so_proto->pr_flags & PR_WANTRCVD) + task_add(sosplice_taskq, &so->so_splicetask); + else + somove(so, M_DONTWAIT); + } if (isspliced(so)) return; #endif @@ -1499,7 +1538,7 @@ sowwakeup(struct socket *so) { #ifdef SOCKET_SPLICE if (so->so_snd.sb_flagsintr & SB_SPLICE) - (void) somove(so->so_sp->ssp_soback, M_DONTWAIT); + task_add(sosplice_taskq, &so->so_sp->ssp_soback->so_splicetask); #endif sowakeup(so, &so->so_snd); } Index: sys/socketvar.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/sys/socketvar.h,v retrieving revision 1.61 diff -u -p -r1.61 socketvar.h --- sys/socketvar.h 28 Jun 2016 14:47:00 -0000 1.61 +++ sys/socketvar.h 24 Aug 2016 21:38:12 -0000 @@ -34,6 +34,7 @@ #include <sys/selinfo.h> /* for struct selinfo */ #include <sys/queue.h> +#include <sys/task.h> #include <sys/timeout.h> #include <sys/rwlock.h> @@ -92,6 +93,7 @@ struct socket { off_t ssp_max; /* maximum number of bytes */ struct timeval ssp_idletv; /* idle timeout */ struct timeout ssp_idleto; + struct task ssp_task; /* task for somove */ } *so_sp; /* * Variables for socket buffering.