On 01/08/18(Wed) 01:53, Mark Kettenis wrote: > > Date: Tue, 31 Jul 2018 14:55:45 -0300 > > From: Martin Pieuchot <m...@openbsd.org> > > > > On 26/06/18(Tue) 12:31, Martin Pieuchot wrote: > > > These syscalls do two operations: they allocate FDs and some buffers. > > > Thanks to the work done for sockets the file-related bits are now > > > mp-safe, so we can push the KERNEL_LOCK() down. > > > > > > Diff below adds some KERNEL_LOCK/UNLOCK() dances around km_alloc(9) and > > > km_free(9). It also makes some counters use atomic operations. > > > > > > My goals for this diff are to prepare the terrain to unlock read(2) & > > > write(2) for pipes and push the KERNEL_LOCK() at UVM borders, to > > > hopefully motivate other hackers to push further down! > > > > The only objection that I received to this diff is that `fd_ofilesflags' > > wasn't always protected. Which meant that fdexpand() couldn't be taken > > out of the KERNEL_LOCK(). This has been fixed since then. So any other > > comment or ok? > > > [...] > > @@ -443,14 +445,13 @@ pipe_write(struct file *fp, off_t *poff, > > * If it is advantageous to resize the pipe buffer, do > > * so. > > */ > > - if ((uio->uio_resid > PIPE_SIZE) && > > - (nbigpipe < LIMITBIGPIPES) && > > + if ((uio->uio_resid > PIPE_SIZE) && (nbigpipe < LIMITBIGPIPES) && > > (wpipe->pipe_buffer.size <= PIPE_SIZE) && > > (wpipe->pipe_buffer.cnt == 0)) { > > > > if ((error = pipelock(wpipe)) == 0) { > > if (pipespace(wpipe, BIG_PIPE_SIZE) == 0) > > - nbigpipe++; > > + atomic_inc_int(&nbigpipe); > > There is a TOCTTOU issue here. Threads can run through here in > parallel and nbigpipe can become larger than LIMITBIGPIPES. You need > to use atomic_inc_int_nv() and back off if you pushed it over the > limit.
Thanks, updated diff below, how does it looks like? Index: kern/sys_pipe.c =================================================================== RCS file: /cvs/src/sys/kern/sys_pipe.c,v retrieving revision 1.82 diff -u -p -r1.82 sys_pipe.c --- kern/sys_pipe.c 10 Jul 2018 08:58:50 -0000 1.82 +++ kern/sys_pipe.c 2 Aug 2018 16:02:08 -0000 @@ -91,8 +91,8 @@ struct filterops pipe_wfiltops = * Limit the number of "big" pipes */ #define LIMITBIGPIPES 32 -int nbigpipe; -static int amountpipekva; +unsigned int nbigpipe; +static unsigned int amountpipekva; struct pool pipe_pool; @@ -214,7 +214,9 @@ pipespace(struct pipe *cpipe, u_int size { caddr_t buffer; + KERNEL_LOCK(); buffer = km_alloc(size, &kv_any, &kp_pageable, &kd_waitok); + KERNEL_UNLOCK(); if (buffer == NULL) { return (ENOMEM); } @@ -227,7 +229,7 @@ pipespace(struct pipe *cpipe, u_int size cpipe->pipe_buffer.out = 0; cpipe->pipe_buffer.cnt = 0; - amountpipekva += cpipe->pipe_buffer.size; + atomic_add_int(&amountpipekva, cpipe->pipe_buffer.size); return (0); } @@ -444,15 +446,17 @@ pipe_write(struct file *fp, off_t *poff, * so. */ if ((uio->uio_resid > PIPE_SIZE) && - (nbigpipe < LIMITBIGPIPES) && (wpipe->pipe_buffer.size <= PIPE_SIZE) && (wpipe->pipe_buffer.cnt == 0)) { + unsigned int npipe; - if ((error = pipelock(wpipe)) == 0) { - if (pipespace(wpipe, BIG_PIPE_SIZE) == 0) - nbigpipe++; + npipe = atomic_inc_int_nv(&nbigpipe); + if ((npipe < LIMITBIGPIPES) && (error = pipelock(wpipe)) == 0) { + if (pipespace(wpipe, BIG_PIPE_SIZE) != 0) + atomic_dec_int(&nbigpipe); pipeunlock(wpipe); - } + } else + atomic_dec_int(&nbigpipe); } /* @@ -759,12 +763,15 @@ pipe_close(struct file *fp, struct proc void pipe_free_kmem(struct pipe *cpipe) { + u_int size = cpipe->pipe_buffer.size; + if (cpipe->pipe_buffer.buffer != NULL) { - if (cpipe->pipe_buffer.size > PIPE_SIZE) - --nbigpipe; - amountpipekva -= cpipe->pipe_buffer.size; - km_free(cpipe->pipe_buffer.buffer, cpipe->pipe_buffer.size, - &kv_any, &kp_pageable); + if (size > PIPE_SIZE) + atomic_dec_int(&nbigpipe); + KERNEL_LOCK(); + km_free(cpipe->pipe_buffer.buffer, size, &kv_any, &kp_pageable); + KERNEL_UNLOCK(); + atomic_sub_int(&amountpipekva, size); cpipe->pipe_buffer.buffer = NULL; } } @@ -777,7 +784,6 @@ pipeclose(struct pipe *cpipe) { struct pipe *ppipe; if (cpipe) { - pipeselwakeup(cpipe); /* Index: kern/syscalls.master =================================================================== RCS file: /cvs/src/sys/kern/syscalls.master,v retrieving revision 1.188 diff -u -p -r1.188 syscalls.master --- kern/syscalls.master 30 Jul 2018 19:09:51 -0000 1.188 +++ kern/syscalls.master 31 Jul 2018 17:55:12 -0000 @@ -217,7 +217,7 @@ socklen_t namelen); } 99 STD { int sys_getdents(int fd, void *buf, size_t buflen); } 100 STD { int sys_getpriority(int which, id_t who); } -101 STD { int sys_pipe2(int *fdp, int flags); } +101 STD NOLOCK { int sys_pipe2(int *fdp, int flags); } 102 STD { int sys_dup3(int from, int to, int flags); } 103 STD { int sys_sigreturn(struct sigcontext *sigcntxp); } 104 STD { int sys_bind(int s, const struct sockaddr *name, \ @@ -448,7 +448,7 @@ 260 UNIMPL 261 UNIMPL 262 UNIMPL -263 STD { int sys_pipe(int *fdp); } +263 STD NOLOCK { int sys_pipe(int *fdp); } 264 STD { int sys_fhopen(const fhandle_t *fhp, int flags); } 265 UNIMPL 266 UNIMPL