On 01/08/18(Wed) 01:53, Mark Kettenis wrote:
> > Date: Tue, 31 Jul 2018 14:55:45 -0300
> > From: Martin Pieuchot <[email protected]>
> >
> > On 26/06/18(Tue) 12:31, Martin Pieuchot wrote:
> > > These syscalls do two operations: they allocate FDs and some buffers.
> > > Thanks to the work done for sockets the file-related bits are now
> > > mp-safe, so we can push the KERNEL_LOCK() down.
> > >
> > > Diff below adds some KERNEL_LOCK/UNLOCK() dances around km_alloc(9) and
> > > km_free(9). It also makes some counters use atomic operations.
> > >
> > > My goals for this diff are to prepare the terrain to unlock read(2) &
> > > write(2) for pipes and push the KERNEL_LOCK() at UVM borders, to
> > > hopefully motivate other hackers to push further down!
> >
> > The only objection that I received to this diff is that `fd_ofilesflags'
> > wasn't always protected. Which meant that fdexpand() couldn't be taken
> > out of the KERNEL_LOCK(). This has been fixed since then. So any other
> > comment or ok?
> >
> [...]
> > @@ -443,14 +445,13 @@ pipe_write(struct file *fp, off_t *poff,
> > * If it is advantageous to resize the pipe buffer, do
> > * so.
> > */
> > - if ((uio->uio_resid > PIPE_SIZE) &&
> > - (nbigpipe < LIMITBIGPIPES) &&
> > + if ((uio->uio_resid > PIPE_SIZE) && (nbigpipe < LIMITBIGPIPES) &&
> > (wpipe->pipe_buffer.size <= PIPE_SIZE) &&
> > (wpipe->pipe_buffer.cnt == 0)) {
> >
> > if ((error = pipelock(wpipe)) == 0) {
> > if (pipespace(wpipe, BIG_PIPE_SIZE) == 0)
> > - nbigpipe++;
> > + atomic_inc_int(&nbigpipe);
>
> There is a TOCTTOU issue here. Threads can run through here in
> parallel and nbigpipe can become larger than LIMITBIGPIPES. You need
> to use atomic_inc_int_nv() and back off if you pushed it over the
> limit.
Thanks, updated diff below, how does it looks like?
Index: kern/sys_pipe.c
===================================================================
RCS file: /cvs/src/sys/kern/sys_pipe.c,v
retrieving revision 1.82
diff -u -p -r1.82 sys_pipe.c
--- kern/sys_pipe.c 10 Jul 2018 08:58:50 -0000 1.82
+++ kern/sys_pipe.c 2 Aug 2018 16:02:08 -0000
@@ -91,8 +91,8 @@ struct filterops pipe_wfiltops =
* Limit the number of "big" pipes
*/
#define LIMITBIGPIPES 32
-int nbigpipe;
-static int amountpipekva;
+unsigned int nbigpipe;
+static unsigned int amountpipekva;
struct pool pipe_pool;
@@ -214,7 +214,9 @@ pipespace(struct pipe *cpipe, u_int size
{
caddr_t buffer;
+ KERNEL_LOCK();
buffer = km_alloc(size, &kv_any, &kp_pageable, &kd_waitok);
+ KERNEL_UNLOCK();
if (buffer == NULL) {
return (ENOMEM);
}
@@ -227,7 +229,7 @@ pipespace(struct pipe *cpipe, u_int size
cpipe->pipe_buffer.out = 0;
cpipe->pipe_buffer.cnt = 0;
- amountpipekva += cpipe->pipe_buffer.size;
+ atomic_add_int(&amountpipekva, cpipe->pipe_buffer.size);
return (0);
}
@@ -444,15 +446,17 @@ pipe_write(struct file *fp, off_t *poff,
* so.
*/
if ((uio->uio_resid > PIPE_SIZE) &&
- (nbigpipe < LIMITBIGPIPES) &&
(wpipe->pipe_buffer.size <= PIPE_SIZE) &&
(wpipe->pipe_buffer.cnt == 0)) {
+ unsigned int npipe;
- if ((error = pipelock(wpipe)) == 0) {
- if (pipespace(wpipe, BIG_PIPE_SIZE) == 0)
- nbigpipe++;
+ npipe = atomic_inc_int_nv(&nbigpipe);
+ if ((npipe < LIMITBIGPIPES) && (error = pipelock(wpipe)) == 0) {
+ if (pipespace(wpipe, BIG_PIPE_SIZE) != 0)
+ atomic_dec_int(&nbigpipe);
pipeunlock(wpipe);
- }
+ } else
+ atomic_dec_int(&nbigpipe);
}
/*
@@ -759,12 +763,15 @@ pipe_close(struct file *fp, struct proc
void
pipe_free_kmem(struct pipe *cpipe)
{
+ u_int size = cpipe->pipe_buffer.size;
+
if (cpipe->pipe_buffer.buffer != NULL) {
- if (cpipe->pipe_buffer.size > PIPE_SIZE)
- --nbigpipe;
- amountpipekva -= cpipe->pipe_buffer.size;
- km_free(cpipe->pipe_buffer.buffer, cpipe->pipe_buffer.size,
- &kv_any, &kp_pageable);
+ if (size > PIPE_SIZE)
+ atomic_dec_int(&nbigpipe);
+ KERNEL_LOCK();
+ km_free(cpipe->pipe_buffer.buffer, size, &kv_any, &kp_pageable);
+ KERNEL_UNLOCK();
+ atomic_sub_int(&amountpipekva, size);
cpipe->pipe_buffer.buffer = NULL;
}
}
@@ -777,7 +784,6 @@ pipeclose(struct pipe *cpipe)
{
struct pipe *ppipe;
if (cpipe) {
-
pipeselwakeup(cpipe);
/*
Index: kern/syscalls.master
===================================================================
RCS file: /cvs/src/sys/kern/syscalls.master,v
retrieving revision 1.188
diff -u -p -r1.188 syscalls.master
--- kern/syscalls.master 30 Jul 2018 19:09:51 -0000 1.188
+++ kern/syscalls.master 31 Jul 2018 17:55:12 -0000
@@ -217,7 +217,7 @@
socklen_t namelen); }
99 STD { int sys_getdents(int fd, void *buf, size_t buflen); }
100 STD { int sys_getpriority(int which, id_t who); }
-101 STD { int sys_pipe2(int *fdp, int flags); }
+101 STD NOLOCK { int sys_pipe2(int *fdp, int flags); }
102 STD { int sys_dup3(int from, int to, int flags); }
103 STD { int sys_sigreturn(struct sigcontext *sigcntxp); }
104 STD { int sys_bind(int s, const struct sockaddr *name, \
@@ -448,7 +448,7 @@
260 UNIMPL
261 UNIMPL
262 UNIMPL
-263 STD { int sys_pipe(int *fdp); }
+263 STD NOLOCK { int sys_pipe(int *fdp); }
264 STD { int sys_fhopen(const fhandle_t *fhp, int flags); }
265 UNIMPL
266 UNIMPL