There's one place in our kernel where `f_data' is overwritten while the
descriptor sits in multiple shared data structures. It is in diskmap.
We want to avoid this situation to be able to treat f_data as immutable.
So the diff below remove `fp' from the shared data structures before it
gets modified. It is not a complete solution to the reference grabbing
problem of vnode. There's still a window where a race is possible
between the moment where we release the mutex and increase the vnode's
reference. The KERNEL_LOCK() is what protects us for now. However this
problem is the next one to solve to be able to unlock syscalls messing
with vnodes.
Still this diff is a step towards that.
Note that this change only apply to vnodes, so it doesn't matter for
socket-related syscalls.
Ok?
Index: dev/diskmap.c
===================================================================
RCS file: /cvs/src/sys/dev/diskmap.c,v
retrieving revision 1.20
diff -u -p -r1.20 diskmap.c
--- dev/diskmap.c 9 May 2018 08:42:02 -0000 1.20
+++ dev/diskmap.c 19 Jun 2018 10:38:37 -0000
@@ -59,7 +59,7 @@ diskmapioctl(dev_t dev, u_long cmd, cadd
struct filedesc *fdp = p->p_fd;
struct file *fp = NULL;
struct vnode *vp = NULL, *ovp;
- char *devname;
+ char *devname, flags;
int fd, error = EINVAL;
if (cmd != DIOCMAP)
@@ -106,6 +106,15 @@ diskmapioctl(dev_t dev, u_long cmd, cadd
vput(ovp);
}
+ /* Remove the file while we're modifying it to prevent races. */
+ flags = fdp->fd_ofileflags[fd];
+ mtx_enter(&fhdlk);
+ fdp->fd_ofiles[fd] = NULL;
+ fdp->fd_ofileflags[fd] = 0;
+ if (fp->f_iflags & FIF_INSERTED)
+ LIST_REMOVE(fp, f_list);
+ mtx_leave(&fhdlk);
+
fp->f_data = (caddr_t)vp;
fp->f_offset = 0;
mtx_enter(&fp->f_mtx);
@@ -115,6 +124,9 @@ diskmapioctl(dev_t dev, u_long cmd, cadd
fp->f_rbytes = 0;
fp->f_wbytes = 0;
mtx_leave(&fp->f_mtx);
+
+ /* Insert it back where it was. */
+ fdinsert(fdp, fd, flags, fp);
VOP_UNLOCK(vp);