Author: kib
Date: Mon Jul  2 21:01:03 2012
New Revision: 238029
URL: http://svn.freebsd.org/changeset/base/238029

Log:
  Extend the KPI to lock and unlock f_offset member of struct file.  It
  now fully encapsulates all accesses to f_offset, and extends f_offset
  locking to other consumers that need it, in particular, to lseek() and
  variants of getdirentries().
  
  Ensure that on 32bit architectures f_offset, which is 64bit quantity,
  always read and written under the mtxpool protection. This fixes
  apparently easy to trigger race when parallel lseek()s or lseek() and
  read/write could destroy file offset.
  
  The already broken ABI emulations, including iBCS and SysV, are not
  converted (yet).
  
  Tested by:    pho
  No objections from:   jhb
  MFC after:    3 weeks

Modified:
  head/sys/compat/linux/linux_file.c
  head/sys/fs/devfs/devfs_vnops.c
  head/sys/kern/kern_descrip.c
  head/sys/kern/vfs_syscalls.c
  head/sys/kern/vfs_vnops.c
  head/sys/sys/file.h
  head/sys/ufs/ffs/ffs_alloc.c

Modified: head/sys/compat/linux/linux_file.c
==============================================================================
--- head/sys/compat/linux/linux_file.c  Mon Jul  2 20:42:43 2012        
(r238028)
+++ head/sys/compat/linux/linux_file.c  Mon Jul  2 21:01:03 2012        
(r238029)
@@ -357,15 +357,16 @@ getdents_common(struct thread *td, struc
                return (EBADF);
        }
 
+       off = foffset_lock(fp, 0);
        vp = fp->f_vnode;
        vfslocked = VFS_LOCK_GIANT(vp->v_mount);
        if (vp->v_type != VDIR) {
                VFS_UNLOCK_GIANT(vfslocked);
+               foffset_unlock(fp, off, 0);
                fdrop(fp, td);
                return (EINVAL);
        }
 
-       off = fp->f_offset;
 
        buflen = max(LINUX_DIRBLKSIZ, nbytes);
        buflen = min(buflen, MAXBSIZE);
@@ -514,7 +515,6 @@ getdents_common(struct thread *td, struc
                goto eof;
        }
 
-       fp->f_offset = off;
        if (justone)
                nbytes = resid + linuxreclen;
 
@@ -527,6 +527,7 @@ out:
 
        VOP_UNLOCK(vp, 0);
        VFS_UNLOCK_GIANT(vfslocked);
+       foffset_unlock(fp, off, 0);
        fdrop(fp, td);
        free(buf, M_TEMP);
        free(lbuf, M_TEMP);

Modified: head/sys/fs/devfs/devfs_vnops.c
==============================================================================
--- head/sys/fs/devfs/devfs_vnops.c     Mon Jul  2 20:42:43 2012        
(r238028)
+++ head/sys/fs/devfs/devfs_vnops.c     Mon Jul  2 21:01:03 2012        
(r238029)
@@ -1170,18 +1170,14 @@ devfs_read_f(struct file *fp, struct uio
        if (ioflag & O_DIRECT)
                ioflag |= IO_DIRECT;
 
-       if ((flags & FOF_OFFSET) == 0)
-               uio->uio_offset = fp->f_offset;
-
+       foffset_lock_uio(fp, uio, flags | FOF_NOLOCK);
        error = dsw->d_read(dev, uio, ioflag);
        if (uio->uio_resid != resid || (error == 0 && resid != 0))
                vfs_timestamp(&dev->si_atime);
        td->td_fpop = fpop;
        dev_relthread(dev, ref);
 
-       if ((flags & FOF_OFFSET) == 0)
-               fp->f_offset = uio->uio_offset;
-       fp->f_nextoff = uio->uio_offset;
+       foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF);
        return (error);
 }
 
@@ -1648,8 +1644,7 @@ devfs_write_f(struct file *fp, struct ui
        ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT | O_FSYNC);
        if (ioflag & O_DIRECT)
                ioflag |= IO_DIRECT;
-       if ((flags & FOF_OFFSET) == 0)
-               uio->uio_offset = fp->f_offset;
+       foffset_lock_uio(fp, uio, flags | FOF_NOLOCK);
 
        resid = uio->uio_resid;
 
@@ -1661,9 +1656,7 @@ devfs_write_f(struct file *fp, struct ui
        td->td_fpop = fpop;
        dev_relthread(dev, ref);
 
-       if ((flags & FOF_OFFSET) == 0)
-               fp->f_offset = uio->uio_offset;
-       fp->f_nextoff = uio->uio_offset;
+       foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF);
        return (error);
 }
 

Modified: head/sys/kern/kern_descrip.c
==============================================================================
--- head/sys/kern/kern_descrip.c        Mon Jul  2 20:42:43 2012        
(r238028)
+++ head/sys/kern/kern_descrip.c        Mon Jul  2 21:01:03 2012        
(r238029)
@@ -465,6 +465,7 @@ kern_fcntl(struct thread *td, int fd, in
        int vfslocked;
        u_int old, new;
        uint64_t bsize;
+       off_t foffset;
 
        vfslocked = 0;
        error = 0;
@@ -606,14 +607,15 @@ kern_fcntl(struct thread *td, int fd, in
                }
                flp = (struct flock *)arg;
                if (flp->l_whence == SEEK_CUR) {
-                       if (fp->f_offset < 0 ||
+                       foffset = foffset_get(fp);
+                       if (foffset < 0 ||
                            (flp->l_start > 0 &&
-                            fp->f_offset > OFF_MAX - flp->l_start)) {
+                            foffset > OFF_MAX - flp->l_start)) {
                                FILEDESC_SUNLOCK(fdp);
                                error = EOVERFLOW;
                                break;
                        }
-                       flp->l_start += fp->f_offset;
+                       flp->l_start += foffset;
                }
 
                /*
@@ -727,15 +729,16 @@ kern_fcntl(struct thread *td, int fd, in
                        break;
                }
                if (flp->l_whence == SEEK_CUR) {
+                       foffset = foffset_get(fp);
                        if ((flp->l_start > 0 &&
-                           fp->f_offset > OFF_MAX - flp->l_start) ||
+                           foffset > OFF_MAX - flp->l_start) ||
                            (flp->l_start < 0 &&
-                            fp->f_offset < OFF_MIN - flp->l_start)) {
+                            foffset < OFF_MIN - flp->l_start)) {
                                FILEDESC_SUNLOCK(fdp);
                                error = EOVERFLOW;
                                break;
                        }
-                       flp->l_start += fp->f_offset;
+                       flp->l_start += foffset;
                }
                /*
                 * VOP_ADVLOCK() may block.
@@ -2810,7 +2813,7 @@ sysctl_kern_file(SYSCTL_HANDLER_ARGS)
                        xf.xf_type = fp->f_type;
                        xf.xf_count = fp->f_count;
                        xf.xf_msgcount = 0;
-                       xf.xf_offset = fp->f_offset;
+                       xf.xf_offset = foffset_get(fp);
                        xf.xf_flag = fp->f_flag;
                        error = SYSCTL_OUT(req, &xf, sizeof(xf));
                        if (error)
@@ -3015,7 +3018,7 @@ sysctl_kern_proc_ofiledesc(SYSCTL_HANDLE
                        kif->kf_flags |= KF_FLAG_DIRECT;
                if (fp->f_flag & FHASLOCK)
                        kif->kf_flags |= KF_FLAG_HASLOCK;
-               kif->kf_offset = fp->f_offset;
+               kif->kf_offset = foffset_get(fp);
                if (vp != NULL) {
                        vref(vp);
                        switch (vp->v_type) {
@@ -3359,7 +3362,7 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER
                }
                refcnt = fp->f_count;
                fflags = fp->f_flag;
-               offset = fp->f_offset;
+               offset = foffset_get(fp);
 
                /*
                 * Create sysctl entry.

Modified: head/sys/kern/vfs_syscalls.c
==============================================================================
--- head/sys/kern/vfs_syscalls.c        Mon Jul  2 20:42:43 2012        
(r238028)
+++ head/sys/kern/vfs_syscalls.c        Mon Jul  2 21:01:03 2012        
(r238029)
@@ -1981,7 +1981,7 @@ sys_lseek(td, uap)
        struct file *fp;
        struct vnode *vp;
        struct vattr vattr;
-       off_t offset, size;
+       off_t foffset, offset, size;
        int error, noneg;
        int vfslocked;
 
@@ -1993,18 +1993,19 @@ sys_lseek(td, uap)
                return (ESPIPE);
        }
        vp = fp->f_vnode;
+       foffset = foffset_lock(fp, 0);
        vfslocked = VFS_LOCK_GIANT(vp->v_mount);
        noneg = (vp->v_type != VCHR);
        offset = uap->offset;
        switch (uap->whence) {
        case L_INCR:
                if (noneg &&
-                   (fp->f_offset < 0 ||
-                   (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
+                   (foffset < 0 ||
+                   (offset > 0 && foffset > OFF_MAX - offset))) {
                        error = EOVERFLOW;
                        break;
                }
-               offset += fp->f_offset;
+               offset += foffset;
                break;
        case L_XTND:
                vn_lock(vp, LK_SHARED | LK_RETRY);
@@ -2044,12 +2045,12 @@ sys_lseek(td, uap)
                error = EINVAL;
        if (error != 0)
                goto drop;
-       fp->f_offset = offset;
        VFS_KNOTE_UNLOCKED(vp, 0);
-       *(off_t *)(td->td_retval) = fp->f_offset;
+       *(off_t *)(td->td_retval) = offset;
 drop:
        fdrop(fp, td);
        VFS_UNLOCK_GIANT(vfslocked);
+       foffset_unlock(fp, offset, error != 0 ? FOF_NOUPDATE : 0);
        return (error);
 }
 
@@ -3982,6 +3983,7 @@ kern_ogetdirentries(struct thread *td, s
        caddr_t dirbuf;
        int error, eofflag, readcnt, vfslocked;
        long loff;
+       off_t foffset;
 
        /* XXX arbitrary sanity limit on `count'. */
        if (uap->count > 64 * 1024)
@@ -3994,10 +3996,12 @@ kern_ogetdirentries(struct thread *td, s
                return (EBADF);
        }
        vp = fp->f_vnode;
+       foffset = foffset_lock(fp, 0);
 unionread:
        vfslocked = VFS_LOCK_GIANT(vp->v_mount);
        if (vp->v_type != VDIR) {
                VFS_UNLOCK_GIANT(vfslocked);
+               foffset_unlock(fp, foffset, 0);
                fdrop(fp, td);
                return (EINVAL);
        }
@@ -4010,12 +4014,13 @@ unionread:
        auio.uio_td = td;
        auio.uio_resid = uap->count;
        vn_lock(vp, LK_SHARED | LK_RETRY);
-       loff = auio.uio_offset = fp->f_offset;
+       loff = auio.uio_offset = foffset;
 #ifdef MAC
        error = mac_vnode_check_readdir(td->td_ucred, vp);
        if (error) {
                VOP_UNLOCK(vp, 0);
                VFS_UNLOCK_GIANT(vfslocked);
+               foffset_unlock(fp, foffset, FOF_NOUPDATE);
                fdrop(fp, td);
                return (error);
        }
@@ -4024,7 +4029,7 @@ unionread:
                if (vp->v_mount->mnt_maxsymlinklen <= 0) {
                        error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
                            NULL, NULL);
-                       fp->f_offset = auio.uio_offset;
+                       foffset = auio.uio_offset;
                } else
 #      endif
        {
@@ -4036,7 +4041,7 @@ unionread:
                kiov.iov_base = dirbuf;
                error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
                            NULL, NULL);
-               fp->f_offset = kuio.uio_offset;
+               foffset = kuio.uio_offset;
                if (error == 0) {
                        readcnt = uap->count - kuio.uio_resid;
                        edp = (struct dirent *)&dirbuf[readcnt];
@@ -4074,6 +4079,7 @@ unionread:
        if (error) {
                VOP_UNLOCK(vp, 0);
                VFS_UNLOCK_GIANT(vfslocked);
+               foffset_unlock(fp, foffset, 0);
                fdrop(fp, td);
                return (error);
        }
@@ -4085,13 +4091,14 @@ unionread:
                VREF(vp);
                fp->f_vnode = vp;
                fp->f_data = vp;
-               fp->f_offset = 0;
+               foffset = 0;
                vput(tvp);
                VFS_UNLOCK_GIANT(vfslocked);
                goto unionread;
        }
        VOP_UNLOCK(vp, 0);
        VFS_UNLOCK_GIANT(vfslocked);
+       foffset_unlock(fp, foffset, 0);
        fdrop(fp, td);
        td->td_retval[0] = uap->count - auio.uio_resid;
        if (error == 0)
@@ -4144,6 +4151,7 @@ kern_getdirentries(struct thread *td, in
        int vfslocked;
        long loff;
        int error, eofflag;
+       off_t foffset;
 
        AUDIT_ARG_FD(fd);
        if (count > IOSIZE_MAX)
@@ -4157,6 +4165,7 @@ kern_getdirentries(struct thread *td, in
                return (EBADF);
        }
        vp = fp->f_vnode;
+       foffset = foffset_lock(fp, 0);
 unionread:
        vfslocked = VFS_LOCK_GIANT(vp->v_mount);
        if (vp->v_type != VDIR) {
@@ -4173,14 +4182,14 @@ unionread:
        auio.uio_td = td;
        vn_lock(vp, LK_SHARED | LK_RETRY);
        AUDIT_ARG_VNODE1(vp);
-       loff = auio.uio_offset = fp->f_offset;
+       loff = auio.uio_offset = foffset;
 #ifdef MAC
        error = mac_vnode_check_readdir(td->td_ucred, vp);
        if (error == 0)
 #endif
                error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
                    NULL);
-       fp->f_offset = auio.uio_offset;
+       foffset = auio.uio_offset;
        if (error) {
                VOP_UNLOCK(vp, 0);
                VFS_UNLOCK_GIANT(vfslocked);
@@ -4194,7 +4203,7 @@ unionread:
                VREF(vp);
                fp->f_vnode = vp;
                fp->f_data = vp;
-               fp->f_offset = 0;
+               foffset = 0;
                vput(tvp);
                VFS_UNLOCK_GIANT(vfslocked);
                goto unionread;
@@ -4206,6 +4215,7 @@ unionread:
                *residp = auio.uio_resid;
        td->td_retval[0] = count - auio.uio_resid;
 fail:
+       foffset_unlock(fp, foffset, 0);
        fdrop(fp, td);
        return (error);
 }

Modified: head/sys/kern/vfs_vnops.c
==============================================================================
--- head/sys/kern/vfs_vnops.c   Mon Jul  2 20:42:43 2012        (r238028)
+++ head/sys/kern/vfs_vnops.c   Mon Jul  2 21:01:03 2012        (r238029)
@@ -527,13 +527,22 @@ vn_rdwr_inchunks(rw, vp, base, len, offs
        return (error);
 }
 
-static void
-foffset_lock(struct file *fp, struct uio *uio, int flags)
+off_t
+foffset_lock(struct file *fp, int flags)
 {
        struct mtx *mtxp;
+       off_t res;
 
-       if ((flags & FOF_OFFSET) != 0)
-               return;
+       KASSERT((flags & FOF_OFFSET) == 0, ("FOF_OFFSET passed"));
+
+#if OFF_MAX <= LONG_MAX
+       /*
+        * Caller only wants the current f_offset value.  Assume that
+        * the long and shorter integer types reads are atomic.
+        */
+       if ((flags & FOF_NOLOCK) != 0)
+               return (fp->f_offset);
+#endif
 
        /*
         * According to McKusick the vn lock was protecting f_offset here.
@@ -541,16 +550,68 @@ foffset_lock(struct file *fp, struct uio
         */
        mtxp = mtx_pool_find(mtxpool_sleep, fp);
        mtx_lock(mtxp);
-       while (fp->f_vnread_flags & FOFFSET_LOCKED) {
-               fp->f_vnread_flags |= FOFFSET_LOCK_WAITING;
-               msleep(&fp->f_vnread_flags, mtxp, PUSER -1,
-                   "vnread offlock", 0);
+       if ((flags & FOF_NOLOCK) == 0) {
+               while (fp->f_vnread_flags & FOFFSET_LOCKED) {
+                       fp->f_vnread_flags |= FOFFSET_LOCK_WAITING;
+                       msleep(&fp->f_vnread_flags, mtxp, PUSER -1,
+                           "vofflock", 0);
+               }
+               fp->f_vnread_flags |= FOFFSET_LOCKED;
+       }
+       res = fp->f_offset;
+       mtx_unlock(mtxp);
+       return (res);
+}
+
+void
+foffset_unlock(struct file *fp, off_t val, int flags)
+{
+       struct mtx *mtxp;
+
+       KASSERT((flags & FOF_OFFSET) == 0, ("FOF_OFFSET passed"));
+
+#if OFF_MAX <= LONG_MAX
+       if ((flags & FOF_NOLOCK) != 0) {
+               if ((flags & FOF_NOUPDATE) == 0)
+                       fp->f_offset = val;
+               if ((flags & FOF_NEXTOFF) != 0)
+                       fp->f_nextoff = val;
+               return;
+       }
+#endif
+
+       mtxp = mtx_pool_find(mtxpool_sleep, fp);
+       mtx_lock(mtxp);
+       if ((flags & FOF_NOUPDATE) == 0)
+               fp->f_offset = val;
+       if ((flags & FOF_NEXTOFF) != 0)
+               fp->f_nextoff = val;
+       if ((flags & FOF_NOLOCK) == 0) {
+               KASSERT((fp->f_vnread_flags & FOFFSET_LOCKED) != 0,
+                   ("Lost FOFFSET_LOCKED"));
+               if (fp->f_vnread_flags & FOFFSET_LOCK_WAITING)
+                       wakeup(&fp->f_vnread_flags);
+               fp->f_vnread_flags = 0;
        }
-       fp->f_vnread_flags |= FOFFSET_LOCKED;
-       uio->uio_offset = fp->f_offset;
        mtx_unlock(mtxp);
 }
 
+void
+foffset_lock_uio(struct file *fp, struct uio *uio, int flags)
+{
+
+       if ((flags & FOF_OFFSET) == 0)
+               uio->uio_offset = foffset_lock(fp, flags);
+}
+
+void
+foffset_unlock_uio(struct file *fp, struct uio *uio, int flags)
+{
+
+       if ((flags & FOF_OFFSET) == 0)
+               foffset_unlock(fp, uio->uio_offset, flags);
+}
+
 static int
 get_advice(struct file *fp, struct uio *uio)
 {
@@ -570,23 +631,6 @@ get_advice(struct file *fp, struct uio *
        return (ret);
 }
 
-static void
-foffset_unlock(struct file *fp, struct uio *uio, int flags)
-{
-       struct mtx *mtxp;
-
-       if ((flags & FOF_OFFSET) != 0)
-               return;
-
-       fp->f_offset = uio->uio_offset;
-       mtxp = mtx_pool_find(mtxpool_sleep, fp);
-       mtx_lock(mtxp);
-       if (fp->f_vnread_flags & FOFFSET_LOCK_WAITING)
-               wakeup(&fp->f_vnread_flags);
-       fp->f_vnread_flags = 0;
-       mtx_unlock(mtxp);
-}
-
 /*
  * File table vnode read routine.
  */
@@ -865,7 +909,7 @@ vn_io_fault(struct file *fp, struct uio 
        else
                doio = vn_write;
        vp = fp->f_vnode;
-       foffset_lock(fp, uio, flags);
+       foffset_lock_uio(fp, uio, flags);
 
        if (uio->uio_segflg != UIO_USERSPACE || vp->v_type != VREG ||
            ((mp = vp->v_mount) != NULL &&
@@ -982,7 +1026,7 @@ out:
        vn_rangelock_unlock(vp, rl_cookie);
        free(uio_clone, M_IOV);
 out_last:
-       foffset_unlock(fp, uio, flags);
+       foffset_unlock_uio(fp, uio, flags);
        return (error);
 }
 

Modified: head/sys/sys/file.h
==============================================================================
--- head/sys/sys/file.h Mon Jul  2 20:42:43 2012        (r238028)
+++ head/sys/sys/file.h Mon Jul  2 21:01:03 2012        (r238029)
@@ -72,10 +72,25 @@ struct socket;
 struct file;
 struct ucred;
 
+#define        FOF_OFFSET      0x01    /* Use the offset in uio argument */
+#define        FOF_NOLOCK      0x02    /* Do not take FOFFSET_LOCK */
+#define        FOF_NEXTOFF     0x04    /* Also update f_nextoff */
+#define        FOF_NOUPDATE    0x10    /* Do not update f_offset */
+off_t foffset_lock(struct file *fp, int flags);
+void foffset_lock_uio(struct file *fp, struct uio *uio, int flags);
+void foffset_unlock(struct file *fp, off_t val, int flags);
+void foffset_unlock_uio(struct file *fp, struct uio *uio, int flags);
+
+static inline off_t
+foffset_get(struct file *fp)
+{
+
+       return (foffset_lock(fp, FOF_NOLOCK));
+}
+
 typedef int fo_rdwr_t(struct file *fp, struct uio *uio,
                    struct ucred *active_cred, int flags,
                    struct thread *td);
-#define        FOF_OFFSET      1       /* Use the offset in uio argument */
 typedef        int fo_truncate_t(struct file *fp, off_t length,
                    struct ucred *active_cred, struct thread *td);
 typedef        int fo_ioctl_t(struct file *fp, u_long com, void *data,

Modified: head/sys/ufs/ffs/ffs_alloc.c
==============================================================================
--- head/sys/ufs/ffs/ffs_alloc.c        Mon Jul  2 20:42:43 2012        
(r238028)
+++ head/sys/ufs/ffs/ffs_alloc.c        Mon Jul  2 21:01:03 2012        
(r238029)
@@ -2865,10 +2865,9 @@ buffered_write(fp, uio, active_cred, fla
        if (ip->i_devvp != devvp)
                return (EINVAL);
        fs = ip->i_fs;
+       foffset_lock_uio(fp, uio, flags);
        vfslocked = VFS_LOCK_GIANT(ip->i_vnode->v_mount);
        vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
-       if ((flags & FOF_OFFSET) == 0)
-               uio->uio_offset = fp->f_offset;
 #ifdef DEBUG
        if (fsckcmds) {
                printf("%s: buffered write for block %jd\n",
@@ -2893,11 +2892,9 @@ buffered_write(fp, uio, active_cred, fla
                goto out;
        }
        error = bwrite(bp);
-       if ((flags & FOF_OFFSET) == 0)
-               fp->f_offset = uio->uio_offset;
-       fp->f_nextoff = uio->uio_offset;
 out:
        VOP_UNLOCK(devvp, 0);
        VFS_UNLOCK_GIANT(vfslocked);
+       foffset_unlock_uio(fp, uio, flags | FOF_NEXTOFF);
        return (error);
 }
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to