Author: jeff
Date: Thu May 14 03:24:22 2009
New Revision: 192080
URL: http://svn.freebsd.org/changeset/base/192080

Log:
   - Implement a lockless file descriptor lookup algorithm in
     fget_unlocked().
   - Save old file descriptor tables created on expansion until
     the entire descriptor table is freed so that pointers may be
     followed without regard for expanders.
   - Mark the file zone as NOFREE so we may attempt to reference
     potentially freed files.
   - Convert several fget_locked() users to fget_unlocked().  This
     requires us to manage reference counts explicitly but reduces
     locking overhead in the common case.

Modified:
  head/sys/kern/kern_descrip.c
  head/sys/kern/sys_generic.c
  head/sys/kern/tty.c
  head/sys/kern/uipc_syscalls.c
  head/sys/kern/vfs_syscalls.c
  head/sys/sys/filedesc.h

Modified: head/sys/kern/kern_descrip.c
==============================================================================
--- head/sys/kern/kern_descrip.c        Thu May 14 02:42:29 2009        
(r192079)
+++ head/sys/kern/kern_descrip.c        Thu May 14 03:24:22 2009        
(r192080)
@@ -125,12 +125,24 @@ static void       fdused(struct filedesc *fdp,
 #define OFILESIZE (sizeof(struct file *) + sizeof(char))
 
 /*
+ * Storage to hold unused ofiles that need to be reclaimed.
+ */
+struct freetable {
+       struct file     **ft_table;
+       SLIST_ENTRY(freetable) ft_next;
+};
+
+/*
  * Basic allocation of descriptors:
  * one of the above, plus arrays for NDFILE descriptors.
  */
 struct filedesc0 {
        struct  filedesc fd_fd;
        /*
+        * ofiles which need to be reclaimed on free.
+        */
+       SLIST_HEAD(,freetable) fd_free;
+       /*
         * These arrays are used when the number of open files is
         * <= NDFILE, and are then pointed to by the pointers above.
         */
@@ -1268,7 +1280,10 @@ out:
 static void
 fdgrowtable(struct filedesc *fdp, int nfd)
 {
+       struct filedesc0 *fdp0;
+       struct freetable *fo;
        struct file **ntable;
+       struct file **otable;
        char *nfileflags;
        int nnfiles, onfiles;
        NDSLOTTYPE *nmap;
@@ -1287,7 +1302,7 @@ fdgrowtable(struct filedesc *fdp, int nf
 
        /* allocate a new table and (if required) new bitmaps */
        FILEDESC_XUNLOCK(fdp);
-       ntable = malloc(nnfiles * OFILESIZE,
+       ntable = malloc((nnfiles * OFILESIZE) + sizeof(struct freetable),
            M_FILEDESC, M_ZERO | M_WAITOK);
        nfileflags = (char *)&ntable[nnfiles];
        if (NDSLOTS(nnfiles) > NDSLOTS(onfiles))
@@ -1311,10 +1326,20 @@ fdgrowtable(struct filedesc *fdp, int nf
        }
        bcopy(fdp->fd_ofiles, ntable, onfiles * sizeof(*ntable));
        bcopy(fdp->fd_ofileflags, nfileflags, onfiles);
-       if (onfiles > NDFILE)
-               free(fdp->fd_ofiles, M_FILEDESC);
-       fdp->fd_ofiles = ntable;
+       otable = fdp->fd_ofiles;
        fdp->fd_ofileflags = nfileflags;
+       fdp->fd_ofiles = ntable;
+       /*
+        * We must preserve ofiles until the process exits because we can't
+        * be certain that no threads have references to the old table via
+        * _fget().
+        */
+       if (onfiles > NDFILE) {
+               fo = (struct freetable *)&otable[onfiles];
+               fdp0 = (struct filedesc0 *)fdp;
+               fo->ft_table = otable;
+               SLIST_INSERT_HEAD(&fdp0->fd_free, fo, ft_next);
+       }
        if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) {
                bcopy(fdp->fd_map, nmap, NDSLOTS(onfiles) * sizeof(*nmap));
                if (NDSLOTS(onfiles) > NDSLOTS(NDFILE))
@@ -1512,6 +1537,8 @@ fdhold(struct proc *p)
 static void
 fddrop(struct filedesc *fdp)
 {
+       struct filedesc0 *fdp0;
+       struct freetable *ft;
        int i;
 
        mtx_lock(&fdesc_mtx);
@@ -1521,6 +1548,11 @@ fddrop(struct filedesc *fdp)
                return;
 
        FILEDESC_LOCK_DESTROY(fdp);
+       fdp0 = (struct filedesc0 *)fdp;
+       while ((ft = SLIST_FIRST(&fdp0->fd_free)) != NULL) {
+               SLIST_REMOVE_HEAD(&fdp0->fd_free, ft_next);
+               free(ft->ft_table, M_FILEDESC);
+       }
        free(fdp, M_FILEDESC);
 }
 
@@ -2022,6 +2054,38 @@ finit(struct file *fp, u_int flag, short
        atomic_store_rel_ptr((volatile uintptr_t *)&fp->f_ops, (uintptr_t)ops);
 }
 
+struct file *
+fget_unlocked(struct filedesc *fdp, int fd)
+{
+       struct file *fp;
+       u_int count;
+
+       if (fd < 0 || fd >= fdp->fd_nfiles)
+               return (NULL);
+       /*
+        * Fetch the descriptor locklessly.  We avoid fdrop() races by
+        * never raising a refcount above 0.  To accomplish this we have
+        * to use a cmpset loop rather than an atomic_add.  The descriptor
+        * must be re-verified once we acquire a reference to be certain
+        * that the identity is still correct and we did not lose a race
+        * due to preemption.
+        */
+       for (;;) {
+               fp = fdp->fd_ofiles[fd];
+               if (fp == NULL)
+                       break;
+               count = fp->f_count;
+               if (count == 0)
+                       continue;
+               if (atomic_cmpset_int(&fp->f_count, count, count + 1) != 1)
+                       continue;
+               if (fp == ((struct file *volatile*)fdp->fd_ofiles)[fd])
+                       break;
+               fdrop(fp, curthread);
+       }
+
+       return (fp);
+}
 
 /*
  * Extract the file pointer associated with the specified descriptor for the
@@ -2030,16 +2094,12 @@ finit(struct file *fp, u_int flag, short
  * If the descriptor doesn't exist or doesn't match 'flags', EBADF is
  * returned.
  *
- * If 'hold' is set (non-zero) the file's refcount will be bumped on return.
- * It should be dropped with fdrop().  If it is not set, then the refcount
- * will not be bumped however the thread's filedesc struct will be returned
- * locked (for fgetsock).
- *
  * If an error occured the non-zero error is returned and *fpp is set to
- * NULL.  Otherwise *fpp is set and zero is returned.
+ * NULL.  Otherwise *fpp is held and set and zero is returned.  Caller is
+ * responsible for fdrop().
  */
 static __inline int
-_fget(struct thread *td, int fd, struct file **fpp, int flags, int hold)
+_fget(struct thread *td, int fd, struct file **fpp, int flags)
 {
        struct filedesc *fdp;
        struct file *fp;
@@ -2047,29 +2107,22 @@ _fget(struct thread *td, int fd, struct 
        *fpp = NULL;
        if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
                return (EBADF);
-       FILEDESC_SLOCK(fdp);
-       if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) {
-               FILEDESC_SUNLOCK(fdp);
+       if ((fp = fget_unlocked(fdp, fd)) == NULL)
+               return (EBADF);
+       if (fp->f_ops == &badfileops) {
+               fdrop(fp, td);
                return (EBADF);
        }
-
        /*
         * FREAD and FWRITE failure return EBADF as per POSIX.
         *
         * Only one flag, or 0, may be specified.
         */
-       if (flags == FREAD && (fp->f_flag & FREAD) == 0) {
-               FILEDESC_SUNLOCK(fdp);
-               return (EBADF);
-       }
-       if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) {
-               FILEDESC_SUNLOCK(fdp);
+       if ((flags == FREAD && (fp->f_flag & FREAD) == 0) ||
+           (flags == FWRITE && (fp->f_flag & FWRITE) == 0)) {
+               fdrop(fp, td);
                return (EBADF);
        }
-       if (hold) {
-               fhold(fp);
-               FILEDESC_SUNLOCK(fdp);
-       }
        *fpp = fp;
        return (0);
 }
@@ -2078,21 +2131,21 @@ int
 fget(struct thread *td, int fd, struct file **fpp)
 {
 
-       return(_fget(td, fd, fpp, 0, 1));
+       return(_fget(td, fd, fpp, 0));
 }
 
 int
 fget_read(struct thread *td, int fd, struct file **fpp)
 {
 
-       return(_fget(td, fd, fpp, FREAD, 1));
+       return(_fget(td, fd, fpp, FREAD));
 }
 
 int
 fget_write(struct thread *td, int fd, struct file **fpp)
 {
 
-       return(_fget(td, fd, fpp, FWRITE, 1));
+       return(_fget(td, fd, fpp, FWRITE));
 }
 
 /*
@@ -2109,7 +2162,7 @@ _fgetvp(struct thread *td, int fd, struc
        int error;
 
        *vpp = NULL;
-       if ((error = _fget(td, fd, &fp, flags, 0)) != 0)
+       if ((error = _fget(td, fd, &fp, flags)) != 0)
                return (error);
        if (fp->f_vnode == NULL) {
                error = EINVAL;
@@ -2117,7 +2170,8 @@ _fgetvp(struct thread *td, int fd, struc
                *vpp = fp->f_vnode;
                vref(*vpp);
        }
-       FILEDESC_SUNLOCK(td->td_proc->p_fd);
+       fdrop(fp, td);
+
        return (error);
 }
 
@@ -2164,7 +2218,7 @@ fgetsock(struct thread *td, int fd, stru
        *spp = NULL;
        if (fflagp != NULL)
                *fflagp = 0;
-       if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
+       if ((error = _fget(td, fd, &fp, 0)) != 0)
                return (error);
        if (fp->f_type != DTYPE_SOCKET) {
                error = ENOTSOCK;
@@ -2176,7 +2230,8 @@ fgetsock(struct thread *td, int fd, stru
                soref(*spp);
                SOCK_UNLOCK(*spp);
        }
-       FILEDESC_SUNLOCK(td->td_proc->p_fd);
+       fdrop(fp, td);
+
        return (error);
 }
 
@@ -3147,7 +3202,7 @@ filelistinit(void *dummy)
 {
 
        file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
-           NULL, NULL, UMA_ALIGN_PTR, 0);
+           NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
        mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
        mtx_init(&fdesc_mtx, "fdesc", NULL, MTX_DEF);
 }

Modified: head/sys/kern/sys_generic.c
==============================================================================
--- head/sys/kern/sys_generic.c Thu May 14 02:42:29 2009        (r192079)
+++ head/sys/kern/sys_generic.c Thu May 14 03:24:22 2009        (r192080)
@@ -988,7 +988,6 @@ selrescan(struct thread *td, fd_mask **i
        fdp = td->td_proc->p_fd;
        stp = td->td_sel;
        n = 0;
-       FILEDESC_SLOCK(fdp);
        STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn) {
                fd = (int)(uintptr_t)sfp->sf_cookie;
                si = sfp->sf_si;
@@ -996,17 +995,15 @@ selrescan(struct thread *td, fd_mask **i
                /* If the selinfo wasn't cleared the event didn't fire. */
                if (si != NULL)
                        continue;
-               if ((fp = fget_locked(fdp, fd)) == NULL) {
-                       FILEDESC_SUNLOCK(fdp);
+               if ((fp = fget_unlocked(fdp, fd)) == NULL)
                        return (EBADF);
-               }
                idx = fd / NFDBITS;
                bit = (fd_mask)1 << (fd % NFDBITS);
                ev = fo_poll(fp, selflags(ibits, idx, bit), td->td_ucred, td);
+               fdrop(fp, td);
                if (ev != 0)
                        n += selsetbits(ibits, obits, idx, bit, ev);
        }
-       FILEDESC_SUNLOCK(fdp);
        stp->st_flags = 0;
        td->td_retval[0] = n;
        return (0);
@@ -1030,7 +1027,6 @@ selscan(td, ibits, obits, nfd)
 
        fdp = td->td_proc->p_fd;
        n = 0;
-       FILEDESC_SLOCK(fdp);
        for (idx = 0, fd = 0; fd < nfd; idx++) {
                end = imin(fd + NFDBITS, nfd);
                for (bit = 1; fd < end; bit <<= 1, fd++) {
@@ -1038,18 +1034,16 @@ selscan(td, ibits, obits, nfd)
                        flags = selflags(ibits, idx, bit);
                        if (flags == 0)
                                continue;
-                       if ((fp = fget_locked(fdp, fd)) == NULL) {
-                               FILEDESC_SUNLOCK(fdp);
+                       if ((fp = fget_unlocked(fdp, fd)) == NULL)
                                return (EBADF);
-                       }
                        selfdalloc(td, (void *)(uintptr_t)fd);
                        ev = fo_poll(fp, flags, td->td_ucred, td);
+                       fdrop(fp, td);
                        if (ev != 0)
                                n += selsetbits(ibits, obits, idx, bit, ev);
                }
        }
 
-       FILEDESC_SUNLOCK(fdp);
        td->td_retval[0] = n;
        return (0);
 }

Modified: head/sys/kern/tty.c
==============================================================================
--- head/sys/kern/tty.c Thu May 14 02:42:29 2009        (r192079)
+++ head/sys/kern/tty.c Thu May 14 03:24:22 2009        (r192080)
@@ -1720,10 +1720,13 @@ ttyhook_register(struct tty **rtp, struc
        /* Validate the file descriptor. */
        if ((fdp = p->p_fd) == NULL)
                return (EBADF);
-       FILEDESC_SLOCK(fdp);
-       if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) {
-               FILEDESC_SUNLOCK(fdp);
+
+       fp = fget_unlocked(fdp, fd);
+       if (fp == NULL)
                return (EBADF);
+       if (fp->f_ops == &badfileops) {
+               error = EBADF;
+               goto done1;
        }
        
        /* Make sure the vnode is bound to a character device. */
@@ -1763,7 +1766,7 @@ ttyhook_register(struct tty **rtp, struc
 
 done3: tty_unlock(tp);
 done2: dev_relthread(dev);
-done1: FILEDESC_SUNLOCK(fdp);
+done1: fdrop(fp, curthread);
        return (error);
 }
 

Modified: head/sys/kern/uipc_syscalls.c
==============================================================================
--- head/sys/kern/uipc_syscalls.c       Thu May 14 02:42:29 2009        
(r192079)
+++ head/sys/kern/uipc_syscalls.c       Thu May 14 03:24:22 2009        
(r192080)
@@ -122,23 +122,16 @@ getsock(struct filedesc *fdp, int fd, st
        int error;
 
        fp = NULL;
-       if (fdp == NULL)
+       if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL) {
                error = EBADF;
-       else {
-               FILEDESC_SLOCK(fdp);
-               fp = fget_locked(fdp, fd);
-               if (fp == NULL)
-                       error = EBADF;
-               else if (fp->f_type != DTYPE_SOCKET) {
-                       fp = NULL;
-                       error = ENOTSOCK;
-               } else {
-                       fhold(fp);
-                       if (fflagp != NULL)
-                               *fflagp = fp->f_flag;
-                       error = 0;
-               }
-               FILEDESC_SUNLOCK(fdp);
+       } else if (fp->f_type != DTYPE_SOCKET) {
+               fdrop(fp, curthread);
+               fp = NULL;
+               error = ENOTSOCK;
+       } else {
+               if (fflagp != NULL)
+                       *fflagp = fp->f_flag;
+               error = 0;
        }
        *fpp = fp;
        return (error);

Modified: head/sys/kern/vfs_syscalls.c
==============================================================================
--- head/sys/kern/vfs_syscalls.c        Thu May 14 02:42:29 2009        
(r192079)
+++ head/sys/kern/vfs_syscalls.c        Thu May 14 03:24:22 2009        
(r192080)
@@ -4235,22 +4235,13 @@ getvnode(fdp, fd, fpp)
        int error;
        struct file *fp;
 
+       error = 0;
        fp = NULL;
-       if (fdp == NULL)
+       if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL)
                error = EBADF;
-       else {
-               FILEDESC_SLOCK(fdp);
-               if ((u_int)fd >= fdp->fd_nfiles ||
-                   (fp = fdp->fd_ofiles[fd]) == NULL)
-                       error = EBADF;
-               else if (fp->f_vnode == NULL) {
-                       fp = NULL;
-                       error = EINVAL;
-               } else {
-                       fhold(fp);
-                       error = 0;
-               }
-               FILEDESC_SUNLOCK(fdp);
+       else if (fp->f_vnode == NULL) {
+               error = EINVAL;
+               fdrop(fp, curthread);
        }
        *fpp = fp;
        return (error);

Modified: head/sys/sys/filedesc.h
==============================================================================
--- head/sys/sys/filedesc.h     Thu May 14 02:42:29 2009        (r192079)
+++ head/sys/sys/filedesc.h     Thu May 14 03:24:22 2009        (r192080)
@@ -129,6 +129,10 @@ int        getvnode(struct filedesc *fdp, int f
 void   mountcheckdirs(struct vnode *olddp, struct vnode *newdp);
 void   setugidsafety(struct thread *td);
 
+/* Return a referenced file from an unlocked descriptor. */
+struct file *fget_unlocked(struct filedesc *fdp, int fd);
+
+/* Requires a FILEDESC_{S,X}LOCK held and returns without a ref. */
 static __inline struct file *
 fget_locked(struct filedesc *fdp, int fd)
 {
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to