Author: mjg
Date: Wed Oct 15 16:54:18 2014
New Revision: 273137
URL: https://svnweb.freebsd.org/changeset/base/273137

Log:
  MFC r273109:
  
  fget_unlocked currently reads 'fde' which is a structure consisting of
  serveral fields. In effect the read is inatomic and may result in
  obtaining file pointer with stale or incorrect capabilities.
  
  Example race is with dup2.
  
  Side effect is that capability checks can be circumvented.
  
  Fix the problem with introduction of sequence counters.
  
  MFC r269023,r272503,r272505,r272523,r272567,r272569,r272574:
  
  Prepare fget_unlocked for reading fd table only once.
  
  Some capsicum functions accept fdp + fd and lookup fde based on that.
  Add variants which accept fde.
  
  ===============================
  
  Add sequence counters with memory barriers.
  
  Current implementation is somewhat simplistic and hackish,
  will be improved later after possible memory barrier overhaul.
  
  ===============================
  
  Plug capability races.
  
  fp and appropriate capability lookups were not atomic, which could result in
  improper capabilities being checked.
  
  This could result either in protection bypass or in a spurious ENOTCAPABLE.
  
  Make fp + capability check atomic with the help of sequence counters.
  
  ===============================
  
  Put and #ifdef _KERNEL around the #include for opt_capsicum.h to
  hopefully allow the build to finish after r272505.
  
  ===============================
  
  filedesc: fix up breakage introduced in 272505
  
  Include sequence counter supports incoditionally [1]. This fixes reprted build
  problems with e.g. nvidia driver due to missing opt_capsicum.h.
  
  Replace fishy looking sizeof with offsetof. Make fde_seq the last member in
  order to simplify calculations.
  
  ===============================
  
  Keep struct filedescent comments within 80-char limit.
  
  ===============================
  
  seq_t needs to be visible to userspace
  
  Approved by:  re (kib)

Added:
  releng/10.1/sys/sys/seq.h
     - copied unchanged from r273109, stable/10/sys/sys/seq.h
Modified:
  releng/10.1/sys/kern/kern_descrip.c
  releng/10.1/sys/kern/sys_capability.c
  releng/10.1/sys/sys/capability.h
  releng/10.1/sys/sys/filedesc.h
Directory Properties:
  releng/10.1/   (props changed)

Modified: releng/10.1/sys/kern/kern_descrip.c
==============================================================================
--- releng/10.1/sys/kern/kern_descrip.c Wed Oct 15 14:07:24 2014        
(r273136)
+++ releng/10.1/sys/kern/kern_descrip.c Wed Oct 15 16:54:18 2014        
(r273137)
@@ -304,11 +304,18 @@ _fdfree(struct filedesc *fdp, int fd, in
        struct filedescent *fde;
 
        fde = &fdp->fd_ofiles[fd];
+#ifdef CAPABILITIES
+       if (!last)
+               seq_write_begin(&fde->fde_seq);
+#endif
        filecaps_free(&fde->fde_caps);
        if (last)
                return;
-       bzero(fde, sizeof(*fde));
+       bzero(fde, fde_change_size);
        fdunused(fdp, fd);
+#ifdef CAPABILITIES
+       seq_write_end(&fde->fde_seq);
+#endif
 }
 
 static inline void
@@ -899,13 +906,19 @@ do_dup(struct thread *td, int flags, int
        /*
         * Duplicate the source descriptor.
         */
+#ifdef CAPABILITIES
+       seq_write_begin(&newfde->fde_seq);
+#endif
        filecaps_free(&newfde->fde_caps);
-       *newfde = *oldfde;
+       memcpy(newfde, oldfde, fde_change_size);
        filecaps_copy(&oldfde->fde_caps, &newfde->fde_caps);
        if ((flags & DUP_CLOEXEC) != 0)
                newfde->fde_flags = oldfde->fde_flags | UF_EXCLOSE;
        else
                newfde->fde_flags = oldfde->fde_flags & ~UF_EXCLOSE;
+#ifdef CAPABILITIES
+       seq_write_end(&newfde->fde_seq);
+#endif
        *retval = new;
 
        if (delfp != NULL) {
@@ -1804,6 +1817,9 @@ finstall(struct thread *td, struct file 
        }
        fhold(fp);
        fde = &fdp->fd_ofiles[*fd];
+#ifdef CAPABILITIES
+       seq_write_begin(&fde->fde_seq);
+#endif
        fde->fde_file = fp;
        if ((flags & O_CLOEXEC) != 0)
                fde->fde_flags |= UF_EXCLOSE;
@@ -1811,6 +1827,9 @@ finstall(struct thread *td, struct file 
                filecaps_move(fcaps, &fde->fde_caps);
        else
                filecaps_fill(&fde->fde_caps);
+#ifdef CAPABILITIES
+       seq_write_end(&fde->fde_seq);
+#endif
        FILEDESC_XUNLOCK(fdp);
        return (0);
 }
@@ -2336,9 +2355,13 @@ int
 fget_unlocked(struct filedesc *fdp, int fd, cap_rights_t *needrightsp,
     int needfcntl, struct file **fpp, cap_rights_t *haverightsp)
 {
+#ifdef CAPABILITIES
+       struct filedescent fde;
+#endif
        struct file *fp;
        u_int count;
 #ifdef CAPABILITIES
+       seq_t seq;
        cap_rights_t haverights;
        int error;
 #endif
@@ -2358,17 +2381,27 @@ fget_unlocked(struct filedesc *fdp, int 
         * due to preemption.
         */
        for (;;) {
+#ifdef CAPABILITIES
+               seq = seq_read(fd_seq(fdp, fd));
+               fde = fdp->fd_ofiles[fd];
+               if (!seq_consistent(fd_seq(fdp, fd), seq)) {
+                       cpu_spinwait();
+                       continue;
+               }
+               fp = fde.fde_file;
+#else
                fp = fdp->fd_ofiles[fd].fde_file;
+#endif
                if (fp == NULL)
                        return (EBADF);
 #ifdef CAPABILITIES
-               haverights = *cap_rights(fdp, fd);
+               haverights = *cap_rights_fde(&fde);
                if (needrightsp != NULL) {
                        error = cap_check(&haverights, needrightsp);
                        if (error != 0)
                                return (error);
                        if (cap_rights_is_set(needrightsp, CAP_FCNTL)) {
-                               error = cap_fcntl_check(fdp, fd, needfcntl);
+                               error = cap_fcntl_check_fde(&fde, needfcntl);
                                if (error != 0)
                                        return (error);
                        }
@@ -2383,7 +2416,11 @@ fget_unlocked(struct filedesc *fdp, int 
                 */
                if (atomic_cmpset_acq_int(&fp->f_count, count, count + 1) != 1)
                        continue;
+#ifdef CAPABILITIES
+               if (seq_consistent_nomb(fd_seq(fdp, fd), seq))
+#else
                if (fp == fdp->fd_ofiles[fd].fde_file)
+#endif
                        break;
                fdrop(fp, curthread);
        }
@@ -2740,6 +2777,7 @@ int
 dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode,
     int openerror, int *indxp)
 {
+       struct filedescent *newfde, *oldfde;
        struct file *fp;
        int error, indx;
 
@@ -2783,17 +2821,32 @@ dupfdopen(struct thread *td, struct file
                        return (EACCES);
                }
                fhold(fp);
-               fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
-               filecaps_copy(&fdp->fd_ofiles[dfd].fde_caps,
-                   &fdp->fd_ofiles[indx].fde_caps);
+               newfde = &fdp->fd_ofiles[indx];
+               oldfde = &fdp->fd_ofiles[dfd];
+#ifdef CAPABILITIES
+               seq_write_begin(&newfde->fde_seq);
+#endif
+               memcpy(newfde, oldfde, fde_change_size);
+               filecaps_copy(&oldfde->fde_caps, &newfde->fde_caps);
+#ifdef CAPABILITIES
+               seq_write_end(&newfde->fde_seq);
+#endif
                break;
        case ENXIO:
                /*
                 * Steal away the file pointer from dfd and stuff it into indx.
                 */
-               fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
-               bzero(&fdp->fd_ofiles[dfd], sizeof(fdp->fd_ofiles[dfd]));
+               newfde = &fdp->fd_ofiles[indx];
+               oldfde = &fdp->fd_ofiles[dfd];
+#ifdef CAPABILITIES
+               seq_write_begin(&newfde->fde_seq);
+#endif
+               memcpy(newfde, oldfde, fde_change_size);
+               bzero(oldfde, fde_change_size);
                fdunused(fdp, dfd);
+#ifdef CAPABILITIES
+               seq_write_end(&newfde->fde_seq);
+#endif
                break;
        }
        FILEDESC_XUNLOCK(fdp);

Modified: releng/10.1/sys/kern/sys_capability.c
==============================================================================
--- releng/10.1/sys/kern/sys_capability.c       Wed Oct 15 14:07:24 2014        
(r273136)
+++ releng/10.1/sys/kern/sys_capability.c       Wed Oct 15 16:54:18 2014        
(r273137)
@@ -199,11 +199,19 @@ cap_rights_to_vmprot(cap_rights_t *havep
  * any other way, as we want to keep all capability permission evaluation in
  * this one file.
  */
+
+cap_rights_t *
+cap_rights_fde(struct filedescent *fde)
+{
+
+       return (&fde->fde_rights);
+}
+
 cap_rights_t *
 cap_rights(struct filedesc *fdp, int fd)
 {
 
-       return (&fdp->fd_ofiles[fd].fde_rights);
+       return (cap_rights_fde(&fdp->fd_ofiles[fd]));
 }
 
 /*
@@ -486,24 +494,31 @@ out:
  * Test whether a capability grants the given fcntl command.
  */
 int
-cap_fcntl_check(struct filedesc *fdp, int fd, int cmd)
+cap_fcntl_check_fde(struct filedescent *fde, int cmd)
 {
        uint32_t fcntlcap;
 
-       KASSERT(fd >= 0 && fd < fdp->fd_nfiles,
-           ("%s: invalid fd=%d", __func__, fd));
-
        fcntlcap = (1 << cmd);
        KASSERT((CAP_FCNTL_ALL & fcntlcap) != 0,
            ("Unsupported fcntl=%d.", cmd));
 
-       if ((fdp->fd_ofiles[fd].fde_fcntls & fcntlcap) != 0)
+       if ((fde->fde_fcntls & fcntlcap) != 0)
                return (0);
 
        return (ENOTCAPABLE);
 }
 
 int
+cap_fcntl_check(struct filedesc *fdp, int fd, int cmd)
+{
+
+       KASSERT(fd >= 0 && fd < fdp->fd_nfiles,
+           ("%s: invalid fd=%d", __func__, fd));
+
+       return (cap_fcntl_check_fde(&fdp->fd_ofiles[fd], cmd));
+}
+
+int
 sys_cap_fcntls_limit(struct thread *td, struct cap_fcntls_limit_args *uap)
 {
        struct filedesc *fdp;

Modified: releng/10.1/sys/sys/capability.h
==============================================================================
--- releng/10.1/sys/sys/capability.h    Wed Oct 15 14:07:24 2014        
(r273136)
+++ releng/10.1/sys/sys/capability.h    Wed Oct 15 16:54:18 2014        
(r273137)
@@ -343,6 +343,7 @@ bool cap_rights_contains(const cap_right
 #define IN_CAPABILITY_MODE(td) ((td->td_ucred->cr_flags & CRED_FLAG_CAPMODE) 
!= 0)
 
 struct filedesc;
+struct filedescent;
 
 /*
  * Test whether a capability grants the requested rights.
@@ -357,9 +358,11 @@ u_char     cap_rights_to_vmprot(cap_rights_t
  * For the purposes of procstat(1) and similar tools, allow kern_descrip.c to
  * extract the rights from a capability.
  */
+cap_rights_t   *cap_rights_fde(struct filedescent *fde);
 cap_rights_t   *cap_rights(struct filedesc *fdp, int fd);
 
 int    cap_ioctl_check(struct filedesc *fdp, int fd, u_long cmd);
+int    cap_fcntl_check_fde(struct filedescent *fde, int cmd);
 int    cap_fcntl_check(struct filedesc *fdp, int fd, int cmd);
 
 #else /* !_KERNEL */

Modified: releng/10.1/sys/sys/filedesc.h
==============================================================================
--- releng/10.1/sys/sys/filedesc.h      Wed Oct 15 14:07:24 2014        
(r273136)
+++ releng/10.1/sys/sys/filedesc.h      Wed Oct 15 16:54:18 2014        
(r273137)
@@ -38,6 +38,7 @@
 #include <sys/event.h>
 #include <sys/lock.h>
 #include <sys/priority.h>
+#include <sys/seq.h>
 #include <sys/sx.h>
 
 #include <machine/_limits.h>
@@ -50,14 +51,16 @@ struct filecaps {
 };
 
 struct filedescent {
-       struct file     *fde_file;              /* file structure for open file 
*/
-       struct filecaps  fde_caps;              /* per-descriptor rights */
-       uint8_t          fde_flags;             /* per-process open file flags 
*/
+       struct file     *fde_file;      /* file structure for open file */
+       struct filecaps  fde_caps;      /* per-descriptor rights */
+       uint8_t          fde_flags;     /* per-process open file flags */
+       seq_t            fde_seq;       /* keep file and caps in sync */
 };
 #define        fde_rights      fde_caps.fc_rights
 #define        fde_fcntls      fde_caps.fc_fcntls
 #define        fde_ioctls      fde_caps.fc_ioctls
 #define        fde_nioctls     fde_caps.fc_nioctls
+#define        fde_change_size (offsetof(struct filedescent, fde_seq))
 
 /*
  * This structure is used for the management of descriptors.  It may be
@@ -82,6 +85,7 @@ struct filedesc {
        int     fd_holdleaderscount;    /* block fdfree() for shared close() */
        int     fd_holdleaderswakeup;   /* fdfree() needs wakeup */
 };
+#define        fd_seq(fdp, fd) (&(fdp)->fd_ofiles[(fd)].fde_seq)
 
 /*
  * Structure to keep track of (process leader, struct fildedesc) tuples.

Copied: releng/10.1/sys/sys/seq.h (from r273109, stable/10/sys/sys/seq.h)
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ releng/10.1/sys/sys/seq.h   Wed Oct 15 16:54:18 2014        (r273137, copy 
of r273109, stable/10/sys/sys/seq.h)
@@ -0,0 +1,146 @@
+/*-
+ * Copyright (c) 2014 Mateusz Guzik <m...@freebsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_SEQ_H_
+#define _SYS_SEQ_H_
+
+#ifdef _KERNEL
+#include <sys/systm.h>
+#endif
+#include <sys/types.h>
+
+/*
+ * seq_t may be included in structs visible to userspace
+ */
+typedef uint32_t seq_t;
+
+#ifdef _KERNEL
+
+/*
+ * Typical usage:
+ *
+ * writers:
+ *     lock_exclusive(&obj->lock);
+ *     seq_write_begin(&obj->seq);
+ *     .....
+ *     seq_write_end(&obj->seq);
+ *     unlock_exclusive(&obj->unlock);
+ *
+ * readers:
+ *     obj_t lobj;
+ *     seq_t seq;
+ *
+ *     for (;;) {
+ *             seq = seq_read(&gobj->seq);
+ *             lobj = gobj;
+ *             if (seq_consistent(&gobj->seq, seq))
+ *                     break;
+ *             cpu_spinwait();
+ *     }
+ *     foo(lobj);
+ */            
+
+/* A hack to get MPASS macro */
+#include <sys/lock.h>
+
+#include <machine/cpu.h>
+
+/*
+ * This is a temporary hack until memory barriers are cleaned up.
+ *
+ * atomic_load_acq_int at least on amd64 provides a full memory barrier,
+ * in a way which affects perforance.
+ *
+ * Hack below covers all architectures and avoids most of the penalty at least
+ * on amd64.
+ */
+static __inline int
+atomic_load_acq_rmb_int(volatile u_int *p)
+{
+       volatile u_int v;
+
+       v = *p;
+       atomic_load_acq_int(&v);
+       return (v);
+}
+
+static __inline bool
+seq_in_modify(seq_t seqp)
+{
+
+       return (seqp & 1);
+}
+
+static __inline void
+seq_write_begin(seq_t *seqp)
+{
+
+       MPASS(!seq_in_modify(*seqp));
+       atomic_add_acq_int(seqp, 1);
+}
+
+static __inline void
+seq_write_end(seq_t *seqp)
+{
+
+       atomic_add_rel_int(seqp, 1);
+       MPASS(!seq_in_modify(*seqp));
+}
+
+static __inline seq_t
+seq_read(seq_t *seqp)
+{
+       seq_t ret;
+
+       for (;;) {
+               ret = atomic_load_acq_rmb_int(seqp);
+               if (seq_in_modify(ret)) {
+                       cpu_spinwait();
+                       continue;
+               }
+               break;
+       }
+
+       return (ret);
+}
+
+static __inline seq_t
+seq_consistent(seq_t *seqp, seq_t oldseq)
+{
+
+       return (atomic_load_acq_rmb_int(seqp) == oldseq);
+}
+
+static __inline seq_t
+seq_consistent_nomb(seq_t *seqp, seq_t oldseq)
+{
+
+       return (*seqp == oldseq);
+}
+
+#endif /* _KERNEL */
+#endif /* _SYS_SEQ_H_ */
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to