On Mon, Jul 06, 2015 at 07:51:35AM +0200, Mateusz Guzik wrote:
> On Thu, Jul 17, 2014 at 03:56:38AM +0300, Konstantin Belousov wrote:
> > On Sun, Jul 13, 2014 at 11:36:24PM +0200, Mateusz Guzik wrote:
> > > There is an additional problem with slocked case: witness report a lor
> > > with devfs vnodes.
> > > 
> > > lock order reversal:
> > >  1st 0xfffff80006fe6ab0 process imagelock (process imagelock) @ 
> > > /usr/src/sys/kern/kern_proc.c:287
> > >  2nd 0xfffff80018c88240 devfs (devfs) @ /usr/src/sys/kern/vfs_cache.c:1241
> > > KDB: stack backtrace:
> > > db_trace_self_wrapper() at db_trace_self_wrapper+0x2b/frame 
> > > 0xfffffe012324f120
> > > kdb_backtrace() at kdb_backtrace+0x39/frame 0xfffffe012324f1d0
> > > witness_checkorder() at witness_checkorder+0xdc2/frame 0xfffffe012324f260
> > > __lockmgr_args() at __lockmgr_args+0x588/frame 0xfffffe012324f3a0
> > > vop_stdlock() at vop_stdlock+0x3c/frame 0xfffffe012324f3c0
> > > VOP_LOCK1_APV() at VOP_LOCK1_APV+0xfc/frame 0xfffffe012324f3f0
> > > _vn_lock() at _vn_lock+0xaa/frame 0xfffffe012324f460
> > > vn_vptocnp_locked() at vn_vptocnp_locked+0xe8/frame 0xfffffe012324f4d0
> > > vn_fullpath1() at vn_fullpath1+0xb0/frame 0xfffffe012324f530
> > > vn_fullpath() at vn_fullpath+0xc1/frame 0xfffffe012324f580
> > > export_fd_to_sb() at export_fd_to_sb+0x489/frame 0xfffffe012324f7b0
> > > kern_proc_filedesc_out() at kern_proc_filedesc_out+0x1c6/frame 
> > > 0xfffffe012324f840
> > > sysctl_kern_proc_filedesc() at sysctl_kern_proc_filedesc+0x84/frame 
> > > 0xfffffe012324f900
> > > sysctl_root_handler_locked() at sysctl_root_handler_locked+0x68/frame 
> > > 0xfffffe012324f940
> > > sysctl_root() at sysctl_root+0x18e/frame 0xfffffe012324f990
> > > userland_sysctl() at userland_sysctl+0x192/frame 0xfffffe012324fa30
> > > 
> > > witness detected the following lock orders:
> > > devfs -> proctree
> > Where is the dependency catched comes from ?  I suspect it might be tty.
> > 
> > I consider this case as an advantage of using sx over the hand-rolled lock,
> > since the issue is/must be present with the counter as well, or the LOR
> > is false positive, possibly due to sx taken in shared mode.  But debugging
> > features of sx give the warning, while counter is silent.
> > 
> > That said, if the issue above is analyzed, I do not have any preference
> > and will not object strongly against you decision.
> > 
> 
> [snip]
> 
> This patch is about providing a way to block execs and exits of
> processes so that it is safe to inspect their state. For instance it
> deals with a case where the target process executes a setuid binary,
> where the process inspecting it could have its permission checked prior
> to exec but is reading data after setuid completed.
> 
> LOR mentioned above indeed comes form tty handling and is handled by
> relocking the vnode.
> 

Oops, the attached patch was missing one place (devfs_lookupx). Updated patch:


diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c
index fe8e9ef..c7f579a 100644
--- a/sys/fs/devfs/devfs_vnops.c
+++ b/sys/fs/devfs/devfs_vnops.c
@@ -574,6 +574,8 @@ devfs_close(struct vop_close_args *ap)
        if (vp->v_data == NULL)
                return (0);
 
+       vp_locked = VOP_ISLOCKED(vp);
+
        /*
         * Hack: a tty device that is a controlling terminal
         * has a reference from the session structure.
@@ -589,6 +591,7 @@ devfs_close(struct vop_close_args *ap)
                if (vp == p->p_session->s_ttyvp) {
                        PROC_UNLOCK(p);
                        oldvp = NULL;
+                       VOP_UNLOCK(vp, 0);
                        sx_xlock(&proctree_lock);
                        if (vp == p->p_session->s_ttyvp) {
                                SESS_LOCK(p->p_session);
@@ -603,6 +606,7 @@ devfs_close(struct vop_close_args *ap)
                                SESS_UNLOCK(p->p_session);
                        }
                        sx_xunlock(&proctree_lock);
+                       vn_lock(vp, vp_locked | LK_RETRY);
                        if (oldvp != NULL)
                                vrele(oldvp);
                } else
@@ -632,7 +636,6 @@ devfs_close(struct vop_close_args *ap)
        }
        vholdl(vp);
        VI_UNLOCK(vp);
-       vp_locked = VOP_ISLOCKED(vp);
        VOP_UNLOCK(vp, 0);
        KASSERT(dev->si_refcount > 0,
            ("devfs_close() on un-referenced struct cdev *(%s)", 
devtoname(dev)));
@@ -964,10 +967,13 @@ devfs_lookupx(struct vop_lookup_args *ap, int *dm_unlock)
                cdev = NULL;
                DEVFS_DMP_HOLD(dmp);
                sx_xunlock(&dmp->dm_lock);
+               dvplocked = VOP_ISLOCKED(dvp);
+               VOP_UNLOCK(dvp, 0);
                sx_slock(&clone_drain_lock);
                EVENTHANDLER_INVOKE(dev_clone,
                    td->td_ucred, pname, strlen(pname), &cdev);
                sx_sunlock(&clone_drain_lock);
+               vn_lock(dvp, dvplocked | LK_RETRY);
 
                if (cdev == NULL)
                        sx_xlock(&dmp->dm_lock);
diff --git a/sys/fs/nfsclient/nfs_clport.c b/sys/fs/nfsclient/nfs_clport.c
index d3bac30..118a7d9 100644
--- a/sys/fs/nfsclient/nfs_clport.c
+++ b/sys/fs/nfsclient/nfs_clport.c
@@ -1188,7 +1188,7 @@ nfscl_procdoesntexist(u_int8_t *own)
        tl.cval[2] = *own++;
        tl.cval[3] = *own++;
        pid = tl.lval;
-       p = pfind_locked(pid);
+       p = pfind_locked(pid, 0);
        if (p == NULL)
                return (1);
        if (p->p_stats == NULL) {
diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c
index 0675128..2ad09aa 100644
--- a/sys/kern/imgact_elf.c
+++ b/sys/kern/imgact_elf.c
@@ -1887,6 +1887,7 @@ note_procstat_files(void *arg, struct sbuf *sb, size_t 
*sizep)
                sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN);
                sbuf_set_drain(sb, sbuf_drain_count, &size);
                sbuf_bcat(sb, &structsize, sizeof(structsize));
+               sx_slock(&p->p_imagelock);
                PROC_LOCK(p);
                kern_proc_filedesc_out(p, sb, -1);
                sbuf_finish(sb);
@@ -1895,6 +1896,7 @@ note_procstat_files(void *arg, struct sbuf *sb, size_t 
*sizep)
        } else {
                structsize = sizeof(struct kinfo_file);
                sbuf_bcat(sb, &structsize, sizeof(structsize));
+               sx_slock(&p->p_imagelock);
                PROC_LOCK(p);
                kern_proc_filedesc_out(p, sb, -1);
        }
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index 37539c4..9364dcb 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -480,6 +480,7 @@ proc0_init(void *dummy __unused)
        p->p_flag2 = 0;
        p->p_state = PRS_NORMAL;
        knlist_init_mtx(&p->p_klist, &p->p_mtx);
+       sx_init_flags(&p->p_imagelock, "process imagelock", SX_NEW);
        STAILQ_INIT(&p->p_ktr);
        p->p_nice = NZERO;
        /* pid_max cannot be greater than PID_MAX */
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index 85cda01..bbdcbf5 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -3271,6 +3271,7 @@ kern_proc_filedesc_out(struct proc *p,  struct sbuf *sb, 
ssize_t maxlen)
        FILEDESC_SUNLOCK(fdp);
        fddrop(fdp);
 fail:
+       sx_sunlock(&p->p_imagelock);
        free(efbuf, M_TEMP);
        return (error);
 }
@@ -3292,7 +3293,7 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS)
 
        sbuf_new_for_sysctl(&sb, NULL, FILEDESC_SBUF_SIZE, req);
        sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
-       error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p);
+       error = pget((pid_t)name[0], PGET_LOCK, &p);
        if (error != 0) {
                sbuf_delete(&sb);
                return (error);
@@ -3359,7 +3360,7 @@ sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS)
        struct proc *p;
 
        name = (int *)arg1;
-       error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p);
+       error = pget((pid_t)name[0], PGET_LOCK, &p);
        if (error != 0)
                return (error);
        fdp = fdhold(p);
@@ -3391,6 +3392,7 @@ sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS)
        }
        FILEDESC_SUNLOCK(fdp);
        fddrop(fdp);
+       sx_sunlock(&p->p_imagelock);
        free(kif, M_TEMP);
        free(okif, M_TEMP);
        return (0);
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 859b2e3..0d9aac1 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -381,6 +381,7 @@ do_execve(td, args, mac_p)
         * that might allow a local user to illicitly obtain elevated
         * privileges.
         */
+       sx_xlock(&p->p_imagelock);
        PROC_LOCK(p);
        KASSERT((p->p_flag & P_INEXEC) == 0,
            ("%s(): process already has P_INEXEC flag", __func__));
@@ -907,6 +908,7 @@ exec_fail:
        SDT_PROBE(proc, kernel, , exec__failure, error, 0, 0, 0, 0);
 
 done2:
+       sx_xunlock(&p->p_imagelock);
 #ifdef MAC
        mac_execve_exit(imgp);
        mac_execve_interpreter_exit(interpvplabel);
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index 9ce6d34..27fd764 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -213,6 +213,7 @@ exit1(struct thread *td, int rv)
        /*
         * MUST abort all other threads before proceeding past here.
         */
+       sx_xlock(&p->p_imagelock);
        PROC_LOCK(p);
        /*
         * First check if some other thread or external request got
@@ -279,6 +280,7 @@ exit1(struct thread *td, int rv)
         * decided to wait again after we told them we are exiting.
         */
        p->p_flag |= P_WEXIT;
+       sx_xunlock(&p->p_imagelock);
        wakeup(&p->p_stype);
 
        /*
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index 27c6f40..ac6b441 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -230,6 +230,7 @@ proc_init(void *mem, int size, int flags)
        mtx_init(&p->p_statmtx, "pstatl", NULL, MTX_SPIN | MTX_NEW);
        mtx_init(&p->p_itimmtx, "pitiml", NULL, MTX_SPIN | MTX_NEW);
        mtx_init(&p->p_profmtx, "pprofl", NULL, MTX_SPIN | MTX_NEW);
+       sx_init_flags(&p->p_imagelock, "process imagelock", SX_NEW);
        cv_init(&p->p_pwait, "ppwait");
        cv_init(&p->p_dbgwait, "dbgwait");
        TAILQ_INIT(&p->p_threads);           /* all threads in proc */
@@ -278,16 +279,20 @@ inferior(struct proc *p)
 }
 
 struct proc *
-pfind_locked(pid_t pid)
+pfind_locked(pid_t pid, int lockimage)
 {
        struct proc *p;
 
        sx_assert(&allproc_lock, SX_LOCKED);
        LIST_FOREACH(p, PIDHASH(pid), p_hash) {
                if (p->p_pid == pid) {
+                       if (lockimage)
+                               sx_slock(&p->p_imagelock);
                        PROC_LOCK(p);
                        if (p->p_state == PRS_NEW) {
                                PROC_UNLOCK(p);
+                               if (lockimage)
+                                       sx_sunlock(&p->p_imagelock);
                                p = NULL;
                        }
                        break;
@@ -308,7 +313,7 @@ pfind(pid_t pid)
        struct proc *p;
 
        sx_slock(&allproc_lock);
-       p = pfind_locked(pid);
+       p = pfind_locked(pid, 0);
        sx_sunlock(&allproc_lock);
        return (p);
 }
@@ -364,11 +369,17 @@ int
 pget(pid_t pid, int flags, struct proc **pp)
 {
        struct proc *p;
-       int error;
+       int error, lockimage;
+
+       lockimage = ((flags & PGET_IMAGELOCK) != 0);
+       if (lockimage) {
+               MPASS((flags & PGET_NOTWEXIT) != 0);
+               MPASS((flags & PGET_HOLD) == 0);
+       }
 
        sx_slock(&allproc_lock);
        if (pid <= PID_MAX) {
-               p = pfind_locked(pid);
+               p = pfind_locked(pid, lockimage);
                if (p == NULL && (flags & PGET_NOTWEXIT) == 0)
                        p = zpfind_locked(pid);
        } else if ((flags & PGET_NOTID) == 0) {
@@ -413,6 +424,8 @@ pget(pid_t pid, int flags, struct proc **pp)
        return (0);
 errout:
        PROC_UNLOCK(p);
+       if (lockimage)
+               sx_sunlock(&p->p_imagelock);
        return (error);
 }
 
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index e6c83b4..5df817a 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -522,6 +522,7 @@ struct proc {
                                               (if I am reaper). */
        LIST_ENTRY(proc) p_reapsibling; /* (e) List of siblings - descendants of
                                               the same reaper. */
+       struct sx       p_imagelock;    /* Lock blocking exec/exit. */
        struct mtx      p_mtx;          /* (n) Lock for this struct. */
        struct mtx      p_statmtx;      /* Lock for the stats */
        struct mtx      p_itimmtx;      /* Lock for the virt/prof timers */
@@ -886,7 +887,7 @@ extern struct proc *initproc, *pageproc; /* Process slots 
for init, pager. */
 extern struct uma_zone *proc_zone;
 
 struct proc *pfind(pid_t);             /* Find process by id. */
-struct proc *pfind_locked(pid_t pid);
+struct proc *pfind_locked(pid_t pid, int lockimage);
 struct pgrp *pgfind(pid_t);            /* Find process group by id. */
 struct proc *zpfind(pid_t);            /* Find zombie process by id. */
 
@@ -900,8 +901,10 @@ struct     proc *zpfind(pid_t);            /* Find zombie 
process by id. */
 #define        PGET_NOTWEXIT   0x00010 /* Check that the process is not in 
P_WEXIT. */
 #define        PGET_NOTINEXEC  0x00020 /* Check that the process is not in 
P_INEXEC. */
 #define        PGET_NOTID      0x00040 /* Do not assume tid if pid > PID_MAX. 
*/
+#define        PGET_IMAGELOCK  0x00080 /* Prevent execs and exits. */
 
 #define        PGET_WANTREAD   (PGET_HOLD | PGET_CANDEBUG | PGET_NOTWEXIT)
+#define        PGET_LOCK       (PGET_CANDEBUG | PGET_NOTWEXIT | PGET_IMAGELOCK)
 
 int    pget(pid_t pid, int flags, struct proc **pp);
 
-- 
Mateusz Guzik
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to