On Sun, 3 Jun 2018, Theo de Raadt wrote:
> Philip Guenther <[email protected]> wrote:
> > The warning is not that a single filesystem is being locked
> > recursively by a single thread, but just that a single thread is
> > holding locks on multiple filesystems.
>
> vfs_stall() needs to grab locks on all filesystems, to stop a variety of
> filesystem transactions. (Other types of transactions are blocked in
> other ways).
>
> sys_umount() grabs locks on all filesystems above it, to stop anyone
> from doing a parallel mount/unmount along the same path.
>
> This is all normal.
Diff below adds VB_DUPOK to indicate that a given vfs_busy() call is
expected/permitted to occur while the thread already holds another
filesystem busy; the caller is responsible for ensuring the filesystems
are locked in the correct order (or that NOWAIT is used safely as in the
sys_mount() case).
As part of this, this plumbs RW_DUPOK for rw_enter().
I no longer get the witness warning on hibernate with this.
ok?
Philip Guenther
Index: sys/mount.h
===================================================================
RCS file: /data/src/openbsd/src/sys/sys/mount.h,v
retrieving revision 1.136
diff -u -p -r1.136 mount.h
--- sys/mount.h 8 May 2018 08:58:49 -0000 1.136
+++ sys/mount.h 3 Jun 2018 17:05:28 -0000
@@ -564,6 +564,7 @@ int vfs_busy(struct mount *, int);
#define VB_WRITE 0x02
#define VB_NOWAIT 0x04 /* immediately fail on busy lock */
#define VB_WAIT 0x08 /* sleep fail on busy lock */
+#define VB_DUPOK 0x10 /* permit duplicate mount busying */
int vfs_isbusy(struct mount *);
int vfs_mount_foreach_vnode(struct mount *, int (*func)(struct vnode *,
Index: sys/rwlock.h
===================================================================
RCS file: /data/src/openbsd/src/sys/sys/rwlock.h,v
retrieving revision 1.22
diff -u -p -r1.22 rwlock.h
--- sys/rwlock.h 12 Aug 2017 23:27:44 -0000 1.22
+++ sys/rwlock.h 3 Jun 2018 17:05:32 -0000
@@ -116,6 +116,7 @@ struct rwlock {
#define RW_SLEEPFAIL 0x0020UL /* fail if we slept for the lock */
#define RW_NOSLEEP 0x0040UL /* don't wait for the lock */
#define RW_RECURSEFAIL 0x0080UL /* Fail on recursion for RRW locks. */
+#define RW_DUPOK 0x0100UL /* Permit duplicate lock */
/*
* for rw_status() and rrw_status() only: exclusive lock held by
Index: kern/kern_rwlock.c
===================================================================
RCS file: /data/src/openbsd/src/sys/kern/kern_rwlock.c,v
retrieving revision 1.35
diff -u -p -r1.35 kern_rwlock.c
--- kern/kern_rwlock.c 21 Mar 2018 12:28:39 -0000 1.35
+++ kern/kern_rwlock.c 3 Jun 2018 17:00:02 -0000
@@ -223,6 +223,8 @@ _rw_enter(struct rwlock *rwl, int flags
lop_flags = LOP_NEWORDER;
if (flags & RW_WRITE)
lop_flags |= LOP_EXCLUSIVE;
+ if (flags & RW_DUPOK)
+ lop_flags |= LOP_DUPOK;
if ((flags & RW_NOSLEEP) == 0 && (flags & RW_DOWNGRADE) == 0)
WITNESS_CHECKORDER(&rwl->rwl_lock_obj, lop_flags, file, line,
NULL);
Index: kern/vfs_subr.c
===================================================================
RCS file: /data/src/openbsd/src/sys/kern/vfs_subr.c,v
retrieving revision 1.273
diff -u -p -r1.273 vfs_subr.c
--- kern/vfs_subr.c 27 May 2018 06:02:14 -0000 1.273
+++ kern/vfs_subr.c 3 Jun 2018 17:04:09 -0000
@@ -188,6 +188,11 @@ vfs_busy(struct mount *mp, int flags)
else
rwflags |= RW_NOSLEEP;
+#ifdef WITNESS
+ if (flags & VB_DUPOK)
+ rwflags |= RW_DUPOK;
+#endif
+
if (rw_enter(&mp->mnt_lock, rwflags))
return (EBUSY);
@@ -1602,7 +1607,7 @@ vfs_stall(struct proc *p, int stall)
*/
TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
if (stall) {
- error = vfs_busy(mp, VB_WRITE|VB_WAIT);
+ error = vfs_busy(mp, VB_WRITE|VB_WAIT|VB_DUPOK);
if (error) {
printf("%s: busy\n", mp->mnt_stat.f_mntonname);
allerror = error;
Index: kern/vfs_syscalls.c
===================================================================
RCS file: /data/src/openbsd/src/sys/kern/vfs_syscalls.c,v
retrieving revision 1.284
diff -u -p -r1.284 vfs_syscalls.c
--- kern/vfs_syscalls.c 2 Jun 2018 10:27:43 -0000 1.284
+++ kern/vfs_syscalls.c 3 Jun 2018 17:04:55 -0000
@@ -230,7 +230,7 @@ sys_mount(struct proc *p, void *v, regis
update:
/* Ensure that the parent mountpoint does not get unmounted. */
- error = vfs_busy(vp->v_mount, VB_READ|VB_NOWAIT);
+ error = vfs_busy(vp->v_mount, VB_READ|VB_NOWAIT|VB_DUPOK);
if (error) {
if (mp->mnt_flag & MNT_UPDATE) {
mp->mnt_flag = mntflag;
@@ -439,7 +439,7 @@ dounmount(struct mount *mp, int flags, s
error = EBUSY;
goto err;
}
- error = vfs_busy(mp, VB_WRITE|VB_WAIT);
+ error = vfs_busy(mp, VB_WRITE|VB_WAIT|VB_DUPOK);
if (error) {
if ((flags & MNT_DOOMED)) {
/*