Module Name: src Committed By: hannken Date: Fri Jul 8 07:42:47 UTC 2022
Modified Files: src/sys/kern: vfs_trans.c Log Message: While one thread runs vgone() it is possible for another thread to grab a "v_mount" that will be freed before it uses this mount for fstrans_start(). Add a hashtab to lookup our private mount data "fstrans_mount_info" and use "mp" as an opaque key for lookup. Reported-by: syzbot+54dc9ac0804a97b59...@syzkaller.appspotmail.com To generate a diff of this commit: cvs rdiff -u -r1.65 -r1.66 src/sys/kern/vfs_trans.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/kern/vfs_trans.c diff -u src/sys/kern/vfs_trans.c:1.65 src/sys/kern/vfs_trans.c:1.66 --- src/sys/kern/vfs_trans.c:1.65 Fri Jul 8 07:42:05 2022 +++ src/sys/kern/vfs_trans.c Fri Jul 8 07:42:47 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: vfs_trans.c,v 1.65 2022/07/08 07:42:05 hannken Exp $ */ +/* $NetBSD: vfs_trans.c,v 1.66 2022/07/08 07:42:47 hannken Exp $ */ /*- * Copyright (c) 2007, 2020 The NetBSD Foundation, Inc. @@ -30,7 +30,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.65 2022/07/08 07:42:05 hannken Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.66 2022/07/08 07:42:47 hannken Exp $"); /* * File system transaction operations. @@ -44,6 +44,7 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_trans.c, #include <sys/systm.h> #include <sys/atomic.h> #include <sys/buf.h> +#include <sys/hash.h> #include <sys/kmem.h> #include <sys/mount.h> #include <sys/pserialize.h> @@ -54,6 +55,8 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_trans.c, #include <miscfs/specfs/specdev.h> +#define FSTRANS_MOUNT_HASHSIZE 32 + enum fstrans_lock_type { FSTRANS_LAZY, /* Granted while not suspended */ FSTRANS_SHARED /* Granted while not suspending */ @@ -81,10 +84,12 @@ struct fstrans_mount_info { unsigned int fmi_ref_cnt; bool fmi_gone; bool fmi_cow_change; + SLIST_ENTRY(fstrans_mount_info) fmi_hash; LIST_HEAD(, fscow_handler) fmi_cow_handler; struct mount *fmi_mount; struct lwp *fmi_owner; }; +SLIST_HEAD(fstrans_mount_hashhead, fstrans_mount_info); static kmutex_t vfs_suspend_lock /* Serialize suspensions. */ __cacheline_aligned; @@ -97,8 +102,12 @@ static LIST_HEAD(fstrans_lwp_head, fstra /* List of all fstrans_lwp_info. */ static pool_cache_t fstrans_lwp_cache; /* Cache of fstrans_lwp_info. */ +static u_long fstrans_mount_hashmask; +static struct fstrans_mount_hashhead *fstrans_mount_hashtab; static int fstrans_gone_count; /* Number of fstrans_mount_info gone. */ +static inline uint32_t fstrans_mount_hash(struct mount *); +static inline struct fstrans_mount_info *fstrans_mount_get(struct mount *); static void fstrans_mount_dtor(struct fstrans_mount_info *); static void fstrans_clear_lwp_info(void); static inline struct fstrans_lwp_info * @@ -116,70 +125,6 @@ static void cow_change_done(struct fstra extern struct mount *dead_rootmount; -#if defined(DIAGNOSTIC) - -struct fstrans_debug_mount { - struct mount *fdm_mount; - SLIST_ENTRY(fstrans_debug_mount) fdm_list; -}; - -static SLIST_HEAD(, fstrans_debug_mount) fstrans_debug_mount_head = - SLIST_HEAD_INITIALIZER(fstrans_debug_mount_head); - -static void -fstrans_debug_mount(struct mount *mp) -{ - struct fstrans_debug_mount *fdm, *new; - - KASSERT(mutex_owned(&fstrans_lock)); - - mutex_exit(&fstrans_lock); - new = kmem_alloc(sizeof(*new), KM_SLEEP); - new->fdm_mount = mp; - mutex_enter(&fstrans_lock); - - SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list) - KASSERT(fdm->fdm_mount != mp); - SLIST_INSERT_HEAD(&fstrans_debug_mount_head, new, fdm_list); -} - -static void -fstrans_debug_unmount(struct mount *mp) -{ - struct fstrans_debug_mount *fdm; - - KASSERT(mutex_owned(&fstrans_lock)); - - SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list) - if (fdm->fdm_mount == mp) - break; - KASSERT(fdm != NULL); - SLIST_REMOVE(&fstrans_debug_mount_head, fdm, - fstrans_debug_mount, fdm_list); - kmem_free(fdm, sizeof(*fdm)); -} - -static void -fstrans_debug_validate_mount(struct mount *mp) -{ - struct fstrans_debug_mount *fdm; - - KASSERT(mutex_owned(&fstrans_lock)); - - SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list) - if (fdm->fdm_mount == mp) - break; - KASSERTMSG(fdm != NULL, "mount %p invalid", mp); -} - -#else /* defined(DIAGNOSTIC) */ - -#define fstrans_debug_mount(mp) -#define fstrans_debug_unmount(mp) -#define fstrans_debug_validate_mount(mp) - -#endif /* defined(DIAGNOSTIC) */ - /* * Initialize. */ @@ -197,6 +142,8 @@ fstrans_init(void) coherency_unit, 0, 0, "fstlwp", NULL, IPL_NONE, fstrans_lwp_pcc, fstrans_lwp_pcd, NULL); KASSERT(fstrans_lwp_cache != NULL); + fstrans_mount_hashtab = hashinit(FSTRANS_MOUNT_HASHSIZE, HASH_SLIST, + true, &fstrans_mount_hashmask); } /* @@ -265,6 +212,38 @@ fstrans_lwp_dtor(lwp_t *l) } /* + * mount pointer to hash + */ +static inline uint32_t +fstrans_mount_hash(struct mount *mp) +{ + + return hash32_buf(&mp, sizeof(mp), HASH32_BUF_INIT) & + fstrans_mount_hashmask; +} + +/* + * retrieve fstrans_mount_info by mount or NULL + */ +static inline struct fstrans_mount_info * +fstrans_mount_get(struct mount *mp) +{ + uint32_t indx; + struct fstrans_mount_info *fmi; + + KASSERT(mutex_owned(&fstrans_lock)); + + indx = fstrans_mount_hash(mp); + SLIST_FOREACH(fmi, &fstrans_mount_hashtab[indx], fmi_hash) { + if (fmi->fmi_mount == mp) { + return fmi; + } + } + + return NULL; +} + +/* * Dereference mount state. */ static void @@ -296,8 +275,11 @@ fstrans_mount_dtor(struct fstrans_mount_ int fstrans_mount(struct mount *mp) { + uint32_t indx; struct fstrans_mount_info *newfmi; + indx = fstrans_mount_hash(mp); + newfmi = kmem_alloc(sizeof(*newfmi), KM_SLEEP); newfmi->fmi_state = FSTRANS_NORMAL; newfmi->fmi_ref_cnt = 1; @@ -308,8 +290,7 @@ fstrans_mount(struct mount *mp) newfmi->fmi_owner = NULL; mutex_enter(&fstrans_lock); - mp->mnt_transinfo = newfmi; - fstrans_debug_mount(mp); + SLIST_INSERT_HEAD(&fstrans_mount_hashtab[indx], newfmi, fmi_hash); mutex_exit(&fstrans_lock); return 0; @@ -321,14 +302,17 @@ fstrans_mount(struct mount *mp) void fstrans_unmount(struct mount *mp) { - struct fstrans_mount_info *fmi = mp->mnt_transinfo; + uint32_t indx; + struct fstrans_mount_info *fmi; - KASSERT(fmi != NULL); + indx = fstrans_mount_hash(mp); mutex_enter(&fstrans_lock); - fstrans_debug_unmount(mp); + fmi = fstrans_mount_get(mp); + KASSERT(fmi != NULL); fmi->fmi_gone = true; - mp->mnt_transinfo = NULL; + SLIST_REMOVE(&fstrans_mount_hashtab[indx], + fmi, fstrans_mount_info, fmi_hash); fstrans_gone_count += 1; fstrans_mount_dtor(fmi); mutex_exit(&fstrans_lock); @@ -409,18 +393,19 @@ fstrans_alloc_lwp_info(struct mount *mp) KASSERT(fli->fli_alias == NULL); KASSERT(fli->fli_mountinfo == NULL); KASSERT(fli->fli_self == NULL); - fli->fli_succ = curlwp->l_fstrans; - curlwp->l_fstrans = fli; /* - * Attach the entry to the mount if its mnt_transinfo is valid. + * Attach the mount info if it is valid. */ mutex_enter(&fstrans_lock); + fmi = fstrans_mount_get(mp); + if (fmi == NULL) { + mutex_exit(&fstrans_lock); + pool_cache_put(fstrans_lwp_cache, fli); + return NULL; + } fli->fli_self = curlwp; - fstrans_debug_validate_mount(mp); - fmi = mp->mnt_transinfo; - KASSERT(fmi != NULL); fli->fli_mount = mp; fli->fli_mountinfo = fmi; fmi->fmi_ref_cnt += 1; @@ -429,6 +414,9 @@ fstrans_alloc_lwp_info(struct mount *mp) } while (mp && mp->mnt_lower); mutex_exit(&fstrans_lock); + fli->fli_succ = curlwp->l_fstrans; + curlwp->l_fstrans = fli; + if (mp) { fli->fli_alias = fstrans_alloc_lwp_info(mp); fli->fli_alias->fli_alias_cnt++; @@ -462,10 +450,6 @@ fstrans_get_lwp_info(struct mount *mp, b if (do_alloc) { if (__predict_false(fli == NULL)) fli = fstrans_alloc_lwp_info(mp); - KASSERT(fli != NULL); - KASSERT(!fli->fli_mountinfo->fmi_gone); - } else { - KASSERT(fli != NULL); } return fli; @@ -500,14 +484,11 @@ _fstrans_start(struct mount *mp, enum fs struct fstrans_lwp_info *fli; struct fstrans_mount_info *fmi; -#ifndef FSTRANS_DEAD_ENABLED - if (mp == dead_rootmount) - return 0; -#endif - ASSERT_SLEEPABLE(); fli = fstrans_get_lwp_info(mp, true); + if (fli == NULL) + return 0; fmi = fli->fli_mountinfo; if (fli->fli_trans_cnt > 0) { @@ -518,6 +499,9 @@ _fstrans_start(struct mount *mp, enum fs s = pserialize_read_enter(); if (__predict_true(grant_lock(fmi, lock_type))) { + + KASSERT(!fmi->fmi_gone); + fli->fli_trans_cnt = 1; fli->fli_lock_type = lock_type; pserialize_read_exit(s); @@ -574,12 +558,9 @@ fstrans_done(struct mount *mp) struct fstrans_lwp_info *fli; struct fstrans_mount_info *fmi; -#ifndef FSTRANS_DEAD_ENABLED - if (mp == dead_rootmount) - return; -#endif - fli = fstrans_get_lwp_info(mp, false); + if (fli == NULL) + return; fmi = fli->fli_mountinfo; KASSERT(fli->fli_trans_cnt > 0); @@ -619,6 +600,8 @@ fstrans_held(struct mount *mp) KASSERT(mp != dead_rootmount); fli = fstrans_get_lwp_info(mp, true); + if (fli == NULL) + return 0; fmi = fli->fli_mountinfo; return (fli->fli_trans_cnt > 0 || fmi->fmi_owner == curlwp); @@ -636,6 +619,8 @@ fstrans_is_owner(struct mount *mp) KASSERT(mp != dead_rootmount); fli = fstrans_get_lwp_info(mp, true); + if (fli == NULL) + return 0; fmi = fli->fli_mountinfo; return (fmi->fmi_owner == curlwp); @@ -681,6 +666,8 @@ fstrans_setstate(struct mount *mp, enum KASSERT(mp != dead_rootmount); fli = fstrans_get_lwp_info(mp, true); + if (fli == NULL) + return ENOENT; fmi = fli->fli_mountinfo; old_state = fmi->fmi_state; if (old_state == new_state) @@ -730,6 +717,7 @@ fstrans_getstate(struct mount *mp) KASSERT(mp != dead_rootmount); fli = fstrans_get_lwp_info(mp, true); + KASSERT(fli != NULL); fmi = fli->fli_mountinfo; return fmi->fmi_state; @@ -748,6 +736,8 @@ vfs_suspend(struct mount *mp, int nowait return EOPNOTSUPP; fli = fstrans_get_lwp_info(mp, true); + if (fli == NULL) + return ENOENT; if (nowait) { if (!mutex_tryenter(&vfs_suspend_lock)) @@ -865,7 +855,7 @@ fscow_establish(struct mount *mp, int (* KASSERT(mp != dead_rootmount); mutex_enter(&fstrans_lock); - fmi = mp->mnt_transinfo; + fmi = fstrans_mount_get(mp); KASSERT(fmi != NULL); fmi->fmi_ref_cnt += 1; mutex_exit(&fstrans_lock); @@ -893,8 +883,10 @@ fscow_disestablish(struct mount *mp, int KASSERT(mp != dead_rootmount); - fmi = mp->mnt_transinfo; + mutex_enter(&fstrans_lock); + fmi = fstrans_mount_get(mp); KASSERT(fmi != NULL); + mutex_exit(&fstrans_lock); cow_change_enter(fmi); LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list) @@ -941,6 +933,7 @@ fscow_run(struct buf *bp, bool data_vali } fli = fstrans_get_lwp_info(mp, true); + KASSERT(fli != NULL); fmi = fli->fli_mountinfo; /* @@ -1058,9 +1051,14 @@ fstrans_print_lwp(struct proc *p, struct static void fstrans_print_mount(struct mount *mp, int verbose) { + uint32_t indx; struct fstrans_mount_info *fmi; - fmi = mp->mnt_transinfo; + indx = fstrans_mount_hash(mp); + SLIST_FOREACH(fmi, &fstrans_mount_hashtab[indx], fmi_hash) + if (fmi->fmi_mount == mp) + break; + if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL)) return;