On Fri, Apr 08, 2022 at 06:38:47AM +0200, Sebastien Marie wrote: > On Thu, Apr 07, 2022 at 02:53:07PM +0200, Alexander Bluhm wrote: > > On Wed, Apr 06, 2022 at 03:23:55PM +0200, Sebastien Marie wrote: > > > > panic: vref used where vget required > > > > > > could you try to reproduce with the following kernel diff ? > > > > My machine is still building release, before it took a day or two > > to crash. But it has already printed some of your warnings. My > > serial console creates time stamps, so you see how often it happens. > > The rest of the time it was quiet. > > > > bluhm > > I have a diff for adding some stacktrace history for vnode (keeping track of > lasts vref/vrele/vget/vput). but I am not sure it would be efficient to found > the underline problem.
let's try it. the following diff adds stacktrace keeping inside the vnode. each vnode will save a stacktrace on every v_usecount change (vget/vput/vref/vrele operation). the stacktraces are kept in a ring (9 slots, to avoid making the vnode struct too big). if powerpc has different stack limitations than amd64/i386, see VH_MAXST inside sys/vnode.h to adjust the size of the ring. ddb "show vnode /f" and vprint() will call the vnode_history_print() function. I also added a stricter vn_lock() function. before, I was only checking if the vnode was in the freelist. now I am checking that v_usecount != 0 (with a special case, as vrele() voluntarily calls vn_lock() with v_usecount==0). due to the vn_lock change, the powerpc release(8) build should panic(9) early. the purpose is to look at the stacktrace history to know which functions to review. the problem could be a race, an unmanaged sleeping-point, or a missing lock inside some function. please report the panic() backtrace with the vprint() output or, if no vprint, ddb "show vnode /f 0xXYZ" Andrew, if you write the output by hand, you could send me some photos if that is simpler for you: the stacktrace history could be somewhat unpleasant to transcribe manually. The diff below applies on top of -current (please discard the previous diff). I am running it on i386 without problems for now. Thanks. -- Sebastien Marie diff 34ceb667563449b0c2871b87a9bb5cb5ae05fa82 /home/semarie/repos/openbsd/src blob - 13c70890ccc11f131013d14b853f4f06941924cc file + sys/kern/vfs_subr.c --- sys/kern/vfs_subr.c +++ sys/kern/vfs_subr.c @@ -126,6 +126,10 @@ void vfs_unmountall(void); void printlockedvnodes(void); #endif +void vnode_history_init(struct vnode *); +void vnode_history_record(struct vnode *); +void vnode_history_print(struct vnode *, int (*pr)(const char *, ...) 
__attribute__((__format__(__kprintf__,1,2)))); + struct pool vnode_pool; struct pool uvm_vnode_pool; @@ -460,6 +464,7 @@ getnewvnode(enum vtagtype tag, struct mount *mp, const vp->v_socket = NULL; } cache_purge(vp); + vnode_history_init(vp); vp->v_type = VNON; vp->v_tag = tag; vp->v_op = vops; @@ -674,9 +679,15 @@ vget(struct vnode *vp, int flags) } vp->v_usecount++; + + vnode_history_record(vp); + if (flags & LK_TYPE_MASK) { if ((error = vn_lock(vp, flags)) != 0) { vp->v_usecount--; + + vnode_history_record(vp); + if (vp->v_usecount == 0 && onfreelist) vputonfreelist(vp); } @@ -700,6 +711,7 @@ vref(struct vnode *vp) panic("vref on a VNON vnode"); #endif vp->v_usecount++; + vnode_history_record(vp); } void @@ -762,6 +774,7 @@ vput(struct vnode *vp) } #endif vp->v_usecount--; + vnode_history_record(vp); KASSERT(vp->v_usecount > 0 || vp->v_uvcount == 0); if (vp->v_usecount > 0) { VOP_UNLOCK(vp); @@ -802,6 +815,8 @@ vrele(struct vnode *vp) } #endif vp->v_usecount--; + vnode_history_record(vp); + if (vp->v_usecount > 0) { return (0); } @@ -813,7 +828,7 @@ vrele(struct vnode *vp) } #endif - if (vn_lock(vp, LK_EXCLUSIVE)) { + if (vn_lock(vp, LK_EXCLUSIVE | LK_IOP)) { #ifdef DIAGNOSTIC vprint("vrele: cannot lock", vp); #endif @@ -824,6 +839,7 @@ vrele(struct vnode *vp) if (vp->v_usecount == 0 && !(vp->v_bioflag & VBIOONFREELIST)) vputonfreelist(vp); + return (1); } @@ -999,8 +1024,10 @@ vclean(struct vnode *vp, int flags, struct proc *p) * so that its count cannot fall to zero and generate a * race against ourselves to recycle it. 
*/ - if ((active = vp->v_usecount) != 0) + if ((active = vp->v_usecount) != 0) { vp->v_usecount++; + vnode_history_record(vp); + } /* * Prevent the vnode from being recycled or @@ -1065,6 +1092,8 @@ vclean(struct vnode *vp, int flags, struct proc *p) panic("vclean: cannot reclaim"); if (active) { vp->v_usecount--; + vnode_history_record(vp); + if (vp->v_usecount == 0) { if (vp->v_holdcnt > 0) panic("vclean: not clean"); @@ -1201,6 +1230,11 @@ vgonel(struct vnode *vp, struct proc *p) if (vp->v_holdcnt > 0) panic("vgonel: not clean"); +#ifdef DIAGNOSTIC + if (!(vp->v_bioflag & VBIOONFREELIST)) + panic("vgonel: not on freelist"); +#endif + if (TAILQ_FIRST(&vnode_free_list) != vp) { TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); @@ -1275,8 +1309,7 @@ loop: /* * Print out a description of a vnode. */ -static char *typename[] = - { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" }; +static char *typename[] = { VTYPE_NAMES }; void vprint(char *label, struct vnode *vp) @@ -1315,6 +1348,8 @@ vprint(char *label, struct vnode *vp) printf("\n\t"); VOP_PRINT(vp); } + + vnode_history_print(vp, &printf); } #endif /* DEBUG || DIAGNOSTIC */ @@ -2251,7 +2286,7 @@ vfs_vnode_print(void *v, int full, { struct vnode *vp = v; - (*pr)("tag %s(%d) type %s(%d) mount %p typedata %p\n", + (*pr)("%p tag %s(%d) type %s(%d) mount %p typedata %p\n", v, (u_int)vp->v_tag >= nitems(vtags)? "<unk>":vtags[vp->v_tag], vp->v_tag, (u_int)vp->v_type >= nitems(vtypes)? 
"<unk>":vtypes[vp->v_type], @@ -2261,6 +2296,9 @@ vfs_vnode_print(void *v, int full, vp->v_data, vp->v_usecount, vp->v_writecount, vp->v_holdcnt, vp->v_numoutput); + (*pr)("flag 0x%x lflag 0x%x bioflag 0x%x\n", + vp->v_flag, vp->v_lflag, vp->v_bioflag); + /* uvm_object_printit(&vp->v_uobj, full, pr); */ if (full) { @@ -2277,6 +2315,8 @@ vfs_vnode_print(void *v, int full, (*pr)(" bp %p\n", bp); vfs_buf_print(bp, full, pr); } + + vnode_history_print(vp, pr); } } @@ -2376,3 +2416,60 @@ copy_statfs_info(struct statfs *sbp, const struct moun memcpy(&sbp->mount_info, &mp->mnt_stat.mount_info, sizeof(union mount_info)); } + +#ifdef SMALL_KERNEL +void +vnode_history_init(struct vnode *vp) +{ +} + +void +vnode_history_record(struct vnode *vp) +{ +} + +void vnode_history_print(struct vnode *vp, int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2)))) +{ +} +#else +void +vnode_history_init(struct vnode *vp) +{ + KERNEL_ASSERT_LOCKED(); + + bzero(&vp->v_history, sizeof(struct vhistory)); +} + +void +vnode_history_record(struct vnode *vp) +{ + struct stacktrace *st; + + KERNEL_ASSERT_LOCKED(); + + st = &(vp->v_history.vh_st[vp->v_history.vh_next]); + vp->v_history.vh_next = (vp->v_history.vh_next + 1) % VH_MAXST; + + stacktrace_save_at(st, 0); +} + +void +vnode_history_print(struct vnode *vp, int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2)))) +{ + int i; + + KERNEL_ASSERT_LOCKED(); + + pr("==> vnode_history_print %p, next=%d\n", vp, vp->v_history.vh_next); + + for (i = 0; i < VH_MAXST; i++) { + struct stacktrace *st = &(vp->v_history.vh_st[i]); + + if (st->st_count == 0) + continue; + + pr(" [%d%s]\n", i, vp->v_history.vh_next == i ? 
">" : ""); + stacktrace_print(st, pr); + } +} +#endif blob - 87182a180504da6342a7a6dfebb3d2e5dc499194 file + sys/kern/vfs_vnops.c --- sys/kern/vfs_vnops.c +++ sys/kern/vfs_vnops.c @@ -566,6 +566,17 @@ vn_lock(struct vnode *vp, int flags) int error, xlocked, do_wakeup; do { +#ifdef DIAGNOSTIC + /* + * The vn_lock() function must not be called when the + * vnode's reference count is zero. + */ + if ((flags & LK_IOP) == 0 && (vp->v_usecount == 0)) { + vprint("vn_lock: v_usecount == 0", vp); + panic("vn_lock: v_usecount == 0"); + } +#endif + mtx_enter(&vnode_mtx); if (vp->v_lflag & VXLOCK) { vp->v_lflag |= VXWANT; blob - 2388b5955dcaf4f6a6cb6a10840d5cb7b0e530c1 file + sys/sys/lock.h --- sys/sys/lock.h +++ sys/sys/lock.h @@ -51,5 +51,6 @@ /* LK_ specific */ #define LK_DRAIN 0x1000UL /* wait for all lock activity to end */ #define LK_RETRY 0x2000UL /* vn_lock: retry until locked */ +#define LK_IOP 0x4000UL /* vn_lock: internal operation (with v_usecount=0) */ #endif /* !_LOCK_H_ */ blob - f7dc8d8bc364e7727c41d17a48fea2644d2eb8c1 file + sys/sys/vnode.h --- sys/sys/vnode.h +++ sys/sys/vnode.h @@ -40,6 +40,7 @@ #include <sys/queue.h> #include <sys/selinfo.h> #include <sys/tree.h> +#include <sys/stacktrace.h> /* * The vnode is the focus of all file activity in UNIX. 
There is a @@ -85,9 +86,18 @@ RBT_HEAD(buf_rb_bufs, buf); struct namecache; RBT_HEAD(namecache_rb_cache, namecache); + +#define VH_MAXST 9 +struct vhistory { + u_int vh_next; /* next free slot in vh_st */ + struct stacktrace vh_st[VH_MAXST]; +}; + /* * Locks used to protect struct members in struct vnode: * a atomic + * B IPL_BIO + * K KERNEL_LOCK * V vnode_mtx */ struct uvm_vnode; @@ -128,6 +137,8 @@ struct vnode { void *v_data; /* private data for fs */ struct selinfo v_selectinfo; /* identity of poller(s) */ + + struct vhistory v_history; /* [K] hold v_usecount change history */ }; #define v_mountedhere v_un.vu_mountedhere #define v_socket v_un.vu_socket blob - c9e7dd6328876cec5a6c0418c67a5d0d5a8df676 file + sys/uvm/uvm_vnode.c --- sys/uvm/uvm_vnode.c +++ sys/uvm/uvm_vnode.c @@ -1147,6 +1147,11 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t *pps, int npag vn = uvn->u_vnode; file_offset = pps[0]->offset; +#ifndef SMALL_KERNEL + if (vn->v_usecount == 0) + vprint("uvn_io: start", vn); +#endif + /* check for sync'ing I/O. */ while (uvn->u_flags & UVM_VNODE_IOSYNC) { if (waitf == M_NOWAIT) { @@ -1225,6 +1230,11 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t *pps, int npag if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0) result = vn_lock(vn, LK_EXCLUSIVE | LK_RECURSEFAIL | lkflags); if (result == 0) { +#ifndef SMALL_KERNEL + if (vn->v_usecount == 0) + vprint("uvn_io: after lock", vn); +#endif + /* NOTE: vnode now locked! */ if (rw == UIO_READ) result = VOP_READ(vn, &uio, 0, curproc->p_ucred);