Andrew, Here is a new diff which will record a bit more information.
It keeps track of the last 10 v_usecount changes with: - pid and ps_comm - usecount value (before and after) - stacktrace We will need: - the vnode_history (without offsets) - the panic and the backtrace of it but also keep photos. On Wed, Apr 13, 2022 at 11:42:22AM +0200, Martin Pieuchot wrote: > On 12/04/22(Tue) 14:58, Sebastien Marie wrote: > > [...] > > uvn_io: start: 0xffff8000ffab9688, type VREG, use 0, write 0, hold 0, flags > > (VBIOONFREELIST) > > tag VT_UFS, ino 14802284, on dev 4, 30 flags 0x100, effnlink 1, nlink 1 > > mode 0100644, owner 858, group 1000, size 345603015 > > => vnode_history_print 0xffff8000ffab9688, next=1 > > So we see the same crash in the pdaemon when trying to sync, at least, > an inactive page to disk... > > [...] > The vnode is mmaped here. > > [...] > The process exit here, it seems single-threaded. > > [...] > The reaper unmap all the memory and release the referenced grabbed > during mmap. > > [...] > Who is doing this? Could you print the pid of the process? It seems > the vnode has been recycled at this point. > > [...] > Same for this, has the vnode been recycled? > > > > panic: vn_lock: v_usecount == 0 > > Stopped at db_enter+0x10: popq %rbp > > TID PID UID PRFLAGS PFLAGS CPU COMMAND > > 448838 87877 858 0 0x4000000 3 qbittorrent-nox > > *281933 50305 0 0x14000 0x200 1K pagedaemon > > db_enter() at ... > > panic(...) > > vn_lock(ffff800ffab9688,81) > > uvn_io(fffffd837a66bee8,...,1,90,1) > > uvm_pager_put(fffffd837a66bee8, ....) > > uvmpd_scan_inactive(...) > > uvmpd_scan() > > uvm_pageout(ffff8000ffff5270) > > This indicates a page hasn't been freed during uvn_detach() which is > called as part of uvm_unmap_detach() by the reaper... Weird. 
> -- Sebastien Marie diff 153abd98269720429ee0b19133ca24d5c39a7f36 /home/semarie/repos/openbsd/src blob - 13c70890ccc11f131013d14b853f4f06941924cc file + sys/kern/vfs_subr.c --- sys/kern/vfs_subr.c +++ sys/kern/vfs_subr.c @@ -66,6 +66,7 @@ #include <sys/tree.h> #include <sys/specdev.h> #include <sys/atomic.h> +#include <sys/stacktrace.h> #include <netinet/in.h> @@ -126,8 +127,27 @@ void vfs_unmountall(void); void printlockedvnodes(void); #endif +#define VH_MAXST 10 /* ring size */ +#define VH_INCR 0 +#define VH_DECR 1 +struct vhistory { + pid_t vhh_pid; + char vhh_comm[_MAXCOMLEN]; /* incl NUL */ + u_int vhh_old_usecount; + u_int vhh_new_usecount; + struct stacktrace vhh_st; +}; +struct vnode_history { + u_int vh_next; /* [K] next free slot in vh_ring */ + struct vhistory vh_ring[VH_MAXST]; /* [K] ring for holding the vnode history */ +}; +void vnode_history_init(struct vnode *); +void vnode_history_record(struct vnode *, int); +void vnode_history_print(struct vnode *, int (*pr)(const char *, ...) 
__attribute__((__format__(__kprintf__,1,2)))); + struct pool vnode_pool; struct pool uvm_vnode_pool; +struct pool vnode_history_pool; static inline int rb_buf_compare(const struct buf *b1, const struct buf *b2); RBT_GENERATE(buf_rb_bufs, buf, b_rbbufs, rb_buf_compare); @@ -154,6 +174,8 @@ vntblinit(void) PR_WAITOK, "vnodes", NULL); pool_init(&uvm_vnode_pool, sizeof(struct uvm_vnode), 0, IPL_NONE, PR_WAITOK, "uvmvnodes", NULL); + pool_init(&vnode_history_pool, sizeof(struct vnode_history), 0, + IPL_NONE, PR_WAITOK, "vnodehis", NULL); TAILQ_INIT(&vnode_hold_list); TAILQ_INIT(&vnode_free_list); TAILQ_INIT(&mountlist); @@ -410,6 +432,7 @@ getnewvnode(enum vtagtype tag, struct mount *mp, const splx(s); vp = pool_get(&vnode_pool, PR_WAITOK | PR_ZERO); vp->v_uvm = pool_get(&uvm_vnode_pool, PR_WAITOK | PR_ZERO); + vp->v_history = pool_get(&vnode_history_pool, PR_WAITOK); vp->v_uvm->u_vnode = vp; uvm_obj_init(&vp->v_uvm->u_obj, &uvm_vnodeops, 0); RBT_INIT(buf_rb_bufs, &vp->v_bufs_tree); @@ -460,6 +483,7 @@ getnewvnode(enum vtagtype tag, struct mount *mp, const vp->v_socket = NULL; } cache_purge(vp); + vnode_history_init(vp); vp->v_type = VNON; vp->v_tag = tag; vp->v_op = vops; @@ -674,9 +698,13 @@ vget(struct vnode *vp, int flags) } vp->v_usecount++; + vnode_history_record(vp, VH_INCR); + if (flags & LK_TYPE_MASK) { if ((error = vn_lock(vp, flags)) != 0) { vp->v_usecount--; + vnode_history_record(vp, VH_DECR); + if (vp->v_usecount == 0 && onfreelist) vputonfreelist(vp); } @@ -700,6 +728,7 @@ vref(struct vnode *vp) panic("vref on a VNON vnode"); #endif vp->v_usecount++; + vnode_history_record(vp, VH_INCR); } void @@ -762,6 +791,7 @@ vput(struct vnode *vp) } #endif vp->v_usecount--; + vnode_history_record(vp, VH_DECR); KASSERT(vp->v_usecount > 0 || vp->v_uvcount == 0); if (vp->v_usecount > 0) { VOP_UNLOCK(vp); @@ -802,6 +832,8 @@ vrele(struct vnode *vp) } #endif vp->v_usecount--; + vnode_history_record(vp, VH_DECR); + if (vp->v_usecount > 0) { return (0); } @@ -813,7 +845,7 
@@ vrele(struct vnode *vp) } #endif - if (vn_lock(vp, LK_EXCLUSIVE)) { + if (vn_lock(vp, LK_EXCLUSIVE | LK_IOP)) { #ifdef DIAGNOSTIC vprint("vrele: cannot lock", vp); #endif @@ -824,6 +856,7 @@ vrele(struct vnode *vp) if (vp->v_usecount == 0 && !(vp->v_bioflag & VBIOONFREELIST)) vputonfreelist(vp); + return (1); } @@ -999,8 +1041,10 @@ vclean(struct vnode *vp, int flags, struct proc *p) * so that its count cannot fall to zero and generate a * race against ourselves to recycle it. */ - if ((active = vp->v_usecount) != 0) + if ((active = vp->v_usecount) != 0) { vp->v_usecount++; + vnode_history_record(vp, VH_INCR); + } /* * Prevent the vnode from being recycled or @@ -1065,6 +1109,8 @@ vclean(struct vnode *vp, int flags, struct proc *p) panic("vclean: cannot reclaim"); if (active) { vp->v_usecount--; + vnode_history_record(vp, VH_DECR); + if (vp->v_usecount == 0) { if (vp->v_holdcnt > 0) panic("vclean: not clean"); @@ -1201,6 +1247,11 @@ vgonel(struct vnode *vp, struct proc *p) if (vp->v_holdcnt > 0) panic("vgonel: not clean"); +#ifdef DIAGNOSTIC + if (!(vp->v_bioflag & VBIOONFREELIST)) + panic("vgonel: not on freelist"); +#endif + if (TAILQ_FIRST(&vnode_free_list) != vp) { TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); @@ -1275,8 +1326,7 @@ loop: /* * Print out a description of a vnode. */ -static char *typename[] = - { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" }; +static char *typename[] = { VTYPE_NAMES }; void vprint(char *label, struct vnode *vp) @@ -1315,6 +1365,8 @@ vprint(char *label, struct vnode *vp) printf("\n\t"); VOP_PRINT(vp); } + + vnode_history_print(vp, &printf); } #endif /* DEBUG || DIAGNOSTIC */ @@ -2251,7 +2303,7 @@ vfs_vnode_print(void *v, int full, { struct vnode *vp = v; - (*pr)("tag %s(%d) type %s(%d) mount %p typedata %p\n", + (*pr)("%p tag %s(%d) type %s(%d) mount %p typedata %p\n", v, (u_int)vp->v_tag >= nitems(vtags)? 
"<unk>":vtags[vp->v_tag], vp->v_tag, (u_int)vp->v_type >= nitems(vtypes)? "<unk>":vtypes[vp->v_type], @@ -2261,6 +2313,9 @@ vfs_vnode_print(void *v, int full, vp->v_data, vp->v_usecount, vp->v_writecount, vp->v_holdcnt, vp->v_numoutput); + (*pr)("flag 0x%x lflag 0x%x bioflag 0x%x\n", + vp->v_flag, vp->v_lflag, vp->v_bioflag); + /* uvm_object_printit(&vp->v_uobj, full, pr); */ if (full) { @@ -2277,6 +2332,8 @@ vfs_vnode_print(void *v, int full, (*pr)(" bp %p\n", bp); vfs_buf_print(bp, full, pr); } + + vnode_history_print(vp, pr); } } @@ -2376,3 +2433,68 @@ copy_statfs_info(struct statfs *sbp, const struct moun memcpy(&sbp->mount_info, &mp->mnt_stat.mount_info, sizeof(union mount_info)); } + +#ifdef SMALL_KERNEL +void +vnode_history_init(struct vnode *vp) +{ +} + +void +vnode_history_record(struct vnode *vp, int direction) +{ +} + +void vnode_history_print(struct vnode *vp, int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2)))) +{ +} +#else +void +vnode_history_init(struct vnode *vp) +{ + KERNEL_ASSERT_LOCKED(); + + vp->v_history->vh_next = 0; + bzero(vp->v_history, sizeof(*vp->v_history)); +} + +void +vnode_history_record(struct vnode *vp, int direction) +{ + struct proc *p = curproc; + struct vhistory *vh; + + KERNEL_ASSERT_LOCKED(); + + vh = &(vp->v_history->vh_ring[vp->v_history->vh_next]); + vp->v_history->vh_next = (vp->v_history->vh_next + 1) % VH_MAXST; + + vh->vhh_pid = p->p_p->ps_pid; + strlcpy(vh->vhh_comm, p->p_p->ps_comm, sizeof(vh->vhh_comm)); + vh->vhh_old_usecount = direction == VH_INCR ? (vp->v_usecount - 1) : (vp->v_usecount + 1); + vh->vhh_new_usecount = vp->v_usecount; + stacktrace_save_at(&vh->vhh_st, 0); +} + +void +vnode_history_print(struct vnode *vp, int (*pr)(const char *, ...) 
__attribute__((__format__(__kprintf__,1,2)))) +{ + int i; + + KERNEL_ASSERT_LOCKED(); + + pr("==> vnode_history_print %p, next=%d\n", vp, vp->v_history->vh_next); + + for (i = 0; i < VH_MAXST; i++) { + struct vhistory *vh = &(vp->v_history->vh_ring[i]); + + if (vh->vhh_st.st_count == 0) + continue; + + pr(" [%d%s] %s[%d] usecount %d>%d\n", i, vp->v_history->vh_next == i ? ">" : "", + vh->vhh_comm, vh->vhh_pid, + vh->vhh_old_usecount, vh->vhh_new_usecount); + stacktrace_print(&vh->vhh_st, pr); + } +} +#endif blob - 87182a180504da6342a7a6dfebb3d2e5dc499194 file + sys/kern/vfs_vnops.c --- sys/kern/vfs_vnops.c +++ sys/kern/vfs_vnops.c @@ -566,6 +566,17 @@ vn_lock(struct vnode *vp, int flags) int error, xlocked, do_wakeup; do { +#ifdef DIAGNOSTIC + /* + * The vn_lock() function must not be called when the + * vnode's reference count is zero. + */ + if ((flags & LK_IOP) == 0 && (vp->v_usecount == 0)) { + vprint("vn_lock: v_usecount == 0", vp); + panic("vn_lock: v_usecount == 0"); + } +#endif + mtx_enter(&vnode_mtx); if (vp->v_lflag & VXLOCK) { vp->v_lflag |= VXWANT; blob - 2388b5955dcaf4f6a6cb6a10840d5cb7b0e530c1 file + sys/sys/lock.h --- sys/sys/lock.h +++ sys/sys/lock.h @@ -51,5 +51,6 @@ /* LK_ specific */ #define LK_DRAIN 0x1000UL /* wait for all lock activity to end */ #define LK_RETRY 0x2000UL /* vn_lock: retry until locked */ +#define LK_IOP 0x4000UL /* vn_lock: internal operation (with v_usecount=0) */ #endif /* !_LOCK_H_ */ blob - 0ba69cdb12e3466472d18fe8dc163bfeb050b9e9 file + sys/sys/vnode.h --- sys/sys/vnode.h +++ sys/sys/vnode.h @@ -85,11 +85,14 @@ RBT_HEAD(buf_rb_bufs, buf); struct namecache; RBT_HEAD(namecache_rb_cache, namecache); +struct vnode_history; + /* * Locks used to protect struct members in struct vnode: * a atomic * V vnode_mtx * B IPL_BIO + * K KERNEL_LOCK */ struct uvm_vnode; struct vnode { @@ -129,6 +131,8 @@ struct vnode { void *v_data; /* private data for fs */ struct selinfo v_selectinfo; /* identity of poller(s) */ + + struct vnode_history 
*v_history; /* [K] hold v_usecount change history */ }; #define v_mountedhere v_un.vu_mountedhere #define v_socket v_un.vu_socket blob - c9e7dd6328876cec5a6c0418c67a5d0d5a8df676 file + sys/uvm/uvm_vnode.c --- sys/uvm/uvm_vnode.c +++ sys/uvm/uvm_vnode.c @@ -1147,6 +1147,11 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t *pps, int npag vn = uvn->u_vnode; file_offset = pps[0]->offset; +#ifndef SMALL_KERNEL + if (vn->v_usecount == 0) + vprint("uvn_io: start", vn); +#endif + /* check for sync'ing I/O. */ while (uvn->u_flags & UVM_VNODE_IOSYNC) { if (waitf == M_NOWAIT) {