A previous fix from gnezdo@ pointed out that `u_flags' accesses should be serialized by `vmobjlock'. The diff below documents this and fixes the remaining places where the lock isn't yet taken. One exception still remains: the first loop of uvm_vnp_sync(). That one cannot be fixed right now due to possible deadlocks, but that's not a reason to leave the rest of this file undocumented and unfixed.
This has been tested on amd64 and arm64. Comments? Oks? Index: uvm/uvm_vnode.c =================================================================== RCS file: /cvs/src/sys/uvm/uvm_vnode.c,v retrieving revision 1.128 diff -u -p -r1.128 uvm_vnode.c --- uvm/uvm_vnode.c 10 Sep 2022 16:14:36 -0000 1.128 +++ uvm/uvm_vnode.c 10 Sep 2022 18:23:57 -0000 @@ -68,11 +68,8 @@ * we keep a simpleq of vnodes that are currently being sync'd. */ -LIST_HEAD(uvn_list_struct, uvm_vnode); -struct uvn_list_struct uvn_wlist; /* writeable uvns */ - -SIMPLEQ_HEAD(uvn_sq_struct, uvm_vnode); -struct uvn_sq_struct uvn_sync_q; /* sync'ing uvns */ +LIST_HEAD(, uvm_vnode) uvn_wlist; /* [K] writeable uvns */ +SIMPLEQ_HEAD(, uvm_vnode) uvn_sync_q; /* [S] sync'ing uvns */ struct rwlock uvn_sync_lock; /* locks sync operation */ extern int rebooting; @@ -144,41 +141,40 @@ uvn_attach(struct vnode *vp, vm_prot_t a struct partinfo pi; u_quad_t used_vnode_size = 0; - /* first get a lock on the uvn. */ - while (uvn->u_flags & UVM_VNODE_BLOCKED) { - uvn->u_flags |= UVM_VNODE_WANTED; - tsleep_nsec(uvn, PVM, "uvn_attach", INFSLP); - } - /* if we're mapping a BLK device, make sure it is a disk. */ if (vp->v_type == VBLK && bdevsw[major(vp->v_rdev)].d_type != D_DISK) { return NULL; } + /* first get a lock on the uvn. */ + rw_enter(uvn->u_obj.vmobjlock, RW_WRITE); + while (uvn->u_flags & UVM_VNODE_BLOCKED) { + uvn->u_flags |= UVM_VNODE_WANTED; + rwsleep_nsec(uvn, uvn->u_obj.vmobjlock, PVM, "uvn_attach", + INFSLP); + } + /* * now uvn must not be in a blocked state. * first check to see if it is already active, in which case * we can bump the reference count, check to see if we need to * add it to the writeable list, and then return. */ - rw_enter(uvn->u_obj.vmobjlock, RW_WRITE); if (uvn->u_flags & UVM_VNODE_VALID) { /* already active? */ KASSERT(uvn->u_obj.uo_refs > 0); uvn->u_obj.uo_refs++; /* bump uvn ref! 
*/ - rw_exit(uvn->u_obj.vmobjlock); /* check for new writeable uvn */ if ((accessprot & PROT_WRITE) != 0 && (uvn->u_flags & UVM_VNODE_WRITEABLE) == 0) { - LIST_INSERT_HEAD(&uvn_wlist, uvn, u_wlist); - /* we are now on wlist! */ uvn->u_flags |= UVM_VNODE_WRITEABLE; + LIST_INSERT_HEAD(&uvn_wlist, uvn, u_wlist); } + rw_exit(uvn->u_obj.vmobjlock); return (&uvn->u_obj); } - rw_exit(uvn->u_obj.vmobjlock); /* * need to call VOP_GETATTR() to get the attributes, but that could @@ -189,6 +185,7 @@ uvn_attach(struct vnode *vp, vm_prot_t a * it. */ uvn->u_flags = UVM_VNODE_ALOCK; + rw_exit(uvn->u_obj.vmobjlock); if (vp->v_type == VBLK) { /* @@ -213,9 +210,11 @@ uvn_attach(struct vnode *vp, vm_prot_t a } if (result != 0) { + rw_enter(uvn->u_obj.vmobjlock, RW_WRITE); if (uvn->u_flags & UVM_VNODE_WANTED) wakeup(uvn); uvn->u_flags = 0; + rw_exit(uvn->u_obj.vmobjlock); return NULL; } @@ -236,18 +235,19 @@ uvn_attach(struct vnode *vp, vm_prot_t a uvn->u_nio = 0; uvn->u_size = used_vnode_size; - /* if write access, we need to add it to the wlist */ - if (accessprot & PROT_WRITE) { - LIST_INSERT_HEAD(&uvn_wlist, uvn, u_wlist); - uvn->u_flags |= UVM_VNODE_WRITEABLE; /* we are on wlist! */ - } - /* * add a reference to the vnode. this reference will stay as long * as there is a valid mapping of the vnode. dropped when the * reference count goes to zero. 
*/ vref(vp); + + /* if write access, we need to add it to the wlist */ + if (accessprot & PROT_WRITE) { + uvn->u_flags |= UVM_VNODE_WRITEABLE; + LIST_INSERT_HEAD(&uvn_wlist, uvn, u_wlist); + } + if (oldflags & UVM_VNODE_WANTED) wakeup(uvn); @@ -273,6 +273,7 @@ uvn_reference(struct uvm_object *uobj) struct uvm_vnode *uvn = (struct uvm_vnode *) uobj; #endif + rw_enter(uobj->vmobjlock, RW_WRITE); #ifdef DEBUG if ((uvn->u_flags & UVM_VNODE_VALID) == 0) { printf("uvn_reference: ref=%d, flags=0x%x\n", @@ -280,7 +281,6 @@ uvn_reference(struct uvm_object *uobj) panic("uvn_reference: invalid state"); } #endif - rw_enter(uobj->vmobjlock, RW_WRITE); uobj->uo_refs++; rw_exit(uobj->vmobjlock); } @@ -858,6 +858,8 @@ uvn_cluster(struct uvm_object *uobj, vof struct uvm_vnode *uvn = (struct uvm_vnode *) uobj; *loffset = offset; + KASSERT(rw_write_held(uobj->vmobjlock)); + if (*loffset >= uvn->u_size) panic("uvn_cluster: offset out of range"); @@ -867,8 +869,6 @@ uvn_cluster(struct uvm_object *uobj, vof *hoffset = *loffset + MAXBSIZE; if (*hoffset > round_page(uvn->u_size)) /* past end? */ *hoffset = round_page(uvn->u_size); - - return; } /* @@ -1132,8 +1132,9 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t off_t file_offset; int waitf, result, mapinflags; size_t got, wanted; - int netunlocked = 0; + int vnlocked, netunlocked = 0; int lkflags = (flags & PGO_NOWAIT) ? LK_NOWAIT : 0; + voff_t uvnsize; KASSERT(rw_write_held(uobj->vmobjlock)); @@ -1172,6 +1173,8 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t * (this time with sleep ok). */ uvn->u_nio++; /* we have an I/O in progress! */ + vnlocked = (uvn->u_flags & UVM_VNODE_VNISLOCKED); + uvnsize = uvn->u_size; rw_exit(uobj->vmobjlock); if (kva == 0) kva = uvm_pagermapin(pps, npages, @@ -1185,8 +1188,8 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t /* fill out uio/iov */ iov.iov_base = (caddr_t) kva; wanted = (size_t)npages << PAGE_SHIFT; - if (file_offset + wanted > uvn->u_size) - wanted = uvn->u_size - file_offset; /* XXX: needed? 
*/ + if (file_offset + wanted > uvnsize) + wanted = uvnsize - file_offset; /* XXX: needed? */ iov.iov_len = wanted; uio.uio_iov = &iov; uio.uio_iovcnt = 1; @@ -1217,7 +1220,7 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t */ result = 0; KERNEL_LOCK(); - if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0) + if (!vnlocked) result = vn_lock(vn, LK_EXCLUSIVE | LK_RECURSEFAIL | lkflags); if (result == 0) { /* NOTE: vnode now locked! */ @@ -1228,7 +1231,7 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t (flags & PGO_PDFREECLUST) ? IO_NOCACHE : 0, curproc->p_ucred); - if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0) + if (!vnlocked) VOP_UNLOCK(vn); } Index: uvm/uvm_vnode.h =================================================================== RCS file: /cvs/src/sys/uvm/uvm_vnode.h,v retrieving revision 1.19 diff -u -p -r1.19 uvm_vnode.h --- uvm/uvm_vnode.h 10 Sep 2022 16:14:36 -0000 1.19 +++ uvm/uvm_vnode.h 10 Sep 2022 17:55:33 -0000 @@ -44,24 +44,27 @@ * vnode handle into the VM system. */ +struct vnode; + /* * the uvm_vnode structure. 
+ * + * Locks used to protect struct members in this file: + * I immutable after creation + * K kernel lock + * S uvn_sync_lock + * v u_obj's vmobjlock */ - -struct vnode; - struct uvm_vnode { struct uvm_object u_obj; /* the actual VM object */ - struct vnode *u_vnode; /* pointer back to vnode */ - int u_flags; /* flags */ - int u_nio; /* number of running I/O requests */ - voff_t u_size; /* size of object */ + struct vnode *u_vnode; /* [I] pointer back to vnode */ + int u_flags; /* [v] flags */ + int u_nio; /* [v] number of running I/O requests */ + voff_t u_size; /* [v] size of object */ - /* the following entry is locked by uvn_wl_lock */ - LIST_ENTRY(uvm_vnode) u_wlist; /* list of writeable vnode objects */ + LIST_ENTRY(uvm_vnode) u_wlist; /* [K] list of writeable vnode objs */ - /* the following entry is locked by uvn_sync_lock */ - SIMPLEQ_ENTRY(uvm_vnode) u_syncq; /* vnode objects due for a "sync" */ + SIMPLEQ_ENTRY(uvm_vnode) u_syncq; /* [S] vnode objs due for a "sync" */ }; /*